In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
import xgboost as xgb

# Load dataset
df = pd.read_csv("/Users/sivakarthick/Hub/flask/model/train_u6lujuX_CVtuZ9i.csv")

# Drop Loan_ID column
df = df.drop(columns=['Loan_ID'])

# Encode target
df['Loan_Status'] = df['Loan_Status'].map({'Y': 1, 'N': 0})

# Handle missing values
for col in df.select_dtypes(include='object').columns:
    df[col].fillna(df[col].mode()[0], inplace=True)

for col in df.select_dtypes(include=['float64', 'int64']).columns:
    df[col].fillna(df[col].median(), inplace=True)

# Encode categorical features
label_encoders = {}
for col in df.select_dtypes(include='object').columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Split features and target
X = df.drop('Loan_Status', axis=1)
y = df['Loan_Status']

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# XGBoost model
model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss')
model.fit(X_train, y_train)

# Predictions and evaluation
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.7398373983739838
Classification Report:
               precision    recall  f1-score   support

           0       0.69      0.47      0.56        43
           1       0.76      0.89      0.82        80

    accuracy                           0.74       123
   macro avg       0.72      0.68      0.69       123
weighted avg       0.73      0.74      0.73       123



The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting value

In [6]:
import joblib

# Save model
joblib.dump(model, 'loan_approval_model.pkl')
print("Model saved as loan_approval_model.pkl")

Model saved as loan_approval_model.pkl


In [7]:
model = joblib.load('loan_approval_model.pkl')


In [8]:
# Save encoders
joblib.dump(label_encoders, 'label_encoders.pkl')


['label_encoders.pkl']

In [9]:
label_encoders = joblib.load('label_encoders.pkl')


In [10]:
# Example input (should match the training features)
input_data = {
    'Gender': ['Male'],
    'Married': ['Yes'],
    'Dependents': ['0'],
    'Education': ['Graduate'],
    'Self_Employed': ['No'],
    'ApplicantIncome': [5000],
    'CoapplicantIncome': [0.0],
    'LoanAmount': [150],
    'Loan_Amount_Term': [360],
    'Credit_History': [1.0],
    'Property_Area': ['Urban']
}

# Convert to DataFrame
input_df = pd.DataFrame(input_data)


In [11]:
# Encode input using the same label encoders
for col in input_df.select_dtypes(include='object').columns:
    le = label_encoders[col]
    input_df[col] = le.transform(input_df[col])


In [12]:
# Predict
prediction = model.predict(input_df)

# Output result
result = 'Approved' if prediction[0] == 1 else 'Rejected'
print("Loan Status Prediction:", result)


Loan Status Prediction: Approved
