In [1]:
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
df = pd.read_csv("train.csv")

In [3]:
df = df[['Gender', 'Married', 'Education', 'Self_Employed',
         'LoanAmount', 'Loan_Amount_Term', 'Credit_History',
         'Property_Area', 'Loan_Status']]

In [4]:
for col in ['Gender', 'Married', 'Self_Employed']:
    df[col].fillna(df[col].mode()[0], inplace=True)
df['LoanAmount'].fillna(df['LoanAmount'].median(), inplace=True)
df['Loan_Amount_Term'].fillna(df['Loan_Amount_Term'].mode()[0], inplace=True)
df['Credit_History'].fillna(df['Credit_History'].mode()[0], inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['LoanAmount'].fillna(df['LoanAmount'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we

In [5]:
df['LoanAmount'] = np.log1p(df['LoanAmount'])

In [6]:
df['Is_Graduate'] = df['Education'].map({'Graduate': 1, 'Not Graduate': 0})
df['Is_Self_Employed'] = df['Self_Employed'].map({'Yes': 1, 'No': 0})
df['Is_Married'] = df['Married'].map({'Yes': 1, 'No': 0})
df['Is_Male'] = df['Gender'].map({'Male': 1, 'Female': 0})
df['Loan_Per_Term'] = df['LoanAmount'] / df['Loan_Amount_Term']

In [7]:
df.drop(['Gender', 'Married', 'Education', 'Self_Employed'], axis=1, inplace=True)

In [8]:
df = pd.get_dummies(df, columns=['Property_Area'], drop_first=True)

In [9]:
scaler = StandardScaler()
df[['LoanAmount', 'Loan_Amount_Term', 'Loan_Per_Term']] = scaler.fit_transform(
    df[['LoanAmount', 'Loan_Amount_Term', 'Loan_Per_Term']]
)

In [10]:
df['Loan_Status'] = df['Loan_Status'].map({'Y': 1, 'N': 0})

In [11]:
X = df.drop('Loan_Status', axis=1)
y = df['Loan_Status']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [12]:
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2],
    'bootstrap': [True, False]
}

rf = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid,
                           cv=5, n_jobs=-1, scoring='f1', verbose=1)
grid_search.fit(X_train, y_train)

best_model = grid_search.best_estimator_
print(f"✅ Best Parameters: {grid_search.best_params_}")

Fitting 5 folds for each of 48 candidates, totalling 240 fits
✅ Best Parameters: {'bootstrap': True, 'max_depth': 5, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 100}


In [13]:
y_pred = best_model.predict(X_test)

acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred)
rec = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"\n📊 Evaluation Metrics:")
print(f"Accuracy : {acc:.4f}")
print(f"Precision: {prec:.4f}")
print(f"Recall   : {rec:.4f}")
print(f"F1-Score : {f1:.4f}")


📊 Evaluation Metrics:
Accuracy : 0.7805
Precision: 0.7573
Recall   : 0.9750
F1-Score : 0.8525


In [14]:
with open("loan_model.pkl", "wb") as f:
    pickle.dump(best_model, f)

with open("scaler.pkl", "wb") as f:
    pickle.dump(scaler, f)

print("\n✅ Model and scaler saved using pickle: 'loan_model.pkl' and 'scaler.pkl'")


✅ Model and scaler saved using pickle: 'loan_model.pkl' and 'scaler.pkl'
