In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report

df = pd.read_csv("loan_prediction.csv")

df['Dependents'] = df['Dependents'].replace('3+', 3).astype(float)


for col in df.columns:
    if df[col].dtype == 'object':
        df[col] = df[col].fillna(df[col].mode()[0])
    else:
        df[col] = df[col].fillna(df[col].median())

encoder = LabelEncoder()
for col in ['Gender','Married','Education','Self_Employed','Property_Area','Loan_Status']:
    df[col] = encoder.fit_transform(df[col].astype(str))

X = df.drop(columns=['Loan_ID','Loan_Status'])
y = df['Loan_Status']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

log_model = LogisticRegression(max_iter=2000)
tree_model = DecisionTreeClassifier(random_state=42)
forest_model = RandomForestClassifier(random_state=42)

log_model.fit(X_train, y_train)
tree_model.fit(X_train, y_train)
forest_model.fit(X_train, y_train)


models = {
    "Logistic Regression": log_model,
    "Decision Tree": tree_model,
    "Random Forest": forest_model
}

for name, model in models.items():
    y_pred = model.predict(X_test)
    print(f"\n📊 {name} Performance:")
    print(f"Accuracy  : {accuracy_score(y_test, y_pred):.4f}")
    print(f"Precision : {precision_score(y_test, y_pred):.4f}")
    print(f"Recall    : {recall_score(y_test, y_pred):.4f}")
    print(f"F1 Score  : {f1_score(y_test, y_pred):.4f}")
    print(f"ROC-AUC   : {roc_auc_score(y_test, y_pred):.4f}")
    print("\nClassification Report:\n", classification_report(y_test, y_pred))


df['Predicted_Logistic'] = log_model.predict(X)
df['Predicted_Tree'] = tree_model.predict(X)
df['Predicted_Forest'] = forest_model.predict(X)


reverse_map = {0:'N', 1:'Y'}
df['Loan_Status'] = df['Loan_Status'].map(reverse_map)
df['Predicted_Logistic'] = df['Predicted_Logistic'].map(reverse_map)
df['Predicted_Tree'] = df['Predicted_Tree'].map(reverse_map)
df['Predicted_Forest'] = df['Predicted_Forest'].map(reverse_map)


df.to_csv("loan_predictions_with_future.csv", index=False)

print("\n File saved: loan_predictions_with_future.csv (with actual + 3 predicted columns)")


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(



📊 Logistic Regression Performance:
Accuracy  : 0.7886
Precision : 0.7596
Recall    : 0.9875
F1 Score  : 0.8587
ROC-AUC   : 0.7031

Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.42      0.58        43
           1       0.76      0.99      0.86        80

    accuracy                           0.79       123
   macro avg       0.85      0.70      0.72       123
weighted avg       0.83      0.79      0.76       123


📊 Decision Tree Performance:
Accuracy  : 0.6911
Precision : 0.7561
Recall    : 0.7750
F1 Score  : 0.7654
ROC-AUC   : 0.6549

Classification Report:
               precision    recall  f1-score   support

           0       0.56      0.53      0.55        43
           1       0.76      0.78      0.77        80

    accuracy                           0.69       123
   macro avg       0.66      0.65      0.66       123
weighted avg       0.69      0.69      0.69       123


📊 Random Forest Performance:
Accuracy  