In [82]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils.multiclass import unique_labels

In [83]:
train = pd.read_csv("/kaggle/input/loan-prediction-data/train.txt", sep=",")   # adjust sep if needed
test = pd.read_csv("/kaggle/input/loan-prediction-data/test.txt", sep=",")

In [84]:
print("Train shape:", train.shape)
print("Test shape:", test.shape)

Train shape: (614, 13)
Test shape: (367, 12)


In [85]:
X = train.drop("Loan_Status", axis=1)
y = train["Loan_Status"]

In [86]:
num_cols = X.select_dtypes(include=np.number).columns
cat_cols = X.select_dtypes(exclude=np.number).columns

In [87]:
imputer_num = SimpleImputer(strategy="mean")
imputer_cat = SimpleImputer(strategy="most_frequent")

In [88]:
X[num_cols] = imputer_num.fit_transform(X[num_cols])
X[cat_cols] = imputer_cat.fit_transform(X[cat_cols])


In [89]:
test[num_cols] = imputer_num.transform(test[num_cols])
test[cat_cols] = imputer_cat.transform(test[cat_cols])

In [90]:
# Drop Loan_ID from both train and test
X = train.drop(columns=["Loan_ID", "Loan_Status"])
y = train["Loan_Status"]

test_ids = test["Loan_ID"]  # save IDs for submission later
test = test.drop(columns=["Loan_ID"])


In [91]:
le_dict = {}
for col in X.select_dtypes(include="object").columns:
    le = LabelEncoder()
    combined = pd.concat([X[col], test[col]], axis=0)  # fit on both train+test
    le.fit(combined)
    X[col] = le.transform(X[col])
    test[col] = le.transform(test[col])
    le_dict[col] = le

In [92]:
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [107]:
def plot_conf_matrix(y_val, y_val_pred, title="Confusion Matrix"):
    cm = confusion_matrix(y_true, y_pred)
    labels = unique_labels(y_val, y_val_pred)  # dynamic labels
    plt.figure(figsize=(5,4))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                xticklabels=labels, yticklabels=labels)
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.title(title)
    plt.show()

In [108]:
def evaluate_model(model, X_tr, y_tr, X_val, y_val):
    model.fit(X_tr, y_tr)
    preds = model.predict(X_val)
    print(model.__class__.__name__)
    print(confusion_matrix(y_val, preds))
    print(classification_report(y_val, preds))
    return model

In [101]:
imputer = SimpleImputer(strategy="median")

In [102]:
# Fit on training set, transform train and validation sets
X_train = imputer.fit_transform(X_train)
X_val = imputer.transform(X_val)

In [103]:
# Logistic Regression
log_reg = LogisticRegression(max_iter=1000)
evaluate_model(log_reg, X_train, y_train, X_val, y_val)

LogisticRegression
[[22 16]
 [ 1 84]]
              precision    recall  f1-score   support

           N       0.96      0.58      0.72        38
           Y       0.84      0.99      0.91        85

    accuracy                           0.86       123
   macro avg       0.90      0.78      0.81       123
weighted avg       0.88      0.86      0.85       123



In [104]:
# Decision Tree
dtree = DecisionTreeClassifier(max_depth=5, random_state=42)
evaluate_model(dtree, X_train, y_train, X_val, y_val)

DecisionTreeClassifier
[[21 17]
 [ 5 80]]
              precision    recall  f1-score   support

           N       0.81      0.55      0.66        38
           Y       0.82      0.94      0.88        85

    accuracy                           0.82       123
   macro avg       0.82      0.75      0.77       123
weighted avg       0.82      0.82      0.81       123



In [105]:
final_model = log_reg.fit(X_train, y_train)
test_preds = final_model.predict(test)


  If False, the input will be checked for consistency with


In [106]:
pd.DataFrame({"Loan_Status_Pred": test_preds}).to_csv("loan_predictions.csv", index=False)