In [24]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import pickle

In [25]:
# Location of the raw loan dataset.
# NOTE(review): this is an absolute path on one specific machine; the notebook
# will not run elsewhere until it is pointed at a local copy of loan_data.csv.
DATA_PATH = r"C:\Users\User\Downloads\Home-Loan-Prediction-main\Home-Loan-Prediction-main\loan_data.csv"

# Read the CSV into the frame every later cell works from.
df = pd.read_csv(DATA_PATH)

In [26]:
# Label -> integer code tables for the two categorical columns.
EMPLOYMENT_TYPE_CODES = {'salaried': 0, 'self-employed': 1}
PREDICTION_CODES = {'yes': 1, 'no': 0}


def _encode_labels(column, codes):
    """Return `column` with every raw label replaced by its integer code.

    Parameters
    ----------
    column : pandas.Series
        Column holding either raw labels (the keys of `codes`) or values
        that an earlier run of this cell already encoded.
    codes : dict
        Mapping of raw label -> integer code.

    Returns
    -------
    pandas.Series
        The encoded column (returned unchanged when it was already encoded).

    Raises
    ------
    ValueError
        If the column contains a value that is not a key of `codes`
        (missing values included).
    """
    # Re-running the cell must be a no-op: a plain .map() over a column that
    # already holds the codes would replace every value with NaN.
    if column.isin(list(codes.values())).all():
        return column

    encoded = column.map(codes)

    # .map() yields NaN for anything outside the table; fail here with a
    # clear message instead of letting NaN reach the estimators.
    unknown = column[encoded.isna()].unique().tolist()
    if unknown:
        raise ValueError(
            f"Unexpected value(s) in column {column.name!r}: {unknown}; "
            f"expected one of {list(codes)}"
        )
    return encoded.astype('int64')


df['employment_type'] = _encode_labels(df['employment_type'], EMPLOYMENT_TYPE_CODES)
df['prediction'] = _encode_labels(df['prediction'], PREDICTION_CODES)

In [27]:
# Columns fed to the classifiers, in the order the models are trained on.
FEATURE_COLUMNS = [
    'gross_income',
    'tenure',
    'interest_rate',
    'other_emis',
    'credit_score',
    'age',
    'employment_type',
    'loan_amount',
]
# Column holding the label to predict.
TARGET_COLUMN = 'prediction'

# Feature matrix and target vector.
X = df[FEATURE_COLUMNS]
y = df[TARGET_COLUMN]

In [28]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [29]:
# Candidate classifiers, keyed by the display name used when reporting scores.
#
# Random Forest and Decision Tree use randomness while fitting, so without a
# seed their accuracy -- and therefore which model is selected and saved as
# "best" -- can differ from one run of the notebook to the next. They are
# pinned to the same seed value (42) that the train/test split uses.
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "SVM": SVC(),
    "KNN": KNeighborsClassifier()
}

In [30]:
# Test-set accuracy per candidate, keyed by the model's display name.
model_scores = {}

for name, model in models.items():
    # Train on the training split, then predict the held-out rows.
    # fit() returns the fitted estimator, so the two calls can be chained.
    preds = model.fit(X_train, y_train).predict(X_test)

    # Fraction of held-out rows whose label was predicted correctly.
    acc = accuracy_score(y_test, preds)
    model_scores[name] = acc

    print(f"{name} Accuracy: {acc:.4f}")

Logistic Regression Accuracy: 0.4000
Random Forest Accuracy: 0.6000
Decision Tree Accuracy: 0.6000
SVM Accuracy: 0.4500
KNN Accuracy: 0.4500


In [31]:
# Pick the entry with the highest accuracy. max() returns the first maximum it
# meets, so a tie goes to the model that was inserted into the dict first.
best_model_name = max(model_scores.items(), key=lambda entry: entry[1])[0]

# The estimator object (already fitted by the evaluation loop) for that name.
best_model = models[best_model_name]

print(f"\n✅ Best Model: {best_model_name} with Accuracy = {model_scores[best_model_name]:.4f}")


✅ Best Model: Random Forest with Accuracy = 0.6000


In [32]:
# File the selected model is serialized to (relative to the working directory).
MODEL_PATH = 'best_home_loan_model.pkl'

# Persist the chosen estimator with pickle so it can be loaded without retraining.
with open(MODEL_PATH, 'wb') as model_file:
    pickle.dump(best_model, model_file)