In [5]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler

df = pd.read_csv('/content/train_u6lujuX_CVtuZ9i.csv')

# Drop missing values for simplicity (you can impute for more realism)
df = df.dropna()

# Convert '3+' to numeric 3
df['Dependents'] = df['Dependents'].replace('3+', 3).astype(int)



# Encode categorical features
categorical_cols = ['Gender', 'Married', 'Education', 'Self_Employed', 'Property_Area', 'Loan_Status']
for col in categorical_cols:
    df[col] = LabelEncoder().fit_transform(df[col])

# Features and target
X = df.drop(['Loan_ID', 'Loan_Status'], axis=1)
y = df['Loan_Status']

In [6]:
df

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
1,LP001003,1,1,1,0,0,4583,1508.0,128.0,360.0,1.0,0,0
2,LP001005,1,1,0,0,1,3000,0.0,66.0,360.0,1.0,2,1
3,LP001006,1,1,0,1,0,2583,2358.0,120.0,360.0,1.0,2,1
4,LP001008,1,0,0,0,0,6000,0.0,141.0,360.0,1.0,2,1
5,LP001011,1,1,2,0,1,5417,4196.0,267.0,360.0,1.0,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
609,LP002978,0,0,0,0,0,2900,0.0,71.0,360.0,1.0,0,1
610,LP002979,1,1,3,0,0,4106,0.0,40.0,180.0,1.0,0,1
611,LP002983,1,1,1,0,0,8072,240.0,253.0,360.0,1.0,2,1
612,LP002984,1,1,2,0,0,7583,0.0,187.0,360.0,1.0,2,1


In [7]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [8]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'Random Forest': RandomForestClassifier(),
    'Gradient Boosting': GradientBoostingClassifier(),
    'SVM': SVC()
}

def evaluate(model, X_test, y_test):
    y_pred = model.predict(X_test)
    return {
        'Accuracy': accuracy_score(y_test, y_pred),
        'Precision': precision_score(y_test, y_pred),
        'Recall': recall_score(y_test, y_pred),
        'F1': f1_score(y_test, y_pred)
    }

results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    results[name] = evaluate(model, X_test, y_test)

for name, metrics in results.items():
    print(f"\n{name}")
    for metric, score in metrics.items():
        print(f"{metric}: {score:.4f}")


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(



Logistic Regression
Accuracy: 0.8229
Precision: 0.8000
Recall: 1.0000
F1: 0.8889

Random Forest
Accuracy: 0.7917
Precision: 0.8000
Recall: 0.9412
F1: 0.8649

Gradient Boosting
Accuracy: 0.7812
Precision: 0.7901
Recall: 0.9412
F1: 0.8591

SVM
Accuracy: 0.7083
Precision: 0.7083
Recall: 1.0000
F1: 0.8293


In [10]:
from sklearn.model_selection import GridSearchCV

grid_params = {
    'n_estimators': [100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5]
}

grid_search = GridSearchCV(RandomForestClassifier(), grid_params, cv=5, scoring='f1')
grid_search.fit(X_train, y_train)
best_grid = grid_search.best_estimator_


In [11]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint

random_params = {
    'n_estimators': randint(100, 300),
    'max_depth': randint(3, 10),
    'learning_rate': [0.01, 0.05, 0.1]
}

random_search = RandomizedSearchCV(GradientBoostingClassifier(), random_params, n_iter=10, cv=5, scoring='f1', random_state=42)
random_search.fit(X_train, y_train)
best_random = random_search.best_estimator_


In [12]:
tuned_models = {
    'Tuned Random Forest (Grid)': best_grid,
    'Tuned Gradient Boosting (Random)': best_random
}

for name, model in tuned_models.items():
    print(f"\n{name}")
    metrics = evaluate(model, X_test, y_test)
    for metric, score in metrics.items():
        print(f"{metric}: {score:.4f}")



Tuned Random Forest (Grid)
Accuracy: 0.8125
Precision: 0.8125
Recall: 0.9559
F1: 0.8784

Tuned Gradient Boosting (Random)
Accuracy: 0.8021
Precision: 0.7882
Recall: 0.9853
F1: 0.8758


In [13]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

param_grid_lr = {
    'C': [0.01, 0.1, 1, 10],                 # Regularization strength
    'solver': ['liblinear', 'lbfgs']         # Optimization algorithms
}

grid_lr = GridSearchCV(
    LogisticRegression(max_iter=1000),
    param_grid_lr,
    cv=5,
    scoring='f1'
)

grid_lr.fit(X_train, y_train)
best_lr = grid_lr.best_estimator_


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [14]:
lr_metrics = evaluate(best_lr, X_test, y_test)

print("\nTuned Logistic Regression")
for metric, score in lr_metrics.items():
    print(f"{metric}: {score:.4f}")



Tuned Logistic Regression
Accuracy: 0.8229
Precision: 0.8000
Recall: 1.0000
F1: 0.8889


In [17]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# Define parameter grid
param_grid_svm = {
    'C': [0.01,0.1, 1, 10, 100],              # Regularization strength
    'gamma': [10,1, 0.1, 0.01, 0.001],      # Kernel coefficient
    'kernel': ['rbf']                    # RBF kernel for non-linear separation
}

grid_svm = GridSearchCV(SVC(), param_grid_svm, cv=5, scoring='f1')
grid_svm.fit(X_train, y_train)
best_svm = grid_svm.best_estimator_


In [18]:
svm_metrics = evaluate(best_svm, X_test, y_test)

print("\nTuned SVM")
for metric, score in svm_metrics.items():
    print(f"{metric}: {score:.4f}")



Tuned SVM
Accuracy: 0.6771
Precision: 0.7033
Recall: 0.9412
F1: 0.8050


#After Tunning

In [19]:
final_models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'Tuned RF (GridSearchCV)': best_grid,
    'Tuned GB (RandomizedSearchCV)': best_random,
    'Tuned SVM': best_svm
}

for name, model in final_models.items():
    model.fit(X_train, y_train)
    metrics = evaluate(model, X_test, y_test)
    print(f"\n{name}")
    for metric, score in metrics.items():
        print(f"{metric}: {score:.4f}")


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(



Logistic Regression
Accuracy: 0.8229
Precision: 0.8000
Recall: 1.0000
F1: 0.8889

Tuned RF (GridSearchCV)
Accuracy: 0.8229
Precision: 0.8228
Recall: 0.9559
F1: 0.8844

Tuned GB (RandomizedSearchCV)
Accuracy: 0.8021
Precision: 0.7882
Recall: 0.9853
F1: 0.8758

Tuned SVM
Accuracy: 0.6771
Precision: 0.7033
Recall: 0.9412
F1: 0.8050


#Logistic Regression is the best model

In [39]:
new_applicant = pd.DataFrame([{
    'Gender': 1,
    'Married': 1,
    'Dependents': 0,
    'Education': 1,
    'Self_Employed': 0,
    'ApplicantIncome': 2583,
    'CoapplicantIncome': 2358.0,
    'LoanAmount': 120.0,
    'Loan_Amount_Term': 360.0,
    'Credit_History': 1.0,
    'Property_Area': 2
}])

prediction = best_lr.predict(new_applicant)
status = 'Approved' if prediction[0] == 1 else 'Rejected'
print("Loan Prediction:", status)

proba = best_lr.predict_proba(new_applicant)
print(f"Approval Probability: {proba[0][1]:.4f}")


Loan Prediction: Approved
Approval Probability: 0.7814


In [40]:
new_applicant = pd.DataFrame([{
    'Gender': 0,
    'Married': 0,
    'Dependents': 0,
    'Education': 0,
    'Self_Employed': 1,
    'ApplicantIncome': 4583,
    'CoapplicantIncome': 0.0,
    'LoanAmount': 133.0,
    'Loan_Amount_Term': 360.0,
    'Credit_History': 0.0,
    'Property_Area': 1
}])

prediction = best_lr.predict(new_applicant)
prob = best_lr.predict_proba(new_applicant)
status = 'Approved' if prediction[0] == 1 else 'Rejected'

print("Loan Prediction:", status)
print(f"Approval Probability: {prob[0][1]:.4f}")


Loan Prediction: Rejected
Approval Probability: 0.1371


In [41]:
# Collect input from user
gender = int(input("Gender (0=Female, 1=Male): "))
married = int(input("Married (0=No, 1=Yes): "))
dependents = int(input("Dependents (0,1,2,3): "))
education = int(input("Education (0=Graduate, 1=Not Graduate): "))
self_employed = int(input("Self_Employed (0=No, 1=Yes): "))
income = float(input("ApplicantIncome: "))
co_income = float(input("CoapplicantIncome: "))
loan_amt = float(input("LoanAmount: "))
loan_term = float(input("Loan_Amount_Term: "))
credit = float(input("Credit_History (0 or 1): "))
property_area = int(input("Property_Area (0=Rural, 1=Semiurban, 2=Urban): "))

# Create DataFrame
user_input = pd.DataFrame([{
    'Gender': gender,
    'Married': married,
    'Dependents': dependents,
    'Education': education,
    'Self_Employed': self_employed,
    'ApplicantIncome': income,
    'CoapplicantIncome': co_income,
    'LoanAmount': loan_amt,
    'Loan_Amount_Term': loan_term,
    'Credit_History': credit,
    'Property_Area': property_area
}])

# Predict
prediction = best_lr.predict(user_input)
prob = best_lr.predict_proba(user_input)

print("Loan Status:", "Approved ✅" if prediction[0] == 1 else "Rejected ❌")
print(f"Approval Probability: {prob[0][1]:.2f}")


Gender (0=Female, 1=Male): 1
Married (0=No, 1=Yes): 0
Dependents (0,1,2,3): 1
Education (0=Graduate, 1=Not Graduate): 1
Self_Employed (0=No, 1=Yes): 0
ApplicantIncome: 2600
CoapplicantIncome: 2100
LoanAmount: 144
Loan_Amount_Term: 360
Credit_History (0 or 1): 1
Property_Area (0=Rural, 1=Semiurban, 2=Urban): 1
Loan Status: Approved ✅
Approval Probability: 0.67


In [64]:
# 👤 Collect inputs from the user interactively
gender = int(input("Gender (0=Female, 1=Male): "))
married = int(input("Married (0=No, 1=Yes): "))
dependents = int(input("Dependents (0,1,2,3): "))
education = int(input("Education (0=Graduate, 1=Not Graduate): "))
self_employed = int(input("Self_Employed (0=No, 1=Yes): "))
income = float(input("ApplicantIncome: "))
co_income = float(input("CoapplicantIncome: "))
loan_amt = float(input("LoanAmount: "))
loan_term = float(input("Loan_Amount_Term: "))
credit = float(input("Credit_History (0 or 1): "))
property_area = int(input("Property_Area (0=Rural, 1=Semiurban, 2=Urban): "))

# 🧾 Create a DataFrame with user input
user_input = pd.DataFrame([{
    'Gender': gender,
    'Married': married,
    'Dependents': dependents,
    'Education': education,
    'Self_Employed': self_employed,
    'ApplicantIncome': income,
    'CoapplicantIncome': co_income,
    'LoanAmount': loan_amt,
    'Loan_Amount_Term': loan_term,
    'Credit_History': credit,
    'Property_Area': property_area
}])

# 🔍 Predict with Tuned Random Forest
prediction = best_grid.predict(user_input)
prob = best_grid.predict_proba(user_input)

# 🧠 Output the results
print("Loan Status:", "Approved ✅" if prediction[0] == 1 else "Rejected ❌")
print(f"Approval Probability: {prob[0][1]:.2f}")


Gender (0=Female, 1=Male): 1
Married (0=No, 1=Yes): 0
Dependents (0,1,2,3): 1
Education (0=Graduate, 1=Not Graduate): 1
Self_Employed (0=No, 1=Yes): 0
ApplicantIncome: 2000
CoapplicantIncome: 2103
LoanAmount: 150
Loan_Amount_Term: 350
Credit_History (0 or 1): 1
Property_Area (0=Rural, 1=Semiurban, 2=Urban): 0
Loan Status: Approved ✅
Approval Probability: 0.83
