In [37]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import roc_auc_score, precision_score, recall_score,f1_score
import numpy as np

data = pd.read_csv('Task 3 and 4_Loan_Data.csv')

data.fillna(method='ffill', inplace=True)

X = data.drop('default', axis=1)
y = data['default']

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

models = {
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "Gradient Boosting": GradientBoostingClassifier()
}
model_performance = {}

for model_name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    y_pred_proba = model.predict_proba(X_test_scaled)[:, 1]
    auc = roc_auc_score(y_test, y_pred_proba)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    model_performance[model_name] = {
        "AUC": auc,
        "Precision": precision,
        "Recall": recall,
        "F1-Score": f1
    }

    best_model_name = max(model_performance, key=lambda X: model_performance[X]['AUC'])
    best_model = models[best_model_name]

    best_model.fit(scaler.fit_transform(x), y)

    def expected_loss(borrower_details):
        borrower_details_scaled = scaler.transform([borrower_details])
        pd = best_model.predict_proba(borrower_details_scaled)[:, 1][0]
        recovery_rate = 0.10
        expected_loss_value = (1 - recovery_rate) * pd
        return expected_loss_value

    borrower_details = X.iloc[0].values
    loss = expected_loss(borrower_details)
    print(f"Expected Loss: {loss}")

    print("Model Performance:")
    for model_name, metrics in model_performance.items():
        print(f"{model_name}: AUC={metrics['AUC']:.4f}, Precisiom={metrics['Precision']:.4f}, Recall={metrics['Recall']:.4f}, F1-Score={metrics['F1-Score']:.4f}")

    
    
    
     



Expected Loss: 2.9570209919527952e-12
Model Performance:
Logistic Regression: AUC=1.0000, Precisiom=0.9981, Recall=0.9852, F1-Score=0.9916




Expected Loss: 2.9570209919527952e-12
Model Performance:
Logistic Regression: AUC=1.0000, Precisiom=0.9981, Recall=0.9852, F1-Score=0.9916
Decision Tree: AUC=0.9918, Precisiom=0.9926, Recall=0.9852, F1-Score=0.9889




Expected Loss: 2.9570209919527952e-12
Model Performance:
Logistic Regression: AUC=1.0000, Precisiom=0.9981, Recall=0.9852, F1-Score=0.9916
Decision Tree: AUC=0.9918, Precisiom=0.9926, Recall=0.9852, F1-Score=0.9889
Random Forest: AUC=0.9998, Precisiom=0.9888, Recall=0.9815, F1-Score=0.9852




Expected Loss: 2.9570209919527952e-12
Model Performance:
Logistic Regression: AUC=1.0000, Precisiom=0.9981, Recall=0.9852, F1-Score=0.9916
Decision Tree: AUC=0.9918, Precisiom=0.9926, Recall=0.9852, F1-Score=0.9889
Random Forest: AUC=0.9998, Precisiom=0.9888, Recall=0.9815, F1-Score=0.9852
Gradient Boosting: AUC=0.9999, Precisiom=0.9944, Recall=0.9871, F1-Score=0.9907
