In [3]:
# ==========================================
# JP Morgan Quantitative Analyst Internship
# Task: Probability of Default & Expected Loss Model
# ==========================================

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score

In [5]:
# Step 1: Load the dataset
df  = pd.read_csv("Task 3 and 4_Loan_Data.csv")
df.head()

Unnamed: 0,customer_id,credit_lines_outstanding,loan_amt_outstanding,total_debt_outstanding,income,years_employed,fico_score,default
0,8153374,0,5221.545193,3915.471226,78039.38546,5,605,0
1,7442532,5,1958.928726,8228.75252,26648.43525,2,572,1
2,2256073,0,3363.009259,2027.83085,65866.71246,4,602,0
3,4885975,0,4766.648001,2501.730397,74356.88347,5,612,0
4,4700614,1,1345.827718,1768.826187,23448.32631,6,631,0


In [7]:
# Step 2: Define features (X) and target (y)
x = df.drop(columns = ["customer_id","default"])
y = df["default"]

In [8]:
# Step 3: Split data for training and testing
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 42)

In [11]:
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [12]:
# Step 5: Train logistic regression model
model = LogisticRegression(max_iter = 1000)
model.fit(x_train_scaled,y_train)


In [13]:
# Step 6: Evaluate model
y_pred = model.predict(x_test_scaled)
y_prob = model.predict_proba(x_test_scaled)[:,1]

print("Model Evaluation")
print("---------------")
print("Accuracy", round(accuracy_score(y_test,y_pred),3))
print("AUC Score", round(roc_auc_score(y_test,y_pred),3))

Model Evaluation
---------------
Accuracy 0.996
AUC Score 0.989


In [18]:
# Step 7: Function to predict Expected Loss
def predicted_expected_loss(borrower_details, model=model, scaler = scaler, recover_rate = 0.1):
     """
     borrower_details: dict containing borrower's financial info
     """
     borrower_df = pd.DataFrame([borrower_details]) 
     borrower_scaled = scaler.transform(borrower_df)

     # Probability of Default (PD)
     pd_prob = model.predict_proba(borrower_scaled)[0][1]

     # Expected Loss = PD Ã— (1 - Recovery Rate) Ã— Loan Amount
     loan_amount = borrower_details.get("loan_amt_outstanding",0)
     expected_loss = pd_prob * (1- recover_rate) * loan_amount

     return{
         "probability of Default": round(pd_prob, 3),
         "Expected Loss ($)": round(expected_loss, 2)
     }


In [22]:
# Step 8: Test the function with a sample borrower
sample_borrower = {
    "credit_lines_outstanding": 3,
    "loan_amt_outstanding": 15000,
    "total_debt_outstanding": 40000,
    "income": 55000,
    "years_employed": 5,
    "fico_score": 620
}

result = predicted_expected_loss(sample_borrower)
print("\nðŸ’° Sample Borrower Risk Prediction")
print("----------------------------------")
print(result)


ðŸ’° Sample Borrower Risk Prediction
----------------------------------
{'probability of Default': 1.0, 'Expected Loss ($)': 13499.99}
