Task 3
***
Build a model in Python that estimates the probability of default for a borrower.
The model takes the borrower's details as input, such as income, total loans outstanding, credit score, and others.

In [None]:
# Importing required modules
from datetime import datetime

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Load the loan dataset from CSV into a DataFrame
# (the path is relative to the current working directory)
df = pd.read_csv("Data/Loan_Data.csv")

In [None]:
# Visualise the data: preview the first few rows of the DataFrame
df.head()

In [None]:
# Data Preprocessing
# Inspect the column dtypes and non-null counts first, to see whether any
# cleaning, missing-value handling, or categorical encoding is needed
df.info()

In [None]:
# Change the datatype of the customer id column to string
# (it is excluded from the model features when X is built)
df["customer_id"] = df["customer_id"].astype("str")

In [None]:
# Re-check the dtypes after casting customer_id to string
df.info()

In [None]:
# Generate descriptive (summary) statistics for the DataFrame's columns
df.describe()

In [None]:
# Preview the first few rows again after the customer_id dtype change
df.head()

In [None]:
# Feature scaling standardizes or normalizes numerical features so that
# they are on a similar scale.
# Here we are going to scale fico_score.

In [None]:
# Target column: the default flag the model is trained to predict
y = df["default"]
# Predictors: every remaining column except the target and the customer id
X = df.drop(columns=["default", "customer_id"])
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Min-max scaler used in the next cell to rescale the fico_score column
scaler = MinMaxScaler()

In [None]:
# Learn the min/max of fico_score from the training rows only, then scale them
X_train["fico_score"] = scaler.fit_transform(X_train[["fico_score"]].values)
# Apply the SAME (training) min/max to the test rows. Re-fitting the scaler on
# the test set would scale it with different parameters than the model was
# trained with and would leak test-set statistics into the pipeline.
X_test["fico_score"] = scaler.transform(X_test[["fico_score"]].values)

In [None]:
# Initialize and train the logistic regression model on the training split
# (random_state is fixed so repeated runs use the same seed)
model = LogisticRegression(random_state=42)
model.fit(X_train, y_train)

In [None]:
# Make predictions on the held-out test set
# (predict() yields class labels, not probabilities)
y_pred = model.predict(X_test)

In [None]:
# Evaluate the model by comparing the test-set predictions with the true labels
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

In [None]:
# Report accuracy, confusion matrix and classification report, one item per
# line (the leading "\n" in each heading leaves a blank line before it)
print(
    f"Accuracy: {accuracy:.2f}",
    "\nConfusion Matrix:",
    conf_matrix,
    "\nClassification Report:",
    classification_rep,
    sep="\n",
)

In [None]:
# Pull the fitted parameters out of the model (first entry of coef_ and of
# intercept_) so the probability can be computed by hand further down
coefficients, intercept = model.coef_[0], model.intercept_[0]
print("Coefficients: ", coefficients)
print("Intercept: ", intercept)

In [None]:
def scaled(score):
    """
    Performs min-max normalization of a raw FICO score.

    The minimum and maximum are taken from the training rows only, so a new
    score is scaled the same way as the scores the model was fitted on.
    (Using the range of the whole dataset could differ from the training
    range and would shift the model input.)

    Parameters:
    - score: raw (unscaled) FICO score

    Returns:
    - the score mapped linearly so that the training minimum is 0 and the
      training maximum is 1
    """
    # X_train["fico_score"] holds already-scaled values, so look the raw
    # scores up in df using the training rows' index labels.
    train_scores = df.loc[X_train.index, "fico_score"]
    lowest = train_scores.min()
    highest = train_scores.max()
    return (score - lowest) / (highest - lowest)

In [None]:
# Define the logistic function
def logistic_function(features):
    """
    This function calculates the probability of default of a loan given the features

    Parameters:
    - features: sequence of raw feature values for one borrower; the last
      entry is the unscaled FICO score. The order presumably has to match
      the column order the model was trained on -- confirm against X.

    Returns:
    - probability of default, a value between 0 and 1
    """
    # Work on a copy: scaling in place would modify the caller's list, so
    # passing the same list twice would scale the FICO score twice.
    model_inputs = list(features)
    model_inputs[-1] = scaled(model_inputs[-1])
    # Calculate the linear combination of features and coefficients
    linear_combination = np.dot(model_inputs, coefficients) + intercept
    # Apply the logistic function (sigmoid)
    return 1 / (1 + np.exp(-linear_combination))

In [None]:
def expected_loss_on_loan(
    credit_lines_outstanding,
    loan_amt_outstanding,
    total_debt_outstanding,
    income,
    years_employed,
    fico_score,
    recovery_rate=0.10,
):
    """
    Takes in the properties of a loan and outputs the expected loss (£)

    The expected loss is computed as
        probability_of_default * (1 - recovery_rate) * loan_amt_outstanding
    i.e. the chance of default times the share of the loan that is not
    recovered times the amount still owed.

    Parameters:
    - credit_lines_outstanding: the number of active credit lines that a borrower has
    - loan_amt_outstanding: the total amount of money that a borrower still owes on their outstanding loans
    - total_debt_outstanding:  the total debt outstanding of a borrower
    - income: borrower's income
    - years_employed: number of years the borrower was employed
    - fico_score: credit score of borrower
    - recovery_rate: fraction of the outstanding loan amount recovered after a default (between 0 and 1, default 0.10)

    Returns:
    - expected_loss: expected loss (£) on the loan, rounded to one decimal place

    Raises:
    - ValueError: if recovery_rate is not between 0 and 1
    """
    if not 0 <= recovery_rate <= 1:
        raise ValueError(
            f"recovery_rate must be between 0 and 1, got {recovery_rate!r}"
        )
    # Raw feature values, passed to the model in this order
    features = [
        credit_lines_outstanding,
        loan_amt_outstanding,
        total_debt_outstanding,
        income,
        years_employed,
        fico_score,
    ]
    probability_of_default = logistic_function(features)
    # Share of the exposure that is lost if the borrower defaults
    loss_given_default = 1 - recovery_rate
    expected_loss = probability_of_default * loss_given_default * loan_amt_outstanding
    # Always return a plain number so callers can use the result in arithmetic
    return round(float(expected_loss), 1)

In [None]:
# Example: borrower with no open credit lines and a FICO score of 605
expected_loss_on_loan(
    credit_lines_outstanding=0,
    loan_amt_outstanding=5221,
    total_debt_outstanding=3915,
    income=78039,
    years_employed=5,
    fico_score=605,
    recovery_rate=0.10,
)

In [None]:
# Example: borrower with five open credit lines and a FICO score of 572
expected_loss_on_loan(
    credit_lines_outstanding=5,
    loan_amt_outstanding=1959,
    total_debt_outstanding=8229,
    income=26648,
    years_employed=2,
    fico_score=572,
    recovery_rate=0.10,
)