<a href="https://colab.research.google.com/github/liateg/loan-default-classifier-ml/blob/main/loan-default.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Project: Loan Default Classifier**
This project aims to build a machine learning model that predicts whether a borrower is likely to default on a loan. The dataset includes information about borrowers' financial and demographic attributes. The goal is to analyze the data, preprocess it, and train classification models to identify patterns that can help lenders assess risk and make informed decisions.

In [33]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


In [3]:
df=pd.read_csv('/content/sample_data/Loan_default.csv')
df.head()

Unnamed: 0,LoanID,Age,Income,LoanAmount,CreditScore,MonthsEmployed,NumCreditLines,InterestRate,LoanTerm,DTIRatio,Education,EmploymentType,MaritalStatus,HasMortgage,HasDependents,LoanPurpose,HasCoSigner,Default
0,I38PQUQS96,56,85994,50587,520,80,4,15.23,36,0.44,Bachelor's,Full-time,Divorced,Yes,Yes,Other,Yes,0.0
1,HPSK72WA7R,69,50432,124440,458,15,1,4.81,60,0.68,Master's,Full-time,Married,No,No,Other,Yes,0.0
2,C1OZ6DPJ8Y,46,84208,129188,451,26,3,21.17,24,0.31,Master's,Unemployed,Divorced,Yes,Yes,Auto,No,1.0
3,V2KKSFM3UN,32,31713,44799,743,0,3,7.07,24,0.23,High School,Full-time,Married,No,No,Business,No,0.0
4,EY08JDHTZP,60,20437,9139,633,8,4,6.51,48,0.73,Bachelor's,Unemployed,Divorced,No,Yes,Auto,No,0.0


In [4]:
df.shape

(194065, 18)

In [10]:
X=df.drop(columns=['LoanID','Default'],axis=1)
Y=df['Default']

In [8]:
X.head()
print(X.shape)

(194065, 16)


In [11]:
Y.head()
print(Y.shape)

(194065,)


In [12]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 194065 entries, 0 to 194064
Data columns (total 16 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   Age             194065 non-null  int64  
 1   Income          194065 non-null  int64  
 2   LoanAmount      194065 non-null  int64  
 3   CreditScore     194065 non-null  int64  
 4   MonthsEmployed  194065 non-null  int64  
 5   NumCreditLines  194065 non-null  int64  
 6   InterestRate    194065 non-null  float64
 7   LoanTerm        194065 non-null  int64  
 8   DTIRatio        194065 non-null  float64
 9   Education       194064 non-null  object 
 10  EmploymentType  194064 non-null  object 
 11  MaritalStatus   194064 non-null  object 
 12  HasMortgage     194064 non-null  object 
 13  HasDependents   194064 non-null  object 
 14  LoanPurpose     194064 non-null  object 
 15  HasCoSigner     194064 non-null  object 
dtypes: float64(2), int64(7), object(7)
memory usage: 23.7+ M

In [20]:
#seprate the number columns and categorical columns in X
num_cols=X.select_dtypes(include=np.number).columns
cat_cols=X.select_dtypes(exclude=np.number).columns

In [21]:
num_cols

Index(['Age', 'Income', 'LoanAmount', 'CreditScore', 'MonthsEmployed',
       'NumCreditLines', 'InterestRate', 'LoanTerm', 'DTIRatio'],
      dtype='object')

In [22]:
Xnum=X[num_cols]
Xcat=X[cat_cols]

In [23]:
Xnum.head()

Unnamed: 0,Age,Income,LoanAmount,CreditScore,MonthsEmployed,NumCreditLines,InterestRate,LoanTerm,DTIRatio
0,56,85994,50587,520,80,4,15.23,36,0.44
1,69,50432,124440,458,15,1,4.81,60,0.68
2,46,84208,129188,451,26,3,21.17,24,0.31
3,32,31713,44799,743,0,3,7.07,24,0.23
4,60,20437,9139,633,8,4,6.51,48,0.73


In [24]:
Xcat.head()

Unnamed: 0,Education,EmploymentType,MaritalStatus,HasMortgage,HasDependents,LoanPurpose,HasCoSigner
0,Bachelor's,Full-time,Divorced,Yes,Yes,Other,Yes
1,Master's,Full-time,Married,No,No,Other,Yes
2,Master's,Unemployed,Divorced,Yes,Yes,Auto,No
3,High School,Full-time,Married,No,No,Business,No
4,Bachelor's,Unemployed,Divorced,No,Yes,Auto,No


In [27]:
Xcat_encoded = pd.get_dummies(Xcat, drop_first=True).astype(int)


In [28]:
Xcat_encoded.head()

Unnamed: 0,Education_High School,Education_Master's,Education_PhD,EmploymentType_Part-time,EmploymentType_Self-employed,EmploymentType_Unemployed,MaritalStatus_Married,MaritalStatus_Single,HasMortgage_Yes,HasDependents_Yes,LoanPurpose_Business,LoanPurpose_Education,LoanPurpose_Home,LoanPurpose_Other,HasCoSigner_Yes
0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1
1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1
2,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0
3,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0
4,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0


In [29]:
X=pd.concat([Xnum,Xcat_encoded],axis=1)

In [30]:
X.isna().sum()

Unnamed: 0,0
Age,0
Income,0
LoanAmount,0
CreditScore,0
MonthsEmployed,0
NumCreditLines,0
InterestRate,0
LoanTerm,0
DTIRatio,0
Education_High School,0


In [32]:
scaler=StandardScaler()
X[num_cols]=scaler.fit_transform(X[num_cols])

In [34]:
X_train,X_teat,Y_train,Y_test=train_test_split(X,Y,test_size=0.2,random_state=42)