# **Credit Scoring and Segmentation Model Using Python**

In [1]:
# Import all libraries
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
pio.templates.default = "plotly_white"



In [2]:
# Load the credit-scoring dataset into a DataFrame and display it.
# NOTE(review): the path is absolute (looks like a Colab upload location);
# confirm it is valid in the environment where this notebook is re-run.
csv_path = '/content/credit_scoring.csv'
data = pd.read_csv(csv_path)
data

Unnamed: 0,Age,Gender,Marital Status,Education Level,Employment Status,Credit Utilization Ratio,Payment History,Number of Credit Accounts,Loan Amount,Interest Rate,Loan Term,Type of Loan
0,60,Male,Married,Master,Employed,0.22,2685.0,2,4675000,2.65,48,Personal Loan
1,25,Male,Married,High School,Unemployed,0.20,2371.0,9,3619000,5.19,60,Auto Loan
2,30,Female,Single,Master,Employed,0.22,2771.0,6,957000,2.76,12,Auto Loan
3,58,Female,Married,PhD,Unemployed,0.12,1371.0,2,4731000,6.57,60,Auto Loan
4,32,Male,Married,Bachelor,Self-Employed,0.99,828.0,2,3289000,6.28,36,Personal Loan
...,...,...,...,...,...,...,...,...,...,...,...,...
995,59,Male,Divorced,High School,Employed,0.74,1285.0,8,3530000,12.99,48,Auto Loan
996,64,Male,Divorced,Bachelor,Unemployed,0.77,1857.0,2,1377000,18.02,60,Home Loan
997,63,Female,Single,Master,Self-Employed,0.18,2628.0,10,2443000,18.95,12,Personal Loan
998,51,Female,Married,PhD,Self-Employed,0.32,1142.0,3,1301000,1.80,24,Auto Loan


In [3]:
# Count missing values per column (isna is the pandas alias of isnull).
data.isna().sum()

Unnamed: 0,0
Age,0
Gender,0
Marital Status,0
Education Level,0
Employment Status,0
Credit Utilization Ratio,0
Payment History,0
Number of Credit Accounts,0
Loan Amount,0
Interest Rate,0


In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
data.describe()

Unnamed: 0,Age,Credit Utilization Ratio,Payment History,Number of Credit Accounts,Loan Amount,Interest Rate,Loan Term
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,42.702,0.50995,1452.814,5.58,2471401.0,10.6866,37.128
std,13.266771,0.291057,827.934146,2.933634,1387047.0,5.479058,17.436274
min,20.0,0.0,0.0,1.0,108000.0,1.01,12.0
25%,31.0,0.25,763.75,3.0,1298000.0,6.0225,24.0
50%,42.0,0.53,1428.0,6.0,2437500.0,10.705,36.0
75%,54.0,0.75,2142.0,8.0,3653250.0,15.44,48.0
max,65.0,1.0,2857.0,10.0,4996000.0,19.99,60.0


In [5]:
# Box plot showing how the credit utilization ratio is distributed.
credit_utilization_fig = px.box(
    data,
    y='Credit Utilization Ratio',
    title='Credit Utilization Ratio Distribution',
)
credit_utilization_fig.show()

In [11]:
# Histogram of loan amounts, bucketed into (at most) 20 bins.
loan_amount_fig = px.histogram(
    data,
    x="Loan Amount",
    nbins=20,
    title='Loan amount',
)
loan_amount_fig.show()

In [12]:
# Keep only the numeric columns; the correlation matrix below is computed on these.
numeric_df = data.select_dtypes(include='number')
numeric_df

Unnamed: 0,Age,Credit Utilization Ratio,Payment History,Number of Credit Accounts,Loan Amount,Interest Rate,Loan Term
0,60,0.22,2685.0,2,4675000,2.65,48
1,25,0.20,2371.0,9,3619000,5.19,60
2,30,0.22,2771.0,6,957000,2.76,12
3,58,0.12,1371.0,2,4731000,6.57,60
4,32,0.99,828.0,2,3289000,6.28,36
...,...,...,...,...,...,...,...
995,59,0.74,1285.0,8,3530000,12.99,48
996,64,0.77,1857.0,2,1377000,18.02,60
997,63,0.18,2628.0,10,2443000,18.95,12
998,51,0.32,1142.0,3,1301000,1.80,24


In [13]:
# Pairwise correlation of the numeric features, rendered as a heatmap.
correlation_matrix = numeric_df.corr()
corelation_fig = px.imshow(correlation_matrix, title='Correlation Matrix')
corelation_fig.show()

In [17]:
# Number of customers in each employment-status category.
data["Employment Status"].value_counts()

Unnamed: 0_level_0,count
Employment Status,Unnamed: 1_level_1
Self-Employed,347
Employed,328
Unemployed,325


# **Calculating Credit Scores**
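The dataset doesn’t have any feature representing the credit scores of individuals. To calculate the credit scores, we need to use an appropriate technique. There are several widely used techniques for calculating credit scores, each with its own calculation process. One example is the FICO score, a commonly used credit scoring model in the industry. In this notebook we do not reproduce the FICO score itself; instead, we compute a simple weighted sum of payment history, credit utilization ratio, number of credit accounts, education level, and employment status.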

In [18]:
# Integer codes used to encode the "Education Level" column.
# NOTE(review): the codes are not ordered by level of education
# (Master=0 < High School=1 < PhD=2 < Bachelor=3), yet they are later
# multiplied by a weight in the credit score -- confirm this is intended.
educational_level = {
    "Master": 0,
    "High School": 1,
    "PhD": 2,
    "Bachelor": 3,
}

# Integer codes used to encode the "Employment Status" column
# (despite the "educational_" prefix in the variable name).
educational_status = {
    "Self-Employed": 0,
    "Employed": 1,
    "Unemployed": 2,
}

In [19]:
# Encode the two categorical columns with their integer codes.
#
# The cell is guarded so that it is idempotent: a column that is already
# numeric has been encoded by a previous run and is left untouched (mapping it
# a second time would turn every value into NaN). Labels that have no code
# raise immediately instead of silently becoming NaN.
for column, codes in (("Employment Status", educational_status),
                      ("Education Level", educational_level)):
    if pd.api.types.is_numeric_dtype(data[column]):
        continue  # already encoded by an earlier run of this cell
    encoded = data[column].map(codes)
    unmapped = data.loc[encoded.isna(), column].unique()
    if len(unmapped):
        raise ValueError(f"Unmapped values in {column!r}: {list(unmapped)}")
    data[column] = encoded

In [20]:
# Display the frame to check the encoded "Education Level" / "Employment Status" columns.
data

Unnamed: 0,Age,Gender,Marital Status,Education Level,Employment Status,Credit Utilization Ratio,Payment History,Number of Credit Accounts,Loan Amount,Interest Rate,Loan Term,Type of Loan
0,60,Male,Married,0,1,0.22,2685.0,2,4675000,2.65,48,Personal Loan
1,25,Male,Married,1,2,0.20,2371.0,9,3619000,5.19,60,Auto Loan
2,30,Female,Single,0,1,0.22,2771.0,6,957000,2.76,12,Auto Loan
3,58,Female,Married,2,2,0.12,1371.0,2,4731000,6.57,60,Auto Loan
4,32,Male,Married,3,0,0.99,828.0,2,3289000,6.28,36,Personal Loan
...,...,...,...,...,...,...,...,...,...,...,...,...
995,59,Male,Divorced,1,1,0.74,1285.0,8,3530000,12.99,48,Auto Loan
996,64,Male,Divorced,3,2,0.77,1857.0,2,1377000,18.02,60,Home Loan
997,63,Female,Single,0,0,0.18,2628.0,10,2443000,18.95,12,Personal Loan
998,51,Female,Married,2,0,0.32,1142.0,3,1301000,1.80,24,Auto Loan


In [24]:
# Calculate the credit score as a weighted sum of five features.
#
# Computed column-wise instead of row-by-row with iterrows(): same values, no
# Python-level loop, and no loop variables that overwrite the
# `educational_level` / `educational_status` mapping dictionaries.
#
# NOTE(review): the weights are on different scales (0.35 vs. 10/15/30) and
# the features are not normalized -- confirm these are the intended weights.
score_weights = {
    "Payment History": 0.35,
    "Credit Utilization Ratio": 30,
    "Education Level": 10,
    "Employment Status": 10,
    "Number of Credit Accounts": 15,
}
# Terms are summed in the order listed above.
data["Credit Score"] = sum(
    data[column] * weight for column, weight in score_weights.items()
)
print(data.head())


   Age  Gender Marital Status  Education Level  Employment Status  \
0   60    Male        Married                0                  1   
1   25    Male        Married                1                  2   
2   30  Female         Single                0                  1   
3   58  Female        Married                2                  2   
4   32    Male        Married                3                  0   

   Credit Utilization Ratio  Payment History  Number of Credit Accounts  \
0                      0.22           2685.0                          2   
1                      0.20           2371.0                          9   
2                      0.22           2771.0                          6   
3                      0.12           1371.0                          2   
4                      0.99            828.0                          2   

   Loan Amount  Interest Rate  Loan Term   Type of Loan  Credit Score  
0      4675000           2.65         48  Personal Loan       

In [25]:
# First five rows, now including the computed "Credit Score" column.
data.head()

Unnamed: 0,Age,Gender,Marital Status,Education Level,Employment Status,Credit Utilization Ratio,Payment History,Number of Credit Accounts,Loan Amount,Interest Rate,Loan Term,Type of Loan,Credit Score
0,60,Male,Married,0,1,0.22,2685.0,2,4675000,2.65,48,Personal Loan,986.35
1,25,Male,Married,1,2,0.2,2371.0,9,3619000,5.19,60,Auto Loan,1000.85
2,30,Female,Single,0,1,0.22,2771.0,6,957000,2.76,12,Auto Loan,1076.45
3,58,Female,Married,2,2,0.12,1371.0,2,4731000,6.57,60,Auto Loan,553.45
4,32,Male,Married,3,0,0.99,828.0,2,3289000,6.28,36,Personal Loan,379.5


# **Segmentation Based on Credit Scores**

In [26]:
from sklearn.cluster import KMeans


In [27]:
# Cluster customers into four groups using the credit score as the only feature.
# random_state is fixed so the clustering is reproducible between runs.
X = data[["Credit Score"]]
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42).fit(X)

# Store each customer's cluster id (0-3) as their segment.
data['Segment'] = kmeans.labels_


In [28]:
# Treat the cluster id as a categorical label so Plotly uses discrete colors.
data['Segment'] = data['Segment'].astype('category')

# Scatter of credit score per customer, colored by segment.
segment_colors = ['green', 'blue', 'yellow', 'red']
fig = px.scatter(
    data,
    x=data.index,
    y='Credit Score',
    color='Segment',
    color_discrete_sequence=segment_colors,
)
fig.update_layout(
    title='Customer Segmentation based on Credit Scores',
    xaxis_title='Customer Index',
    yaxis_title='Credit Score',
)
fig.show()

In [29]:
# Give each cluster a human-readable name.
#
# KMeans label ids are arbitrary, so a hard-coded id -> name table can attach
# the wrong name to a cluster. Instead, rank the clusters by their centroid
# (mean credit score) and name them from lowest to highest.
SEGMENT_NAMES = ['Very Low', 'Low', 'Good', 'Excellent']
labels_by_center = kmeans.cluster_centers_.ravel().argsort()
segment_names = {
    int(label): name for label, name in zip(labels_by_center, SEGMENT_NAMES)
}

# Map from kmeans.labels_ rather than from data['Segment'] so the cell can be
# re-run safely (re-mapping already-named segments would produce NaN).
data['Segment'] = (
    pd.Series(kmeans.labels_, index=data.index)
    .map(segment_names)
    .astype('category')
)

# Visualize the segments using Plotly.
# category_orders pins each segment to a fixed color (Excellent=green ...
# Very Low=red); otherwise colors follow the order in which segments first
# appear in the data.
fig = px.scatter(data, x=data.index, y='Credit Score', color='Segment',
                 category_orders={'Segment': ['Excellent', 'Good', 'Low', 'Very Low']},
                 color_discrete_sequence=['green', 'blue', 'yellow', 'red'])
fig.update_layout(
    xaxis_title='Customer Index',
    yaxis_title='Credit Score',
    title='Customer Segmentation based on Credit Scores'
)
fig.show()

# **Summary**
Credit scoring and segmentation refer to the process of evaluating the creditworthiness of individuals or businesses and dividing them into distinct groups based on their credit profiles. It aims to assess the likelihood of borrowers repaying their debts and helps financial institutions make informed decisions regarding lending and managing credit risk. I hope you liked this article on Credit Scoring and Segmentation using Python. Feel free to ask valuable questions in the comments section below.