In [1]:
# Import Dependencies
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.metrics import confusion_matrix, classification_report

In [10]:
# Load the raw credit dataset into a DataFrame, then preview its first five rows
credit_data = pd.read_csv(filepath_or_buffer='Resources/credit_data.csv')
credit_data.head(5)

Unnamed: 0,Age,Gender,Marital Status,Education Level,Employment Status,Credit Utilization Ratio,Payment History,Number of Credit Accounts,Loan Amount,Interest Rate,Loan Term,Type of Loan
0,60,Male,Married,Master,Employed,0.22,2685,2,4675000,2.65,48,Personal Loan
1,25,Male,Married,High School,Unemployed,0.2,2371,9,3619000,5.19,60,Auto Loan
2,30,Female,Single,Master,Employed,0.22,2771,6,957000,2.76,12,Auto Loan
3,58,Female,Married,PhD,Unemployed,0.12,1371,2,4731000,6.57,60,Auto Loan
4,32,Male,Married,Bachelor,Self-Employed,0.99,828,2,3289000,6.28,36,Personal Loan


In [30]:
# Add a column that buckets ages by decade: 20s, 30s, 40s, 50s, and 60s.
# One boolean mask per decade; Series.between is inclusive on both ends,
# so the masks are 20-29, 30-39, 40-49, 50-59 and 60-69.
conditions = [
    credit_data['Age'].between(decade_start, decade_start + 9)
    for decade_start in range(20, 70, 10)
]

# Labels line up positionally with the masks above
labels = ['twenties', 'thirties', 'forties', 'fifties', 'sixties']

# Rows matching none of the masks (ages outside 20-69, or missing) get 'other'
credit_data['Age Group'] = np.select(conditions, labels, default='other')

# Save to a new CSV file; index=False keeps the row index out of the file
credit_data.to_csv('Resources/credit_data_updated.csv', index=False)

# Preview the result. This has to be the cell's final expression: a bare
# expression earlier in a cell is evaluated but never displayed.
credit_data[['Age', 'Age Group']].head()

In [36]:
# Load the CSV that now carries the 'Age Group' column, then preview five rows
credit_data_updated = pd.read_csv(filepath_or_buffer='Resources/credit_data_updated.csv')
credit_data_updated.head(5)

Unnamed: 0,Age,Gender,Marital Status,Education Level,Employment Status,Credit Utilization Ratio,Payment History,Number of Credit Accounts,Loan Amount,Interest Rate,Loan Term,Type of Loan,Age Group
0,60,Male,Married,Master,Employed,0.22,2685,2,4675000,2.65,48,Personal Loan,sixties
1,25,Male,Married,High School,Unemployed,0.2,2371,9,3619000,5.19,60,Auto Loan,twenties
2,30,Female,Single,Master,Employed,0.22,2771,6,957000,2.76,12,Auto Loan,thirties
3,58,Female,Married,PhD,Unemployed,0.12,1371,2,4731000,6.57,60,Auto Loan,fifties
4,32,Male,Married,Bachelor,Self-Employed,0.99,828,2,3289000,6.28,36,Personal Loan,thirties


In [32]:
# Report each column's dtype and non-null count before any cleanup
credit_data_updated.info()

# Clean ' Loan Amount ' (the column name itself carries surrounding spaces):
# view every value as text, trim whitespace, and drop literal commas
# (presumably thousands separators - confirm against the raw CSV).
loan_amount_text = credit_data_updated[' Loan Amount '].astype(str)
loan_amount_text = loan_amount_text.str.strip()
loan_amount_text = loan_amount_text.str.replace(',', '', regex=False)
credit_data_updated[' Loan Amount '] = loan_amount_text

# With the text cleaned, cast the column to integers
credit_data_updated[' Loan Amount '] = credit_data_updated[' Loan Amount '].astype(int)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 13 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Age                        1000 non-null   int64  
 1   Gender                     1000 non-null   object 
 2   Marital Status             1000 non-null   object 
 3   Education Level            1000 non-null   object 
 4   Employment Status          1000 non-null   object 
 5   Credit Utilization Ratio   1000 non-null   float64
 6    Payment History           1000 non-null   object 
 7   Number of Credit Accounts  1000 non-null   int64  
 8    Loan Amount               1000 non-null   int64  
 9   Interest Rate              1000 non-null   float64
 10  Loan Term                  1000 non-null   int64  
 11  Type of Loan               1000 non-null   object 
 12  Age Group                  1000 non-null   object 
dtypes: float64(2), int64(4), object(7)
memory usage: 

In [None]:
# Analysis: Find Count Of Loan Type Per Gender
# Tally each (Gender, Type of Loan) pair once and reuse it for both views
loan_type_counts = credit_data_updated.groupby(['Gender', 'Type of Loan']).size()

# Long form: one row per pair, with the tally in a 'Count' column
loan_type_by_gender = loan_type_counts.reset_index(name='Count')

# Wide form: genders as rows, loan types as columns. fill_value=0 fills absent
# pairs during the reshape so the counts stay integers; pivot(...).fillna(0)
# would first introduce NaN and turn the affected columns into floats.
loan_type_by_gender_pivot = loan_type_counts.unstack('Type of Loan', fill_value=0)
print(loan_type_by_gender_pivot)

Type of Loan  Auto Loan  Home Loan  Personal Loan
Gender                                           
Female              168        171            160
Male                180        157            164


In [34]:
# Analysis: mean loan amount for each education level, largest mean first
loan_amounts_by_education = credit_data_updated.groupby('Education Level')[' Loan Amount ']
avg_loan_by_education = loan_amounts_by_education.mean().sort_values(ascending=False)

# Render each mean as text with thousands separators and no decimals
avg_loan_by_education_formatted = avg_loan_by_education.map(lambda amount: f"{amount:,.0f}")
print(avg_loan_by_education_formatted)

Education Level
Bachelor       2,542,095
Master         2,503,557
PhD            2,470,261
High School    2,370,104
Name:  Loan Amount , dtype: object


In [35]:
# Analysis: mean 'Loan Term' for each type of loan, longest mean first
mean_term_per_loan_type = credit_data_updated.groupby('Type of Loan')['Loan Term'].mean()
avg_term_by_loan_type = mean_term_per_loan_type.sort_values(ascending=False)
print(avg_term_by_loan_type)

Type of Loan
Home Loan        37.573171
Auto Loan        37.103448
Personal Loan    36.703704
Name: Loan Term, dtype: float64


In [None]:
# Use Machine Learning to Predict The Loan Details Of A Specific Demographic
