# Bank Loan Analysis Project

#### Importing Libraries

In [27]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import plotly.express as px     

#### Loading Dataset

In [30]:
# Load the raw loan records from the notebook's working directory.
df = pd.read_csv('financial_loan.csv')

# Preview a random handful of rows. The fixed seed keeps the preview identical
# across "Restart Kernel -> Run All" executions.
df.sample(10, random_state=42)

Unnamed: 0,id,address_state,application_type,emp_length,emp_title,grade,home_ownership,issue_date,last_credit_pull_date,last_payment_date,...,sub_grade,term,verification_status,annual_income,dti,installment,int_rate,loan_amount,total_acc,total_payment
11723,509214,MA,INDIVIDUAL,10+ years,IBM,C,RENT,10-04-2021,16-04-2021,11-11-2021,...,C2,36 months,Not Verified,53544.0,0.1407,60.75,0.1311,1800,23,2081
25215,863854,OH,INDIVIDUAL,10+ years,Pilkiington North America,A,MORTGAGE,11-08-2021,14-09-2021,14-09-2021,...,A1,36 months,Not Verified,120000.0,0.0715,437.32,0.0542,14500,28,15743
8342,1027758,MN,INDIVIDUAL,10+ years,Fairview Health Services,A,MORTGAGE,11-11-2021,16-04-2021,12-06-2021,...,A4,36 months,Not Verified,85000.0,0.1272,469.36,0.079,15000,53,15367
24606,884223,FL,INDIVIDUAL,2 years,iGov Inc,B,RENT,11-09-2021,16-05-2021,16-05-2021,...,B4,60 months,Verified,125004.0,0.1858,500.24,0.1242,30000,29,27501
25687,524037,PA,INDIVIDUAL,4 years,Leeds,B,MORTGAGE,10-06-2021,14-01-2021,11-04-2021,...,B4,36 months,Not Verified,21000.0,0.1811,98.92,0.1149,3000,7,3256
35771,383164,GA,INDIVIDUAL,9 years,Beauty Mart Supplies,A,MORTGAGE,09-03-2021,12-07-2021,12-03-2021,...,A5,36 months,Not Verified,48000.0,0.2452,318.54,0.0963,9925,14,11467
31024,776547,CA,INDIVIDUAL,< 1 year,Kforce Professional / Kaiser Permanente,D,RENT,11-06-2021,16-03-2021,16-03-2021,...,D3,60 months,Verified,47839.92,0.1914,368.69,0.1649,15000,32,22061
4512,551889,IL,INDIVIDUAL,< 1 year,MSpace,E,RENT,10-07-2021,16-05-2021,12-04-2021,...,E1,36 months,Source Verified,65000.0,0.1215,530.7,0.1645,15000,12,18192
15571,892098,MI,INDIVIDUAL,8 years,ALLIANT CREDIT UNION,C,RENT,11-09-2021,16-05-2021,13-04-2021,...,C1,36 months,Source Verified,90000.0,0.1963,237.52,0.1349,7000,23,8127
3055,565538,CA,INDIVIDUAL,4 years,Stargate Studios,B,RENT,10-08-2021,13-08-2021,13-09-2021,...,B5,36 months,Not Verified,50000.0,0.1519,92.82,0.1186,2800,19,3342


#### Basic Analysis

In [31]:
# Number of missing values in each column
df.isna().sum()

id                          0
address_state               0
application_type            0
emp_length                  0
emp_title                1438
grade                       0
home_ownership              0
issue_date                  0
last_credit_pull_date       0
last_payment_date           0
loan_status                 0
next_payment_date           0
member_id                   0
purpose                     0
sub_grade                   0
term                        0
verification_status         0
annual_income               0
dti                         0
installment                 0
int_rate                    0
loan_amount                 0
total_acc                   0
total_payment               0
dtype: int64

In [32]:
# (rows, columns) of the loaded frame
df.shape

(38576, 24)

In [33]:
# Column names, non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38576 entries, 0 to 38575
Data columns (total 24 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   id                     38576 non-null  int64  
 1   address_state          38576 non-null  object 
 2   application_type       38576 non-null  object 
 3   emp_length             38576 non-null  object 
 4   emp_title              37138 non-null  object 
 5   grade                  38576 non-null  object 
 6   home_ownership         38576 non-null  object 
 7   issue_date             38576 non-null  object 
 8   last_credit_pull_date  38576 non-null  object 
 9   last_payment_date      38576 non-null  object 
 10  loan_status            38576 non-null  object 
 11  next_payment_date      38576 non-null  object 
 12  member_id              38576 non-null  int64  
 13  purpose                38576 non-null  object 
 14  sub_grade              38576 non-null  object 
 15  te

In [34]:
# Columns holding dates as text that should become real datetimes
date_cols = ['issue_date', 'last_credit_pull_date', 'last_payment_date', 'next_payment_date']

# Nulls present before parsing, so that values lost to coercion can be told
# apart from gaps that were already in the data.
nulls_before = df[date_cols].isna().sum()

# Parse each column as day-month-year; values that do not match become NaT
for col in date_cols:
    df[col] = pd.to_datetime(df[col], format='%d-%m-%Y', errors='coerce')

# errors='coerce' hides parse failures, so surface them instead of letting
# NaT values flow silently into the date-based metrics further down.
nulls_introduced = df[date_cols].isna().sum() - nulls_before
if nulls_introduced.any():
    warnings.warn(
        "Date parsing coerced values to NaT: "
        f"{nulls_introduced[nulls_introduced > 0].to_dict()}"
    )

# Verify the data types of the columns
print(df[date_cols].dtypes)

# Display the first 5 rows of the dataframe with the converted columns
print(df[date_cols].head())

issue_date               datetime64[ns]
last_credit_pull_date    datetime64[ns]
last_payment_date        datetime64[ns]
next_payment_date        datetime64[ns]
dtype: object
  issue_date last_credit_pull_date last_payment_date next_payment_date
0 2021-02-11            2021-09-13        2021-04-13        2021-05-13
1 2021-01-01            2021-12-14        2021-01-15        2021-02-15
2 2021-01-05            2021-12-12        2021-01-09        2021-02-09
3 2021-02-25            2021-12-12        2021-03-12        2021-04-12
4 2021-01-01            2021-12-14        2021-01-15        2021-02-15


In [35]:
# Persist the frame with parsed dates to a SEPARATE file. Writing back to
# 'financial_loan.csv' would replace the day-month-year strings with ISO dates,
# and the '%d-%m-%Y' parse with errors='coerce' would then turn every date
# into NaT the next time the notebook is run from the top.
df.to_csv('financial_loan_clean.csv', index=False)

In [36]:
# Summary statistics for the numeric and datetime columns
df.describe()

Unnamed: 0,id,issue_date,last_credit_pull_date,last_payment_date,next_payment_date,member_id,annual_income,dti,installment,int_rate,loan_amount,total_acc,total_payment
count,38576.0,38576,38576,38576,38576,38576.0,38576.0,38576.0,38576.0,38576.0,38576.0,38576.0,38576.0
mean,681037.1,2021-07-16 02:31:35.562007040,2021-06-08 13:36:34.193280512,2021-06-26 09:52:08.909166080,2021-07-26 20:42:20.605557760,847651.5,69644.54,0.133274,326.862965,0.120488,11296.066855,22.132544,12263.348533
min,54734.0,2021-01-01 00:00:00,2021-01-08 00:00:00,2021-01-08 00:00:00,2021-02-08 00:00:00,70699.0,4000.0,0.0,15.69,0.0542,500.0,2.0,34.0
25%,513517.0,2021-04-11 00:00:00,2021-04-15 00:00:00,2021-03-16 00:00:00,2021-04-16 00:00:00,662978.8,41500.0,0.0821,168.45,0.0932,5500.0,14.0,5633.0
50%,662728.0,2021-07-11 00:00:00,2021-05-16 00:00:00,2021-06-14 00:00:00,2021-07-14 00:00:00,847356.5,60000.0,0.1342,283.045,0.1186,10000.0,20.0,10042.0
75%,836506.0,2021-10-11 00:00:00,2021-08-13 00:00:00,2021-09-15 00:00:00,2021-10-15 00:00:00,1045652.0,83200.5,0.1859,434.4425,0.1459,15000.0,29.0,16658.0
max,1077501.0,2021-12-12 00:00:00,2022-01-20 00:00:00,2021-12-15 00:00:00,2022-01-15 00:00:00,1314167.0,6000000.0,0.2999,1305.19,0.2459,35000.0,90.0,58564.0
std,211324.6,,,,,266810.5,64293.68,0.066662,209.092,0.037164,7460.746022,11.392282,9051.104777


### Problem Solving

#### Total Loan Applications

In [37]:
# Number of non-null ids, reported as the total application count
total_loan_app = df['id'].notna().sum()
print('Total loan applicants:-', total_loan_app)

Total loan applicants:- 38576


#### MTD Loan Applications

In [38]:
# "Month to date" here means the calendar month of the most recent issue date
latest_issue_date = df['issue_date'].max()
latest_year, latest_month = latest_issue_date.year, latest_issue_date.month

# Boolean mask selecting loans issued in that year and month
issued = df['issue_date'].dt
in_latest_month = (issued.year == latest_year) & (issued.month == latest_month)

mtd_data = df.loc[in_latest_month]
mtd_loan_app = mtd_data['id'].count()

print(f"MTD Loan Applications(for {latest_issue_date.strftime('%B %Y')}):- {mtd_loan_app}")

MTD Loan Applications(for December 2021):- 4314


#### Total Funded Amount

In [39]:
# Sum of all loan amounts, scaled to millions for display
loan_amount_sum = df['loan_amount'].sum()
total_fund = loan_amount_sum / 1_000_000
print('Total Funded Amount:- ${:.2f}M'.format(total_fund))

Total Funded Amount:- $435.76M


#### MTD Total Funded Amount

In [40]:
# Restrict to the calendar month of the most recent issue date
latest_issue_date = df['issue_date'].max()
latest_year, latest_month = latest_issue_date.year, latest_issue_date.month

issued = df['issue_date'].dt
in_latest_month = (issued.year == latest_year) & (issued.month == latest_month)
mtd_data = df.loc[in_latest_month]

# Loan amount issued in that month, scaled to millions for display
mtd_loan_amount_sum = mtd_data['loan_amount'].sum()
total_fund = mtd_loan_amount_sum / 1_000_000

print('MTD Total Funded Amount:- ${:.2f}M'.format(total_fund))

MTD Total Funded Amount:- $53.98M


#### MTD Total Amount Received

In [41]:
# Restrict to the calendar month of the most recent issue date
latest_issue_date=df['issue_date'].max()
latest_year=latest_issue_date.year
latest_month=latest_issue_date.month

mtd_data=df[(df['issue_date'].dt.year==latest_year) & 
              (df['issue_date'].dt.month==latest_month)]

# Payments received on those loans, scaled to millions for display
total_received=mtd_data['total_payment'].sum()/1000000

# The label must name the received amount: this value is the sum of
# 'total_payment', not of 'loan_amount'.
print('MTD Total Amount Received:- ${:.2f}M'.format(total_received))

MTD Total Funded Amount:- $58.07M


#### Avg Interest Rate

In [42]:
# Mean of the fractional interest rates, converted to a percentage
mean_rate = df['int_rate'].mean()
avg_int_rate = mean_rate * 100
print('Avg Interest Rate:- {:.2f}%'.format(avg_int_rate))

Avg Interest Rate:- 12.05%


#### Avg Debt-to-Income Ratio (DTI)

In [43]:
# Mean debt-to-income ratio, converted from a fraction to a percentage
avg_dti=df['dti'].mean()*100

# The value has been scaled by 100, so print it with a percent sign,
# as the interest-rate metric does.
print('Avg DTI:- {:.2f}%'.format(avg_dti))

Avg DTI:- 13.33


#### Good Loan Metrics

In [44]:
# Loans whose status is 'Fully Paid' or 'Current' are counted as good loans
is_good = df['loan_status'].isin(['Fully Paid', 'Current'])
good_loans = df.loc[is_good]

# Application counts: good loans versus the whole book
total_loan_app = df['id'].count()
good_loan_app = good_loans['id'].count()
gl_perc = (good_loan_app / total_loan_app) * 100

# Amounts funded and received on good loans, scaled to millions for display
good_loan_fund = good_loans['loan_amount'].sum() / 1_000_000
good_loan_received = good_loans['total_payment'].sum() / 1_000_000

print('Good Loan App:-', good_loan_app)
print('Good Loan Fund:- ${:.2f}M'.format(good_loan_fund))
print('Good Loan Received:- ${:.2f}M'.format(good_loan_received))
print('Perc of Good Loan App:- {:.2f}%'.format(gl_perc))


Good Loan App:- 33243
Good Loan Fund:- $370.22M
Good Loan Received:- $435.79M
Perc of Good Loan App:- 86.18%


In [45]:
# Show the frame after the date conversion (the rendered output is truncated
# to the first and last rows)
df

Unnamed: 0,id,address_state,application_type,emp_length,emp_title,grade,home_ownership,issue_date,last_credit_pull_date,last_payment_date,...,sub_grade,term,verification_status,annual_income,dti,installment,int_rate,loan_amount,total_acc,total_payment
0,1077430,GA,INDIVIDUAL,< 1 year,Ryder,C,RENT,2021-02-11,2021-09-13,2021-04-13,...,C4,60 months,Source Verified,30000.0,0.0100,59.83,0.1527,2500,4,1009
1,1072053,CA,INDIVIDUAL,9 years,MKC Accounting,E,RENT,2021-01-01,2021-12-14,2021-01-15,...,E1,36 months,Source Verified,48000.0,0.0535,109.43,0.1864,3000,4,3939
2,1069243,CA,INDIVIDUAL,4 years,Chemat Technology Inc,C,RENT,2021-01-05,2021-12-12,2021-01-09,...,C5,36 months,Not Verified,50000.0,0.2088,421.65,0.1596,12000,11,3522
3,1041756,TX,INDIVIDUAL,< 1 year,barnes distribution,B,MORTGAGE,2021-02-25,2021-12-12,2021-03-12,...,B2,60 months,Source Verified,42000.0,0.0540,97.06,0.1065,4500,9,4911
4,1068350,IL,INDIVIDUAL,10+ years,J&J Steel Inc,A,MORTGAGE,2021-01-01,2021-12-14,2021-01-15,...,A1,36 months,Verified,83000.0,0.0231,106.53,0.0603,3500,28,3835
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38571,803452,NJ,INDIVIDUAL,< 1 year,Joseph M Sanzari Company,C,MORTGAGE,2021-07-11,2021-05-16,2021-05-16,...,C1,60 months,Verified,100000.0,0.1986,551.64,0.1299,24250,33,31946
38572,970377,NY,INDIVIDUAL,8 years,Swat Fame,C,RENT,2021-10-11,2021-04-16,2021-05-16,...,C1,60 months,Verified,50000.0,0.0458,579.72,0.1349,25200,18,31870
38573,875376,CA,INDIVIDUAL,5 years,Anaheim Regional Medical Center,D,RENT,2021-09-11,2021-05-16,2021-05-16,...,D5,60 months,Verified,65000.0,0.1734,627.93,0.1749,25000,20,35721
38574,972997,NY,INDIVIDUAL,5 years,Brooklyn Radiology,D,RENT,2021-10-11,2021-05-16,2021-05-16,...,D5,60 months,Verified,368000.0,0.0009,612.72,0.1825,24000,9,33677
