# Bank Loan Analysis Project

#### Importing Libraries

In [4]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import plotly.express as px

#### Loading Dataset

In [5]:
# Load the loan dataset from the working directory into `df`, the frame the
# later cells in this notebook read from.
df=pd.read_csv('financial_loan.csv')

# Preview 10 random rows. The fixed random_state keeps the displayed sample
# identical across Restart & Run All instead of changing on every execution.
df.sample(10, random_state=42)

Unnamed: 0,id,address_state,application_type,emp_length,emp_title,grade,home_ownership,issue_date,last_credit_pull_date,last_payment_date,...,sub_grade,term,verification_status,annual_income,dti,installment,int_rate,loan_amount,total_acc,total_payment
37203,547816,NY,INDIVIDUAL,3 years,Sam Ash Music,D,RENT,10-07-2021,15-07-2021,15-08-2021,...,D5,60 months,Verified,24000.0,0.0925,336.72,0.1632,13750,13,20203
6882,1060481,FL,INDIVIDUAL,1 year,Kendall Regional Medical Center,B,RENT,11-12-2021,14-12-2021,14-12-2021,...,B2,36 months,Not Verified,26000.0,0.0978,325.74,0.1065,10000,12,11726
29506,810924,OH,INDIVIDUAL,< 1 year,Honda R&D,B,RENT,11-07-2021,14-07-2021,14-07-2021,...,B2,36 months,Source Verified,60000.0,0.1684,292.91,0.1059,9000,10,10545
31570,623481,CA,INDIVIDUAL,10+ years,Port of Oakland,C,MORTGAGE,10-12-2021,15-12-2021,15-12-2021,...,C3,60 months,Verified,140000.0,0.0426,408.24,0.1298,25000,17,24494
23694,782452,MD,INDIVIDUAL,5 years,US Army Corps of Engineers,C,RENT,11-06-2021,12-12-2021,12-12-2021,...,C1,60 months,Verified,74872.0,0.2269,454.96,0.1299,20000,23,23206
8416,881995,OR,INDIVIDUAL,5 years,Huron Consulting Group,A,MORTGAGE,11-09-2021,16-02-2021,14-10-2021,...,A5,36 months,Not Verified,53808.0,0.1588,190.52,0.089,6000,15,6859
4653,863348,MN,INDIVIDUAL,2 years,"Cargill, Incorporated",A,MORTGAGE,11-08-2021,16-05-2021,14-09-2021,...,A4,36 months,Verified,71500.0,0.1328,466.53,0.0749,15000,19,16795
14025,576013,OH,INDIVIDUAL,< 1 year,Centurum,C,MORTGAGE,10-09-2021,14-01-2021,13-02-2021,...,C5,36 months,Source Verified,105000.0,0.0888,207.18,0.1472,6000,16,7390
33297,997677,WA,INDIVIDUAL,8 years,University of Washington Medical Center,D,RENT,11-10-2021,15-05-2021,14-11-2021,...,D1,36 months,Not Verified,68736.0,0.0716,42.37,0.1629,1200,15,1525
22602,862325,TX,INDIVIDUAL,< 1 year,Apple Computer,B,MORTGAGE,11-09-2021,16-05-2021,15-05-2021,...,B4,60 months,Verified,63000.0,0.2434,440.86,0.1149,20050,24,25919


#### Basic Analysis

In [6]:
# Missing values per column (isna is the canonical spelling of the isnull alias).
df.isna().sum()

id                          0
address_state               0
application_type            0
emp_length                  0
emp_title                1438
grade                       0
home_ownership              0
issue_date                  0
last_credit_pull_date       0
last_payment_date           0
loan_status                 0
next_payment_date           0
member_id                   0
purpose                     0
sub_grade                   0
term                        0
verification_status         0
annual_income               0
dti                         0
installment                 0
int_rate                    0
loan_amount                 0
total_acc                   0
total_payment               0
dtype: int64

In [7]:
# (rows, columns) of the loaded frame
df.shape

(38576, 24)

In [16]:
# Per-column summary: name, non-null count, and dtype
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38576 entries, 0 to 38575
Data columns (total 24 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   id                     38576 non-null  int64         
 1   address_state          38576 non-null  object        
 2   application_type       38576 non-null  object        
 3   emp_length             38576 non-null  object        
 4   emp_title              37138 non-null  object        
 5   grade                  38576 non-null  object        
 6   home_ownership         38576 non-null  object        
 7   issue_date             38576 non-null  datetime64[ns]
 8   last_credit_pull_date  38576 non-null  datetime64[ns]
 9   last_payment_date      38576 non-null  datetime64[ns]
 10  loan_status            38576 non-null  object        
 11  next_payment_date      38576 non-null  datetime64[ns]
 12  member_id              38576 non-null  int64         
 13  p

In [18]:
# Columns that hold dates and should be real datetimes rather than strings
date_cols=['issue_date','last_credit_pull_date','last_payment_date','next_payment_date']

# Parse each column with an explicit day-month-year format. errors='coerce'
# turns any value that does not match the format into NaT, so count those
# values and warn rather than letting dates disappear silently.
for col in date_cols:
    parsed=pd.to_datetime(df[col], format='%d-%m-%Y', errors='coerce')
    # Values that were present before parsing but are NaT afterwards were coerced.
    n_coerced=int((parsed.isna() & df[col].notna()).sum())
    if n_coerced:
        warnings.warn(f"{col}: {n_coerced} value(s) did not match '%d-%m-%Y' and were set to NaT")
    df[col]=parsed

# Verify the data types of the columns
print(df[date_cols].dtypes)

# Display the first 5 rows of the dataframe with the converted columns
print(df[date_cols].head())

issue_date               datetime64[ns]
last_credit_pull_date    datetime64[ns]
last_payment_date        datetime64[ns]
next_payment_date        datetime64[ns]
dtype: object
  issue_date last_credit_pull_date last_payment_date next_payment_date
0 2021-02-11            2021-09-13        2021-04-13        2021-05-13
1 2021-01-01            2021-12-14        2021-01-15        2021-02-15
2 2021-01-05            2021-12-12        2021-01-09        2021-02-09
3 2021-02-25            2021-12-12        2021-03-12        2021-04-12
4 2021-01-01            2021-12-14        2021-01-15        2021-02-15


In [15]:
# Save the cleaned frame (with parsed dates) to a separate file. Writing back
# to 'financial_loan.csv' would replace the DD-MM-YYYY date strings with
# ISO-formatted dates, so on the next Restart & Run All the date-conversion
# cell (format='%d-%m-%Y', errors='coerce') would turn every date into NaT.
df.to_csv('financial_loan_clean.csv', index=False)

In [19]:
# Summary statistics (count, mean, min, quartiles, max, std) for the numeric and datetime columns
df.describe()

Unnamed: 0,id,issue_date,last_credit_pull_date,last_payment_date,next_payment_date,member_id,annual_income,dti,installment,int_rate,loan_amount,total_acc,total_payment
count,38576.0,38576,38576,38576,38576,38576.0,38576.0,38576.0,38576.0,38576.0,38576.0,38576.0,38576.0
mean,681037.1,2021-07-16 02:31:35.562007040,2021-06-08 13:36:34.193280512,2021-06-26 09:52:08.909166080,2021-07-26 20:42:20.605557760,847651.5,69644.54,0.133274,326.862965,0.120488,11296.066855,22.132544,12263.348533
min,54734.0,2021-01-01 00:00:00,2021-01-08 00:00:00,2021-01-08 00:00:00,2021-02-08 00:00:00,70699.0,4000.0,0.0,15.69,0.0542,500.0,2.0,34.0
25%,513517.0,2021-04-11 00:00:00,2021-04-15 00:00:00,2021-03-16 00:00:00,2021-04-16 00:00:00,662978.8,41500.0,0.0821,168.45,0.0932,5500.0,14.0,5633.0
50%,662728.0,2021-07-11 00:00:00,2021-05-16 00:00:00,2021-06-14 00:00:00,2021-07-14 00:00:00,847356.5,60000.0,0.1342,283.045,0.1186,10000.0,20.0,10042.0
75%,836506.0,2021-10-11 00:00:00,2021-08-13 00:00:00,2021-09-15 00:00:00,2021-10-15 00:00:00,1045652.0,83200.5,0.1859,434.4425,0.1459,15000.0,29.0,16658.0
max,1077501.0,2021-12-12 00:00:00,2022-01-20 00:00:00,2021-12-15 00:00:00,2022-01-15 00:00:00,1314167.0,6000000.0,0.2999,1305.19,0.2459,35000.0,90.0,58564.0
std,211324.6,,,,,266810.5,64293.68,0.066662,209.092,0.037164,7460.746022,11.392282,9051.104777


### Problem Solving

#### Total Loan Applications

In [23]:
# One row per application: count() tallies the non-null ids.
application_ids = df['id']
total_loan_app = application_ids.count()
print('Total loan applicants:-', total_loan_app)

Total loan applicants:- 38576


#### MTD (Month-to-Date) Loan Applications

In [31]:
# "Month to date" here is the calendar month of the most recent issue_date.
latest_issue_date = df['issue_date'].max()
latest_year, latest_month = latest_issue_date.year, latest_issue_date.month

# Boolean mask selecting the loans issued in that same year and month.
issued_on = df['issue_date'].dt
in_latest_month = (issued_on.year == latest_year) & (issued_on.month == latest_month)

mtd_data = df.loc[in_latest_month]
mtd_loan_app = mtd_data['id'].count()

print(f"MTD Loan Applications(for {latest_issue_date.strftime('%B %Y')}):- {mtd_loan_app}")

MTD Loan Applications(for December 2021):- 4314


#### Total Funded Amount

In [40]:
# Sum loan_amount over all loans, then express the total in millions for display.
funded_sum = df['loan_amount'].sum()
total_fund = funded_sum / 1_000_000
print('Total Funded Amount:- ${:.2f}M'.format(total_fund))

Total Funded Amount:- $435.76M


#### MTD Total Funded Amount

In [43]:
# Restrict to loans issued in the same calendar month as the latest issue_date.
latest_issue_date=df['issue_date'].max()
latest_year=latest_issue_date.year
latest_month=latest_issue_date.month

mtd_data=df[(df['issue_date'].dt.year==latest_year) &
              (df['issue_date'].dt.month==latest_month)]

# Use a dedicated name so the overall `total_fund` computed in the
# "Total Funded Amount" cell is not overwritten by the month-to-date figure.
mtd_total_fund=mtd_data['loan_amount'].sum()/1000000

print('MTD Total Funded Amount:- ${:.2f}M'.format(mtd_total_fund))

MTD Total Funded Amount:- $53.98M


#### MTD Total Amount Received

In [45]:
# Restrict to loans issued in the same calendar month as the latest issue_date.
latest_issue_date=df['issue_date'].max()
latest_year=latest_issue_date.year
latest_month=latest_issue_date.month

mtd_data=df[(df['issue_date'].dt.year==latest_year) &
              (df['issue_date'].dt.month==latest_month)]

# Sum of total_payment for those loans, in millions.
total_received=mtd_data['total_payment'].sum()/1000000

# The label names the received amount; it previously repeated the
# "Total Funded Amount" label copied from the funded-amount cell.
print('MTD Total Amount Received:- ${:.2f}M'.format(total_received))

MTD Total Funded Amount:- $58.07M


#### Avg Interest Rate

In [51]:
# Mean of int_rate, scaled by 100 so it prints as a percentage.
mean_rate = df['int_rate'].mean()
avg_int_rate = mean_rate * 100
print('Avg Interest Rate:- {:.2f}%'.format(avg_int_rate))

Avg Interest Rate:- 12.05%


#### Avg Debt-to-Income Ratio (DTI)

In [55]:
# Mean of dti, scaled by 100 so it reads as a percentage.
avg_dti=df['dti'].mean()*100
# Print the '%' unit: the value is already scaled to percent, and the
# interest-rate cell formats its percentage the same way.
print('Avg DTI:- {:.2f}%'.format(avg_dti))

Avg DTI:- 13.33


#### Good Loan Metrics

In [56]:
# A loan counts as "good" when its status is one of these two values.
good_statuses = ['Fully Paid', 'Current']
is_good = df['loan_status'].isin(good_statuses)
good_loans = df.loc[is_good]

# Denominator for the share of good applications.
total_loan_app = df['id'].count()

# Application count, plus funded and received amounts in millions, for good loans.
good_loan_app = good_loans['id'].count()
good_loan_fund = good_loans['loan_amount'].sum() / 1_000_000
good_loan_received = good_loans['total_payment'].sum() / 1_000_000

gl_perc = (good_loan_app / total_loan_app) * 100

print('Good Loan App:-', good_loan_app)
print('Good Loan Fund:- ${:.2f}M'.format(good_loan_fund))
print('Good Loan Received:- ${:.2f}M'.format(good_loan_received))
print('Perc of Good Loan App:- {:.2f}%'.format(gl_perc))


Good Loan App:- 33243
Good Loan Fund:- $370.22M
Good Loan Received:- $435.79M
Perc of Good Loan App:- 86.18%
