In [91]:
#Load the libraryss
import pandas as pd #To work with dataset
import numpy as np #Math library
import seaborn as sns #Graph library that use matplot in background
import matplotlib.pyplot as plt #to plot some parameters in seaborn
import plotly.express as px

In [92]:
# Look at the data

loan_inp = pd.read_csv("./loan.csv",low_memory=False)
loan_inp.head()

Unnamed: 0,id,member_id,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,...,num_tl_90g_dpd_24m,num_tl_op_past_12m,pct_tl_nvr_dlq,percent_bc_gt_75,pub_rec_bankruptcies,tax_liens,tot_hi_cred_lim,total_bal_ex_mort,total_bc_limit,total_il_high_credit_limit
0,1077501,1296599,5000,5000,4975.0,36 months,10.65%,162.87,B,B2,...,,,,,0.0,0.0,,,,
1,1077430,1314167,2500,2500,2500.0,60 months,15.27%,59.83,C,C4,...,,,,,0.0,0.0,,,,
2,1077175,1313524,2400,2400,2400.0,36 months,15.96%,84.33,C,C5,...,,,,,0.0,0.0,,,,
3,1076863,1277178,10000,10000,10000.0,36 months,13.49%,339.31,C,C1,...,,,,,0.0,0.0,,,,
4,1075358,1311748,3000,3000,3000.0,60 months,12.69%,67.79,B,B5,...,,,,,0.0,0.0,,,,


In [93]:
# understand the data
loan_inp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39717 entries, 0 to 39716
Columns: 111 entries, id to total_il_high_credit_limit
dtypes: float64(74), int64(13), object(24)
memory usage: 33.6+ MB


#**Data understanding**
1. Employment length 0 means the customer is employeed between 0-1 year.
2. All records in the given data is of individual application type.
3. Loan status is of 3 types: <br>
  a. **Fully paid**: Applicant has fully paid the loan.<br />
  b. **Current**: Applicant is in the process of paying the instalments, i.e. the tenure of the loan is not yet completed. These candidates are not labelled as 'defaulted'. <br />
  c. **Charged-off**: Applicant has not paid the instalments in due time for a long period of time, i.e. he/she has defaulted on the loan 
4. Customers has different type of Home ownership: RENT, OWN, MORTGAGE, OTHER.









#**Data Cleaning**

---

1. Remove the columns that only contains the NA values.
2. Remove the columns that has more than 50% NA values.
3. Remove columns that has more than 50% empty values.
4. Standardise Precision
5. Fixing rows and columns.
6. Remove Outliers
7. Remove those rows where Funded amount by investers is less than the Loan amount applied by the customer.
8. Remove irrevelant Data.
9. Convert the Interest Rate column to numeric value.

In [94]:
# Data cleaning

# Remove columns that only contains NA values.
loan = loan_inp.dropna(axis=1, how='all', inplace=False)
print(loan_inp.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39717 entries, 0 to 39716
Columns: 111 entries, id to total_il_high_credit_limit
dtypes: float64(74), int64(13), object(24)
memory usage: 33.6+ MB
None


In [95]:
# Remove the columns that has more than 50% NA values
print(len(loan.columns))
loan = loan[loan.columns[loan.isna().mean() < 0.5]]
print(len(loan.columns))

# 3 columns are removed.

57
54


In [96]:
# Remove columns that has more than 50% empty values.
loan = loan[loan.columns[loan.isnull().mean() < 0.5]]


In [97]:
# Standardise Precision

# get the list of columns that has float data type.
float_columns = loan.select_dtypes(include=[float])

# Iterate on the columns and round their values to two decimal places.
for col in float_columns.columns:
    loan[col] = loan[col].round(2);

In [98]:
# Fixing rows and columns

# Create a separate year column from issue_d column which is of the format mm-yy
loan["issue_year"] = loan["issue_d"].apply(lambda x: x[-2:])

# Create a separate column for missue month.
loan["issue_month"] = loan["issue_d"].apply(lambda x: x[:-3])

# Truncate a months string from a term column and make the column of type int
loan["term_int"] = loan["term"].apply(lambda x: int(x[0:3]))

In [99]:
# Current loan status is irrelevant in analysing the data.
# Hence remove those rows containing the Current as loan status
print(loan.shape)
loan.drop(loan[loan["loan_status"] == "Current"].index, inplace=True)
print(loan.shape)

(39717, 57)
(38577, 57)


In [100]:
# Remove Outliers

# Get the outliers of the loan_status vs loan_amnt 
fig = px.box(loan, x="loan_status", y="loan_amnt")
fig.show()

# Figure shows ChargedOff doesn't has the outliers

# Fig of Fully Paid (loan status) shows it has some outliers.Hence remove it.
fully_paid_percentile = loan.groupby("loan_status")["loan_amnt"].quantile(0.95)["Fully Paid"]
charged_off_percentile = loan.groupby("loan_status")["loan_amnt"].quantile(0.95)["Charged Off"]
print("Charged Off P95", charged_off_percentile)


q3 = loan.groupby("loan_status")["loan_amnt"].quantile(0.75)["Fully Paid"]
q1 = loan.groupby("loan_status")["loan_amnt"].quantile(0.25)["Fully Paid"]
IQR = q3 - q1
upper = q3 + 1.5*IQR
lower = q1 - 1.5*IQR
print("Upper Bound for Qutlier for Fully Paid", upper)
print("Upper Bound for Qutlier for Fully Paid", lower)

print("Fully Paid P95",fully_paid_percentile)

q3_charged_off = loan.groupby("loan_status")["loan_amnt"].quantile(0.75)["Charged Off"]
q1_charged_off = loan.groupby("loan_status")["loan_amnt"].quantile(0.25)["Charged Off"]
IQR_Charged_off = q3_charged_off - q1_charged_off
upper_charged_off = q3_charged_off + 1.5*IQR_Charged_off
lower_charged_off = q1_charged_off - 1.5*IQR_Charged_off
print("Upper Bound for Qutlier for Charged Off", upper_charged_off)
print("Upper Bound for Qutlier for Charged Off", lower_charged_off)

# P95 of loan_status looks like this:
# Charged Off    34606.25
# Current        35000.00
# Fully Paid     30000.00

#Cleaning the values more than P95 of Fully Paid loan status
loan.drop(loan[(loan["loan_status"] == "Fully Paid") & (loan["loan_amnt"] > fully_paid_percentile)].index, inplace=True)

#Cleaning the values more than P95 of Charged Off  loan status
loan.drop(loan[(loan["loan_status"] == "Charged Off") & (loan["loan_amnt"] > charged_off_percentile)].index, inplace=True)

Charged Off P95 27300.0
Upper Bound for Qutlier for Fully Paid 29700.0
Upper Bound for Qutlier for Fully Paid -9500.0
Fully Paid P95 25000.0
Upper Bound for Qutlier for Charged Off 32850.0
Upper Bound for Qutlier for Charged Off -10750.0


In [101]:
# Remove the outliers from the annual income.

fig = px.box(loan, x="loan_status", y="annual_inc")
fig.show()

# loan_temp = loan

q3 = loan.groupby("loan_status")["annual_inc"].quantile(0.75)["Fully Paid"]
q1 = loan.groupby("loan_status")["annual_inc"].quantile(0.25)["Fully Paid"]
IQR = q3 - q1
upper = q3 + 1.5*IQR
lower = q1 - 1.5*IQR

fully_paid_percentile = loan.groupby("loan_status")["annual_inc"].quantile(0.95)["Fully Paid"]
charged_off_percentile = loan.groupby("loan_status")["annual_inc"].quantile(0.95)["Charged Off"]

print(fully_paid_percentile, charged_off_percentile)

loan.drop(loan[(loan["loan_status"] == "Fully Paid") & (loan["annual_inc"] > fully_paid_percentile)].index, inplace=True)
loan.drop(loan[(loan["loan_status"] == "Charged Off") & (loan["annual_inc"] > charged_off_percentile)].index, inplace=True)

fig = px.box(loan, x="loan_status", y="annual_inc")
fig.show()

139000.0 123998.6


In [102]:
# Convert the Interest Rate column to numeric value.

loan["int_rate"] = loan["int_rate"].apply(lambda x: str(x)[:-1])
loan["int_rate"] = pd.to_numeric(loan["int_rate"], errors='coerce')

#**Data Analysis**

---




In [103]:
# Describe the data

# Describe the Loan Amoun applied by borrower.
print(loan.describe()["loan_amnt"].round(2))
# Average Loan applied is: 11219
# Median of amount is 10,000

count    35328.00
mean     10017.81
std       6084.79
min        500.00
25%       5000.00
50%       9000.00
75%      14000.00
max      27300.00
Name: loan_amnt, dtype: float64


**Good and Bad customer count in the data**

In [104]:
import plotly.offline as py 
py.init_notebook_mode(connected=True) # this code, allow us to work with offline plotly version
import plotly.graph_objs as go # it's like "plt" of matplot
import plotly.tools as tls # It's useful to we get some tools of plotly
import warnings # This library will be used to ignore some warnings
from collections import Counter # To do counter of some features
import plotly.io as pio
pio.renderers.default = "colab" #Colab configs for plotly

tr0 = go.Bar(
            x = loan[loan["loan_status"]== 'Fully Paid']["loan_status"].value_counts().index.values,
            y = loan[loan["loan_status"]== 'Fully Paid']["loan_status"].value_counts().values,
            name='Fully Paid'
    )

tr1 = go.Bar(
            x = loan[loan["loan_status"]== 'Charged Off']["loan_status"].value_counts().index.values,
            y = loan[loan["loan_status"]== 'Charged Off']["loan_status"].value_counts().values,
            name='Charged Off'
    )

data = [tr0, tr1]

layout = go.Layout(
    
)

layout = go.Layout(
    yaxis=dict(
        title='Count'
    ),
    xaxis=dict(
        title='Loan Status'
    ),
    title='Dependent variable distribution'
)

fig = go.Figure(data=data, layout=layout)

py.iplot(fig, filename='grouped-bar')

**Home Distribution for Loan Status**

In [120]:
FullyPaid_home_ownership = loan[loan["loan_status"]== 'Fully Paid']["home_ownership"].value_counts().values
charged_off_home_ownership = loan[loan["loan_status"]== 'Charged Off']["home_ownership"].value_counts().values

LA_SF_percent_change = [100*(SF_count - LA_count) / SF_count for SF_count, LA_count in zip(FullyPaid_home_ownership, charged_off_home_ownership)]

#First plot
tr0 = go.Bar(
    x = loan[loan["loan_status"]== 'Fully Paid']["home_ownership"].value_counts().index.values,
    y = FullyPaid_home_ownership,
    text=[f"{percent_change:.0f}%" if percent_change > 0 else f"{percent_change:.0f}%" 
            for percent_change in LA_SF_percent_change ],
        textposition='inside',
        textfont_size=18,
        textfont_color='blue',
    name='Fully Paid'
)

#Second plot
tr1 = go.Bar(
    x = loan[loan["loan_status"]== 'Charged Off']["home_ownership"].value_counts().index.values,
    y = charged_off_home_ownership,
    name="Charged Off"
)

data = [tr0, tr1]

layout = go.Layout(
    title='Home Distribuition'
)


fig = go.Figure(data=data, layout=layout)

py.iplot(fig, filename='Home-Grouped')

**Distribution of Fully Paid and Charged Off loan status with the term frequency**



1.   We can see the More is the loan term there are high chances of having the loan getting charged off. I.e 60 months term has more charged off loan status
2.   Fully Paid loan status is maximum with lower loan term, i.e 36 months.



In [106]:
df_good = loan.loc[loan["loan_status"] == 'Fully Paid']['term_int'].values.tolist()
df_bad = loan.loc[loan["loan_status"] == 'Charged Off']['term_int'].values.tolist()

tr0 = go.Histogram(
    x=df_good,
    xbins=dict(start=np.min(df_good), size=0.50, end=np.max(df_good)+1),
    histnorm='probability',
    name="Fully Paid"
)

#Second plot
tr1 = go.Histogram(
    x=df_bad,
    xbins=dict(start=np.min(df_bad), size=0.50, end=np.max(df_bad)+1),
    histnorm='probability',
    name="Charged Off"
)

fig = tls.make_subplots(rows=2, cols=2, specs=[[{}, {}], [{'colspan': 2}, None]],
                          subplot_titles=('Good','Bad'))

#setting the figs
fig.append_trace(tr0, 1, 1)
fig.append_trace(tr1, 1, 2)
# fig.append_trace(tr2, 2, 1)

fig['layout'].update(showlegend=True, title='Term Distribuition', bargap=0.05)
py.iplot(fig, filename='custom-sized-subplot-with-subplot-titles')


plotly.tools.make_subplots is deprecated, please use plotly.subplots.make_subplots instead



**Employement length vs Loan Status**


1.   We can observe more the tenure of employement less are the numbers of defaulters.
2. 10+ Years experience person with higher loan amount are more likely to defaulters.



In [124]:
FullyPaid_emp_length = loan[loan["loan_status"]== 'Fully Paid']["emp_length"].value_counts().values
charged_off_emp_length = loan[loan["loan_status"]== 'Charged Off']["emp_length"].value_counts().values

LA_SF_percent_change = [100*(SF_count - LA_count) / SF_count for SF_count, LA_count in zip(FullyPaid_emp_length, charged_off_emp_length)]


tr0 = go.Bar(
            x = loan[loan["loan_status"]== 'Fully Paid']["emp_length"].value_counts().index.values,
            y = loan[loan["loan_status"]== 'Fully Paid']["emp_length"].value_counts().values,
            text=[f"{percent_change:.0f}%" if percent_change > 0 else f"{percent_change:.0f}%" 
            for percent_change in LA_SF_percent_change ],
        textposition='outside',
        textfont_size=18,
        textfont_color='blue',
            name='Fully Paid'
    )

tr1 = go.Bar(
            x = loan[loan["loan_status"]== 'Charged Off']["emp_length"].value_counts().index.values,
            y = loan[loan["loan_status"]== 'Charged Off']["emp_length"].value_counts().values,
            name='Charged Off'
    )

tr2 = go.Box(
    x = loan[loan["loan_status"]== 'Fully Paid']["emp_length"],
    y = loan[loan["loan_status"]== 'Fully Paid']["loan_amnt"],
    name=tr0.name
)

#Second plot 2
tr3 = go.Box(
    x = loan[loan["loan_status"]== 'Charged Off']["emp_length"],
    y = loan[loan["loan_status"]== 'Charged Off']["loan_amnt"],
    name=tr1.name
)

data = [tr0, tr1, tr2, tr3]

fig = tls.make_subplots(rows=1, cols=2, 
                        subplot_titles=('Employee Length', 'Loan Amount by Employee Length'))
fig.append_trace(tr0, 1, 1)
fig.append_trace(tr1, 1, 1)
fig.append_trace(tr2, 1, 2)
fig.append_trace(tr3, 1, 2)

fig['layout'].update(height=600, width=1600, title='Employee Length Distribution', boxmode='group')

py.iplot(fig, filename='grouped-bar')

**Loan issued percentage as per customer Employment**

In [126]:
fig = px.pie(loan, values="loan_amnt", names='emp_length')
fig.show()

**Loan Staus distribution vs Purpose of Loan**



1.   Loan taken for Renewable energy is always going to be completed.




In [127]:
FullyPaid_purpose = loan[loan["loan_status"]== 'Fully Paid']["purpose"].value_counts().values
charged_off_purpose = loan[loan["loan_status"]== 'Charged Off']["purpose"].value_counts().values

LA_SF_percent_change = [100*(SF_count - LA_count) / SF_count for SF_count, LA_count in zip(FullyPaid_purpose, charged_off_purpose)]

#First plot
tr0 = go.Bar(
    x = loan[loan["loan_status"]== 'Fully Paid']["purpose"].value_counts().index.values,
    y = loan[loan["loan_status"]== 'Fully Paid']["purpose"].value_counts().values,
    text=[f"{percent_change:.0f}%" if percent_change > 0 else f"{percent_change:.0f}%" 
            for percent_change in LA_SF_percent_change ],
        textposition='outside',
        textfont_size=18,
        textfont_color='blue',
    name='Fully Paid'
)

#Second plot
tr1 = go.Bar(
    x = loan[loan["loan_status"]== 'Charged Off']["purpose"].value_counts().index.values,
    y = loan[loan["loan_status"]== 'Charged Off']["purpose"].value_counts().values,
    name="Charged off"
)

data = [tr0, tr1]

layout = go.Layout(
    title='Loan Purpose distribution'
)


fig = go.Figure(data=data, layout=layout)

py.iplot(fig, filename='Home-Grouped')

**Loan status depending on Loan Amount**
1. After 15K of loan amount, the percentage of loan status being charged off is more.

In [129]:
df_good = loan.loc[loan["loan_status"] == 'Fully Paid']['loan_amnt'].values.tolist()
df_bad = loan.loc[loan["loan_status"] == 'Charged Off']['loan_amnt'].values.tolist()

tr0 = go.Histogram(
    x=df_good,
    xbins=dict(start=np.min(df_good), size=1000, end=np.max(df_good)),
    histnorm='probability',
    name="Fully Paid"
)

#Second plot
tr1 = go.Histogram(
    x=df_bad,
    xbins=dict(start=np.min(df_bad), size=1000, end=np.max(df_bad)),
    histnorm='probability',
    name="Charged Off"
)

fig = tls.make_subplots(rows=2, cols=2, specs=[[{}, {}], [{'colspan': 2}, None]],
                          subplot_titles=('Fully Paid','Charged Off'))

#setting the figs
fig.append_trace(tr0, 1, 1)
fig.append_trace(tr1, 1, 2)
# fig.append_trace(tr2, 2, 1)

fig['layout'].update(height=600, width=1600, showlegend=True, title='Loan Amount Distribuition', bargap=0.5)
py.iplot(fig, filename='custom-sized-subplot-with-subplot-titles')

**Loan Status dependency on Interest Rate**

1. Interest rate till 11.41% has more chances of loan getting fully paid off.
2. As the intrest rate increases the number of loans getting charged off increase.

In [132]:
df_good = loan.loc[loan["loan_status"] == 'Fully Paid']['int_rate'].values.tolist()
df_bad = loan.loc[loan["loan_status"] == 'Charged Off']['int_rate'].values.tolist()

tr0 = go.Histogram(
    x=df_good,
    xbins=dict(start=np.min(df_good), size=2, end=np.max(df_good)),
    histnorm='probability',
    name="Fully Paid"
)

#Second plot
tr1 = go.Histogram(
    x=df_bad,
    xbins=dict(start=np.min(df_bad), size=2, end=np.max(df_bad)),
    histnorm='probability',
    name="Charged Off"
)

fig = tls.make_subplots(rows=2, cols=2, specs=[[{}, {}], [{'colspan': 2}, None]],
                          subplot_titles=('Fully Paid','Charged Off'))

#setting the figs
fig.append_trace(tr0, 1, 1)
fig.append_trace(tr1, 1, 2)
# fig.append_trace(tr2, 2, 1)

fig['layout'].update(showlegend=True, title='Interest Rate Distribuition', bargap=0.5)
py.iplot(fig, filename='custom-sized-subplot-with-subplot-titles')

**Loan status dependency on Loan Grade**

1. 42% of the G grade loan issued are Charged off.
2. A grade loan has 86% of loan issued been Fully paid.

In [135]:
FullyPaid_grade = loan[loan["loan_status"]== 'Fully Paid']["grade"].value_counts().values
charged_off_grade = loan[loan["loan_status"]== 'Charged Off']["grade"].value_counts().values

LA_SF_percent_change = [100*(SF_count - LA_count) / SF_count for SF_count, LA_count in zip(FullyPaid_grade, charged_off_grade)]


#First plot
tr0 = go.Bar(
    x = loan[loan["loan_status"]== 'Fully Paid']["grade"].value_counts().index.values,
    text=[f"{percent_change:.0f}%" if percent_change > 0 else f"{percent_change:.0f}%" 
            for percent_change in LA_SF_percent_change ],
        textposition='inside',
        textfont_size=18,
        textfont_color='red',
    y = loan[loan["loan_status"]== 'Fully Paid']["grade"].value_counts().values,
    name='Good credit'
)

#Second plot
tr1 = go.Bar(
    x = loan[loan["loan_status"]== 'Charged Off']["grade"].value_counts().index.values,
    y = loan[loan["loan_status"]== 'Charged Off']["grade"].value_counts().values,
    name="Bad Credit"
)

data = [tr0, tr1]

layout = go.Layout(
    title='Grade Distribuition'
)


fig = go.Figure(data=data, layout=layout)

py.iplot(fig, filename='Home-Grouped')

**Loan status dependent on Annual Income**
1. Customer with annual income between 19K - 24K, are more likely to have their loan charged off.
2. Customer with annual income between 29K - 39K, are more likely to have their loan charged off.
3. 

In [112]:
df_good = loan.loc[loan["loan_status"] == 'Fully Paid']['annual_inc'].values.tolist()
df_bad = loan.loc[loan["loan_status"] == 'Charged Off']['annual_inc'].values.tolist()

tr0 = go.Histogram(
    x=df_good,
    xbins=dict(start=np.min(df_good), size=5000, end=np.max(df_good)),
    histnorm='probability',
    name="Fully Paid"
)

#Second plot
tr1 = go.Histogram(
    x=df_bad,
    xbins=dict(start=np.min(df_bad), size=5000, end=np.max(df_bad)),
    histnorm='probability',
    name="Charged Off"
)

fig = tls.make_subplots(rows=2, cols=2, specs=[[{}, {}], [{'colspan': 2}, None]],
                          subplot_titles=('Fully Paid','Charged Off'))

#setting the figs
fig.append_trace(tr0, 1, 1)
fig.append_trace(tr1, 1, 2)
# fig.append_trace(tr2, 2, 1)

fig['layout'].update(showlegend=True, title='Annual Income Distribuition', bargap=0.5)
py.iplot(fig, filename='custom-sized-subplot-with-subplot-titles')

**Loan Status distribution as per the Loan issued Year**

1. Loans issues in year 2009, are 86% Fully paid loans



In [136]:
FullyPaid_issueYear = loan[loan["loan_status"]== 'Fully Paid']["issue_year"].value_counts().values
charged_off_issue_year = loan[loan["loan_status"]== 'Charged Off']["issue_year"].value_counts().values

LA_SF_percent_change = [100*(LA_count - SF_count) / SF_count for SF_count, LA_count in zip(FullyPaid_issueYear, charged_off_issue_year)]

#First plot
tr0 = go.Bar(
    x = loan[loan["loan_status"]== 'Fully Paid']["issue_year"].value_counts().index.values,
    y = FullyPaid_issueYear,
    name='Fully Paid'
)

#Second plot
tr1 = go.Bar(
    x = loan[loan["loan_status"]== 'Charged Off']["issue_year"].value_counts().index.values,
    y = charged_off_issue_year,
    text=[f"+{percent_change:.0f}%" if percent_change > 0 else f"{percent_change:.0f}%" 
            for percent_change in LA_SF_percent_change ],
        textposition='outside',
        textfont_size=18,
        textfont_color='red',
    name="Charged Off"
)

data = [tr0, tr1]

layout = go.Layout(
    title='Year Distribuition'
)


fig = go.Figure(data=data, layout=layout)

py.iplot(fig, filename='Home-Grouped')

**Loan Issue month distribution**
1. In Feb month 86% of the loan issues are Fully Paid

In [114]:
FullyPaid_issueMonth = loan[loan["loan_status"]== 'Fully Paid']["issue_month"].value_counts().values
charged_off_issue_month = loan[loan["loan_status"]== 'Charged Off']["issue_month"].value_counts().values

LA_SF_percent_change = [100*(LA_count - SF_count) / SF_count for SF_count, LA_count in zip(FullyPaid_issueMonth, charged_off_issue_month)]

#First plot
tr0 = go.Bar(
    x = loan[loan["loan_status"]== 'Fully Paid']["issue_month"].value_counts().index.values,
    y = FullyPaid_issueMonth,
    name='Fully Paid'
)

#Second plot
tr1 = go.Bar(
    x = loan[loan["loan_status"]== 'Charged Off']["issue_month"].value_counts().index.values,
    y = charged_off_issue_month,
    text=[f"+{percent_change:.0f}%" if percent_change > 0 else f"{percent_change:.0f}%" 
            for percent_change in LA_SF_percent_change ],
        textposition='outside',
        textfont_size=18,
        textfont_color='red',
    name="Charged Off"
)

data = [tr0, tr1]

layout = go.Layout(
    title='Month Distribuition'
)


fig = go.Figure(data=data, layout=layout)

py.iplot(fig, filename='Home-Grouped')

**Pie Chart distribution of overall Loan Status for the Lending Company**

1. Out of total loan issues, 84.5% of loans are Fully Paid.

In [115]:
fig = px.pie(loan, values="loan_amnt", names='loan_status')
fig.show()