In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the loan dataset; the path is resolved relative to the working directory.
path = 'PanadsBankLoan.csv'
bank = pd.read_csv(filepath_or_buffer=path)

# Keep only the object-dtype columns and preview the first rows.
categorical_var = bank.select_dtypes(include=['object'])
print(categorical_var.head(n=5))

    Loan_ID Gender Married Dependents     Education Self_Employed  \
0  LP001002   Male      No          0      Graduate            No   
1  LP001003   Male     Yes          1      Graduate            No   
2  LP001005   Male     Yes          0      Graduate           Yes   
3  LP001006   Male     Yes          0  Not Graduate            No   
4  LP001008   Male      No          0      Graduate            No   

  Property_Area Loan_Status  
0         Urban           Y  
1         Rural           N  
2         Urban           Y  
3         Urban           Y  
4         Urban           Y  


In [3]:
# Counterpart of the object-dtype selection: every column with a numeric dtype.
numerical_var = bank.select_dtypes(include=[np.number])
print(numerical_var.head(n=5))

   ApplicantIncome  CoapplicantIncome  LoanAmount  Loan_Amount_Term  \
0             5849                0.0         NaN             360.0   
1             4583             1508.0       128.0             360.0   
2             3000                0.0        66.0             360.0   
3             2583             2358.0       120.0             360.0   
4             6000                0.0       141.0             360.0   

   Credit_History  
0             1.0  
1             1.0  
2             1.0  
3             1.0  
4             1.0  


In [4]:
# Working copy without the Loan_ID column; `bank` itself is left untouched.
banks = bank.drop(columns=['Loan_ID'])
print(banks.head(n=5))

  Gender Married Dependents     Education Self_Employed  ApplicantIncome  \
0   Male      No          0      Graduate            No             5849   
1   Male     Yes          1      Graduate            No             4583   
2   Male     Yes          0      Graduate           Yes             3000   
3   Male     Yes          0  Not Graduate            No             2583   
4   Male      No          0      Graduate            No             6000   

   CoapplicantIncome  LoanAmount  Loan_Amount_Term  Credit_History  \
0                0.0         NaN             360.0             1.0   
1             1508.0       128.0             360.0             1.0   
2                0.0        66.0             360.0             1.0   
3             2358.0       120.0             360.0             1.0   
4                0.0       141.0             360.0             1.0   

  Property_Area Loan_Status  
0         Urban           Y  
1         Rural           N  
2         Urban           Y  
3 

In [5]:
# Number of missing entries in each column of the working frame.
missing_per_column = banks.isna().sum()
print(missing_per_column)

Gender               13
Married               3
Dependents           15
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64


In [6]:
# Most frequent value of every column, computed down the rows (axis 0).
# mode() returns a DataFrame: additional rows appear when a column has ties.
bank_mode = banks.mode(axis=0)
print(bank_mode)

  Gender Married Dependents Education Self_Employed  ApplicantIncome  \
0   Male     Yes          0  Graduate            No             2500   

   CoapplicantIncome  LoanAmount  Loan_Amount_Term  Credit_History  \
0                0.0       120.0             360.0             1.0   

  Property_Area Loan_Status  
0     Semiurban           Y  


In [7]:
# Impute missing values with each column's most frequent value (its mode).
#
# The filled frame is assigned back to `banks` rather than calling
# banks[col].fillna(..., inplace=True): that form is a chained in-place
# assignment, which pandas deprecates and which no longer updates `banks`
# once copy-on-write is active. Building the fill values in one mapping also
# replaces seven copy-pasted statements with a single fillna call.
mode_fill_values = {
    column: banks[column].mode()[0]
    for column in ['Gender', 'Married', 'Dependents', 'Self_Employed',
                   'LoanAmount', 'Loan_Amount_Term', 'Credit_History']
}
banks = banks.fillna(value=mode_fill_values)

# Confirm that no column has missing values left.
print(banks.isnull().sum())

Gender               0
Married              0
Dependents           0
Education            0
Self_Employed        0
ApplicantIncome      0
CoapplicantIncome    0
LoanAmount           0
Loan_Amount_Term     0
Credit_History       0
Property_Area        0
Loan_Status          0
dtype: int64


In [8]:
# Mean LoanAmount for every (Gender, Married, Self_Employed) combination.
# The aggregation is named by string: passing the np.mean callable makes
# recent pandas emit a FutureWarning, and 'mean' is the documented way to
# keep the same result.
avg_loan_amount = banks.pivot_table(
    values='LoanAmount',
    index=['Gender', 'Married', 'Self_Employed'],
    aggfunc='mean',
)
print(avg_loan_amount)

                              LoanAmount
Gender Married Self_Employed            
Female No      No             114.768116
               Yes            125.272727
       Yes     No             133.714286
               Yes            282.250000
Male   No      No             129.508621
               Yes            180.588235
       Yes     No             152.608150
               Yes            167.420000


In [9]:
# Count rows where Self_Employed is 'Yes' and Loan_Status is 'Y'.
is_self_employed = banks['Self_Employed'] == 'Yes'
is_approved = banks['Loan_Status'] == 'Y'
loan_approved_se = (is_self_employed & is_approved).sum()
print(loan_approved_se)

56


In [10]:
# Count rows where Self_Employed is 'No' and Loan_Status is 'Y'.
is_not_self_employed = banks['Self_Employed'] == 'No'
is_approved = banks['Loan_Status'] == 'Y'
loan_approved_nse = (is_not_self_employed & is_approved).sum()
print(loan_approved_nse)

366


In [11]:
# Percentage of the approved loans counted above that belong to the
# Self_Employed == 'Yes' group.
total_approved = loan_approved_se + loan_approved_nse
percentage_se = 100 * loan_approved_se / total_approved
print(percentage_se)

13.270142180094787


In [12]:
# Percentage of the approved loans counted above that belong to the
# Self_Employed == 'No' group.
total_approved = loan_approved_se + loan_approved_nse
percentage_nse = 100 * loan_approved_nse / total_approved
print(percentage_nse)

86.72985781990522


In [13]:
# Divide Loan_Amount_Term by 12 (presumably months -> years; confirm the unit
# against the dataset description). Vectorized division replaces the
# per-element apply(lambda ...) and yields the same Series, name included.
loan_term = banks['Loan_Amount_Term'] / 12
print(loan_term)

0      30.0
1      30.0
2      30.0
3      30.0
4      30.0
5      30.0
6      30.0
7      30.0
8      30.0
9      30.0
10     30.0
11     30.0
12     30.0
13     30.0
14     10.0
15     30.0
16     20.0
17     30.0
18     30.0
19     30.0
20     30.0
21     30.0
22     30.0
23     30.0
24     30.0
25     30.0
26     30.0
27     30.0
28     30.0
29     30.0
       ... 
584    30.0
585     7.0
586    30.0
587    30.0
588    30.0
589    30.0
590    15.0
591    20.0
592    30.0
593    15.0
594    30.0
595    30.0
596    30.0
597    30.0
598    30.0
599    30.0
600    15.0
601    30.0
602    30.0
603    30.0
604    30.0
605    15.0
606    30.0
607    30.0
608    30.0
609    30.0
610    15.0
611    30.0
612    30.0
613    30.0
Name: Loan_Amount_Term, Length: 614, dtype: float64


In [14]:
# Number of entries in loan_term that are 25 or more. Summing the boolean
# mask counts the True values.
big_loan_term = (loan_term >= 25).sum()
print(big_loan_term)

554


In [15]:
# Split the rows by Loan_Status; first() prints the first entry of every
# column within each group.
loan_groupby = banks.groupby(by='Loan_Status')
print(loan_groupby.first())

            Gender Married Dependents Education Self_Employed  \
Loan_Status                                                     
N             Male     Yes          1  Graduate            No   
Y             Male      No          0  Graduate            No   

             ApplicantIncome  CoapplicantIncome  LoanAmount  Loan_Amount_Term  \
Loan_Status                                                                     
N                       4583             1508.0       128.0             360.0   
Y                       5849                0.0       120.0             360.0   

             Credit_History Property_Area  
Loan_Status                                
N                       1.0         Rural  
Y                       1.0         Urban  


In [16]:
# Restrict the Loan_Status groups to the two columns of interest.
# The columns are selected with a list: indexing a groupby with a bare tuple
# of keys was deprecated and then removed in pandas 2.0, where it raises.
# The groupby is rebuilt from `banks` instead of re-slicing `loan_groupby`,
# so this cell gives the same result however many times it is run.
loan_groupby = banks.groupby('Loan_Status')[['ApplicantIncome', 'Credit_History']]
print(loan_groupby.first())

             ApplicantIncome  Credit_History
Loan_Status                                 
N                       4583             1.0
Y                       5849             1.0


In [17]:
# Per-group mean of the columns held by loan_groupby.
mean_values = loan_groupby.agg('mean')
print(mean_values)

             ApplicantIncome  Credit_History
Loan_Status                                 
N                5446.078125        0.572917
Y                5384.068720        0.983412
