In [92]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

In [93]:
df = pd.read_csv('loan_sanction.csv')


## Converting to binary

In [94]:
#Gender - convert to binary
#Male - 1, Female - 0
def gender_map(x):
    if x == "Male":
        return 1
    if x == "Female":
        return 0

df["Gender"] = df["Gender"].map(gender_map)
print(df["Gender"])


0      1.0
1      1.0
2      1.0
3      1.0
4      1.0
      ... 
609    0.0
610    1.0
611    1.0
612    1.0
613    0.0
Name: Gender, Length: 614, dtype: float64


In [95]:
#Married - convert to binary
#Yes - 1, No - 0
def married_map(x):
    if x == "Yes":
        return 1
    if x == "No":
        return 0

df["Married"] = df["Married"].map(married_map)

print(df["Married"])

0      0.0
1      1.0
2      1.0
3      1.0
4      0.0
      ... 
609    0.0
610    1.0
611    1.0
612    1.0
613    0.0
Name: Married, Length: 614, dtype: float64


In [96]:
#Dependents - change 3+ to 3
def dep_map (x):
    if x == 0:
        return 0
    if x == 1:
        return 1
    if x == 2:
        return 2
    if x == "3+":
        return 3
    return 0
df["Dependents"] = df["Dependents"].map(dep_map)
df['Dependents'] = df['Dependents'].astype(str).astype(int)

In [97]:
#Education - convert to binary
#Graduate - 1, Not Graduate - 0
def edu_map(x):
    if x == "Graduate":
        return 1
    if x == "Not Graduate":
        return 0
    return None

df["Education"] = df["Education"].map(edu_map)
print(df["Education"])

0      1
1      1
2      1
3      0
4      1
      ..
609    1
610    1
611    1
612    1
613    1
Name: Education, Length: 614, dtype: int64


In [98]:
#Self_Employed - convert to binary
#Yes - 1, No - 0
def self_emp_map(x):
    if x == "Yes":
        return 1
    if x == "No":
        return 0
    return None

df["Self_Employed"] = df["Self_Employed"].map(self_emp_map)
print(df["Self_Employed"])

0      0.0
1      0.0
2      1.0
3      0.0
4      0.0
      ... 
609    0.0
610    0.0
611    0.0
612    0.0
613    1.0
Name: Self_Employed, Length: 614, dtype: float64


In [99]:
#Property Area - convert to 1 to 3
#1 - urban, 2 - semiurban, 3 - rural
def prop_map(x):
    if x == "Urban":
        return 1
    if x == "Semiurban":
        return 2
    if x == "Rural":
        return 3
    return None

df["Property_Area"] = df["Property_Area"].map(prop_map)
print(df["Property_Area"])

0      1
1      3
2      1
3      1
4      1
      ..
609    3
610    3
611    1
612    1
613    2
Name: Property_Area, Length: 614, dtype: int64


In [100]:
#Loan Status - convert to binary
#Y - 1, N - 0
def loan_map(x):
    if x == "Y":
        return 1
    if x == "N":
        return 0
    return None

df["Loan_Status"] = df["Loan_Status"].map(loan_map)
print(df["Loan_Status"])


0      1
1      0
2      1
3      1
4      1
      ..
609    1
610    1
611    1
612    1
613    0
Name: Loan_Status, Length: 614, dtype: int64


In [101]:
print(df.info())
df.corr()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 614 entries, 0 to 613
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Loan_ID            614 non-null    object 
 1   Gender             601 non-null    float64
 2   Married            611 non-null    float64
 3   Dependents         614 non-null    int32  
 4   Education          614 non-null    int64  
 5   Self_Employed      582 non-null    float64
 6   ApplicantIncome    614 non-null    int64  
 7   CoapplicantIncome  614 non-null    float64
 8   LoanAmount         592 non-null    float64
 9   Loan_Amount_Term   600 non-null    float64
 10  Credit_History     564 non-null    float64
 11  Property_Area      614 non-null    int64  
 12  Loan_Status        614 non-null    int64  
dtypes: float64(7), int32(1), int64(4), object(1)
memory usage: 60.1+ KB
None


Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
Gender,1.0,0.369612,0.09371,-0.049258,-0.009829,0.053989,0.083946,0.106947,-0.075117,0.016337,0.024556,0.019857
Married,0.369612,1.0,0.133848,-0.014223,0.001909,0.051332,0.07777,0.149519,-0.10381,0.004381,-0.002918,0.08928
Dependents,0.09371,0.133848,1.0,-0.055288,0.001711,0.156687,0.041491,0.156271,-0.07785,-0.059706,0.05183,-0.026123
Education,-0.049258,-0.014223,-0.055288,1.0,0.012333,0.14076,0.06229,0.171133,0.078784,0.081822,-0.065243,0.085884
Self_Employed,-0.009829,0.001909,0.001711,0.012333,1.0,0.140826,-0.011152,0.123931,-0.037069,0.003883,0.031214,-0.002303
ApplicantIncome,0.053989,0.051332,0.156687,0.14076,0.140826,1.0,-0.116605,0.570909,-0.045306,-0.014715,0.0095,-0.00471
CoapplicantIncome,0.083946,0.07777,0.041491,0.06229,-0.011152,-0.116605,1.0,0.188619,-0.059878,-0.002056,-0.010522,-0.059187
LoanAmount,0.106947,0.149519,0.156271,0.171133,0.123931,0.570909,0.188619,1.0,0.039447,-0.008433,0.045792,-0.037318
Loan_Amount_Term,-0.075117,-0.10381,-0.07785,0.078784,-0.037069,-0.045306,-0.059878,0.039447,1.0,0.00147,0.078748,-0.021268
Credit_History,0.016337,0.004381,-0.059706,0.081822,0.003883,-0.014715,-0.002056,-0.008433,0.00147,1.0,0.001969,0.561678
