## Loan approval column encoding (pre-processing)

In [1]:
# Import dependencies.
import pandas as pd
from path import Path

In [2]:
# Read the raw loans CSV into a DataFrame and preview its first five rows.
# NOTE(review): the preview shows an "Unnamed: 0" column - presumably an index
# that was saved into the CSV; confirm whether it should be kept as a feature.
file_path = Path("../Resources/loans_data.csv")
loans_df = pd.read_csv(filepath_or_buffer=file_path)
loans_df.head(n=5)

Unnamed: 0,amount,term,month,age,education,gender,bad
0,1000,30,June,45,High School or Below,male,0
1,1000,30,July,50,Bachelor,female,0
2,1000,30,August,33,Bachelor,female,0
3,1000,15,September,27,college,male,0
4,1000,30,October,28,college,female,0


### One-Hot (Dummy) Encoding for Education and Gender

In [3]:
# One-hot (dummy) encoding using Pandas (multiple columns): every category of
# "education" and "gender" becomes its own indicator column, and the two
# original text columns are removed from the result.
# dtype=int pins the indicators to 0/1 integers. Since pandas 2.0 the default
# dtype of get_dummies is bool, which would produce True/False values here and
# in the CSV written at the end of the notebook.
loans_data_encoded_df = pd.get_dummies(
    loans_df,
    columns=["education", "gender"],
    dtype=int,
)
loans_data_encoded_df.head()

Unnamed: 0,amount,term,month,age,bad,education_Bachelor,education_High School or Below,education_Master or Above,education_college,gender_female,gender_male
0,1000,30,June,45,0,0,1,0,0,0,1
1,1000,30,July,50,0,1,0,0,0,1,0
2,1000,30,August,33,0,1,0,0,0,1,0
3,1000,15,September,27,0,0,0,0,1,0,1
4,1000,30,October,28,0,0,0,0,1,1,0


### Integer (Label) Encoding for Month

In [4]:
# Encode the month names as integer labels with scikit-learn's LabelEncoder
# and store them in a new "month_le" column.
# NOTE: the labels follow the sorted (alphabetical) order of the month names,
# not the calendar order - in the preview, August -> 1 and June -> 6.
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
month_labels = label_encoder.fit(loans_data_encoded_df["month"]).transform(
    loans_data_encoded_df["month"]
)
loans_data_encoded_df["month_le"] = month_labels
loans_data_encoded_df.head()

Unnamed: 0,amount,term,month,age,bad,education_Bachelor,education_High School or Below,education_Master or Above,education_college,gender_female,gender_male,month_le
0,1000,30,June,45,0,0,1,0,0,0,1,6
1,1000,30,July,50,0,1,0,0,0,1,0,5
2,1000,30,August,33,0,1,0,0,0,1,0,1
3,1000,15,September,27,0,0,0,0,1,0,1,11
4,1000,30,October,28,0,0,0,0,1,1,0,10


In [5]:
# Months dictionary: month name -> calendar number (January = 1 ... December = 12).
# The names are listed in calendar order and numbered starting at 1.
month_order = [
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
]
months_num = {name: position for position, name in enumerate(month_order, start=1)}

In [6]:
# Months' names encoded using the dictionary values: each entry of the "month"
# column is looked up in months_num and the result is stored in "month_num".
def month_to_number(month_name):
    """Return the number stored in months_num for month_name.

    Raises KeyError when month_name is not a key of months_num.
    """
    return months_num[month_name]


loans_data_encoded_df["month_num"] = loans_data_encoded_df["month"].apply(month_to_number)
loans_data_encoded_df.head()

Unnamed: 0,amount,term,month,age,bad,education_Bachelor,education_High School or Below,education_Master or Above,education_college,gender_female,gender_male,month_le,month_num
0,1000,30,June,45,0,0,1,0,0,0,1,6,6
1,1000,30,July,50,0,1,0,0,0,1,0,5,7
2,1000,30,August,33,0,1,0,0,0,1,0,1,8
3,1000,15,September,27,0,0,0,0,1,0,1,11,9
4,1000,30,October,28,0,0,0,0,1,1,0,10,10


In [7]:
# Remove the original "month" text column and the LabelEncoder column
# "month_le"; "month_num" stays as the numeric representation of the month.
helper_columns = ["month", "month_le"]
loans_data_encoded_df = loans_data_encoded_df.drop(columns=helper_columns)
loans_data_encoded_df.head()

Unnamed: 0,amount,term,age,bad,education_Bachelor,education_High School or Below,education_Master or Above,education_college,gender_female,gender_male,month_num
0,1000,30,45,0,0,1,0,0,0,1,6
1,1000,30,50,0,1,0,0,0,1,0,7
2,1000,30,33,0,1,0,0,0,1,0,8
3,1000,15,27,0,0,0,0,1,0,1,9
4,1000,30,28,0,0,0,0,1,1,0,10


### Save DataFrame to CSV

In [15]:
# Save loans_data_encoded_df as a csv.
# file_path is the single definition of the destination and is what to_csv
# receives, so the location only has to be changed in one place.
# index=False leaves the DataFrame index out of the file; the header row with
# the column names is written by default.
file_path = Path("../Resources/loans_data_encoded.csv")
loans_data_encoded_df.to_csv(file_path, index=False)