# Loan Approval Prediction using:
##### Decision Tree Classification
##### Random Forest Classification

## Importing the libraries

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

## Importing the dataset

In [None]:
# Load the loan records from the CSV in the working directory and preview
# the first five rows.
df_train = pd.read_csv(filepath_or_buffer="./loan_data.csv")
df_train.head(n=5)

In [None]:
# Print the column names, non-null counts and dtypes of the raw data.
df_train.info(verbose=None)

In [None]:
# Count the missing values in every column of the raw data.
df_train.isna().sum(axis=0)

## Exploratory Data Analysis

### number of applications per loan status

In [None]:
# Bar chart of how often each Loan_Status value occurs.
df_train["Loan_Status"].value_counts().plot(kind="bar")

### distribution of applicants across property areas

In [None]:
# Pie chart of the share of rows per Property_Area value; each slice is
# labelled with its percentage to one decimal place.
area = df_train["Property_Area"].value_counts()
plt.pie(x=area, labels=area.index, autopct='%1.1f%%')
plt.show()

### loan status according to property area

In [None]:
# Cross-tabulate property area (rows) against loan status (columns) and draw
# the counts as side-by-side bars.
cb = pd.crosstab(
    index=df_train["Property_Area"],
    columns=df_train["Loan_Status"],
)
cb.plot(kind="bar", stacked=False)
plt.show()

### loan amount from certain property area and loan status

In [None]:
# LoanAmount distribution per property area, split by loan status.
sns.boxplot(
    data=df_train,
    x="Property_Area",
    y="LoanAmount",
    hue="Loan_Status",
)

### loan amount and marital status, with loan status

In [None]:
# LoanAmount distribution per Married value, split by loan status.
sns.boxplot(
    data=df_train,
    x="Married",
    y="LoanAmount",
    hue="Loan_Status",
)

### loan amount and number of dependents, with loan status

In [None]:
# LoanAmount distribution per Dependents value, split by loan status.
sns.boxplot(
    data=df_train,
    x="Dependents",
    y="LoanAmount",
    hue="Loan_Status",
)

### loan amount and education status, with loan status

In [None]:
# LoanAmount distribution per Education value, split by loan status.
sns.boxplot(
    data=df_train,
    x="Education",
    y="LoanAmount",
    hue="Loan_Status",
)

### loan amount and self-employment status, with loan status

In [None]:
# LoanAmount distribution per Self_Employed value, split by loan status.
sns.boxplot(
    data=df_train,
    x="Self_Employed",
    y="LoanAmount",
    hue="Loan_Status",
)

### loan amount and credit history, with loan status

In [None]:
# LoanAmount distribution per Credit_History value, split by loan status.
sns.boxplot(
    data=df_train,
    x="Credit_History",
    y="LoanAmount",
    hue="Loan_Status",
)

### loan amount and applicant income, with loan status

In [None]:
# ApplicantIncome against LoanAmount, points coloured by loan status.
sns.scatterplot(
    data=df_train,
    x="ApplicantIncome",
    y="LoanAmount",
    hue="Loan_Status",
)


### loan amount and co-applicant income, with loan status

In [None]:
# CoapplicantIncome against LoanAmount, points coloured by loan status.
sns.scatterplot(
    data=df_train,
    x="CoapplicantIncome",
    y="LoanAmount",
    hue="Loan_Status",
)

## Split categorical columns and numerical columns

### categorical columns

In [None]:
# Categorical view of the data: an independent copy of the raw frame without
# the identifier and the numeric columns (Loan_Status stays in this frame).
train_cat = df_train.copy().drop(
    columns=[
        'Loan_ID',
        'ApplicantIncome',
        'CoapplicantIncome',
        'LoanAmount',
        'Loan_Amount_Term',
    ]
)

In [None]:
# Missing-value count per categorical column, largest first.
train_cat.isna().sum(axis=0).sort_values(ascending=False)

In [None]:
# Impute missing categorical values by carrying the previous row's value
# forward. `fillna(method=...)` is deprecated in recent pandas releases;
# `ffill()` is the supported spelling of the same operation.
train_cat.ffill(inplace=True)
# Forward fill cannot reach gaps in the leading rows (there is no earlier
# value to copy), so back-fill whatever is still missing there.
train_cat.bfill(inplace=True)

In [None]:
# True if any categorical column still contains a missing value.
train_cat.isna().any(axis=0).any()

### numerical columns

In [None]:
# Numeric view of the data: an independent copy of the raw frame without the
# identifier, the categorical columns and the Loan_Status column.
train_num = df_train.copy().drop(
    columns=[
        'Loan_ID',
        'Gender',
        'Married',
        'Dependents',
        'Education',
        'Self_Employed',
        'Credit_History',
        'Property_Area',
        'Loan_Status',
    ]
)

In [None]:
# Missing-value count per numeric column, largest first.
train_num.isna().sum(axis=0).sort_values(ascending=False)

In [None]:
# Impute missing numeric values by copying the next row's value backward.
# `fillna(method=...)` is deprecated in recent pandas releases; `bfill()` is
# the supported spelling of the same operation.
train_num.bfill(inplace=True)
# Backward fill cannot reach gaps in the trailing rows (there is no later
# value to copy), so forward-fill whatever is still missing there.
train_num.ffill(inplace=True)

In [None]:
# True if any numeric column still contains a missing value.
train_num.isna().any(axis=0).any()

## Merge categorical columns and numerical columns

In [None]:
# Put the imputed frames side by side: categorical columns first, numeric
# columns after them, rows aligned on the index.
df = pd.concat(objs=[train_cat, train_num], axis="columns")

## Label encoding data

In [None]:
# Replace every non-numeric column with integer codes.
#
# Each column gets its own fitted encoder, stored in `label_encoders` under
# the column name, so codes can be mapped back to the original labels with
# `label_encoders[col].inverse_transform(...)`. A single shared encoder would
# be refit on every column and only remember the classes of the last one.
#
# Columns are picked with `exclude='number'` rather than by comparing the
# dtype to 'object', so text columns stored with pandas' dedicated string
# dtype are encoded as well.
column_objects = df.select_dtypes(exclude='number').columns.tolist()
label_encoders = {}
for col in column_objects:
    encoder = LabelEncoder()
    df[col] = encoder.fit_transform(df[col])
    label_encoders[col] = encoder

## Training the model

### Splitting into dependent and independent variable vectors

In [None]:
# Select the target by name instead of by position. `df` is the categorical
# frame (which contains Loan_Status) followed by the numeric frame, so the
# last column is a numeric feature: positional slicing with `iloc[:, -1]`
# would use that feature as the target and leave Loan_Status among the inputs.
X = df.drop(columns='Loan_Status').values  # feature matrix: every column except the target
y = df['Loan_Status'].values               # target vector: encoded loan status

### Splitting training and testing data

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=13,
    test_size=0.3,
)

## Decision Tree Classification model

In [None]:
# Fit a decision tree on the training split. The estimator is seeded with the
# same value as the train/test split so the fitted tree, and therefore the
# reported accuracy, is reproducible between runs.
decision_tree = DecisionTreeClassifier(random_state=13)
decision_tree.fit(X_train, y_train)

In [None]:
# Predict the held-out rows with the decision tree and report the fraction
# of predictions that match the true labels.
predict_test_dtc = decision_tree.predict(X_test)
accuracy_score(y_true=y_test, y_pred=predict_test_dtc)

## Random Forest Classification model

In [None]:
# Fit a 300-tree random forest on the training split. The estimator is seeded
# with the same value as the train/test split so the bootstrap samples and
# feature draws, and therefore the reported accuracy, are reproducible
# between runs.
random_forest = RandomForestClassifier(n_estimators=300, random_state=13)
random_forest.fit(X_train, y_train)

In [None]:
# Predict the held-out rows with the random forest and report the fraction
# of predictions that match the true labels.
predict_test_rfc = random_forest.predict(X_test)
accuracy_score(y_true=y_test, y_pred=predict_test_rfc)