#  Importing the libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Importing and loading the dataset

In [None]:
# Read the loan-prediction training CSV from the Kaggle input directory.
csv_path = '../input/loan-prediction-problem-dataset/train_u6lujuX_CVtuZ9i.csv'
dataset = pd.read_csv(csv_path)
dataset.head()  # preview the first rows of the frame

# Dataset Info

In [None]:
# Per-column overview of the raw frame: names, non-null counts and dtypes.
dataset.info()

# Dataset Shape

In [None]:
# (rows, columns) of the dataset as loaded, before any cleaning.
dataset.shape

# Dataset Description

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
dataset.describe()

# Checking the missing data

In [None]:
# Number of missing entries in each column of the raw data.
missing_per_column = dataset.isna().sum()
missing_per_column

**Taking care of missing values in "LoanAmount" and "Credit_History"**

In [None]:
# Replace missing loan amounts with the column mean.
loan_amount_mean = dataset['LoanAmount'].mean()
dataset['LoanAmount'] = dataset['LoanAmount'].fillna(loan_amount_mean)

In [None]:
# Replace missing credit-history flags with the column median.
credit_history_median = dataset['Credit_History'].median()
dataset['Credit_History'] = dataset['Credit_History'].fillna(credit_history_median)

**Let's confirm that there are no missing values left in 'LoanAmount' and 'Credit_History'**

In [None]:
# Re-count missing entries per column after the two imputations.
missing_after_impute = dataset.isna().sum()
missing_after_impute

**Now let's drop all the rows that still contain missing values**

In [None]:
# Drop every row that still has a missing value in any column.
# LoanAmount and Credit_History were imputed in the cells before this one,
# so only gaps in the remaining columns cause rows to be removed here.
# NOTE: this mutates `dataset` in place; reload the CSV to get the raw rows back.
dataset.dropna(inplace=True)

**Let's check the Missing values for the final time!**

In [None]:
# Final check of missing entries per column after the row drop.
missing_after_drop = dataset.isna().sum()
missing_after_drop

**Dropping is a common way to handle null values: we delete a row if it has a null value for some feature, or delete a whole column if more than 70-75% of its values are missing. This method is advised only when there are enough samples in the dataset, and one has to make sure that deleting the data does not introduce bias. Removing data also loses information, which may keep the model from giving the expected results when predicting the output.**

> Lets check our dataset new shape

In [None]:
# (rows, columns) after the rows with missing values were dropped.
dataset.shape

# Deep dive into the dataset

> Comparison between Genders in getting the Loan:


In [None]:
# Contingency table: applicant gender (rows) against loan outcome (columns).
gender_vs_status = pd.crosstab(dataset['Gender'], dataset['Loan_Status'])
print(gender_vs_status)

In [None]:
# Bar chart of applicant counts per gender, split by loan outcome.
# The columns are passed by name with explicit x=/hue=/data= keywords:
# current seaborn releases accept only `data` positionally, so handing a
# Series in as the first positional argument no longer plots it on the x axis.
sns.countplot(x='Gender', hue='Loan_Status', data=dataset)

Here, we can see that males have a greater chance of getting the loan.

> Comparison between Married Status in getting the Loan:

In [None]:
# Contingency table of marital status against loan outcome, followed by
# the matching grouped bar chart.
print(pd.crosstab(dataset['Married'], dataset['Loan_Status']))
# Explicit x=/hue=/data= keywords: current seaborn releases accept only
# `data` positionally, so a positional Series is no longer used as x.
sns.countplot(x='Married', hue='Loan_Status', data=dataset)

Here we can see that married people have a greater chance of getting the loan.

> Comparison between Self-Employed or Not in getting the Loan:


In [None]:
# Contingency table of self-employment status against loan outcome,
# followed by the matching grouped bar chart.
print(pd.crosstab(dataset['Self_Employed'], dataset['Loan_Status']))
# Explicit x=/hue=/data= keywords: current seaborn releases accept only
# `data` positionally, so a positional Series is no longer used as x.
sns.countplot(x='Self_Employed', hue='Loan_Status', data=dataset)

Here we can see that people who are not self-employed have a greater chance of getting the loan.








> Comparison between Property Area for getting the Loan:

In [None]:
# Contingency table of property area against loan outcome, followed by
# the matching grouped bar chart.
print(pd.crosstab(dataset['Property_Area'], dataset['Loan_Status']))
# Explicit x=/hue=/data= keywords: current seaborn releases accept only
# `data` positionally, so a positional Series is no longer used as x.
sns.countplot(x='Property_Area', hue='Loan_Status', data=dataset)

The tendency to get a loan varies by property area: semiurban > rural > urban.

# Encoding of non-numerical values

In [None]:
# Encode the target as integers: 'Y' (approved) -> 1, 'N' (rejected) -> 0.
# The result is assigned back to the column instead of calling
# `.replace(..., inplace=True)` on `dataset['Loan_Status']`: that chained
# in-place form mutates a temporary Series, which pandas warns about and
# which does not update the frame under Copy-on-Write.
# `.map` also yields an integer column directly rather than relying on
# `replace` downcasting an object column.
# As with the other `.map` encoders in this notebook, run this once per
# load; a second run maps the 0/1 codes to NaN.
dataset['Loan_Status'] = dataset['Loan_Status'].map({'Y': 1, 'N': 0})

In [None]:
# Number of rows for each Loan_Status value (i.e. the class balance).
dataset['Loan_Status'].value_counts()

In [None]:
# Encode Gender numerically (Male -> 1, Female -> 0) and show the resulting counts.
gender_codes = {'Male': 1, 'Female': 0}
dataset['Gender'] = dataset['Gender'].map(gender_codes)
dataset['Gender'].value_counts()

In [None]:
# Encode Married numerically (Yes -> 1, No -> 0) and show the resulting counts.
married_codes = {'Yes': 1, 'No': 0}
dataset['Married'] = dataset['Married'].map(married_codes)
dataset['Married'].value_counts()

In [None]:
# Turn the Dependents labels into integers; the open-ended '3+' bucket becomes 3.
dependents_codes = {'0': 0, '1': 1, '2': 2, '3+': 3}
dataset['Dependents'] = dataset['Dependents'].map(dependents_codes)
dataset['Dependents'].value_counts()

In [None]:
# Encode Education numerically (Graduate -> 1, Not Graduate -> 0) and show the counts.
education_codes = {'Graduate': 1, 'Not Graduate': 0}
dataset['Education'] = dataset['Education'].map(education_codes)
dataset['Education'].value_counts()

In [None]:
# Encode Self_Employed numerically (Yes -> 1, No -> 0) and show the counts.
self_employed_codes = {'Yes': 1, 'No': 0}
dataset['Self_Employed'] = dataset['Self_Employed'].map(self_employed_codes)
dataset['Self_Employed'].value_counts()

In [None]:
# Encode Property_Area as integers (Rural -> 0, Semiurban -> 1, Urban -> 2)
# and show the resulting counts.
property_area_codes = {'Urban': 2, 'Rural': 0, 'Semiurban': 1}
dataset['Property_Area'] = dataset['Property_Area'].map(property_area_codes)
dataset['Property_Area'].value_counts()

In [None]:
# Frequency of each distinct LoanAmount value.
dataset['LoanAmount'].value_counts()

In [None]:
# Frequency of each distinct Loan_Amount_Term value.
dataset['Loan_Amount_Term'].value_counts()

In [None]:
# Frequency of each distinct Credit_History value (missing entries were
# filled with the median earlier in the notebook).
dataset['Credit_History'].value_counts()

# Display the correlation matrix

In [None]:
# Annotated heatmap of pairwise correlations between the numeric columns.
plt.figure(figsize=(16, 5))
# Restrict to numeric columns before calling corr(): pandas >= 2.0 no longer
# drops non-numeric columns silently and raises on them instead.
# NOTE(review): the first column, which is skipped when X is built, is
# presumably a string identifier - confirm against the CSV.
numeric_columns = dataset.select_dtypes(include='number')
sns.heatmap(numeric_columns.corr(), annot=True)
plt.title('Correlation Matrix (for Loan Status)')

# Our modified dataset

In [None]:
# Preview the first rows after cleaning and encoding.
dataset.head()

# Splitting the dataset into train and test sets

In [None]:
# Features: every column except the first and the last.
# Target: the last column.
feature_frame = dataset.iloc[:, 1:-1]
target_series = dataset.iloc[:, -1]
X = feature_frame.to_numpy()
y = target_series.to_numpy()

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Feature scaling

In [None]:
from sklearn.preprocessing import StandardScaler
# Learn the scaling parameters from the training rows only, then apply the
# same transformation to both the training and the test features.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Inspect the standardised training features.
print(X_train)

# Creating the ANN Model

**Importing the libraries**

In [None]:
import tensorflow as tf
# Show the installed TensorFlow version.
tf.__version__

# Initialising the ANN

In [None]:
# Fix TensorFlow's global seed before any layer is created, so that weight
# initialisation and the shuffling done during training are intended to be
# repeatable between runs (some accelerator ops may still be non-deterministic).
tf.random.set_seed(0)

# Empty sequential container; the layers are appended in the following cells.
ann = tf.keras.models.Sequential()

1. Adding the first input layer and first hidden layer

In [None]:
# First hidden layer: 6 ReLU units.
first_hidden = tf.keras.layers.Dense(units=6, activation='relu')
ann.add(first_hidden)

2. Creating a second hidden layer

In [None]:
# Second hidden layer: 6 ReLU units.
second_hidden = tf.keras.layers.Dense(units=6, activation='relu')
ann.add(second_hidden)

3. Adding the output layer

In [None]:
# Output layer: a single sigmoid unit, giving a value between 0 and 1.
output_layer = tf.keras.layers.Dense(units=1, activation='sigmoid')
ann.add(output_layer)

# Training the ANN model

1. Compiling the model

In [None]:
# Configure training: Adam optimiser, binary cross-entropy loss, accuracy metric.
ann.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

2. Training the model

In [None]:
# Train for 100 epochs on the scaled training set, in mini-batches of 32 rows.
ann.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=32,
)

# Predicting the test set result

In [None]:
# Predict on the test features and threshold the sigmoid output at 0.5
# to get boolean class predictions.
y_pred = ann.predict(X_test) > 0.5

# Show predictions (first column) next to the true labels (second column).
predicted_column = y_pred.reshape(-1, 1)
actual_column = y_test.reshape(-1, 1)
print(np.hstack((predicted_column, actual_column)))

# Making the confusion matrix

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
# Confusion matrix: rows are the true classes, columns the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)
# Fraction of test rows classified correctly (displayed as the cell output).
accuracy_score(y_true=y_test, y_pred=y_pred)