In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

#### Import Dataset

In [None]:
# Read the training CSV into `data` and preview the first rows.
csv_path = '../input/loan-predication/train_u6lujuX_CVtuZ9i (1).csv'
data = pd.read_csv(csv_path)
data.head()

In [None]:
# Move Loan_ID out of the columns and use it as the row index.
data = data.set_index('Loan_ID')
data.head()

In [None]:
# describe() with its default arguments, shown as the cell output.
print("Data Numerical Columns Description")
numeric_summary = data.describe()
numeric_summary

In [None]:
# Same summary, but include='all' asks describe() to cover every column.
print('Data All Columns Description')
full_summary = data.describe(include='all')
full_summary

In [None]:
# Count the missing entries in each column.
print("Check Missing Value:")
missing_per_column = data.isna().sum()
missing_per_column

In [None]:
# info() writes its report directly to stdout, so it is called as a bare
# statement rather than being displayed as the cell's value.
print('Data Information:')
data.info()

In [None]:
# Number of distinct values found in each column.
print("To Check Numbers of Unique Value each Column Contains:")
unique_counts = data.nunique()
unique_counts

In [None]:
# Box plot of the frame's columns, drawn on an explicitly created 16x10 axes.
fig, ax = plt.subplots(figsize=(16, 10))
data.boxplot(ax=ax)
plt.show()

In [None]:
# Row-normalised cross tab: each row is one Credit_History value and each cell is
# the share of applicants with that credit history who ended up with that
# Loan_Status. Grouping by Loan_Status and averaging Credit_History answers the
# reverse question (mean credit history among each loan outcome), which does not
# match the label printed below.
print("Probability of getting the loan on checking the credit history")
pd.crosstab(data['Credit_History'], data['Loan_Status'], normalize='index')

In [None]:
# Raw counts for every (Credit_History, Loan_Status) combination.
print("Checking the Cross Tab:")
pd.crosstab(index=data['Credit_History'], columns=data['Loan_Status'])

In [None]:
print('Filling the missing values in Loan Amount with the mean of Loan Amount:')
# Assign the filled column back to the frame. Calling fillna(..., inplace=True) on
# `data.LoanAmount` is a chained in-place operation: pandas warns about it and it
# leaves `data` untouched once Copy-on-Write is active.
data['LoanAmount'] = data['LoanAmount'].fillna(data['LoanAmount'].mean())
print("To check is there any missing value left in Loan Amount")
data['LoanAmount'].isna().sum()

In [None]:
# Inspect the Self_Employed column: number of missing entries and value frequencies.
self_employed = data['Self_Employed']
print("Checking missing values in Self Employed Column:", self_employed.isna().sum())
print("Unique Values Count:")
print(self_employed.value_counts())

In [None]:
# so we can fill with No
# NOTE(review): presumably 'No' is chosen because it is the most frequent value in
# the counts printed by the previous cell — confirm against that output.
# The result is assigned back because fillna(..., inplace=True) on
# `data.Self_Employed` is a chained in-place call that pandas warns about and that
# does not update `data` under Copy-on-Write.
data['Self_Employed'] = data['Self_Employed'].fillna('No')
print("Checking missing values in Self Employed Column:", data['Self_Employed'].isna().sum())

In [None]:
# Median LoanAmount for every Self_Employed (rows) x Education (columns) pair.
# The aggregation is named by string: recent pandas versions emit a FutureWarning
# when the NumPy callable (np.median) is passed instead.
print("Self Employed person with its Education asking for a loan which we have taken an average of:")
data.pivot_table(index='Self_Employed', values='LoanAmount', columns='Education', aggfunc='median')

In [None]:
print("Filling the categorical values with mode function:")
modeList = ['Gender', 'Married', 'Dependents', 'Loan_Amount_Term', 'Credit_History']
for mCol in modeList:
    # mode() returns the most frequent value(s); [0] takes the first of them.
    # The filled column is assigned back because fillna(..., inplace=True) on
    # `data[mCol]` is a chained in-place call that pandas warns about and that
    # does not update `data` under Copy-on-Write.
    data[mCol] = data[mCol].fillna(data[mCol].mode()[0])

print("Now Checking the missing values of the columns define in the list:")
print(data[modeList].isna().sum())

### Encoding the Categorical Columns

In [None]:
from sklearn.preprocessing import LabelEncoder

# Columns whose values are replaced by integer codes.
var = [
    'Gender',
    'Married',
    'Dependents',
    'Education',
    'Self_Employed',
    'Property_Area',
    'Loan_Status',
]
lE = LabelEncoder()
# The same encoder object is refitted on every column, so after the loop it only
# remembers the classes of the last column in `var`.
for column_name in var:
    encoded = lE.fit_transform(data[column_name])
    data[column_name] = encoded

In [None]:
print("Now checking the data information")
# info() prints its own report and returns None; wrapping it in print() added a
# stray "None" line to the output.
data.info()
print("Data Columns Data Type:")
print(data.dtypes)
# so now all values are numeric

In [None]:
# Pairwise correlation between the selected columns (the target, Loan_Status, included).
print("Checking the Correlation of Columns")
corr_columns = [
    'Gender',
    'Education',
    'Credit_History',
    'Self_Employed',
    'Married',
    'Property_Area',
    'ApplicantIncome',
    'Loan_Status',
]
data[corr_columns].corr()

## Importing other libraries

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import SGDClassifier

In [None]:
def Custom_Model(X, y, model, model_name, test_size=0.3, random_state=42):
    """Fit ``model`` on a train split of ``(X, y)`` and print its test-split score.

    Parameters
    ----------
    X : feature table passed to ``train_test_split`` and ``model.fit``.
    y : target values, split alongside ``X``.
    model : estimator exposing ``fit`` and ``score``; it is fitted in place.
    model_name : str
        Label inserted into the printed message.
    test_size : float, default 0.3
        Forwarded to ``train_test_split``.
    random_state : int, default 42
        Seed forwarded to ``train_test_split`` so the split is repeatable.

    Returns
    -------
    None
        The score is reported through ``print`` only.
    """
    train_X, test_X, train_y, test_y = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    model.fit(train_X, train_y)
    # score() evaluates the model on the test split itself, so the separate
    # predict() call (whose result was never used) has been dropped.
    print("Accuracy of the " + model_name + ": ", model.score(test_X, test_y))

In [None]:
# Target vector shared by every model run below.
y = data.loc[:, 'Loan_Status']

## Logistic Regression

In [None]:
# Baseline with a single feature: per the correlation table above, Credit_History
# is the column most strongly correlated with Loan_Status.
lr = LogisticRegression()
X = data.loc[:, ['Credit_History']]
Custom_Model(X, y, lr, model_name="Logistic Regression")

In [None]:
# Display the column labels available for feature selection.
data.columns

In [None]:
# Larger predictor set, chosen without any prior screening.
X = data.loc[:, [
    'Gender',
    'Education',
    'Credit_History',
    'Self_Employed',
    'Married',
    'ApplicantIncome',
    'Property_Area',
]]
print("With the predictors:", X.columns)
print()
Custom_Model(X, y, lr, model_name="Logistic Regression")

In [None]:
# with Correlation near 1
# NOTE(review): presumably refers to the columns ranking highest in the
# correlation table — confirm which threshold was intended.
X = data.loc[:, ['Property_Area', 'Credit_History']]
print("With the predictors:", X.columns)
print()
Custom_Model(X, y, lr, model_name="Logistic Regression")

In [None]:
# Logistic regression on five of the encoded categorical columns.
X = data.loc[:, [
    'Credit_History',
    'Education',
    'Married',
    'Self_Employed',
    'Property_Area',
]]
print("With the predictors:", X.columns)
print()
Custom_Model(X, y, lr, model_name="Logistic Regression")

## Decision Tree

In [None]:
X = data[['Gender', 'Education', 'Credit_History', 'Self_Employed', 'Married', 'ApplicantIncome', 'Property_Area']]
# Seed the tree so the reported accuracy is the same on every run; an unseeded
# DecisionTreeClassifier can differ between runs. 42 matches the seed used for
# the train/test split.
dt = DecisionTreeClassifier(random_state=42)
print("With the predictors:", X.columns)
print()
Custom_Model(X, y, dt, "Decision Tree")

In [None]:
# Refit the existing `dt` estimator on two predictors only.
X = data.loc[:, ['Property_Area', 'Credit_History']]
print("With the predictors:", X.columns)
print()
Custom_Model(X, y, dt, model_name="Decision Tree")

In [None]:
# Refit the existing `dt` estimator on Credit_History alone.
X = data.loc[:, ['Credit_History']]
print("With the predictors:", X.columns)
print()
Custom_Model(X, y, dt, model_name="Decision Tree")

## Random Forest Classifier

In [None]:
X = data[['Gender', 'Education', 'Credit_History', 'Self_Employed', 'Married', 'ApplicantIncome', 'Property_Area']]
# Seed the forest so the reported accuracy is reproducible; an unseeded
# RandomForestClassifier gives a different result on each run. 42 matches the
# seed used for the train/test split.
rfc = RandomForestClassifier(random_state=42)
print("With the predictors:", X.columns)
print()
Custom_Model(X, y, rfc, "Random Forest")

In [None]:
X = data[['Property_Area', 'Credit_History']]
# Fresh forest for this feature set, seeded so the reported accuracy is
# reproducible across runs.
rfc = RandomForestClassifier(random_state=42)
print("With the predictors:", X.columns)
print()
Custom_Model(X, y, rfc, "Random Forest")

In [None]:
X = data[['Credit_History']]
# Fresh forest for the single-feature run, seeded so the reported accuracy is
# reproducible across runs.
rfc = RandomForestClassifier(random_state=42)
print("With the predictors:", X.columns)
print()
Custom_Model(X, y, rfc, "Random Forest")

## Stochastic Gradient Descent

In [None]:
# Seed the classifier so the reported accuracy is reproducible; an unseeded
# SGDClassifier shuffles the training data differently on every run.
# NOTE(review): the features are used unscaled (ApplicantIncome is on a much
# larger scale than the encoded columns); SGD is sensitive to feature scaling —
# consider standardising before drawing conclusions from this score.
sgd = SGDClassifier(random_state=42)
X = data[['Gender', 'Education', 'Credit_History', 'Self_Employed', 'Married', 'ApplicantIncome', 'Property_Area']]
print("With the predictors:", X.columns)
print()
Custom_Model(X, y, sgd, "Stochastic Gradient Descent")

In [None]:
# Refit the existing `sgd` estimator on two predictors only.
X = data.loc[:, ['Property_Area', 'Credit_History']]
print("With the predictors:", X.columns)
print()
Custom_Model(X, y, sgd, model_name="Stochastic Gradient Descent")

In [None]:
# Refit the existing `sgd` estimator on Credit_History alone.
X = data.loc[:, ['Credit_History']]
print("With the predictors:", X.columns)
print()
Custom_Model(X, y, sgd, model_name="Stochastic Gradient Descent")

In [None]:
# Refit the existing `sgd` estimator on ApplicantIncome alone.
X = data.loc[:, ['ApplicantIncome']]
print("With the predictors:", X.columns)
print()
Custom_Model(X, y, sgd, model_name="Stochastic Gradient Descent")

In [None]:
# Refit the existing `sgd` estimator on Self_Employed alone.
X = data.loc[:, ['Self_Employed']]
print("With the predictors:", X.columns)
print()
Custom_Model(X, y, sgd, model_name="Stochastic Gradient Descent")