# Import libraries & load dataset

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix,accuracy_score,roc_curve,roc_auc_score
from statsmodels.stats.outliers_influence import variance_inflation_factor
import statsmodels.api as sms
from sklearn.preprocessing import StandardScaler,LabelEncoder

In [None]:
# Load the training split (train.csv) of the loan-prediction dataset into `loan`.
loan = pd.read_csv('../input/analytics-vidhya-loan-prediction/train.csv')

In [None]:
# Preview the first rows of the training data.
loan.head()

In [None]:
# Print column dtypes and non-null counts for the training data.
loan.info()

In [None]:
# Missing-value summary per column of `loan`:
# column 0 = number of nulls, column 1 = percentage of rows that are null.
null_mask = loan.isnull()
pd.DataFrame([null_mask.sum(), null_mask.sum() / null_mask.count() * 100]).T

In [None]:
# Summary statistics (count / unique / top / freq) for the object-dtype columns.
# The builtin `object` is used because the `np.object` alias was removed in
# NumPy 1.24 and raises AttributeError there.
loan.describe(include=object)

In [None]:
# Load the test split (test.csv) into `loantest` and preview its first rows.
loantest= pd.read_csv('../input/analytics-vidhya-loan-prediction/test.csv')
loantest.head()

In [None]:
# Print column dtypes and non-null counts for the test data.
loantest.info()

# Data cleaning

In [None]:
# Names of the object-dtype columns of `loan`.
# Compare against the builtin `object`; the `np.object` alias was removed in
# NumPy 1.24.
objcols = loan.columns[loan.dtypes == object]
objcols

In [None]:
# Fill missing values in every column listed in `objcols` with that column's
# most frequent value (the first mode).
for col in objcols:
    if loan[col].isnull().any():
        # Assign the result back: calling fillna(inplace=True) on the
        # `loan[col]` selection is chained assignment, which under pandas
        # copy-on-write acts on a temporary and leaves `loan` unchanged.
        loan[col] = loan[col].fillna(loan[col].mode()[0])

In [None]:
# Names of the non-object (numeric) columns of `loan`.
# Compare against the builtin `object`; the `np.object` alias was removed in
# NumPy 1.24.
intcols = loan.columns[loan.dtypes != object]
intcols

In [None]:
# Fill missing values in every column listed in `intcols` with that column's
# median.
for col in intcols:
    if loan[col].isnull().any():
        # Assign the result back: calling fillna(inplace=True) on the
        # `loan[col]` selection is chained assignment, which under pandas
        # copy-on-write acts on a temporary and leaves `loan` unchanged.
        loan[col] = loan[col].fillna(loan[col].median())

In [None]:
# Recompute `objcols` for the test frame and fill missing values in each
# object-dtype column with that column's most frequent value (the first mode).
# Builtin `object` replaces `np.object`, which was removed in NumPy 1.24.
objcols = loantest.columns[loantest.dtypes == object]
for col in objcols:
    if loantest[col].isnull().any():
        # Assign the result back: fillna(inplace=True) on the `loantest[col]`
        # selection is chained assignment and, under pandas copy-on-write,
        # leaves `loantest` unchanged.
        loantest[col] = loantest[col].fillna(loantest[col].mode()[0])

In [None]:
# Recompute `intcols` for the test frame and fill missing values in each
# non-object (numeric) column with that column's median.
# Builtin `object` replaces `np.object`, which was removed in NumPy 1.24.
intcols = loantest.columns[loantest.dtypes != object]
for col in intcols:
    if loantest[col].isnull().any():
        # Assign the result back: fillna(inplace=True) on the `loantest[col]`
        # selection is chained assignment and, under pandas copy-on-write,
        # leaves `loantest` unchanged.
        loantest[col] = loantest[col].fillna(loantest[col].median())

In [None]:
# Encode the target column as integers: 'Y' -> 1, 'N' -> 0.
# NOTE: run this cell only once. Series.map returns NaN for values that are not
# keys of the dict, so re-running it on the already-encoded 0/1 column turns
# every entry into NaN.
loan['Loan_Status'] = loan['Loan_Status'].map({'Y':1,'N':0})

# Check skewness

In [None]:
# Plot the distribution of log(1 + CoapplicantIncome) as a density histogram
# with a KDE overlay. `sns.distplot` is deprecated in seaborn; `histplot` with
# kde=True and stat="density" is its documented replacement.
sns.histplot(np.log1p(loan.CoapplicantIncome), kde=True, stat="density")

# Scaling & Encoding

In [None]:
# Build the training feature frame: drop the target, the ID and the three
# excluded columns, then one-hot encode whatever categorical columns remain.
dummyTrain = pd.get_dummies(
    loan.drop(
        columns=['Loan_Status', 'Loan_ID', 'Gender', 'ApplicantIncome', 'Loan_Amount_Term']
    )
)
dummyTrain.head()

In [None]:
# Scaler instance with default settings; it is fit on the training features in
# a later cell.
sc = StandardScaler()

In [None]:
# Fit the scaler on the training features, keep the original column names, and
# attach the (unscaled) Loan_Status target as an extra column.
scaledTrain = pd.DataFrame(
    sc.fit_transform(dummyTrain), columns=dummyTrain.columns
).assign(Loan_Status=loan['Loan_Status'])
scaledTrain.head()

In [None]:
# Build the test feature frame the same way as the training one: drop the ID
# and the excluded columns, then one-hot encode the remaining categoricals.
dummyTest = pd.get_dummies(loantest.drop(['Loan_ID','Gender','ApplicantIncome','Loan_Amount_Term'],axis=1))
# Align to the training layout: same columns, same order. A dummy column that
# is absent from the test data is added as all zeros; a column that exists only
# in the test data is dropped, because the model was never trained on it.
dummyTest = dummyTest.reindex(columns=dummyTrain.columns, fill_value=0)
# Use transform(), not fit_transform(): the test features must be scaled with
# the mean/std learned from the training data. Re-fitting on the test set would
# scale it differently from the data the model is trained on.
scaledTest = pd.DataFrame(sc.transform(dummyTest),columns=dummyTest.columns)
scaledTest.head()

# Apply Model

In [None]:
# Split the scaled training frame into the target vector and the feature matrix.
y = scaledTrain['Loan_Status']
x = scaledTrain.drop(columns=['Loan_Status'])

In [None]:
# Logistic-regression classifier with scikit-learn's default settings.
lor = LogisticRegression()

In [None]:
# Fit the classifier on the scaled training features `x` and the 0/1 target `y`.
lor.fit(x,y)

In [None]:
# Predict a 0/1 Loan_Status label for every row of the scaled test features.
ypred = lor.predict(scaledTest)
ypred

In [None]:
# Assemble the submission frame: one row per test Loan_ID, with the predicted
# 0/1 label translated back to the original 'N'/'Y' coding.
submission = pd.DataFrame(
    {'Loan_ID': loantest['Loan_ID'], 'Loan_Status': ypred}
).assign(Loan_Status=lambda frame: frame['Loan_Status'].map({1: 'Y', 0: 'N'}))
submission.head()

In [None]:
# Annotated heatmap of the pairwise correlations between all scaled training
# columns, including the Loan_Status target.
plt.figure(figsize=(15, 10))
corr_matrix = scaledTrain.corr()
sns.heatmap(corr_matrix, annot=True)

In [None]:
# result = sms.Logit(y,x).fit()
# result.summary()