In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Load the loan dataset from the relative input directory into a DataFrame.
df = pd.read_csv('../input/loan_data.csv')

In [None]:
# Print the column names, non-null counts and dtypes of the loaded frame.
df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

In [None]:
# Preview the first five rows of the raw data.
df.head()

In [None]:
# Overlay the FICO distributions of the two credit.policy groups on one axes.
sns.set_style('whitegrid')
plt.figure(figsize=(10,6))
for policy_value, bar_color, legend_text in (
    (0, 'blue', 'Credit Policy==0'),
    (1, 'red', 'Credit Policy==1'),
):
    # Select the FICO scores of the rows belonging to this group.
    group_fico = df.loc[df['credit.policy'] == policy_value, 'fico']
    group_fico.plot.hist(bins=30, alpha=0.5, color=bar_color, label=legend_text)
plt.legend()
plt.xlabel('FICO')

In [None]:
# More borrowers have credit.policy = 1 than credit.policy = 0
# Borrowers with a FICO score below roughly 660 do not meet the underwriting criteria (credit.policy = 0)

In [None]:
# Overlay the FICO distributions of repaid (0) and not-fully-paid (1) loans on one axes.
sns.set_style('whitegrid')
plt.figure(figsize=(10,6))
for paid_flag, bar_color, legend_text in (
    (0, 'blue', 'Not fully paid==0'),
    (1, 'red', 'Not fully paid==1'),
):
    # Select the FICO scores of the rows belonging to this group.
    group_fico = df.loc[df['not.fully.paid'] == paid_flag, 'fico']
    group_fico.plot.hist(bins=30, alpha=0.5, color=bar_color, label=legend_text)
plt.legend()
plt.xlabel('FICO')

In [None]:
# More borrowers fully paid their loans (not.fully.paid = 0) than did not
# Borrowers with a FICO score below roughly 660 have not fully paid their loans

In [None]:
# Number of loans per purpose, split by whether the loan was fully paid.
plt.figure(figsize=(10,8))
sns.countplot(x='purpose',hue='not.fully.paid',data=df)
# tight_layout() adjusts the *current* figure, so it has to run after the plot is
# drawn; calling it before plt.figure() acted on a different (or empty) figure.
plt.tight_layout()

In [None]:

# Joint view of FICO score against interest rate, with the marginal distribution of each.
sns.jointplot(x='fico',y='int.rate',data=df)

In [None]:
# Linear fit of interest rate on FICO, one panel per not.fully.paid value and one
# colour per credit.policy value.
# lmplot is a figure-level function: it builds its own figure (FacetGrid), so a
# preceding plt.figure(...) only produced an extra blank figure and did not size
# this plot. Use lmplot's own `height` / `aspect` arguments to resize the panels.
sns.lmplot(x='fico',y='int.rate',data=df,col='not.fully.paid',hue='credit.policy',palette='Set1')

In [None]:
# As the FICO score increases (i.e. better credit), the interest rate decreases


In [None]:
# Re-inspect the columns and dtypes before encoding the categorical feature(s).
df.info()

In [None]:
# Columns to be one-hot encoded before modelling.
cat_feats=['purpose']

In [None]:
# One-hot encode the columns listed in cat_feats; drop_first=True omits the first
# level of each so the dummy columns are not redundant with one another.
final_data = pd.get_dummies(df,columns=cat_feats,drop_first=True)

In [None]:
# Preview the encoded frame to confirm the new dummy columns.
final_data.head()

In [None]:
from sklearn.model_selection import train_test_split


In [None]:
# Target is the not.fully.paid flag; every remaining column is a predictor.
y = final_data['not.fully.paid']
X = final_data.drop(columns='not.fully.paid')

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=101,
)

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Single decision tree with default hyper-parameters.
# random_state is fixed (same seed as the train/test split) so that the fitted tree,
# and therefore the reported metrics, are reproducible across kernel restarts.
dtree = DecisionTreeClassifier(random_state=101)

In [None]:
# Fit the decision tree on the training split.
dtree.fit(X_train,y_train)

In [None]:
# Decision-tree predictions of not.fully.paid for the held-out test rows.
predictions = dtree.predict(X_test)

In [None]:
from sklearn.metrics import classification_report,confusion_matrix

In [None]:
# Evaluate the decision tree on the test split.
# scikit-learn metrics expect (y_true, y_pred). Passing the predictions first swaps
# precision with recall in the report and transposes the confusion matrix, so the
# ground-truth labels must be the first argument.
print(classification_report(y_test, predictions))
print(confusion_matrix(y_test, predictions))

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Random forest of 600 trees.
# random_state is fixed (same seed as the train/test split) so the bootstrap samples
# and feature subsets, and therefore the reported metrics, are reproducible.
rfc = RandomForestClassifier(n_estimators=600, random_state=101)

In [None]:
# Fit the random forest on the training split.
rfc.fit(X_train,y_train)

In [None]:
# Random-forest predictions of not.fully.paid for the held-out test rows.
r_pred = rfc.predict(X_test)

In [None]:
# Evaluate the random forest on the test split.
# scikit-learn metrics expect (y_true, y_pred). Passing the predictions first swaps
# precision with recall in the report and transposes the confusion matrix, so the
# ground-truth labels must be the first argument.
print(classification_report(y_test, r_pred))
print('\n')
print(confusion_matrix(y_test, r_pred))

In [None]:
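# Random Forest with precision of 100% -- interpret with care: the classes are imbalanced
# (most loans are fully paid), so also check the recall for not.fully.paid = 1 before concluding.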