In [2]:
# Install Pandas

In [3]:
!pip install pandas



In [30]:
import pandas as pd
from datetime import datetime
from datetime import date
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

In [2]:
pd.__version__

'1.2.4'

In [31]:
df = pd.read_excel('Credit data report 1.xlsx')

In [32]:
df

Unnamed: 0,Customer ID,First Name,Middle Name,Last Name,Date of Birth,Email,Employment Status,Home Phone,Address Line1,City,...,Eligible amount,FICO Score,Red Flag,Default in Last 7 days,Month1,Month2,Month3,Decision,Unnamed: 26,Unnamed: 27
0,10003,Joshua,F.,James,09/25/1962,aliquam.enim.nec@Duisatlacus.org,Salaried,(325) 731-2326,"P.O. Box 766, 5055 Nunc, Street",Bowling Green,...,321322.0,717,No,No,No,No,No,Fail,,
1,10028,Harper,N.,Whitaker,03/28/1972,porttitor.tellus@faucibusutnulla.ca,Salaried,(424) 834-0240,"Ap #119-264 Vulputate, Street",Great Falls,...,505623.0,788,No,No,No,No,No,Pass,,
2,10031,Elmo,D.,Marsh,02/01/1990,augue@interdum.edu,Salaried,(893) 273-2802,"P.O. Box 480, 9768 Mollis Rd.",Jacksonville,...,695617.0,758,No,No,No,No,No,Pass,,
3,10073,Ginger,X.,Whitney,12/29/1966,ipsum.dolor@ornarefacilisis.edu,Salaried,(831) 975-5051,"P.O. Box 369, 885 Lectus Road",Ketchikan,...,221265.0,702,No,No,No,No,No,Fail,,
4,10026,Dante,S.,Sanders,06/10/1965,ultrices.posuere.cubilia@vulputateduinec.org,Salaried,(367) 840-8014,809-1813 Posuere Avenue,Las Vegas,...,217468.0,778,No,No,No,No,No,Pass,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,10574,Duncan,R.,Hill,06/16/1991,Vivamus@quam.edu,Salaried,(236) 788-0859,"P.O. Box 442, 7383 In Street",Kenosha,...,961551.0,690,No,Yes,Yes,Yes,Yes,Fail,,
496,10584,Sonya,Y.,Henry,08/20/1969,vel.sapien@magnanec.com,Salaried,(813) 461-5562,"P.O. Box 849, 8003 Facilisis Street",Minneapolis,...,1400200.0,649,No,Yes,Yes,Yes,Yes,Fail,,
497,10595,Tasha,C.,Carson,02/04/1969,Duis@arcu.org,Salaried,(382) 177-6841,3531 Sed Street,Norman,...,771484.0,658,No,Yes,Yes,Yes,Yes,Fail,,
498,10554,Allegra,P.,Vaughn,02/13/1987,odio.Etiam@Etiam.edu,Salaried,(167) 162-1216,3545 Quam Rd.,Frankfort,...,1014994.0,654,Yes,Yes,Yes,Yes,Yes,Fail,,


Remove unnecessary columns

In [33]:
df.drop(['Customer ID','First Name', 'Middle Name', 'Last Name', 'Email', 'Home Phone', 'Address Line1','Zip','Unnamed: 26','Unnamed: 27','City','State','Monthly Income','Monthly Expenses','Monthly Debts','Total Outstanding','Eligible amount'], axis = 1,inplace = True)

Removing 'Employment Status' column as it has only 'Salaried' value

In [34]:
df.drop(['Employment Status'], axis = 1,inplace = True)

Add a new column 'Age' from Date of Birth column and later drop the Date of Birth column

In [35]:
def calculate_age(born):
    born = datetime.strptime(born, "%m/%d/%Y").date()
    today = date.today()
    return today.year - born.year - ((today.month, today.day) < (born.month, born.day))

df['Age'] = df['Date of Birth'].apply(calculate_age)
df.drop(['Date of Birth'], axis = 1,inplace = True)

Replace categorical values

In [36]:
df['Red Flag'].replace(('Yes', 'No'), (1, 0), inplace=True)
df['Default in Last 7 days'].replace(('Yes', 'No'), (1, 0), inplace=True)
df['Month1'].replace(('Yes', 'No'), (1, 0), inplace=True)
df['Month2'].replace(('Yes', 'No'), (1, 0), inplace=True)
df['Month3'].replace(('Yes', 'No'), (1, 0), inplace=True)
df['Decision'].replace(('Pass', 'Fail'), (1, 0), inplace=True)

In [37]:
df

Unnamed: 0,DTI Ratio,Default eligible amount,FICO Score,Red Flag,Default in Last 7 days,Month1,Month2,Month3,Decision,Age
0,0.249146,351120,717,0,0,0,0,0,0,58
1,0.133701,554220,788,0,0,0,0,0,1,49
2,0.134533,765090,758,0,0,0,0,0,1,31
3,0.965269,270360,702,0,0,0,0,0,0,54
4,0.172975,287730,778,0,0,0,0,0,1,56
...,...,...,...,...,...,...,...,...,...,...
495,0.317712,989670,690,0,1,1,1,1,0,30
496,0.232335,1426950,649,0,1,1,1,1,0,52
497,0.410896,835320,658,0,1,1,1,1,0,52
498,0.392642,1065780,654,1,1,1,1,1,0,34


Remove 'Decision' field from the dataset 

In [38]:
decision_df = df['Decision']
df.drop(['Decision'], axis = 1,inplace = True)

In [39]:
#Splitting the training and test sets 

X = df
y = decision_df

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3,random_state=27)

In [40]:
X_test.shape

(150, 9)

In [42]:
#Feature Scaling

In [41]:
from sklearn.preprocessing import StandardScaler

# copy of datasets
X_train_stand = X_train.copy()
X_test_stand = X_test.copy()

# numerical features
num_cols = ['DTI Ratio','Default eligible amount','FICO Score']

# apply standardization on numerical features
for i in num_cols:
    
    # fit on training data column
    scale = StandardScaler().fit(X_train_stand[[i]])
    
    # transform the training data column
    X_train_stand[i] = scale.transform(X_train_stand[[i]])
    
    # transform the testing data column
    X_test_stand[i] = scale.transform(X_test_stand[[i]])

In [43]:
X_train_stand.shape

(350, 9)

In [44]:
X_train_stand

Unnamed: 0,DTI Ratio,Default eligible amount,FICO Score,Red Flag,Default in Last 7 days,Month1,Month2,Month3,Age
217,-0.493488,0.885961,-0.628441,0,0,0,0,0,45
220,0.564215,0.315318,-0.574345,0,0,0,0,0,57
483,-0.380253,1.079282,-2.143129,0,0,0,0,0,49
211,-0.325931,-0.501493,0.579703,0,0,0,0,0,39
63,-0.542405,-1.109320,-0.033385,0,0,0,0,0,34
...,...,...,...,...,...,...,...,...,...
312,3.903668,-0.845762,-0.736633,0,0,0,0,0,55
31,-0.608544,-0.151170,0.868215,0,0,0,0,0,36
328,3.029430,-0.801947,-0.916953,0,0,0,0,0,30
184,0.749668,-1.091449,0.327255,0,0,0,0,0,35


In [45]:
y_train

217    0
220    0
483    0
211    0
63     0
      ..
312    0
31     1
328    0
184    0
19     1
Name: Decision, Length: 350, dtype: int64

# RANDOM FOREST

In [46]:
classifier = RandomForestClassifier(n_estimators = 3, random_state = 0, n_jobs = -1)
classifier.fit(X_train_stand, y_train)

RandomForestClassifier(n_estimators=3, n_jobs=-1, random_state=0)

In [47]:
y_pred = classifier.predict(X_test_stand)

In [48]:
y_pred

array([1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1,
       0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
       0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0])

In [49]:
from sklearn.metrics import accuracy_score
acc = accuracy_score(y_test, y_pred)
print(acc)

1.0


In [50]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print(cm)

[[108   0]
 [  0  42]]


# Gradient Booster Algorithm

In [51]:
from sklearn.ensemble import GradientBoostingClassifier
GB_classifier=GradientBoostingClassifier()
GB_classifier.fit(X_train_stand,y_train)

GradientBoostingClassifier()

In [52]:
y_pred = GB_classifier.predict(X_test_stand)

In [53]:
y_pred

array([1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1,
       0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
       0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0])

In [54]:
from sklearn.metrics import accuracy_score
acc = accuracy_score(y_test, y_pred)
print(acc)

0.9933333333333333


In [55]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print(cm)


[[108   0]
 [  1  41]]


# SVM 

In [56]:
from sklearn.svm import SVC
model = SVC(C=1, gamma=1)
model.fit(X_train_stand,y_train)

SVC(C=1, gamma=1)

In [57]:
y_pred = model.predict(X_test_stand)
print(y_pred)

[1 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 0 0 1 0 0 1 0 0
 0 1 1 0 0 0 0 0 0 1 1 0 1 1 0 0 0 1 0 0 1 1 0 1 0 0 1 0 0 0 0 0 1 0 1 0 0
 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 1 0 1 0 0 0
 0 0 1 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 1 0 0 1
 0 0]


In [58]:
from sklearn.metrics import accuracy_score
acc = accuracy_score(y_test, y_pred)
print(acc)

0.92


In [59]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print(cm)


[[105   3]
 [  9  33]]
