In [5]:
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import roc_auc_score, confusion_matrix
from sklearn.utils import resample

In [6]:
df = pd.read_excel("default of credit card clients.xls")
new_header = df.iloc[0] #grab the first row for the header
df = df[1:] #take the data less the header row
df.columns = new_header #set the header row as the df header
df = df.drop(columns=["ID"])
le = preprocessing.LabelEncoder()
#df["default payment next month"] = le.fit_transform(df["default payment next month"])
df = df.apply(le.fit_transform)
print(df['default payment next month'].value_counts())
#print(df.dtypes)
results = pd.DataFrame(columns=["Algorithm", "n_estimators", "eta", "max_depth", "n_neighbors", "AUC"])

0    23364
1     6636
Name: default payment next month, dtype: int64


In [3]:
#DECISION TREE 10 fold
max_depth = [2, 5, 7, 10, 15, 20]
for depth in max_depth:
    model = DecisionTreeClassifier(random_state = 42, max_depth = depth)
    from sklearn.model_selection import StratifiedKFold
    skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
    target = df.loc[:,"default payment next month"]
    aucs=[]
    for train_index, test_index in skf.split(df, target):
        train = df.iloc[train_index,:] 
        test = df.iloc[test_index,:]    
        model.fit(train.iloc[:, :-1], train.iloc[:, -1])
        predictions = model.predict_proba(test.iloc[:, :-1])
        #print(roc_auc_score(test.iloc[:, -1], predictions, multi_class="ovr"))
        aucs.append(roc_auc_score(test.iloc[:, -1], predictions[:, 1], multi_class="ovr"))
    print("Average: " + str(sum(aucs)/len(aucs)))
    auc = str(sum(aucs)/len(aucs))
    results=results.append({"Algorithm": ["Decision Tree"], "n_estimators": [0], "eta": [0], "max_depth": [depth], "n_neighbors": [0], "AUC": [auc]}, ignore_index=True)

Average: 0.693420248372191
Average: 0.7640653255692867
Average: 0.7843937805669035
Average: 0.8256787917333215
Average: 0.9124141219799867
Average: 0.9716195877172609


In [4]:
results.to_csv("10_train_dt.csv")

In [24]:
#DECISION TREE normal
max_depth = [2, 5, 7, 10, 15, 20]
for depth in max_depth:
    model = DecisionTreeClassifier(random_state = 42, max_depth = depth)
    X = df.iloc[:,:-1].values
    y = df.iloc[:,-1:].values
    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size= .3)
    model.fit(X_train,y_train)
    predictions = model.predict(X_test)
    pred_prob1 = model.predict_proba(X_test)
    auc_score1 = roc_auc_score(y_test, predictions, multi_class='ovr')
    print("AUC score",auc_score1)


AUC score 0.6437644659455021
AUC score 0.6562400734482036
AUC score 0.6414258767977343
AUC score 0.6492961752547413
AUC score 0.6243094573331784
AUC score 0.6195999752483149


In [7]:
#XGB 10 fold
n_estimators = [100, 200, 500, 1000]
eta = [0.01, 0.1, 0.3]
max_depth = [2, 3, 4, 5, 6]
for eta in eta:
    for estimators in n_estimators:
        for depth in max_depth:
            model = XGBClassifier(random_state = 42, n_estimators = estimators, eta = eta, max_depth = depth)
            from sklearn.model_selection import StratifiedKFold
            skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
            target = df.loc[:,"default payment next month"]
            aucs=[]
            for train_index, test_index in skf.split(df, target):
                train = df.iloc[train_index,:] 
                test = df.iloc[test_index,:]    
                model.fit(train.iloc[:, :-1], train.iloc[:, -1])
                predictions = model.predict_proba(test.iloc[:, :-1])
                #print(roc_auc_score(test.iloc[:, -1], predictions, multi_class="ovr"))
                aucs.append(roc_auc_score(test.iloc[:, -1], predictions[:, 1], multi_class="ovr"))
            print("Average: " + str(sum(aucs)/len(aucs)))
            auc = str(sum(aucs)/len(aucs))
            results=results.append({"Algorithm": ["XGBoost"], "n_estimators": [estimators], "eta": [eta], "max_depth": [depth], "n_neighbors": [0], "AUC": [auc]}, ignore_index=True)







































Average: 0.7609616820170615




































Average: 0.7691846240130805




































Average: 0.7773193228405355




































Average: 0.7856694796809027




































Average: 0.7961737010611143




































Average: 0.7687564239164688




































Average: 0.7762139244086951




































Average: 0.7864056067950932




































Average: 0.7961509535139637




































Average: 0.8103802369345031




































Average: 0.7803198511328191




































Average: 0.790266421600142




































Average: 0.8017773599081688




































Average: 0.8164745622347287




































Average: 0.8366948251643704




































Average: 0.7893880288266499




































Average: 0.8017883448051435




































Average: 0.8172078501849876




































Average: 0.8359859162765388




































Average: 0.8607805206912978




































Average: 0.7894436326035252




































Average: 0.802437133382584




































Average: 0.8179054464480187




































Average: 0.8379463234060862




































Average: 0.8637354112970522




































Average: 0.7977925296300804




































Average: 0.8162227666190024




































Average: 0.8403076159053299




































Average: 0.8699480015169703




































Average: 0.9030794150378764




































Average: 0.8126721200184672




































Average: 0.8467085803125702




































Average: 0.8882721305700171




































Average: 0.9293266004319104




































Average: 0.9643209770470049




































Average: 0.831381901824438




































Average: 0.8822604227304869




































Average: 0.9334919056828335




































Average: 0.9720714937353714




































Average: 0.9929523130699198




































Average: 0.8034021322704394




































Average: 0.8276003983832879




































Average: 0.8592410682390035




































Average: 0.8938230591091859




































Average: 0.930991750246388




































Average: 0.8174098814386106




































Average: 0.8554199411001691




































Average: 0.9003493757078441




































Average: 0.9409706817216603




































Average: 0.9748386098954605




































Average: 0.8469598262352391




































Average: 0.9073152419988654




































Average: 0.9598032692395735




































Average: 0.9900448970568755




































Average: 0.9988332924416223




































Average: 0.878762739781817




































Average: 0.9504530170348167




































Average: 0.9911904966991626




































Average: 0.999306928801823




































Average: 0.9998972477072673


In [8]:
results.to_csv("10_train_xgb.csv")

In [7]:
#XGB normal
n_estimators = [100, 200, 500, 1000]
eta = [0.01, 0.1, 0.3]
max_depth = [2, 3, 4, 5, 6]
for eta in eta:
    for estimators in n_estimators:
        for depth in max_depth:
            model = XGBClassifier(random_state = 42, n_estimators = estimators, eta = eta, max_depth = depth)
            X = df.iloc[:,:-1].values
            y = df.iloc[:,-1:].values
            from sklearn.model_selection import train_test_split
            X_train, X_test, y_train, y_test = train_test_split(X,y,test_size= .3)
            model.fit(X_train,y_train)
            predictions = model.predict(X_test)
            pred_prob1 = model.predict_proba(X_test)
            auc_score1 = roc_auc_score(y_test, predictions, multi_class='ovr')
            print("AUC score",auc_score1)


  return f(**kwargs)


AUC score 0.6457894045150551


  return f(**kwargs)


AUC score 0.6630600880600881


  return f(**kwargs)


AUC score 0.6533581317530995


  return f(**kwargs)


AUC score 0.6521117530086139


  return f(**kwargs)


AUC score 0.6508979403125793


  return f(**kwargs)


AUC score 0.6427751232355475


  return f(**kwargs)


AUC score 0.6599512841317039


  return f(**kwargs)


AUC score 0.6563843761557647


  return f(**kwargs)


AUC score 0.6388342007026938


  return f(**kwargs)


AUC score 0.6617973335855759


  return f(**kwargs)


AUC score 0.6538761887365042


  return f(**kwargs)


AUC score 0.6512120598813304


  return f(**kwargs)


AUC score 0.6606120059334849


  return f(**kwargs)


AUC score 0.6621665371665372


  return f(**kwargs)


AUC score 0.6618155017064669


  return f(**kwargs)


AUC score 0.6586654690016456


  return f(**kwargs)


AUC score 0.6501780530693512


  return f(**kwargs)


AUC score 0.6554632787816644


  return f(**kwargs)


AUC score 0.6544284245117024


  return f(**kwargs)


AUC score 0.6486618403973231


  return f(**kwargs)


AUC score 0.6568976118622386


  return f(**kwargs)


AUC score 0.6499195511126343


  return f(**kwargs)


AUC score 0.6515724389251811


  return f(**kwargs)


AUC score 0.6598894513000845


  return f(**kwargs)


AUC score 0.6615657910784377


  return f(**kwargs)


AUC score 0.6520497924125167


  return f(**kwargs)


AUC score 0.6588359462948843


  return f(**kwargs)


AUC score 0.6577132070681689


  return f(**kwargs)


AUC score 0.6555378854630651


  return f(**kwargs)


AUC score 0.6571271242526994


  return f(**kwargs)


AUC score 0.6567427006645209


  return f(**kwargs)


AUC score 0.6593371191316153


  return f(**kwargs)


AUC score 0.6648212019593032


  return f(**kwargs)


AUC score 0.654463604916949


  return f(**kwargs)


AUC score 0.6529527917067214


  return f(**kwargs)


AUC score 0.6525858373856362


  return f(**kwargs)


AUC score 0.6626706432565281


  return f(**kwargs)


AUC score 0.6530703565818047


  return f(**kwargs)


AUC score 0.6457770287456762


  return f(**kwargs)


AUC score 0.6489257032719931


  return f(**kwargs)


AUC score 0.6552953050861567


  return f(**kwargs)


AUC score 0.6598169603880063


  return f(**kwargs)


AUC score 0.6534526011756057


  return f(**kwargs)


AUC score 0.6592525285538637


  return f(**kwargs)


AUC score 0.6554848199292733


  return f(**kwargs)


AUC score 0.6534107382937479


  return f(**kwargs)


AUC score 0.6527414487369853


  return f(**kwargs)


AUC score 0.6538306027988724


  return f(**kwargs)


AUC score 0.6597108402698395


  return f(**kwargs)


AUC score 0.6496606074288575


  return f(**kwargs)


AUC score 0.6566428571428572


  return f(**kwargs)


AUC score 0.6596379581304204


  return f(**kwargs)


AUC score 0.6497121491959097


  return f(**kwargs)


AUC score 0.6441975311098862


  return f(**kwargs)


AUC score 0.6491074674931177


  return f(**kwargs)


AUC score 0.6561837024319725


  return f(**kwargs)


AUC score 0.646073153154542


  return f(**kwargs)


AUC score 0.6504151217896178


  return f(**kwargs)


AUC score 0.6506752842103072


  return f(**kwargs)


AUC score 0.6479393032056451


In [52]:
#RANDOM FOREST 10 fold
n_estimators = [100, 200, 500, 1000]
max_depth =  [2, 3, 4, 5, 6]
for estimators in n_estimators:
    for depth in max_depth:
        model = RandomForestClassifier(random_state = 42, n_estimators = estimators, max_depth = depth)
        from sklearn.model_selection import StratifiedKFold
        skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
        target = df.loc[:,"default payment next month"]
        aucs=[]
        for train_index, test_index in skf.split(df, target):
            train = df.iloc[train_index,:] 
            test = df.iloc[test_index,:]    
            model.fit(train.iloc[:, :-1], train.iloc[:, -1])
            predictions = model.predict_proba(test.iloc[:, :-1])
            #print(roc_auc_score(test.iloc[:, -1], predictions, multi_class="ovr"))
            aucs.append(roc_auc_score(test.iloc[:, -1], predictions[:, 1], multi_class="ovr"))
        print("Average: " + str(sum(aucs)/len(aucs)))
        auc = str(sum(aucs)/len(aucs))
        results=results.append({"Algorithm": ["Random Forest"], "n_estimators": [estimators], "eta": [0], "max_depth": [depth], "n_neighbors": [0], "AUC": [auc]}, ignore_index=True)

Average: 0.7682535180032125
Average: 0.7710683573073629
Average: 0.7730238267218132
Average: 0.7745783938411505
Average: 0.7767596365730836
Average: 0.7683237943206775
Average: 0.7708713155280366
Average: 0.7730375330755973
Average: 0.7746716064474772
Average: 0.7769812762936207
Average: 0.7679902978255781
Average: 0.7712133500729946
Average: 0.7730075169721419
Average: 0.7747586775774926
Average: 0.7770832239520372
Average: 0.7678983814154401
Average: 0.7707628856347903
Average: 0.7728714860480804
Average: 0.774734739767957
Average: 0.7769943737354621


In [53]:
results.to_csv("10_train_rf.csv")

In [44]:
#RANDOM FOREST normal
n_estimators = [100, 200, 500, 1000]
max_depth =  [2, 3, 4, 5, 6]
for estimators in n_estimators:
    for depth in max_depth:
        model = RandomForestClassifier(random_state = 42, n_estimators = estimators, max_depth = depth)
        X = df.iloc[:,:-1].values
        y = df.iloc[:,-1:].values
        from sklearn.model_selection import train_test_split
        X_train, X_test, y_train, y_test = train_test_split(X,y,test_size= .3)
        model.fit(X_train,y_train)
        predictions = model.predict(X_test)
        pred_prob1 = model.predict_proba(X_test)
        auc_score1 = roc_auc_score(y_test, predictions, multi_class='ovr')
        print("AUC score",auc_score1)

  model.fit(X_train,y_train)


AUC score 0.570502789447668


  model.fit(X_train,y_train)


AUC score 0.6055188000728234


  model.fit(X_train,y_train)


AUC score 0.6242256479067139


  model.fit(X_train,y_train)


AUC score 0.6303484059892531


  model.fit(X_train,y_train)


AUC score 0.6506320230049188


  model.fit(X_train,y_train)


AUC score 0.5749574397386328


  model.fit(X_train,y_train)


AUC score 0.604135205032066


  model.fit(X_train,y_train)


AUC score 0.622001956357818


  model.fit(X_train,y_train)


AUC score 0.6272258382215237


  model.fit(X_train,y_train)


AUC score 0.6517703781141975


  model.fit(X_train,y_train)


AUC score 0.5761979477656107


  model.fit(X_train,y_train)


AUC score 0.6084015275504637


  model.fit(X_train,y_train)


AUC score 0.6221986115224504


  model.fit(X_train,y_train)


AUC score 0.624618063751346


  model.fit(X_train,y_train)


AUC score 0.641777101873373


  model.fit(X_train,y_train)


AUC score 0.5742158738234107


  model.fit(X_train,y_train)


AUC score 0.6056322177398411


  model.fit(X_train,y_train)


AUC score 0.6182651467895489


  model.fit(X_train,y_train)


AUC score 0.63509007448467


  model.fit(X_train,y_train)


AUC score 0.6341569973616272


In [47]:
#KNN 10 fold
n_neighbors = [3, 5, 7, 11]
for neighbors in n_neighbors:
        model = KNeighborsClassifier(n_neighbors = neighbors)
        from sklearn.model_selection import StratifiedKFold
        skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
        target = df.loc[:,"default payment next month"]
        aucs=[]
        for train_index, test_index in skf.split(df, target):
            train = df.iloc[train_index,:] 
            test = df.iloc[test_index,:]    
            model.fit(train.iloc[:, :-1], train.iloc[:, -1])
            predictions = model.predict_proba(test.iloc[:, :-1])
            #print(roc_auc_score(test.iloc[:, -1], predictions, multi_class="ovr"))
            aucs.append(roc_auc_score(test.iloc[:, -1], predictions[:, 1], multi_class="ovr"))
        print("Average: " + str(sum(aucs)/len(aucs)))
        auc = str(sum(aucs)/len(aucs))
        results=results.append({"Algorithm": ["KNN"], "n_estimators": [0], "eta": [0], "max_depth": [0], "n_neighbors": [neighbors], "AUC": [auc]}, ignore_index=True)


Average: 0.5904971836103643
Average: 0.6094895657779796
Average: 0.6189537791987341
Average: 0.6328647197959573


In [48]:
results.to_csv("10_train_knn.csv")

In [49]:
#KNN normal
n_neighbors = [3, 5, 7, 11]
for neighbors in n_neighbors:
        model = KNeighborsClassifier(n_neighbors = neighbors)
        X = df.iloc[:,:-1].values
        y = df.iloc[:,-1:].values
        from sklearn.model_selection import train_test_split
        X_train, X_test, y_train, y_test = train_test_split(X,y,test_size= .3)
        model.fit(X_train,y_train)
        predictions = model.predict(X_test)
        pred_prob1 = model.predict_proba(X_test)
        auc_score1 = roc_auc_score(y_test, predictions, multi_class='ovr')
        print("AUC score",auc_score1)

  model.fit(X_train,y_train)


AUC score 0.5548294864737507


  model.fit(X_train,y_train)


AUC score 0.5513037153608114


  model.fit(X_train,y_train)


AUC score 0.539709073199078


  model.fit(X_train,y_train)


AUC score 0.5433008791883531


In [3]:
#AdaBoost 10 fold
n_estimators = [50,100]
for estimators in n_estimators:
        model = AdaBoostClassifier(n_estimators = estimators)
        from sklearn.model_selection import StratifiedKFold
        skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
        target = df.loc[:,"default payment next month"]
        aucs=[]
        for train_index, test_index in skf.split(df, target):
            train = df.iloc[train_index,:] 
            test = df.iloc[test_index,:]    
            model.fit(train.iloc[:, :-1], train.iloc[:, -1])
            predictions = model.predict_proba(test.iloc[:, :-1])
            #print(roc_auc_score(test.iloc[:, -1], predictions, multi_class="ovr"))
            aucs.append(roc_auc_score(test.iloc[:, -1], predictions[:, 1], multi_class="ovr"))
        print("Average: " + str(sum(aucs)/len(aucs)))
        auc = str(sum(aucs)/len(aucs))
        results=results.append({"Algorithm": ["AdaBoost"], "n_estimators": [estimators], "eta": [0], "max_depth": [0], "n_neighbors": [0], "AUC": [auc]}, ignore_index=True)


Average: 0.774919530392226
Average: 0.7756091442391423


In [4]:
results.to_csv("10_train_adaboost.csv")