# YAKC - Model

## Import

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from IPython.display import display, Markdown
plt.style.use("seaborn-darkgrid")
pd.set_option('display.max_columns', None)  

DEBUG = False
SEED = 666

import os
for d in ['orig','data','output']: os.makedirs(d, exist_ok=True)


In [2]:
from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import cross_val_score
from sklearn.decomposition import PCA

In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

classifiers = {
    #"LR": LogisticRegression(),
    "DT(max_depth=3)": DecisionTreeClassifier(max_depth=3),
    "DT(max_depth=6)": DecisionTreeClassifier(max_depth=6),
    "DT(max_depth=9)": DecisionTreeClassifier(max_depth=9),
    "KNN": KNeighborsClassifier(n_neighbors = 5),
    "SVC": SVC(),
    "RFC": RandomForestClassifier(),
    "ADA": AdaBoostClassifier(),
}

In [21]:
models = []

# Naive Bayes
from sklearn.naive_bayes import GaussianNB
models.append( {"name": "NB", "ref": GaussianNB()} )

# k-nearest neighbor 
from sklearn.neighbors import KNeighborsClassifier
models.append( {"name": "KNN", "ref": KNeighborsClassifier()} )

# Logistic Regression
from sklearn.linear_model import LogisticRegression
models.append( {"name": "LogisticRegression", "ref": LogisticRegression(solver="lbfgs")} )

# Support Vector Machines
from sklearn.svm import SVC
models.append( {"name": "SVC", "ref": SVC(gamma="scale")} )

# Decision Trees
from sklearn.tree import DecisionTreeClassifier
models.append( {"name": "DT (pruned=4)", "ref": DecisionTreeClassifier(criterion='entropy', max_depth=4)} )

## Datasets

#### Train

In [4]:
df = pd.read_pickle("data/train.pkl")
print(df.shape)
df

(18735, 207)


Unnamed: 0,x000,x001,x002,x003,x004,x005,x006,x007,x008,x009,x010,x011,x012,x013,x014,x015,x016,x017,x018,x019,x020,x021,x022,x023,x024,x025,x026,x027,x028,x029,x030,x031,x032,x033,x034,x035,x036,x037,x038,x039,x040,x041,x042,x043,x044,x045,x046,x047,x048,x049,x050,x051,x052,x053,x054,x055,x056,x057,x058,x059,x060,x061,x062,x063,x064,x065,x066,x067,x068,x069,x070,x071,x072,x073,x074,x075,x076,x077,x078,x079,x080,x081,x082,x083,x084,x085,x086,x087,x088,x089,x090,x091,x092,x093,x094,x095,x096,x097,x098,x099,x100,x101,x102,x103,x104,x105,x106,x107,x108,x109,x110,x111,x112,x113,x114,x115,x116,x117,x118,x119,x120,x121,x122,x123,x124,x125,x126,x127,x128,x129,x130,x131,x132,x133,x134,x135,x136,x137,x138,x139,x140,x141,x142,x143,x144,x145,x146,x147,x148,x149,x150,x151,x152,x153,x154,x155,x156,x157,x158,x159,x160,x161,x162,x163,x164,x165,x166,x167,x168,x169,x170,x171,x172,x173,x174,x175,x176,x177,x178,x179,x180,x181,x182,x183,x184,x185,x186,x187,x188,x189,x190,x191,x192,x193,x194,x195,x196,x197,x198,x199,x200,x201,x202,x203,x204,x205,target
0,3,77.4395,1,0.8533,14.9130,7.4400,0.54,3.522070,0.000000,1043.82,0.720,7,1.1346,0.1733,0.0000,-0.318995,0.000256,1.210252,15.4300,1382.2740,0.720,0.000272,23.4832,0.001704,527.6685,0.0,1.1056,2.995991,0.046392,0.0000,0.1987,3.751083,0.0,3,253.17,0.2096,0.675,0.0,3,0.0178,0.2044,94.8683,2.295,2.925321,4.1667,1.1594,2.1226,60.000,1706.86,0.1853,0.043272,0.0000,0.2880,1,0.0,3,0.0000,0.900,-0.925930,3,0.000296,152.9706,3.459193,0.0652,1.915986,67.082,1.0,5,0.0,,0.0356,0.0000,2.0,0.886684,0.0,0.306582,0.0000,0.0000,0.0000,6.12,3.0974,22.320,1.955933,0.000,0.0000,10.3810,0.0,39.1053,12.78,-0.879191,0.0356,0.1511,1.0790,1.1328,0.3118,4.0625,0.009304,0.00,-6.142256,5.76,0.4578,4,3,1,16.0654,4.5387,0.0000,1,0.0222,0.0000,0.000000,0.00,973.62,0.0,0.000000,0.8,1.1448,0.6088,0.0000,1,38.34,10.8933,3,0.443059,0.0000,5,0.0000,4.1176,4,0.188757,0.0000,0.000000,0.0000,1.1263,3,2.295,1.1497,0.000,0.00,6.66,35.0627,13.0509,1.1627,1,209.34,3.522070,1.955933,1397.0974,0,1931.4836,0.045495,-0.525758,0.0000,-1.219289,0.0000,4.5000,0,1.915986,0.0000,1.1807,67.0820,0.00,1.0713,19950.39,6.2253,4.742285,0.0,2,0.000,0.0000,13.293280,1,211.0660,-1.474187,3.081379,0.0933,2015,1.440,0.0000,-0.925930,0.000568,2,24.1959,0,1704,0.0000,0.0000,-1.330913,0.1911,5,-0.004427,1.1091,0.00,3.152907,2.925321,0.4432,0.99,21.6853,1.0718,3.0,152.9706,2.0,0.0,0.0,0,0.011252,1.0
1,3,12.5110,1,1.9778,25.1193,0.0000,0.00,-0.996876,0.000000,0.00,0.000,6,0.0000,0.0000,0.0000,2.109710,0.000000,-0.934238,23.0435,1457.9697,0.810,0.000220,0.0000,0.000000,0.0000,0.0,1.1103,0.111828,0.000000,0.0000,0.0347,0.415993,0.0,2,0.00,0.2995,0.495,0.0,3,0.0044,0.0000,0.0000,1.620,0.777478,1.6250,0.0000,0.0000,60.000,1343.77,0.2027,0.004672,0.0000,0.0480,1,0.0,2,0.0000,0.810,-4.497039,3,0.000052,0.0000,-2.646247,0.0636,0.403263,60.000,,4,0.0,1.0,0.0000,0.0000,2.0,0.818036,0.0,0.304375,0.0000,0.0000,0.0000,4.95,0.0000,0.000,1.737248,0.000,0.0000,0.0000,0.0,11.3913,1.62,0.560984,0.0444,0.0000,1.1214,1.1552,0.0000,0.0000,0.176948,0.00,-0.055706,0.00,0.0133,4,3,1,0.0000,0.0000,0.0000,1,0.0089,0.0000,0.000000,0.00,105.12,0.0,0.000000,0.9,1.0523,0.1350,0.0000,1,0.00,0.0000,3,2.284246,0.0000,5,0.0000,5.0667,4,-1.008446,0.0000,0.000000,0.0000,1.1214,3,0.000,0.0000,0.000,0.00,1.17,58.4320,0.0000,0.0000,2,3981.33,-0.996876,1.737248,617.9989,0,0.0000,-0.543308,2.684732,0.0000,0.071503,0.0000,2.0000,0,0.403263,0.0000,1.2840,301.4963,0.00,87.5903,18405.81,56.7627,1.174342,0.0,2,0.000,0.0000,-1.718136,1,67.0820,-0.491866,1.766481,0.0000,2018,0.585,0.0000,-4.497039,0.000072,2,50.9762,0,1327,0.0000,0.0000,1.161308,0.6800,5,1.119686,1.1214,0.00,-2.594028,0.777478,0.0450,27.72,2.0853,0.0000,3.0,0.0000,2.0,0.0,0.0,0,0.000000,1.0
2,3,18.5793,1,0.4889,10.4918,0.9533,0.63,1.749067,0.000000,97.38,0.000,7,1.1494,0.0720,0.0000,0.646423,0.000124,-0.407236,11.0795,3928.3637,0.630,0.000576,15.4689,0.000000,211.0660,0.0,1.0945,2.798307,0.004328,0.0000,0.1120,-2.228379,0.0,4,486.99,0.1144,0.810,1.0,3,0.1244,0.2889,0.0000,2.745,-2.564040,3.5000,1.1701,2.3809,60.000,1847.92,0.3213,0.023440,0.0000,2.1187,1,0.0,3,0.0000,1.080,-0.622749,3,0.000140,1705.7843,-2.541260,0.0651,1.043497,67.082,0.0,5,0.0,0.0,0.0133,0.0000,,0.940496,0.0,-3.566325,0.0000,0.0000,0.0000,12.96,0.0000,11.250,-0.483398,0.000,0.0000,0.0000,0.0,32.4675,107.82,-0.159943,0.0489,0.0222,1.1059,1.1414,0.4243,2.2500,0.004460,0.00,-8.512552,2.79,0.6178,5,3,1,23.1351,9.0827,0.0000,1,0.0178,0.0000,0.000000,0.00,527.40,0.0,0.000000,2.4,1.1215,0.3010,0.0000,1,0.00,12.4480,4,0.302760,0.0000,5,0.0000,5.2083,4,-0.922660,0.0000,0.000000,0.0000,1.1347,3,2.520,0.0000,0.000,0.00,3.15,24.6480,5.5455,0.0000,2,100.35,1.749067,-0.483398,973.4988,0,2229.9103,0.567358,3.323676,0.0000,0.988273,0.0000,11.4000,0,1.043497,0.0000,1.1564,2902.9062,0.00,0.8920,21161.16,3.1773,7.312340,0.0,2,0.000,0.0000,-0.229996,2,295.4657,0.659240,-0.189567,0.0000,2020,0.765,0.0000,-0.622749,0.004792,2,17.9104,0,1860,0.0000,0.0000,2.938327,0.1200,5,1.623314,1.1141,0.00,2.801389,-2.564040,1.1270,0.90,13.2720,1.0640,3.0,67.0820,2.0,0.0,0.0,0,0.021644,0.0
3,3,37.0018,2,1.5911,29.7133,28.1413,0.72,1.086428,0.012236,8414.73,0.630,5,1.1154,0.5800,0.0356,-0.207179,0.001660,0.327794,27.2020,453.3721,0.810,0.000000,394.8397,0.000656,134.1641,0.0,0.0000,-0.567656,0.373988,3.0880,0.3587,1.261983,0.0,2,0.00,0.5854,0.000,0.0,4,0.1333,0.0000,666.1081,0.765,-0.289264,5.2308,0.0000,3.1474,60.000,843.72,0.0000,0.038656,0.0000,1.3813,1,0.0,2,1.1214,0.990,-0.595020,3,0.000656,84.8528,0.225479,0.0701,-2.114084,67.082,0.0,5,0.0,1.0,0.0756,171.0469,1.0,0.520500,0.0,0.620245,0.0000,1.4253,1.1170,0.00,0.2044,65.250,-2.349034,0.810,0.0044,7.6538,0.0,44.5076,61.74,-0.006320,0.0000,0.1822,1.1289,1.1178,3.1599,5.3659,0.046400,275.31,7.100842,37.35,0.5333,3,2,1,0.0000,0.0000,53.3957,1,0.0444,0.0000,0.000000,0.00,869.76,0.0,0.002448,2.2,1.1161,1.5211,10.4324,1,14.76,0.0000,3,-1.509055,1.6250,5,10.7800,0.0000,5,2.200987,0.0347,0.000056,0.0000,1.1313,3,0.000,1.0995,0.000,1.26,14.76,55.9960,18.0163,0.5280,2,1044.00,1.086428,-2.349034,0.0000,0,174.9286,0.543135,-3.098486,1.1698,0.270680,0.0000,9.8679,0,-2.114084,2.6422,1.1721,121.8466,1.26,9.6768,11711.25,24.9013,-1.716554,0.0,3,13.680,0.1511,-0.751243,1,67.0820,1.692199,-0.817929,0.0978,2020,0.990,829.1820,-0.595020,0.002744,3,43.3750,0,838,0.0000,0.0000,-1.632378,1.1111,6,1.031328,1.1313,55.08,-2.224542,-0.289264,0.0000,1.26,23.3093,1.0699,3.0,0.0000,2.0,0.0,0.0,0,0.000000,0.0
4,3,6.0767,1,0.0089,15.6965,9.1427,0.90,-0.857957,0.046752,2393.64,0.495,6,1.0882,0.1947,0.0489,-0.616561,0.000268,-0.372517,15.2975,1872.9870,1.080,0.000000,83.0177,0.000044,1340.6342,0.0,0.0000,-3.118512,0.106384,9.9867,0.1200,0.854402,0.0,2,0.99,0.3145,0.000,1.0,4,0.0222,0.0089,2965.7545,0.720,-1.353324,3.7500,1.1240,0.9231,67.082,1015.37,0.0000,0.026424,0.0000,0.1893,1,0.0,2,1.0831,0.630,-4.954999,3,0.000144,134.1641,-0.174665,0.0768,0.714458,60.000,0.0,5,0.0,0.0,0.0311,726.2231,2.0,0.819356,0.0,0.598902,0.0000,0.1573,0.0000,0.00,0.0450,19.890,-2.679649,0.540,0.0000,2.0000,0.0,47.6810,6.75,0.643944,0.0000,0.1911,1.1065,1.1088,0.4217,4.2941,0.000096,1051.92,-0.560491,6.03,0.8933,4,2,1,2.2857,0.0427,175.7680,1,0.0178,0.0000,0.000000,0.00,594.54,0.0,0.000188,1.0,1.0956,0.5248,17.2995,1,0.99,0.0450,3,0.968503,0.0000,5,4.2143,0.0000,6,-0.349614,0.0000,0.000000,0.0000,1.1383,3,0.495,1.0948,0.000,0.00,3.24,35.6587,10.8379,0.0373,2,2.16,-0.857957,-2.679649,0.0000,0,878.2369,2.090833,0.942994,1.2715,-1.217891,0.0000,3.9444,0,0.714458,0.1045,1.1802,227.9706,0.00,0.0900,18435.51,0.0667,-4.379485,0.0,3,23.220,0.0311,-0.983125,1,5757.5776,-0.262855,-0.197279,0.0089,2019,0.540,94.8683,-4.954999,0.000300,3,2.6000,0,1007,0.0000,0.0000,1.903067,0.4222,6,1.211350,1.1297,4.23,-1.309924,-1.353324,0.0000,1.26,20.6573,1.1216,3.0,2379.2856,2.0,0.0,0.0,0,0.000044,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18730,3,5.0726,1,0.0356,19.8635,34.8613,0.54,-1.993010,0.008304,14549.40,0.000,6,1.1046,0.0133,0.0356,-0.990054,0.000024,1.264842,17.1425,232.4647,0.585,0.000000,2161.0574,0.000000,0.0000,0.0,0.0000,3.050528,0.646640,1.9907,0.1240,0.468389,0.0,2,0.00,0.4804,0.000,1.0,4,0.0000,0.0000,0.0000,0.000,3.445438,3.6923,0.0000,0.0000,67.082,1159.37,0.0000,0.027156,0.0000,0.0000,1,0.0,2,1.1021,0.810,9.020818,3,0.000148,60.0000,-0.205221,0.0698,1.623486,60.000,0.0,4,0.0,0.0,0.0044,2299.6521,2.0,0.317216,0.0,0.804271,0.0000,0.1467,0.0000,0.00,0.0000,11.475,3.801038,0.765,0.0000,0.0000,0.0,44.8303,0.00,-0.685618,0.0000,0.1422,1.1214,0.0000,0.0000,1.0000,0.000292,186.84,0.077574,0.54,0.8622,4,2,1,0.0000,0.0000,16.9735,1,0.0311,0.0000,0.000000,0.00,611.01,0.0,0.000220,1.0,1.1065,0.1045,8.3152,1,0.00,0.0000,3,-1.366609,0.0000,5,3.6667,0.0000,5,1.433264,0.0000,0.000000,0.0000,1.1327,3,0.000,0.0000,0.000,0.00,3.33,29.0773,17.0509,0.0000,2,6.57,-1.993010,3.801038,0.0000,0,0.0000,0.767375,-2.085622,1.1721,-0.724036,0.0000,0.0000,0,1.623486,0.3502,1.1694,67.0820,0.00,0.5538,7137.36,0.2293,0.340494,0.0,2,19.755,0.0267,3.058725,1,400.1361,1.380059,0.667806,0.0000,2020,0.450,1142.9537,9.020818,0.000000,3,4.7778,0,1159,0.0000,0.0000,0.861595,1.1867,6,-0.680611,1.1114,4.95,2.900628,3.445438,0.0000,0.00,19.6507,1.0048,3.0,0.0000,2.0,0.0,0.0,0,0.000000,0.0
18731,3,0.0000,1,0.6844,16.3844,8.3800,1.08,-1.042947,0.001732,1018.62,0.000,7,1.1773,1.0907,0.0089,-0.323509,0.002864,1.043297,16.4810,3762.7393,0.720,0.013456,46.1453,0.000000,94.8683,0.0,1.1112,-1.717672,0.045272,0.3040,6.9560,2.300763,0.0,3,589.95,0.2355,0.765,0.0,3,0.4711,0.6844,0.0000,1.080,2.866560,18.6738,1.1214,3.2462,67.082,1172.92,3.6587,0.000000,0.1331,7.1107,1,0.0,2,1.1065,2.160,3.024214,4,0.019880,480.0000,-2.075588,0.0632,0.030920,0.000,1.0,3,2.6,0.0,0.1378,657.9514,2.0,0.872348,0.0,-0.445063,1.0822,0.7587,0.0000,302.76,0.0000,23.310,-3.341806,0.540,0.0000,0.0000,0.0,0.0000,198.54,0.221361,0.1600,0.1022,1.1292,1.1468,2.6530,7.5926,0.008352,38.97,1.816522,64.44,0.0000,5,3,1,24.0679,10.3413,8.5050,1,0.5556,0.0667,0.000084,1.89,0.00,0.0,0.000968,0.6,1.0901,9.4503,2.7143,1,0.00,11.6802,4,0.785432,0.0000,5,8.9688,12.2845,5,-0.844890,0.0000,0.000000,0.0000,1.1125,4,0.990,0.0000,0.495,0.00,447.30,38.6907,15.1690,0.0000,1,187.92,-1.042947,-3.341806,136.8466,0,1146.2984,0.547120,-1.560493,1.1192,-0.534576,0.0000,28.4149,0,0.030920,0.8029,0.0000,120.0000,0.00,1.3244,19627.83,5.2080,0.277687,0.0,2,19.485,0.1200,-0.704256,1,127.2792,-0.868799,0.187120,0.0000,2020,0.900,8792.2011,3.024214,0.008824,2,21.3370,0,1187,550.2124,0.0178,1.131979,0.1156,5,1.937079,1.1094,21.78,2.600166,2.866560,41.2852,0.00,0.0000,1.0775,4.0,120.0000,2.0,0.0,0.0,0,0.026220,1.0
18732,3,17.9158,1,0.0267,20.7358,37.3360,0.00,-2.724210,0.004300,14540.40,0.450,5,1.1333,0.0000,0.0178,1.611334,0.000000,1.433179,18.1615,350.8432,0.765,0.000000,2649.0969,0.000020,0.0000,0.0,0.0000,-0.566577,0.646240,0.8320,0.0000,1.721659,0.0,2,0.00,0.4804,0.000,1.0,4,0.0000,0.0000,0.0000,0.000,1.035994,0.0000,0.0000,0.0000,67.082,1131.84,0.0000,0.031496,0.0000,0.0000,1,0.0,2,1.0822,0.810,0.203151,3,0.000000,92.4342,-1.763402,0.0719,-4.793332,60.000,0.0,5,0.0,0.0,0.0000,8778.5249,1.0,0.317684,0.0,0.517622,0.0000,0.0480,0.0000,0.00,0.0000,9.090,-3.809018,0.900,0.0000,1.4000,0.0,47.5562,0.00,-0.869272,0.0000,0.0978,1.1283,0.0000,0.0000,0.0000,0.000180,96.75,6.890848,0.00,0.6400,3,2,1,0.0000,0.0000,18.2951,1,0.0000,0.0000,0.000000,0.00,708.66,0.0,0.000080,2.0,0.0000,0.0000,4.7273,1,0.45,0.0000,3,0.310528,0.0000,5,2.0000,0.0000,5,0.629352,0.0000,0.000000,0.0000,1.1366,3,0.000,1.1065,0.000,0.00,0.00,30.6493,18.3756,0.0187,1,4.05,-2.724210,-3.809018,0.0000,0,0.0000,-0.706335,-0.258465,1.1454,0.942444,0.0000,0.0000,0,-4.793332,0.2700,1.1610,0.0000,0.00,0.2423,7147.89,0.1320,0.794835,0.0,2,22.050,0.0089,-5.523364,1,165.0000,1.226168,0.584807,0.0044,2020,0.000,5164.1602,0.203151,0.000000,2,3.7143,0,1132,0.0000,0.0000,2.478470,1.1156,5,1.360655,1.1100,1.80,-0.198991,1.035994,0.0000,1.26,22.5120,0.0000,3.0,0.0000,2.0,0.0,0.0,0,0.000000,0.0
18733,3,23.2971,1,0.1822,18.2378,36.6253,0.81,0.923949,0.002928,9497.07,0.855,6,1.1339,1.2813,0.0222,0.396402,0.010892,0.970629,21.4070,447.9876,0.720,0.000044,1344.0741,0.004744,120.0000,0.0,1.1100,-1.596039,0.422092,0.9160,0.5573,-0.457189,0.0,2,130.95,0.5602,0.990,0.0,3,0.2933,0.2622,318.4532,0.720,-0.377663,7.0667,1.1297,2.9742,90.000,1290.65,0.0267,0.034196,0.0000,2.5933,1,0.0,2,1.0945,0.900,-2.074279,3,0.000872,123.6932,-1.481413,0.0729,-0.382435,67.082,0.0,3,0.0,1.0,0.0800,150.0000,2.0,0.510200,0.0,-1.274383,0.0000,0.5987,1.1624,0.99,3.3329,9.720,-3.100664,0.720,0.0311,18.3043,0.0,49.9351,110.88,-0.110599,0.0044,0.0756,1.1173,1.1141,49.1188,4.5905,0.002196,65.88,-0.572045,245.07,0.6889,4,3,1,15.5974,3.1827,2.6811,1,0.1067,0.0000,0.000000,0.00,769.41,0.0,0.000804,9.3,1.0795,0.3853,6.3455,1,106.74,3.6257,3,0.493732,5.7647,4,7.7931,1.5714,5,0.373214,0.2600,0.000284,816.0882,1.1283,3,0.810,1.1512,0.000,6.39,19.62,33.3947,21.7523,3.3507,1,49.41,0.923949,-3.100664,0.0000,0,330.0000,1.555632,-0.061063,1.1947,-0.053484,0.6206,13.9014,0,-0.382435,0.4190,1.1899,229.1004,0.54,1.0043,11479.50,1.5053,-0.751790,0.0,2,14.220,0.1022,-2.241254,1,120.0000,0.946021,-0.341185,0.2578,2019,0.630,1647.5436,-2.074279,0.004928,2,12.0851,0,1309,0.0000,0.0000,2.067862,1.1378,5,1.564234,1.1183,18.09,-0.476205,-0.377663,0.0000,1.53,24.5453,1.0619,3.0,94.8683,2.0,0.0,0.0,0,0.005820,0.0


In [5]:
df_score = pd.read_pickle("data/test.pkl")
print(df_score.shape)
df_score

(6265, 206)


Unnamed: 0,x000,x001,x002,x003,x004,x005,x006,x007,x008,x009,x010,x011,x012,x013,x014,x015,x016,x017,x018,x019,x020,x021,x022,x023,x024,x025,x026,x027,x028,x029,x030,x031,x032,x033,x034,x035,x036,x037,x038,x039,x040,x041,x042,x043,x044,x045,x046,x047,x048,x049,x050,x051,x052,x053,x054,x055,x056,x057,x058,x059,x060,x061,x062,x063,x064,x065,x066,x067,x068,x069,x070,x071,x072,x073,x074,x075,x076,x077,x078,x079,x080,x081,x082,x083,x084,x085,x086,x087,x088,x089,x090,x091,x092,x093,x094,x095,x096,x097,x098,x099,x100,x101,x102,x103,x104,x105,x106,x107,x108,x109,x110,x111,x112,x113,x114,x115,x116,x117,x118,x119,x120,x121,x122,x123,x124,x125,x126,x127,x128,x129,x130,x131,x132,x133,x134,x135,x136,x137,x138,x139,x140,x141,x142,x143,x144,x145,x146,x147,x148,x149,x150,x151,x152,x153,x154,x155,x156,x157,x158,x159,x160,x161,x162,x163,x164,x165,x166,x167,x168,x169,x170,x171,x172,x173,x174,x175,x176,x177,x178,x179,x180,x181,x182,x183,x184,x185,x186,x187,x188,x189,x190,x191,x192,x193,x194,x195,x196,x197,x198,x199,x200,x201,x202,x203,x204,x205
0,3,4.2241,1,0.0000,23.5068,29.6787,0.450,1.980838,0.163172,16295.85,0.00,5,1.0945,0.0533,0.1333,0.430486,0.000064,-0.552766,15.8210,30.8575,0.000,0.000000,3518.1899,0.000000,0.0000,0.000,0.0000,-2.669028,0.724260,11.5360,0.0000,1.363201,0.0000,2,0.00,0.4409,0.000,0.0,4,0.0000,0.0000,0.0000,0.00,0.292289,0.0000,0.0000,0.0000,90.000,990.09,0.0000,0.027496,0.0,0.0000,1,0.000,2,1.1065,0.72,-0.383887,3,0.000000,60.0000,-0.999931,0.0676,2.405041,67.0820,1.0,5,0.0,,0.0133,522.5555,,0.084792,0.0000,0.252765,0.0,0.1747,0.0000,0.00,0.0000,2.340,-4.421815,0.54,0.0000,0.0000,0.000000,44.1145,0.00,0.257967,0.0000,0.0711,1.1169,0.0000,0.0424,2.5000,0.000000,3671.37,-1.765610,1.44,1.1022,3,2,1,0.0000,0.0000,216.0818,1,0.0000,0.0,0.0,0.0,618.66,0.0,0.000216,2.4,0.0000,0.0000,10.9480,1,0.00,0.0000,3,-0.101281,0.0000,5,4.4667,0.0000,5,-0.099349,0.0000,0.000000,0.0,1.1216,3,0.000,0.0000,0.0,0.00,0.00,18.1667,14.0200,0.0000,2,0.00,1.980838,-4.421815,0.0000,0,3707.0339,0.255978,1.562013,1.0958,-0.211940,0.0000,0.0000,0,2.405041,0.1786,1.1440,0.0000,0.00,0.0000,1907.82,0.0000,2.883239,0.0000,2,58.005,0.0356,-2.337341,1,0.0000,0.412650,-4.120612,0.0000,2021,0.000,227.9023,-0.383887,0.000000,2,0.0000,0,995,0.0,0.0,-2.854167,1.2489,5,1.875138,0.0000,4.86,1.518522,0.292289,0.0000,0.99,19.4360,1.0699,3.0,0.0000,1.0,0.00,0.000,0,0.000000
1,3,0.0000,1,1.3244,6.6164,0.0000,0.540,-2.606239,0.000000,0.00,0.00,7,0.0000,0.0160,0.0000,-0.835329,0.000024,-0.561637,6.5965,9702.2157,0.765,0.000000,0.0000,0.000000,0.0000,0.000,0.0000,-0.999451,0.000000,0.0000,0.4960,1.520167,0.0000,3,2.07,0.0499,0.000,0.0,3,0.0000,0.0044,0.0000,0.00,-0.457013,7.2692,1.1671,0.0000,60.000,1465.89,0.0000,0.003376,0.0,0.0000,1,0.000,2,0.0000,905.58,1.763969,3,0.000676,0.0000,-1.396045,0.0560,0.625527,0.0000,1.0,4,0.0,0.0,0.0044,0.0000,2.0,0.974472,0.0000,-0.245821,0.0,0.0000,0.0000,0.00,0.0000,0.000,-2.442345,0.00,0.0000,0.0000,0.000000,5.2542,0.00,-1.137698,0.0000,0.0000,1.1557,0.0000,0.0000,1.2000,0.021360,0.00,-1.965061,0.54,0.0044,5,3,1,2.2000,0.0587,0.0000,1,0.1022,0.0,0.0,0.0,75.96,0.0,0.000000,1.3,1.0831,0.3147,0.0000,1,0.00,0.0000,4,-1.083037,0.0000,5,0.0000,0.0000,3,0.018753,0.0000,0.000000,0.0,1.1347,3,2.070,0.0000,0.0,0.00,15.21,14.8267,0.0000,0.0000,2,480.60,-2.606239,-2.442345,0.0000,0,0.0000,-1.329380,0.826567,0.0000,0.175797,0.0000,0.0000,0,0.625527,0.0000,1.2473,379.4733,0.00,3.3791,21925.62,13.6387,0.838948,0.0000,2,0.000,0.0000,-0.176897,1,90.0000,0.784310,0.165931,0.0000,2015,0.540,0.0000,1.763969,0.000000,2,34.8912,0,1456,0.0,0.0,2.020684,0.0133,4,-2.273555,1.1382,0.00,3.998988,-0.457013,0.0000,75.96,0.8120,1.0472,3.0,0.0000,2.0,0.00,0.000,0,0.000092
2,3,6.7518,1,0.0089,31.1296,47.9627,0.000,1.907240,0.000028,16537.50,0.00,6,1.1297,0.0000,0.0044,-0.335846,0.000000,-0.323353,21.2705,71.2133,0.630,0.000000,3490.4073,0.000000,0.0000,0.000,0.0000,0.923170,0.735000,0.0267,1.1227,2.840879,0.0000,3,0.00,0.4035,0.000,1.0,3,0.0000,0.0000,0.0000,0.00,6.346902,10.5500,0.0000,0.0000,67.082,1552.56,0.0000,0.026616,0.0,0.0000,1,0.000,2,1.0945,0.81,6.631989,3,0.001524,60.0000,-2.126982,0.0658,-0.261761,60.0000,0.0,5,0.0,0.0,0.0000,295.4657,2.0,0.235780,0.0000,0.657043,0.0,0.6360,0.0000,0.00,0.0000,1.980,-0.166256,0.63,0.0000,0.0000,0.000000,42.7866,0.00,-0.974155,0.0000,0.0933,1.1248,0.0000,0.0000,0.0000,0.000056,0.63,7.205096,0.00,0.9067,4,3,1,0.0000,0.0000,0.0000,1,0.0622,0.0,0.0,0.0,598.86,0.0,0.000996,0.8,1.1773,2.7077,1.6667,1,0.00,0.0000,3,0.522825,0.0000,5,7.4688,0.0000,5,-0.658656,0.0000,0.000000,0.0,1.1347,3,0.000,0.0000,0.0,0.00,34.29,39.6613,21.8030,0.0000,2,1.26,1.907240,-0.166256,0.0000,0,0.0000,0.065581,-5.574248,1.1455,3.147528,0.0000,0.0000,0,-0.261761,0.5913,1.1610,202.0820,0.00,0.1800,5305.05,0.0453,2.258411,0.0000,2,0.630,0.1111,2.465039,1,10296.3392,-0.496219,0.949382,0.0000,2020,1.395,0.0000,6.631989,0.000000,2,2.1250,0,1548,0.0,0.0,-2.768246,1.8667,5,0.297217,1.1023,22.41,0.599163,6.346902,0.0000,1.17,18.6547,0.0000,3.0,0.0000,2.0,0.00,0.000,0,0.000000
3,3,87.7282,1,1.5689,27.8614,5.8093,0.765,-2.373402,0.006536,1470.42,0.00,6,1.1609,1.3280,0.0356,-0.684918,0.002912,-0.429417,27.0710,711.0611,0.720,0.000000,335.8771,0.000000,0.0000,0.000,0.0000,-1.187768,0.065352,2.3973,1.1280,-1.602373,0.0000,2,62.28,0.3820,0.000,1.0,3,0.0000,0.0933,0.0000,0.00,-0.880441,10.0714,1.1663,0.0000,60.000,1445.68,0.0000,0.027008,0.0,0.0000,1,0.000,2,0.0000,0.72,-0.379069,3,0.001752,120.0000,-0.163890,0.0735,2.427688,67.0820,0.0,4,0.0,0.0,0.1689,0.0000,2.0,0.773664,0.0000,0.429807,0.0,0.0000,0.0000,0.00,0.0000,23.310,1.845067,0.00,0.0000,0.0000,0.000000,44.0485,0.00,-0.992454,0.0000,0.0489,1.1268,0.0000,4.1570,9.2593,0.120008,147.06,-4.599989,65.52,0.1200,3,3,1,10.6415,1.4987,23.8255,1,0.1289,0.0,0.0,0.0,607.68,0.0,0.000000,0.6,1.1355,1.6655,11.1358,1,0.00,1.9811,3,1.040476,0.0000,5,0.0000,0.0000,4,1.026824,0.0000,0.000000,0.0,1.1383,3,2.250,0.0000,0.0,0.00,39.42,63.8640,8.9648,0.0000,1,2700.18,-2.373402,1.845067,0.0000,0,372.2724,-0.416422,3.681627,1.1905,-0.199133,0.0000,0.0000,0,2.427688,0.0000,1.2178,216.3331,0.00,51.1180,17407.44,43.7080,-1.485136,0.0000,2,3.195,0.0000,-2.860613,1,67.0820,1.717978,-1.129010,0.0000,2021,0.810,60.0000,-0.379069,0.000000,2,48.0461,0,1447,0.0,0.0,0.788583,1.2178,5,-0.472918,1.1383,0.00,0.317534,-0.880441,0.0000,1.80,19.3120,1.0923,3.0,240.0000,2.0,0.00,0.000,0,0.002768
4,2,97.0239,1,4.0756,47.6245,12.0040,0.720,-1.940630,0.000000,2571.48,0.72,5,1.1263,0.3693,0.0000,0.497241,0.000620,0.737561,44.2645,482.5136,0.720,0.000172,115.1449,0.003436,2531.4028,0.000,1.0902,3.046115,0.114288,0.0000,1.2773,-6.039875,0.0000,2,325.35,0.5197,0.810,1.0,2,0.0222,0.3022,167.7267,2.25,-2.739155,8.7091,1.1473,1.4472,60.000,1375.44,0.1253,0.038564,0.0,0.2173,1,0.000,2,0.0000,0.99,3.229866,2,0.003004,67.0820,3.266722,0.0664,0.432787,67.0820,0.0,4,0.0,0.0,0.0578,0.0000,,0.662764,0.0000,-0.164857,0.0,0.0000,1.1673,3.87,4.3541,23.310,7.379442,0.00,0.0044,12.3220,0.000000,43.8629,12.51,-2.375249,0.0222,0.1644,1.1347,1.0909,0.7509,5.7200,0.162108,0.00,-5.330209,13.95,0.2089,3,3,1,19.0744,6.1440,0.0000,1,0.1156,0.0,0.0,0.0,867.69,0.0,0.000000,3.3,1.0973,5.0370,0.0000,1,77.31,4.6525,3,0.268656,1.8333,5,0.0000,3.3571,3,-0.686159,0.0293,0.000028,0.0,1.1383,3,3.240,1.1281,0.0,0.63,67.59,101.6040,13.5221,1.9347,1,3647.43,-1.940630,7.379442,3035.3418,0,658.6350,-0.045303,1.402204,0.0000,-1.367509,0.0000,3.4583,0,0.432787,0.0000,1.1839,408.0441,0.63,47.2410,14912.19,84.0573,-4.168653,0.0000,2,0.000,0.0000,-0.425156,1,67.0820,-1.890956,0.428630,0.1867,2016,1.215,0.0000,3.229866,0.000556,2,78.6402,0,1371,0.0,0.0,2.342365,1.4844,4,0.965675,1.1383,0.00,-0.960147,-2.739155,0.2319,1.53,22.9813,1.0699,3.0,123.6932,2.0,0.00,0.000,0,0.014460
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6260,5,64.4097,3,0.0000,24.3589,3.0053,0.720,-2.234310,0.083360,236.16,0.63,4,1.2009,2.9413,0.3600,1.261977,0.005200,2.442722,26.4205,990.6015,0.000,0.000000,19.9226,0.002856,67.0820,0.585,0.0000,5.816715,0.010496,16.5640,0.0160,-5.622266,1.0589,1,50.76,0.3346,0.000,0.0,6,0.8222,0.2711,210.0000,0.81,-2.482120,1.2000,1.1347,6.0270,60.000,352.70,0.0000,0.056148,0.0,11.5933,1,1.625,1,1.1082,0.00,4.640878,6,0.000024,140.7775,0.265302,0.0715,-2.393854,60.0000,0.0,6,0.0,1.0,0.5600,67.0820,1.0,0.809056,0.0347,-0.105289,0.0,4.6173,1.0921,0.00,9.3527,14.805,5.553127,0.81,0.0311,7.9630,0.000052,59.4895,481.77,0.387232,0.0000,0.0444,1.0803,1.1355,0.5753,15.2055,0.000000,1875.60,-0.858579,117.00,0.5111,2,2,3,13.9167,1.7787,37.7003,3,0.0044,0.0,0.0,0.0,1263.33,60.0,0.008808,4.7,1.0472,0.0000,21.7336,1,64.26,0.6040,2,-1.093845,4.2105,6,18.5426,0.0000,6,0.537753,0.2133,0.000332,60.0,1.1100,2,0.630,1.1065,0.0,7.47,0.54,56.2320,10.9417,1.1453,5,0.00,-2.234310,5.553127,0.0000,0,456.9464,-0.712108,1.206151,1.1375,1.465632,0.3483,29.6667,0,-2.393854,3.9144,1.1347,0.0000,1.08,0.0000,18203.76,0.0000,-0.265892,0.0089,6,10.800,0.4178,-4.373753,1,0.0000,0.958902,-0.150691,0.1022,2021,0.540,67.0820,4.640878,0.021412,5,0.0000,1,365,0.0,0.0,4.993787,0.6844,6,0.162529,0.0000,198.18,-1.841729,-2.482120,0.0000,0.81,37.4347,1.0516,2.0,182.4829,1.0,1.17,0.135,0,0.002256
6261,2,0.0000,1,0.7556,4.9334,0.0000,7.740,1.236457,0.000000,0.00,0.00,6,0.0000,0.1680,0.0000,0.561012,0.000688,0.149861,4.9925,7209.7789,0.720,0.000000,0.0000,0.000000,0.0000,0.000,0.0000,2.112211,0.000000,0.0000,0.6520,-0.076278,0.0000,3,0.00,0.0374,0.000,1.0,2,0.0000,0.0000,0.0000,0.00,-1.795855,7.3235,0.0000,0.0000,60.000,1421.48,0.0000,0.006388,0.0,0.0000,1,0.000,2,0.0000,0.81,-1.292921,2,0.001156,0.0000,2.625066,0.0637,1.808144,0.0000,0.0,4,0.0,0.0,0.0089,0.0000,2.0,0.981024,0.0000,-0.274839,0.0,0.0000,0.0000,0.00,0.0000,0.000,5.083203,0.00,0.0000,0.0000,0.000000,12.1125,0.00,0.466383,0.0000,0.0000,1.0984,0.0000,1.8900,2.3333,0.010744,0.00,-1.633875,15.48,0.0044,4,3,1,0.0000,0.0000,0.0000,1,0.0844,0.0,0.0,0.0,143.73,0.0,0.000000,1.3,1.0831,1.5734,0.0000,1,0.00,0.0000,3,0.426512,0.0000,4,0.0000,0.0000,3,-0.488607,0.0000,0.000000,0.0,1.1328,3,0.000,0.0000,0.0,0.00,26.01,10.4480,0.0000,0.0000,1,241.74,1.236457,5.083203,0.0000,0,3134.5335,0.116552,-1.691418,0.0000,0.492671,0.0000,0.0000,0,1.808144,0.0000,1.3520,569.2100,0.00,1.8211,22073.04,7.4493,-1.405560,0.0000,2,0.000,0.0000,8.322939,1,84.8528,1.089389,2.448507,0.0000,2020,0.900,0.0000,-1.292921,0.000000,2,27.0673,0,1403,0.0,0.0,-2.688401,0.0267,4,0.134865,1.1395,0.00,-0.633933,-1.795855,0.0000,143.73,2.5760,1.0858,3.0,0.0000,2.0,0.00,0.000,0,0.000000
6262,3,5.6391,1,0.8356,4.5338,0.4613,0.900,3.210738,0.001504,76.41,1.62,6,1.1010,0.0187,0.0044,0.476302,0.000040,-0.580793,4.6090,10326.9166,0.630,0.000000,7.6050,0.000144,591.6925,0.000,0.0000,1.645344,0.003396,0.1760,0.0000,-1.352946,0.0000,3,71.91,0.0373,0.000,1.0,3,0.0267,0.1911,120.0000,0.63,-4.256409,0.0000,1.1289,0.1855,60.000,1408.54,0.0000,0.000868,0.0,0.1467,1,0.000,2,0.0000,110.79,-3.812181,3,0.000000,1056.8349,-0.659100,0.0530,2.537036,94.8683,0.0,3,0.0,0.0,0.0044,0.0000,,0.981128,0.0000,1.075724,0.0,0.0000,0.0000,0.00,0.5400,38.205,-0.676031,0.00,0.0000,3.0000,0.000000,5.4333,4.05,-0.736903,0.0000,0.0089,1.0600,1.1093,0.0000,1.0000,0.009544,33.84,-10.149250,0.90,0.0222,4,3,1,12.5088,1.9013,0.0000,1,0.0000,0.0,0.0,0.0,19.53,0.0,0.000000,0.6,0.0000,0.0000,1.6923,1,3.24,2.8463,3,0.553616,0.0000,4,0.0000,0.0000,5,-0.387347,0.0000,0.000000,0.0,1.1141,3,0.810,1.1436,0.0,0.00,0.00,9.3547,2.9322,0.0947,1,214.74,3.210738,-0.676031,0.0000,0,0.0000,0.457135,1.963269,1.0835,-0.688746,0.0000,3.9286,0,2.537036,0.0000,1.1267,0.0000,0.00,3.6154,22075.38,6.6747,4.492799,0.0000,2,33.840,0.0000,5.103708,1,152.9706,-0.492499,0.677394,0.0089,2021,0.000,0.0000,-3.812181,0.000180,2,25.6327,0,1412,0.0,0.0,1.734925,0.0133,5,-0.119728,1.1141,0.00,2.699156,-4.256409,0.0000,1.17,0.4200,1.0223,3.0,108.1665,2.0,0.00,0.000,0,0.003196
6263,3,0.0000,1,2.6889,19.6045,0.0000,0.000,-5.095745,0.000000,0.00,0.00,5,0.0000,0.0000,0.0000,0.603294,0.000000,-0.139157,19.7540,2153.3850,0.720,0.004212,0.0000,0.000000,247.3863,0.000,1.1403,-3.782023,0.000000,0.0000,0.1600,0.613642,0.0000,2,46.08,0.1994,0.720,0.0,4,0.0267,0.0711,0.0000,2.43,-2.106944,4.0000,1.1639,1.8484,60.000,1546.90,2.7707,0.001344,0.0,0.3520,1,0.000,2,0.0000,0.81,-1.752915,3,0.000208,0.0000,-1.292361,0.0623,-0.200127,0.0000,1.0,5,0.0,0.0,0.0000,0.0000,2.0,0.888848,0.0000,-1.138674,0.0,0.0000,0.0000,94.77,0.0000,0.000,3.337798,0.00,0.0000,0.0000,0.000000,5.2162,15.75,-0.033797,0.2844,0.0000,1.1138,1.1312,0.0000,0.0000,0.102640,0.00,4.492470,0.00,0.0044,3,2,1,9.2174,1.1293,0.0000,1,0.0400,0.0,0.0,0.0,30.24,0.0,0.000000,0.4,1.0699,0.0927,0.0000,1,0.00,1.3288,3,1.127725,0.0000,5,0.0000,16.0462,5,-1.539538,0.0000,0.000000,0.0,1.1383,3,2.610,0.0000,0.0,0.00,4.68,46.7973,0.0000,0.0000,2,2309.40,-5.095745,3.337798,114.0833,0,0.0000,2.019932,-2.399245,0.0000,0.285714,0.0000,4.9259,0,-0.200127,0.0000,1.2632,1441.5617,0.00,43.5941,19999.08,48.3027,-5.586931,0.0000,2,0.000,0.0000,1.790167,1,67.0820,0.246319,5.818289,0.0000,2019,0.450,0.0000,-1.752915,0.000700,2,56.5981,0,1534,0.0,0.0,-3.153808,0.3733,5,0.337841,1.1415,0.00,0.736060,-2.106944,1.8361,30.24,0.5093,0.0000,3.0,396.1158,,0.00,0.000,0,0.002048


## Model

#### Preprocessing

In [6]:
df.fillna(0, inplace=True)
df['target'] = df['target'].astype(int)
df_score.fillna(0, inplace=True)

In [7]:
df_train, df_test = train_test_split(df, stratify=df.target, test_size=0.4, random_state=SEED)
df_train.shape, df_test.shape

((11241, 207), (7494, 207))

In [8]:
target = 'target'
features = df_train.columns.tolist()
features.remove(target)
len(features)

206

In [9]:
ss = StandardScaler()
X_train = ss.fit_transform(df_train[features])
y_train = df_train[target]

X_test = ss.fit_transform(df_test[features])
y_test = df_test[target]


#### Model Selection

Go through every classifier and quickly score it.

In [10]:
classifying_classifiers = False

In [11]:
if classifying_classifiers:
    for classifier in classifiers:
        model = classifiers[classifier]
        scores = cross_val_score(model, X_train, y_train, cv=10)
        print(classifier + ": " + str(scores.mean()) + ", " + str(scores.std()))


#### Choosing model for current output

In [12]:
classifier = "DT(max_depth=9)"

In [13]:
model = classifiers[classifier]

#### Hyperparameter Tuning

#### Dimensionality Reduction

In [14]:


#best_mean = 0
#for i in range(1, len(X_train)):
  #  pca = PCA(n_components=i)
 #   X_train_pca = pca.fit_transform(X_train)
 #   scores = cross_val_score(model, X_train_pca, y_train, cv=10)
 ##   if best_mean > scores.mean():
 #       break
#    best_mean = scores.mean()
 #   print(i + ": " + str(scores.mean()) + ", " + str(scores.std())) 


In [19]:
i=150
pca = PCA(n_components=i)
X_train_pca = pca.fit_transform(X_train)

scores = cross_val_score(model, X_train_pca, y_train, cv=10)
print(str(i) + ": " + str(scores.mean()) + ", " + str(scores.std())) 

scores = cross_val_score(model, X_train, y_train, cv=10)
print( "Base: " + str(scores.mean()) + ", " + str(scores.std())) 

150: 0.743971451166469, 0.011609014783534282
Base: 0.7844483194938711, 0.007994370772421586


#### Using Feature cut down from churn

In [22]:
print("Number of features starting:", len(features))

Number of features starting: 206


In [23]:
# list of our modules that can be used with RFECV
suitable_models = [model for model in models if model['name'].split()[0] in ['LogisticRegression','DT'] ]

In [None]:
from sklearn.model_selection import StratifiedKFold
from sklearn.feature_selection import RFECV

model = suitable_models[1]

rfecv = RFECV(estimator=model["ref"], step=1, cv=StratifiedKFold(2), scoring="f1")
rfecv.fit(X_train, y_train)
print("Optimal number of features : %d" % rfecv.n_features_)

# Plot number of features VS. cross-validation scores
plt.figure()
plt.title(f"{model['name']} with RFECV")
plt.xlabel("Number of features selected")
plt.ylabel("Cross validation score")
plt.plot(range(1, len(rfecv.grid_scores_) + 1), rfecv.grid_scores_)
plt.show()

In [170]:
# list of features selected
features_selected = np.array(features)[rfecv.support_].tolist()
features_selected

['Intl_Plan', 'VMail_Plan', 'Intl_Calls', 'CustServ_Calls', 'Total_Charge']

### Training - After Feature Selection

In [171]:
display(df_model.loc[:, features_selected + [target]].head(1))

# normalise 
X_train = scaler.fit_transform(df_train.loc[:,features_selected].values)
y_train = df_train.Churn.values
X_test = scaler.transform(df_test.loc[:,features_selected].values)
y_test = df_test.Churn.values

train_models()

Unnamed: 0,Intl_Plan,VMail_Plan,Intl_Calls,CustServ_Calls,Total_Charge,Churn
0,0,1,3,1,75.56,0



Model NB
	CV scores ['0.4912', '0.4074', '0.3333', '0.5200', '0.4074', '0.4727', '0.5263', '0.5263', '0.4151', '0.3214']
	mean=44.21% std=0.0729

Model KNN
	CV scores ['0.6939', '0.7200', '0.8000', '0.8302', '0.7083', '0.7200', '0.6939', '0.7755', '0.6667', '0.5714']
	mean=71.80% std=0.0693

Model LogisticRegression
	CV scores ['0.3684', '0.1667', '0.0976', '0.2703', '0.0625', '0.2703', '0.4186', '0.3636', '0.2439', '0.1667']
	mean=24.28% std=0.1131

Model SVC
	CV scores ['0.7083', '0.6800', '0.8235', '0.8235', '0.6957', '0.7347', '0.6809', '0.7660', '0.6809', '0.5581']
	mean=71.52% std=0.0742

Model DT (pruned=4)
	CV scores ['0.8400', '0.8235', '0.8627', '0.9057', '0.8400', '0.8627', '0.8627', '0.8846', '0.8400', '0.8846']
	mean=86.07% std=0.0242

Best Performing Model DT (pruned=4) with mean=86.07%


## Evaluation

In [1]:
model.fit(X_train, y_train)

NameError: name 'model' is not defined

In [2]:
y_pred = model.predict(X_test)

NameError: name 'model' is not defined

In [3]:
print(classification_report(y_test, y_pred))

NameError: name 'classification_report' is not defined

## Scoring
* Retain on all data and generate predictions on df_score

In [None]:
ss = StandardScaler()
X_train = ss.fit_transform(df[features])
y_train = df[target]

X_test = ss.fit_transform(df_score[features])

In [None]:
model.fit(X_train, y_train)

In [None]:
y_pred = model.predict(X_test)

In [None]:
y_pred

In [None]:
df_score['target'] = y_pred
df_score[['target']].reset_index().to_csv("output/solution.csv", index=False)