In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression

In [2]:
from sklearn.metrics import roc_auc_score

In [3]:
# Load each base model's predictions for the training set (same read order as before).
xgb_train_output, nn_train_output, gru_train_output = (
    pd.read_csv(path)
    for path in ('xgb_train_output.csv', 'nn_train_output.csv', 'gru_train_output.csv')
)

# Load each base model's predictions for the test set. The nn/gru files carry
# an extra 'blend' column that is discarded right after reading.
xgb_test_output = pd.read_csv('xgb_test_output.csv')
nn_test_output, gru_test_output = (
    pd.read_csv(path).drop(columns=['blend'])
    for path in ('nn_test_output.csv', 'gru_test_output.csv')
)

In [4]:
# Eyeball the XGBoost training predictions: a 'label' column followed by columns '0'..'4'.
xgb_train_output

Unnamed: 0,label,0,1,2,3,4
0,0,0.000370,0.019684,1.369640e-04,2.791729e-04,0.000321
1,1,0.999702,0.999730,8.768851e-01,9.998814e-01,0.999822
2,1,0.999326,0.999129,8.145804e-01,9.995720e-01,0.999319
3,1,0.999692,0.999892,9.999219e-01,9.998901e-01,0.995724
4,1,0.999981,0.999995,9.999994e-01,9.999980e-01,0.506189
...,...,...,...,...,...,...
25963,1,0.999993,0.999952,9.996278e-01,9.999437e-01,0.999982
25964,0,0.000004,0.000005,4.484758e-07,5.730712e-07,0.000013
25965,1,0.004114,0.999252,9.996789e-01,9.996320e-01,0.999472
25966,1,0.999999,0.999924,9.999989e-01,9.999944e-01,0.999793


In [5]:
# Column names of the three test-prediction frames, in nn / gru / xgb order.
tuple(frame.columns for frame in (nn_test_output, gru_test_output, xgb_test_output))

(Index(['sequence', 'nn_model_1e-05_isplit_2.pickle',
        'nn_model_1e-05_isplit_0.pickle', 'nn_model_1e-05_isplit_4.pickle',
        'nn_model_1e-05_isplit_3.pickle', 'nn_model_1e-05_isplit_1.pickle'],
       dtype='object'),
 Index(['sequence', 'gru_model_1e-05_isplit_1.pickle',
        'gru_model_1e-05_isplit_3.pickle', 'gru_model_1e-05_isplit_0.pickle',
        'gru_model_1e-05_isplit_2.pickle', 'gru_model_1e-05_isplit_4.pickle'],
       dtype='object'),
 Index(['sequence', '0', '1', '2', '3', '4'], dtype='object'))

In [6]:
# Put all training predictions side by side. Only the XGBoost frame keeps its
# 'label' column so the combined frame holds a single copy of the target.
train_parts = [
    xgb_train_output,
    nn_train_output.drop(columns=['label']),
    gru_train_output.drop(columns=['label']),
]
train_output = pd.concat(train_parts, axis=1)

In [7]:
# Inspect the combined training frame (label + xgb + nn + gru prediction columns).
train_output

Unnamed: 0,label,0,1,2,3,4,nn_model_1e-05_isplit_2.pickle,nn_model_1e-05_isplit_0.pickle,nn_model_1e-05_isplit_4.pickle,nn_model_1e-05_isplit_3.pickle,nn_model_1e-05_isplit_1.pickle,gru_model_1e-05_isplit_1.pickle,gru_model_1e-05_isplit_3.pickle,gru_model_1e-05_isplit_0.pickle,gru_model_1e-05_isplit_2.pickle,gru_model_1e-05_isplit_4.pickle
0,0,0.000370,0.019684,1.369640e-04,2.791729e-04,0.000321,0.000428,0.000670,0.001373,0.004204,0.000526,0.000185,0.000079,0.000003,0.000054,0.000115
1,1,0.999702,0.999730,8.768851e-01,9.998814e-01,0.999822,0.822656,0.948457,0.830469,0.989444,0.875767,0.876932,0.615339,0.743665,0.652782,0.823054
2,1,0.999326,0.999129,8.145804e-01,9.995720e-01,0.999319,0.993019,0.998480,0.991656,0.982240,0.985368,0.998456,0.997478,0.997330,0.988788,0.972890
3,1,0.999692,0.999892,9.999219e-01,9.998901e-01,0.995724,0.783070,0.870413,0.520563,0.864861,0.651066,0.913660,0.773436,0.820726,0.341078,0.404788
4,1,0.999981,0.999995,9.999994e-01,9.999980e-01,0.506189,0.963354,0.990503,0.659269,0.984968,0.913403,0.964888,0.998668,0.999307,0.929100,0.963484
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25963,1,0.999993,0.999952,9.996278e-01,9.999437e-01,0.999982,0.992234,0.988758,0.989263,0.988569,0.967122,0.999389,0.999596,0.992314,0.998707,0.998608
25964,0,0.000004,0.000005,4.484758e-07,5.730712e-07,0.000013,0.007409,0.000589,0.003515,0.022195,0.002654,0.061340,0.013662,0.021130,0.007287,0.012153
25965,1,0.004114,0.999252,9.996789e-01,9.996320e-01,0.999472,0.837370,0.553210,0.819429,0.954351,0.712913,0.551010,0.840445,0.944922,0.959467,0.734983
25966,1,0.999999,0.999924,9.999989e-01,9.999944e-01,0.999793,0.986278,0.972275,0.977593,0.975883,0.985457,0.995877,0.997890,0.998813,0.997893,0.993944


In [8]:
# Put all test predictions side by side. Only the XGBoost frame keeps its
# 'sequence' column so the combined frame holds a single copy of the id.
test_parts = [
    xgb_test_output,
    nn_test_output.drop(columns=['sequence']),
    gru_test_output.drop(columns=['sequence']),
]
test_output = pd.concat(test_parts, axis=1)

In [9]:
# Inspect the combined test frame (sequence id + xgb + nn + gru prediction columns).
test_output

Unnamed: 0,sequence,0,1,2,3,4,nn_model_1e-05_isplit_2.pickle,nn_model_1e-05_isplit_0.pickle,nn_model_1e-05_isplit_4.pickle,nn_model_1e-05_isplit_3.pickle,nn_model_1e-05_isplit_1.pickle,gru_model_1e-05_isplit_1.pickle,gru_model_1e-05_isplit_3.pickle,gru_model_1e-05_isplit_0.pickle,gru_model_1e-05_isplit_2.pickle,gru_model_1e-05_isplit_4.pickle
0,25968,0.999739,9.999932e-01,9.999812e-01,0.999842,9.999605e-01,0.988292,0.982424,0.968129,0.991629,0.982700,0.975159,0.992379,0.991872,0.955924,0.926214
1,25969,0.999125,9.990976e-01,9.999573e-01,0.999402,9.997385e-01,0.983622,0.987861,0.957940,0.978525,0.982838,0.999122,0.999387,0.999008,0.995885,0.998611
2,25970,0.000516,3.055108e-05,1.652879e-06,0.000028,4.810690e-07,0.000108,0.000620,0.000238,0.000859,0.000418,0.000002,0.000010,0.000016,0.000005,0.000048
3,25971,0.193302,5.805919e-01,1.210721e-01,0.684031,1.405497e-02,0.608966,0.925262,0.710716,0.878715,0.826165,0.992676,0.886703,0.972923,0.870804,0.953395
4,25972,0.963764,5.462738e-01,9.998817e-01,0.995877,8.971304e-01,0.028058,0.030483,0.031900,0.138249,0.422702,0.667639,0.001045,0.731680,0.070348,0.003636
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12213,38181,0.999352,9.965652e-01,9.997079e-01,0.999117,9.985447e-01,0.996996,0.865621,0.891795,0.965554,0.497939,0.247908,0.466485,0.974230,0.924690,0.475540
12214,38182,0.901120,2.903413e-03,9.095907e-01,0.200363,3.895281e-02,0.838029,0.922800,0.459102,0.916497,0.667446,0.163819,0.437104,0.819549,0.094952,0.396854
12215,38183,0.680357,8.884818e-01,5.249078e-01,0.509008,6.546109e-01,0.170850,0.155390,0.107199,0.212193,0.102878,0.822619,0.137272,0.266712,0.483808,0.250369
12216,38184,0.000020,6.034665e-07,4.094243e-08,0.000012,1.104939e-05,0.000924,0.002528,0.001428,0.003295,0.002429,0.001910,0.007833,0.000770,0.010178,0.003099


In [10]:
# ROC AUC of every prediction column against the label over the full training
# frame. Position 0 is the 'label' column itself, so it is skipped.
prediction_names = list(train_output.columns)[1:]
for name in prediction_names:
    auc = roc_auc_score(train_output['label'], train_output[name])
    print(f"{name} {auc}")

0 0.9944337298635817
1 0.9938221570342923
2 0.9947823318928353
3 0.9941975736916757
4 0.995349069335172
nn_model_1e-05_isplit_2.pickle 0.987833813795948
nn_model_1e-05_isplit_0.pickle 0.9916117075045059
nn_model_1e-05_isplit_4.pickle 0.9885196673863818
nn_model_1e-05_isplit_3.pickle 0.9889905323749753
nn_model_1e-05_isplit_1.pickle 0.9884527242416505
gru_model_1e-05_isplit_1.pickle 0.9768982502638043
gru_model_1e-05_isplit_3.pickle 0.9785030201913885
gru_model_1e-05_isplit_0.pickle 0.9802978444681099
gru_model_1e-05_isplit_2.pickle 0.9773209758619351
gru_model_1e-05_isplit_4.pickle 0.9788664576116282


In [11]:
# Logistic-regression blender with scikit-learn's default settings.
# LogisticRegression is already imported in the notebook's first cell, so the
# duplicate mid-notebook import that used to sit here has been dropped.
blend = LogisticRegression()

In [12]:
# Fit the blender on every prediction column (everything except the target).
stack_features = train_output.drop(columns=['label'])
stack_target = train_output['label']
blend.fit(stack_features, stack_target)

LogisticRegression()

In [13]:
# Learned weight per input column, in the column order used for blend.fit.
blend.coef_

array([[3.53114387, 3.45515733, 3.49014357, 3.50183895, 3.62964273,
        0.29628002, 0.66423224, 0.37950713, 0.34038532, 0.50742083,
        0.13565306, 0.23703361, 0.27292166, 0.19633644, 0.21481544]])

In [14]:
# Raw sensor time series; used here only to map sequences to subjects.
raw_series = pd.read_csv('train.csv')
# Distinct subject ids in order of first appearance.
subjects = pd.unique(raw_series['subject'])
subjects

array([ 47,  66, 542, 437, 510, 246, 520,  78, 526,   4, 508, 651, 123,
       100,  99, 381, 511, 263, 543, 239, 317, 489,  48, 357, 333, 232,
       623, 661, 290, 582, 465, 207, 443, 394, 573, 579, 365,  35,  82,
       164, 322, 235, 138,  89,  70,  76, 346, 662, 107, 505, 479, 140,
       188,  59, 441, 596, 671, 426, 191, 330, 597, 392, 120, 297, 415,
       416, 420,  60, 438,  98, 487, 665, 358, 103, 590, 122, 256, 535,
       127, 146, 473,  71,  88,  41,  56, 352,  38, 198, 468, 460, 558,
       203, 196, 311, 496, 131, 491,  75, 408, 285, 422, 583, 553, 364,
       499, 141, 424, 229, 351, 374, 530, 598, 607, 222, 647, 310, 395,
       630, 476, 189, 454, 213, 431, 318, 166, 249,  42, 369, 125, 309,
       538, 197, 251,  44, 622,  72, 337, 209, 477, 359, 555,   2,  94,
       269,  52, 397, 480, 240, 185,  95, 559, 502, 386, 258, 545, 450,
        40, 463, 170, 294, 413, 667, 306, 341,  25,   1, 262, 646, 272,
       237, 613, 640,  91, 266, 338,   0, 144, 563, 417,  18, 10

In [15]:
# List the column names available for building the per-fold column groups.
train_output.columns

Index(['label', '0', '1', '2', '3', '4', 'nn_model_1e-05_isplit_2.pickle',
       'nn_model_1e-05_isplit_0.pickle', 'nn_model_1e-05_isplit_4.pickle',
       'nn_model_1e-05_isplit_3.pickle', 'nn_model_1e-05_isplit_1.pickle',
       'gru_model_1e-05_isplit_1.pickle', 'gru_model_1e-05_isplit_3.pickle',
       'gru_model_1e-05_isplit_0.pickle', 'gru_model_1e-05_isplit_2.pickle',
       'gru_model_1e-05_isplit_4.pickle'],
      dtype='object')

In [16]:
# cols[k] names the xgb / nn / gru prediction columns that share split number k.
cols = [
    [str(fold), f'nn_model_1e-05_isplit_{fold}.pickle', f'gru_model_1e-05_isplit_{fold}.pickle']
    for fold in range(5)
]

In [17]:
# Shuffled, seeded K-fold split over the subject array (not over rows).
# n_splits is left at scikit-learn's default.
# NOTE(review): presumably this reproduces the split the base models were trained with — confirm.
kf = KFold(shuffle=True, random_state=123)
# Materialise the (train positions, test positions) pairs so they can be reused.
index = [split for split in kf.split(subjects)]
index

[(array([  0,   1,   2,   3,   4,   6,   7,   8,  10,  12,  14,  15,  16,
          17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
          30,  32,  33,  34,  36,  37,  38,  39,  41,  42,  44,  45,  46,
          47,  49,  51,  52,  53,  55,  56,  58,  60,  61,  62,  63,  64,
          65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
          80,  81,  83,  84,  86,  87,  88,  89,  90,  92,  93,  94,  95,
          96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 108, 109,
         110, 111, 112, 113, 114, 115, 116, 117, 118, 121, 122, 123, 124,
         125, 126, 127, 128, 129, 130, 133, 135, 136, 137, 139, 140, 141,
         142, 143, 144, 145, 146, 148, 149, 152, 153, 154, 156, 157, 158,
         160, 161, 163, 165, 167, 168, 170, 171, 173, 174, 175, 176, 177,
         178, 179, 180, 183, 184, 186, 187, 188, 189, 190, 191, 193, 194,
         197, 198, 199, 200, 201, 203, 204, 205, 206, 207, 208, 211, 212,
         213, 214, 215, 216, 218, 219,

In [18]:
# Inspect the raw series: one row per (sequence, step), with the subject id and sensor_00..sensor_12.
raw_series

Unnamed: 0,sequence,subject,step,sensor_00,sensor_01,sensor_02,sensor_03,sensor_04,sensor_05,sensor_06,sensor_07,sensor_08,sensor_09,sensor_10,sensor_11,sensor_12
0,0,47,0,-0.196291,0.112395,1.000000,0.329204,-1.004660,-0.131638,-0.127505,0.368702,-0.1,-0.963873,-0.985069,0.531893,4.751492
1,0,47,1,-0.447450,0.134454,1.000000,-0.658407,0.162495,0.340314,-0.209472,-0.867176,0.2,-0.301301,0.082733,-0.231481,0.454390
2,0,47,2,0.326893,-0.694328,1.000000,0.330088,0.473678,1.280479,-0.094718,0.535878,1.4,1.002168,0.449221,-0.586420,-4.736147
3,0,47,3,0.523184,0.751050,1.000000,0.976991,-0.563287,-0.720269,0.793260,0.951145,-0.3,-0.995665,-0.434290,1.344650,0.429241
4,0,47,4,0.272025,1.074580,1.000000,-0.136283,0.398579,0.044877,0.560109,-0.541985,-0.9,1.055636,0.812631,0.123457,-0.223359
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1558075,25967,327,55,-0.282844,-1.217437,-1.666153,0.586726,-0.930698,-0.451010,-0.651184,0.368702,0.4,0.008671,-0.723536,-0.353909,-0.914749
1558076,25967,327,56,0.130603,0.349790,-1.666153,-0.324779,0.775324,-0.332835,0.099271,0.122137,-0.2,0.644509,0.691407,-0.613169,-0.515772
1558077,25967,327,57,-0.579598,0.429622,-1.666153,0.319469,0.308861,0.282723,-0.512750,0.012214,-1.6,-0.424133,0.716855,1.628601,0.928389
1558078,25967,327,58,1.278980,1.711134,-1.522820,0.802655,-0.460541,-0.055348,2.405282,0.043511,1.9,0.283960,-0.914914,0.364198,0.211424


In [19]:
# Re-display the combined test frame before any blend columns are added to it.
test_output

Unnamed: 0,sequence,0,1,2,3,4,nn_model_1e-05_isplit_2.pickle,nn_model_1e-05_isplit_0.pickle,nn_model_1e-05_isplit_4.pickle,nn_model_1e-05_isplit_3.pickle,nn_model_1e-05_isplit_1.pickle,gru_model_1e-05_isplit_1.pickle,gru_model_1e-05_isplit_3.pickle,gru_model_1e-05_isplit_0.pickle,gru_model_1e-05_isplit_2.pickle,gru_model_1e-05_isplit_4.pickle
0,25968,0.999739,9.999932e-01,9.999812e-01,0.999842,9.999605e-01,0.988292,0.982424,0.968129,0.991629,0.982700,0.975159,0.992379,0.991872,0.955924,0.926214
1,25969,0.999125,9.990976e-01,9.999573e-01,0.999402,9.997385e-01,0.983622,0.987861,0.957940,0.978525,0.982838,0.999122,0.999387,0.999008,0.995885,0.998611
2,25970,0.000516,3.055108e-05,1.652879e-06,0.000028,4.810690e-07,0.000108,0.000620,0.000238,0.000859,0.000418,0.000002,0.000010,0.000016,0.000005,0.000048
3,25971,0.193302,5.805919e-01,1.210721e-01,0.684031,1.405497e-02,0.608966,0.925262,0.710716,0.878715,0.826165,0.992676,0.886703,0.972923,0.870804,0.953395
4,25972,0.963764,5.462738e-01,9.998817e-01,0.995877,8.971304e-01,0.028058,0.030483,0.031900,0.138249,0.422702,0.667639,0.001045,0.731680,0.070348,0.003636
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12213,38181,0.999352,9.965652e-01,9.997079e-01,0.999117,9.985447e-01,0.996996,0.865621,0.891795,0.965554,0.497939,0.247908,0.466485,0.974230,0.924690,0.475540
12214,38182,0.901120,2.903413e-03,9.095907e-01,0.200363,3.895281e-02,0.838029,0.922800,0.459102,0.916497,0.667446,0.163819,0.437104,0.819549,0.094952,0.396854
12215,38183,0.680357,8.884818e-01,5.249078e-01,0.509008,6.546109e-01,0.170850,0.155390,0.107199,0.212193,0.102878,0.822619,0.137272,0.266712,0.483808,0.250369
12216,38184,0.000020,6.034665e-07,4.094243e-08,0.000012,1.104939e-05,0.000924,0.002528,0.001428,0.003295,0.002429,0.001910,0.007833,0.000770,0.010178,0.003099


In [20]:
# Regularisation grid shared by every per-fold blender (loop-invariant, so built once).
params = {'C': [1e-3, 1e-2, 1e-1, 1.0, 5.0, 10.0]}

# For each fold k: take the sequences of the subjects in that fold's test split,
# fit a logistic-regression blender on the three split-k model columns for those
# sequences only, then score ALL train and test rows with that blender.
for icol, (_train_index, test_index) in enumerate(index):
    test_subjects = subjects[test_index]
    test_seqs = raw_series.loc[raw_series['subject'].isin(test_subjects), 'sequence'].unique()
    # NOTE(review): the sequence ids are used as row POSITIONS here, which assumes
    # row i of train_output is sequence i — confirm against how the CSVs were written.
    tmp = train_output.iloc[test_seqs][['label'] + cols[icol]]

    # Fixed: this was a plain string, so the literal text "{icol}" was printed.
    print(f'doing {icol} roc_auc_score:')
    for tcol in tmp.columns[1:]:
        print(f"{roc_auc_score(tmp['label'], tmp[tcol])}")

    # NOTE(review): no `scoring` is passed, so the search ranks C by the estimator's
    # default score (accuracy for classifiers); best_score_ below is therefore not
    # an AUC, unlike the numbers printed above.
    gcv = GridSearchCV(LogisticRegression(), params)
    gcv.fit(tmp.drop(columns=['label']), tmp['label'])
    print(f"{gcv.best_params_, gcv.best_estimator_.coef_, gcv.best_score_}")

    # Positive-class probability from this fold's blender, added as a new column.
    train_output[f'blend_{icol}'] = gcv.predict_proba(train_output[cols[icol]])[:, 1]
    test_output[f'blend_{icol}'] = gcv.predict_proba(test_output[cols[icol]])[:, 1]

doing {icol} roc_auc_score:
0.9219946385126003
0.9766217373021355
0.9605706905355481
({'C': 1.0}, array([[1.57253878, 4.31992046, 2.13351451]]), 0.9285000037521481)
doing {icol} roc_auc_score:
0.9249090020967141
0.9625054841062948
0.9464056784732248
({'C': 0.01}, array([[1.38364271, 1.82570212, 1.53982785]]), 0.9112802663438255)
doing {icol} roc_auc_score:
0.9217263476176265
0.9648601960469226
0.9477795505153691
({'C': 1.0}, array([[1.61202018, 3.47289313, 2.17067457]]), 0.9097344064056851)
doing {icol} roc_auc_score:
0.9167582371481191
0.9715526448112124
0.9510779895826903
({'C': 5.0}, array([[1.57820114, 4.83630493, 1.27786379]]), 0.9186473429951691)
doing {icol} roc_auc_score:
0.9228727641113286
0.9705471305612051
0.9556084942502958
({'C': 0.1}, array([[1.60916708, 3.34387551, 2.22554882]]), 0.91511321290517)


In [21]:
# Inspect the training frame again, now carrying the blend_0..blend_4 columns added by the per-fold loop.
train_output

Unnamed: 0,label,0,1,2,3,4,nn_model_1e-05_isplit_2.pickle,nn_model_1e-05_isplit_0.pickle,nn_model_1e-05_isplit_4.pickle,nn_model_1e-05_isplit_3.pickle,...,gru_model_1e-05_isplit_1.pickle,gru_model_1e-05_isplit_3.pickle,gru_model_1e-05_isplit_0.pickle,gru_model_1e-05_isplit_2.pickle,gru_model_1e-05_isplit_4.pickle,blend_0,blend_1,blend_2,blend_3,blend_4
0,0,0.000370,0.019684,1.369640e-04,2.791729e-04,0.000321,0.000428,0.000670,0.001373,0.004204,...,0.000185,0.000079,0.000003,0.000054,0.000115,0.016599,0.099884,0.026799,0.020957,0.032719
1,1,0.999702,0.999730,8.768851e-01,9.998814e-01,0.999822,0.822656,0.948457,0.830469,0.989444,...,0.876932,0.615339,0.743665,0.652782,0.823054,0.959720,0.891436,0.890269,0.963896,0.944049
2,1,0.999326,0.999129,8.145804e-01,9.995720e-01,0.999319,0.993019,0.998480,0.991656,0.982240,...,0.998456,0.997478,0.997330,0.988788,0.972890,0.980687,0.923571,0.964908,0.976742,0.975811
3,1,0.999692,0.999892,9.999219e-01,9.998901e-01,0.995724,0.783070,0.870413,0.520563,0.864861,...,0.913660,0.773436,0.820726,0.341078,0.404788,0.952485,0.852209,0.814232,0.947056,0.700977
4,1,0.999981,0.999995,9.999994e-01,9.999980e-01,0.506189,0.963354,0.990503,0.659269,0.984968,...,0.964888,0.998668,0.999307,0.929100,0.963484,0.980126,0.909702,0.967086,0.977089,0.854636
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25963,1,0.999993,0.999952,9.996278e-01,9.999437e-01,0.999982,0.992234,0.988758,0.989263,0.988569,...,0.999389,0.999596,0.992314,0.998707,0.998608,0.979684,0.921373,0.974198,0.977500,0.976970
25964,0,0.000004,0.000005,4.484758e-07,5.730712e-07,0.000013,0.007409,0.000589,0.003515,0.022195,...,0.061340,0.013662,0.021130,0.007287,0.012153,0.017335,0.106435,0.027855,0.023199,0.033794
25965,1,0.004114,0.999252,9.996789e-01,9.996320e-01,0.999472,0.837370,0.553210,0.819429,0.954351,...,0.551010,0.840445,0.944922,0.959467,0.734983,0.581059,0.786783,0.952946,0.967771,0.930359
25966,1,0.999999,0.999924,9.999989e-01,9.999944e-01,0.999793,0.986278,0.972275,0.977593,0.975883,...,0.995877,0.997890,0.998813,0.997893,0.993944,0.978511,0.923379,0.973643,0.976061,0.975825


In [22]:
# Inspect the test frame again, now carrying the blend_0..blend_4 columns added by the per-fold loop.
test_output

Unnamed: 0,sequence,0,1,2,3,4,nn_model_1e-05_isplit_2.pickle,nn_model_1e-05_isplit_0.pickle,nn_model_1e-05_isplit_4.pickle,nn_model_1e-05_isplit_3.pickle,...,gru_model_1e-05_isplit_1.pickle,gru_model_1e-05_isplit_3.pickle,gru_model_1e-05_isplit_0.pickle,gru_model_1e-05_isplit_2.pickle,gru_model_1e-05_isplit_4.pickle,blend_0,blend_1,blend_2,blend_3,blend_4
0,25968,0.999739,9.999932e-01,9.999812e-01,0.999842,9.999605e-01,0.988292,0.982424,0.968129,0.991629,...,0.975159,0.992379,0.991872,0.955924,0.926214,0.979105,0.920732,0.971396,0.977619,0.971135
1,25969,0.999125,9.990976e-01,9.999573e-01,0.999402,9.997385e-01,0.983622,0.987861,0.957940,0.978525,...,0.999122,0.999387,0.999008,0.995885,0.998611,0.979863,0.923314,0.973291,0.976381,0.974482
2,25970,0.000516,3.055108e-05,1.652879e-06,0.000028,4.810690e-07,0.000108,0.000620,0.000238,0.000859,...,0.000002,0.000010,0.000016,0.000005,0.000048,0.016599,0.097424,0.026762,0.020618,0.032578
3,25971,0.193302,5.805919e-01,1.210721e-01,0.684031,1.405497e-02,0.608966,0.925262,0.710716,0.878715,...,0.992676,0.886703,0.972923,0.870804,0.953395,0.908179,0.833858,0.647083,0.930692,0.755670
4,25972,0.963764,5.462738e-01,9.998817e-01,0.995877,8.971304e-01,0.028058,0.030483,0.031900,0.138249,...,0.667639,0.001045,0.731680,0.070348,0.003636,0.293823,0.581426,0.150324,0.164744,0.137821
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12213,38181,0.999352,9.965652e-01,9.997079e-01,0.999117,9.985447e-01,0.996996,0.865621,0.891795,0.965554,...,0.247908,0.466485,0.974230,0.924690,0.475540,0.964576,0.608907,0.970321,0.951554,0.905107
12214,38182,0.901120,2.903413e-03,9.095907e-01,0.200363,3.895281e-02,0.838029,0.922800,0.459102,0.916497,...,0.163819,0.437104,0.819549,0.094952,0.396854,0.955503,0.320350,0.728838,0.808802,0.286831
12215,38183,0.680357,8.884818e-01,5.249078e-01,0.509008,6.546109e-01,0.170850,0.155390,0.107199,0.212193,...,0.822619,0.137272,0.266712,0.483808,0.250369,0.144924,0.612270,0.248928,0.134700,0.194214
12216,38184,0.000020,6.034665e-07,4.094243e-08,0.000012,1.104939e-05,0.000924,0.002528,0.001428,0.003295,...,0.001910,0.007833,0.000770,0.010178,0.003099,0.016748,0.098003,0.027419,0.021061,0.032920


In [23]:
# Baseline submission: the fold-0 blender's probability, written under the
# header names 'sequence' and 'state'.
baseline_submission = test_output[['sequence', 'blend_0']]
baseline_submission.to_csv('blend_baseline.csv', header=['sequence', 'state'], index=False)

In [24]:
# Second-level blender: logistic regression over the five per-fold blend columns.
# A fresh search object (same estimator and C grid as the per-fold blenders) is
# built here instead of re-fitting whatever `gcv` was left behind by the last
# iteration of the per-fold loop, so this cell no longer depends on leaked loop state.
gcv = GridSearchCV(LogisticRegression(), {'C': [1e-3, 1e-2, 1e-1, 1.0, 5.0, 10.0]})
gcv.fit(train_output[['blend_0','blend_1','blend_2','blend_3','blend_4']], train_output['label'])

GridSearchCV(estimator=LogisticRegression(),
             param_grid={'C': [0.001, 0.01, 0.1, 1.0, 5.0, 10.0]})

In [25]:
# Best cross-validated score, the C that achieved it, and the fitted weight per blend_k column.
gcv.best_score_, gcv.best_params_, gcv.best_estimator_.coef_

(0.9774338637932747,
 {'C': 0.1},
 array([[2.15859183, 3.76967488, 1.53781406, 1.32508894, 2.64424737]]))

In [27]:
# Score both frames with the second-level blender; keep the positive-class probability.
blend_inputs = ['blend_0','blend_1','blend_2','blend_3','blend_4']
for frame in (train_output, test_output):
    frame['blend_all'] = gcv.predict_proba(frame[blend_inputs])[:, 1]

In [28]:
# Final submission: the second-level blend, written under the header names
# 'sequence' and 'state'.
final_submission = test_output[['sequence', 'blend_all']]
final_submission.to_csv('blend_all.csv', header=['sequence', 'state'], index=False)

In [29]:
# Final look at the test frame, including the blend_all column that was just exported.
test_output

Unnamed: 0,sequence,0,1,2,3,4,nn_model_1e-05_isplit_2.pickle,nn_model_1e-05_isplit_0.pickle,nn_model_1e-05_isplit_4.pickle,nn_model_1e-05_isplit_3.pickle,...,gru_model_1e-05_isplit_3.pickle,gru_model_1e-05_isplit_0.pickle,gru_model_1e-05_isplit_2.pickle,gru_model_1e-05_isplit_4.pickle,blend_0,blend_1,blend_2,blend_3,blend_4,blend_all
0,25968,0.999739,9.999932e-01,9.999812e-01,0.999842,9.999605e-01,0.988292,0.982424,0.968129,0.991629,...,0.992379,0.991872,0.955924,0.926214,0.979105,0.920732,0.971396,0.977619,0.971135,0.994080
1,25969,0.999125,9.990976e-01,9.999573e-01,0.999402,9.997385e-01,0.983622,0.987861,0.957940,0.978525,...,0.999387,0.999008,0.995885,0.998611,0.979863,0.923314,0.973291,0.976381,0.974482,0.994205
2,25970,0.000516,3.055108e-05,1.652879e-06,0.000028,4.810690e-07,0.000108,0.000620,0.000238,0.000859,...,0.000010,0.000016,0.000005,0.000048,0.016599,0.097424,0.026762,0.020618,0.032578,0.005167
3,25971,0.193302,5.805919e-01,1.210721e-01,0.684031,1.405497e-02,0.608966,0.925262,0.710716,0.878715,...,0.886703,0.972923,0.870804,0.953395,0.908179,0.833858,0.647083,0.930692,0.755670,0.971035
4,25972,0.963764,5.462738e-01,9.998817e-01,0.995877,8.971304e-01,0.028058,0.030483,0.031900,0.138249,...,0.001045,0.731680,0.070348,0.003636,0.293823,0.581426,0.150324,0.164744,0.137821,0.101737
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12213,38181,0.999352,9.965652e-01,9.997079e-01,0.999117,9.985447e-01,0.996996,0.865621,0.891795,0.965554,...,0.466485,0.974230,0.924690,0.475540,0.964576,0.608907,0.970321,0.951554,0.905107,0.976011
12214,38182,0.901120,2.903413e-03,9.095907e-01,0.200363,3.895281e-02,0.838029,0.922800,0.459102,0.916497,...,0.437104,0.819549,0.094952,0.396854,0.955503,0.320350,0.728838,0.808802,0.286831,0.599445
12215,38183,0.680357,8.884818e-01,5.249078e-01,0.509008,6.546109e-01,0.170850,0.155390,0.107199,0.212193,...,0.137272,0.266712,0.483808,0.250369,0.144924,0.612270,0.248928,0.134700,0.194214,0.106951
12216,38184,0.000020,6.034665e-07,4.094243e-08,0.000012,1.104939e-05,0.000924,0.002528,0.001428,0.003295,...,0.007833,0.000770,0.010178,0.003099,0.016748,0.098003,0.027419,0.021061,0.032920,0.005193
