In [2]:
import torch
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

In [1]:
def prep_withsubject(df):
    scs = {}
    for col in df.columns[3:]:
        sc = StandardScaler()
        df[col] = sc.fit_transform(df[col].values.reshape(-1,1))
        scs[col] = sc

    for sensor in df.columns[3:]:
        df[sensor+'_square'] = np.square(df[sensor])
        df[sensor+'_diff'] = df[sensor].diff()
        df.loc[df['step']==0, sensor+'_diff'] = 0.0

        tmp = df.groupby(['subject','step']).agg({sensor:'mean'}).to_dict()[sensor]
        df[sensor+'_subject_mean'] = pd.Series(zip(df['subject'], df['step'])).map(tmp)

        tmp = df.groupby(['subject','step']).agg({sensor+'_square':'mean'}).to_dict()[sensor+'_square']
        df[sensor+'_square_subject_mean'] = pd.Series(zip(df['subject'], df['step'])).map(tmp)

        tmp = df.groupby(['subject','step']).agg({sensor+'_diff':'mean'}).to_dict()[sensor+'_diff']
        df[sensor+'_diff_subject_mean'] = pd.Series(zip(df['subject'], df['step'])).map(tmp)

    for col in df.columns[16:]:
        sc = StandardScaler()
        df[col] = sc.fit_transform(df[col].values.reshape(-1,1))
        scs[col] = sc

    return scs

In [3]:
class MyDataset(Dataset):
    def __init__(self, series, labels=None):
        self.X = series.drop(columns=['sequence','subject','step']).values
        self.X = self.X.reshape(-1,60,series.shape[1]-3).transpose([0,2,1]).copy()
        if labels is None:
            self.y = None
        else:
            self.y = labels['state'].values
   
    def __len__(self):
        return len(self.X)
   
    def __getitem__(self, idx):
        ''' input tensor shape is N*C*L '''
        X = self.X[idx]
        if self.y is None:
            y = -1
        else:
            y = self.y[idx]
        return (torch.tensor(X, dtype=torch.float32), torch.tensor(y))

In [4]:
class MyFCNModel(nn.Module):
    def __init__(self, input_channel):
        super(MyFCNModel, self).__init__()
        torch.manual_seed(123)

        self.conv1d_1 = nn.Conv1d(input_channel, input_channel, 5, groups=input_channel, padding=2)
        self.bn_1 = nn.BatchNorm1d(input_channel)
        self.conv1d_2 = nn.Conv1d(input_channel, input_channel, 9, groups=input_channel, padding=4)
        self.bn_2 = nn.BatchNorm1d(input_channel)
        self.conv1d_3 = nn.Conv1d(input_channel, input_channel, 59, groups=input_channel, padding=29)
        self.bn_3 = nn.BatchNorm1d(input_channel)

        self.conv1d_4 = nn.Conv1d(3*input_channel, 32, 3, padding=1)
        self.bn_4 = nn.BatchNorm1d(32)

        self.conv1d_5 = nn.Conv1d(32, 2, 3, padding=1)
        self.bn_5 = nn.BatchNorm1d(2)

        self.avg = nn.AvgPool1d(60, padding=0)
        self.dropout = nn.Dropout(p=0.25)

    def forward(self, X):
        ''' input shape (N,C,L) '''

        X1 = F.relu(self.bn_1(self.conv1d_1(X)))
        X2 = F.relu(self.bn_2(self.conv1d_2(X)))
        X3 = F.relu(self.bn_3(self.conv1d_3(X)))

        X = torch.cat([X1,X2,X3], dim=1)
        X = self.dropout(X)

        X = F.relu(self.bn_4(self.conv1d_4(X)))
        X = self.dropout(X)

        X = F.relu(self.bn_5(self.conv1d_5(X)))

        X = self.avg(X).squeeze(dim=2)
        output = F.log_softmax(X, dim=1)

        return output


In [26]:
model = MyFCNModel(78)
tmp = torch.load('nn_model_0.0_seed_1.pickle')

In [27]:
model.load_state_dict(tmp['model'])

<All keys matched successfully>

In [8]:
    train_series = pd.read_csv('train.csv')
    train_labels = pd.read_csv('train_labels.csv')
    test_series = pd.read_csv('test.csv')
    all_series = pd.concat([train_series, test_series], axis=0)
    scs = prep_withsubject(all_series)

IndexingError: Unalignable boolean Series provided as indexer (index of the boolean Series and of the indexed object do not match).

In [9]:
all_series

Unnamed: 0,sequence,subject,step,sensor_00,sensor_01,sensor_02,sensor_03,sensor_04,sensor_05,sensor_06,...,sensor_11_square,sensor_11_diff,sensor_11_subject_mean,sensor_11_square_subject_mean,sensor_11_diff_subject_mean,sensor_12_square,sensor_12_diff,sensor_12_subject_mean,sensor_12_square_subject_mean,sensor_12_diff_subject_mean
0,0,47,0,-0.071577,0.025398,0.527133,0.077786,-0.573614,-0.081093,-0.037586,...,-0.025933,-0.000022,0.168770,-0.020132,0.000008,-0.188896,0.000141,-0.328722,-0.196842,0.000148
1,0,47,1,-0.162994,0.030337,0.527133,-0.153988,0.093636,0.213528,-0.061907,...,-0.026239,-0.109142,-1.893829,1.189209,-1.330854,-0.191629,-0.089298,-0.548938,-0.249138,-0.179915
2,0,47,2,0.118849,-0.155230,0.527133,0.077994,0.271536,0.800437,-0.027858,...,-0.025863,-0.050758,-0.569109,-0.065291,0.854765,-0.188933,-0.107894,0.132294,-0.228386,0.557168
3,0,47,3,0.190294,0.168396,0.527133,0.229810,-0.321286,-0.448553,0.235614,...,-0.023936,0.276014,0.689967,-0.046081,0.812409,-0.191632,0.107651,0.338776,-0.236459,0.168981
4,0,47,4,0.098878,0.240835,0.527133,-0.031455,0.228603,0.029098,0.166435,...,-0.026286,-0.174585,1.845034,0.530400,0.745299,-0.191650,-0.013442,-0.110551,-0.239970,-0.367251
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
733075,38185,773,55,0.076939,0.449216,-0.482516,0.163767,-0.091489,-0.744109,0.125361,...,-0.019940,0.432783,-0.259021,-0.126677,-0.083693,0.830281,-2.454671,0.001434,-0.290904,-0.000526
733076,38185,773,56,-0.300821,-0.552471,-0.482516,0.090040,0.083489,0.663165,-0.227016,...,-0.022123,-0.571801,0.299570,-0.130276,0.360432,0.447150,3.414122,0.002338,-0.290904,0.000887
733077,38185,773,57,0.274679,0.329268,-0.482516,-0.293551,-0.236399,0.005752,0.269392,...,-0.025763,0.348222,-0.015547,-0.122904,-0.203317,-0.050393,-2.216112,0.001129,-0.290904,-0.000841
733078,38185,773,58,-0.068202,0.160164,-0.482516,0.018804,0.757225,0.100536,-0.117844,...,-0.026172,-0.046347,-0.016316,-0.127543,-0.000488,0.333538,2.075789,-0.008307,-0.290858,-0.007567


In [10]:
test_series = all_series.loc[all_series['sequence']>25967]

In [11]:
class MyDataset(Dataset):
    def __init__(self, series, labels=None):
        self.X = series.drop(columns=['sequence','subject','step']).values
        self.X = self.X.reshape(-1,60,series.shape[1]-3).transpose([0,2,1]).copy()
        if labels is None:
            self.y = None
        else:
            self.y = labels['state'].values

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        ''' input tensor shape is N*C*L '''
        X = self.X[idx]
        if self.y is None:
            y = -1
        else:
            y = self.y[idx]
        return (torch.tensor(X, dtype=torch.float32), torch.tensor(y))

In [12]:
mydataset = MyDataset(test_series)

In [13]:
test_dataloader = DataLoader(mydataset, shuffle=False, batch_size=len(mydataset))

In [14]:
X, y = next(iter(test_dataloader))

In [28]:
model.eval()
with torch.no_grad():
    pred = model(X).detach().numpy()

In [21]:
pred

array([[-8.3445959e+00, -2.3767508e-04],
       [-1.8771969e-02, -3.9847605e+00],
       [-4.4107342e-06, -1.2333013e+01],
       ...,
       [-1.2132168e-01, -2.1693573e+00],
       [-4.0531077e-06, -1.2421602e+01],
       [-1.2361512e-03, -6.6963606e+00]], dtype=float32)

In [29]:
np.exp(pred)

array([[1.9591354e-04, 9.9980402e-01],
       [9.9650866e-01, 3.4912948e-03],
       [9.9998176e-01, 1.8237837e-05],
       ...,
       [9.1440046e-01, 8.5599594e-02],
       [9.9988580e-01, 1.1420031e-04],
       [9.9948746e-01, 5.1260722e-04]], dtype=float32)

In [20]:
np.exp(pred)

array([[2.37677479e-04, 9.99762297e-01],
       [9.81403112e-01, 1.85968969e-02],
       [9.99995589e-01, 4.40393296e-06],
       ...,
       [8.85748982e-01, 1.14251025e-01],
       [9.99996006e-01, 4.03057220e-06],
       [9.98764634e-01, 1.23539986e-03]], dtype=float32)

In [30]:
output = pd.DataFrame()
output['sequence'] = test_series['sequence'].unique()
output['state'] = np.exp(pred)[:,1]
output.to_csv('nn_baseline.csv',index=False)

In [31]:
import glob

In [34]:
model_names = glob.glob('*.pickle')

In [35]:
model_names

['nn_model_1e-05_seed_3.pickle',
 'nn_model_1.0_seed_1.pickle',
 'nn_model_0.01_seed_1.pickle',
 'nn_model_0.0_seed_5.pickle',
 'nn_model_0.001_seed_2.pickle',
 'nn_model_0.1_seed_2.pickle',
 'nn_model_1e-04_seed_4.pickle',
 'nn_model_1.0_seed_3.pickle',
 'nn_model_1e-05_seed_1.pickle',
 'nn_model_0.01_seed_3.pickle',
 'nn_model_0.001_seed_4.pickle',
 'nn_model_0.0_seed_3.pickle',
 'nn_model_1e-05_seed_5.pickle',
 'nn_model_0.0_seed_1.pickle',
 'nn_model_0.01_seed_5.pickle',
 'nn_model_1.0_seed_5.pickle',
 'nn_model_1e-04_seed_2.pickle',
 'nn_model_0.1_seed_4.pickle',
 'nn_model_0.001_seed_3.pickle',
 'nn_model_0.1_seed_1.pickle',
 'nn_model_0.0_seed_4.pickle',
 'nn_model_1e-05_seed_2.pickle',
 'nn_model_0.01_seed_2.pickle',
 'nn_model_1.0_seed_2.pickle',
 'nn_model_1e-04_seed_5.pickle',
 'nn_model_0.1_seed_3.pickle',
 'nn_model_0.001_seed_1.pickle',
 'nn_model_1e-05_seed_4.pickle',
 'nn_model_0.0_seed_2.pickle',
 'nn_model_0.001_seed_5.pickle',
 'nn_model_1e-04_seed_1.pickle',
 'nn_mo

In [41]:
train_series = all_series.loc[all_series['sequence']<=25967]
train_series

Unnamed: 0,sequence,subject,step,sensor_00,sensor_01,sensor_02,sensor_03,sensor_04,sensor_05,sensor_06,...,sensor_11_square,sensor_11_diff,sensor_11_subject_mean,sensor_11_square_subject_mean,sensor_11_diff_subject_mean,sensor_12_square,sensor_12_diff,sensor_12_subject_mean,sensor_12_square_subject_mean,sensor_12_diff_subject_mean
0,0,47,0,-0.071577,0.025398,0.527133,0.077786,-0.573614,-0.081093,-0.037586,...,-0.025933,-0.000022,0.168770,-0.020132,0.000008,-0.188896,0.000141,-0.328722,-0.196842,0.000148
1,0,47,1,-0.162994,0.030337,0.527133,-0.153988,0.093636,0.213528,-0.061907,...,-0.026239,-0.109142,-1.893829,1.189209,-1.330854,-0.191629,-0.089298,-0.548938,-0.249138,-0.179915
2,0,47,2,0.118849,-0.155230,0.527133,0.077994,0.271536,0.800437,-0.027858,...,-0.025863,-0.050758,-0.569109,-0.065291,0.854765,-0.188933,-0.107894,0.132294,-0.228386,0.557168
3,0,47,3,0.190294,0.168396,0.527133,0.229810,-0.321286,-0.448553,0.235614,...,-0.023936,0.276014,0.689967,-0.046081,0.812409,-0.191632,0.107651,0.338776,-0.236459,0.168981
4,0,47,4,0.098878,0.240835,0.527133,-0.031455,0.228603,0.029098,0.166435,...,-0.026286,-0.174585,1.845034,0.530400,0.745299,-0.191650,-0.013442,-0.110551,-0.239970,-0.367251
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1558075,25967,327,55,-0.103081,-0.272356,-0.626783,0.138222,-0.531331,-0.280465,-0.192967,...,-0.026146,0.053509,-0.428508,-0.133116,-0.477073,-0.191555,-0.032216,-0.054662,-0.290757,-0.056195
1558076,25967,327,56,0.047404,0.078552,-0.626783,-0.075692,0.443984,-0.206693,0.029700,...,-0.025822,-0.037082,-0.266833,-0.133045,0.104327,-0.191624,0.008445,0.000358,-0.290749,0.045136
1558077,25967,327,57,-0.211092,0.096427,-0.626783,0.075502,0.177312,0.177576,-0.151893,...,-0.022832,0.320427,0.165521,-0.133474,0.278978,-0.191549,0.030199,-0.038437,-0.290757,-0.031573
1558078,25967,327,58,0.465387,0.383362,-0.564748,0.188897,-0.262547,-0.033468,0.713917,...,-0.026130,-0.180762,0.260551,-0.134936,0.061325,-0.191650,-0.014782,0.079244,-0.290526,0.096372


In [42]:
train_labels

Unnamed: 0,sequence,state
0,0,0
1,1,1
2,2,1
3,3,1
4,4,1
...,...,...
25963,25963,1
25964,25964,0
25965,25965,1
25966,25966,1


In [62]:
mydataset = MyDataset(train_series, train_labels)
train_dataloader = DataLoader(mydataset, shuffle=False, batch_size=len(mydataset))
X, y = next(iter(train_dataloader))

model = MyFCNModel(78)
output = pd.DataFrame()
output['label'] = train_labels['state']
for model_name in model_names[:3]:
    tmp = torch.load(model_name)
    if tmp['score'] > 0.95:
        print(model_name, tmp['score'])
        model.load_state_dict(tmp['model'])
        model.eval()
        with torch.no_grad():
            pred = model(X).detach().numpy()
            output[model_name] = np.exp(pred[:,1])

nn_model_1e-05_seed_3.pickle 0.967407039837758
nn_model_0.01_seed_1.pickle 0.9651124408052815


In [63]:
output

Unnamed: 0,label,nn_model_1e-05_seed_3.pickle,nn_model_0.01_seed_1.pickle
0,0,0.032689,0.270240
1,1,0.915209,0.873950
2,1,0.951692,0.784093
3,1,0.990372,0.832905
4,1,0.998018,0.843915
...,...,...,...
25963,1,0.824950,0.878561
25964,0,0.000028,0.045164
25965,1,0.899276,0.530571
25966,1,0.997986,0.935754


In [65]:
roc_auc_score(output['label'], output['nn_model_1e-05_seed_3.pickle'])

0.9953539482272495

In [46]:
from sklearn.linear_model import LogisticRegression

In [48]:
output.corr()

Unnamed: 0,label,nn_model_1e-05_seed_3.pickle,nn_model_0.01_seed_1.pickle,nn_model_0.0_seed_5.pickle,nn_model_0.001_seed_2.pickle,nn_model_1e-04_seed_4.pickle,nn_model_1e-05_seed_1.pickle,nn_model_0.01_seed_3.pickle,nn_model_0.001_seed_4.pickle,nn_model_0.0_seed_3.pickle,...,nn_model_1e-05_seed_2.pickle,nn_model_0.01_seed_2.pickle,nn_model_1e-04_seed_5.pickle,nn_model_0.001_seed_1.pickle,nn_model_1e-05_seed_4.pickle,nn_model_0.0_seed_2.pickle,nn_model_0.001_seed_5.pickle,nn_model_1e-04_seed_1.pickle,nn_model_1e-04_seed_3.pickle,nn_model_0.01_seed_4.pickle
label,1.0,0.952526,0.871276,0.9299,0.909259,0.94986,0.912837,0.865882,0.927828,0.955078,...,0.952105,0.872602,0.942771,0.913133,0.935334,0.937998,0.923338,0.934152,0.925925,0.865521
nn_model_1e-05_seed_3.pickle,0.952526,1.0,0.92798,0.967986,0.955115,0.966996,0.959882,0.92942,0.964682,0.990618,...,0.967756,0.929727,0.969427,0.9591,0.966251,0.967999,0.961835,0.96867,0.978856,0.92431
nn_model_0.01_seed_1.pickle,0.871276,0.92798,1.0,0.948609,0.958848,0.925786,0.957381,0.973245,0.949131,0.92636,...,0.920673,0.964423,0.939174,0.96701,0.936911,0.936584,0.957554,0.949601,0.948928,0.970605
nn_model_0.0_seed_5.pickle,0.9299,0.967986,0.948609,1.0,0.968588,0.96745,0.969296,0.946655,0.97243,0.96735,...,0.962192,0.947651,0.983302,0.972324,0.97213,0.967365,0.980635,0.972285,0.972275,0.941127
nn_model_0.001_seed_2.pickle,0.909259,0.955115,0.958848,0.968588,1.0,0.957903,0.963898,0.957179,0.963083,0.952535,...,0.956833,0.96189,0.959261,0.971646,0.956374,0.9649,0.974482,0.965198,0.969714,0.946494
nn_model_1e-04_seed_4.pickle,0.94986,0.966996,0.925786,0.96745,0.957903,1.0,0.949973,0.923661,0.96725,0.966936,...,0.959764,0.923197,0.966566,0.957492,0.973186,0.957438,0.965501,0.964562,0.965139,0.919653
nn_model_1e-05_seed_1.pickle,0.912837,0.959882,0.957381,0.969296,0.963898,0.949973,1.0,0.949584,0.969835,0.958674,...,0.956651,0.950276,0.961976,0.978726,0.96659,0.966418,0.964225,0.984594,0.967911,0.95041
nn_model_0.01_seed_3.pickle,0.865882,0.92942,0.973245,0.946655,0.957179,0.923661,0.949584,1.0,0.946022,0.926091,...,0.915595,0.967373,0.933554,0.957134,0.932153,0.92957,0.953644,0.942186,0.947899,0.966687
nn_model_0.001_seed_4.pickle,0.927828,0.964682,0.949131,0.97243,0.963083,0.96725,0.969835,0.946022,1.0,0.963312,...,0.962749,0.949078,0.967839,0.971376,0.976317,0.969832,0.96858,0.968199,0.966422,0.950813
nn_model_0.0_seed_3.pickle,0.955078,0.990618,0.92636,0.96735,0.952535,0.966936,0.958674,0.926091,0.963312,1.0,...,0.969678,0.925515,0.970936,0.958174,0.96836,0.968012,0.961897,0.967308,0.97721,0.923511


In [49]:
X, y = output.drop(columns=['label']).values, output['label'].values

In [51]:
blend = LogisticRegression()

In [52]:
blend.fit(X,y)

LogisticRegression()

In [53]:
blend.coef_

array([[ 3.33425213, -0.14357049, -0.46629618, -1.77603156,  5.29140767,
        -4.5078311 , -2.7805581 ,  1.1323818 ,  2.94072352, -1.55404265,
         6.26004481, -0.54751453,  4.75809475, -0.35314217,  0.33420335,
         3.06273922,  0.95398303,  2.12453147, -1.27166394, -1.79543061,
        -3.01900245,  2.20286894,  0.31883698, -0.94812108, -3.3145148 ]])

In [54]:
mydataset = MyDataset(test_series)
test_dataloader = DataLoader(mydataset, shuffle=False, batch_size=len(mydataset))
X, y = next(iter(test_dataloader))

model = MyFCNModel(78)
output = pd.DataFrame()
for model_name in model_names:
    tmp = torch.load(model_name)
    if tmp['score'] > 0.95:
        print(model_name, tmp['score'])
        model.load_state_dict(tmp['model'])
        model.eval()
        with torch.no_grad():
            pred = model(X).detach().numpy()
            output[model_name] = np.exp(pred[:,1])

nn_model_1e-05_seed_3.pickle 0.967407039837758
nn_model_0.01_seed_1.pickle 0.9651124408052815
nn_model_0.0_seed_5.pickle 0.9698101723792287
nn_model_0.001_seed_2.pickle 0.9634224629904639
nn_model_1e-04_seed_4.pickle 0.9707457410984005
nn_model_1e-05_seed_1.pickle 0.9616762311335061
nn_model_0.01_seed_3.pickle 0.9639302704393696
nn_model_0.001_seed_4.pickle 0.9691502005645519
nn_model_0.0_seed_3.pickle 0.9679836247721169
nn_model_1e-05_seed_5.pickle 0.9693870127764213
nn_model_0.0_seed_1.pickle 0.9640131103179602
nn_model_0.01_seed_5.pickle 0.967782116120104
nn_model_1e-04_seed_2.pickle 0.965658078705901
nn_model_0.001_seed_3.pickle 0.9700645084734792
nn_model_0.0_seed_4.pickle 0.9683374189075421
nn_model_1e-05_seed_2.pickle 0.962389526474227
nn_model_0.01_seed_2.pickle 0.957944574321284
nn_model_1e-04_seed_5.pickle 0.9703305143055155
nn_model_0.001_seed_1.pickle 0.9682757132526185
nn_model_1e-05_seed_4.pickle 0.968802926756797
nn_model_0.0_seed_2.pickle 0.9626013696330608
nn_model_0.0

In [57]:
output['state'] = blend.predict_proba(output.values)[:,1]

In [59]:
output['sequence'] = test_series['sequence'].unique()

In [60]:
output[['sequence','state']].to_csv('nn_blend.csv',index=False)

In [66]:
train_series

Unnamed: 0,sequence,subject,step,sensor_00,sensor_01,sensor_02,sensor_03,sensor_04,sensor_05,sensor_06,...,sensor_11_square,sensor_11_diff,sensor_11_subject_mean,sensor_11_square_subject_mean,sensor_11_diff_subject_mean,sensor_12_square,sensor_12_diff,sensor_12_subject_mean,sensor_12_square_subject_mean,sensor_12_diff_subject_mean
0,0,47,0,-0.071577,0.025398,0.527133,0.077786,-0.573614,-0.081093,-0.037586,...,-0.025933,-0.000022,0.168770,-0.020132,0.000008,-0.188896,0.000141,-0.328722,-0.196842,0.000148
1,0,47,1,-0.162994,0.030337,0.527133,-0.153988,0.093636,0.213528,-0.061907,...,-0.026239,-0.109142,-1.893829,1.189209,-1.330854,-0.191629,-0.089298,-0.548938,-0.249138,-0.179915
2,0,47,2,0.118849,-0.155230,0.527133,0.077994,0.271536,0.800437,-0.027858,...,-0.025863,-0.050758,-0.569109,-0.065291,0.854765,-0.188933,-0.107894,0.132294,-0.228386,0.557168
3,0,47,3,0.190294,0.168396,0.527133,0.229810,-0.321286,-0.448553,0.235614,...,-0.023936,0.276014,0.689967,-0.046081,0.812409,-0.191632,0.107651,0.338776,-0.236459,0.168981
4,0,47,4,0.098878,0.240835,0.527133,-0.031455,0.228603,0.029098,0.166435,...,-0.026286,-0.174585,1.845034,0.530400,0.745299,-0.191650,-0.013442,-0.110551,-0.239970,-0.367251
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1558075,25967,327,55,-0.103081,-0.272356,-0.626783,0.138222,-0.531331,-0.280465,-0.192967,...,-0.026146,0.053509,-0.428508,-0.133116,-0.477073,-0.191555,-0.032216,-0.054662,-0.290757,-0.056195
1558076,25967,327,56,0.047404,0.078552,-0.626783,-0.075692,0.443984,-0.206693,0.029700,...,-0.025822,-0.037082,-0.266833,-0.133045,0.104327,-0.191624,0.008445,0.000358,-0.290749,0.045136
1558077,25967,327,57,-0.211092,0.096427,-0.626783,0.075502,0.177312,0.177576,-0.151893,...,-0.022832,0.320427,0.165521,-0.133474,0.278978,-0.191549,0.030199,-0.038437,-0.290757,-0.031573
1558078,25967,327,58,0.465387,0.383362,-0.564748,0.188897,-0.262547,-0.033468,0.713917,...,-0.026130,-0.180762,0.260551,-0.134936,0.061325,-0.191650,-0.014782,0.079244,-0.290526,0.096372


In [67]:
test_series

Unnamed: 0,sequence,subject,step,sensor_00,sensor_01,sensor_02,sensor_03,sensor_04,sensor_05,sensor_06,...,sensor_11_square,sensor_11_diff,sensor_11_subject_mean,sensor_11_square_subject_mean,sensor_11_diff_subject_mean,sensor_12_square,sensor_12_diff,sensor_12_subject_mean,sensor_12_square_subject_mean,sensor_12_diff_subject_mean
0,25968,684,0,0.883369,4.397639,0.527133,-0.343603,-0.736725,-2.625765,0.737967,...,0.131075,-0.000022,0.168770,-0.020132,0.000008,-0.191633,0.000141,-0.328722,-0.196842,0.000148
1,25968,684,1,-1.802014,-4.869207,0.527133,0.231264,-0.324583,1.153423,-1.153357,...,-0.021755,-1.837126,-1.893829,1.189209,-1.330854,-0.191655,-0.008758,-0.548938,-0.249138,-0.179915
2,25968,684,2,0.413350,-2.408146,0.527133,0.239156,0.551938,1.533489,0.092934,...,0.148649,-1.388438,-0.569109,-0.065291,0.854765,-0.191653,0.003228,0.132294,-0.228386,0.557168
3,25968,684,3,0.293243,1.456549,0.527133,-0.041632,0.554833,-0.645123,-0.135409,...,-0.002873,1.049563,0.689967,-0.046081,0.812409,-0.191630,-0.012404,0.338776,-0.236459,0.168981
4,25968,684,4,0.468762,1.243464,0.527133,-0.115359,-0.591602,-0.702087,0.596098,...,-0.026020,0.538815,1.845034,0.530400,0.745299,-0.191654,0.007247,-0.110551,-0.239970,-0.367251
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
733075,38185,773,55,0.076939,0.449216,-0.482516,0.163767,-0.091489,-0.744109,0.125361,...,-0.019940,0.432783,-0.259021,-0.126677,-0.083693,0.830281,-2.454671,0.001434,-0.290904,-0.000526
733076,38185,773,56,-0.300821,-0.552471,-0.482516,0.090040,0.083489,0.663165,-0.227016,...,-0.022123,-0.571801,0.299570,-0.130276,0.360432,0.447150,3.414122,0.002338,-0.290904,0.000887
733077,38185,773,57,0.274679,0.329268,-0.482516,-0.293551,-0.236399,0.005752,0.269392,...,-0.025763,0.348222,-0.015547,-0.122904,-0.203317,-0.050393,-2.216112,0.001129,-0.290904,-0.000841
733078,38185,773,58,-0.068202,0.160164,-0.482516,0.018804,0.757225,0.100536,-0.117844,...,-0.026172,-0.046347,-0.016316,-0.127543,-0.000488,0.333538,2.075789,-0.008307,-0.290858,-0.007567


In [69]:
sorted(train_series['subject'].unique())

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 141,
 142,
 143,
 144,
 145,
 146,
 147,
 148,
 149,
 150,
 151,
 152,
 153,
 154,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 164,
 165,
 166,
 167,
 168,
 169,
 170,
 171,
 172,
 173,
 174,
 175,
 176,
 177,
 178,
 179,
 180,
 181,
 182,
 183,
 184,


In [70]:
sorted(test_series['subject'].unique())

[672,
 673,
 674,
 675,
 676,
 677,
 678,
 679,
 680,
 681,
 682,
 683,
 684,
 685,
 686,
 687,
 688,
 689,
 690,
 691,
 692,
 693,
 694,
 695,
 696,
 697,
 698,
 699,
 700,
 701,
 702,
 703,
 704,
 705,
 706,
 707,
 708,
 709,
 710,
 711,
 712,
 713,
 714,
 715,
 716,
 717,
 718,
 719,
 720,
 721,
 722,
 723,
 724,
 725,
 726,
 727,
 728,
 729,
 730,
 731,
 732,
 733,
 734,
 735,
 736,
 737,
 738,
 739,
 740,
 741,
 742,
 743,
 744,
 745,
 746,
 747,
 748,
 749,
 750,
 751,
 752,
 753,
 754,
 755,
 756,
 757,
 758,
 759,
 760,
 761,
 762,
 763,
 764,
 765,
 766,
 767,
 768,
 769,
 770,
 771,
 772,
 773,
 774,
 775,
 776,
 777,
 778,
 779,
 780,
 781,
 782,
 783,
 784,
 785,
 786,
 787,
 788,
 789,
 790,
 791,
 792,
 793,
 794,
 795,
 796,
 797,
 798,
 799,
 800,
 801,
 802,
 803,
 804,
 805,
 806,
 807,
 808,
 809,
 810,
 811,
 812,
 813,
 814,
 815,
 816,
 817,
 818,
 819,
 820,
 821,
 822,
 823,
 824,
 825,
 826,
 827,
 828,
 829,
 830,
 831,
 832,
 833,
 834,
 835,
 836,
 837,
 838

In [73]:
model_names = glob.glob('nosub.dir/*.pickle')
for model_name in model_names:
    tmp = torch.load(model_name)
    if (tmp['score']>0.95):
        print(model_name, tmp['score'])

nosub.dir/nn_model_1e-05_seed_3.pickle 0.9670267850400751
nosub.dir/nn_model_0.01_seed_1.pickle 0.9613187300254274
nosub.dir/nn_model_0.0_seed_5.pickle 0.9710402229306967
nosub.dir/nn_model_0.001_seed_2.pickle 0.9600839220696058
nosub.dir/nn_model_1e-04_seed_4.pickle 0.9699580300103996
nosub.dir/nn_model_1e-05_seed_1.pickle 0.9648622483495464
nosub.dir/nn_model_0.01_seed_3.pickle 0.9551396425065534
nosub.dir/nn_model_0.001_seed_4.pickle 0.9712610805724756
nosub.dir/nn_model_0.0_seed_3.pickle 0.9668563445917518
nosub.dir/nn_model_1e-05_seed_5.pickle 0.9702833114626829
nosub.dir/nn_model_0.0_seed_1.pickle 0.965813446240698
nosub.dir/nn_model_0.01_seed_5.pickle 0.961096527592283
nosub.dir/nn_model_1e-04_seed_2.pickle 0.96061058025182
nosub.dir/nn_model_0.001_seed_3.pickle 0.968140581007756
nosub.dir/nn_model_0.0_seed_4.pickle 0.9671724112316149
nosub.dir/nn_model_1e-05_seed_2.pickle 0.9576549659522459
nosub.dir/nn_model_0.01_seed_2.pickle 0.9503155390443031
nosub.dir/nn_model_1e-04_seed_5

In [77]:
def prep_nosubject(df):
    scs = {}
    for col in df.columns[3:]:
        sc = StandardScaler()
        df[col] = sc.fit_transform(df[col].values.reshape(-1,1))
        scs[col] = sc

    for sensor in df.columns[3:]:
        df[sensor+'_square'] = np.square(df[sensor])
        df[sensor+'_diff'] = df[sensor].diff()
        df.loc[df['step']==0, sensor+'_diff'] = 0.0

    for col in df.columns[16:]:
        sc = StandardScaler()
        df[col] = sc.fit_transform(df[col].values.reshape(-1,1))
        scs[col] = sc

    return scs

In [79]:
train_series = pd.read_csv('train.csv')
train_labels = pd.read_csv('train_labels.csv')
test_series = pd.read_csv('test.csv')
all_series = pd.concat([train_series, test_series], axis=0)
scs = prep_nosubject(all_series)
train_series = all_series.loc[all_series['sequence']<=25967]
test_series = all_series.loc[all_series['sequence']>25967]

In [80]:
mydataset = MyDataset(train_series, train_labels)
train_dataloader = DataLoader(mydataset, shuffle=False, batch_size=len(mydataset))
X, y = next(iter(train_dataloader))

model = MyFCNModel(39)
output = pd.DataFrame()
output['label'] = train_labels['state']
for model_name in model_names:
    tmp = torch.load(model_name)
    if tmp['score'] > 0.95:
        print(model_name, tmp['score'])
        model.load_state_dict(tmp['model'])
        model.eval()
        with torch.no_grad():
            pred = model(X).detach().numpy()
            output[model_name] = np.exp(pred[:,1])

nosub.dir/nn_model_1e-05_seed_3.pickle 0.9670267850400751
nosub.dir/nn_model_0.01_seed_1.pickle 0.9613187300254274
nosub.dir/nn_model_0.0_seed_5.pickle 0.9710402229306967
nosub.dir/nn_model_0.001_seed_2.pickle 0.9600839220696058
nosub.dir/nn_model_1e-04_seed_4.pickle 0.9699580300103996
nosub.dir/nn_model_1e-05_seed_1.pickle 0.9648622483495464
nosub.dir/nn_model_0.01_seed_3.pickle 0.9551396425065534
nosub.dir/nn_model_0.001_seed_4.pickle 0.9712610805724756
nosub.dir/nn_model_0.0_seed_3.pickle 0.9668563445917518
nosub.dir/nn_model_1e-05_seed_5.pickle 0.9702833114626829
nosub.dir/nn_model_0.0_seed_1.pickle 0.965813446240698
nosub.dir/nn_model_0.01_seed_5.pickle 0.961096527592283
nosub.dir/nn_model_1e-04_seed_2.pickle 0.96061058025182
nosub.dir/nn_model_0.001_seed_3.pickle 0.968140581007756
nosub.dir/nn_model_0.0_seed_4.pickle 0.9671724112316149
nosub.dir/nn_model_1e-05_seed_2.pickle 0.9576549659522459
nosub.dir/nn_model_0.01_seed_2.pickle 0.9503155390443031
nosub.dir/nn_model_1e-04_seed_5

In [81]:
roc_auc_score(output['label'],output['nosub.dir/nn_model_0.0_seed_5.pickle'])

0.989093333152058

In [82]:
mydataset = MyDataset(test_series)
test_dataloader = DataLoader(mydataset, shuffle=False, batch_size=len(mydataset))
X, y = next(iter(test_dataloader))

model = MyFCNModel(39)
output_submit = pd.DataFrame()
output_submit['sequence'] = test_series['sequence'].unique()
for model_name in model_names:
    tmp = torch.load(model_name)
    if tmp['score'] > 0.95:
        print(model_name, tmp['score'])
        model.load_state_dict(tmp['model'])
        model.eval()
        with torch.no_grad():
            pred = model(X).detach().numpy()
            output_submit[model_name] = np.exp(pred[:,1])

nosub.dir/nn_model_1e-05_seed_3.pickle 0.9670267850400751
nosub.dir/nn_model_0.01_seed_1.pickle 0.9613187300254274
nosub.dir/nn_model_0.0_seed_5.pickle 0.9710402229306967
nosub.dir/nn_model_0.001_seed_2.pickle 0.9600839220696058
nosub.dir/nn_model_1e-04_seed_4.pickle 0.9699580300103996
nosub.dir/nn_model_1e-05_seed_1.pickle 0.9648622483495464
nosub.dir/nn_model_0.01_seed_3.pickle 0.9551396425065534
nosub.dir/nn_model_0.001_seed_4.pickle 0.9712610805724756
nosub.dir/nn_model_0.0_seed_3.pickle 0.9668563445917518
nosub.dir/nn_model_1e-05_seed_5.pickle 0.9702833114626829
nosub.dir/nn_model_0.0_seed_1.pickle 0.965813446240698
nosub.dir/nn_model_0.01_seed_5.pickle 0.961096527592283
nosub.dir/nn_model_1e-04_seed_2.pickle 0.96061058025182
nosub.dir/nn_model_0.001_seed_3.pickle 0.968140581007756
nosub.dir/nn_model_0.0_seed_4.pickle 0.9671724112316149
nosub.dir/nn_model_1e-05_seed_2.pickle 0.9576549659522459
nosub.dir/nn_model_0.01_seed_2.pickle 0.9503155390443031
nosub.dir/nn_model_1e-04_seed_5

In [96]:
output_submit[['sequence','nosub.dir/nn_model_0.0_seed_5.pickle']].to_csv('nn_baseline_nosub.csv', 
                                  index=False, header=['sequence','state'])

In [85]:
output

Unnamed: 0,label,nosub.dir/nn_model_1e-05_seed_3.pickle,nosub.dir/nn_model_0.01_seed_1.pickle,nosub.dir/nn_model_0.0_seed_5.pickle,nosub.dir/nn_model_0.001_seed_2.pickle,nosub.dir/nn_model_1e-04_seed_4.pickle,nosub.dir/nn_model_1e-05_seed_1.pickle,nosub.dir/nn_model_0.01_seed_3.pickle,nosub.dir/nn_model_0.001_seed_4.pickle,nosub.dir/nn_model_0.0_seed_3.pickle,...,nosub.dir/nn_model_1e-05_seed_2.pickle,nosub.dir/nn_model_0.01_seed_2.pickle,nosub.dir/nn_model_1e-04_seed_5.pickle,nosub.dir/nn_model_0.001_seed_1.pickle,nosub.dir/nn_model_1e-05_seed_4.pickle,nosub.dir/nn_model_0.0_seed_2.pickle,nosub.dir/nn_model_0.001_seed_5.pickle,nosub.dir/nn_model_1e-04_seed_1.pickle,nosub.dir/nn_model_1e-04_seed_3.pickle,nosub.dir/nn_model_0.01_seed_4.pickle
0,0,0.020576,0.144853,0.071082,0.026887,0.007982,0.011428,0.205886,0.128787,0.032725,...,0.015836,0.141967,0.030918,0.029002,0.008740,0.014743,0.049640,0.004602,0.028185,0.167167
1,1,0.988026,0.910414,0.989773,0.957544,0.953089,0.995108,0.755492,0.919190,0.988562,...,0.991151,0.792319,0.994028,0.982620,0.972394,0.988066,0.940246,0.995587,0.944315,0.862065
2,1,0.992522,0.886979,0.999247,0.979241,0.994093,0.997811,0.835038,0.989577,0.997221,...,0.998066,0.845904,0.999127,0.996473,0.994251,0.997379,0.988702,0.995686,0.974686,0.868504
3,1,0.678202,0.751685,0.946159,0.884210,0.731639,0.917529,0.737054,0.910724,0.688778,...,0.929997,0.685514,0.911868,0.907824,0.611406,0.922882,0.777669,0.807082,0.605289,0.775732
4,1,0.768204,0.808280,0.976778,0.894703,0.806823,0.961094,0.410352,0.927100,0.724997,...,0.967052,0.558554,0.955107,0.838905,0.940010,0.964122,0.837207,0.859311,0.795138,0.740504
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25963,1,0.951663,0.889925,0.995539,0.989611,0.993273,0.994702,0.908609,0.982097,0.961649,...,0.985821,0.897083,0.989156,0.979513,0.988548,0.991717,0.978971,0.987933,0.970137,0.937547
25964,0,0.004659,0.082516,0.012568,0.005846,0.004516,0.003867,0.135610,0.062267,0.002944,...,0.001811,0.041856,0.005041,0.005436,0.002080,0.001580,0.014304,0.001238,0.003287,0.183121
25965,1,0.990066,0.650797,0.865641,0.315227,0.747458,0.967776,0.742422,0.882496,0.997553,...,0.893047,0.602741,0.766993,0.899908,0.447838,0.894732,0.515503,0.889252,0.967475,0.418435
25966,1,0.987115,0.889395,0.934523,0.925398,0.983830,0.989121,0.798391,0.937821,0.981568,...,0.938494,0.850020,0.955255,0.967849,0.948401,0.981897,0.973859,0.979156,0.932942,0.822605


In [86]:
blend.fit(output.drop(columns=['label']), output['label'])

LogisticRegression()

In [87]:
blend.coef_

array([[ 0.86721463, -2.22723182,  2.95551815,  1.18311953, -0.8888529 ,
         1.95469803, -1.24505145,  2.79233274,  2.03546041, -1.03102436,
         0.16984841, -0.20617449,  0.10581301,  0.92885549,  1.08953358,
         1.06596952, -2.98254905,  2.04959648,  0.597367  ,  1.43836143,
         0.64010868, -1.9781879 , -0.52775146,  0.66050199, -1.85433181]])

In [92]:
roc_auc_score(output['label'], blend.predict_proba(output.drop(columns=['label']))[:,1])

0.9919491043943863

In [93]:
output_submit['blend'] = blend.predict_proba(output_submit.drop(columns=['sequence']))[:,1]

In [95]:
output_submit[['sequence','blend']].to_csv('nn_blend_nosub.csv', 
                                  index=False, header=['sequence','state'])

In [99]:
def prep_withsubject(df):
    scs = {}
    for col in df.columns[3:]:
        sc = StandardScaler()
        df[col] = sc.fit_transform(df[col].values.reshape(-1,1))
        scs[col] = sc

    for sensor in df.columns[3:]:
        df[sensor+'_square'] = np.square(df[sensor])
        df[sensor+'_diff'] = df[sensor].diff()
        df.loc[df['step']==0, sensor+'_diff'] = 0.0

        tmp = df.groupby(['subject','step']).agg({sensor:'mean'}).to_dict()[sensor]
        df[sensor+'_subject_mean'] = pd.Series(zip(df['subject'], df['step'])).map(tmp)

        tmp = df.groupby(['subject','step']).agg({sensor+'_square':'mean'}).to_dict()[sensor+'_square']
        df[sensor+'_square_subject_mean'] = pd.Series(zip(df['subject'], df['step'])).map(tmp)

        tmp = df.groupby(['subject','step']).agg({sensor+'_diff':'mean'}).to_dict()[sensor+'_diff']
        df[sensor+'_diff_subject_mean'] = pd.Series(zip(df['subject'], df['step'])).map(tmp)

    for col in df.columns[16:]:
        sc = StandardScaler()
        df[col] = sc.fit_transform(df[col].values.reshape(-1,1))
        scs[col] = sc

    return scs

In [101]:
train_series = pd.read_csv('train.csv')
train_labels = pd.read_csv('train_labels.csv')
test_series = pd.read_csv('test.csv')
all_series = pd.concat([train_series, test_series], axis=0)
scs = prep_withsubject(all_series)
train_series = all_series.loc[all_series['sequence']<=25967]
test_series = all_series.loc[all_series['sequence']>25967]

In [103]:
model_names = glob.glob('nn_model_*isplit*')

In [105]:
class MyFCNModel(nn.Module):
    def __init__(self, input_channel):
        super(MyFCNModel, self).__init__()
        torch.manual_seed(123)

        self.conv1d_1 = nn.Conv1d(input_channel, input_channel, 5, groups=input_channel, padding=2)
        self.bn_1 = nn.BatchNorm1d(input_channel)
        self.conv1d_2 = nn.Conv1d(input_channel, input_channel, 9, groups=input_channel, padding=4)
        self.bn_2 = nn.BatchNorm1d(input_channel)
        self.conv1d_3 = nn.Conv1d(input_channel, input_channel, 59, groups=input_channel, padding=29)
        self.bn_3 = nn.BatchNorm1d(input_channel)

        self.conv1d_4 = nn.Conv1d(3*input_channel, 32, 3, padding=1)
        self.bn_4 = nn.BatchNorm1d(32)

        self.conv1d_5 = nn.Conv1d(32, 1, 3, padding=1)
        self.bn_5 = nn.BatchNorm1d(1)

        self.avg = nn.AvgPool1d(60, padding=0)
        self.sigmoid = nn.Sigmoid()
        self.dropout = nn.Dropout(p=0.25)

    def forward(self, X):
        ''' input shape (N,C,L) '''

        X1 = F.relu(self.bn_1(self.conv1d_1(X)))
        X2 = F.relu(self.bn_2(self.conv1d_2(X)))
        X3 = F.relu(self.bn_3(self.conv1d_3(X)))

        X = torch.cat([X1,X2,X3], dim=1)
        X = self.dropout(X)

        X = F.relu(self.bn_4(self.conv1d_4(X)))
        X = self.dropout(X)

        X = self.conv1d_5(X))

        X = self.avg(X).squeeze(dim=2)
        output = self.sigmoid(X.squeeze(dim=1))

        return output


In [110]:
mydataset = MyDataset(test_series)
test_dataloader = DataLoader(mydataset, shuffle=False, batch_size=len(mydataset))
X, y = next(iter(test_dataloader))

model = MyFCNModel(78)
output_submit = pd.DataFrame()
output_submit['sequence'] = test_series['sequence'].unique()
for model_name in model_names:
    tmp = torch.load(model_name)
    if tmp['score'] > 0.95:
        print(model_name, tmp['score'])
        model.load_state_dict(tmp['model'])
        model.eval()
        with torch.no_grad():
            pred = model(X).detach().numpy()
            output_submit[model_name] = pred

nn_model_0.0_isplit_1.pickle 0.984950655797083
nn_model_0.0_isplit_0.pickle 0.9582540624155228
nn_model_0.0_isplit_2.pickle 0.9822634597788978


In [111]:
output_submit

Unnamed: 0,sequence,nn_model_0.0_isplit_1.pickle,nn_model_0.0_isplit_0.pickle,nn_model_0.0_isplit_2.pickle
0,25968,0.980709,0.976293,0.985033
1,25969,0.679662,0.661489,0.865164
2,25970,0.500000,0.500000,0.500000
3,25971,0.685326,0.658995,0.632230
4,25972,0.500000,0.523010,0.500000
...,...,...,...,...
12213,38181,0.500000,0.523868,0.500000
12214,38182,0.989561,0.961179,0.995769
12215,38183,0.520576,0.528439,0.515364
12216,38184,0.500000,0.504880,0.506204


In [109]:
output_submit[['sequence','nn_model_0.0_isplit_1.pickle']].to_csv('nn_withsub_retry.csv', 
                                  index=False, header=['sequence','state'])