In [0]:
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/kaggle.json

In [0]:
!kaggle competitions download -c grasp-and-lift-eeg-detection

Downloading train.zip to /content
100% 913M/915M [00:14<00:00, 101MB/s] 
100% 915M/915M [00:14<00:00, 67.7MB/s]
Downloading test.zip to /content
 99% 151M/153M [00:01<00:00, 124MB/s]
100% 153M/153M [00:01<00:00, 132MB/s]
Downloading sample_submission.csv.zip to /content
  0% 0.00/5.16M [00:00<?, ?B/s]
100% 5.16M/5.16M [00:00<00:00, 47.5MB/s]


In [0]:
!unzip train.zip
!unzip test.zip

Archive:  train.zip
   creating: train/
  inflating: train/subj10_series1_data.csv  
  inflating: train/subj10_series1_events.csv  
  inflating: train/subj10_series2_data.csv  
  inflating: train/subj10_series2_events.csv  
  inflating: train/subj10_series3_data.csv  
  inflating: train/subj10_series3_events.csv  
  inflating: train/subj10_series4_data.csv  
  inflating: train/subj10_series4_events.csv  
  inflating: train/subj10_series5_data.csv  
  inflating: train/subj10_series5_events.csv  
  inflating: train/subj10_series6_data.csv  
  inflating: train/subj10_series6_events.csv  
  inflating: train/subj10_series7_data.csv  
  inflating: train/subj10_series7_events.csv  
  inflating: train/subj10_series8_data.csv  
  inflating: train/subj10_series8_events.csv  
  inflating: train/subj11_series1_data.csv  
  inflating: train/subj11_series1_events.csv  
  inflating: train/subj11_series2_data.csv  
  inflating: train/subj11_series2_events.csv  
  inflating: train/subj11_series3_data.c

In [0]:
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
from torch.nn.utils import weight_norm
import random
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score as auc
from scipy.interpolate import BSpline
from sklearn.metrics import accuracy_score

In [0]:
USE_CUDA = 1

In [0]:
def prepare_data_train(fname):
    
    data = pd.read_csv(fname)
    
    events_fname = fname.replace('_data','_events')
    
    labels= pd.read_csv(events_fname)
    clean=data.drop(['id' ], axis=1)#remove id
    labels=labels.drop(['id' ], axis=1)#remove id
    return  clean,labels


scaler= StandardScaler()
def data_preprocess_train(X):
    X_prep=scaler.fit_transform(X)
    
    return X_prep
def data_preprocess_test(X):
    X_prep=scaler.transform(X)
    
    return X_prep

In [0]:
def load_data(subjects,series):
  y_raw= []
  raw = []
  for subject in subjects:
    for ser in series:
      fname ='train/subj%d_series%d_data.csv' % (subject,ser)
      # print(fname)
      data,labels=prepare_data_train(fname)
      raw.append(data)
      y_raw.append(labels)

    X = pd.concat(raw)
    y = pd.concat(y_raw)
      
    X =np.asarray(X.astype(float))
    y = np.asarray(y.astype(float))

  return X,y

In [0]:
# some parameteres for the model
num_features = 32
window_size = 1024
batch_size=2000


In [0]:
def get_batch(dataset,target, batch_size=2000, val=False, index=None):
    if val == False:
        index = random.randint(window_size, len(dataset) - 16 * batch_size)
        indexes = np.arange(index, index + 16*batch_size, 16)

    else:
        indexes = np.arange(index, index + batch_size)
    
    batch = np.zeros((batch_size, num_features, window_size//4))
    
    b = 0
    for i in indexes:
        
        start = i - window_size if i - window_size > 0 else 0
        
        tmp = dataset[start:i]
        batch[b,:,:] = tmp[::4].transpose()
        
        b += 1

    targets = target[indexes]
    return torch.DoubleTensor(batch), torch.DoubleTensor(targets) 

In [0]:
class convmodel(nn.Module):
  def __init__(self, drop=0.5, d_linear=124):
    super().__init__()
    self.conv2 = nn.Conv1d(32, 64, kernel_size=3, padding=0, stride=1)
    self.bn = nn.BatchNorm1d(64)
    self.pool = nn.MaxPool1d(2, stride=2)
    
    self.dropout1 = nn.Dropout(drop)
 
    self.conv = nn.Sequential(self.conv2, nn.ReLU(inplace=True), self.bn,self.pool, self.dropout1)
  def forward(self, x):
    x = self.conv(x)
    return x

class Combine(nn.Module):
    def __init__(self,out_classes):
        super(Combine, self).__init__()
        self.cnn = convmodel().double()
        self.rnn = nn.LSTM(input_size=127, hidden_size=64, num_layers=1,batch_first=True)
        self.linear = nn.Linear(64,out_classes)

    def forward(self, x):
      x = self.cnn(x)
   
      out, hidden=self.rnn(x)

      out = self.linear(out[:, -1, :])

      return torch.sigmoid(out)
        

In [0]:
model = Combine(6).double()
if USE_CUDA == 1:
    model = model.cuda()
optim = torch.optim.Adam(model.parameters(), lr=1e-3, eps=1e-10)

In [0]:
bs = batch_size
def train(traindata,y_train, epochs, printevery=100, shuffle=True):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for i in range(len(traindata)//bs):
            optim.zero_grad()
            x, y = get_batch(traindata,y_train)
            if USE_CUDA == 1:
                x = Variable(x).cuda()
                y = Variable(y).cuda()
            preds = model(x)
            loss = F.binary_cross_entropy(preds.view(-1), y.view(-1))
            loss.backward()
            total_loss += loss.item()
            optim.step()
            if (i + 1) % printevery == 0:
                print("epoch: %d, iter %d/%d, loss %.4f"%(epoch + 1, i + 1, len(traindata)//2000, total_loss/printevery))
                total_loss = 0

In [0]:
def getPredictions(data,labels):
    model.eval()
    p = []
    res = []
    i = window_size
    bs = 2000
    while i < len(data):
        if i + bs > len(data):
            bs = len(data) - i
        x, y = get_batch(data,labels, bs, index=i, val=True)
        x = (x)
        x = x.cuda()
        y = y
        preds = model(x)
        preds = preds.squeeze(1)
        p.append(np.array(preds.cpu().data))
        res.append(np.array(y.data))
        i += bs
    preds = p[0]
    for i in p[1:]:
        preds = np.vstack((preds,i))

    targs = res[0]
    for i in res[1:]:
        targs = np.vstack((targs, i))
    return preds, targs

def valscore(preds, targs):
    aucs = [auc(targs[:, j], preds[:, j]) for j in range(6)]
    total_loss = np.mean(aucs)
    return total_loss


def accurecy(preds, targs):
  preds=np.where(preds>0.3,1,0)
  acc_score=[]
  for j in range(6):
    acc_score.append(accuracy_score(targs[:, j],preds[:, j]))
  return np.mean(acc_score)
 

In [14]:
import numpy as np
from sklearn.model_selection import KFold

subjects=range(1,13)


X=np.array([1,2,3,4,5,6,7,8])
kf = KFold(n_splits=4,shuffle=True)
count=1
auc_score=[]
for train_index, test_index in kf.split(X):
  print("Fold ",count)
  print("train",X[train_index],"test",X[test_index])
  X_train,y_train=load_data(subjects,series =X[train_index])
  X_train=data_preprocess_train(X_train)
  
  X_test,y_test=load_data(subjects,series=X[test_index])
  X_test=data_preprocess_test(X_test)
  train(X_train,y_train,1)
  val_preds, val_targs=getPredictions(X_test,y_test)
  print("check results")
  print(valscore(val_preds, val_targs))
  auc_score.append(valscore(val_preds, val_targs))
  with open("results.txt", "a") as res_file:
    res_file.write("train : "+str(X[train_index])+" test : "+str(X[test_index])+" AUC score : "+str(valscore(val_preds, val_targs))+" Accurecy Score: "+str(accurecy(val_preds, val_targs))+"\n")
  count+=1
print(np.mean(auc_score))

Fold  1
train [1 2 3 5 7 8] test [4 6]
epoch: 1, iter 100/6590, loss 0.2581
epoch: 1, iter 200/6590, loss 0.1192
epoch: 1, iter 300/6590, loss 0.1160
epoch: 1, iter 400/6590, loss 0.1195
epoch: 1, iter 500/6590, loss 0.1176
epoch: 1, iter 600/6590, loss 0.1138
epoch: 1, iter 700/6590, loss 0.1111
epoch: 1, iter 800/6590, loss 0.1087
epoch: 1, iter 900/6590, loss 0.1051
epoch: 1, iter 1000/6590, loss 0.0986
epoch: 1, iter 1100/6590, loss 0.1034
epoch: 1, iter 1200/6590, loss 0.0978
epoch: 1, iter 1300/6590, loss 0.0995
epoch: 1, iter 1400/6590, loss 0.1000
epoch: 1, iter 1500/6590, loss 0.0973
epoch: 1, iter 1600/6590, loss 0.1007
epoch: 1, iter 1700/6590, loss 0.1029
epoch: 1, iter 1800/6590, loss 0.0995
epoch: 1, iter 1900/6590, loss 0.0989
epoch: 1, iter 2000/6590, loss 0.1011
epoch: 1, iter 2100/6590, loss 0.0950
epoch: 1, iter 2200/6590, loss 0.0958
epoch: 1, iter 2300/6590, loss 0.0934
epoch: 1, iter 2400/6590, loss 0.1000
epoch: 1, iter 2500/6590, loss 0.0961
epoch: 1, iter 2600/