# Mice Sleep Detection

In [1]:
import pandas as pd
import numpy as np
from numpy.fft import fft, fftshift
import matplotlib.pyplot as plt
from scipy import signal as sig
from tqdm import tqdm

In [2]:
train_eeg1 = pd.read_csv('train_eeg1.csv')
train_eeg2 = pd.read_csv('train_eeg2.csv')
train_emg = pd.read_csv('train_emg.csv')
test_eeg1 = pd.read_csv('test_eeg1.csv')
test_eeg2 = pd.read_csv('test_eeg2.csv')
test_emg = pd.read_csv('test_emg.csv')
train_labels = pd.read_csv('train_labels.csv')

In [3]:
train_eeg1 = train_eeg1.drop(['Id'],axis=1)
train_eeg2 = train_eeg2.drop(['Id'],axis=1)
train_emg = train_emg.drop(['Id'],axis=1)
test_eeg1 = test_eeg1.drop(['Id'],axis=1)
test_eeg2 = test_eeg2.drop(['Id'],axis=1)
test_emg = test_emg.drop(['Id'],axis=1)
train_labels = train_labels.drop(['Id'],axis=1)

## Preprocessing (Train)

Mouse 1,2,3 = Train

In [4]:
# Split into 3 mice
stop1 = 21600
stop2 = 43200
stop3 = 64800

m1_eeg1 = train_eeg1[0:stop1]
m1_eeg2 = train_eeg2[0:stop1]
m1_emg = train_emg[0:stop1]
m1_labels = train_labels[0:stop1]

m2_eeg1 = train_eeg1[stop1:stop2]
m2_eeg2 = train_eeg2[stop1:stop2]
m2_emg = train_emg[stop1:stop2]
m2_labels = train_labels[stop1:stop2]

m3_eeg1 = train_eeg1[stop2:stop3]
m3_eeg2 = train_eeg2[stop2:stop3]
m3_emg = train_emg[stop2:stop3]
m3_labels = train_labels[stop2:stop3]

#### Padding

In [5]:
def padding(df):
    firstSample = pd.DataFrame(df.iloc[0]).T
    lastSample = pd.DataFrame(df.iloc[-1]).T
    df = pd.concat([firstSample,df,lastSample], axis=0, join='outer', ignore_index=False)
    return df

In [6]:
m1_eeg1 = padding(m1_eeg1)
m1_eeg2 = padding(m1_eeg2)
m1_emg = padding(m1_emg)

m2_eeg1 = padding(m2_eeg1)
m2_eeg2 = padding(m2_eeg2)
m2_emg = padding(m2_emg)

m3_eeg1 = padding(m3_eeg1)
m3_eeg2 = padding(m3_eeg2)
m3_emg = padding(m3_emg)

assert m1_eeg1.shape == (21602,512)
assert m2_eeg1.shape == (21602,512)
assert m3_eeg1.shape == (21602,512)

#### Step a): Fourier Transformation with Hamming windows

In [7]:
def fftHamming(signal):
    '''
    @return: 24 numbers for that window
    '''
    
    # Apply Hamming Window
    assert signal.shape == (256,)
    window = np.hamming(256)
    signal = signal*window
    assert signal.shape == (256,)
    
    # Apply FFT
    fftResult = fft(signal, 128)
    
    # Calculate PSD
    psd = np.square(np.absolute(fftResult))

    return psd
    
def slidingWindow(df):
    '''
    @return: 24
    '''
    allValues = []
    
    ts = df.to_numpy().flatten()
    i = 385 # 512 - 127 (127 bc. we chose hamming window to be center in middle (chose left of the two middle elements))
    k = 0
    
    pbar = tqdm(position=0, leave=True, total = 21600*32)
    while(i < 385+21600*512):
        pbar.update(1)
        k += 1
        
        # Get the 24 Values for this window
        values = fftHamming(ts[i:i+256])
        
        allValues.append(values)
        i+=16
    pbar.close()
    assert int(k) == int((21600*32))
    
    return allValues

In [8]:
m1_eeg1_a = slidingWindow(m1_eeg1)
m1_eeg2_a = slidingWindow(m1_eeg2)
m1_emg_a = slidingWindow(m1_emg)

m2_eeg1_a = slidingWindow(m2_eeg1)
m2_eeg2_a = slidingWindow(m2_eeg2)
m2_emg_a = slidingWindow(m2_emg)

m3_eeg1_a = slidingWindow(m3_eeg1)
m3_eeg2_a = slidingWindow(m3_eeg2)
m3_emg_a = slidingWindow(m3_emg)

100%|██████████| 691200/691200 [00:09<00:00, 72001.35it/s]
100%|██████████| 691200/691200 [00:09<00:00, 73151.38it/s]
100%|██████████| 691200/691200 [00:09<00:00, 72451.46it/s]
100%|██████████| 691200/691200 [00:09<00:00, 72670.50it/s]
100%|██████████| 691200/691200 [00:09<00:00, 73674.13it/s]
100%|██████████| 691200/691200 [00:09<00:00, 72281.74it/s]
100%|██████████| 691200/691200 [00:09<00:00, 71791.47it/s]
100%|██████████| 691200/691200 [00:09<00:00, 71017.24it/s]
100%|██████████| 691200/691200 [00:09<00:00, 71832.08it/s]


#### Step b): Apply Bandpass Filtering or Integration

In [9]:
def bandpassFilter(signal):
    newsignal = []
    i = 1
    while i <= 48:
        newsignal.append(signal[i]+signal[i+1])
        i+=2
    assert len(newsignal) == 24
    return newsignal

def applyBandpassFilter(ts):
    newsignal = []
    for el in ts:
        newsignal.append(bandpassFilter(el))
        
    return newsignal

def applyIntegration(ts):
    newsignal = []
    mysum = 0
    for el in ts:
        mysum = np.sum(el[1:61])
        tempsignal = []
        for i in range(0,24):
            tempsignal.append(mysum)
        newsignal.append(tempsignal)
    
    assert len(newsignal) == len(ts)
    return newsignal

In [10]:
m1_eeg1_b = applyBandpassFilter(m1_eeg1_a)
m1_eeg2_b = applyBandpassFilter(m1_eeg2_a)
m1_emg_b = applyIntegration(m1_emg_a) # Integration
print('mouse1 finished')

m2_eeg1_b = applyBandpassFilter(m2_eeg1_a)
m2_eeg2_b = applyBandpassFilter(m2_eeg2_a)
m2_emg_b = applyIntegration(m2_emg_a) # Integration
print('mouse2 finished')

m3_eeg1_b = applyBandpassFilter(m3_eeg1_a)
m3_eeg2_b = applyBandpassFilter(m3_eeg2_a)
m3_emg_b = applyIntegration(m3_emg_a) # Integration
print('mouse3 finished')

mouse1 finished
mouse2 finished
mouse3 finished


#### Step c): Take Logarithm and standartize

In [11]:
from sklearn import preprocessing

m1_eeg1_c = preprocessing.scale(np.log(np.array(m1_eeg1_b)))
m1_eeg2_c = preprocessing.scale(np.log(np.array(m1_eeg2_b)))
m1_emg_c = preprocessing.scale(np.log(np.array(m1_emg_b)))
print('mouse1 finished')

m2_eeg1_c = preprocessing.scale(np.log(np.array(m2_eeg1_b)))
m2_eeg2_c = preprocessing.scale(np.log(np.array(m2_eeg2_b)))
m2_emg_c = preprocessing.scale(np.log(np.array(m2_emg_b)))
print('mouse2 finished')

m3_eeg1_c = preprocessing.scale(np.log(np.array(m3_eeg1_b)))
m3_eeg2_c = preprocessing.scale(np.log(np.array(m3_eeg2_b)))
m3_emg_c = preprocessing.scale(np.log(np.array(m3_emg_b)))
print('mouse3 finished')

assert m1_eeg1_c.shape == (691200, 24)

mouse1 finished
mouse2 finished
mouse3 finished


#### Bring it into Training shape

In [12]:
def padding2(df):
    df_end = np.insert(df,0,df[0:32],axis=0)
    df_end = np.insert(df_end,0,df[0:32],axis=0)
    df_end = np.append(df_end, df[-32:],axis=0)
    df_end = np.append(df_end, df[-32:],axis=0)

    return df_end

In [13]:
def createTrainSamples(df):
    i = 0
    df_end = []
    while i <= (691328 - 5*32):
        df_end.append(df[i:i+32*5])
        i += 32
    return df_end

In [14]:
m1_eeg1_end = np.array(createTrainSamples(padding2(m1_eeg1_c)))
m1_eeg2_end = np.array(createTrainSamples(padding2(m1_eeg2_c)))
m1_emg_end = np.array(createTrainSamples(padding2(m1_emg_c)))
print('mouse1 finished')

m2_eeg1_end = np.array(createTrainSamples(padding2(m2_eeg1_c)))
m2_eeg2_end = np.array(createTrainSamples(padding2(m2_eeg2_c)))
m2_emg_end = np.array(createTrainSamples(padding2(m2_emg_c)))
print('mouse2 finished')

m3_eeg1_end = np.array(createTrainSamples(padding2(m3_eeg1_c)))
m3_eeg2_end = np.array(createTrainSamples(padding2(m3_eeg2_c)))
m3_emg_end = np.array(createTrainSamples(padding2(m3_emg_c)))
print('mouse3 finished')

mouse1 finished
mouse2 finished
mouse3 finished


In [15]:
m1_end = np.stack([m1_eeg1_end,m1_eeg2_end,m1_emg_end], axis=1)
m2_end = np.stack([m2_eeg1_end,m2_eeg2_end,m2_emg_end], axis=1)
m3_end = np.stack([m3_eeg1_end,m3_eeg2_end,m3_emg_end], axis=1)

In [84]:
mice_train = np.concatenate([m1_end,m2_end],axis=0)
mice_val = m3_end

assert mice_train.shape == (43200,3,160,24)
assert mice_val.shape == (21600,3,160,24)

In [85]:
labels_train = np.concatenate([m1_labels, m2_labels], axis=0)
labels_val = m3_labels.to_numpy()

In [86]:
labels_train = labels_train-1
labels_val = labels_val-1

## Preprocessing (Test)

In [34]:
# Split into 3 mice
stop1 = 21600
stop2 = 43200

m4_eeg1 = test_eeg1[0:stop1]
m4_eeg2 = test_eeg2[0:stop1]
m4_emg = test_emg[0:stop1]

m5_eeg1 = test_eeg1[stop1:stop2]
m5_eeg2 = test_eeg2[stop1:stop2]
m5_emg = test_emg[stop1:stop2]

#### Padding

In [35]:
m4_eeg1 = padding(m4_eeg1)
m4_eeg2 = padding(m4_eeg2)
m4_emg = padding(m4_emg)

m5_eeg1 = padding(m5_eeg1)
m5_eeg2 = padding(m5_eeg2)
m5_emg = padding(m5_emg)

assert m4_eeg1.shape == (21602,512)
assert m5_eeg1.shape == (21602,512)

#### Step a): Fourier Transformation with Hamming windows

In [36]:
m4_eeg1_a = slidingWindow(m4_eeg1)
m4_eeg2_a = slidingWindow(m4_eeg2)
m4_emg_a = slidingWindow(m4_emg)

m5_eeg1_a = slidingWindow(m5_eeg1)
m5_eeg2_a = slidingWindow(m5_eeg2)
m5_emg_a = slidingWindow(m5_emg)

100%|██████████| 691200/691200 [00:09<00:00, 71197.93it/s]
100%|██████████| 691200/691200 [00:09<00:00, 71740.42it/s]
100%|██████████| 691200/691200 [00:09<00:00, 71929.43it/s]
100%|██████████| 691200/691200 [00:09<00:00, 72524.93it/s]
100%|██████████| 691200/691200 [00:09<00:00, 72014.38it/s]
100%|██████████| 691200/691200 [00:09<00:00, 71729.21it/s]


#### Step b): Apply Bandpass Filtering or Integration

In [37]:
m4_eeg1_b = applyBandpassFilter(m4_eeg1_a)
m4_eeg2_b = applyBandpassFilter(m4_eeg2_a)
m4_emg_b = applyIntegration(m4_emg_a) # Integration
print('mouse4 finished')

m5_eeg1_b = applyBandpassFilter(m5_eeg1_a)
m5_eeg2_b = applyBandpassFilter(m5_eeg2_a)
m5_emg_b = applyIntegration(m5_emg_a) # Integration
print('mouse5 finished')

mouse4 finished
mouse5 finished


#### Step c): Take Logarithm and standartize

In [38]:
from sklearn import preprocessing

m4_eeg1_c = preprocessing.scale(np.log(np.array(m4_eeg1_b)))
m4_eeg2_c = preprocessing.scale(np.log(np.array(m4_eeg2_b)))
m4_emg_c = preprocessing.scale(np.log(np.array(m4_emg_b)))
print('mouse4 finished')

m5_eeg1_c = preprocessing.scale(np.log(np.array(m5_eeg1_b)))
m5_eeg2_c = preprocessing.scale(np.log(np.array(m5_eeg2_b)))
m5_emg_c = preprocessing.scale(np.log(np.array(m5_emg_b)))
print('mouse5 finished')

assert m4_eeg1_c.shape == (691200, 24)

mouse4 finished
mouse5 finished


#### Bring it into Training shape

In [39]:
m4_eeg1_end = np.array(createTrainSamples(padding2(m4_eeg1_c)))
m4_eeg2_end = np.array(createTrainSamples(padding2(m4_eeg2_c)))
m4_emg_end = np.array(createTrainSamples(padding2(m4_emg_c)))
print('mouse4 finished')

m5_eeg1_end = np.array(createTrainSamples(padding2(m5_eeg1_c)))
m5_eeg2_end = np.array(createTrainSamples(padding2(m5_eeg2_c)))
m5_emg_end = np.array(createTrainSamples(padding2(m5_emg_c)))
print('mouse5 finished')

mouse4 finished
mouse5 finished


In [87]:
m4_end = np.stack([m4_eeg1_end,m4_eeg2_end,m4_emg_end], axis=1)
m5_end = np.stack([m5_eeg1_end,m5_eeg2_end,m5_emg_end], axis=1)

In [88]:
mice_test = np.concatenate([m4_end,m5_end],axis=0)
assert mice_test.shape == (43200,3,160,24)

## NNet

In [89]:
import torch
from torch.nn import functional as F
from torch import nn
import pytorch_lightning as pl
from pytorch_lightning.core.lightning import LightningModule
from torch.utils.data import Dataset, DataLoader, random_split


import pandas as pd
import torch
import pdb

from torch.nn import functional as F
from torch import nn
import pytorch_lightning as pl
from pytorch_lightning.core.lightning import LightningModule
from torch.utils.data import Dataset, DataLoader, random_split
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif,mutual_info_classif
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import matplotlib.cm as cm
from sklearn import preprocessing
from sklearn.metrics import balanced_accuracy_score

from pytorch_lightning.metrics.functional.classification import stat_scores_multiple_classes as ssmc
from pytorch_lightning.metrics.functional.classification import multiclass_roc as mc
from pytorch_lightning.metrics.functional import confusion_matrix

In [90]:
class TrainDataset(Dataset):
    def __init__(self):
        permutation = torch.randperm(43200)
        self.X_train = torch.from_numpy(mice_train).float()
        self.X_train = self.X_train[permutation]
        self.y_train = torch.from_numpy(labels_train)
        self.y_train = self.y_train[permutation]
        self.n_samples = self.X_train.shape[0]

    def __getitem__(self, index):
        return self.X_train[index], self.y_train[index]
        
    def __len__(self):
        return self.n_samples
    
class ValidationDataset(Dataset):
    def __init__(self):
        permutation = torch.randperm(21600)
        self.X_val = torch.from_numpy(mice_val).float()
        self.X_val = self.X_val[permutation]
        self.y_val = torch.from_numpy(labels_val)
        self.y_val = self.y_val[permutation]
        self.n_samples = self.X_val.shape[0]     
    
    def __getitem__(self, index):
        return self.X_val[index], self.y_val[index]
        
    def __len__(self):
        return self.n_samples
    
class TestDataset(Dataset):
    def __init__(self):
        self.X_test = torch.from_numpy(mice_test).float()
        self.n_samples = self.X_test.shape[0]     
    
    def __getitem__(self, index):
        return self.X_test[index]
        
    def __len__(self):
        return self.n_samples

In [91]:
from sklearn.metrics import balanced_accuracy_score

class NiciNet(LightningModule):
    def __init__(self):
        super().__init__()
        self.losses = []
        self.accs = []
        self.maxPool1 = nn.MaxPool2d((2, 3), stride=(2, 3))
        self.CNN = nn.Conv2d(3, 50, (3, 3), stride=(1, 1))
        self.maxPool2 = nn.MaxPool2d((2, 2), stride=(2, 2))
        self.dense1 = nn.Linear(50*39*3,1000)
        self.dropout1 = nn.Dropout(0.5)
        self.dense2 = nn.Linear(1000,1000)
        self.dropout2 = nn.Dropout(0.5)
        self.dense3 = nn.Linear(1000,3)
        self.dropout3 = nn.Dropout(0.5)
        self.softmax = nn.LogSoftmax(dim=1)
        
    def forward(self, x):
        
        '''
        (B,3,160,24)
        '''
        B = x.shape[0]
        assert x.shape == (B,3,160,24)
        x = self.maxPool1(x)
        assert x.shape == (B,3,80,8)
        
        x = self.CNN(x)
        x = F.relu(x)
        assert x.shape == (B,50,78,6)
        
        x = self.maxPool2(x)
        assert x.shape == (B,50,39,3)
        
        x = x.view(-1,50*39*3)
        x = self.dense1(x)
        x = F.relu(x)
        x = self.dropout1(x)
        x = self.dense2(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.dense3(x)
        x = self.dropout3(x)
        
        x = self.softmax(x)
        return x
        
    def prepare_data(self):
        self.trainDataset = TrainDataset()
        self.validationDataset = ValidationDataset()
        self.testDataset = TestDataset() 
        
    def train_dataloader(self):
        return DataLoader(self.trainDataset, batch_size=100, num_workers=7)
        
    def val_dataloader(self):
        return DataLoader(self.validationDataset, batch_size=21600)
    
    def test_dataloader(self):
        return DataLoader(self.testDataset,batch_size=1)
                         
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=5e-5)
        return optimizer
    
    def myloss(self, logits, labels):
        labels = labels.squeeze(dim=1)
                
        l0 = 0
        l1 = 0
        l2 = 0
        
        for l in labels:
            if (l == 0):
                l0 += 1
            if (l == 1):
                l1 += 1
            if (l == 2):
                l2 += 1
                
        weight0 = 0 
        weight1 = 0
        weight2 = 0
        
        if (l0 == 0):
            weight0 = 0
        else:
            weight0 = 1/l0
        if (l1 == 0):
            weight1 = 0
        else:
            weight1 = 1/l1
        if (l2 == 0):
            weight2 = 0
        else:
            weight2 = 1/l2
                
        lossfn = nn.NLLLoss(weight=torch.tensor([weight0,weight1,weight2]).cuda())        
        loss = lossfn(logits,labels)
        
        return loss
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self.forward(x)
        loss = self.myloss(logits, y)
        
        tensorboard_logs = {'train_loss': loss}
        return {'loss': loss, 'log': tensorboard_logs}
        
    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self.forward(x)
        loss = self.myloss(logits, y)

        # Calcualte BMAC
        winners = torch.argmax(logits, dim=1).cpu()
        print(balanced_accuracy_score(y.cpu(),winners))
        self.accs.append(balanced_accuracy_score(y.cpu(),winners))
        self.losses.append(loss.item())
        
        tensorboard_logs = {'val_loss': loss}
        return {'loss': loss, 'log': tensorboard_logs}
    
    def validation_epoch_end(self, logs):
        avg = np.mean(np.array(self.losses))
        self.losses = []
        self.accs = []
    
  

In [93]:
model = NiciNet()
trainer = pl.Trainer(gpus=1,max_epochs=6,check_val_every_n_epoch=2)
trainer.fit(model)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type       | Params
----------------------------------------
0 | maxPool1 | MaxPool2d  | 0     
1 | CNN      | Conv2d     | 1.4 K 
2 | maxPool2 | MaxPool2d  | 0     
3 | dense1   | Linear     | 5.9 M 
4 | dropout1 | Dropout    | 0     
5 | dense2   | Linear     | 1.0 M 
6 | dropout2 | Dropout    | 0     
7 | dense3   | Linear     | 3.0 K 
8 | dropout3 | Dropout    | 0     
9 | softmax  | LogSoftmax | 0     


HBox(children=(HTML(value='Validation sanity check'), FloatProgress(value=1.0, bar_style='info', layout=Layout…

0.2315800669045244


HBox(children=(HTML(value='Training'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), max…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

0.9515024410268463


HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

0.9517424633473156


HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

0.9522616506682868



1

## Predicting Test

In [None]:
model.eval()

In [None]:
mice_test.shape

In [None]:
preds = model(torch.from_numpy(mice_test).float())

In [None]:
winners = torch.argmax(preds, dim=1).cpu()

In [None]:
winners = np.array(winners)

In [None]:
a = 0
b = 0
c = 0

for i in range(0,43200):
    if winners[i] == 0:
        a += 1
    if winners[i] == 1:
        b += 1
    if winners[i] == 2:
        c += 1

print(a)
print(b)
print(c)

In [None]:
## Change to 1,2,3 from 0,1,2 (class labels)
winners += 1

In [None]:
y_handin = pd.read_csv('sample.csv')
y_handin['y'] = winners
y_handin.to_csv(r'y_test.csv', index = False)

## Post Processing

#### Accuracy on m3 before postprocessing


In [94]:
model.eval()

NiciNet(
  (maxPool1): MaxPool2d(kernel_size=(2, 3), stride=(2, 3), padding=0, dilation=1, ceil_mode=False)
  (CNN): Conv2d(3, 50, kernel_size=(3, 3), stride=(1, 1))
  (maxPool2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (dense1): Linear(in_features=5850, out_features=1000, bias=True)
  (dropout1): Dropout(p=0.5, inplace=False)
  (dense2): Linear(in_features=1000, out_features=1000, bias=True)
  (dropout2): Dropout(p=0.5, inplace=False)
  (dense3): Linear(in_features=1000, out_features=3, bias=True)
  (dropout3): Dropout(p=0.5, inplace=False)
  (softmax): LogSoftmax(dim=1)
)

In [95]:
my_train_labels = m3_labels - 1
my_train_data = m3_end

In [96]:
preds = model(torch.from_numpy(my_train_data).float())

In [97]:
winners = np.array(torch.argmax(preds, dim=1).cpu())

In [98]:
print(balanced_accuracy_score(my_train_labels,winners))

0.9522616506682868


#### Trainingset MoNet on M2

In [99]:
pred_train = model(torch.from_numpy(m3_end).float())
my_labels_train = (m3_labels[5:21595] - 1).to_numpy()

In [100]:
pred_train = pred_train.detach().numpy()

In [101]:
trainingset = []

for i in range(5,21595):
    sample = []
    for j in range(i-5,i+6):
        sample.append(pred_train[j][0])
        sample.append(pred_train[j][1])
        sample.append(pred_train[j][2])
    trainingset.append(sample)
        
trainingset = np.array(trainingset)

In [102]:
assert trainingset.shape[0] == my_labels_train.shape[0]

#### Validationset MoNet on M3

In [55]:
my_val_labels = (m3_labels[5:21595] - 1).to_numpy()
my_val_data = m3_end

In [56]:
pred_val = model(torch.from_numpy(my_val_data).float())
pred_val = pred_val.detach().numpy()

In [57]:
validationset = []
for i in range(5,21595):
    sample = []
    for j in range(i-5,i+6):
        sample.append(pred_val[j][0])
        sample.append(pred_val[j][1])
        sample.append(pred_val[j][2])
    validationset.append(sample)
        
validationset = np.array(validationset)

#### Training MoNet

In [103]:
class TrainDataset(Dataset):
    def __init__(self):
        permutation = torch.randperm(21590)
        self.X_train = torch.from_numpy(trainingset).float()
        self.X_train = self.X_train[permutation]
        self.y_train = torch.from_numpy(my_labels_train)
        self.y_train = self.y_train[permutation]
        self.n_samples = self.X_train.shape[0]

    def __getitem__(self, index):
        return self.X_train[index], self.y_train[index]
        
    def __len__(self):
        return self.n_samples
    
class ValidationDataset(Dataset):
    def __init__(self):
        permutation = torch.randperm(21590)
        self.X_val = torch.from_numpy(validationset).float()
        self.X_val = self.X_val[permutation]
        self.y_val = torch.from_numpy(my_val_labels)
        self.y_val = self.y_val[permutation]
        self.n_samples = self.X_val.shape[0]     
    
    def __getitem__(self, index):
        return self.X_val[index], self.y_val[index]
        
    def __len__(self):
        return self.n_samples

In [104]:
from sklearn.metrics import balanced_accuracy_score

class MoNet(LightningModule):
    def __init__(self):
        super().__init__()
        self.losses = []
        self.accs = []

        self.dense1 = nn.Linear(33,10)
        self.dropout1 = nn.Dropout(0.5)
        self.dense2 = nn.Linear(10,3)
        self.dropout2 = nn.Dropout(0.5)
        self.softmax = nn.LogSoftmax(dim=1)
        
    def forward(self, x):
        
        '''
        (B,3,160,24)
        '''
        x = self.dense1(x)
        x = F.relu(x)
        x = self.dense2(x)
        x = F.relu(x)
        
        x = self.softmax(x)
        return x
        
    def prepare_data(self):
        self.trainDataset = TrainDataset()
        self.validationDataset = ValidationDataset()
        self.testDataset = TestDataset() 
        
    def train_dataloader(self):
        return DataLoader(self.trainDataset, batch_size=100, num_workers=7)
        
    def val_dataloader(self):
        return DataLoader(self.validationDataset, batch_size=21590)
    
    def test_dataloader(self):
        return DataLoader(self.testDataset,batch_size=1)
                         
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer
    
    def myloss(self, logits, labels):
        labels = labels.squeeze(dim=1)
                
        l0 = 0
        l1 = 0
        l2 = 0
        
        for l in labels:
            if (l == 0):
                l0 += 1
            if (l == 1):
                l1 += 1
            if (l == 2):
                l2 += 1
                
        weight0 = 0 
        weight1 = 0
        weight2 = 0
        
        if (l0 == 0):
            weight0 = 0
        else:
            weight0 = 1/l0
        if (l1 == 0):
            weight1 = 0
        else:
            weight1 = 1/l1
        if (l2 == 0):
            weight2 = 0
        else:
            weight2 = 1/l2
                
        lossfn = nn.NLLLoss(weight=torch.tensor([weight0,weight1,weight2]).cuda())        
        loss = lossfn(logits,labels)
        
        return loss
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self.forward(x)
        loss = self.myloss(logits, y)
        
        tensorboard_logs = {'train_loss': loss}
        return {'loss': loss, 'log': tensorboard_logs}
        
    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self.forward(x)
        loss = self.myloss(logits, y)

        # Calcualte BMAC
        winners = torch.argmax(logits, dim=1).cpu()
        print(balanced_accuracy_score(y.cpu(),winners))
        self.accs.append(balanced_accuracy_score(y.cpu(),winners))
        self.losses.append(loss.item())
        
        tensorboard_logs = {'val_loss': loss}
        return {'loss': loss, 'log': tensorboard_logs}
    
    def validation_epoch_end(self, logs):
        avg = np.mean(np.array(self.losses))
        self.losses = []
        self.accs = []
    
  

In [105]:
model2 = MoNet()
trainer = pl.Trainer(gpus=1,max_epochs=4,check_val_every_n_epoch=2)
trainer.fit(model2)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type       | Params
----------------------------------------
0 | dense1   | Linear     | 340   
1 | dropout1 | Dropout    | 0     
2 | dense2   | Linear     | 33    
3 | dropout2 | Dropout    | 0     
4 | softmax  | LogSoftmax | 0     


HBox(children=(HTML(value='Validation sanity check'), FloatProgress(value=1.0, bar_style='info', layout=Layout…

0.29000298217958503


HBox(children=(HTML(value='Training'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), max…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

0.9684128226372745


HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

0.9791799162234848



1

#### Return on m3 to see improvement?

In [106]:
model2.eval()

MoNet(
  (dense1): Linear(in_features=33, out_features=10, bias=True)
  (dropout1): Dropout(p=0.5, inplace=False)
  (dense2): Linear(in_features=10, out_features=3, bias=True)
  (dropout2): Dropout(p=0.5, inplace=False)
  (softmax): LogSoftmax(dim=1)
)

In [107]:
preds = model2(torch.from_numpy(validationset).float())

In [108]:
winners = np.array(torch.argmax(preds, dim=1).cpu())

In [109]:
print(balanced_accuracy_score(my_val_labels,winners))

0.9791459995229014


## Apply Postprocessing on Testset

In [110]:
model.eval()

NiciNet(
  (maxPool1): MaxPool2d(kernel_size=(2, 3), stride=(2, 3), padding=0, dilation=1, ceil_mode=False)
  (CNN): Conv2d(3, 50, kernel_size=(3, 3), stride=(1, 1))
  (maxPool2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (dense1): Linear(in_features=5850, out_features=1000, bias=True)
  (dropout1): Dropout(p=0.5, inplace=False)
  (dense2): Linear(in_features=1000, out_features=1000, bias=True)
  (dropout2): Dropout(p=0.5, inplace=False)
  (dense3): Linear(in_features=1000, out_features=3, bias=True)
  (dropout3): Dropout(p=0.5, inplace=False)
  (softmax): LogSoftmax(dim=1)
)

In [111]:
preds_m4 = model(torch.from_numpy(m4_end).float())

In [112]:
preds_m4 = preds_m4.detach().numpy()

In [113]:
model.eval()

NiciNet(
  (maxPool1): MaxPool2d(kernel_size=(2, 3), stride=(2, 3), padding=0, dilation=1, ceil_mode=False)
  (CNN): Conv2d(3, 50, kernel_size=(3, 3), stride=(1, 1))
  (maxPool2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (dense1): Linear(in_features=5850, out_features=1000, bias=True)
  (dropout1): Dropout(p=0.5, inplace=False)
  (dense2): Linear(in_features=1000, out_features=1000, bias=True)
  (dropout2): Dropout(p=0.5, inplace=False)
  (dense3): Linear(in_features=1000, out_features=3, bias=True)
  (dropout3): Dropout(p=0.5, inplace=False)
  (softmax): LogSoftmax(dim=1)
)

In [114]:
preds_m5 = model(torch.from_numpy(m5_end).float())

In [115]:
preds_m5 = preds_m5.detach().numpy()

#### For samples (5,21595) do postprocessing

In [116]:
testdataset1 = []

for i in range(5,21595):
    sample = []
    for j in range(i-5,i+6):
        sample.append(preds_m4[j][0])
        sample.append(preds_m4[j][1])
        sample.append(preds_m4[j][2])
    testdataset1.append(sample)
        
testdataset1 = np.array(testdataset1)

In [117]:
testdataset2 = []

for i in range(5,21595):
    sample = []
    for j in range(i-5,i+6):
        sample.append(preds_m5[j][0])
        sample.append(preds_m5[j][1])
        sample.append(preds_m5[j][2])
    testdataset2.append(sample)
        
testdataset2 = np.array(testdataset2)

In [118]:
model2.eval()

MoNet(
  (dense1): Linear(in_features=33, out_features=10, bias=True)
  (dropout1): Dropout(p=0.5, inplace=False)
  (dense2): Linear(in_features=10, out_features=3, bias=True)
  (dropout2): Dropout(p=0.5, inplace=False)
  (softmax): LogSoftmax(dim=1)
)

In [119]:
preds_test1 = model2(torch.from_numpy(testdataset1).float())

In [120]:
preds_test2 = model2(torch.from_numpy(testdataset2).float())

#### Replace postprocessed samples

In [121]:
preds_m4

array([[-2.3841855e-07, -1.5570528e+01, -1.6095476e+01],
       [ 0.0000000e+00, -1.6804090e+01, -1.7417183e+01],
       [-3.5762781e-07, -1.5415998e+01, -1.6527508e+01],
       ...,
       [-3.4952323e+00, -2.0757461e-01, -1.8508370e+00],
       [-2.8224123e+00, -3.6785606e-01, -1.3930334e+00],
       [-2.7304614e+00, -2.4597836e-01, -1.8781569e+00]], dtype=float32)

In [122]:
winners1 = np.argmax(preds_m4, axis=1)
winners1[5:21595] = np.array(torch.argmax(preds_test1, dim=1).cpu())

In [123]:
winners2 = np.argmax(preds_m5, axis=1)
winners2[5:21595] = np.array(torch.argmax(preds_test2, dim=1).cpu())

In [134]:
finalpred = np.concatenate([winners1,winners2])

In [135]:
finalpred += 1

In [136]:
a = 0
b = 0
c = 0

for i in range(0,43200):
    if finalpred[i] == 1:
        a += 1
    if finalpred[i] == 2:
        b += 1
    if finalpred[i] == 3:
        c += 1

print(a)
print(b)
print(c)

18846
22154
2200


In [137]:
y_handin = pd.read_csv('sample.csv')
y_handin['y'] = finalpred
y_handin.to_csv(r'y_test.csv', index = False)

## Post-Post Processing

In [148]:
finalpred = pd.read_csv('y_test (1).csv')
finalpred = finalpred['y'].to_numpy()

In [149]:
## Eliminate loners
k = 0
for i in range (3, finalpred.shape[0]-3):
    if finalpred[i-1] == finalpred[i+1] and finalpred[i] != finalpred[i+1]:
        finalpred[i] = finalpred[i+1]
        k+=1

In [150]:
y_handin = pd.read_csv('sample.csv')
y_handin['y'] = finalpred
y_handin.to_csv(r'y_test.csv', index = False)