In [24]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from pre_processing import *

import os
import pickle as pkl

import sklearn
from sklearn.model_selection import train_test_split, GridSearchCV

from scipy.signal import spectrogram

import concurrent.futures

import torch
from torch import sigmoid
import torch.nn.init as init
from torch.nn import Module, Conv2d, Linear, MaxPool2d, AvgPool2d, ReLU, LogSoftmax, Flatten, Dropout, BCELoss
import torch.optim as optim
import torchvision
from torchvision import transforms, datasets
from torchvision import models
from torchsummary import summary
from torch.utils.data import DataLoader, Dataset, random_split
import torch.nn.functional as F
torch.manual_seed(42)
from dataclasses import dataclass

import skorch

from PIL import ImageFile
from skimage.io import imread
from matplotlib import image as mpimg

from bayes_opt import BayesianOptimization

import warnings
warnings.filterwarnings('ignore')

device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [25]:
# Training split, memory-mapped read-only so the arrays are not pulled into RAM
# until they are actually converted/used.
# The path is written as an absolute 'E:/...' path: the previous 'E:Work/...'
# form is drive-relative on Windows (resolved against the current directory of
# drive E:), and the later cells already use the 'E:/Work/...' form.
train_data = np.load('E:/Work/kcl/Scalogram_ali/25sec_5shift/dic/train/Train_data.npy', mmap_mode='r')
train_labels = np.load('E:/Work/kcl/Scalogram_ali/25sec_5shift/dic/train/Train_label.npy', mmap_mode='r')

In [26]:
# Test split, memory-mapped read-only.
# Absolute 'E:/...' path: the previous 'E:Work/...' form is drive-relative on
# Windows and only resolves correctly when drive E:'s current directory is its root.
test_data = np.load('E:/Work/kcl/Scalogram_ali/25sec_5shift/dic/test/Test_data.npy', mmap_mode='r')
test_labels = np.load('E:/Work/kcl/Scalogram_ali/25sec_5shift/dic/test/Test_label.npy', mmap_mode='r')

In [27]:
# Validation split, memory-mapped read-only.
# Absolute 'E:/...' path: the previous 'E:Work/...' form is drive-relative on
# Windows and only resolves correctly when drive E:'s current directory is its root.
val_data = np.load('E:/Work/kcl/Scalogram_ali/25sec_5shift/dic/val/Val_data.npy', mmap_mode='r')
val_labels = np.load('E:/Work/kcl/Scalogram_ali/25sec_5shift/dic/val/Val_label.npy', mmap_mode='r')

In [28]:
# L1 regularisation strength applied by cnn_maker.l1_reg().
l1_val = 1e-4


class cnn_maker(Module):
    """Small 2-D CNN that maps a multi-channel input to a single probability.

    Architecture: three convolutions (2x2, then two 1x1), each followed by ReLU
    and a 1x1/stride-1 max-pool (which leaves the tensor unchanged), dropout,
    a flatten, one hidden fully connected layer of 128 units and a final
    1-unit layer passed through a sigmoid.

    Parameters
    ----------
    in_channels : int, default 30
        Number of channels of the input tensor (dimension 1).
    flat_features : int, default 128 * 5174
        Number of features per sample after the convolution stack, i.e.
        128 * (H - 1) * (W - 1) for an input of spatial size (H, W), because
        only the first convolution (kernel size 2) shrinks the spatial size.
        The default matches inputs of shape (N, 30, 5175, 2).
    dropout : float, default 0.25
        Dropout probability applied before the flatten.
    """

    def __init__(self, in_channels=30, flat_features=128*5174, dropout=0.25):
        super(cnn_maker, self).__init__()
        # Layers are created in the same order as before so that parameter
        # initialisation consumes the torch RNG identically for default arguments.
        self.conv1 = Conv2d(in_channels=in_channels, out_channels=64, kernel_size=2)
        self.pool = MaxPool2d(kernel_size=1, stride=1)
        self.conv2 = Conv2d(in_channels=64, out_channels=128, kernel_size=1)
        self.conv3 = Conv2d(in_channels=128, out_channels=128, kernel_size=1)
        self.fc1 = Linear(flat_features, 128)
        self.drop = Dropout(dropout)
        self.fc_f = Linear(128, 1)

    def forward(self, x):
        """Return sigmoid probabilities of shape (N, 1) for a batch ``x``.

        Raises
        ------
        RuntimeError
            If the per-sample feature count after the convolutions does not
            match ``flat_features`` (raised by the first linear layer).
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = self.drop(x)
        # Flatten everything except the batch dimension. The previous
        # `view(-1, 128*5174)` could silently move surplus features into the
        # batch dimension when the input had an unexpected spatial size.
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))

        x = sigmoid(self.fc_f(x))
        return x

    def l1_reg(self):
        """Return ``l1_val`` times the sum of absolute values of all parameters.

        NOTE(review): nothing in the cells shown here calls this method, so the
        penalty is presumably not part of the training loss - confirm.
        """
        l1_loss = sum(param.abs().sum() for param in self.parameters())
        return l1_val * l1_loss

In [29]:
from skorch.callbacks import EpochScoring, EarlyStopping
from skorch.helper import predefined_split
from skorch.dataset import Dataset

In [30]:
# skorch wrapper around cnn_maker for binary classification.
# - `device` comes from the config cell, so the notebook also runs on CPU-only
#   machines instead of hard-coding 'cuda'.
# - `train_acc` is now scored on the training batches (on_train=True);
#   without it the scorer ran on the validation split, which is why the
#   recorded log shows train_acc identical to valid_acc.
# - Accuracy is a "higher is better" metric, so both callbacks set
#   lower_is_better=False; with the default (True) early stopping would wait
#   for valid_acc to *decrease*.
# NOTE(review): val_data/val_labels are loaded above and predefined_split is
# imported, but no train_split is passed here, so the valid_* metrics
# presumably come from skorch's default internal split - confirm intent.
model = skorch.NeuralNetBinaryClassifier(
    cnn_maker,
    criterion=torch.nn.BCELoss,
    optimizer=optim.Adagrad,
    max_epochs=50,
    batch_size=64,
    device=device,
    callbacks=[
        EpochScoring(scoring='accuracy', name='train_acc', on_train=True, lower_is_better=False),
        EarlyStopping(monitor="valid_acc", patience=3, lower_is_better=False),
    ],
)
# Stand-alone instance of the network (presumably for torchsummary - it is not
# used in the cells shown). Kept because constructing it also draws from the
# torch RNG before the skorch module is initialised.
summ = cnn_maker().to(device)

In [31]:
# Train the classifier on the training split, converting the memory-mapped
# arrays to float32 first.
# NOTE(review): val_data/val_labels are not passed here, so the valid_acc /
# valid_loss columns in the log below presumably come from skorch's default
# internal split of the training data - confirm.
model.fit(train_data.astype(np.float32), train_labels.astype(np.float32))

  epoch    train_acc    train_loss    valid_acc    valid_loss      dur
-------  -----------  ------------  -----------  ------------  -------
      1       [36m0.3827[0m       [32m61.1152[0m       [35m0.3827[0m       [31m61.7336[0m  67.5438
      2       0.3827       61.7336       0.3827       61.7336  7.7117
      3       0.3827       61.7336       0.3827       61.7336  8.5423
Stopping since valid_acc has not improved in the last 3 epochs.


<class 'skorch.classifier.NeuralNetBinaryClassifier'>[initialized](
  module_=cnn_maker(
    (conv1): Conv2d(30, 64, kernel_size=(2, 2), stride=(1, 1))
    (pool): MaxPool2d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
    (conv2): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1))
    (conv3): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
    (fc1): Linear(in_features=662272, out_features=128, bias=True)
    (drop): Dropout(p=0.25, inplace=False)
    (fc_f): Linear(in_features=128, out_features=1, bias=True)
  ),
)

In [23]:
model.score(test_data.astype(np.float32), test_labels.astype(np.float32))

0.55

In [24]:
model.score(train_data.astype(np.float32), train_labels.astype(np.float32))

0.38266384778012685

In [20]:
train_data.shape

(4730, 30, 5175, 2)

In [25]:
model.score(val_data.astype(np.float32), val_labels.astype(np.float32))

0.6060606060606061

# Post-Processing

In [34]:
# Predicted labels for the test split. `preds` is modified in place by the
# post-processing loops further down, so re-run this cell to get the raw
# predictions back.
preds = model.predict(test_data.astype(np.float32))

In [35]:
preds

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1], dtype=uint8)

In [36]:
test_labels

memmap([1., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1., 1., 1., 1., 0., 0.,
        0., 0., 1., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1., 1., 1., 1.,
        0., 0., 0., 0., 1., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1., 1.,
        1., 1., 0., 0., 0., 0., 1., 1., 1., 1., 1., 0., 0., 0., 0., 1.,
        1., 1., 1., 1., 0., 0., 0., 0., 1., 1., 1., 1., 1., 0., 0., 0.,
        0., 1., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1.,
        0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0.,
        1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1.,
        1., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0.,
        0., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 1., 1., 1., 1.,
        1., 1., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 0., 0., 0.,
        0., 0., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 1., 1., 1.,
        1., 1., 1., 0., 0., 0., 0., 0.])

# Post-processing Code

In [62]:
# Smooth isolated predictions in place: when the predictions two positions
# before and two positions after agree with each other but not with the
# current one, overwrite the current one with the neighbours' value.
# (Stride 2 presumably because two channels are interleaved - see the
# even/odd split below; confirm.)
# The loop is restricted to indices that have both neighbours. Previously
# i < 2 silently compared against the END of the array (negative indexing)
# and the last two indices relied on a bare `except:` to skip the IndexError.
# Updates are sequential, so a flipped value can influence later positions,
# and re-running the cell on already-smoothed `preds` smooths it again.
offset = 2
for i in range(offset, len(preds) - offset):
    past = preds[i - offset]
    pres = preds[i]
    fut = preds[i + offset]
    if past == fut and pres != past:
        preds[i] = past

In [63]:
preds

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

# TODO: Fix this, perhaps by splitting the predictions by channel, running the smoothing code on each channel separately, and then going over both channels for the duration and taking the mean of the two.

In [100]:
# De-interleave the predictions: even positions go to preds_1, odd positions to preds_2.
preds_1 = list(preds[0::2])
preds_2 = list(preds[1::2])

In [101]:
# Snapshot of the even-position predictions before smoothing, for a before/after comparison.
preds_1_old = list(preds_1)

In [102]:
# Edge-based smoothing of the even-position predictions, modifying preds_1 in place.
# For each position whose value differs from at least one direct neighbour:
#   * if (pres - past) > 0 the value is kept only when the two entries at
#     i+2 and i+3 are both 1; otherwise it is set to 0;
#   * if (pres - past) < 0 the value is set to 1.
# NOTE(review): the elements come from `preds`, whose displayed dtype is uint8.
# For uint8 values `pres - past` wraps (0 - 1 -> 255) instead of going negative,
# so the `elif` branch is presumably never taken and 1 -> 0 steps fall into the
# first branch, where they end up unchanged. The recorded before/after output
# (isolated 0s inside a run of 1s stay 0) is consistent with this.
# Decide the intended rule before "fixing" the arithmetic: with signed values
# the `elif` would turn every 0 that follows a 1 into 1, and because updates
# are in place that would propagate through a whole run of 0s.
for i in range(len(preds_1)):
    try:
        past = preds_1[i-1]  # at i == 0 this is preds_1[-1], i.e. the LAST element
        pres = preds_1[i]
        fut = preds_1[i+1]  # IndexError on the last index -> position skipped by the except
        tot = past+pres+fut
        if (pres != fut) or (pres != past):  # the three values are not all equal
            if (tot > 0) and ((pres - past) > 0):
                # Look-ahead; near the end of the list this raises IndexError
                # and the position is left unchanged by the except below.
                if preds_1[i+2] + preds_1[i+3] == 2:
                    continue
                else:
                    preds_1[i] = 0
            elif (tot > 0) and ((pres - past) < 0):
                preds_1[i] = 1
    except:
        # Bare except: used as end-of-list handling, but it also hides any other error.
        continue

In [19]:
# Smooth isolated odd-position predictions in place: when both direct
# neighbours agree with each other but not with the current value, overwrite
# the current value with the neighbours' value.
# Only indices that have both neighbours are visited. Previously i == 0
# silently used preds_2[-1] (the last element) as its "past" neighbour, and
# the last index relied on a bare `except:` to skip the IndexError.
# Updates are sequential, so a flipped value can influence the next position.
for i in range(1, len(preds_2) - 1):
    past = preds_2[i - 1]
    pres = preds_2[i]
    fut = preds_2[i + 1]
    if past == fut and pres != past:
        preds_2[i] = past

In [103]:
# Print each even-position prediction before and after smoothing, one pair per line.
for before, after in zip(preds_1_old, preds_1):
    print(f"{before} {after}")

0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
1 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
1 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
1 0
1 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
0 0
1 1
1 1
1 1
1 1
0 0
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1


In [104]:
# Side-by-side table of the original and the smoothed even-position predictions.
d = pd.DataFrame(dict(prediction=preds_1_old, flipped=preds_1))
d

Unnamed: 0,prediction,flipped
0,0,0
1,0,0
2,0,0
3,0,0
4,0,0
...,...,...
325,1,1
326,1,1
327,1,1
328,1,1


In [5]:
val_labels

memmap([1., 1., 1., 1., 1., 1., 1., 0., 0., 1., 1., 1., 1., 1., 1., 1.,
        0., 0., 1., 1., 1., 1., 1., 1., 1., 0., 0., 1., 1., 1., 1., 1.,
        1., 1., 0., 0., 1., 1., 1., 1., 1., 1., 1., 0., 0., 1., 1., 1.,
        1., 1., 1., 1., 0., 0., 1., 1., 1., 1., 1., 1., 1., 0., 0., 1.,
        1., 1., 1., 1., 1., 1., 0., 0., 1., 1., 1., 1., 1., 1., 1., 0.,
        0., 1., 1., 1., 1., 1., 1., 1., 0., 0., 1., 1., 1., 1., 0., 0.,
        1., 1., 1., 1., 0., 0., 1., 1., 1., 1., 0., 0., 1., 1., 1., 1.,
        0., 0., 1., 1., 1., 1., 0., 0., 1., 1., 1., 1., 0., 0., 1., 1.,
        1., 1., 0., 0., 1., 1., 1., 1., 0., 0., 1., 1., 1., 1., 0., 0.,
        1., 1., 1., 1., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1.,
        1., 1., 0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1.,
        1., 1., 0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1.,
        1., 1., 0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1.,
        1., 1., 0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 

In [6]:
len(val_labels)

330

# We know there will be 4 patterns here (the validation set has four sessions for one subject), and each of these patterns is repeated 10 times (once for each channel configuration). To do the post-processing, we need to make sure we correctly understand how to recover this information.

In [5]:
def sequence(x):
    """Return the leading label pattern of ``x`` as a list.

    The pattern is the prefix of ``x`` up to and including the first 0 that is
    immediately followed by a 1. If ``x`` contains no such 0 -> 1 transition,
    the whole of ``x`` is returned.

    Parameters
    ----------
    x : sequence of numbers (list, numpy array, memmap, ...)
        Labels, indexable by integer position.

    Returns
    -------
    list
        The elements of the leading pattern, in order (empty for empty input).
    """
    seq = []
    n = len(x)
    for i in range(n):
        pres = x[i]
        seq.append(pres)
        # Only look ahead when a next element exists. The previous version let
        # the IndexError skip the append, which dropped the final element
        # whenever no 0 -> 1 transition was found.
        if i + 1 < n and pres == 0 and x[i + 1] == 1:
            break
    return seq

In [6]:
seq = sequence(val_labels)
seq

[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0]

In [7]:
val_labels[:10*len(seq)]

memmap([1., 1., 1., 1., 1., 1., 1., 0., 0., 1., 1., 1., 1., 1., 1., 1.,
        0., 0., 1., 1., 1., 1., 1., 1., 1., 0., 0., 1., 1., 1., 1., 1.,
        1., 1., 0., 0., 1., 1., 1., 1., 1., 1., 1., 0., 0., 1., 1., 1.,
        1., 1., 1., 1., 0., 0., 1., 1., 1., 1., 1., 1., 1., 0., 0., 1.,
        1., 1., 1., 1., 1., 1., 0., 0., 1., 1., 1., 1., 1., 1., 1., 0.,
        0., 1., 1., 1., 1., 1., 1., 1., 0., 0.])

In [8]:
# Extract one label pattern per session from the validation labels.
# Per the note above: the validation set has 4 sessions, and each session's
# pattern is repeated 10 times (once per channel configuration).
N_SESSIONS = 4
N_CHANNEL_CONFIGS = 10

seqs = []
labs = val_labels
# A named loop variable is used instead of `_`, which IPython uses for the
# last cell output; assigning to it in a cell interferes with that feature.
for session_idx in range(N_SESSIONS):
    seq = sequence(labs)
    seqs.append(seq)
    # Skip past every repetition of this session's pattern.
    labs = labs[N_CHANNEL_CONFIGS * len(seq):]

In [9]:
seqs

[[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0],
 [1.0, 1.0, 1.0, 1.0, 0.0, 0.0],
 [1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0],
 [1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]]

# Now we need to generalise the hard-coded 4 above to any number of subjects and sessions, and include that information in the flipping step.

In [21]:
# Build per-row session / subject / truth columns from the session files in
# the validation directory, pairing the i-th session file with seqs[i].
path = 'E:/Work/kcl/Scalogram_ali/25sec_5shift/dic/val'
dat = dict(session = [], subject = [], truth = [])

# Number of times each session's label pattern is repeated (one per channel
# configuration, per the note above).
repeats = 10

# List the directory once, in a deterministic order, and drop the .npy arrays
# BEFORE pairing with seqs. Previously os.listdir() was re-read on every
# iteration and seqs was indexed by the position in the unfiltered listing, so
# any .npy file listed before a session file shifted the alignment.
# NOTE(review): assumes sorted file order matches the session order in val_labels - confirm.
session_files = [f for f in sorted(os.listdir(path)) if not f.endswith('npy')]
if len(session_files) != len(seqs):
    raise ValueError(
        f"found {len(session_files)} session files in {path!r} but {len(seqs)} label patterns in seqs"
    )

for file, seq in zip(session_files, seqs):
    # File names are expected to look like '<subject>.<session>.<...>'.
    nom = file.split('.')
    sub = nom[0]
    sesh = nom[1]
    n_rows = len(seq) * repeats
    dat['session'] += [sesh] * n_rows
    dat['subject'] += [sub] * n_rows
    dat['truth'] += seq * repeats

In [22]:
dat

{'session': ['1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '1',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  '2',
  

In [23]:
# Same stride-2 smoothing as in the "Post-processing Code" section, applied to
# `preds` in place: when the predictions two positions before and after agree
# with each other but not with the current one, the current one is overwritten.
# NOTE: `preds` must already exist (it is created by a `model.predict(...)`
# cell); the recorded run of this cell failed with NameError because it did not.
# Only indices with both neighbours are visited. Previously i < 2 wrapped
# around to the end of the array and a bare `except:` hid the IndexError at
# the other end.
offset = 2
for i in range(offset, len(preds) - offset):
    past = preds[i - offset]
    pres = preds[i]
    fut = preds[i + offset]
    if past == fut and pres != past:
        preds[i] = past

NameError: name 'preds' is not defined

# Full code to make a dataframe of the predictions and the flipped predictions

In [66]:
# End-to-end post-processing: predict, smooth isolated predictions, and collect
# truth / predicted / flipped / session / subject into one DataFrame.
# Relies on `model` and on `seqs` (per-session label patterns) from earlier cells.
DATA_TO_PREDICT = val_data.astype(np.float32)
PATH_OF_DIRECTORY_OF_DATA_TO_PREDICT = "E:/Work/kcl/Scalogram_ali/25sec_5shift/dic/val"
TRUTH_LABELS = val_labels.astype(np.float32)

preds = model.predict(DATA_TO_PREDICT)

path = PATH_OF_DIRECTORY_OF_DATA_TO_PREDICT

dat = dict(truth = [], predicted = [], flipped = [], session = [], subject = [])

# Number of times each session's label pattern is repeated (one per channel
# configuration, per the note earlier in the notebook).
repeats = 10

# List the directory once, in a deterministic order, and drop the .npy arrays
# BEFORE pairing with seqs. Previously seqs was indexed by the position in the
# unfiltered os.listdir() result, so a .npy file listed before a session file
# shifted the alignment.
# NOTE(review): assumes sorted file order matches the session order in the labels - confirm.
session_files = [f for f in sorted(os.listdir(path)) if not f.endswith('npy')]
if len(session_files) != len(seqs):
    raise ValueError(
        f"found {len(session_files)} session files in {path!r} but {len(seqs)} label patterns in seqs"
    )

for file, seq in zip(session_files, seqs):
    # File names are expected to look like '<subject>.<session>.<...>'.
    nom = file.split('.')
    sub = nom[0]
    sesh = nom[1]
    n_rows = len(seq) * repeats
    dat['session'] += [sesh] * n_rows
    dat['subject'] += [sub] * n_rows

# Flip a prediction when both direct neighbours agree with each other but not
# with it. Decisions are based on the ORIGINAL predictions (no cascading), and
# the first and last predictions are copied unchanged.
flips = list(preds)
for i in range(1, len(preds) - 1):
    past = preds[i - 1]
    pres = preds[i]
    fut = preds[i + 1]
    if past == fut and pres != past:
        flips[i] = past

# The truth column comes straight from the label array (the per-session
# patterns are only needed for the session/subject row counts above).
dat['truth'] = list(TRUTH_LABELS)
dat['predicted'] = preds
dat['flipped'] = flips
df = pd.DataFrame.from_dict(dat)

In [67]:
df

Unnamed: 0,truth,predicted,flipped,session,subject
0,1.0,1,1,1,P20
1,1.0,1,1,1,P20
2,1.0,1,1,1,P20
3,1.0,1,1,1,P20
4,1.0,1,1,1,P20
...,...,...,...,...,...
325,0.0,1,1,4,P20
326,0.0,1,1,4,P20
327,0.0,1,1,4,P20
328,0.0,1,1,4,P20
