In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchtext
import random
import math
import time
import torch.nn.functional as F

from tqdm.notebook import tqdm
from torch import optim
from torch.utils.data import DataLoader, Dataset,  WeightedRandomSampler
import numpy as np
from  scipy import stats
import scipy
np.random.seed(1)
torch.manual_seed(1)
random.seed(1)

In [2]:
import os
os.chdir("/content/drive/MyDrive/MADE/Project/deap")

In [3]:
def get_padding(in_size, kernel_size, stride):
    if (in_size % stride == 0):
        padding = max(kernel_size - stride, 0)
    else:
        padding = max(kernel_size - (in_size % stride), 0)
    return (padding)    

In [4]:
def get_feature_extractor():
  conv1 = nn.Conv2d(1, 64, kernel_size = (3, 3), stride=(1, 1), padding='same')
  bn1 = nn.BatchNorm2d(64)
  conv2 = nn.Conv2d(64, 128, kernel_size = (3, 3), stride=(1, 1), padding='same')
  bn2 = nn.BatchNorm2d(128)
  conv3 = nn.Conv2d(128, 256, kernel_size = (3, 3), stride=(1, 1), padding='same')
  bn3 = nn.BatchNorm2d(256)
  result = torch.nn.Sequential(conv1, bn1, conv2, bn2, conv3, bn3)
  return(result)

In [5]:
import glob
import pickle
from sklearn.preprocessing import MinMaxScaler

data = []
labels = []
data_dir = './data_preprocessed_python'
files = glob.glob(os.path.join(data_dir, "*.dat"))
data_raw = []
for file_data in files:
    raw_data = pickle.load(open(file_data, 'rb'), encoding='latin1')
    data.append(raw_data['data'])
    #data_raw.append(raw_data['data'][:, :, :])
    # data_raw[-1][0, :32, 0]
    # print(data_raw[-1][:, :31, :].min())
    # print(data_raw[-1][:, :31, :].max())
    # scaler = MinMaxScaler()
    # for i in range(40):
    #     #scaler.fit(data[-1][i])
    #     scaler = MinMaxScaler()
    #     data[-1][i] = scaler.fit_transform(data[-1][i])
    #     print(data_raw[-1][i])
    #     print(data[-1][i])
    #     break
    labels.append(raw_data['labels'])
    #break


In [6]:
class EmotionNet(torch.nn.Module): 
   def __init__(self, hcanals, wcanals, nfeatures, ntimes_in_sample):
      super().__init__()
      #print("1")
      self.convs = nn.ModuleList([get_feature_extractor() for _ in range(ntimes_in_sample)])
      self.conv2 = nn.Conv2d(256 * ntimes_in_sample, 15, kernel_size = 1, stride=1)
      self.flat = nn.Flatten(1, 3)
      self.fc = nn.Linear(1215, 2)
      
   def forward(self, input):
      #print(f"input_shape = {input.shape}")
      input = input.permute(0, 3, 1, 2)
      #input (bs, time = 128,  h=9, w=9)

      outputs = []
      for i, conv in enumerate(self.convs):
            outputs.append(conv(input[:, i, :, :].unsqueeze(1)))
            #print(outputs[i])

      output = torch.cat(outputs, dim = 1)
      #output (bs, s * 256,  h=9, w=9)
      output1 = self.conv2(output)


      output2 = output1.permute(0, 2, 3, 1)
      output3 = self.flat(output2)
      #output1 (bs, 15,  h=9, w=9)
      return output3

In [7]:
LEN_RECORD_IN_SECONDS = 60
NVIDEOS = 40
HCANALS = 9
WCANALS = 9
NTIMES_IN_SAMPLE = 128
NTIMES_IN_SEC = 128
NCANALS = 32
NFEATURES = 32
electrode_matrix = {}
electrode_matrix['FP1'] = [0, 3]
electrode_matrix['FP2'] = [0, 5]
electrode_matrix['AF3'] = [1, 3]
electrode_matrix['AF4'] = [1, 5]
electrode_matrix['F7']  = [2, 0]
electrode_matrix['F3']  = [2, 2]
electrode_matrix['FZ']  = [2, 4]
electrode_matrix['F4']  = [2, 6]
electrode_matrix['F8']  = [2, 8]
electrode_matrix['FC5']  = [3, 1]
electrode_matrix['FC1']  = [3, 3]
electrode_matrix['FC2']  = [3, 5]
electrode_matrix['FC6']  = [3, 7]
electrode_matrix['T7']  = [4, 0]
electrode_matrix['C3']  = [4, 2]
electrode_matrix['CZ']  = [4, 4]
electrode_matrix['C4']  = [4, 6]
electrode_matrix['T8']  = [4, 8]
electrode_matrix['CP5']  = [5, 1]
electrode_matrix['CP1']  = [5, 3]
electrode_matrix['CP2']  = [5, 5]
electrode_matrix['CP6']  = [5, 7]
electrode_matrix['P7']  = [6, 0]
electrode_matrix['P3']  = [6, 2]
electrode_matrix['PZ']  = [6, 4]
electrode_matrix['P4']  = [6, 6]
electrode_matrix['P8']  = [6, 8]
electrode_matrix['PO3'] = [7, 3]
electrode_matrix['PO4'] = [7, 5]
electrode_matrix['O1'] = [8, 3]
electrode_matrix['OZ'] = [8, 4]
electrode_matrix['O2'] = [8, 5]

list_electrodes = ['FP1', 'AF3', 'F3', 'F7', 'FC5', 'FC1', 'C3',	'T7',	'CP5',	'CP1',	'P3',	'P7',	'PO3',	'O1',	'OZ',	'PZ',	'FP2',	'AF4', 'FZ', 'F4', 'F8', 'FC6',	'FC2',	'CZ', 'C4', 'T8', 'CP6',	'CP2',	'P4', 	'P8',	'PO4',	'O2']
data_dir = './data_preprocessed_python'
TRAIN_SIZE = 0.9
THRESHOLD = 5

In [8]:
import glob
import pickle
from collections import Counter

class EmotionDataset(Dataset):
    def __init__ (self, data_dir, type, ind, data, labels):
       self.data = []
       self.labels = []
       self.cnt = [Counter(), Counter(), Counter(),Counter()]
       #data_dir = './data_preprocessed_python'
       #files = glob.glob(os.path.join(data_dir, "*.dat"))[0:1]
       self.type = type
       #split = int(LEN_RECORD_IN_SECONDS)# *  TRAIN_SIZE)
       self.ind = ind
       self.len_files = []
       for s in range(len(data)):
            #print(file_data)
            #raw_data = pickle.load(open(file_data, 'rb'), encoding='latin1')
            #print(raw_data['data'].shape)
            #labels = raw_data['labels']
            self.data.append(data[s][ind, :, 3 * NTIMES_IN_SEC :LEN_RECORD_IN_SECONDS * NTIMES_IN_SEC + 3 * NTIMES_IN_SEC])
            self.len_files.append(len(ind) *  LEN_RECORD_IN_SECONDS - 1)
            self.len_record = LEN_RECORD_IN_SECONDS
            labels_bin_sub = (labels[s] >= THRESHOLD)
            self.labels.append(labels_bin_sub[ind])          
            for i in range(4):
              self.cnt[i].update(list(self.labels[-1][:, i]))    

            
       self.len_cumsum = np.cumsum(self.len_files)     
       print(self.data[0].shape)
       print(self.labels[0].shape)



    def __len__(self):
        result =  sum(self.len_files) - 10
        return result

    def get_index_record(self, item):
      for i_file in range(len(self.len_cumsum)):
         #print(item, self.len_cumsum[i_file])
         if (item > self.len_cumsum[i_file]):
            continue
         else:
            break
      if i_file == 0:
         index_in_file = item
      else:
         index_in_file = item  - self.len_cumsum[i_file - 1]
      nvideo = index_in_file//(self.len_record)# * LEN_RECORD_IN_SECONDS *  NTIMES_IN_SEC)
      nsec = (index_in_file - nvideo * self.len_record) # *   NTIMES_IN_SEC)

      return i_file, index_in_file, nvideo, nsec

    def __getitem__(self, item):
      sample = {}
      #print(item)
      i_file, index_in_file, nvideo, nsec = self.get_index_record(item)
      #print(i_file, nvideo, nsec )
      sample['data'] = np.zeros((HCANALS, WCANALS, NTIMES_IN_SAMPLE))
      sample_from_one_canals = []
      for i_canal in range(NCANALS):
        sample_from_one_canal = self.data[i_file][nvideo, i_canal, nsec * 128 : nsec * 128 + 128]
        sample_from_one_canals.append(sample_from_one_canal)
        #print(sample_from_one_canal.shape)
      sample_from_one_canals = np.asarray(sample_from_one_canals).copy()
      sample_from_one_canals = scipy.stats.zscore(sample_from_one_canals, axis = None)
      for i_canal in range(NCANALS):
          sample['data'][electrode_matrix[list_electrodes[i_canal]][0],  electrode_matrix[list_electrodes[i_canal]][1]] = sample_from_one_canals[i_canal]
      #for i in range(NTIMES_IN_SAMPLE): 
      #    sample['data'][:, :, i] = scipy.stats.zscore(sample['data'][:, :, i])
      sample['data'] = torch.FloatTensor(sample['data'])
      sample['labels']  = torch.LongTensor(self.labels[i_file][nvideo])
      
      return sample

In [9]:
def get_model():
  model = EmotionNet(HCANALS, WCANALS, NFEATURES, NTIMES_IN_SAMPLE).to(device)
  return model


In [10]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = get_model()

In [41]:
# def init_weights(m):
#     for name, param in m.named_parameters():
#         if 'weight' in name:
#             nn.init.normal_(param.data, mean=0, std=0.01)
#         else:
#             nn.init.constant_(param.data, 0)
            
# model.apply(init_weights)

def initialize_weights(m):
    if hasattr(m, 'weight') and m.weight.dim() > 1:
        nn.init.xavier_uniform_(m.weight.data)

model.apply(initialize_weights)

EmotionNet(
  (convs): ModuleList(
    (0): Sequential(
      (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=same)
      (3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (4): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=same)
      (5): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): Sequential(
      (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=same)
      (3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (4): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=same)
     

In [42]:
files = glob.glob(os.path.join(data_dir, "*.dat"))
files.sort()
files = np.asarray(files)
nfiles = len(files)
koeff1 = 0.99
# koeff2 = 0.05
ind_train = random.sample(range(nfiles), int(nfiles * koeff1))
ind_val = list(set(range(nfiles)) - set(ind_train))
#ind_val = random.sample(ind, int(len(ind) * koeff2))
# ind_test = list(set(ind) - set(ind_val))

In [43]:
print(ind_val)

[26]


In [15]:
class Args:
  def __init__(self): #(data_path, epoch, batch_siz, image_size, learning_rate, weight_deca, learning_rate, learning_rate_gamma, weight_bce, load, output_dir)
    self.data_path = "/content/drive/MyDrive/MADE/semester2/CV/contest02/data/"
    self.epochs = 2
    self.batch_size = 100
    self.lr= 3e-4
    self.weight_decay= 1e-6
    self.learning_rate=None
    self.learning_rate_gamma=None
    self.weight_bce=1
    self.load=None
    self.output_dir="runs/segmentation_baseline"
    self.data_dir ="./data_preprocessed_python/"# "/content/drive/MyDrive/MADE/Project/train/physionet.org/"
args = Args()    

In [16]:
type_emotion = 0

In [17]:
from sklearn.model_selection import StratifiedKFold 
from sklearn.metrics import f1_score, accuracy_score
k  = 5
labels_bin = []
for i in range(32):
  temp = labels[i] > 4.5
  #print(labels[i])
  #print(temp)
  labels_bin.append(temp)
X = np.arange(40)
y = np.array(labels_bin[0][:, type_emotion])
skf = StratifiedKFold(n_splits=k, random_state=None, shuffle=True)
balanced_split = skf.split(X, y)
for ind_train, ind_test in  balanced_split:
    print(ind_train, ind_test)
    print(sum(labels_bin[0][ind_train, type_emotion]))
    print(sum(labels_bin[0][ind_test, type_emotion]))
    break

[ 1  2  3  5  6  7  8  9 10 12 13 14 15 16 18 20 21 22 23 24 25 26 28 30
 31 32 33 35 36 37 38 39] [ 0  4 11 17 19 27 29 34]
16
4


In [18]:

train_dataset = EmotionDataset(args.data_dir, 'train', ind_train, data, labels)

class_weights_all = [1/train_dataset.cnt[0][i] for i in range(2)]
weights_samples =  [0] * train_dataset.__len__()
for i in range(train_dataset.__len__()):
    i_file, index_in_file, nvideo, nsec = train_dataset.get_index_record(i)
    #print(train_dataset.labels[i_file][nvideo])
    weights_samples[i] = class_weights_all[int(train_dataset.labels[i_file][nvideo, 0])]

weighted_sampler = WeightedRandomSampler(
    weights=weights_samples,
    num_samples=len(weights_samples),
    replacement=True
)
train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=1,
                              pin_memory=True, shuffle=False, drop_last=True, sampler=weighted_sampler)


val_dataset = EmotionDataset(args.data_dir, 'val', ind_test, data, labels)
val_dataloader = DataLoader(val_dataset, batch_size=args.batch_size, num_workers=1,
                              pin_memory=True, shuffle=False, drop_last=False)

(32, 40, 7680)
(32, 4)
(8, 40, 7680)
(8, 4)


In [48]:
# train_dataset = EmotionDataset(files[ind_train])
# train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=1,
#                               pin_memory=True, shuffle=True, drop_last=True)


# val_dataset = EmotionDataset(files[ind_val])
# val_dataloader = DataLoader(val_dataset, batch_size=args.batch_size, num_workers=1,
#                               pin_memory=True, shuffle=False, drop_last=False)

In [49]:
criterion = nn.CrossEntropyLoss(reduce = 'mean')#torch.nn.MSELoss()
#optimizer = optim.SGD(model.parameters(), lr=3e-5, momentum = 0.9)#, weight_decay=args.weight_decay)
optimizer = optim.Adam(model.parameters(), lr=3e-5)#, momentum = 0.9)#, weight_decay=args.weight_decay)



In [50]:
# print(train_dataset.cnt)
# print(val_dataset.cnt)
# print(files[ind_train])
# print(files[ind_val])

In [51]:
def train(model, loader, criterion, optimizer, device, batch = None):
    model.train()
    train_loss = []
    inputs = []
   
    #lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)#, mode='min', factor=0.1, patience=10, threshold=0.0001, threshold_mode='rel', cooldown=0, min_lr=0, eps=1e-08, verbose=False)
    for batch in tqdm(loader, total=len(loader), desc="training...", position=0 , leave = True):

            optimizer.zero_grad()
            src  = batch['data'].to(device)
            #print(src.shape)
            trg = batch['labels'][:, 0]

            #print(batch)
            #print(trg.shape)
            levels_pred = model(src)  # B x (2 * NUM_PTS)
            #print(levels_pred.shape)
            levels_pred = levels_pred.cpu()

            #usual cross entropy
            #output = levels_pred[:, 1:].reshape(-1, levels_pred.shape[-1])
            #trg1 = trg[:, 1:].reshape(-1)
            loss = criterion(levels_pred, trg) 

            #print("after")
            train_loss.append(loss.item())
            loss.backward()
            optimizer.step()
            #break
    return np.mean(train_loss)#, mid_outputs


In [52]:
def evaluate(model, loader, criterion, device):
    
    model.eval()
    epoch_loss = 0
    history = []
  
    with torch.no_grad():
    
        for s, batch in enumerate(tqdm(loader, total=len(loader), desc="validating...", position=0 , leave = True)):
            src  = batch['data'].to(device)
            #print(src.shape)
            trg = batch['labels'][:, 0]



            levels_pred = model(src)  # B x (2 * NUM_PTS)
            #print(levels_pred.shape)
            levels_pred = levels_pred.cpu()

            
            loss = criterion(levels_pred, trg) 

            epoch_loss += loss.item() 
        
    return epoch_loss / s

In [12]:
from sklearn.metrics import accuracy_score, confusion_matrix,classification_report

def calculate_predictions(model, loader):
    model.eval()
    epoch_loss = 0
    history = []
    real = []
    pred = []
    with torch.no_grad():

        for i, batch in enumerate(tqdm(loader, total=len(loader), desc="predicting...", position=0 , leave = True)):
            src  = batch['data'].to(device)
            #print(src.shape)
            trg = batch['labels'][:, 0]
           

            levels_pred = model(src)  # B x (2 * NUM_PTS)
            levels_pred = levels_pred.cpu()
            #print(levels_pred.shape)
            trg_pred = levels_pred.argmax(1)
            
            real.extend(trg)
            pred.extend(trg_pred) 

            
        print(accuracy_score(real, pred)) 
        print(confusion_matrix(real, pred))  
        print(classification_report(real, pred))   
        #plt.hist(real)

In [55]:
args.epochs = 20000
#criterion =  fnn.mse_loss
train_loss_min = 10000
val_loss_min = 10000
#batch = next(iter(train_dataloader))
for epoch in range(args.epochs):
    #logger.info(f"Starting epoch {epoch + 1}/{args.epochs}.")
    
    train_loss = train(model, train_dataloader, criterion, optimizer ,device)
    #if epoch % 500 == 0:
    print(train_loss)

    if (train_loss < train_loss_min):
        train_loss_min      = train_loss
        torch.save({
                         'model_state_dict': model.state_dict(),
                         'optimizer_state_dict': optimizer.state_dict(),
                       },
                       os.path.join("/content/drive/MyDrive/MADE/Project/CNN_models/", "train.tgz")
            )  

    val_loss = evaluate(model, val_dataloader, criterion, device)
    # #break
    print(val_loss)
    #break

    # #calculate_predictions(model, val_dataloader)
    if (val_loss < val_loss_min):
         val_loss_min      = val_loss
         torch.save({'model_state_dict': model.state_dict(),    'optimizer_state_dict': optimizer.state_dict(),}, os.path.join("/content/drive/MyDrive/MADE/Project/CNN_models/", f"val.tgz"))

training...:   0%|          | 0/613 [00:00<?, ?it/s]

0.7710948858720055


validating...:   0%|          | 0/154 [00:00<?, ?it/s]

0.7532008827901354


training...:   0%|          | 0/613 [00:00<?, ?it/s]

0.7498610008015718


validating...:   0%|          | 0/154 [00:00<?, ?it/s]

0.76551458531735


training...:   0%|          | 0/613 [00:00<?, ?it/s]

0.7363210552862768


validating...:   0%|          | 0/154 [00:00<?, ?it/s]

0.7384427489798053


training...:   0%|          | 0/613 [00:00<?, ?it/s]

0.7282030616459995


validating...:   0%|          | 0/154 [00:00<?, ?it/s]

0.7290228067659864


training...:   0%|          | 0/613 [00:00<?, ?it/s]

0.7194830634271145


validating...:   0%|          | 0/154 [00:00<?, ?it/s]

0.7514509028858609


training...:   0%|          | 0/613 [00:00<?, ?it/s]

0.7162513183536188


validating...:   0%|          | 0/154 [00:00<?, ?it/s]

0.727243170239567


training...:   0%|          | 0/613 [00:00<?, ?it/s]

0.7117543025079015


validating...:   0%|          | 0/154 [00:00<?, ?it/s]

0.7277727715330187


training...:   0%|          | 0/613 [00:00<?, ?it/s]

0.7090901453382233


validating...:   0%|          | 0/154 [00:00<?, ?it/s]

0.7065629028027354


training...:   0%|          | 0/613 [00:00<?, ?it/s]

0.704772208779321


validating...:   0%|          | 0/154 [00:00<?, ?it/s]

0.7183699845488555


training...:   0%|          | 0/613 [00:00<?, ?it/s]

0.7018680126204188


validating...:   0%|          | 0/154 [00:00<?, ?it/s]

0.7302298923723058


training...:   0%|          | 0/613 [00:00<?, ?it/s]

KeyboardInterrupt: ignored

In [19]:
model_state  = torch.load(os.path.join("/content/drive/MyDrive/MADE/Project/CNN_models/", f"val.tgz"))
#model = Seq2Seq(enc, dec, SRC_PAD_IDX, TRG_PAD_IDX, device)
model.load_state_dict(model_state['model_state_dict'])
calculate_predictions(model, val_dataloader)


predicting...:   0%|          | 0/154 [00:00<?, ?it/s]

0.5338817077947513
[[2285 3282]
 [3858 5893]]
              precision    recall  f1-score   support

           0       0.37      0.41      0.39      5567
           1       0.64      0.60      0.62      9751

    accuracy                           0.53     15318
   macro avg       0.51      0.51      0.51     15318
weighted avg       0.54      0.53      0.54     15318

