# Let the challenge begin

**Notes on data** 

- 5 EEG derivations sampled at 250Hz
- 3 Accelerometers derivations sampled at 50Hz
- Sleep epoch = 30 sec
- hypnogram = succession of the sleep stages (0...5)

**General info sleep**
- Sleep stages = (N1, N2) = light sleep, N3 = deep sleep, REM
- Low frequency power: N3 > N2 > N1-REM-Wake

**Wake**
- During Wake epoch alpha waves are clearly visible on the F-O derivation
- Movement occurred mainly during wake periods; signals are noisy during movement
- Alpha wave frequency ranges between 8 and 13 hertz = wake, relaxed
**N1**
- Theta waves freq betw 4 and 8 Hz = N1, N2

**N2**
- On N2 epoch, power in the spindle range is much higher on frontal-frontal channels
- Theta waves freq betw 4 and 8 Hz = N1, N2
- During N2, sleep spindles (a fast rhythm between 12 and 14 Hz lasting 0.5 to 2 seconds) are more visible on the frontal-frontal derivation

**N3**
- On N3 epoch, we can see more power in the low frequencies
- Delta waves freq betw 1 and 4 Hz = N3

**REM**
- REM sleep distinguishable with steady EEG and eyes movement which can be seen when looking at Frontal-occipital vs frontal-frontal derivation.
- The EEG power increases in the low-frequency band when the sleep stage changes from REM to an NREM sleep stage
- REM epochs have a steadier EEG

**Formulas**
- A spectrogram is the time-frequency matrix z = P(t, f)
- A spectrum corresponds to the curve y = P(f)
- The average spectrum can therefore be computed as the mean of the spectrogram over a specified period

**Links**
https://opentext.wsu.edu/psych105/chapter/stages-of-sleep/
https://www.sleepfoundation.org/how-sleep-works/alpha-waves-and-sleep
https://centralesupelec.edunao.com/pluginfile.php/242107/course/section/36663/Challenge%20Data%20Dreem-1.pdf
https://centralesupelec.edunao.com/pluginfile.php/242107/course/section/36663/entropy-18-00272.pdf



In [None]:
from sklearnex import patch_sklearn
patch_sklearn()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn.metrics import balanced_accuracy_score, cohen_kappa_score, confusion_matrix
import matplotlib.pyplot as plt
import json
import yasa
import os
import numpy as np
import pandas as pd
from scipy.signal import iirfilter, filtfilt
from scipy.signal import welch
from lspopt import spectrogram_lspopt
from scipy.signal import spectrogram
from os import listdir
from random import randint
import random as rd
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import f1_score
from sklearn.metrics import plot_confusion_matrix
from sklearn.ensemble import RandomForestClassifier

In [None]:
plt.imshow(plt.imread('corr_sleep_stages.png'))

In [None]:
# Load the sample recording: one EEG derivation (f7-O2), one frontal-frontal
# derivation (f8-f7), one accelerometer axis and the hypnogram.
eeg = np.load('sample/sample/f7_O2.npy')
print('EEG duration', eeg.shape[0] / 250)
accelerometer_x = np.load('sample/sample/accelerometer_x.npy')
print('Accelerometer duration', accelerometer_x.shape[0] / 50)
# A context manager closes the JSON file instead of leaking the handle.
with open('sample/sample/hypnogram.json') as hypnogram_file:
    hypnogram = np.array(json.load(hypnogram_file))
eeg_frontal = np.load('sample/sample/f8_f7.npy')


In [None]:
def get_average_spectrum_for_epochs(eeg,epochs):
    """
    Return the frequency bins and the averaged spectrum of several epochs.

    For every requested epoch the 30-second slice of ``eeg`` is clipped to
    [-150, 150], its multitaper spectrogram is computed with
    ``spectrogram_lspopt`` and the squared spectrogram values are averaged
    over time. The per-epoch curves are then averaged together.

    Args:
        eeg: 1-D signal sampled at 250 Hz.
        epochs: iterable of epoch indices (epoch ``k`` covers samples
            ``[7500 * k, 7500 * (k + 1))``).

    Returns:
        ``(freqs, mean_spectrum)`` where ``freqs`` are the frequency bins
        returned by ``spectrogram_lspopt``.

    Raises:
        ValueError: if ``epochs`` is empty (there is nothing to average).
    """
    EEG_FS = 250
    samples_per_epoch = EEG_FS * 30

    epochs = list(epochs)
    if not epochs:
        raise ValueError("epochs must contain at least one epoch index")

    psds = []
    for epoch in epochs:
        idx_start = samples_per_epoch * epoch
        segment = np.clip(eeg[idx_start:idx_start + samples_per_epoch], -150, 150)
        freqs, _, psd = spectrogram_lspopt(segment, EEG_FS, nperseg=1000)
        # Average over the time axis; values are squared as in the original analysis.
        psds.append(np.mean(psd ** 2, 1))
    return freqs, np.array(psds).mean(0)

In [None]:
# predictions = []
# true_labels = hypnogram

# # Get your scores
# scores = {}
# scores['balanced_accuracy'] = balanced_accuracy_score(true_labels, predictions)
# scores['cohen_kappa'] = cohen_kappa_score(true_labels, predictions)
# scores['confusion_matrix'] = confusion_matrix(true_labels, predictions)

# print(scores)

In [None]:
## Function to plot N sleep epochs for a specific stage 

freq = 250 
epoch_s = 30

def random_sleep_epoch(N, sleep_stage, fs=freq, epoch_len_s=epoch_s) :
    """
    Plot N randomly chosen epochs of the frontal-frontal EEG for one stage.

    Epochs are drawn uniformly, with replacement, among the entries of the
    global ``hypnogram`` equal to ``sleep_stage``.

    Args:
        N: number of epochs to plot.
        sleep_stage: hypnogram value to look for.
        fs: sampling frequency of the EEG file, in samples per second.
            Bound at definition time so that a later rebinding of the
            global ``freq`` cannot change the slicing.
        epoch_len_s: epoch length in seconds.

    Raises:
        ValueError: if N > 0 and ``sleep_stage`` never occurs in the
            hypnogram (the previous rejection loop would never terminate).
    """
    candidates = [idx for idx, stage in enumerate(hypnogram) if stage == sleep_stage]
    if N > 0 and not candidates:
        raise ValueError(f"sleep stage {sleep_stage!r} does not occur in the hypnogram")

    # randint is inclusive on both ends, hence the "- 1": drawing up to
    # len(...) would occasionally index one past the last element.
    epochs = [candidates[randint(0, len(candidates) - 1)] for _ in range(N)]

    samples_per_epoch = epoch_len_s * fs
    eeg_ff = np.load('sample/sample/f8_f7.npy')
    for epoch in epochs :
        t0 = epoch * samples_per_epoch
        eeg_short = eeg_ff[t0:t0 + samples_per_epoch]
        plt.figure(figsize=(25, 8))
        plt.plot(eeg_short)
        plt.ylim([-200, 200])
        plt.xlim(0,len(eeg_short))
        plt.show()

2: Machine learning models

In [None]:
# Spectral bands used as features: name -> [low, high] bounds.
# Downstream code selects bins with low <= f < high.
frequency_bands = dict(
    delta=[0.5, 4],
    theta=[4, 8],
    alpha=[8, 12],
    sigma=[12, 16],
    beta=[16, 30],
)

# Sampling rates (samples per second) of the EEG and accelerometer signals
EEG_FS, ACC_FS = 250, 50
# Epoch length in seconds
epoch_s = 30
# Number of EEG and accelerometer channels per record
n_EEG, n_ACC = 5, 3

In [None]:
def get_relative_spectral_power_for_epoch(data, fs=250, bands=None):
    """
    Compute the relative spectral power of each frequency band.

    The spectrogram of every row of ``data`` is averaged over time, then
    the power falling in each band (``low <= f < high``) is divided by the
    total power of that row.

    Args:
        data: 2-D array, one signal per row (rows are indexed on axis 0 and
            frequencies on axis 1 after time-averaging).
        fs: sampling frequency passed to ``scipy.signal.spectrogram``.
        bands: mapping ``name -> (low, high)``; defaults to the module-level
            ``frequency_bands``.

    Returns:
        dict mapping each band name to a 1-D array with one relative power
        value per row of ``data``.
    """
    if bands is None:
        bands = frequency_bands

    sfreqs, _, psd = spectrogram(data, fs, nperseg=1000, noverlap=750)
    psd = np.mean(np.abs(psd), -1)  # average over the time axis
    total_power = np.sum(psd, 1)    # hoisted: identical for every band

    spectral_power_band = {}
    for name, (low, high) in bands.items():
        in_band = (sfreqs >= low) & (sfreqs < high)
        spectral_power_band[name] = np.sum(psd[:, in_band], 1) / total_power

    return spectral_power_band

In [None]:
# Row-wise summary statistics: name -> function of a 2-D array.
# Note that "mean" is the mean of the absolute values.
statistics = {
    "stdev": lambda x: np.std(x, axis=1),
    "mean": lambda x: np.mean(np.abs(x), axis=1),
}
# Feature names: band names first, then statistic names
variable_lists = [*frequency_bands, *statistics]

In [None]:
def compute_stats(data):
    """
    Apply every function registered in ``statistics`` to ``data``.

    Returns a dict with the same keys as ``statistics``, each holding the
    value returned by the corresponding function.
    """
    result = {}
    for stat_name, stat_fn in statistics.items():
        result[stat_name] = stat_fn(data)
    return result


In [None]:
def _channel_features(signals, fs):
    """
    Build the feature rows of one channel.

    Args:
        signals: 2-D array, one epoch per row.
        fs: sampling frequency of the channel.

    Returns:
        Array of shape (n_bands + 2, n_epochs): the relative power of each
        band of ``frequency_bands`` (in dict order), then the mean and the
        standard deviation of every epoch.
    """
    sfreqs, _, psd = spectrogram(signals, fs, nperseg=1000, noverlap=750)
    psd = np.mean(np.abs(psd), -1)  # average over the time axis
    total_power = np.sum(psd, 1)
    rows = [
        np.sum(psd[:, (sfreqs >= low) & (sfreqs < high)], 1) / total_power
        for low, high in frequency_bands.values()
    ]
    rows.append(np.mean(signals, axis=1))
    rows.append(np.std(signals, axis=1))
    return np.array(rows)


data_for_records_all = {}
hypnogram_for_records = {}
hypnograms = pd.read_csv('targets_train.csv')
# os.listdir returns entries in arbitrary order; sorting makes the record
# order (and therefore the seeded shuffle used for the split) reproducible.
for record in sorted(os.listdir("training_records")):
    # Parse all trailing digits of the file stem so that record numbers
    # above 9 are read correctly (record[-5] only sees the last digit).
    stem = os.path.splitext(record)[0]
    record_number = int(stem[len(stem.rstrip('0123456789')):])
    x = np.load(f'training_records/{record}')

    # Row layout: column 0 is not used as a feature, then the n_EEG EEG
    # channels back to back, then the n_ACC accelerometer channels.
    eeg_end = EEG_FS * epoch_s * n_EEG + 1
    EEG = x[:, 1:eeg_end].reshape(len(x), n_EEG, EEG_FS * epoch_s)
    ACC = x[:, eeg_end:].reshape(len(x), n_ACC, ACC_FS * epoch_s)

    # NOTE(review): with ACC_FS = 50 the spectrogram stops at 25 Hz, so the
    # "beta" band (16-30 Hz) is only partially covered for accelerometers.
    channel_blocks = [_channel_features(EEG[:, i, :], EEG_FS) for i in range(n_EEG)]
    channel_blocks += [_channel_features(ACC[:, i, :], ACC_FS) for i in range(n_ACC)]
    complete_array = np.vstack(channel_blocks)

    # One row per epoch, one column per feature
    data_for_records_all[record] = complete_array.T
    hypnogram_for_records[record] = list(hypnograms[hypnograms['record'] == record_number]['target'])

variable_lists = (list(frequency_bands) + list(statistics))

In [None]:
# Names of the feature columns, in the order in which the features are
# stacked: for every EEG channel then every accelerometer channel, the band
# powers followed by the mean and the standard deviation.
variable_list = []
stats = 'mean', 'std'
feature_suffixes = [*frequency_bands, *stats]
# The loop variables deliberately avoid the name `freq`: that global holds
# the EEG sampling frequency used by random_sleep_epoch and must not be
# overwritten with a band name.
for prefix, n_channels in (("EEG", n_EEG), ("ACC", n_ACC)):
    for channel in range(1, n_channels + 1):
        variable_list.extend(f"{prefix}_{channel}_{suffix}" for suffix in feature_suffixes)

In [None]:
print(len(variable_list))
print(len(data_for_records_all['record_0.npy'][0]))

In [None]:
def correlations(record, data, n) :
    """
    Return the n x n correlation matrix between channels of one epoch.

    Args:
        record: index on the first axis of ``data`` (the epoch to analyse).
        data: 3-D array indexed as ``data[record, channel, sample]``.
        n: number of leading channels to compare.

    Returns:
        A list of n lists of n floats; entry ``[i][j]`` is the Pearson
        correlation coefficient between channels i and j.
    """
    if n == 0:
        return []
    # A single corrcoef call yields the whole matrix instead of n*n calls
    # that each recompute a 2x2 matrix. atleast_2d covers n == 1, where
    # corrcoef returns a scalar.
    matrix = np.atleast_2d(np.corrcoef(data[record, :n, :]))
    return matrix.tolist()

# Correlation between the EEG channels of the first epoch, one row per channel
corr = correlations(0, EEG, n_EEG)
for channel, row in enumerate(corr, start=1):
    print(channel, row)

# Same for the accelerometer channels
corr = correlations(0, ACC, n_ACC)
for channel, row in enumerate(corr, start=1):
    print(channel, row)


TODO: 
- include all EEG / accelerometer? channels
    - for EEG: do spectral analysis for each channel
    - think about what to do with accelerometer channels 

change variable names

https://github.com/Kaggle/kaggle-api

In [None]:
# Create train and cross-validation partitions
rd.seed(2134)
records_list = list(data_for_records_all)
rd.shuffle(records_list)
training_record,test_records = records_list[:4],records_list[4:]

print('Training records: ',training_record)
print('Test records: ', test_records)

VERY LATER: take training data and test data already cut  / for now keep this division

In [None]:
def build_dataset(records, data_for_records_all,hypnogram_for_records):
    """
    Flatten per-record features and labels into two parallel lists.

    Records are visited in the order given; for each one, its feature rows
    are appended to the first list and its hypnogram values to the second.

    Returns:
        ``(X, y)``: list of feature rows and list of labels.
    """
    features = [row for record in records for row in data_for_records_all[record]]
    labels = [stage for record in records for stage in hypnogram_for_records[record]]
    return features, labels

X_train,y_train = build_dataset(training_record,data_for_records_all,hypnogram_for_records)
X_test,y_test = build_dataset(test_records,data_for_records_all,hypnogram_for_records)

In [None]:
print(len(X_train), len(y_train))

# Random forest classifier

In [None]:
# Fit a random forest on the hand-crafted features
clf_rf = RandomForestClassifier(random_state=42)
print('training...')
clf_rf.fit(X_train, y_train)

# Evaluate it on the held-out records
predictions = clf_rf.predict(X_test)
scores = {'balanced_accuracy': balanced_accuracy_score(y_test, predictions),
            'cohen_kappa': cohen_kappa_score(y_test, predictions),
            'macro_f1': f1_score(y_test, predictions,average ='macro')}

print(scores)

# plot_confusion_matrix was deprecated and then removed from scikit-learn;
# ConfusionMatrixDisplay draws the same plot from the predictions that were
# already computed above.
from sklearn.metrics import ConfusionMatrixDisplay
ConfusionMatrixDisplay(confusion_matrix(y_test, predictions),
                       display_labels=['Wake','N1','N2','N3','REM']).plot()
plt.show()

In [None]:
# import sklearn

# metrics = "balanced_accuracy","f1_macro"

# sklearn.model_selection.cross_validate(clf_rf, X_train, y_train, scoring=metrics)

# clf_rf

### Evaluate importance of each parameter: Permutation importance

In [None]:
print('permutations...')
from sklearn.inspection import permutation_importance
# We use the built-in sklearn function to compute the permutation importance
result = permutation_importance(clf_rf, X_test, y_test, n_repeats=50, random_state=0,scoring = 'f1_macro')
sorted_idx = result.importances_mean.argsort()

# The boxes are drawn in order of increasing mean importance, so the labels
# must be reordered with the same permutation; passing variable_list as-is
# would attach each name to the wrong box.
sorted_labels = [variable_list[i] for i in sorted_idx]

# And plot the importance of each variable
fig, ax = plt.subplots(figsize=(25, 10))
ax.boxplot(result.importances[sorted_idx].T,
           vert=False, labels=sorted_labels)
ax.set_title("Permutation Importances (Test set)")
plt.xlabel('Decrease in Macro-F1')
fig.tight_layout()
plt.show()

In [None]:
# Leave-one-feature-out ablation: f1[k] is the macro-F1 obtained on the
# held-out set when feature k is removed from both training and test data.
from sklearn.base import clone

# Convert once; np.delete already returns a new array, so no copy is needed.
X_train_array = np.asarray(X_train)
X_test_array = np.asarray(X_test)

f1 = []
for k in range(len(variable_list)):
    X_train_k = np.delete(X_train_array, k, 1)
    X_test_k = np.delete(X_test_array, k, 1)
    # Fit an unfitted copy (same hyper-parameters and random_state) so that
    # clf_rf itself stays trained on the full feature set.
    clf_k = clone(clf_rf)
    clf_k.fit(X_train_k, y_train)
    predictions_k = clf_k.predict(X_test_k)
    f1.append(f1_score(y_test, predictions_k, average='macro'))


In [None]:
plt.plot(f1)
# Rank the ablation runs by increasing macro-F1: the first entries are the
# features whose removal leaves the lowest score.
f1_order = np.argsort(f1)
f1_sorted = []
for idx in f1_order:
    f1_sorted.append(f1[idx])
print(f1_order)
print(f1_sorted)
variables_sorted = []
for idx in f1_order:
    variables_sorted.append(variable_list[idx])
print(variables_sorted)

In [None]:
plt.plot(f1)
# Rank the ablation runs by increasing macro-F1: the first entries are the
# features whose removal leaves the lowest score.
f1_order = np.argsort(f1)
f1_sorted = []
for idx in f1_order:
    f1_sorted.append(f1[idx])
print(f1_order)
print(f1_sorted)
variables_sorted = []
for idx in f1_order:
    variables_sorted.append(variable_list[idx])
print(variables_sorted)

Spectral components have a very marginal effect on classification -> to be improved

Todo: implement cross validation and compare with no cross validation
check if CV useful with random 
MAJOR CHANGE

TODO: plot test error for CV/test set

# DEEP LEARNING OH YEAH

## (temporary) load single channel data

In [None]:
data_for_records = {}
hypnogram_for_records = {}
hypnograms = pd.read_csv('targets_train.csv')
# Load the training records. os.listdir returns entries in arbitrary order;
# sorting keeps the record order, and the seeded split built from it,
# reproducible.
for record in sorted(os.listdir("training_records")):
    # Parse all trailing digits of the file stem so that record numbers
    # above 9 are read correctly (record[-5] only sees the last digit).
    stem = os.path.splitext(record)[0]
    record_number = int(stem[len(stem.rstrip('0123456789')):])
    x = np.load(f'training_records/{record}')
    # The whole row is kept for now; the single-channel variant would be
    # x[:,1:250 * 30 + 1].
    data_for_records[record] = x
    hypnogram_for_records[record] = list(hypnograms[hypnograms['record'] == record_number]['target'])

In [None]:
#### use actual training examplars


In [None]:
x.shape

In [None]:
# Seeded shuffle of the records, then a fixed-size training/held-out split
rd.seed(1234)
records_list = [*data_for_records]
rd.shuffle(records_list)
n_training = 5
training_records = records_list[:n_training]
test_records = records_list[n_training:]

print('Training records: ',training_records)
print('Test records: ', test_records)

TO BE CHANGED TO DEFINE AS TEST SET THE TEST SAMPLES 

In [None]:
def build_dataset(records, data_for_records,hypnogram_for_records):
    """
    Concatenate the raw data and the labels of several records.

    Records are visited in the order given.

    Returns:
        ``(X, y)``: the per-record arrays concatenated along the first axis,
        and the list of all their hypnogram values.
    """
    signals = np.concatenate([data_for_records[record] for record in records])
    stages = [stage for record in records for stage in hypnogram_for_records[record]]
    return signals, stages


X_train,y_train = build_dataset(training_records,data_for_records,hypnogram_for_records)
X_test,y_test = build_dataset(test_records,data_for_records,hypnogram_for_records)



define load functions 

In [None]:
""" Load project data
    DataLoader and Dataset for single-channel EEG

"""

import torch
from torch.utils.data import Dataset, DataLoader


def normalize_data(eeg_array, clip_value=250):
    """Clip the signal to [-clip_value, clip_value] and scale it to [-1, 1].

    Args:
        eeg_array: array-like signal; it is not modified in place.
        clip_value: positive amplitude bound, also used as the scale factor.

    Returns:
        A new array with values between -1 and 1.

    Raises:
        ValueError: if ``clip_value`` is not strictly positive.
    """
    if clip_value <= 0:
        raise ValueError("clip_value must be strictly positive")

    normalized_array = np.clip(eeg_array, -clip_value, clip_value)
    return normalized_array / clip_value


class EegEpochDataset(Dataset):
    """Dataset pairing normalized epochs with their sleep stage."""

    def __init__(self, x_data, y_data, transform=None):
        """
        Args:
            x_data (numpy array): Input data; it is passed through
                ``normalize_data`` once, at construction time.
            y_data (list of numpy array): Sleep stages, one per sample.
            transform (callable, optional): Applied to every signal that
                is returned by ``__getitem__``.
        """
        self.x_data = normalize_data(x_data)
        self.y_data = y_data
        self.transform = transform

    def __len__(self):
        # The number of samples is defined by the labels
        return len(self.y_data)

    def __getitem__(self, idx):
        index = idx.tolist() if torch.is_tensor(idx) else idx

        # Add a leading axis of size 1 in front of the selected data
        signal = self.x_data[index][np.newaxis, ...]
        if self.transform:
            signal = self.transform(signal)

        return signal, self.y_data[index]


training_dataset = EegEpochDataset(X_train,y_train)
training_dataloader = DataLoader(training_dataset,batch_size = 32)
# validation_dataset = EegEpochDataset(X_test,y_test)
# validation_dataloader = DataLoader(validation_dataset,batch_size = 32)
test_dataset = EegEpochDataset(X_test,y_test)
test_dataloader = DataLoader(test_dataset,batch_size = 32)

First CNN model
+ max pooling

In [None]:
# import torch
# import torch.nn as nn


# class SingleChannelConvNet(nn.Module):

#     def __init__(self):
#         super(SingleChannelConvNet, self).__init__()
#         self.conv_a = nn.Conv1d(1, 8, 25, stride=5)
#         self.conv_b = nn.Conv1d(8, 16, 10, stride=5)
#         self.conv_c = nn.Conv1d(16, 32, 10, stride=5)
#         self.conv_d = nn.Conv1d(32, 64, 10, stride=5)

#         self.relu = nn.ReLU()

#         self.fc1 = nn.Linear(64, 5)

#     def forward(self, x):
#         x = self.conv_a(x)
#         print(x.shape)
#         x = self.relu(x)
#         # x = self.relu(self.conv_a(x))
#         x = self.relu(self.conv_b(x))
#         x = self.relu(self.conv_c(x))
#         x = self.relu(self.conv_d(x))
#         x = x.max(-1)[0]
#         x = self.fc1(x)

#         return x


In [None]:
X_train.shape

In [None]:
import torch
import torch.nn as nn


class SingleChannelConvNet(nn.Module):
    """1-D CNN mapping a single-channel signal to 5 class scores.

    Five Conv1d -> ReLU -> BatchNorm1d stages are followed by a global max
    over the time axis and a linear layer. Because of the global max, the
    input length is free as long as it survives the five convolutions.

    Input:  (batch, 1, length)
    Output: (batch, 5) unnormalized scores (logits), suitable for
            ``nn.CrossEntropyLoss``.
    """

    def __init__(self):
        super().__init__()
        # PyTorch's momentum is the weight of the *new* batch statistic:
        #   running = (1 - momentum) * running + momentum * batch
        # which is the opposite of the Keras convention. The previous value
        # of 0.99 made the running statistics track almost only the last
        # batch, so 0.01 is used to keep a slowly moving average.
        bn_kwargs = dict(eps=0.001, momentum=0.01)

        self.conv_a = nn.Conv1d(1, 8, 25, stride=5)
        self.batchnorm_a = nn.BatchNorm1d(8, **bn_kwargs)
        self.conv_b = nn.Conv1d(8, 32, 10, stride=3)
        self.batchnorm_b = nn.BatchNorm1d(32, **bn_kwargs)
        self.conv_c = nn.Conv1d(32, 64, 10, stride=3)
        self.batchnorm_c = nn.BatchNorm1d(64, **bn_kwargs)
        self.conv_d = nn.Conv1d(64, 128, 10, stride=2)
        self.batchnorm_d = nn.BatchNorm1d(128, **bn_kwargs)
        self.conv_e = nn.Conv1d(128, 256, 10, stride=1)
        self.batchnorm_e = nn.BatchNorm1d(256, **bn_kwargs)

        # Output length of a convolution: (W - F + 2P) / S + 1
        # (W input length, F kernel size, P padding, S stride).
        # max_pool and softmax are kept as attributes but are not used in
        # forward(); they hold no parameters.
        self.max_pool = nn.MaxPool1d(1, 1)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

        self.fc1 = nn.Linear(256, 5)

    def forward(self, x):
        stages = (
            (self.conv_a, self.batchnorm_a),
            (self.conv_b, self.batchnorm_b),
            (self.conv_c, self.batchnorm_c),
            (self.conv_d, self.batchnorm_d),
            (self.conv_e, self.batchnorm_e),
        )
        for conv, batchnorm in stages:
            x = batchnorm(self.relu(conv(x)))

        # Global max over the time axis -> (batch, 256)
        x = x.max(-1)[0]
        return self.fc1(x)

In [None]:
#  X_train.shape : (4286, 42001)
# shape after conv a : torch.Size([32, 8, 8396])


training

* implement early stopping to avoid overfitting

In [None]:
class EarlyStopper:
    """Signal when the validation loss has stopped improving.

    The lowest loss seen so far is tracked. Each call whose loss exceeds
    that minimum by more than ``min_delta`` increments a counter; a new
    minimum resets it. Stopping is requested once the counter reaches
    ``patience``.
    """

    def __init__(self, patience=1, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.min_validation_loss = np.inf

    def early_stop(self, validation_loss):
        """Record ``validation_loss`` and return True if training should stop."""
        best = self.min_validation_loss

        if validation_loss < best:
            # New best loss: remember it and start counting again
            self.min_validation_loss = validation_loss
            self.counter = 0
            return False

        if not (validation_loss > best + self.min_delta):
            # Within the tolerated margin: neither an improvement nor a strike
            return False

        self.counter += 1
        return self.counter >= self.patience

In [None]:
import torch.optim as optim
from skorch import NeuralNetClassifier

from sklearn.model_selection import KFold

In [None]:
import torch.optim as optim
from skorch import NeuralNetClassifier

from sklearn.model_selection import KFold



# device: use GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# parameters
n_epoch = 50
learning_rate = 1e-3

# Lowest mean validation loss seen so far, over all folds and epochs
min_validation_loss = np.inf

criterion = nn.CrossEntropyLoss()
print('training...')

#KFold setting
KCV = KFold(n_splits=6) #shuffle=True, random_state=15011999
Kidx = KCV.get_n_splits(X_train)

for fold, (train_index, val_index) in enumerate(KCV.split(X_train)):
    print('------------fold no---------{}----------------------'.format(fold))

    # Start every fold from a fresh network and optimizer. Re-using one
    # model across folds would mean that each validation fold has already
    # been trained on during a previous fold, making its loss meaningless.
    my_net = SingleChannelConvNet().to(device)
    optimizer = optim.Adam(my_net.parameters(), lr=learning_rate)

    early_stopper = EarlyStopper(patience=6, min_delta=0.05)

    train_subsampler = torch.utils.data.SubsetRandomSampler(train_index)
    val_subsampler = torch.utils.data.SubsetRandomSampler(val_index)

    training_dataloader = DataLoader(training_dataset, sampler=train_subsampler, batch_size = 32)
    validation_dataloader = DataLoader(training_dataset, sampler=val_subsampler, batch_size = 32)

    for epoch in range(n_epoch):  # loop over the dataset multiple times

        # Back to training mode: the validation pass below switches the
        # network to eval mode, which must not leak into the next epoch.
        my_net.train()
        running_loss = 0.0
        n_train_batches = 0
        for inputs, labels in training_dataloader:
            inputs, labels = inputs.to(device).float(), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward + optimize
            outputs = my_net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            n_train_batches += 1
        print('epoch %d, %d samples, loss: %.3f' % (epoch + 1, len(train_index), running_loss / max(n_train_batches, 1)), end = ", ")

        # Validation pass: eval mode and no autograd bookkeeping
        my_net.eval()
        validation_loss = 0.0
        n_val_batches = 0
        with torch.no_grad():
            for inputs, labels in validation_dataloader:
                inputs, labels = inputs.to(device).float(), labels.to(device)

                outputs = my_net(inputs)
                validation_loss += criterion(outputs, labels).item()
                n_val_batches += 1

        # Use the mean batch loss (the value that is printed) for both
        # checkpointing and early stopping, so that min_delta does not
        # depend on the number of validation batches.
        validation_loss /= max(n_val_batches, 1)
        print('validation loss: %.3f' % validation_loss)

        if validation_loss < min_validation_loss:
            min_validation_loss = validation_loss
            print('new minimal validation error')
            torch.save(my_net.state_dict(), 'my_net_opti')

        if early_stopper.early_stop(validation_loss):
            print('aie at epoch', epoch)
            break


print('Finished Training')

https://stackoverflow.com/questions/71998978/early-stopping-in-pytorch

In [None]:
# IMPLEMENTING ADABOOST CNN https://www.sciencedirect.com/science/article/abs/pii/S0925231220304379

In [None]:
from sklearn.metrics import balanced_accuracy_score, cohen_kappa_score, confusion_matrix
from sklearn.metrics import plot_confusion_matrix, f1_score
# params
classes = ['Wake', 'N1', 'N2', 'N3', 'REM']

# Load the saved parameter dictionary. map_location lets a checkpoint
# written on a GPU be loaded on a CPU-only machine.
my_net.load_state_dict(torch.load('my_net_opti', map_location=device))
# Inference must use the BatchNorm running statistics, not batch statistics
my_net.eval()

predicted_batches = []
label_batches = []
with torch.no_grad():
    for inputs, labels in test_dataloader:
        inputs = inputs.to(device).float()

        outputs = my_net(inputs)
        predicted_batches.append(outputs.argmax(dim=1).cpu())
        label_batches.append(labels.cpu())

# Concatenate once at the end; both arrays keep their integer dtype
true_list = torch.cat(label_batches).numpy()
prediction_list = torch.cat(predicted_batches).numpy()
scores = {'balanced_accuracy': balanced_accuracy_score(true_list, prediction_list),
            'macro_f1': f1_score(true_list, prediction_list, average = 'macro'),
            'confusion_matrix': confusion_matrix(true_list, prediction_list)}

for elt in scores:
    print(elt)
    print(scores[elt])

* implement k-fold cross validation
* implement Ensemble with 1 CNN for each EEG/accelerometer channel (raw data)

Implement weighted average ensemble:
* weight the prediction of each CNN according to its validation accuracy

In [None]:
# Wrap the CNN in a scikit-learn compatible estimator.
# The module returns logits, so CrossEntropyLoss is requested explicitly:
# skorch's default NLLLoss expects the module to output probabilities.
new_net = NeuralNetClassifier(
    SingleChannelConvNet,
    criterion=nn.CrossEntropyLoss,
    device=device,
)
new_net.initialize()  # This is important!
new_net.load_params(f_params='my_net_opti')
# X_train = X_train.astype(np.float32)
# y_train = np.array(y_train, dtype='int64')
for inputs, labels in training_dataloader:
    # partial_fit continues from the loaded parameters; fit() would
    # re-initialize the module and throw the pretrained weights away.
    # Batches are handed over on the CPU; skorch moves them to its device.
    new_net.partial_fit(inputs.float(), labels)

In [None]:
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import ParameterGrid, GridSearchCV
# n_CNN = n_EEG
# n_CNN = 1

# weights_grid = ParameterGrid({'weights': [[np.linspace(0,1,9)] for i in range(n_CNN)]})
# weights_grid = ParameterGrid({'weights': [[0, 1]]})
weights_grid = {'weights': [[0.5, 1], [1, 0.5], [1, 1]]}


In [None]:
# Scratch cell: shows the two candidate `weights` grids considered above.
# NOTE(review): n_CNN is only assigned in commented-out lines in the visible
# part of the notebook, so this raises NameError unless it is defined elsewhere.
[[np.linspace(0,1,9)] for i in range(n_CNN)]
[[0, 1]]

In [None]:
# Soft-voting ensemble of the skorch-wrapped CNN and a new random forest,
# with the voting weights chosen by grid search over `weights_grid`.
# vc = VotingClassifier(estimators=[('cnn', NeuralNetClassifier(SingleChannelConvNet().load_state_dict(torch.load('my_net_opti')))), ('rf',RandomForestClassifier())],
#                    voting='soft',n_jobs=-1)
vc = VotingClassifier(estimators=[('cnn', new_net), ('rf',RandomForestClassifier())],
                   voting='soft',n_jobs=-1)

# refit=False: the search only scores the weight combinations; no final
# estimator is refitted on the whole training set.
grid_Search = GridSearchCV(param_grid = weights_grid, estimator=vc, refit=False, scoring='f1_macro', verbose=2)
# new_net is the same object that was handed to vc above, so these settings
# also apply to the 'cnn' estimator of the ensemble.
new_net.set_params(train_split=False, verbose=0)

# for i, data in enumerate(validation_dataloader, 0):
#     # get the inputs; data is a list of [inputs, labels]
#     inputs, labels = data
#     inputs, labels = np.array(inputs), np.array(labels, dtype=int)
#     # inputs, labels = inputs.to(device).float(), labels.to(device)
# NOTE(review): X_train is passed here without the channel axis that
# EegEpochDataset adds, and without the float32/int64 casts that are
# commented out in the skorch cell - presumably the CNN branch needs both;
# TODO confirm before relying on this cell.
grid_Search.fit(X_train, y_train)

# print(grid_Search.best_Score_)



In [None]:
def your_function(number):
    print(number)

from sklearn.model_selection import ParameterGrid
param_grid = {'param1': [1, 2, 3]}

grid = ParameterGrid(param_grid)

for params in grid:
    your_function(params['param1'])

In [None]:
RepeatedStratifiedKFold?

In [None]:
https://machinelearningmastery.com/stacking-ensemble-machine-learning-with-python/

# Submitting to the contest

In [None]:
# Build the submission file: one row per epoch of every test record.
predictions = []
my_net.eval()
for record in sorted(os.listdir("test_records")):
    # Parse all trailing digits of the file stem so that record numbers
    # above 9 are read correctly (record[-5] only sees the last digit).
    stem = os.path.splitext(record)[0]
    record_number = int(stem[len(stem.rstrip('0123456789')):])

    # Predict on the data of THIS record. Re-using prediction_list would
    # write the predictions of the held-out training records, identically,
    # for every test record.
    # NOTE(review): assumes test records have the same row layout as the
    # training records fed to the network - confirm.
    x = np.load(f'test_records/{record}')
    signals = torch.from_numpy(normalize_data(x)).float().unsqueeze(1)

    record_predictions = []
    with torch.no_grad():
        # Same batch size as the dataloaders, to bound memory use
        for batch in torch.split(signals, 32):
            outputs = my_net(batch.to(device))
            record_predictions.append(outputs.argmax(dim=1).cpu())
    record_predictions = torch.cat(record_predictions).numpy()

    for i, pred in enumerate(record_predictions):
        predictions.append({"identifier":record_number * 10000 + i,'target':int(pred)})

predictions = pd.DataFrame(predictions)
print(predictions)
predictions.to_csv('submission.csv',index = False)