#### • Chargement des différentes librairies

In [1]:
### • Chargement des différentes librairies

import sys, os, math, time

sys.path.insert(0, os.path.dirname(os.getcwd()))

from src.thot.sesh import *
from src.thot.catch_features import *

# from scipy.fft import fft
import pywt, librosa            # type: ignore

import matplotlib.pyplot as plt
# import seaborn as sns

from sklearn import model_selection, preprocessing as sk_p      # type: ignore

# Supervised learning
from sklearn import ensemble, svm, neighbors, linear_model      # type: ignore
# Unsupervised learning
from sklearn import cluster

# from sklearn.svm import SVC
from sklearn.pipeline import Pipeline                           # type: ignore
from sklearn.preprocessing import StandardScaler, RobustScaler  # type: ignore
from sklearn.decomposition import PCA                           # type: ignore

# from sklearn.linear_model import LogisticRegression
# from sklearn.ensemble import VotingClassifier, GradientBoostingClassifier

from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split, cross_validate # type: ignore
from sklearn.metrics import classification_report               # type: ignore

from keras.models import Sequential                             # type: ignore
from keras.callbacks import EarlyStopping                       # type: ignore
from keras.layers import GlobalAveragePooling1D, MaxPooling1D   # type: ignore
from keras.layers import Dense, Dropout, Conv1D, LSTM, Flatten  # type: ignore
from keras.layers import LeakyReLU, ReLU, PReLU, ConvLSTM1D
# from keras.layers import Bidirectional, TimeDistributed, RepeatVector, Flatten

# import tensorflow as tf
# from tensorflow.keras.optimizers import AdamW, Adam            # type: ignore

# from pyriemann.spatialfilters import CSP

%matplotlib inline

#### • Déclaration de constantes

In [2]:
### • Déclaration de constantes

# Sampling frequency in Hz (number of values per second)
SAMPLE_RATE  = 250
# Extra offset, in samples, applied when cutting epochs (currently disabled).
LAG : int    = 0 # SAMPLE_RATE >> 2 # -62     # candidate values for a signal delay of ~250 ms
# Exponent of the largest power of two not exceeding SAMPLE_RATE
PW2 : int    = int(np.floor(np.log2(SAMPLE_RATE))) # 2 << SAMPLE_RATE // 32
# FFT length derived from PW2 (a power of two)
NFFT : int   = 1 << PW2
# Epoch length expressed as a number of consecutive samples (here: one second)
SCOPE : int  = SAMPLE_RATE >> 0 # << 1 # (1 << PW2) * 4
# Two bipolar recordings + one neutral
eeg_Chans    = ['C3', 'C4', 'Cz']
# EEG channel names suffixed with the event id they relate to (0 and 1)
eeg_left     = [f'{c}_0' for c in eeg_Chans]
eeg_right    = [f'{c}_1' for c in eeg_Chans]
full_eeg     = eeg_left + eeg_right
# Three auxiliary recordings; the columns are named EOG:* although the variable says "ecg"
ecg_Chans    = ['EOG:ch01', 'EOG:ch02', 'EOG:ch03']
# Every signal channel of the dataframes
all_chans    = eeg_Chans + ecg_Chans
# Class id -> hand label used for the classification
hands_event  = {0: 'Left', 1: 'Right'}
# Ids of the events to predict
num_events   = range(len(hands_event))
# Frequency bands of interest as (low, high) bounds in Hz
eeg_bands    = {'Delta' : (0.1, 4.0),
                'Theta' : (4.1, 8.0),
                'Alpha' : (8.1, 14.0),
                'Beta'  : (14.1, 30.0),
                'Gamma' : (30.1, (SAMPLE_RATE >> 1) - 1),}
# Band-pass filter coefficients per band, as returned by butter_bandpass (unpacked as `b, a` elsewhere)
bands_coeff   = {band : butter_bandpass(low, high, SAMPLE_RATE) for band, (low, high) in eeg_bands.items()}
# Single wide band kept for the case study: from ~0 Hz up to the top of the Alpha band
band_interest = butter_bandpass(1e-3, eeg_bands['Alpha'][1], SAMPLE_RATE)

LAG, SCOPE #, *band_interest

(0, 250)

In [3]:
# %time
def split_and_merge(datas : list[Board], labels : list[Board] | None, Channels : Clause,
                    events : int | Index, level : bool = True,
                    merge : bool = False) -> tuple[Board, Index, Index] :
    """
    Cut every recording into per-event epochs and regroup them by event type.

    Parameters
    ----------
    datas    : recordings; each one is indexed with 'EventStart' and with the
               names listed in `Channels`.
    labels   : one label table per recording, indexed with 'EventType'.
               It is iterated unconditionally, so despite the annotation it
               must not be None.
    Channels : channel (column) names to extract.
    events   : number of event types (expanded to `range(events)`) or an
               iterable of event ids. Ids index two-slot lists, so only the
               ids 0 and 1 are supported.
    level    : when True every recording is capped to the smallest number of
               labelled events found across `labels`.
    merge    : forwarded to `full_event`; also selects how the epochs are
               regrouped afterwards.

    Returns
    -------
    (eras, spots, parts)
        eras  : one DataFrame per event id, one column per channel plus an
                'EventType' column.
        spots : spots[event][k] holds the sample indices at which `event`
                starts in recording k.
        parts : `zero_removal(..., 75)` applied to the first channel of every
                recording.
    """
    parts = []
    temp  = [[], []]  # temp[event][k]  : per-channel epochs of recording k
    spots = [[], []]  # spots[event][k] : onsets of `event` in recording k

    # Number of labelled events kept per recording
    counts = [len(x['EventType']) for x in labels]
    ceil   = [min(counts)] * len(datas) if level else counts

    if isinstance(events, int) : events = range(events)

    # Extraction of the data surrounding each event onset
    for i, run in enumerate(datas) :
        types = labels[i]['EventType'][: ceil[i]]
        # np.where returns a 1-tuple: take the index array *before* truncating,
        # otherwise the slice is applied to the tuple and does nothing.
        sites = np.where(run['EventStart'] == 1)[0][: ceil[i]]

        parts.append(zero_removal(run[Channels[0]], 75))

        for j in events :
            spots[j].append(sites[np.where(types == j)[0]])

            room = event_epochs(spots[j][-1], SCOPE, LAG)

            temp[j].append([full_event(run[c], room, merge) for c in Channels])

    # Regroup the data by event type and by channel
    if merge :
        n    = len(Channels)
        temp = [[[np.append([], T[j :: n]) for T in temp[i]] for j in range(n)]
                for i in events]
    else :
        # One bucket per (event, channel), sized from the inputs instead of a
        # hard-coded 2 x 3 layout.
        pool = [[[] for _ in Channels] for _ in temp]

        for i in events :
            for T in temp[i] :
                for j, A in enumerate(T) :
                    pool[i][j].extend(A)

        temp = pool

    eras = [pd.DataFrame({**{c : pd.Series(X) for c, X in zip(Channels, temp[i])}, 'EventType': i})
            for i in events]

    # No explicit `del`: some of the former targets were only bound on one
    # branch (UnboundLocalError with merge=True); locals are released on return.
    gc.collect()

    return eras, spots, parts

def simple_struct(data : Board, event : int, col : str) -> Board :
    """
    Build one feature row per epoch of `event`.

    Each row is the Delta-band filtered, then normalised, difference between
    the 'C3' and 'C4' epochs of the rows where `data[col] == event`.
    The returned frame carries an extra `col` column filled with `event`.
    """
    subset = data[data[col] == event]
    b, a   = bands_coeff['Delta']

    rows = []

    # The value computed is C3 - C4 (other cells of this notebook use C4 - C3).
    for c4_epoch, c3_epoch in zip(subset['C4'], subset['C3']) :
        filtered = bandpass_filter(c3_epoch - c4_epoch, b, a)

        rows.append(normalized(filtered))

    frame      = pd.DataFrame(rows)
    frame[col] = event

    return frame

#### • Try catch_22

def titre(txt : str, size : int) -> str :
    """
    Centre `txt` (upper-cased) inside a ruler of dashes `size` characters wide.

    The text is surrounded by one space on each side. When it does not fit,
    the dashes are dropped and the result is simply longer than `size`.
    The previous version produced `size + 1` characters, so banners did not
    line up with a `'-' * size` ruler.
    """
    label = txt.upper()
    n     = len(label)
    # Left padding keeps the historical formula; the right side takes the rest.
    avt   = max(((size - n) >> 1) - 1, 0)
    apr   = max(size - (avt + n + 2), 0)

    return f"{'-' * avt} {label} {'-' * apr}"

def catch(data : Board, col : str, channels : Clause, event : int,
            norm : bool = False) -> Board :
    """
    Summarise every epoch of `event` with min / max / median statistics.

    Parameters
    ----------
    data     : frame whose `channels` columns hold one signal per row.
    col      : name of the column holding the event id.
    channels : exactly two channel names (the statistics of the second are
               subtracted from those of the first).
    event    : event id to keep (`data[col] == event`).
    norm     : when True each signal goes through `normalized` first.

    Returns
    -------
    A frame with, per epoch, the three statistics of each channel, their
    channel-to-channel differences ("<stat>_diff") and a `col` column set to
    `event`.

    Raises
    ------
    ValueError when `channels` does not contain exactly two names.
    """
    if len(channels) != 2 :
        raise ValueError(f"catch expects exactly two channels, got {len(channels)}")

    func = [np.min, np.max, np.median]
    name = [f.__name__ for f in func]
    head = [f"{c}_{f}" for c in channels for f in name] \
         + [f"{f}_diff" for f in name]
    room = data[data[col] == event]

    if norm :
        series = [[normalized(x) for x in room[c]] for c in channels]
    else :
        series = [room[c] for c in channels]

    # stats[channel][statistic][epoch]
    first, second = [[[f(v) for v in s] for f in func] for s in series]

    sub = np.subtract(first, second)
    _df = pd.DataFrame(np.array((*first, *second, *sub)).T, columns = head)

    # The former debug print stacked arrays of different shapes into a single
    # np.array, which recent NumPy rejects; it has been removed.
    _df[col] = event

    return _df

def pool_(data : Board, event : int) -> Board :
    """Return the normalised `catch` statistics of `event`, computed on every EEG channel but the last."""
    channels = eeg_Chans[: -1]

    return catch(data, 'EventType', channels, event, norm = True)

#### • Acquisition des données d'entrainement

In [4]:
### • Acquisition des données d'entrainement

# Archive holding the dataset, relative to the notebook folder
target   = "../data/data.zip"
# Length of the directory prefix stripped from every archive entry
size     = len('train/')
# Names of the entries listed under 'train', without the leading 'train/'
files    = [x[size :] for x in files_in_zip(target, directory = 'train')]
# files    = [f for f in files if f not in files[:: -3]]
# files    = files[:: -3]

# Read the same file names from the 'train' and 'y_train_only' directories of the archive
entrants  = csv_in_zip(target, directory = 'train', files = files)
label_tmp = csv_in_zip(target, directory = 'y_train_only', files = files)

#### • Acquisition des données de test

In [None]:
### • Acquisition des données de test

# Read the 'test' directory of the archive.
# NOTE(review): `test_csv` is reassigned by the pre-processing cell below — confirm which one is wanted.
test_csv = csv_in_zip(target, directory = 'test')

#### • Pré-traitement des données

In [5]:
# Choose which recordings go to training and which to testing, then cut both
# sets into per-event epochs.
n_files = len(files)
size    = range(n_files)            # positions of all the files
files   = np.array(files)           # array form, so it can be indexed with a list of positions
# Number of distinct values of the third character of the file names
# (presumably the subject digit, e.g. 'B0103T.csv' — TODO confirm)
unic    = len(np.unique([x[2] for x in files]))
step    = n_files // unic           # files per group
offre   = -1                        # selects the splitting strategy below

match offre :
    case 1 | 2 :
        # Hold out whole groups: groups 1 and 3 when offre == 2, otherwise a
        # draw made by single_draw (assumed to return 1-based group numbers — TODO confirm)
        n_test    = np.array([1, 3]) if offre == 2 else \
                    single_draw(1, unic, math.ceil(.2 * unic))
        test_pos  = [range(i, i + step) for i in (n_test - 1) * step]
        test_pos  = np.append([], test_pos).astype(int)
        train_pos = [i for i in size if i not in test_pos]

        print(*n_test, '\n')
    case 3 :
        # One random group; np.random.randint excludes its upper bound, so the
        # last group is never drawn.
        n_test    = np.random.randint(1, unic) - 1
        i         = n_test * step
        test_pos  = range(i, i + step)
        # Training: every third file counted from the end, outside the drawn group
        train_pos = [i for i in size[:: -3] if i not in test_pos]
        # Testing: the drawn group without its last file
        test_pos  = test_pos[: step - 1]
    case 4 :
        # Plain random 80 / 20 split of the file positions
        train_pos, test_pos, _, _ = train_test_split(size, size, test_size = .2, random_state = 42)
    case _ :
        # Default: every third file counted from the end is used for training,
        # all the others for testing.
        train_pos = np.flip(size[:: -3])
        test_pos  = [i for i in size if i not in train_pos]

# -------------------- Train --------------------
train_files = files[train_pos]
train_csv   = [entrants[i] for i in train_pos]
train_label = [label_tmp[i] for i in train_pos]
# --------------------- Test --------------------
test_files  = files[test_pos]
test_csv    = [entrants[i] for i in test_pos]
test_label  = [label_tmp[i] for i in test_pos]

print("Fichiers d'entrainements :\n ", *train_files)
print()
print("Fichiers tests :\n ", *test_files)
print()

### • Data pre-processing

# Epochs per event type, event onsets and cleaned first channel, for each set
train_runs, train_spots, train_parts = split_and_merge(train_csv, train_label, eeg_Chans, num_events)
test_runs, test_spots, test_parts    = split_and_merge(test_csv, test_label, eeg_Chans, num_events)

# One frame per set, the per-event frames stacked one after the other
df_trains = pd.concat(train_runs, ignore_index = True)
df_test   = pd.concat(test_runs, ignore_index = True)

Fichiers d'entrainements :
  B0103T.csv B0203T.csv B0303T.csv B0403T.csv B0503T.csv B0603T.csv B0703T.csv B0803T.csv B0903T.csv

Fichiers tests :
  B0101T.csv B0102T.csv B0201T.csv B0202T.csv B0301T.csv B0302T.csv B0401T.csv B0402T.csv B0501T.csv B0502T.csv B0601T.csv B0602T.csv B0701T.csv B0702T.csv B0801T.csv B0802T.csv B0901T.csv B0902T.csv



In [6]:
# Feature rows of every event type, stacked per set
X_train = pd.concat((simple_struct(df_trains, i, 'EventType') for i in num_events), ignore_index = True)
X_test  = pd.concat((simple_struct(df_test, i, 'EventType') for i in num_events), ignore_index = True)

# Alternative features: pool_(df_trains, i) / pool_(df_test, i) in place of simple_struct

# Shuffle the rows so that the classes are not presented one after the other
X_train = X_train.sample(frac = 1)
X_test  = X_test.sample(frac = 1)

print(titre("Custome 'train_test_split'", 40))

for banner, frame in (('train', X_train), ('test', X_test)) :
    print(titre(banner, 40))
    print(frame.info())

print('-' * 40)

# Separate the labels from the features (the column is removed in place)
y_train = X_train.pop('EventType')
y_test  = X_test.pop('EventType')

# display(X_train)
# display(X_test)
# print(sum(X_test.isna()))

# X_train, X_test, y_train, y_test = train_test_split(df_trains.drop(columns = ['EventType']),
#                                                     df_trains['EventType'], test_size = .2, random_state = 42)

------ CUSTOME 'TRAIN_TEST_SPLIT' -------
---------------- TRAIN ------------------
<class 'pandas.core.frame.DataFrame'>
Index: 1440 entries, 232 to 651
Columns: 251 entries, 0 to EventType
dtypes: float64(250), int64(1)
memory usage: 2.8 MB
None
----------------- TEST ------------------
<class 'pandas.core.frame.DataFrame'>
Index: 2160 entries, 2026 to 144
Columns: 251 entries, 0 to EventType
dtypes: float64(250), int64(1)
memory usage: 4.2 MB
None
----------------------------------------


#### • Test de classification - Proposition 01

In [7]:
# csp = CSP(nfilter = 2)
# std = RobustScaler()
# Pre-processing objects; in this cell they are only referenced by the
# commented-out lines below.
pca = PCA(SCOPE >> 4)
std = StandardScaler()

# X_train_scaled = std.fit_transform(X_train)
# X_test_scaled  = std.transform(X_test)

# X_train_scaled = pca.fit_transform(X_train)
# X_test_scaled  = pca.transform(X_test) 

# print(*pca.singular_values_.shape)

# No scaling is applied: the "scaled" names are aliases of the raw matrices
X_train_scaled = X_train
X_test_scaled  = X_test

# Support vector classifier, polynomial kernel
clf = svm.SVC(gamma = 'scale', kernel = 'poly')
# Random forest, using every available core
rfc = ensemble.RandomForestClassifier(n_jobs = -1)
# Logistic regression
lrg = linear_model.LogisticRegression(solver = 'newton-cholesky') # 'saga' ''
# #
# knc = neighbors.KNeighborsClassifier() # .RadiusNeighborsRegressor()
# #
# lsg = linear_model.SGDClassifier()
# # 
# gbc = ensemble.GradientBoostingClassifier()
# # 
# nnp = MLPClassifier(solver = 'sgd', learning_rate = 'invscaling')
# # 
# kmn = cluster.KMeans(n_clusters = 2, algorithm='elkan')

# For each classifier: cross-validated accuracy on the training set, then a
# fit on the whole training set and an evaluation on the held-out test set.
for reg in [rfc, clf, lrg] :
    scores : dict = cross_validate(reg, X_train_scaled, y_train, scoring = ['accuracy'])
    
    reg.fit(X_train_scaled, y_train)

    res  = scores['test_accuracy']      # one accuracy per cross-validation fold
    pred = reg.predict(X_test_scaled)
    
    print(f"• {reg} : Accuracy -> {res.mean():.1%} (±{res.std():.2}, max : {res.max():.1%})")
    print(f"\t-> Classification report [ Test-score / : {reg.score(X_test_scaled, y_test):.1%} ]")
    print(classification_report(y_test, pred, digits = 3))
    # print(f"\t-> ● Accuracy score : {accuracy_score(y_test, pred):.1%}")
    # Confusion matrix: rows are the true classes, columns the predictions
    display(pd.crosstab(y_test, pred, rownames = ['Vrai'], colnames = ['Prono']))

#ExtraTreesClassifier 
# Voting_clf = VotingClassifier(estimators = [('knn', clf1), ('svm', clf2), ('rf', clf3)], voting = 'hard')
# cv3        = model_selection.KFold(n_splits = 3, random_state = 42, shuffle = True), clf4

# Create a pipeline
# pip = Pipeline([('RFC', rfc), ('SVM', clf)])    # ('CSP', csp), 

# pip.fit(X_train, y_train)

# y_pred = pip.predict(X_test)

# y_train = np.array(y_train)

    # if 'cluster' in type(reg).__name__ :
    #     pred = reg.fit_predict(X_test_scaled)
    # else :
    #     pred = reg.predict(X_test_scaled)


• RandomForestClassifier(n_jobs=-1) : Accuracy -> 70.0% (±0.049, max : 73.6%)
	-> Classification report [ Test-score / : 62.2% ]
              precision    recall  f1-score   support

           0      0.625     0.609     0.617      1080
           1      0.619     0.635     0.627      1080

    accuracy                          0.622      2160
   macro avg      0.622     0.622     0.622      2160
weighted avg      0.622     0.622     0.622      2160



Prono,0,1
Vrai,Unnamed: 1_level_1,Unnamed: 2_level_1
0,658,422
1,394,686


• SVC(kernel='poly') : Accuracy -> 71.9% (±0.032, max : 75.0%)
	-> Classification report [ Test-score / : 62.7% ]
              precision    recall  f1-score   support

           0      0.634     0.600     0.617      1080
           1      0.620     0.654     0.637      1080

    accuracy                          0.627      2160
   macro avg      0.627     0.627     0.627      2160
weighted avg      0.627     0.627     0.627      2160



Prono,0,1
Vrai,Unnamed: 1_level_1,Unnamed: 2_level_1
0,648,432
1,374,706


• LogisticRegression(solver='newton-cholesky') : Accuracy -> 72.8% (±0.024, max : 75.0%)
	-> Classification report [ Test-score / : 64.4% ]
              precision    recall  f1-score   support

           0      0.653     0.616     0.634      1080
           1      0.636     0.672     0.654      1080

    accuracy                          0.644      2160
   macro avg      0.644     0.644     0.644      2160
weighted avg      0.644     0.644     0.644      2160



Prono,0,1
Vrai,Unnamed: 1_level_1,Unnamed: 2_level_1
0,665,415
1,354,726


https://raphaelvallat.com/bandpower.html

In [None]:
# Plot titles: one "<note> . <rank>" label per training file
notes   = filename(train_files)
headers = [f"{t} . {i + 1}" for i, t in enumerate(notes)]
count   = range(len(train_files))

# C4 - C3 per epoch, stacked along a new first axis
# (this value is not the last expression of the cell, so it is not displayed)
pd.DataFrame(np.stack(df_trains['C4'] - df_trains['C3'], axis = 0))

df_trains['C4'] @ df_trains['C3'] # Dot product

#### • Test prédiction

In [None]:
### • Visualisation des spectrogrammes / Test

def logMelSpectrogram(data : Vector, rate : int, dt : float = 1e-2) -> Vector :
    """
    Log of the mel-filtered magnitude spectrogram of `data`.

    The FFT length is the largest power of two not exceeding `rate * dt`;
    the mel bank has as many filters as the STFT has frequency bins.
    """
    exponent = int(np.floor(np.log2(rate * dt)))
    n_fft    = 1 << exponent

    # Magnitude spectrogram, transposed so that frequency is the last axis
    magnitude = np.abs(librosa.stft(y = data, n_fft = n_fft, hop_length = 4, center = True)).T
    n_bins    = magnitude.shape[-1]

    # Mel filter bank, transposed to (frequency bins, mel filters)
    bank = librosa.filters.mel(sr = rate, n_fft = n_fft + 1, n_mels = n_bins).T

    # Projection of the spectrogram on the mel filters
    projected = np.tensordot(magnitude, bank, 1)

    # Small offset so that the logarithm stays finite on empty bins
    return np.log(projected + 1e-6)
    
def structure(data : Board | Vector, rate : int, whr : Clause) -> Vector :
    """
    Stack the per-epoch difference C4 - C3 of `data` into one array.

    `data` is indexed with 'C4' and 'C3'; each entry holds one signal per
    row. The result has one row per epoch. `rate` and `whr` are kept for
    interface compatibility and are not used by this variant.

    The previous version read the global `df_trains` instead of `data`, so
    every call returned the training set whatever was passed in.
    """
    # Other variants tried: log-mel spectrogram per epoch, Welch PSD per
    # channel, raw channels stacked on a third axis.
    return np.stack(data['C4'] - data['C3'], axis = 0)

def img_spectrogram(raw : Vector, rate : int, nfft : int = 1 << 10) -> Vector :
    """Mel spectrogram of `raw`: 32 mel bands between 0 and 20 Hz, hop of one sample, 32-sample window."""
    options = {
        'y'          : raw,
        'sr'         : rate,
        'hop_length' : 1,
        'n_fft'      : nfft,
        'n_mels'     : 32,
        'fmin'       : 0,
        'fmax'       : 20,
        'win_length' : 32,
    }

    return librosa.feature.melspectrogram(**options)

def spectrogram_dep(data : Board, rate : int, channels : Clause, n_row : int = 5, n_col : int = 12) :
    """
    Draw the mel spectrogram of `n_row` randomly drawn epochs, one panel per
    (epoch, channel) pair, on an `n_row` x `n_col` grid.
    """
    # Random epoch positions (with replacement), in increasing order
    draws = np.sort(np.random.default_rng().integers(data.shape[0], size = n_row))

    plt.figure(figsize = (18, 2 * .48 * n_row))

    pairs = [(k, c) for k in draws for c in channels]

    for pos, (k, c) in enumerate(pairs, start = 1) :
        image = img_spectrogram(raw = data[c][k], rate = rate)

        plt.subplot(n_row, n_col, pos)
        # The leading number counts the panels by groups of three
        plt.title(f"{((pos - 1) // 3) + 1} . {k} - {c}", fontsize = 8)
        librosa.display.specshow(data = 1 - image, sr = rate, hop_length = 1)

    plt.tight_layout()
    plt.show()

def spectrogram(data : Board, rate : int, channels : Clause, n_row : int = 5, n_col : int = 12) :
    """
    Draw the continuous wavelet transform ('cmor' wavelet) of `n_row` randomly
    drawn epochs, one axis-less panel per (epoch, channel) pair.
    """
    # Random epoch positions (with replacement), in increasing order
    draws = np.sort(np.random.default_rng().integers(data.shape[0], size = n_row))

    plt.figure(figsize = (18, 2 * .48 * n_row))

    # Wavelet scales: 1 up to (but excluding) half the sampling rate
    scales = np.arange(1, rate >> 1)
    pairs  = [(k, c) for k in draws for c in channels]

    for pos, (k, c) in enumerate(pairs, start = 1) :
        signal_n  = normalized(data[c][k])
        coeffs, _ = pywt.cwt(signal_n, scales = scales, wavelet = 'cmor')

        plt.subplot(n_row, n_col, pos)
        plt.imshow(np.abs(coeffs), aspect = 'auto', cmap = 'jet')
        plt.axis('off')

    plt.show()

def plot_logMelSpectrogram(data, rate) :
    """
    Show the log-mel spectrogram of `data` as a heat map.

    NOTE(review): `sns` is used here while `import seaborn as sns` is commented
    out at the top of the notebook — confirm it is provided elsewhere.
    """
    # Quarter turn applied to the spectrogram before plotting
    image = np.rot90(logMelSpectrogram(data, rate))

    sns.heatmap(image, cmap = 'inferno')

    plt.yticks([])
    plt.xlabel("Time (s)")
    plt.ylabel("Frequency (Mel)")

In [None]:
# Wavelet transform of the first 'C3' epoch of the first event type;
# pywt.cwt returns the coefficients and the frequency matching each scale.
coefficients, frequencies = pywt.cwt(train_runs[0]['C3'][0], scales = np.arange(1, SAMPLE_RATE >> 1), wavelet = 'cmor')

# Inverse of the returned frequencies
1 / frequencies

In [None]:
# For each EEG channel, the values returned by harmonic() for the configured bands
H = [list(harmonic(x, bands_coeff).values()) for x in [df_trains[c] for c in eeg_Chans]]

# Shape checks: raw nesting, the same data with its axes rearranged by two stacks, one innermost item
np.shape(H), np.shape(np.stack(np.stack(H, axis = 1), axis = 2)), np.shape(H[0][1][0])
# harmonic(trains['C3'][256], bands_coeff).values())), H

In [None]:
# Random 80 / 20 split of the training epochs only.
# NOTE(review): this rebinds X_train / X_test / y_train / y_test defined by the earlier feature cell.
X_train, X_test, y_train, y_test = train_test_split(df_trains.drop(columns = ['EventType']),
                                                    df_trains['EventType'], test_size = .2, random_state = 42)

# X_train, X_test, y_train, y_test = train_test_split([v.tolist() for v in np.array(trains[eeg_Channels])],
#                                                     trains['EventType'], test_size = .2, random_state = 42)

In [None]:
# train_dataset = np.array(X_train)
# test_dataset  = X_test

# train_dataset = structure(X_train, SAMPLE_RATE, eeg_Chans[:2])
# test_dataset  = structure(X_test, SAMPLE_RATE, eeg_Chans[:2])

# train_dataset = structure(X_train, SAMPLE_RATE, eeg_Chans[: 2])
# test_dataset  = structure(X_test, SAMPLE_RATE, eeg_Chans[: 2])

# Network inputs: structure() is called on each frame without its label column, for the first two EEG channels
train_dataset = structure(df_trains.drop(columns = ['EventType']), SAMPLE_RATE, eeg_Chans[: 2])
test_dataset  = structure(df_test.drop(columns = ['EventType']), SAMPLE_RATE, eeg_Chans[: 2])

# print(train_dataset.shape)

In [None]:
#### • Test prédiction

# UNITS     : int   = 100
BATCHSIZE : int   = 32
EPOCH     : int   = 1000
ZERO      : int   = 32      # filters of the first convolution, doubled at each stage
DROPOUT   : float = 1 / 4

OPTIMIZER = 'AdamW'     # other candidates: adamax, adafactor, adam, nadam
# other candidates: kl_divergence mean_squared_logarithmic_error mean_absolute_error
LOSS      = 'sparse_categorical_crossentropy'
ACTIV     = ReLU   # PReLU, LeakyReLU
K_SIZE    = 5

# Conv1D consumes (steps, channels) per sample: give 2-D inputs a singleton
# channel axis. The test keeps the cell safe to run more than once.
if train_dataset.ndim == 2 :
    train_dataset = train_dataset[..., np.newaxis]
if test_dataset.ndim == 2 :
    test_dataset = test_dataset[..., np.newaxis]

model = Sequential([
    # - Layer 1 -
    # input_shape describes one sample, so the batch axis is left out
    Conv1D(filters = ZERO, kernel_size = K_SIZE, dilation_rate = 2,
           input_shape = train_dataset.shape[1 :]),
    ACTIV(),
    MaxPooling1D(pool_size = 2, strides = 1),
    Dropout(rate = DROPOUT),
    # - Layer 2 -
    Conv1D(filters = ZERO << 1, kernel_size = K_SIZE, dilation_rate = 2),
    ACTIV(),
    MaxPooling1D(pool_size = 2, strides = 1),
    Dropout(rate = DROPOUT),
    # - Layer 3 -
    Conv1D(filters = ZERO << 2, kernel_size = K_SIZE, dilation_rate = 2),
    ACTIV(),
    MaxPooling1D(pool_size = 2, strides = 1),
    Dropout(rate = DROPOUT),
    # - Temporal pooling -
    # GlobalAveragePooling1D needs the 3-D convolution output; the Flatten
    # layer that used to precede it made the input 2-D and has been removed.
    GlobalAveragePooling1D(),
    # - Output layers -
    Dense(units = ZERO << 2),
    ACTIV(),
    Dropout(rate = DROPOUT),
    Dense(units = len(hands_event), activation = 'softmax'), # sigmoid 
])

model.summary()
# The metric is spelled 'accuracy' so that the logged names match the
# 'val_accuracy' monitored below and the keys read when plotting the history.
model.compile(optimizer = OPTIMIZER, loss = LOSS, metrics = ['accuracy'])

print()

# NOTE(review): y_train / y_test come from earlier cells that shuffle or split
# the frames — confirm that they are aligned row by row with train_dataset /
# test_dataset before trusting the scores.
stop    = EarlyStopping(monitor = 'val_accuracy', mode = 'max', verbose = 1, patience = 50)
history = model.fit(train_dataset, y_train, validation_data = (test_dataset, y_test), verbose = 1,
                    batch_size = BATCHSIZE, epochs = EPOCH, callbacks = [stop])

In [None]:
pred = model.predict(test_dataset)

# Share of samples whose most probable class equals the label.
# argmax always yields a class, whereas searching for a probability above .5
# raised IndexError when no class exceeded it.
np.mean(pred.argmax(axis = 1) == np.asarray(y_test))

In [None]:
history_dict = history.history
# Keras logs the metric under the name given to compile(): accept both spellings
acc_key      = 'accuracy' if 'accuracy' in history_dict else 'acc'
loss_values  = history_dict['loss']
acc_values   = history_dict[acc_key]
absc         = range(1, len(loss_values) + 1)   # epoch numbers, starting at 1

plt.figure(figsize = (12, 4))

# Left panel: metrics measured on the training data
plt.subplot(121)
plt.plot(absc, loss_values, label = 'Loss')
plt.plot(absc, acc_values, label = 'Accuracy')
plt.title('Training')
plt.xlabel('Epochs')
plt.legend()

# Right panel: metrics measured on the validation data
plt.subplot(122)
plt.plot(absc, history_dict['val_loss'], label = 'Loss')
plt.plot(absc, history_dict[f'val_{acc_key}'], label = 'Accuracy')
plt.title('Testing')
plt.xlabel('Epochs')
plt.legend()
plt.show()

#### • Visualisation densité spectrale du Signal

In [None]:
### • Visualisation de la densité spectrale du signal

# Power spectral density plots, titled with `headers`
# NOTE(review): `headers` has one entry per training file while `entrants` holds every file — confirm the pairing.
plot_psd(entrants, train_runs, rate = SAMPLE_RATE, Channels = eeg_Chans, titled = headers)

#### • Densité spectrale / échantillon

In [None]:
### • Densité spectrale / échantillon

# Welch and matplotlib PSD side by side for a selection of epochs.
# NOTE(review): `samples`, `train_samples` and `signal` are not defined in the
# cells shown above — confirm where they come from.
inc   = 40                              # number of grid rows
scp = samples(train_samples, inc)
boolInt = -2                            # subplot offset, advanced by 2 per epoch

plt.figure(figsize = (15, inc * 1.5))

for i in scp :
    boolInt += 2

    # The three channels of one epoch are overlaid on the same pair of panels
    for c in eeg_Chans :
        y = train_runs[0][c][i]
        yest, Pxx_den = signal.welch(y, SAMPLE_RATE)   # , scaling = 'spectrum'
        
        # First panel of the pair: Welch estimate on a logarithmic axis
        plt.subplot(inc, 4, boolInt + 1)
        plt.semilogy(yest, Pxx_den, label = c)
        plt.title(f"welch - {i + 1}", fontsize = 11)
        plt.grid()

        # Second panel of the pair: matplotlib's own PSD estimate
        plt.subplot(inc, 4, boolInt + 2)
        res, _ = plt.psd(y, Fs = SAMPLE_RATE, label = c) # , NFFT = NFFT
        plt.title(f"psd - {i + 1}", fontsize = 11)
        plt.xlabel('')
        plt.ylabel('')
        # plt.legend()

# Applies to the last panel drawn only
plt.xlabel('frequency [Hz]')
# plt.ylabel('PSD [V**2/Hz]')
plt.tight_layout()
plt.show();

# f, Pxx_den = signal.welch(train_eras[0]['C3'][752], SAMPLE_RATE)

# print(len(Pxx_den))

#### • Visualisation Epoques

In [None]:
### • Visualisation Epoques

# train_parts, train_spots and headers are indexed by position inside the
# training subset, so the raw recording must come from train_csv with the same
# position (indexing with global file positions paired each plot with the
# wrong recording and overran the training lists). One file out of three is shown.
for k in range(len(train_csv))[:: 3] :
    plot_signal(train_csv[k], train_parts[k], train_spots[0][k], train_spots[1][k], channels = eeg_Chans,
                period = SCOPE, lag = LAG, title = headers[k])

#### • Visualisation décomposition des signaux

In [None]:
### • Visualisation Décomposition des signaux

# Test de décomposition des signaux en bandes de fréquences spécifiques compatibles avec les répartitions usuelles
# dans le domaine des EEG ['Delta', 'Theta', 'Alpha', 'Beta', 'Gamma']

# One set of plots per event type. The loop variable is no longer called
# `input`, which rebound the builtin for the rest of the notebook session.
for runs, token in zip(train_runs, ['Gauche', 'Droite']) :
    print(f"Exemples - Évènement Discriminé Main {token}")
    plot_wavelets(runs, bands_coeff, eeg_Chans, scope = 30, headers = headers)

#### • Visualisation des spectrogrammes (test)

In [None]:
# Wavelet spectrograms of 32 randomly drawn epochs for each event type
for i, t in zip(num_events, ['Gauche', 'Droite']) :
    print(f"Exemples - Évènement Discriminé Main {t}")
    spectrogram(train_runs[i], SAMPLE_RATE, eeg_Chans, 32)

#### • Test PCA - (Non concluant)

In [None]:
### • Test PCA - (Non concluant)

# Cumulative explained variance of a PCA fitted on the standardised epochs,
# one panel per (event type, channel).
nca = SCOPE >> 2
pca = PCA(nca)

_, ax = plt.subplots(nrows = 2, ncols = 3, figsize = (15, 5))

for i, d in enumerate(train_runs) :
    for j, c in enumerate(eeg_Chans) :
        # One row per epoch, standardised column by column
        Z = std.fit_transform(d[c].to_list())
        principal_components = pca.fit_transform(Z)

        cumulative = np.cumsum(pca.explained_variance_ratio_)
        panel      = ax[i, j]

        panel.plot(range(nca), cumulative)
        panel.set_title(f'{c} . {i}')

plt.tight_layout()
plt.show()

#### • MNE époque (test)

In [None]:
### • MNE époque (test)

# Wrap the first recording into MNE objects and cut it into epochs.
# NOTE(review): `mne` is not imported in the import cell shown above — confirm it is available.
raw_csv = entrants[0][eeg_Chans]
info    = mne.create_info(ch_names = eeg_Chans, sfreq = SAMPLE_RATE, ch_types = 'eeg')
# Channels as rows; the 1e-6 factor presumably converts microvolts to volts — TODO confirm the unit of the CSV
raw_mne = mne.io.RawArray(raw_csv.T * 1e-6, info)
# Sample indices where 'EventStart' equals 1
sites     = np.where(entrants[0]['EventStart'] == 1)[0]

# display(compare(np.sort(np.concatenate((train_spots[0][0], train_spots[1][0]))), loc))

# Epoch window, in seconds, relative to each event
tmin, tmax = -0., 1

# loc = mne.find_events(raw_mne, stim_channel = 'C3')
# event_id = dict(C3 = 1, aud_r = 2, vis_l = 3, vis_r = 4)
# raw = mne.io.Raw(raw_mne, preload = True)
# raw.filter(2, None, method = 'iir')           # replace baselining with high-pass
# events = mne.read_events(event_fname)

# raw.info['bads'] = ['MEG 2443']  # set bad channels
# picks = mne.pick_types(info, meg = 'grad', eeg = True, eog = False, exclude = 'bads')
# Read epochs
# NOTE(review): the events array repeats the sample index in its three columns,
# so every event ends up with its own id — check against the layout expected by mne.Epochs.
absc = mne.Epochs(raw_mne, np.array([sites, sites, sites]).T, None, tmin, tmax, proj = False,
                    picks = None, baseline = None, preload = True, verbose = False) # event_id picks

# labels = epochs.events[::5, -1]

# events

# raw_mne.plot();

# raw_mne['C3'][0][0], len(df_train_csv[2]['Cz'])

display(absc)

#### • Test de classification - Proposition 02

In [None]:
#### • Test de classification - Proposition inputs 01

# X1 = np.where(trains['EventType'] == 0, trains['C3'], trains['Cz']) 
# X2 = np.where(trains['EventType'] == 0, trains['Cz'], trains['C4']) 
# X = [signal.welch(list(v), SAMPLE_RATE)[1] for v in X1 + X2]

# X = [np.append([], list(harmonic(v, bands_coeff).values())) for v in X]
# X = np.where(trains['EventType'] == 0, trains['C3'], 2 * trains['Cz'])
# X = [signal.welch(list(v), SAMPLE_RATE)[1] for v in X], trains['C3'] + trains['Cz'], trains['C4'] + trains['Cz']

# diff = np.array([np.cos(x) for x in (trains['C3'] ** 2 + trains['C4'] ** 2)])  + 2 * trains['Cz']

# dist =np.array([list(np.sqrt(x)) for x in (trains['C3'] ** 2 + trains['C4'] ** 2)])
# display(np.shape(np.array()))

# diff = np.array([d - v for v, d in zip(, dist)]) - trains['Cz'] * [x.mean() for x in trains['Cz']]

# cz_min = [v.min() for v in trains['Cz']]
# cz_max = [v.max() for v in trains['Cz']]

# display(compare(cz_min, cz_max), )

# trio = zip(trains['C4'], trains['C3'], trains['Cz'])

# prd = [(c3 - c4) * cz for c3, c4, cz in trio] 

# 'Delta', 'Theta', 'Alpha', 'Beta', 'Gamma'

# b, a = bands_coeff['Delta']

# c3 = np.array([bandpass_filter(bw, b, a) for bw in trains['C3']])
# c4 = np.array([bandpass_filter(bw, b, a) for bw in trains['C4']])

# diff = (c4 - c3) / trains['Cz'].max() #/ cz.max() # [for v in cz]
# X = [np.sign(s) for s in trains['C4'] - trains['C3']]
# X = np.array([normalized(bandpass_filter(bw, b, a)) for bw in X])

# X = [sk_p.minmax_scale(pywt.dwt(c4, wavelet = 'db4')[0] - pywt.dwt(c3, wavelet = 'db4')[0]) for c3, c4 in zip(trains['C3'], trains['C4'])]
# X = [v) for v in X]

# X = [pywt.dwt(v, wavelet = 'db4')[0] for v in X]
# X = [normalized(pywt.dwt(v, wavelet = 'db4')[0]) for v in X]
# X = [normalized(pywt.dwt(v, wavelet = 'db4')[0]) for v in X]
# X = [normalized(v) for v in X]

# X = [np.append([], v.tolist()) for v in X]

# ret = pywt.dwt(data, wavelet = 'db1') #, level = 4, mode = 'antisymmetric'

# for o in ret :
#     print(np.shape(o))

# # np.shape(ret[3])[0] / np.shape(train_csv[0]['Cz'])[0]
# # (ret)

# plt.figure(figsize = (20, 4))

# # plt.subplot(1, 3, 1)
# plt.plot(data)

# for o in ret :
#     plt.plot(o)

# # plt.title("Original Signal")
# # plt.subplot(1, 3, 2)
# # plt.title("Approximation Coefficients")
# # plt.subplot(1, 3, 3)
# # plt.plot(cD)
# # plt.title("Detail Coefficients")
# # plt.tight_layout()
# plt.show();

# train_files, train_csv, train_label = [], [], []
# test_files, test_csv, test_label    = [], [], []

# for i in range(n_files) :
#     sites = np.where(entrants[i]['EventStart'] == 1)

#     if i // step in n_test :
#         test_files.append(files[i])
#         test_csv  .append(entrants[i])
#         test_label.append(label_tmp[i])
#     else :
#         train_files.append(files[i])
#         train_csv  .append(entrants[i])
#         train_label.append(label_tmp[i])



# print((1, *X_train_scaled.shape[:1]))

# model = Sequential()

# model.add()
# model.add()
# model.add()

# model.add()
# model.add()
# model.add()

# model.add()
# model.add()
# model.add()
# model.add()

# model.add()
# model.add()
# model.add()
# model.add() 

'''
# Ajout de la premiere couche lstm
model.add(LSTM(ZERO, input_shape = train_dataset.shape[1:], activation = ACTIV(), return_sequences = True)) #
model.add(LSTM(ZERO, dropout = DROPOUT, return_sequences = False))

# Ajout de la couche de sortie
model.add(Dense(len(hands_event), activation = 'softmax'))
'''

In [None]:
# Filter coefficients of the Theta band. They used to be unpacked as `b, y`
# and `y` was then overwritten by the signal, so the signal itself was passed
# to bandpass_filter as its last coefficient argument.
b, a = bands_coeff['Theta']

# Random epoch of the training frame (the former bound relied on `X`, which
# is only assigned in commented-out lines)
i = np.random.randint(len(df_trains))
y = df_trains['C4'][i] - df_trains['C3'][i]

plt.figure()

# Filtered difference, and the same after reducing the signal to its sign
plt.plot(normalized(bandpass_filter(y, b, a)), label = 'C4 - C3')
plt.plot(normalized(bandpass_filter(np.sign(y), b, a)), label = '[C4 - C3]', c = np.random.rand(1, 3)[0])

plt.title(f"{i}")
plt.legend(loc = 'upper right')

plt.grid()
plt.tight_layout()
plt.show()

In [None]:
# Members of the voting ensemble. `knc` and `Voting_clf` only appear in
# commented-out lines of the earlier cells, so they are built here; `clf` is
# the SVC created with the other classifiers (the sklearn `svm` module itself
# was being passed as an estimator).
knc        = neighbors.KNeighborsClassifier()
Voting_clf = ensemble.VotingClassifier(estimators = [('knn', knc), ('svm', clf)], voting = 'hard')

params = {
    ## K-nearest neighbours (at least one neighbour is required)
    'knn__n_neighbors' : range(1, 3),
    ## SVM ('softmax' is not an SVC kernel; 'poly' is the kernel `clf` was created with)
    'svm__C'      : [0.1, 1, 5],
    'svm__kernel' : ['linear', 'poly', 'sigmoid', 'rbf'],
    ## RandomForest
    # 'rf__max_features'      : ['sqrt', 'log2', None],
    # 'rf__min_samples_split' : range(2, 32, 2),
    }

# Exhaustive search with 5-fold cross-validation on the training set
grid = model_selection.GridSearchCV(estimator = Voting_clf, param_grid = params, cv = 5) \
    .fit(X_train_scaled, y_train)

print(grid.best_estimator_)
print(grid.best_score_)
print('score train :', grid.score(X_train_scaled, y_train))
print('score test :', grid.score(X_test_scaled, y_test))

# print(vclf.best_estimator_, vclf.best_params_, vclf.best_score_)
# print('score train :', grid.score(X_train_scaled, y_train), vclf.score(X_train_scaled, y_train))
# print('score test  :', grid.score(X_test_scaled, y_test), vclf.score(X_test_scaled, y_test))

# Overview plot: the C3 signal with a shaded band around every event start.
# NOTE(review): presumably `fancy_df` returns an annotated copy of the data
# plus (position, event) pairs - confirm against its definition.
df_train_cpy, event_start = fancy_df(entrants, label_tmp['EventType'], hands_event, SCOPE)

fig, axes = plt.subplots(1, 1, figsize = (24, 5), sharey = True)
sig = .05  # NOTE(review): not used in this cell - confirm whether another cell reads it

axes.plot(entrants['C3'])

# Shade from half a SCOPE before each event start to 1.5 SCOPE after it.
for p in event_start :
    axes.axvspan(p[0] - (SCOPE >> 1), p[0] + 1.5 * SCOPE, facecolor = 'orangered', alpha = .5)

# The x axis is the index of `entrants` and the y axis the C3 values; the
# previous 'Frequency (Hz)' / 'Score' labels did not describe this plot.
plt.xlabel('Sample index')
plt.ylabel('C3 amplitude')

In [None]:
# Smooth one event window of the 'C3_4' column and overlay it on the raw channels.
boolInt = 16                          # index of the event to inspect
start   = event_start[boolInt][0]     # first sample of that event
entrant = start + SCOPE               # one past the last sample of the window
# Named `window` rather than `input` so the builtin input() is not shadowed
# for the rest of the notebook session.
window  = df_train_cpy['C3_4'][start : entrant]
smooth  = window.copy()
inc     = 5                           # number of smoothing passes
alpha   = 1 / 3                       # smoothing factor handed to the helper
dec     = int(inc / alpha)            # number of leading samples dropped below

plt.figure(figsize = (24, 5))
plot_window(entrants, ['C3', 'C4', 'C3 + C4'], start, SCOPE)

# Smooth out the high frequencies by applying the filter `inc` times
for _ in range(inc) :
    smooth = simple_exponential_smoothing(smooth, alpha, 0)

# Drop the first `dec` samples and re-index from `start`.
# NOTE(review): the index length assumes each smoothing pass lengthens the
# series by one sample - confirm against simple_exponential_smoothing.
smooth = pd.Series(index = range(start, entrant + inc - dec), data = smooth[dec :])

# plt.plot(raw - smooth, label = hands[event_start[pos][1]])
plt.plot(smooth, '--', label = hands_event[event_start[boolInt][1]])
plt.legend()
plt.show()

#### • Appendix

<h2 style = "text-align:center" ><b>EEG</b> - Prédiction des Mouvements Imaginaires de la Main</h2>

---

#### **1. Le projet**
- Introduction  
https://github.com/DataScientest-Studio/mar24_cds_eeg/blob/eric/references/Description_projet_EEG.pdf  
https://www.bbci.de/competition/iv/desc_2b.pdf
- Ressources / Données   
https://www.kaggle.com/competitions/ucsd-neural-data-challenge/overview  
- Bibliographie  
https://www.bbci.de/competition/iv/desc_2b.pdf
#### **2. Liens utiles**
- SciPy - *open-source software for mathematics, science, and engineering*  
https://docs.scipy.org/doc/scipy/index.html  
https://docs.scipy.org/doc/scipy/reference/signal.html  
- MNE - *MEG + EEG Analysis & Visualisation*
   - Accueil  
   https://mne.tools/stable/index.html

   - MNE - Data structures from arbitrary data  
   https://mne.tools/stable/auto_tutorials/io/10_reading_meg_data.html#creating-mne-data-structures-from-arbitrary-data-from-memory
   
   - MNE - EEG Preprocessing  
   https://mne.tools/dev/auto_tutorials/preprocessing/index.html  

- pyRiemann - *Biosignals classification with Riemannian geometry*  
https://pyriemann.readthedocs.io/en/latest/  
- neurodsp - *Neuro Digital Signal Processing Toolbox*  
https://neurodsp-tools.github.io/neurodsp/index.html#
- Rythme Mu  
https://fr.wikipedia.org/wiki/Rythme_Mu
- Spectrogram from EEG  
https://www.kaggle.com/code/cdeotte/how-to-make-spectrogram-from-eeg
- Divers  
https://signalprocessingsociety.org/  
https://fr.wikipedia.org/wiki/Filtre_de_Butterworth  
https://fr.wikipedia.org/wiki/Moyenne_mobile  
https://terpconnect.umd.edu/~toh/spectrum/Differentiation.html  
https://perso.etis-lab.fr/ghaffari/2014_CCMB_Floride_USA.pdf  
https://www.youtube.com/watch?v=wB417SAbdak&list=PLXc9qfVbMMN2TAoLHVW5NvNmJtwiHurzw  
https://fastercapital.com/fr/sujet/identification-des-artefacts-de-traitement-du-signal-dans-des-sc%C3%A9narios-r%C3%A9els.html#:~:text=L'inspection%20visuelle%20est%20la,des%20pertes%20et%20du%20bruit.  
   - Z-Score Normalisation  
   https://fr.wikipedia.org/wiki/Cote_Z_(statistiques)  
   https://typeset.io/questions/why-is-z-score-normalisation-necessary-in-pre-processing-eeg-1xv5jepyq5  

   - Traitement numérique du signal  
   https://fr.wikipedia.org/wiki/Traitement_num%C3%A9rique_du_signal  
   - Ondelette  
      - Wiki  
      https://fr.wikipedia.org/wiki/Ondelette  

      - L’analyse par ondelettes dans la vie de tous les jours  
      https://interstices.info/lanalyse-par-ondelettes-dans-la-vie-de-tous-les-jours/  

      - A guide for using the Wavelet Transform in Machine Learning  
      https://ataspinar.com/2018/12/21/a-guide-for-using-the-wavelet-transform-in-machine-learning/
      
      - pyWavelets - *open source wavelet transform*  
      https://pywavelets.readthedocs.io/en/latest/

      - Ondelettes et applications  
      https://www.i2m.univ-amu.fr/~caroline.chaux/GEOMDATA/TI-te5215.pdf

   - Maximum de vraisemblance  
   https://pmarchand1.github.io/ECL8202/notes_cours/03-Maximum_vraisemblance.html  
   https://fr.wikipedia.org/wiki/Maximum_de_vraisemblance#:~:text=En%20statistique%2C%20l'estimateur%20du,maximisant%20la%20fonction%20de%20vraisemblance  

   - Transformation de Fourier discrète  
   https://fr.wikipedia.org/wiki/Transformation_de_Fourier_discr%C3%A8te  
      - La Transformation de Fourier n’est pas adaptée à l’analyse des signaux non stationnaires.
   - Neural Data Science in Python  
   https://neuraldatascience.io/intro.html

   - Preprocessing of EEG  
   https://www.frontiersin.org/articles/10.3389/fninf.2015.00016/full#:~:text=The%20depositable%20preprocessing%20pipeline%20consists,with%20a%20low%20recording%20SNR  
   https://typeset.io/papers/preprocessing-of-eeg-4go8vhcbty  
   https://learn.neurotechedu.com/preprocessing  
   https://g0rella.github.io/gorella_mwn/preprocessing_eeg.html  
   
   - Biblio :  
   https://perso.telecom-paristech.fr/bloch/P6Image/ondelettestrsp.pdf  
   https://www.math.u-bordeaux.fr/~jbigot/Site/Enseignement_files/ondelettesIMAT.pdf  
   http://w3.cran.univ-lorraine.fr/perso/radu.ranta/pdf/cours_deb_ond%28fr%29.pdf
   
   - Digital Filtering  
   http://notebooks.pluxbiosignals.com/notebooks/Categories/Pre-Process/digital_filtering_eeg_rev.html

   - Processus stationnaire  
   https://fr.wikipedia.org/wiki/Processus_stationnaire

   - Analyse en composantes principales  
   https://fr.wikipedia.org/wiki/Analyse_en_composantes_principales#:~:text=L'ACP%2C%20d%C3%A9sign%C3%A9e%20en%20g%C3%A9n%C3%A9ral,une%20grandeur%20physique%2C%20comme%20les