In [1]:
import pandas as pd
import os
import mne
import numpy as np


In [2]:
df_info = pd.read_csv('processed_eeg_data.csv')
df_info.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 54 entries, 0 to 53
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Unnamed: 0  54 non-null     int64 
 1   subject_id  54 non-null     object
 2   age         54 non-null     int64 
 3   gender      54 non-null     object
 4   edf_path    54 non-null     object
 5   epilepsy    54 non-null     int64 
dtypes: int64(3), object(3)
memory usage: 2.7+ KB


In [3]:
df_info.describe()

Unnamed: 0.1,Unnamed: 0,age,epilepsy
count,54.0,54.0,54.0
mean,26.5,52.777778,0.555556
std,15.732133,17.115222,0.50157
min,0.0,24.0,0.0
25%,13.25,50.0,0.0
50%,26.5,53.0,1.0
75%,39.75,59.0,1.0
max,53.0,77.0,1.0


In [4]:
df_info.head(5)

Unnamed: 0.1,Unnamed: 0,subject_id,age,gender,edf_path,epilepsy
0,0,aaaaamhx,57,F,EEG_Epilepsy/01_no_epilepsy/aaaaamhx/s001_2011...,0
1,1,aaaaamhx,57,F,EEG_Epilepsy/01_no_epilepsy/aaaaamhx/s001_2011...,0
2,2,aaaaamhx,57,F,EEG_Epilepsy/01_no_epilepsy/aaaaamhx/s001_2011...,0
3,3,aaaaamhx,57,F,EEG_Epilepsy/01_no_epilepsy/aaaaamhx/s001_2011...,0
4,4,aaaaamhx,57,F,EEG_Epilepsy/01_no_epilepsy/aaaaamhx/s001_2011...,0


In [6]:
def standardize_dataframe(df):
    # Make a copy to avoid modifying the original dataframe

    df_standardized = df.drop(['time','epoch', 'condition'], axis=1).copy()
    
    # Only standardize numeric columns
    numeric_columns = df_standardized.select_dtypes(include=np.number).columns
    
    for column in numeric_columns:
        mean = df[column].mean()
        std = df[column].std()
        
        df_standardized[column] = (df[column] - mean) / std
    
    result = pd.concat([df[['time','epoch']], df_standardized], axis=1)

    return result

def preprocess_eeg_file(edf_path, fmin=0.7, fmax=45.0, segment_lenght=5, overlap=0):

    # 1. Charger le fichier EDF avec MNE
    raw = mne.io.read_raw_edf(edf_path, preload=True, verbose='ERROR')

    # 2. Filtrage passe-bande (1-45 Hz)
    raw.filter(fmin, fmax, fir_design='firwin', verbose=False)

    # 3. Suppression des canaux non EEG
    eeg_channels = mne.pick_types(raw.info, eeg=True, exclude=[])
    raw.pick(eeg_channels)
    col_to_drop = []
    eeg_cols = raw.ch_names
    for ch in (['RESP ABDOMEN-REF', 'IBI', 'BURSTS', 'SUPPR','EEG LOC-REF', 'EEG ROC-REF', 'EEG EKG1-REF']):
        if ch in eeg_cols:
            col_to_drop.append(ch)
    raw.drop_channels(col_to_drop)

    # 4. Segmentation
    epochs = mne.make_fixed_length_epochs(raw, duration=segment_lenght, preload=False, overlap=overlap)

    # 5. Transformation en DataFrame
    
    df = epochs.to_data_frame()

    # 6. Normalisation canal par canal (centrage-réduction)

    df_std = standardize_dataframe(df)
    
    return df_std

In [7]:
def get_Datas(df_info, n_subject = 10, epoch_per_acq = 1, balanced = True, random_state = 42):
    """

    Description :
    Cette fonction permet de charger et de prétraiter des données EEG à partir d'un DataFrame contenant des informations 
    sur les patients et leurs fichiers EEG. Elle retourne les signaux EEG segmentés (X) et leurs étiquettes associées (y), 
    en fonction des paramètres spécifiés.

    Paramètres :
    - df_info (DataFrame) : 
        Le DataFrame contenant les informations sur les patients, incluant les colonnes 'subject_id', 'edf_path' (chemin des fichiers EEG), 
        et 'epilepsy' (étiquette binaire indiquant la présence ou non d'épilepsie).
    - n_subject (int, par défaut = 10) : 
        Le nombre total de patients à inclure dans le dataset.
    - epoch_per_acq (int, par défaut = 1) : 
        Le nombre de segments (ou "epochs") à extraire pour chaque acquisition EEG.
    - balanced (bool, par défaut = True) : 
        Si True, la fonction équilibre le dataset en sélectionnant un nombre égal de patients épileptiques et non épileptiques.
    - random_state (int, par défaut = 42) : 
        La graine aléatoire utilisée pour garantir la reproductibilité lors de l'échantillonnage des patients.

    Retour :
    - X (list) : 
        Une liste contenant les segments EEG extraits pour chaque patient.
    - y (list) : 
        Une liste contenant les étiquettes correspondantes (1 pour épilepsie, 0 pour non épilepsie).
    """

    X, y = [], []
    # Clean df_info :
    try:
        df_info.drop(['Unnamed: 0', 'eeg_segments'], axis=1, inplace=True)
    except:
        pass


    paths = []
    if balanced is True:
        df = df_info.groupby('epilepsy', group_keys=False).apply(
            lambda x: x.sample(n=int(n_subject/2), random_state=random_state)
        )  # sample balanced classes
    else:
        df = df_info
    for name in df['subject_id'].unique():
        paths.append(df[df['subject_id']==name].iloc[:n_subject]['edf_path'])
        
    paths = pd.concat(paths)

    for i in range(0,n_subject):
        datas = preprocess_eeg_file(paths.iloc[i]) # Read preprocess the datas for each
        for j in range(epoch_per_acq):   # We only add the first n intervals defined from param epoch_per_acq
            X.append(datas[datas['epoch'] == j])
            y.append(df_info[df_info['edf_path']==paths.iloc[i]].iloc[0]['epilepsy'])
            print(i,j)

    return X, y

In [8]:
X,y = get_Datas(df_info,10,5)

print('X:',len(X), 'y:',len(y))

  df = df_info.groupby('epilepsy', group_keys=False).apply(


Not setting metadata
15 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 15 events and 1250 original time points ...
0 bad epochs dropped
0 0
0 1
0 2
0 3
0 4
Not setting metadata
15 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 15 events and 1250 original time points ...
0 bad epochs dropped
1 0
1 1
1 2
1 3
1 4
Not setting metadata
60 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 60 events and 1280 original time points ...
0 bad epochs dropped
2 0
2 1
2 2
2 3
2 4
Not setting metadata
10 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 10 events and 1250 original time points ...
0 bad epochs dropped
3 0
3 1
3 2
3 3
3 4
Not setting metadata
289 matching events found
No baseline correction applied
0 projection items act

In [10]:
len(y), sum(y)

(50, 25)

# Model Robin test

In [None]:
import tensorflow as tf
tf.__version__

In [1]:
import matplotlib.pyplot as plt


In [None]:
from tensorflow.keras.models import Sequential


In [None]:
# Sequential

# import tensorflow as tf
# tf.keras.models.Sequential

In [None]:
from tensorflow.keras.optimizers import Adam
# from tensorflow.keras.utils import to_categorical

In [None]:
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Dropout, Flatten, Dense, BatchNormalization

In [2]:
from sklearn.model_selection import train_test_split

In [None]:
# 1. Split train/test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [None]:

# 2. Modèle CNN 1D
model = Sequential([
    Conv1D(32, kernel_size=5, activation='relu', input_shape=X.shape[1:]),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),

    Conv1D(64, kernel_size=5, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),

    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.4),
    Dense(1, activation='sigmoid')  # Pour classification binaire
])


In [None]:

# 3. Compilation
model.compile(optimizer=Adam(learning_rate=1e-3),
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.summary()


In [None]:

history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=15,
    batch_size=32
)

plt.plot(history.history['accuracy'], label='Train')
plt.plot(history.history['val_accuracy'], label='Val')
plt.title("Accuracy")
plt.legend()
plt.show()

# Prédictions 
y_pred_probs = model.predict(X_test)

# Conversion en classes binaires (0 ou 1)
y_pred = (y_pred_probs > 0.5).astype(int).flatten()