# Final TP

- XGBoost and AdaBoost

In [10]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datetime
import scipy
import mne

from collections import Counter
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from mne.datasets.sleep_physionet.age import fetch_data

import epoch

In [11]:
"""
psg_data
    The psg_data variable contains polysomnography data, which typically includes multiple physiological signals such as EEG, EOG, and EMG, recorded during sleep.

hypnogram_data
    The hypnogram_data variable represents annotations or labels associated with sleep stages, providing information about the temporal distribution of wakefulness, different sleep stages (NREM and REM), and transitions during a sleep recording.
    
fs 
    represents the sampling frequency (or sampling rate) of the polysomnography (PSG) data, indicating the number of samples per second in the signal.
"""

# Reading data (psg) and hypnogram (labels)
psg_file = "data/SC4001E0-PSG.edf"
hypnogram_file = "data/SC4001EC-Hypnogram.edf"

# psg_data = mne.io.read_raw_edf(psg_file)
# hypnogram_data = mne.read_annotations(hypnogram_file)

In [12]:
s = epoch.sleepRecording()
s.init_from_file("data/SC4001E0-PSG.edf", "data/SC4001EC-Hypnogram.edf")

Extracting EDF parameters from /Users/stig/Desktop/- TU:e/5Q1 ITBA Buenos Aires/72.75 Aprendizarje Automatico/72.75-ML/final/data/SC4001E0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  psg_data_raw.set_annotations(hypnogramm_annotations)


In [13]:
# Checking how much of each label we have in the dataset

label_list = [s.epochs[i].label for i in range(len(s.epochs))]
label_counts = Counter(label_list)
for label, count in label_counts.items():
    print(f"{label}: {count}")

Sleep stage W: 1997
Sleep stage 1: 58
Sleep stage 2: 250
Sleep stage 3: 101
Sleep stage 4: 119
Sleep stage R: 125


In [14]:
s.epochs[10].ch_names

['EEG Fpz-Cz',
 'EEG Pz-Oz',
 'EOG horizontal',
 'Resp oro-nasal',
 'EMG submental',
 'Temp rectal',
 'Event marker']

In [15]:
from createFeatures import create_features_recording_session
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [25]:
FREQ_BANDS = {'SO':[0.5, 1],
            'delta': [1, 4],
            'theta': [4, 8],
            'alpha': [8, 13],
            'sigma': [13,15],
            'beta': [15, 30],
            'gamma': [30, 60]}

# Applying feature extraction functions to each epoch
create_features_recording_session(s)

# Initialize lists for features and labels
features = []
labels = []

# Iterate over each epoch to gather features and labels
for e in s.epochs:
    # Extract the features for the current epoch
    feature_row = [e.features[f'integral_{band}_Pz-Oz'] for band in FREQ_BANDS] + \
                  [e.features[f'integral_{band}_Fpz-Cz'] for band in FREQ_BANDS] + \
                  [e.features['avg_temp']]
    
    # Append the feature row and label to their respective lists
    features.append(feature_row)
    labels.append(e.label)

# Convert to NumPy arrays
X = np.array(features)
y = np.array(labels)

# Handling missing values (if any)
# Example: Replace NaNs with the mean of the respective column
X = np.nan_to_num(X, nan=np.nanmean(X))

# Feature Scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Splitting the dataset into Training and Test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

X_train.shape   

(2120, 15)

array([[-3.96044414e-01, -4.36557503e-01, -8.31992680e-01, ...,
        -8.17114069e-01, -8.67001411e-01,  2.30210468e-01],
       [-2.55430189e-01, -2.81306440e-01, -1.97783460e-04, ...,
        -1.21320996e+00, -1.22514941e+00, -7.84179473e-01],
       [-4.00882658e-01, -4.43575610e-01, -4.72969343e-01, ...,
         3.52760626e-01, -1.00102220e-01,  1.93839639e-01],
       ...,
       [ 1.37173374e+00,  3.78380443e+00,  1.78247445e+00, ...,
        -1.27319533e+00, -1.25689200e+00, -1.28593421e+00],
       [ 1.31136347e-01,  3.19079436e-01,  4.09858494e-01, ...,
        -1.24564139e+00, -1.25491878e+00, -1.54588001e+00],
       [-4.76500996e-02, -3.37125288e-01, -1.97046517e-01, ...,
         5.37801623e-01,  9.08775773e-01, -3.14097082e-01]])