In [48]:
pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.19.0-cp311-cp311-win_amd64.whl.metadata (4.1 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Downloading absl_py-2.2.2-py3-none-any.whl.metadata (2.6 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-py2.py3-none-win_amd64.whl.metadata (5.3 kB)
Collecting opt-einsum>=2.3.2 (from tensorflow)
  Downloading opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)
Collecting protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,

  You can safely remove it manually.
  You can safely remove it manually.


In [71]:
import pickle
import pandas as pd
import numpy as np
import seaborn as sns
import mne
import matplotlib.pyplot as plt
import pyvista
import ipywidgets
import ipyevents
import pyvistaqt
import yasa

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras import layers, models

In [2]:
%matplotlib qt
# to make plots interactive

## *Pickle data

### Importing data

In [3]:
# Path to the pickled label data (absolute path on the local machine).
file_path = r"C:\EEG DATA\FL_label_data.pickle"
# The r prefix makes this a raw string, so the backslashes of the Windows path
# are kept literally instead of being read as the start of an escape sequence.

# Open the pickle file in binary mode and load its content.
# NOTE(review): pickle.load can execute arbitrary code - only load files from a trusted source.
with open(file_path, "rb") as file:
    label_data = pickle.load(file)

# Show the type of the loaded object
print(type(label_data))

<class 'dict'>


### Functions

In [40]:
# extract_onsets collects the selected onsets of every key and returns them in one dict
# (the comma-separated onset values only appear in the printed summary)

def extract_onsets(label_data):
    """Collect, per key, the onsets whose label is 1 (N2) or 2 (N3).

    Parameters
    ----------
    label_data : dict
        Maps a key to a dict holding a 'label' entry and an 'onset' entry.

    Returns
    -------
    dict
        Maps each key to the array of selected onsets. A key is left out
        (and a warning is printed) when it has no label equal to 1 or 2, or
        when a selected index falls outside the onset array.
    """
    onset_dict = {}
    for key, value in label_data.items():
        # np.atleast_1d guarantees arrays, so boolean masks and fancy indexing work
        labels = np.atleast_1d(value['label'])
        onsets = np.atleast_1d(value['onset'])

        is_n2 = labels == 1
        is_n3 = labels == 2
        nrem_positions = np.nonzero(is_n2 | is_n3)[0]

        # guard clause: nothing selected, or an index that would be out of bounds
        if nrem_positions.size == 0 or not np.all(nrem_positions < len(onsets)):
            print(f"Key: {key}, Warning: The indices do not match")
            continue

        nrem_onsets = onsets[nrem_positions]
        onset_dict[key] = nrem_onsets
        onset_text = ', '.join(str(onset) for onset in nrem_onsets)
        print(f"Key: {key}, Onset values for labels 1 (N2) and 2 (N3): {onset_text}")

    # only the dict is returned; the printed lines are purely informative
    return onset_dict

def group_by_increment(onset_values, increment=30):
    """Group consecutive onsets that are exactly `increment` apart.

    Parameters
    ----------
    onset_values : sequence of numbers
        Onset values in the order in which they should be scanned.
    increment : number, default 30
        Difference two neighbouring onsets must have to belong to the same group.

    Returns
    -------
    list of list of float
        One sub-list per run of onsets spaced by `increment`. Runs made of a
        single onset are discarded. An empty input yields an empty list.
    """
    # an empty input has no first element to start a group with
    if len(onset_values) == 0:
        return []

    groups = []
    # the first onset opens the first group
    current_group = [float(onset_values[0])]

    for previous, current in zip(onset_values[:-1], onset_values[1:]):
        if current - previous == increment:
            # still inside the same run
            current_group.append(float(current))
        else:
            # the run is broken: keep it only if it holds more than one onset
            if len(current_group) > 1:
                groups.append(current_group)
            # the current onset opens a new run
            current_group = [float(current)]

    # the last run is still pending when the loop ends
    if len(current_group) > 1:
        groups.append(current_group)

    return groups

def extract_segments(raw, groups, epoch_length=0):
    """Crop one copy of `raw` per group of onsets.

    Parameters
    ----------
    raw : object
        Must provide ``copy()`` and ``crop(tmin=..., tmax=...)``
        (presumably an MNE Raw recording - confirm against the caller).
    groups : iterable of sequences
        Each group is a sequence of onset times; its first value is used as
        the start and its last value as the base of the stop time.
    epoch_length : number, default 0
        Added to the last onset of each group to obtain the stop time. With
        the default 0 the segment stops at the last onset itself. If the
        onsets mark the start of fixed-length epochs, pass that length
        (e.g. 30) so the last epoch is part of the segment.

    Returns
    -------
    list
        The cropped copies, one per non-empty group, in group order.
    """
    raw_segments = []

    for group in groups:
        # an empty group has neither a start nor a stop
        if len(group) == 0:
            continue
        start = group[0]
        stop = group[-1] + epoch_length
        # crop a copy so that `raw` itself is left untouched
        raw_segments.append(raw.copy().crop(tmin=start, tmax=stop))

    return raw_segments

## *Raw data Participant 067

### Importing 

In [31]:
# follow instructions from YASA

participant_067_file = r"C:\EEG DATA\067\eeg\TMR.vhdr"
participant_067_raw = mne.io.read_raw_brainvision(vhdr_fname=participant_067_file, preload=True)
participant_067_raw.pick(['Fz'])
# keep only Fz before filtering, so filtering and resampling run on a single channel
participant_067_raw.filter(0.1, 40)
# bandpass filter between 0.1 Hz and 40 Hz
participant_067_raw.resample(100)
# downsample to 100 Hz

Extracting parameters from C:\EEG DATA\067\eeg\TMR.vhdr...
Setting channel info structure...
Reading 0 ... 14024599  =      0.000 ... 28049.198 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.1 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.10
- Lower transition bandwidth: 0.10 Hz (-6 dB cutoff frequency: 0.05 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 16501 samples (33.002 s)



[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    9.5s


Unnamed: 0,General,General.1
,Filename(s),TMR.eeg
,MNE object type,RawBrainVision
,Measurement date,2023-09-06 at 23:28:45 UTC
,Participant,Unknown
,Experimenter,Unknown
,Acquisition,Acquisition
,Duration,07:47:30 (HH:MM:SS)
,Sampling frequency,100.00 Hz
,Time points,2804920
,Channels,Channels


### Put data in YASA format

In [7]:
# pull the signal and its time vector out of the raw object
participant_067_data, participant_067_times = participant_067_raw.get_data(return_times=True)

# bundle what the hypnogram steps need and store it as numpy arrays in one .npz file
npz_fields_067 = {
    "data": participant_067_data,
    "times": participant_067_times,
    "ch_names": participant_067_raw.ch_names,
    "sfreq": participant_067_raw.info["sfreq"],
}
np.savez("participant_067_npz.npz", **npz_fields_067)

In [8]:
# format the npz data

# the context manager closes the .npz file once the arrays have been read
with np.load("participant_067_npz.npz") as npzfile_067:
    data_067, ch_names_067 = npzfile_067['data'], npzfile_067['ch_names']
    # reuse the sampling frequency stored with the data instead of hard-coding it;
    # kept as an int because it is later used to compute slice bounds
    sf_067 = int(npzfile_067['sfreq'])
# one time stamp per sample along the last (time) axis
times_067 = np.arange(data_067.shape[-1]) / sf_067

print(data_067.shape, ch_names_067)
print(np.round(data_067[:, 0:5], 3))
# prints the first 5 samples of every channel

# 1 channel (Fz) with 2,804,920 samples

(1, 2804920) ['Fz']
[[-0. -0. -0.  0.  0.]]


In [9]:
# retrieve the labels for participant 067

original_labels_067 = label_data['067']['label']
original_labels_067

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 1,
       2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 1, 1, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       2, 1, 2, 1, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2,

In [10]:
# translate the numeric stage codes into stage names for the hypnogram
# codes in label_data: 0: wake/N1, 1: N2, 2: N3, 3: REM
# YASA integer codes:  0: wake, 1: N1, 2: N2, 3: N3, 4: REM

# code 0 (wake/N1) is only kept as N1 for efficiency
stage_name_by_code = {0: "N1", 1: "N2", 2: "N3", 3: "REM"}

# object dtype so that the array can hold the stage-name strings
yasa_labels_067 = np.array(original_labels_067, dtype=object)
for stage_code, stage_name in stage_name_by_code.items():
    # the mask is always computed on the untouched numeric labels
    yasa_labels_067[original_labels_067 == stage_code] = stage_name

yasa_labels_067

array(['N1', 'N1', 'N1', 'N1', 'N1', 'N1', 'N1', 'N1', 'N1', 'N1', 'N1',
       'N1', 'N1', 'N1', 'N1', 'N1', 'N1', 'N1', 'N1', 'N1', 'N1', 'N1',
       'N1', 'N1', 'N1', 'N1', 'N1', 'N1', 'N1', 'N1', 'N1', 'N1', 'N1',
       'N1', 'N1', 'N1', 'N1', 'N1', 'N1', 'N1', 'N1', 'N1', 'N1', 'N1',
       'N1', 'N1', 'N1', 'N1', 'N1', 'N1', 'N1', 'N1', 'N1', 'N1', 'N1',
       'N2', 'N2', 'N2', 'N2', 'N2', 'N2', 'N2', 'N2', 'N2', 'N2', 'N2',
       'N2', 'N2', 'N2', 'N2', 'N2', 'N2', 'N2', 'N2', 'N2', 'N3', 'N2',
       'N2', 'N2', 'N3', 'N3', 'N3', 'N3', 'N3', 'N2', 'N2', 'N3', 'N2',
       'N3', 'N3', 'N3', 'N3', 'N3', 'N3', 'N1', 'N1', 'N1', 'N1', 'N2',
       'N2', 'N2', 'N2', 'N2', 'N2', 'N2', 'N2', 'N1', 'N1', 'N1', 'N1',
       'N1', 'N1', 'N2', 'N2', 'N2', 'N2', 'N2', 'N2', 'N2', 'N2', 'N2',
       'N1', 'N1', 'N2', 'N2', 'N2', 'N2', 'N2', 'N2', 'N2', 'N2', 'N2',
       'N1', 'N2', 'N2', 'N2', 'N2', 'N2', 'N2', 'N2', 'N2', 'N2', 'N2',
       'N2', 'N2', 'N3', 'N3', 'N3', 'N3', 'N3', 'N

### Hypnogram and spectrogram

In [11]:
hypnogram_067 = yasa.Hypnogram(yasa_labels_067, freq="30s")
hypnogram_067.hypno

Epoch
0      N1
1      N1
2      N1
3      N1
4      N1
       ..
929    N1
930    N1
931    N1
932    N1
933    N1
Name: Stage, Length: 934, dtype: category
Categories (7, object): ['WAKE', 'N1', 'N2', 'N3', 'REM', 'ART', 'UNS']

In [13]:
# plot the hypnogram

fig, ax = plt.subplots(1, 1, figsize=(7,3), constrained_layout=True, dpi=80)
ax = hypnogram_067.plot_hypnogram(fill_color="gainsboro", ax=ax)

In [14]:
# upsample the hypnogram for the spectrogram

hypnogram_067_upsampled = yasa.hypno_upsample_to_data(hypno=hypnogram_067.hypno, sf_hypno=(1/30), data=data_067, sf_data=sf_067)
print(hypnogram_067_upsampled.shape, 'Unique values =', np.unique(hypnogram_067_upsampled))



(2804920,) Unique values = ['N1' 'N2' 'N3' 'REM']


In [15]:
hypnogram_067_upsampled
# the first values and the last values are N1 so this is normal

array(['N1', 'N1', 'N1', ..., 'N1', 'N1', 'N1'],
      shape=(2804920,), dtype=object)

In [16]:
# the spectrogram needs numbers again
# (the labels were turned into stage names for the hypnogram)

# codes in label_data: 0: wake/N1, 1: N2, 2: N3, 3: REM
# YASA integer codes:  0: wake, 1: N1, 2: N2, 3: N3, 4: REM

# N1 stands for wake/N1 here (only N1 is kept for efficiency)
yasa_code_by_stage = {"N1": 1, "N2": 2, "N3": 3, "REM": 4}

hypnogram_067_upsampled_int = np.array(hypnogram_067_upsampled)
for stage_name, yasa_code in yasa_code_by_stage.items():
    # the mask is always computed on the untouched stage-name array
    hypnogram_067_upsampled_int[hypnogram_067_upsampled == stage_name] = yasa_code

hypnogram_067_upsampled_int

array([1, 1, 1, ..., 1, 1, 1], shape=(2804920,), dtype=object)

In [17]:
fig = yasa.plot_spectrogram(data_067[0, :], sf_067, hypnogram_067_upsampled_int)
# data must be a 1D numpy array
fig.suptitle("Spectrogram with Hypnogram of Participant 067", fontsize=14)

plt.show()

### Sleep spindles detection

In [18]:
# convert back to correct unit
data_067_uv = data_067 * 1e6

# convert back to 1D array data since only one channel

data_067_1d = data_067_uv[0]  

In [19]:
# use yasa spindle detection

spindles_067 = yasa.spindles_detect(data_067_1d, sf_067, ch_names=ch_names_067, hypno=hypnogram_067_upsampled_int, include=(2,3))
spindles_067.summary().round(3)

Unnamed: 0,Start,Peak,End,Duration,Amplitude,RMS,AbsPower,RelPower,Frequency,Oscillations,Symmetry,Stage,Channel,IdxChannel
0,1857.69,1858.02,1858.27,0.58,15.557,3.652,1.144,0.352,12.733,6.0,0.559,2,Fz,0
1,1916.07,1916.54,1916.61,0.54,14.541,3.059,0.992,0.277,12.061,6.0,0.855,2,Fz,0
2,1938.52,1939.09,1939.61,1.09,15.329,3.423,0.924,0.258,12.877,13.0,0.518,2,Fz,0
3,1944.09,1944.82,1944.95,0.86,11.033,2.504,0.660,0.295,12.667,11.0,0.839,2,Fz,0
4,1947.26,1947.53,1947.77,0.51,17.502,4.614,1.362,0.304,13.302,7.0,0.519,2,Fz,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
339,25161.63,25161.84,25162.20,0.57,16.403,3.404,1.047,0.299,13.509,8.0,0.362,2,Fz,0
340,25215.11,25215.77,25216.07,0.96,12.447,3.208,1.058,0.433,13.624,13.0,0.680,2,Fz,0
341,25219.13,25219.49,25219.92,0.79,12.979,2.668,0.875,0.411,12.573,10.0,0.450,2,Fz,0
342,25281.23,25281.72,25282.21,0.98,16.634,4.372,1.337,0.421,11.978,12.0,0.495,2,Fz,0


In [20]:
# visualize it by NREM stage 2 and 3

spindles_067.summary(grp_chan=True, grp_stage=True, aggfunc='mean')

Unnamed: 0_level_0,Unnamed: 1_level_0,Count,Density,Duration,Amplitude,RMS,AbsPower,RelPower,Frequency,Oscillations,Symmetry
Stage,Channel,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2,Fz,318,1.448747,0.737579,14.269072,3.286705,0.988772,0.338724,12.79763,9.216981,0.488694
3,Fz,26,0.346667,0.677308,14.798405,3.372273,1.06134,0.332913,12.914282,8.346154,0.452184


In [21]:
# plot the average spindle

spindles_067.plot_average(errorbar=None, palette="Set1")

<Axes: title={'center': 'Average spindle'}, xlabel='Time (sec)', ylabel='Amplitude (uV)'>

### Define 1-second epochs

In [41]:
#label_data_onsets_067 = label_data_onsets['067']
#groups_067 = group_by_increment(label_data_onsets_067, increment=30)
#groups_067

In [22]:
epoch_length_sec = 1
# desired length of each epoch
samples_per_epoch = int(sf_067 * epoch_length_sec)
# number of samples in one epoch
n_epochs = len(data_067_1d) // samples_per_epoch
# number of whole epochs that fit into the signal
# = total samples divided by samples per epoch, rounded down

# Split the signal into epochs of epoch_length_sec seconds
# the slice bound uses samples_per_epoch, so it stays correct if epoch_length_sec is changed
epochs_067 = np.array_split(data_067_1d[:n_epochs * samples_per_epoch], n_epochs)
# 2,804,920 samples
# 100 samples/sec
# 28,049 one-second epochs

# trailing samples that do not fill a whole epoch are dropped

In [23]:
epochs_067[:10]

# epochs_067 is an array of 1-second EEG arrays (each 100 samples long)

[array([-2.66815378e-14, -6.60419617e-01, -4.04582239e-01,  1.78309696e+00,
         2.15793634e+00, -1.12720425e+00,  6.45268020e-02,  2.66375233e+00,
         9.42635388e-01,  9.53113191e-01,  3.35584268e+00,  1.15003993e+00,
         6.57636830e-01,  3.75438921e+00,  6.85973524e-01, -1.45793775e+00,
         5.84595787e-01,  1.05466313e+00, -1.75840572e-01,  2.71950997e+00,
         5.95706967e+00,  4.38133451e+00,  4.07996171e+00,  6.66719847e+00,
         5.69120878e+00,  3.33323533e+00,  5.78755324e+00,  7.30576037e+00,
         8.44336537e+00,  8.60859499e+00,  6.11776265e+00,  5.85849082e+00,
         4.15491632e+00,  3.42153292e+00,  4.25909924e+00,  1.01512142e+00,
         1.43951245e+00,  4.85867780e+00,  5.65734557e+00,  5.06041145e+00,
         5.80096296e+00,  4.86616598e+00, -4.20249160e-01, -2.41501868e-01,
         1.65705003e+00, -5.40416827e-02, -3.41356165e-01, -7.28873391e-01,
        -2.95477505e+00, -3.43636698e+00,  2.90027222e-02,  1.03536985e+00,
        -1.3

In [32]:
len(epochs_067)

28049

In [38]:
data_067_1d[:5]

array([-2.66815378e-14, -6.60419617e-01, -4.04582239e-01,  1.78309696e+00,
        2.15793634e+00])

### Label the epochs

We will use as labels 0 for no spindle and 1 for spindle in the 1-second epoch. Importantly, the code checks whether part of the spindle is in that epoch. We are not checking whether the entire spindle is in the epoch.

##### Function

In [33]:
def label_spindle_epochs(epochs, spindle_starts, spindle_ends, epoch_length_sec=1):
    """Label each epoch with 1 if any spindle overlaps it, else 0.

    Epoch ``i`` covers the time range from ``i * epoch_length_sec`` to
    ``(i + 1) * epoch_length_sec``. A spindle overlaps an epoch when it starts
    before the epoch ends and ends after the epoch starts, so a partial
    overlap is enough.

    Parameters
    ----------
    epochs : sized collection
        Only its length is used (the number of epochs).
    spindle_starts, spindle_ends : array-like of numbers
        Start and end time of every spindle, in the same unit as
        `epoch_length_sec`. Both must have the same length.
    epoch_length_sec : number, default 1
        Duration of one epoch.

    Returns
    -------
    numpy.ndarray of int
        One label (0 or 1) per epoch.

    Raises
    ------
    ValueError
        If `spindle_starts` and `spindle_ends` differ in length.
    """
    starts = np.atleast_1d(np.asarray(spindle_starts, dtype=float))
    ends = np.atleast_1d(np.asarray(spindle_ends, dtype=float))
    if starts.shape != ends.shape:
        # zip would silently drop the unmatched entries otherwise
        raise ValueError("spindle_starts and spindle_ends must have the same length")

    n_epochs = len(epochs)
    # start and end time of every epoch
    epoch_starts = np.arange(n_epochs) * epoch_length_sec
    epoch_ends = epoch_starts + epoch_length_sec

    # every epoch starts out as "no spindle"
    epoch_labels = np.zeros(n_epochs, dtype=int)

    for start, end in zip(starts, ends):
        # compare one spindle against all epochs at once instead of looping over epochs
        overlaps = (start < epoch_ends) & (end > epoch_starts)
        epoch_labels[overlaps] = 1

    return epoch_labels

##### For participant 067

In [34]:
spindles_067_df = spindles_067.summary()

spindle_starts_067 = spindles_067_df['Start'].values
spindle_ends_067 = spindles_067_df['End'].values

epoch_labels_067 = label_spindle_epochs(epochs_067, spindle_starts_067, spindle_ends_067)

In [35]:
epoch_labels_067[:10]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [36]:
len(epoch_labels_067)

28049

### Prepare EEG data for CNN input

CNN expects the data to be: [batch_size, time_steps, channels]
batch_size: number of 1-second epochs
time_steps: number of timepoints per epoch (sampling rate)
channels: number of EEG channels

In [45]:
# give the EEG epochs (not the labels) a trailing channel axis
# epochs_067 is a list of 1D arrays at this point

# stacking the list gives (number of epochs, number of samples per epoch);
# expand_dims appends the last axis of size 1 for the single channel,
# so the result is (number of epochs, number of samples per epoch, 1)
epochs_067_reshaped = np.expand_dims(np.array(epochs_067), axis=-1)

In [46]:
epochs_067_reshaped.shape

(28049, 100, 1)

In [58]:
import tensorflow as tf

def build_cnn_model(input_shape=(100, 1)):
    """Build and compile the multi-branch Conv1D + GRU binary classifier.

    The same input is fed to three parallel convolutional branches that
    differ only in their kernel size (5, 11 and 21). Each branch applies
    Conv1D -> LeakyReLU -> MaxPooling1D -> BatchNormalization. The branch
    outputs are concatenated, passed through a GRU and a dense layer, and
    end in a single sigmoid unit.

    Parameters
    ----------
    input_shape : tuple, default (100, 1)
        Shape of one input sample, without the batch dimension.

    Returns
    -------
    tf.keras.Model
        Model compiled with the Adam optimizer, binary cross-entropy loss
        and the accuracy metric.
    """
    # Define the input layer
    input_layer = tf.keras.layers.Input(shape=input_shape)

    # Parallel convolutional branches, one per kernel size, all reading the input layer
    branch_outputs = []
    for kernel_size in (5, 11, 21):
        branch = tf.keras.layers.Conv1D(filters=10, kernel_size=kernel_size, strides=1, padding='same')(input_layer)
        # `negative_slope` replaces the deprecated `alpha` argument of LeakyReLU
        branch = tf.keras.layers.LeakyReLU(negative_slope=0.01)(branch)
        branch = tf.keras.layers.MaxPooling1D(pool_size=2)(branch)
        branch = tf.keras.layers.BatchNormalization()(branch)
        branch_outputs.append(branch)

    # Concatenate the outputs of all branches
    concatenated = tf.keras.layers.Concatenate()(branch_outputs)

    # GRU Layer
    gru = tf.keras.layers.GRU(64)(concatenated)

    # Fully connected (dense) layer
    dense = tf.keras.layers.Dense(64, activation='relu')(gru)

    # Sigmoid output layer: a single unit for binary classification (spindle detection)
    output = tf.keras.layers.Dense(1, activation='sigmoid')(dense)

    # Create the model
    model = tf.keras.models.Model(inputs=input_layer, outputs=output)

    # Compile the model
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Return the compiled model
    return model



In [74]:
# Example usage
input_shape = (100, 1)  # 1-second EEG epoch with 100 samples (e.g., 100 Hz)
cnn_model = build_cnn_model(input_shape)

# Print model summary to check the architecture
cnn_model.summary()



In [65]:
epochs_067[:5]

[array([-2.66815378e-14, -6.60419617e-01, -4.04582239e-01,  1.78309696e+00,
         2.15793634e+00, -1.12720425e+00,  6.45268020e-02,  2.66375233e+00,
         9.42635388e-01,  9.53113191e-01,  3.35584268e+00,  1.15003993e+00,
         6.57636830e-01,  3.75438921e+00,  6.85973524e-01, -1.45793775e+00,
         5.84595787e-01,  1.05466313e+00, -1.75840572e-01,  2.71950997e+00,
         5.95706967e+00,  4.38133451e+00,  4.07996171e+00,  6.66719847e+00,
         5.69120878e+00,  3.33323533e+00,  5.78755324e+00,  7.30576037e+00,
         8.44336537e+00,  8.60859499e+00,  6.11776265e+00,  5.85849082e+00,
         4.15491632e+00,  3.42153292e+00,  4.25909924e+00,  1.01512142e+00,
         1.43951245e+00,  4.85867780e+00,  5.65734557e+00,  5.06041145e+00,
         5.80096296e+00,  4.86616598e+00, -4.20249160e-01, -2.41501868e-01,
         1.65705003e+00, -5.40416827e-02, -3.41356165e-01, -7.28873391e-01,
        -2.95477505e+00, -3.43636698e+00,  2.90027222e-02,  1.03536985e+00,
        -1.3

In [70]:
epochs_067_np = np.array(epochs_067)
print(epochs_067_np.shape)
print(epoch_labels_067)

(28049, 100)
[0 0 0 ... 0 0 0]


In [72]:
# split into X and y (labels) data
# use the array that already carries the channel axis, so X matches the
# (time_steps, channels) input shape the CNN was built with
X = epochs_067_reshaped
y = epoch_labels_067
assert len(X) == len(y), "every epoch needs exactly one label"

# split into train and test set

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
# stratify = y to ensure that same proportion of classes in both training and test set

In [78]:
training_info = cnn_model.fit(X_train, y_train, validation_split=0.2, epochs=20, batch_size=64)

Epoch 1/20
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 21ms/step - accuracy: 0.9837 - loss: 0.0502 - val_accuracy: 0.9833 - val_loss: 0.0514
Epoch 2/20
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 21ms/step - accuracy: 0.9838 - loss: 0.0469 - val_accuracy: 0.9817 - val_loss: 0.0532
Epoch 3/20
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 21ms/step - accuracy: 0.9835 - loss: 0.0504 - val_accuracy: 0.9820 - val_loss: 0.0527
Epoch 4/20
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 21ms/step - accuracy: 0.9837 - loss: 0.0460 - val_accuracy: 0.9820 - val_loss: 0.0525
Epoch 5/20
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 28ms/step - accuracy: 0.9853 - loss: 0.0445 - val_accuracy: 0.9820 - val_loss: 0.0535
Epoch 6/20
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 28ms/step - accuracy: 0.9843 - loss: 0.0474 - val_accuracy: 0.9811 - val_loss: 0.0570
Epoch 7/20
[1m281/281

In [79]:
def plot_training_history(training_info):
    """Plot loss and accuracy curves of a training run side by side.

    Parameters
    ----------
    training_info : object
        Must expose a ``history`` mapping from metric name to a sequence of
        per-epoch values (the object returned by ``model.fit`` is used in
        this notebook). The keys 'loss', 'val_loss', 'accuracy' and
        'val_accuracy' are plotted when present; missing keys are skipped.
    """
    history = training_info.history
    fig, axs = plt.subplots(1, 2, figsize=(16, 5))

    # (axis, metric key, y-axis label) for the two panels
    panels = ((axs[0], 'loss', "Loss"), (axs[1], 'accuracy', "Accuracy"))
    for ax, metric, ylabel in panels:
        curves = ((metric, "training set"), ('val_' + metric, "validation set"))
        has_curve = False
        for key, label in curves:
            # check for the key explicitly instead of hiding every error behind a bare except
            if key in history:
                ax.plot(history[key], label=label)
                has_curve = True
        ax.set_xlabel("Epoch")
        ax.set_ylabel(ylabel)
        ax.grid(True)
        if has_curve:
            # a legend without any curve would only trigger a warning
            ax.legend()
    plt.show()

plot_training_history(training_info)

In [81]:
cnn_model.evaluate(X_test, y_test)

[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.9829 - loss: 0.0781


[0.084015391767025, 0.9805704355239868]