In [31]:
import numpy as np
import pyedflib
import statistics
import plotly.graph_objects as go
import pandas as pd
from gtda.time_series import SingleTakensEmbedding
from gtda.homology import VietorisRipsPersistence
from gtda.diagrams import PersistenceEntropy, Amplitude, NumberOfPoints, ComplexPolynomial, PersistenceLandscape, HeatKernel, Silhouette, BettiCurve, PairwiseDistance, ForgetDimension
from gtda.plotting import plot_point_cloud, plot_heatmap, plot_diagram
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA, FastICA
from gtda.pipeline import Pipeline 


def read_edf_file(file_path):
    """
    Reads an .edf file and returns the EEG and EMG streams as one pandas DataFrame.

    Parameters
    ----------
    file_path : str
        Path of the .edf file to open.

    Returns
    -------
    pandas.DataFrame
        A 'Time' column (sample index divided by the sample frequency of the
        first channel) plus one column per signal, named after the channel
        labels stored in the file. Both signals are truncated to the length
        of the shorter one.
    """
    f = pyedflib.EdfReader(file_path)
    try:
        # Assuming the EEG channel is the first channel and EMG is the second channel
        eeg_signal = f.readSignal(0)
        emg_signal = f.readSignal(1)

        # Extract the channel names for the DataFrame
        signal_labels = f.getSignalLabels()
        eeg_channel_name = signal_labels[0]
        emg_channel_name = signal_labels[1]

        # Get the sample frequency
        sample_frequency = f.getSampleFrequency(0)  # Assuming both streams have the same frequency
    finally:
        # Always release the file handle, even if one of the reads above fails
        f.close()

    # Calculate the timestamps for the samples
    n_samples = min(len(eeg_signal), len(emg_signal))
    time = np.arange(n_samples) / sample_frequency

    # Create pandas DataFrame
    return pd.DataFrame({
        'Time': time,
        eeg_channel_name: eeg_signal[:n_samples],
        emg_channel_name: emg_signal[:n_samples],
    })

# Load the recording once and keep references to the time axis and the EEG trace
file = 'edf_293.edf'
data = read_edf_file(file)

x, y = data.Time, data.EEG

In [32]:
# Labels: integer codes read from the "NAPS_Numeric" column of the annotation file
label_df = pd.read_csv("Data_293.csv")
# NOTE(review): the first row is skipped here — confirm that labels[i] is
# still meant to describe eeg_segments[i] after this offset
labels = list(map(int, label_df["NAPS_Numeric"].iloc[1:]))

# Label List

Label 1: W (Awake)

Label 2: WA (Awake Artifact)?

Label 3: NR (NREM)

Label 4: Not defined

Label 5: R (REM)

Label 7: U (Artifacts?)


# Local Approach

In [33]:
# How many segments per label do you want to analyze?
# Used as an upper bound when slicing each label's index list; len(labels)
# can never be exceeded, so this setting keeps every segment.
no_segments = len(labels) # complete data

In [34]:
# Map each distinct label to the positions in `labels` where it occurs,
# keeping at most `no_segments` positions per label
indices_dict = {}

for label in set(labels):
    indices = [pos for pos, value in enumerate(labels) if value == label]
    indices = indices[:no_segments]
    indices_dict[label] = indices

In [35]:
def segment_data(df, segment_size, step_size = 2):
    """
    Splits the "EEG" and "EMG" columns of df into consecutive, non-overlapping segments.

    Each segment holds int(segment_size * 1000 / step_size) samples, i.e. the
    data is treated as having one sample every step_size milliseconds.

    The number of segments is derived from the number of rows in df, so every
    complete segment is returned (including the last one when the row count
    is an exact multiple of the segment length) and no partial or empty
    segment is ever emitted.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with "EEG" and "EMG" columns.
    segment_size : int or float
        Segment length in seconds.
    step_size : int or float, default 2
        Spacing between consecutive samples in milliseconds.

    Returns
    -------
    (list, list)
        eeg_segments and emg_segments; each entry is the list of samples of
        one segment. Both lists are empty when df holds less than one full
        segment.
    """
    samples_per_segment = int(segment_size * 1000 / step_size)
    n_samples = len(df)

    # Nothing to cut: empty input or a degenerate segment length
    if samples_per_segment <= 0 or n_samples == 0:
        return [], []

    eeg_segments = []
    emg_segments = []

    for i in range(n_samples // samples_per_segment):
        start_idx = int(i * segment_size * 1000 / step_size)
        end_idx = start_idx + samples_per_segment
        # Guard against a trailing partial segment when the per-segment
        # sample count is not a whole number
        if end_idx > n_samples:
            break
        segment = df.iloc[start_idx:end_idx]
        eeg_segments.append(list(segment["EEG"]))
        emg_segments.append(list(segment["EMG"]))

    return eeg_segments, emg_segments

In [36]:
# Segment the data
# Cuts the recording into consecutive 4-second windows; segment_data sizes
# each window as segment_length * 1000 / step_size samples.
segment_length = 4  # seconds
eeg_segments, emg_segments = segment_data(data, segment_length, step_size = 2)

## Finding the optimal embedding dimension and time delay

There are two techniques that can be used to determine these parameters automatically:
- Mutual information to determine the time delay
- False nearest neighbours to determine the embedding dimension

In [37]:
# Initialise the embedding

# Values handed to the "search" embedder as time_delay / dimension,
# plus the stride used when building the embedding
stride = 5
max_time_delay = 30
max_embedding_dimension = 30

embedder = SingleTakensEmbedding(
    parameters_type="search",
    dimension=max_embedding_dimension,
    time_delay=max_time_delay,
    stride=stride,
)

In [38]:
def fit_embedder(embedder: SingleTakensEmbedding, y: np.ndarray, verbose: bool=True) -> np.ndarray:
    """
    Fit `embedder` on the series `y` and return the embedded result.

    When `verbose` is true, the shape of the embedding and the fitted
    `dimension_` / `time_delay_` attributes of the embedder are printed.
    """
    embedded_series = embedder.fit_transform(y)

    # Quiet mode: hand back the embedding without reporting anything
    if not verbose:
        return embedded_series

    print(f"Shape of embedded time series: {embedded_series.shape}")
    print(
        f"Optimal embedding dimension is {embedder.dimension_} and time delay is {embedder.time_delay_}"
    )
    return embedded_series

In [39]:
# Look at some random segments
for segment_idx in (0, 100, 177, 1000):
    y_embedded = fit_embedder(embedder, eeg_segments[segment_idx])
# The optimal values are all similar (=> Just use embedding dimension 5 and time delay 25)

Shape of embedded time series: (380, 5)
Optimal embedding dimension is 5 and time delay is 26
Shape of embedded time series: (383, 4)
Optimal embedding dimension is 4 and time delay is 29
Shape of embedded time series: (383, 5)
Optimal embedding dimension is 5 and time delay is 22
Shape of embedded time series: (373, 6)
Optimal embedding dimension is 6 and time delay is 27


## Creating Persistence Diagrams

In [40]:
# Setting parameters for point cloud embeddings
# (fixed values, chosen after inspecting the parameter search on sample segments)

stride = 10
embedding_time_delay = 25
embedding_dimension = 5

embedder_periodic = SingleTakensEmbedding(
    parameters_type="fixed",
    dimension=embedding_dimension,
    time_delay=embedding_time_delay,
    stride=stride,
    n_jobs=2,
)

In [41]:
# Homology dimensions to track: 0, 1 and 2 dimensional holes TODO try more?
homology_dimensions = [0, 1, 2]

# Persistence is computed with a Vietoris-Rips filtration
persistence = VietorisRipsPersistence(
    n_jobs=10,
    homology_dimensions=homology_dimensions,
)

### Computing Point Clouds and Persistence Diagrams

In [42]:
# Label 1

# Point cloud embeddings, keyed by segment index
y_embedded1 = {}

# Persistence diagrams, keyed by segment index
diagrams1 = {}

# Embed every selected segment with label '1', then compute its persistence diagram
for label_idx in indices_dict[1]:
    point_cloud = embedder_periodic.fit_transform(eeg_segments[label_idx])
    # Prepend an axis so the single cloud is passed as a collection of one
    y_embedded1[label_idx] = point_cloud[np.newaxis, :, :]
    diagrams1[label_idx] = persistence.fit_transform(y_embedded1[label_idx])

In [43]:
# Label 3

# Point cloud embeddings, keyed by segment index
y_embedded3 = {}

# Persistence diagrams, keyed by segment index
diagrams3 = {}

# Embed every selected segment with label '3', then compute its persistence diagram
for label_idx in indices_dict[3]:
    point_cloud = embedder_periodic.fit_transform(eeg_segments[label_idx])
    # Prepend an axis so the single cloud is passed as a collection of one
    y_embedded3[label_idx] = point_cloud[np.newaxis, :, :]
    diagrams3[label_idx] = persistence.fit_transform(y_embedded3[label_idx])

In [44]:
# Label 5

# Point cloud embeddings, keyed by segment index
y_embedded5 = {}

# Persistence diagrams, keyed by segment index
diagrams5 = {}

# Embed every selected segment with label '5', then compute its persistence diagram
for label_idx in indices_dict[5]:
    point_cloud = embedder_periodic.fit_transform(eeg_segments[label_idx])
    # Prepend an axis so the single cloud is passed as a collection of one
    y_embedded5[label_idx] = point_cloud[np.newaxis, :, :]
    diagrams5[label_idx] = persistence.fit_transform(y_embedded5[label_idx])

In [None]:
# Label 7

# Point cloud embeddings, keyed by segment index
y_embedded7 = {}

# Persistence diagrams, keyed by segment index
diagrams7 = {}

# Loop through the first segments with label '7'.
# .get() keeps this cell runnable when the recording contains no segment
# with that label: both dictionaries then simply stay empty.
for label_idx in indices_dict.get(7, []):
    # Prepend an axis so the single cloud is passed as a collection of one
    y_embedded7[label_idx] = embedder_periodic.fit_transform(eeg_segments[label_idx])[None, :, :]
    diagrams7[label_idx] = persistence.fit_transform(y_embedded7[label_idx])

## Save persistence diagrams

In [45]:
# Each stored result has a leading axis of length one; keep only its single entry
persistence_diagrams1 = [diagrams1[segment_idx][0] for segment_idx in diagrams1]
persistence_diagrams3 = [diagrams3[segment_idx][0] for segment_idx in diagrams3]
persistence_diagrams5 = [diagrams5[segment_idx][0] for segment_idx in diagrams5]

In [46]:
def _as_object_array(diagrams):
    """
    Pack a list of diagrams into a 1-D object array with one diagram per entry.

    np.array(diagrams, dtype=object) would instead produce a multi-dimensional
    object array whenever all diagrams happen to share a shape (including the
    single-diagram case), so the saved layout would depend on the data.
    Filling a preallocated array element by element keeps it uniform.
    """
    packed = np.empty(len(diagrams), dtype=object)
    for position, diagram in enumerate(diagrams):
        packed[position] = diagram
    return packed


# Object arrays are stored via pickle; load them back with allow_pickle=True
np.save('PD1.npy', _as_object_array(persistence_diagrams1), allow_pickle=True)
np.save('PD3.npy', _as_object_array(persistence_diagrams3), allow_pickle=True)
np.save('PD5.npy', _as_object_array(persistence_diagrams5), allow_pickle=True)