In [1]:
import os
import random

import h5py
import numpy as np
import pandas as pd
from gtda.diagrams import PersistenceEntropy, Amplitude, NumberOfPoints, ComplexPolynomial, PersistenceLandscape, HeatKernel, Silhouette, BettiCurve, PairwiseDistance, ForgetDimension
from gtda.homology import VietorisRipsPersistence
from sklearn.decomposition import PCA, FastICA
from sklearn.mixture import GaussianMixture

# Choose EEG or EMG

In [2]:
# Select the modality to analyse: "EEG" or "EMG".
# NOTE: data_type is not read by any of the visible cells yet.

data_type = "EEG" # Currently has no effect; support is planned for when the anesthesia data is processed
#data_type = "EMG"

In [3]:
# Choose the subject; this identifier is used to build the input and output paths
subject = "m292"

In [4]:
# Number of components for joint ICA
# (passed to the fusion function as hidden_dim and embedded in the output file names)

hidden_dim = 10

# Import Data

In [5]:
# Run labels; label k selects the recording folder "run0k" when the data is loaded
label_list = [0, 1, 2, 3, 4]

# Known modalities (not referenced by the visible cells)
data_type_list = ["EEG", "EMG"]

# Known subjects (not referenced by the visible cells)
subject_list = ["m292", "m294"]

In [6]:
# Load the EEG & EMG time series of every run into a dict keyed by label
time_series_dataframes = {
    label: pd.read_csv(f"../Time_Series/Data/{subject}/run0{label}/Time_Series_Data.csv")
    for label in label_list
}

In [7]:
# Maps each label to the 'Data' dataset of that run's imaging file.
# The files are opened read-only and intentionally not closed here: the
# datasets are sliced later, when the recordings are cut into segments.
brain_imaging_dataframes = {}

for label in label_list:
    filename = f"../Brain_Imaging/Data/{subject}/run0{label}/Brain_Imaging_Data.h5"
    file = h5py.File(filename, mode='r')
    brain_imaging_dataframes[label] = file['Data']

In [8]:
# Segment EEG & EMG data into segments of the same "length" that one brain imaging picture has

def segment_data(df, segment_size, step_size = 2):
    """
    Split a time-series recording into consecutive, non-overlapping EEG and EMG segments.

    Parameters:
    - df (DataFrame): The input dataframe containing the columns "time", "voltage"
      (the EEG trace) and "emg" (the EMG trace). The last "time" value is taken as
      the recording length in seconds, and rows are assumed to be `step_size`
      milliseconds apart.
    - segment_size (int or float): The desired size of each segment in seconds.
    - step_size (float, optional): The step size of "time" in milliseconds. Default is 2 milliseconds.

    Returns:
    Tuple of two lists:
    - List of EEG segments (each a list of "voltage" values).
    - List of EMG segments (each a list of "emg" values).
    Both lists are empty when `df` has no rows.

    Note:
    Segments are cut by row position. If the frame holds fewer rows than the
    last "time" value implies, trailing segments are shorter than the others.
    """

    if len(df) == 0:
        return [], []

    # Convert the whole quotient so the count stays an int even when
    # segment_size is a float (range() rejects floats).
    n_segments = int(df["time"].iloc[-1] // segment_size)
    rows_per_segment = int(segment_size*1000/step_size)

    eeg_segments = []
    emg_segments = []

    for i in range(n_segments):
        start_idx = int(i* segment_size*1000/step_size)
        segment = df.iloc[start_idx:start_idx + rows_per_segment]
        eeg_segments.append(list(segment["voltage"]))
        emg_segments.append(list(segment["emg"]))

    return eeg_segments, emg_segments

In [9]:
# Segment length handed to segment_data (seconds)
segment_size = 4

# segment_data returns an (EEG, EMG) pair; only the EEG half is kept per label
eeg_segments = {
    label: segment_data(time_series_dataframes[label], segment_size)[0]
    for label in label_list
}

In [10]:
# Segment the brain imaging data.
# NOTE: this rebinds segment_size, which the EEG segmentation cell set to 4.
# Here the value is the number of entries along the first axis of the imaging
# dataset that make up one segment (it is used as a slice length).
segment_size = 80
# Maps each label to its list of imaging segments
brain_imaging_segments = {}

def segment_brain_imaging_data(df, segment_size, n_segments=75):
    """
    Cut a sliceable sequence into consecutive, non-overlapping segments.

    Parameters:
    - df: Any object that supports slicing along its first axis (for example
      an array or a list).
    - segment_size (int or float): Number of entries per segment.
    - n_segments (int, optional): Number of segments to cut. Default is 75,
      the value that was previously hard-coded.

    Returns:
    List with `n_segments` entries; entry i is `list(df[start:start + size])`
    with `start = int(i * segment_size)` and `size = int(segment_size)`.
    Segments that reach past the end of `df` are shorter or empty.
    """
    entries_per_segment = int(segment_size)
    start_indices = (int(i * segment_size) for i in range(n_segments))

    return [list(df[start:start + entries_per_segment]) for start in start_indices]

for label in label_list:
    # Cut the recording into segments and discard the final one before storing
    segments = segment_brain_imaging_data(brain_imaging_dataframes[label], segment_size)[:-1]
    brain_imaging_segments[label] = segments

# Data Fusion

## Approach 1: Joint_ICA function with dimensionality reduction for both EEG and imaging segments

Open question: do both functions still fail to converge?

In [11]:
def joint_ICA_with_dim_reduction(EEG_segments, imaging_segments, hidden_dim):
    """
    Fuse EEG and imaging segments with per-modality PCA followed by a joint FastICA.

    Each modality is reduced with its own PCA, the reduced representations are
    concatenated segment by segment, and FastICA extracts `hidden_dim`
    components from the concatenated data.

    Parameters:
    - EEG_segments (ndarray): 2-D array with one row per segment.
    - imaging_segments (ndarray): 4-D array whose first axis indexes segments;
      the remaining three axes are flattened into one feature vector per segment.
    - hidden_dim (int): Number of independent components to extract.

    Returns:
    ndarray with one row per segment and `hidden_dim` columns.

    Raises:
    ValueError: If the two modalities do not contain the same number of segments.
    """
    n_segments = EEG_segments.shape[0]
    n_samples, n_x, n_y, n_z = imaging_segments.shape

    if n_samples != n_segments:
        raise ValueError(
            "EEG and imaging data must have the same number of segments, got "
            + str(n_segments) + " and " + str(n_samples)
        )

    # Apply PCA for dimensionality reduction on EEG segments.
    # PCA cannot return more components than min(n_samples, n_features), so
    # cap the requested number by the feature count as well.
    pca_EEG = PCA(n_components=min(n_segments, EEG_segments.shape[1]))
    EEG_segments_pca = pca_EEG.fit_transform(EEG_segments)

    # Reshape and apply PCA for dimensionality reduction to the imaging segments
    imaging_segments_reshaped = imaging_segments.reshape(n_samples, n_x * n_y * n_z)
    pca_imaging = PCA(n_components=min(n_segments, imaging_segments_reshaped.shape[1]))
    imaging_segments_pca = pca_imaging.fit_transform(imaging_segments_reshaped)

    # Concatenate the reduced EEG and imaging segments
    data_segments = np.concatenate((EEG_segments_pca, imaging_segments_pca), axis=1)

    # Apply FastICA for joint independent component analysis on segments.
    # tol and max_iter were adapted because the defaults did not converge.
    transformer = FastICA(n_components=hidden_dim, random_state=0, tol=0.0001, max_iter=2000, algorithm='parallel')
    joint_components = transformer.fit_transform(data_segments)

    return joint_components

#### Test on test data ####

# Synthetic smoke test, disabled by default. The imaging array alone holds
# 74 * 80 * 300 * 260 float64 values (about 3.7 GB), so the random data is
# only allocated when the test is actually requested.
RUN_DIM_REDUCTION_SMOKE_TEST = False

if RUN_DIM_REDUCTION_SMOKE_TEST:
    # EEG_segments shape: (74, 2000), imaging_segments shape: (74, 80, 300, 260)
    EEG_segments = np.random.rand(74, 2000)
    imaging_segments = np.random.rand(74, 80, 300, 260)

    # Apply joint ICA to the test EEG and video data
    joint_components_with_dim_reduction = joint_ICA_with_dim_reduction(EEG_segments, imaging_segments, hidden_dim)

We will use this approach for now. It converges with the new parameters.

In [None]:
# Run the PCA + joint ICA fusion once per label: labels are the keys,
# the resulting joint components the values
joint_components_dict = {
    label: joint_ICA_with_dim_reduction(
        np.array(eeg_segments[label]),
        np.array(brain_imaging_segments[label]),
        hidden_dim,
    )
    for label in label_list
}

## Approach 2: Joint_ICA function with reshaping for EEG and imaging segments

Does this approach converge now with the new parameters?

In [None]:
def joint_ICA_with_reshaping(EEG_segments, imaging_segments, hidden_dim=10):
    """
    Fuse EEG and imaging segments by concatenating the raw features and running FastICA.

    No dimensionality reduction is applied beforehand: the imaging segments are
    flattened and appended to the EEG features of the same segment.

    Parameters:
    - EEG_segments (ndarray): Array with one entry per segment along the first axis.
    - imaging_segments (ndarray): 4-D array whose first axis indexes segments;
      the remaining three axes are flattened into one feature vector per segment.
    - hidden_dim (int, optional): Number of independent components to extract.
      Default is 10, the value that was previously hard-coded.

    Returns:
    ndarray with one row per segment and `hidden_dim` columns.
    """
    n_segments = EEG_segments.shape[0]

    # Make sure the EEG segments are 2-D (one feature vector per segment)
    EEG_segments_reshaped = EEG_segments.reshape(n_segments, -1)

    # Reshape the imaging segments to 2D
    n_samples, n_x, n_y, n_z = imaging_segments.shape
    imaging_segments_reshaped = imaging_segments.reshape(n_samples, n_x * n_y * n_z)

    # Concatenate EEG and imaging segments
    data_segments = np.concatenate((EEG_segments_reshaped, imaging_segments_reshaped), axis=1)

    # Apply FastICA for joint independent component analysis on segments.
    # tol and max_iter were adapted because the defaults did not converge.
    transformer = FastICA(n_components=hidden_dim, random_state=0, tol=0.0001, max_iter=2000, algorithm='parallel')
    joint_components = transformer.fit_transform(data_segments)

    return joint_components

#### Test on test data ####

# Synthetic smoke test, disabled by default. The imaging array alone holds
# 74 * 80 * 300 * 260 float64 values (about 3.7 GB), so the random data is
# only allocated when the test is actually requested.
RUN_RESHAPING_SMOKE_TEST = False

if RUN_RESHAPING_SMOKE_TEST:
    # EEG_segments shape: (74, 2000), imaging_segments shape: (74, 80, 300, 260)
    EEG_segments = np.random.rand(74, 2000)
    imaging_segments = np.random.rand(74, 80, 300, 260)

    # Apply joint ICA to the test EEG and video data
    joint_components_with_reshaping = joint_ICA_with_reshaping(EEG_segments, imaging_segments)

Next steps: Experiment with the FastICA parameters (n_components, tol, max_iter) to ensure convergence and to minimize the reconstruction error.

## Approach 3: Simple Multimodal linear model

Generative model as described in the paper. Does not work yet!

In [25]:
# Draft of approach 3, kept inside a string literal so that nothing in this
# cell is executed; the notebook only echoes the string back as the cell output.
# NOTE(review): with joint_data of shape (74, n_features), np.linalg.pinv(joint_data)
# has shape (n_features, 74), so np.dot(joint_data, W_x) is only (74, 74). The
# slices [:, :2000] and [:, 2000:] therefore cannot separate EEG from imaging sources.
"""
# EEG_segments shape: (74, 2000), imaging_segments shape: (74, 80, 300, 260)
EEG_segments = np.random.rand(74, 2000)
imaging_segments = np.random.rand(74, 80, 300, 260)

# Reshape imaging_segments to (74, 80*300*260) for linear decoding
imaging_segments_flat = imaging_segments.reshape(74, -1)

# Concatenate EEG and flattened imaging data for joint linear decoding
joint_data = np.concatenate((EEG_segments, imaging_segments_flat), axis=1)

# Perform linear decoding to estimate neural sources
# Assuming W_x is the spatial extraction filter matrix
W_x = np.linalg.pinv(joint_data)  # Pseudo-inverse of joint data as decoding matrix

# Estimate neural sources
estimated_sources = np.dot(joint_data, W_x)

# Extract EEG and imaging source estimates
estimated_EEG_sources = estimated_sources[:, :2000]
estimated_imaging_sources_flat = estimated_sources[:, 2000:]
"""

'\n# EEG_segments shape: (74, 2000), imaging_segments shape: (74, 80, 300, 260)\nEEG_segments = np.random.rand(74, 2000)\nimaging_segments = np.random.rand(74, 80, 300, 260)\n\n# Reshape imaging_segments to (74, 80*300*260) for linear decoding\nimaging_segments_flat = imaging_segments.reshape(74, -1)\n\n# Concatenate EEG and flattened imaging data for joint linear decoding\njoint_data = np.concatenate((EEG_segments, imaging_segments_flat), axis=1)\n\n# Perform linear decoding to estimate neural sources\n# Assuming W_x is the spatial extraction filter matrix\nW_x = np.linalg.pinv(joint_data)  # Pseudo-inverse of joint data as decoding matrix\n\n# Estimate neural sources\nestimated_sources = np.dot(joint_data, W_x)\n\n# Extract EEG and imaging source estimates\nestimated_EEG_sources = estimated_sources[:, :2000]\nestimated_imaging_sources_flat = estimated_sources[:, 2000:]\n'

# Computing Persistence Diagrams from the joint components

## Compute and Save Persistence Diagrams

In [None]:
# We will look at 0, 1 and 2 dimensional holes
homology_dimensions = [0, 1, 2]

def compute_persistence_diagrams(dataframes, label_list, homology_dimensions=(0, 1, 2), n_jobs=10):
    """
    Compute Vietoris-Rips persistence diagrams from the joint components of every label.

    The previous implementation called RadialFiltration, which is never
    imported in this notebook and is an image filtration, while the inputs
    here are joint-component matrices. Each label's matrix is now passed to
    VietorisRipsPersistence directly as one point cloud.
    NOTE(review): treating each row (segment) as a point and each column
    (component) as a coordinate is an assumption about the intended analysis;
    confirm before relying on the results.

    Parameters:
    - dataframes (dict): Maps each label to a 2-D array-like of joint
      components, one row per segment.
    - label_list (iterable): Labels to process.
    - homology_dimensions (sequence of int, optional): Homology dimensions to
      track. Default is (0, 1, 2).
    - n_jobs (int, optional): Number of parallel jobs passed to
      VietorisRipsPersistence. Default is 10.

    Returns:
    dict mapping each label to a list of persistence diagrams. The list
    currently holds a single diagram, the one of the label's point cloud.

    Raises:
    ValueError: If the data of a label is not 2-D.
    """
    # The transformer does not depend on the label, so build it once
    persistence = VietorisRipsPersistence(
        homology_dimensions=list(homology_dimensions), n_jobs=n_jobs
    )

    persistence_diagrams = {}

    for label in label_list:
        point_cloud = np.asarray(dataframes[label], dtype=float)
        if point_cloud.ndim != 2:
            raise ValueError(
                "Expected a 2-D array of joint components for label " + str(label)
                + ", got an array with " + str(point_cloud.ndim) + " dimension(s)"
            )

        # A leading batch axis is added because fit_transform takes a
        # collection of point clouds: (n_clouds, n_points, n_dimensions)
        diagrams = persistence.fit_transform(point_cloud[np.newaxis, :, :])
        persistence_diagrams[label] = [diagrams[0]]

    return persistence_diagrams


persistence_diagrams = compute_persistence_diagrams(joint_components_dict, label_list, homology_dimensions)

In [None]:
# np.save does not create missing folders, so make sure the subject's output
# folder exists before writing; otherwise the results computed above would be lost.
os.makedirs(f"Embeddings_and_Persistence_Diagrams/{subject}", exist_ok=True)

# The dict is stored as a 0-d object array; read it back with
# np.load(path, allow_pickle=True).item()
np.save(f"Embeddings_and_Persistence_Diagrams/{subject}/Persistence_Diagrams_Hidden_Dim{hidden_dim}.npy", np.array(persistence_diagrams, dtype=object), allow_pickle=True)

In [None]:
# Persistence diagrams
# np.save does not create missing folders, so make sure the subject's output
# folder exists before writing.
os.makedirs(f'Embeddings_and_Persistence_Diagrams/{subject}', exist_ok=True)

# The dict is stored as a 0-d object array; read it back with
# np.load(path, allow_pickle=True).item()
np.save(f'Embeddings_and_Persistence_Diagrams/{subject}/Persistence_Diagrams_All_Labels_with_Hidden_Dim_{hidden_dim}.npy',
        np.array(persistence_diagrams, dtype=object), allow_pickle=True)