In [12]:
import numpy as np
import pandas as pd
import h5py
import numpy as np
from sklearn.decomposition import PCA, FastICA
from sklearn.mixture import GaussianMixture
from gtda.homology import VietorisRipsPersistence
from gtda.diagrams import PersistenceEntropy, Amplitude, NumberOfPoints, ComplexPolynomial, PersistenceLandscape, HeatKernel, Silhouette, BettiCurve, PairwiseDistance, ForgetDimension


# Choose EEG or EMG

In [2]:
# Choose if you want to look at EEG or EMG data

data_type = "EEG" # Does not have an effect yet, will be added later when processing anesthesia data
#data_type = "EMG"

In [3]:
# choose individuum
subject = "m294"

# Import Data

In [4]:
label_list = [0, 1, 2, 3, 4]

data_type_list = ["EEG", "EMG"]

subject_list = ["m292", "m294"]

In [5]:
# EEG & EMG data
time_series_dataframes = {}

for label in label_list:
    time_series_dataframes[label] = pd.read_csv("../Time_Series/Data/"+str(subject)+"/run0"+str(label)+"/Time_Series_Data.csv")

In [6]:
brain_imaging_dataframes = {}

for label in label_list:
    filename = "../Brain_Imaging/Data/"+str(subject)+"/run0"+str(label)+"/Brain_Imaging_Data.h5"
    file = h5py.File(filename,'r')
    brain_imaging_dataframes[label] = file['Data']

In [7]:
# Segment EEG & EMG data into segments of the same "length" that one brain imaging picture has

def segment_data(df, segment_size, step_size = 2):
    """
    Segments time-series data into EEG and EMG segments.

    Parameters:
    - df (DataFrame): The input dataframe containing the columns "Time", "EEG" and "EMG".
    - segment_size (float): The desired size of each segment in seconds.
    - step_size (float, optional): The step size of "Time" in milliseconds. Default is 2 millisecond.

    Returns:
    Tuple of two lists:
    - List of EEG segments.
    - List of EMG segments.
    """

    n_segments = int(df["time"].iloc[-1]) // segment_size
    eeg_segments = []
    emg_segments = []

    for i in range(n_segments):
        start_idx = int(i* segment_size*1000/step_size)
        end_idx = start_idx + int(segment_size*1000/step_size)
        segment = df.iloc[start_idx:end_idx]
        eeg_segments.append(list(segment["voltage"]))
        emg_segments.append(list(segment["emg"]))

    return eeg_segments, emg_segments

In [8]:
segment_size = 4
eeg_segments = {}


for label in label_list:
    eeg_segments[label],_ = segment_data(time_series_dataframes[label], segment_size)

In [9]:
# Segment the data
segment_size = 80
brain_imaging_segments = {}

def segment_brain_imaging_data(df, segment_size):

    n_segments = 75
    brain_imaging_segments = []

    for i in range(n_segments):
        start_idx = int(i* segment_size)
        end_idx = start_idx + int(segment_size)
        segment = df[start_idx:end_idx]

        brain_imaging_segments.append(list(segment))

    return brain_imaging_segments

for label in label_list:
    segments = segment_brain_imaging_data(brain_imaging_dataframes[label], segment_size) 
     # Remove the last segment for each label
    segments = segments[:-1]
    # Save in dictionary
    brain_imaging_segments[label] = segments

# Data Fusion

Joint ICA + Reconstruction with Generative Model.

## First try of Joint ICA with test data

In [None]:
import numpy as np
from sklearn.decomposition import FastICA

np.random.seed(0)
n_samples = 200
n_features_x = 64  # Number of EEG channels
n_features_y = 100  # Number of video features

x = np.random.rand(n_samples, n_features_x)
y = np.random.rand(n_samples, n_features_y)

# Apply Joint Independent Component Analysis (jICA) for early fusion
# Assuming x represents EEG data and y represents 2D video data
def joint_ICA(X, Y):
    # Concatenate the EEG and video data
    data = np.concatenate((X, Y), axis=1)
    
    # Apply FastICA for joint independent component analysis
    transformer = FastICA(n_components=10, random_state=0)
    joint_components = transformer.fit_transform(data)
    
    # Separate the joint components back into EEG and video components
    EEG_components = joint_components[:, :n_features_x]
    video_components = joint_components[:, n_features_x:]
    
    return joint_components, EEG_components, video_components

# Apply joint ICA to the EEG and video data
joint_components, EEG_components, video_components = joint_ICA(x, y)

In this code snippet:

- We generate sample EEG data (x) and 2D video data (y) for demonstration purposes.
- The joint_ICA function performs Joint Independent Component Analysis (jICA) on the concatenated EEG and video data.
- The resulting components are separated back into EEG components and video components.

In [12]:
# Print the shape of the extracted components
print("Shape of joint components:", joint_components.shape)
print("Shape of EEG components:", EEG_components.shape)
print("Shape of video components:", video_components.shape)

Shape of joint components: (200, 10)
Shape of EEG components: (200, 10)
Shape of video components: (200, 0)


## Trying Data Fusion with test data with the correct dimensions

Now we apply the same function to our actual data.

In [14]:
np.array(eeg_segments[0]).shape

(74, 2000)

In [15]:
np.array(brain_imaging_segments[0]).shape

(74, 80, 300, 260)

These different shapes will lead to problems, so we should reshape or use dimensionality reduction.

### Get first functions running

#### Approach 1: Joint_ICA function with dimensionality reduction for both EEG and imaging segments

Both functions do still not converge now?

In [10]:
def joint_ICA_with_dim_reduction(EEG_segments, imaging_segments):
    n_segments = EEG_segments.shape[0]
    n_features = min(EEG_segments.shape[1], imaging_segments.shape[1] * imaging_segments.shape[2] * imaging_segments.shape[3])
    
    # Apply PCA for dimensionality reduction on EEG segments
    pca_EEG = PCA(n_components=n_segments)
    EEG_segments_pca = pca_EEG.fit_transform(EEG_segments)
    
    # Reshape and apply PCA for dimensionality reduction to the imaging segments
    n_samples, n_x, n_y, n_z = imaging_segments.shape
    imaging_segments_reshaped = imaging_segments.reshape(n_samples, n_x * n_y * n_z)
    pca_imaging = PCA(n_components=n_segments)
    imaging_segments_pca = pca_imaging.fit_transform(imaging_segments_reshaped)
    
    # Concatenate the reduced EEG and imaging segments
    data_segments = np.concatenate((EEG_segments_pca, imaging_segments_pca), axis=1)
    
    # Apply FastICA for joint independent component analysis on segments
    # Adapt ICA parameters because previously it did not converge
    transformer = FastICA(n_components=10, random_state=0, tol=0.0001, max_iter=2000, algorithm='parallel')
    joint_components = transformer.fit_transform(data_segments)
    
    return joint_components

# EEG_segments shape: (74, 2000), imaging_segments shape: (74, 80, 300, 260)
EEG_segments = np.random.rand(74, 2000)
imaging_segments = np.random.rand(74, 80, 300, 260)

# Apply joint ICA to the EEG and video data
joint_components_with_dim_reduction = joint_ICA_with_dim_reduction(EEG_segments, imaging_segments)

#### Approach 2: Joint_ICA function with reshaping for EEG and imaging segments

Does this approach converge now with the new parameters?

In [None]:
def joint_ICA_with_reshaping(EEG_segments, imaging_segments):
    n_segments = EEG_segments.shape[0]
    n_features_EEG = EEG_segments.shape[1]
    
    # Reshape the EEG segments to have a 2D shape
    EEG_segments_reshaped = EEG_segments.reshape(n_segments, n_features_EEG)
    
    # Reshape the imaging segments to 2D
    n_samples, n_x, n_y, n_z = imaging_segments.shape
    imaging_segments_reshaped = imaging_segments.reshape(n_samples, n_x * n_y * n_z)
    
    # Concatenate EEG and imaging segments
    data_segments = np.concatenate((EEG_segments_reshaped, imaging_segments_reshaped), axis=1)
    
    # Apply FastICA for joint independent component analysis on segments with adjusted parameters
    # Adapt ICA parameters because previously it did not converge
    transformer = FastICA(n_components=10, random_state=0, tol=0.0001, max_iter=2000, algorithm='parallel')
    joint_components = transformer.fit_transform(data_segments)

    return joint_components

# EEG_segments shape: (74, 2000), imaging_segments shape: (74, 80, 300, 260)
EEG_segments = np.random.rand(74, 2000)
imaging_segments = np.random.rand(74, 80, 300, 260)

# Apply joint ICA to the EEG and video data
joint_components_with_reshaping = joint_ICA_with_reshaping(EEG_segments, imaging_segments)

Next steps: Experiment with parameters (n_components, tol, max_iter) to ensure convergence and be able to minimize the reconstruction error.

#### Approach 3: Simple Multimodal linear model

Generative model as described in the paper. Does not work yet!

In [25]:
"""
# EEG_segments shape: (74, 2000), imaging_segments shape: (74, 80, 300, 260)
EEG_segments = np.random.rand(74, 2000)
imaging_segments = np.random.rand(74, 80, 300, 260)

# Reshape imaging_segments to (74, 80*300*260) for linear decoding
imaging_segments_flat = imaging_segments.reshape(74, -1)

# Concatenate EEG and flattened imaging data for joint linear decoding
joint_data = np.concatenate((EEG_segments, imaging_segments_flat), axis=1)

# Perform linear decoding to estimate neural sources
# Assuming W_x is the spatial extraction filter matrix
W_x = np.linalg.pinv(joint_data)  # Pseudo-inverse of joint data as decoding matrix

# Estimate neural sources
estimated_sources = np.dot(joint_data, W_x)

# Extract EEG and imaging source estimates
estimated_EEG_sources = estimated_sources[:, :2000]
estimated_imaging_sources_flat = estimated_sources[:, 2000:]
"""

'\n# EEG_segments shape: (74, 2000), imaging_segments shape: (74, 80, 300, 260)\nEEG_segments = np.random.rand(74, 2000)\nimaging_segments = np.random.rand(74, 80, 300, 260)\n\n# Reshape imaging_segments to (74, 80*300*260) for linear decoding\nimaging_segments_flat = imaging_segments.reshape(74, -1)\n\n# Concatenate EEG and flattened imaging data for joint linear decoding\njoint_data = np.concatenate((EEG_segments, imaging_segments_flat), axis=1)\n\n# Perform linear decoding to estimate neural sources\n# Assuming W_x is the spatial extraction filter matrix\nW_x = np.linalg.pinv(joint_data)  # Pseudo-inverse of joint data as decoding matrix\n\n# Estimate neural sources\nestimated_sources = np.dot(joint_data, W_x)\n\n# Extract EEG and imaging source estimates\nestimated_EEG_sources = estimated_sources[:, :2000]\nestimated_imaging_sources_flat = estimated_sources[:, 2000:]\n'

## Trying Data Fusion with the actual data

Only label 0 right now.

### Producing joint components with Joint ICA

In [10]:
def joint_ICA_with_dim_reduction(EEG_segments, imaging_segments):
    n_segments = EEG_segments.shape[0]
    n_features = min(EEG_segments.shape[1], imaging_segments.shape[1] * imaging_segments.shape[2] * imaging_segments.shape[3])
    
    # Apply PCA for dimensionality reduction on EEG segments
    pca_EEG = PCA(n_components=n_segments)
    EEG_segments_pca = pca_EEG.fit_transform(EEG_segments)
    
    # Reshape and apply PCA for dimensionality reduction to the imaging segments
    n_samples, n_x, n_y, n_z = imaging_segments.shape
    imaging_segments_reshaped = imaging_segments.reshape(n_samples, n_x * n_y * n_z)
    pca_imaging = PCA(n_components=n_segments)
    imaging_segments_pca = pca_imaging.fit_transform(imaging_segments_reshaped)
    
    # Concatenate the reduced EEG and imaging segments
    data_segments = np.concatenate((EEG_segments_pca, imaging_segments_pca), axis=1)
    
    # Apply FastICA for joint independent component analysis on segments
    # Adapt ICA parameters because previously it did not converge
    transformer = FastICA(n_components=10, random_state=0, tol=0.0001, max_iter=2000, algorithm='parallel')
    joint_components = transformer.fit_transform(data_segments)
    
    return joint_components

# Apply joint ICA to the EEG and video data

joint_components_dict = {}

for label in label_list:
    joint_components_dict[label] = joint_ICA_with_dim_reduction(np.array(eeg_segments[label]), np.array(brain_imaging_segments[label]))

Does Fast ICA converge? => Yes!

In [None]:
joint_components_dict = {}

#joint_components_dict[0] = joint_ICA_with_dim_reduction(np.array(eeg_segments[0]), np.array(brain_imaging_segments[0]))
#joint_components_dict[1] = joint_ICA_with_dim_reduction(np.array(eeg_segments[1]), np.array(brain_imaging_segments[1]))
#joint_components_dict[2] = joint_ICA_with_dim_reduction(np.array(eeg_segments[2]), np.array(brain_imaging_segments[2]))
#joint_components_dict[3] = joint_ICA_with_dim_reduction(np.array(eeg_segments[3]), np.array(brain_imaging_segments[3]))
joint_components_dict[4] = joint_ICA_with_dim_reduction(np.array(eeg_segments[4]), np.array(brain_imaging_segments[4]))

### Computing Persistence Diagrams from the joint components

Find optimal parameters

In [None]:
# Initialise the embedding
max_embedding_dimension = 30
max_time_delay = 30
stride = 5

embedder = SingleTakensEmbedding(
    parameters_type="search",
    time_delay=max_time_delay,
    dimension=max_embedding_dimension,
    stride=stride,
)


def find_optimal_parameters(embedder, segments, max_index, iterations = 8):
    """
    Finds (approximate) optimal embedding parameters by averaging optimal parameters of random segments.

    Parameters:
    - embedder (object): defined by SingleTakensEmbedding() or similar
    - segments (list of lists): Complete EEG/EMG segments
    - max_index (int): How many segments there are
    - iteratiors (int): How many random indices to sample

    Returns:
    Tuple of two floats:
    - Average optimal embedding dimension
    - Average optimal time delay
    """


    optimal_embeddings_dimensions = []
    optimal_time_delays = []
    
    for _ in range(iterations):
        random_index = random.randint(0, max_index)
        embedding = embedder.fit_transform(segments[random_index])
        
         # append optimal embedding dimension for this segment
        optimal_embeddings_dimensions.append(embedder.dimension_)

        # append optimal time delay for this segment
        optimal_time_delays.append(embedder.time_delay_)

        print("The optimal embedding dimension is " + str(np.mean(optimal_embeddings_dimensions)) + 
              " and the optimal time delay is " + str(np.mean(optimal_time_delays)))
        
        return int(np.mean(optimal_embeddings_dimensions)), int(np.mean(optimal_time_delays))



# Compute optimal embedding parameters

embedding_dimension, embedding_time_delay = find_optimal_parameters(embedder, joint_components_dict[0], len(all_segments), iterations = 8)

In [None]:
# Setting parameters for point cloud embeddings

#embedding_dimension= 3 # for data exploration
stride = 10

embedder = SingleTakensEmbedding(
    parameters_type="fixed",
    n_jobs=2,
    time_delay=embedding_time_delay, # computed above
    dimension=embedding_dimension, # computed above
    stride=stride,
)

Compute final persistence diagrams

In [None]:
# We will look at 0, 1 and 2 dimensional holes
homology_dimensions = [0, 1, 2]

# We will use a Vietoris Rips filtrations
persistence = VietorisRipsPersistence(
    homology_dimensions=homology_dimensions, n_jobs=10
)

In [None]:
def compute_embeddings_and_diagrams(segments, time_delay_embeddings, persistence_diagrams, all_indices_dict, label):

    time_delay_embeddings["Label_"+str(label)] = []
    persistence_diagrams["Label_"+str(label)] = []

    # Compute embeddings and diagrams for the complete data
    for diagram_idx in range(len(segments[label])):
        time_delay_embeddings["Label_"+str(label)].append(embedder.fit_transform(segments[label][diagram_idx])[None, :, :])
        persistence_diagrams["Label_"+str(label)].append(persistence.fit_transform_plot(time_delay_embeddings["Label_"+str(label)][diagram_idx]))
    
    return time_delay_embeddings, persistence_diagrams

In [None]:
# Compute embeddings and persistence diagrams for the complete data

time_delay_embeddings = {}
persistence_diagrams = {}

for label in label_list:
    time_delay_embeddings, persistence_diagrams = compute_embeddings_and_diagrams(joint_components_dict, time_delay_embeddings, persistence_diagrams, all_indices_dict, label)

In [None]:
# Persistence diagrams
np.save('Embeddings_and_Persistence_Diagrams/'+str(subject)+'/'+str(data_type)+'/Persistence_Diagrams_All_Labels.npy', \
            np.array(persistence_diagrams, dtype=object), allow_pickle=True)