In [2]:
import numpy as np
import pandas as pd
import h5py
import numpy as np
from sklearn.decomposition import PCA, FastICA
from sklearn.mixture import GaussianMixture


# Choose EEG or EMG

In [3]:
# Choose if you want to look at EEG or EMG data

data_type = "EEG" # Does not have an effect yet, will be added later when processing anesthesia data
#data_type = "EMG"

In [4]:
# choose individuum
subject = "m294"

# Import Data

In [5]:
label_list = [0, 1, 2, 3, 4]

data_type_list = ["EEG", "EMG"]

subject_list = ["m292", "m294"]

In [6]:
# EEG & EMG data
time_series_dataframes = {}

for label in label_list:
    time_series_dataframes[label] = pd.read_csv("../Time_Series/Data/"+str(subject)+"/run0"+str(label)+"/Time_Series_Data.csv")

In [7]:
brain_imaging_dataframes = {}

for label in label_list:
    filename = "../Brain_Imaging/Data/"+str(subject)+"/run0"+str(label)+"/Brain_Imaging_Data.h5"
    file = h5py.File(filename,'r')
    brain_imaging_dataframes[label] = file['Data']

In [8]:
# Segment EEG & EMG data into segments of the same "length" that one brain imaging picture has

def segment_data(df, segment_size, step_size = 2):
    """
    Segments time-series data into EEG and EMG segments.

    Parameters:
    - df (DataFrame): The input dataframe containing the columns "Time", "EEG" and "EMG".
    - segment_size (float): The desired size of each segment in seconds.
    - step_size (float, optional): The step size of "Time" in milliseconds. Default is 2 millisecond.

    Returns:
    Tuple of two lists:
    - List of EEG segments.
    - List of EMG segments.
    """

    n_segments = int(df["time"].iloc[-1]) // segment_size
    eeg_segments = []
    emg_segments = []

    for i in range(n_segments):
        start_idx = int(i* segment_size*1000/step_size)
        end_idx = start_idx + int(segment_size*1000/step_size)
        segment = df.iloc[start_idx:end_idx]
        eeg_segments.append(list(segment["voltage"]))
        emg_segments.append(list(segment["emg"]))

    return eeg_segments, emg_segments

In [9]:
segment_size = 4
eeg_segments = {}


for label in label_list:
    eeg_segments[label],_ = segment_data(time_series_dataframes[label], segment_size)

In [11]:
# Segment the data
segment_size = 80
brain_imaging_segments = {}

def segment_brain_imaging_data(df, segment_size):

    n_segments = 75
    brain_imaging_segments = []

    for i in range(n_segments):
        start_idx = int(i* segment_size)
        end_idx = start_idx + int(segment_size)
        segment = df[start_idx:end_idx]

        brain_imaging_segments.append(list(segment))

    return brain_imaging_segments

for label in label_list:
    segments = segment_brain_imaging_data(brain_imaging_dataframes[label], segment_size) 
     # Remove the last segment for each label
    segments = segments[:-1]
    # Save in dictionary
    brain_imaging_segments[label] = segments

## Joint ICA

## First try with test data

In [None]:
import numpy as np
from sklearn.decomposition import FastICA

np.random.seed(0)
n_samples = 200
n_features_x = 64  # Number of EEG channels
n_features_y = 100  # Number of video features

x = np.random.rand(n_samples, n_features_x)
y = np.random.rand(n_samples, n_features_y)

# Apply Joint Independent Component Analysis (jICA) for early fusion
# Assuming x represents EEG data and y represents 2D video data
def joint_ICA(X, Y):
    # Concatenate the EEG and video data
    data = np.concatenate((X, Y), axis=1)
    
    # Apply FastICA for joint independent component analysis
    transformer = FastICA(n_components=10, random_state=0)
    joint_components = transformer.fit_transform(data)
    
    # Separate the joint components back into EEG and video components
    EEG_components = joint_components[:, :n_features_x]
    video_components = joint_components[:, n_features_x:]
    
    return joint_components, EEG_components, video_components

# Apply joint ICA to the EEG and video data
joint_components, EEG_components, video_components = joint_ICA(x, y)

In this code snippet:

- We generate sample EEG data (x) and 2D video data (y) for demonstration purposes.
- The joint_ICA function performs Joint Independent Component Analysis (jICA) on the concatenated EEG and video data.
- The resulting components are separated back into EEG components and video components.

In [12]:
# Print the shape of the extracted components
print("Shape of joint components:", joint_components.shape)
print("Shape of EEG components:", EEG_components.shape)
print("Shape of video components:", video_components.shape)

Shape of joint components: (200, 10)
Shape of EEG components: (200, 10)
Shape of video components: (200, 0)


## Experiments with test data with the correct dimensions

Now we apply the same function to our actual data.

In [None]:
np.array(eeg_segments[0]).shape

In [None]:
np.array(brain_imaging_segments[0]).shape

These different shapes will lead to problems, so we should reshape or use dimensionality reduction.

### Get first functions running

#### Approach 1: Joint_ICA function with dimensionality reduction for both EEG and imaging segments

Both functions do still not converge now?

In [12]:
def joint_ICA_with_dim_reduction(EEG_segments, imaging_segments):
    n_segments = EEG_segments.shape[0]
    n_features = min(EEG_segments.shape[1], imaging_segments.shape[1] * imaging_segments.shape[2] * imaging_segments.shape[3])
    
    # Apply PCA for dimensionality reduction on EEG segments
    pca_EEG = PCA(n_components=n_segments)
    EEG_segments_pca = pca_EEG.fit_transform(EEG_segments)
    
    # Reshape and apply PCA for dimensionality reduction to the imaging segments
    n_samples, n_x, n_y, n_z = imaging_segments.shape
    imaging_segments_reshaped = imaging_segments.reshape(n_samples, n_x * n_y * n_z)
    pca_imaging = PCA(n_components=n_segments)
    imaging_segments_pca = pca_imaging.fit_transform(imaging_segments_reshaped)
    
    # Concatenate the reduced EEG and imaging segments
    data_segments = np.concatenate((EEG_segments_pca, imaging_segments_pca), axis=1)
    
    # Apply FastICA for joint independent component analysis on segments
    # Adapt ICA parameters because previously it did not converge
    transformer = FastICA(n_components=10, random_state=0, tol=0.0001, max_iter=2000, algorithm='parallel')
    joint_components = transformer.fit_transform(data_segments)
    
    return joint_components

# EEG_segments shape: (74, 2000), imaging_segments shape: (74, 80, 300, 260)
EEG_segments = np.random.rand(74, 2000)
imaging_segments = np.random.rand(74, 80, 300, 260)

# Apply joint ICA to the EEG and video data
joint_components_with_dim_reduction = joint_ICA_with_dim_reduction(EEG_segments, imaging_segments)



#### Approach 2: Joint_ICA function with reshaping for EEG and imaging segments

Does this approach converge now with the new parameters?

In [10]:
def joint_ICA_with_reshaping(EEG_segments, imaging_segments):
    n_segments = EEG_segments.shape[0]
    n_features_EEG = EEG_segments.shape[1]
    
    # Reshape the EEG segments to have a 2D shape
    EEG_segments_reshaped = EEG_segments.reshape(n_segments, n_features_EEG)
    
    # Reshape the imaging segments to 2D
    n_samples, n_x, n_y, n_z = imaging_segments.shape
    imaging_segments_reshaped = imaging_segments.reshape(n_samples, n_x * n_y * n_z)
    
    # Concatenate EEG and imaging segments
    data_segments = np.concatenate((EEG_segments_reshaped, imaging_segments_reshaped), axis=1)
    
    # Apply FastICA for joint independent component analysis on segments with adjusted parameters
    # Adapt ICA parameters because previously it did not converge
    transformer = FastICA(n_components=10, random_state=0, tol=0.0001, max_iter=2000, algorithm='parallel')
    joint_components = transformer.fit_transform(data_segments)

    return joint_components


# EEG_segments shape: (74, 2000), imaging_segments shape: (74, 80, 300, 260)
EEG_segments = np.random.rand(74, 2000)
imaging_segments = np.random.rand(74, 80, 300, 260)

# Apply joint ICA to the EEG and video data
joint_components_with_reshaping = joint_ICA_with_reshaping(EEG_segments, imaging_segments)

Next steps: Experiment with parameters (n_components, tol, max_iter) to ensure convergence and be able to minimize the reconstruction error.

### Reconstruction the original data with a generative model

For reconstruction, we need to use a generative model.

#### For the dimensionality reduction approach

In [14]:
# Fit Gaussian Mixture Models for EEG and brain imaging data reconstruction
gmm_EEG = GaussianMixture(n_components=10, random_state=0)
gmm_EEG.fit(joint_components_with_dim_reduction)

gmm_imaging = GaussianMixture(n_components=10, random_state=0)
gmm_imaging.fit(joint_components_with_dim_reduction)

In [None]:
# Generate samples from the GMMs for EEG and brain imaging data reconstruction
EEG_reconstructed = gmm_EEG.sample(74)[0]

# Generate imaging samples directly with the desired shape
imaging_samples = gmm_imaging.sample((74, 80, 300, 260))[0]

# Reshape the imaging samples to match the desired shape
imaging_reconstructed = imaging_samples.reshape(74, 80, 300, 260)

# Print the shapes of the reconstructed data
print("Shape of EEG_reconstructed:", EEG_reconstructed.shape)
print("Shape of imaging_reconstructed:", imaging_reconstructed.shape)
