In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, ConfusionMatrixDisplay
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint
from sklearn.utils import shuffle
import matplotlib.pyplot as plt


In [6]:
# Identifier of the subject whose feature tables and recordings are loaded below.
subject = "m292"

# Label values iterated over when grouping features and segmenting recordings.
label_list = list(range(5))

In [7]:
# Load the feature table stored for `subject` under Brain_Imaging/Features.
imaging_train_feature_df = pd.read_csv(
    f"Brain_Imaging/Features/{subject}/Topological_Summary_Statistics.csv"
)
# Alternative feature table (swap in for the statement above):
#imaging_train_feature_df = pd.read_csv("Brain_Imaging/Features/"+str(subject)+"/Advanced_Features.csv")

In [8]:
# Function to remove the last row for each label (because there is slightly more data for the brain imaging modality)

def remove_last_row(group):
    """Return `group` without its final row.

    Written to be passed to `groupby(...).apply`, where `group` is the
    sub-frame of one group. A group with zero or one row yields an empty
    result.
    """
    # A negative count asks for everything except the last |n| rows.
    return group.head(-1)

# Drop the final row of every 'Label' group, then replace the index built by
# `apply` with a fresh 0..n-1 range.
# NOTE(review): this statement overwrites its own input, so re-running the cell
# removes one more row per label each time.
# NOTE(review): recent pandas releases warn when `groupby.apply` operates on the
# grouping column - confirm 'Label' is still a column afterwards on your version.
imaging_train_feature_df = (
    imaging_train_feature_df
    .groupby('Label')
    .apply(remove_last_row)
    .reset_index(drop=True)
)

In [9]:
# Load the feature table stored for `subject` under Time_Series/Features/<subject>/EEG.
ts_train_feature_df = pd.read_csv(
    f"Time_Series/Features/{subject}/EEG/Topological_Summary_Statistics.csv"
)
# Alternative feature table (swap in for the statement above):
#ts_train_feature_df = pd.read_csv("Time_Series/Features/"+str(subject)+"/EEG/Advanced_Features.csv")

# Directly correlate statistics

In [12]:
# Name of the feature column to compare; the EEG table is indexed with the same
# name prefixed by "EEG_".
feature = "Amplitude_Dim_0"

In [13]:
# Correlation-coefficient matrix between the EEG column and the imaging column
# of the selected feature.
# NOTE(review): assumes both tables have the same number of rows in matching
# order - confirm.
np.corrcoef(ts_train_feature_df["EEG_"+feature], imaging_train_feature_df[feature])

array([[ 1.        , -0.45882338],
       [-0.45882338,  1.        ]])

# Correlation between Original Time Series Statistics and Basic Summary Statistics

In [21]:
# Feature name (without the "EEG_" prefix) used by the cells of this section.
feature = "Amplitude_Dim_0"

In [22]:
# Collect the EEG values of the selected feature, one Series per label.
# NOTE(review): the following cell assigns a new list to the same name, so the
# values gathered here are not the ones the later correlations use.
feature_df = ts_train_feature_df
ts_feature = f"EEG_{feature}"

explored_feature = []
list_of_basic_summary_statistics_for_segments = []

for label in label_list:
    # Rows belonging to this label, restricted to the selected EEG column.
    list_of_basic_summary_statistics_for_segments.append(
        feature_df.loc[feature_df["Label"] == label, ts_feature]
    )


In [23]:
# Collect the imaging values of the selected feature, one Series per label.
# This replaces whatever list was previously bound to the same name.
feature_df = imaging_train_feature_df
list_of_basic_summary_statistics_for_segments = []

for label in label_list:
    # Rows belonging to this label, restricted to the selected feature column.
    list_of_basic_summary_statistics_for_segments.append(
        feature_df.loc[feature_df["Label"] == label, feature]
    )


In [24]:
# Recordings keyed by subject. `.item()` unwraps the single object stored in
# the .npy file (presumably a mapping label -> recording table; see how it is
# indexed when the data is segmented).
# NOTE(review): allow_pickle=True unpickles the file contents, which can run
# arbitrary code - only load files from a trusted source.
data = {}

data[subject] = np.load(
    f'Time_Series/Data/{subject}/Data.npy',
    allow_pickle=True,
).item()

In [25]:
def segment_data(df, segment_size, step_size = 2):
    """
    Split a recording into consecutive, non-overlapping voltage segments.

    Parameters:
    - df (DataFrame): Recording with a "time" column and a "voltage" column.
      The last "time" value, truncated to a whole number, is taken as the
      recording duration in the same unit as `segment_size`.
    - segment_size (int or float): Length of each segment in seconds.
    - step_size (float, optional): Spacing between consecutive rows in
      milliseconds. Default is 2 milliseconds.

    Returns:
    - List of segments; each segment is a list of "voltage" values. Segments
      are cut by row position, so a segment is shorter (possibly empty) when
      `df` has fewer rows than the duration implies. An empty `df` yields [].
    """
    # Nothing to segment; also avoids indexing the last row of an empty frame.
    if len(df) == 0:
        return []

    # The duration is truncated to whole seconds before dividing (unchanged
    # behaviour). The outer int() is needed because `int // float` is a float,
    # which range() rejects when segment_size is given as a float.
    n_segments = int(int(df["time"].iloc[-1]) // segment_size)

    voltage = df["voltage"]
    eeg_segments = []

    for i in range(n_segments):
        # Convert seconds to row positions: seconds * 1000 ms / (ms per row).
        start_idx = int(i * segment_size * 1000 / step_size)
        end_idx = start_idx + int(segment_size * 1000 / step_size)
        eeg_segments.append(list(voltage.iloc[start_idx:end_idx]))

    return eeg_segments

In [26]:
def calculate_means(data):
    """Return the mean absolute value of every sublist in `data`.

    Parameters:
    - data: Iterable of lists of numbers. One-shot iterators (e.g. generators)
      are accepted; they are materialised once so that validation does not
      exhaust them before the means are computed.

    Returns:
    - List with one value per sublist: sum(|x|) / len(sublist).

    Raises:
    - ValueError: if any element of `data` is not a list.
    - ZeroDivisionError: if a sublist is empty.
    """
    # Materialise first: validating an iterator in place would consume it and
    # the function would then silently return an empty list.
    segments = list(data)

    # Ensure the input is a list of lists
    if not all(isinstance(segment, list) for segment in segments):
        raise ValueError("Input must be a list of lists.")

    # Mean of the absolute values of each sublist
    return [sum(np.abs(segment)) / len(segment) for segment in segments]


def calculate_variances_of_amplitude(data):
    """Return the variance of the absolute values of every sublist in `data`.

    Parameters:
    - data: Iterable of lists of numbers. One-shot iterators (e.g. generators)
      are accepted; they are materialised once so that validation does not
      exhaust them before the variances are computed.

    Returns:
    - List with one value per sublist: np.var(|x|) (numpy's default,
      population variance).

    Raises:
    - ValueError: if any element of `data` is not a list.
    """
    # Materialise first: validating an iterator in place would consume it and
    # the function would then silently return an empty list.
    segments = list(data)

    # Ensure the input is a list of lists
    if not all(isinstance(segment, list) for segment in segments):
        raise ValueError("Input must be a list of lists.")

    # Variance of the absolute values of each sublist
    return [np.var(np.abs(segment)) for segment in segments]



In [27]:
# Segment the data
segment_size = 4  # seconds
eeg_segments_amplitudes = []
eeg_segments_amplitude_variances = []


for label in label_list:
    # Use the configured `subject` (not a hard-coded id) so this cell stays in
    # step with the cell that loaded `data`, and segment each recording only
    # once for both statistics.
    label_segments = segment_data(data[subject][label], segment_size, step_size = 2)
    eeg_segments_amplitudes.extend(calculate_means(label_segments))
    eeg_segments_amplitude_variances.extend(calculate_variances_of_amplitude(label_segments))

In [30]:
# Flatten the per-label feature values into one 1-D array, in label order.
# Concatenating (instead of stacking into a 2-D array and flattening) also
# works when the labels have different numbers of rows. The `or` fallback keeps
# an empty input mapping to an empty array, and `ravel()` keeps the cell
# re-runnable once the name already holds a flat array.
list_of_basic_summary_statistics_for_segments = np.concatenate(
    [np.asarray(values).ravel() for values in list_of_basic_summary_statistics_for_segments]
    or [np.array([])]
)

In [31]:
# Correlation-coefficient matrix between the per-segment mean absolute EEG
# voltage and the flattened per-label feature values.
# NOTE(review): both inputs must have the same length and segment order - confirm.
np.corrcoef(eeg_segments_amplitudes, list_of_basic_summary_statistics_for_segments)

array([[1.        , 0.42066623],
       [0.42066623, 1.        ]])

In [32]:
# Correlation-coefficient matrix between the per-segment variance of the
# absolute EEG voltage and the flattened per-label feature values.
# NOTE(review): both inputs must have the same length and segment order - confirm.
np.corrcoef(eeg_segments_amplitude_variances, list_of_basic_summary_statistics_for_segments)

array([[ 1.        , -0.48196238],
       [-0.48196238,  1.        ]])