# 1. Library

In [1]:
path_to_base_package = '../..'
import sys
# setting path
sys.path.append(f'{path_to_base_package}')
import mne
mne.viz.set_3d_backend('pyvistaqt')

import json
from sklearn.preprocessing import StandardScaler

import seaborn as sns

import matplotlib.pyplot as plt
%matplotlib inline
from mna.utils.data_access import *

ModuleNotFoundError: No module named 'pyvistaqt'

In [None]:
# Directory holding the pickled outputs that the following cells read.
# NOTE: the value has no trailing slash, so any path built from it must add its
# own separator (as the f'{output_dir}/ica_epochs.pickle' load below does).
output_dir = f'{path_to_base_package}/output/saved_files/pickle_files/corrected_voice_timestamp'

In [None]:
# Load the pickled mapping of epochs objects (its values are concatenated below).
# NOTE: pickle executes arbitrary code on load; only open files produced by this project.
with open(f'{output_dir}/ica_epochs.pickle', 'rb') as handle:
    ica_proc_epochs = pickle.load(handle)
    
# Merge every entry of the mapping into one Epochs object.
all_epochs = mne.concatenate_epochs(list(ica_proc_epochs.values()))

# get EEG data and labels from concatenated epoch objects
proc_epochs_data = all_epochs.get_data()
# Third column of the MNE events array holds the event id of each epoch.
proc_labels = all_epochs.events[:,2]

In [None]:
# Download fsaverage files
fs_dir = fetch_fsaverage(verbose=True)
subjects_dir = os.path.dirname(fs_dir)

# The files live in:
subject = 'fsaverage'
trans = 'fsaverage'  # MNE has a built-in fsaverage transformation
# NOTE(review): the note below describes both the source space and the BEM as
# "downsampled by a factor of 5" while quoting ico == 5 and ico == 4
# respectively -- confirm the intended wording against the linked MNE docs.
'''
select the boundary element model, note that the source data has been downsampled by a factor of 5 
(i.e. ico == 5, https://mne.tools/stable/generated/mne.setup_source_space.html#mne.setup_source_space)
and the BEM has been downsampled by a factor of 5 (i.e. ico == 4, see here: https://mne.tools/stable/generated/mne.make_bem_model.html)
implications here: https://brainder.org/2016/05/31/downsampling-decimating-a-brain-surface/
'''
# Source-space and BEM-solution files shipped inside the fsaverage 'bem' folder.
src_fname = os.path.join(fs_dir, 'bem', 'fsaverage-ico-5-src.fif')
bem = os.path.join(fs_dir, 'bem', 'fsaverage-5120-5120-5120-bem-sol.fif')

eeg_montage='biosemi64'
# Measurement info (channel definitions) of the concatenated epochs.
info = all_epochs.info

# Build the standard montage named by `eeg_montage` (biosemi64, not standard_1020).
# NOTE(review): the montage object is created but never applied to `info` in
# this cell -- confirm the electrode positions are already set on the epochs.
montage = mne.channels.make_standard_montage(eeg_montage)

# Check that the locations of EEG electrodes is correct with respect to MRI
#mne.viz.plot_alignment(
#    info, src=src_fname, eeg=['original', 'projected'], trans=trans,
#    show_axes=False, mri_fiducials=True, dig='fiducials')
# EEG-only forward solution on the fsaverage template.
fwd = mne.make_forward_solution(info, trans=trans, src=src_fname,
                                bem=bem, eeg=True, n_jobs=None)

In [None]:
# Inverse-solution settings. NOTE: `method` is assigned here but is not passed
# to any call in this cell.
method = "eLORETA"
snr = 3.
# Regularisation parameter derived from the assumed signal-to-noise ratio.
lambda2 = 1. / snr ** 2
# Covariance is estimated from the epochs as given: no tmin/tmax is passed, so
# the whole epoch window is used.
cov = mne.compute_covariance(all_epochs, method='auto') # note this is not average referenced
cov.plot(all_epochs.info)
inverse_operator = mne.minimum_norm.make_inverse_operator(info, fwd, cov)
#low_motor = get_forward_results(output_dir, 'low',low_motor_sensor,inverse_operator,fwd, lambda2)
#high_motor = get_forward_results(output_dir, 'high',high_motor_sensor,inverse_operator,fwd, lambda2)

In [None]:
def get_all_tcs(output_dir, overwrite=False):
    """Return the label time courses of every epoch in the global ``all_epochs``.

    The result is cached as ``source_time_courses.pickle``. When the cache file
    exists and ``overwrite`` is False it is loaded instead of recomputed.

    Relies on notebook globals: ``all_epochs``, ``inverse_operator``, ``snr``,
    ``fwd``, ``rel_labels``, ``rel_mappings`` and ``plot_source_time_course``.

    Parameters
    ----------
    output_dir : str
        Prefix of the cache file. It is concatenated with the file name WITHOUT
        a separator, so it must end with a path separator to address a
        directory (kept as-is so existing cache files are still found).
    overwrite : bool
        Recompute and rewrite the cache even if it already exists.

    Returns
    -------
    numpy.ndarray
        Per-batch time courses concatenated along the first (epoch) axis.

    Raises
    ------
    ValueError
        If ``all_epochs`` is empty and no cache is used.
    """
    cache_path = f"{output_dir}source_time_courses.pickle"

    if not overwrite and os.path.isfile(cache_path):
        # NOTE: pickle executes arbitrary code on load; only read caches written by this notebook.
        with open(cache_path, 'rb') as handle_tcs:
            all_tcs = pickle.load(handle_tcs)
        return np.concatenate(all_tcs)

    n_epochs = len(all_epochs)
    if n_epochs == 0:
        raise ValueError("all_epochs is empty; there are no source time courses to compute")

    bin_size = 500 # number of epochs inverted per batch
    mode = 'mean'
    all_tcs = []
    # range() stops before n_epochs, so no empty trailing batch is produced when
    # the epoch count is an exact multiple of bin_size (the previous
    # `count*bin_size <= len(all_epochs)` test ran one extra, empty batch).
    for count, start in enumerate(range(0, n_epochs, bin_size)):
        print('bin',count)
        stcs = mne.minimum_norm.apply_inverse_epochs(all_epochs[start:start + bin_size], inverse_operator,
                                    lambda2=1.0 / snr ** 2, verbose=False,
                                    method="eLORETA") # , pick_ori="normal"?
        tcs = mne.extract_label_time_course(stcs, rel_labels, src=fwd['src'], mode=mode,verbose=False)
        all_tcs.append(tcs)

    # plot the first epoch of the last batch as a visual sanity check
    plot_source_time_course(ltc=tcs[0], orig_stc=stcs[0], label=rel_labels[0], mode = 'mean',rel_mappings=rel_mappings)
    with open(cache_path, 'wb') as handle_tcs:
        pickle.dump(all_tcs, handle_tcs, protocol=pickle.HIGHEST_PROTOCOL)
    return np.concatenate(all_tcs)


In [None]:
# NOTE(review): `output_dir_non_baseline_non_average` is not defined in the cells
# shown here -- confirm it is assigned before this cell runs.
all_tcs = get_all_tcs(output_dir_non_baseline_non_average,overwrite=False)
# Sanity check: one source time-course entry is expected per epoch.
assert len(all_tcs) == len(all_epochs), 'the source data is not the same size as the motor_epochs, check this issue'


### Feature Extraction

In [None]:
def eeg_features(df, data_type = 'processed', features = 'all', label_source = 'Steering_Wheel_Degree_Encoded', 
                 cleaned_up = False):
    """Select the EEG feature columns of ``df``.

    The EEG block is the contiguous column range from ``Fp1_4-8_Hz_Power`` to
    ``O2_32-55_Hz_Sample_entropy`` (both inclusive); the ``raw`` variant uses
    the same names with a ``_raw`` suffix.

    Parameters
    ----------
    df : pandas.DataFrame
    data_type : {'processed', 'raw'}
        Which set of EEG columns (and which autoreject flag column) to use.
    features : 'all', str, or iterable of str
        ``'all'`` keeps every EEG column. Otherwise the value(s) are joined with
        ``|`` and used as a regular expression matched against column names.
        A single string is treated as one pattern.
    label_source : str
        Label column; only read when ``cleaned_up`` is True.
    cleaned_up : bool
        If True, keep only trials whose label is not null and whose autoreject
        flag equals False, and return ``(X, y)`` as numpy arrays. If False,
        return the selected feature DataFrame unfiltered.

    Raises
    ------
    ValueError
        If ``data_type`` is not 'processed' or 'raw'.
    """
    suffix_by_type = {'processed': '', 'raw': '_raw'}
    if data_type not in suffix_by_type:
        # Previously an unknown value surfaced as an UnboundLocalError further down.
        raise ValueError(f"data_type must be 'processed' or 'raw', got {data_type!r}")
    suffix = suffix_by_type[data_type]

    first_electrode_idx = df.columns.get_loc(f"Fp1_4-8_Hz_Power{suffix}")
    last_electrode_idx = df.columns.get_loc(f"O2_32-55_Hz_Sample_entropy{suffix}")
    all_eeg_features = df.iloc[:, first_electrode_idx:last_electrode_idx + 1]

    if isinstance(features, str) and features == 'all':
        selected = all_eeg_features
    else:
        # A bare string must stay one pattern: joining it directly would split it
        # into single characters ("Power" -> "P|o|w|e|r") and match nearly every column.
        wanted = [features] if isinstance(features, str) else features
        pattern = "|".join(map(str, wanted))
        selected = all_eeg_features.loc[:, all_eeg_features.columns.str.contains(pattern)]

    if not cleaned_up:
        return selected

    # The label and autoreject columns are only needed (and only looked up) here.
    valid_trial = (df[label_source].notnull()) & (df[f"autorejected{suffix}"] == False)
    return np.asarray(selected[valid_trial]), np.asarray(df[label_source][valid_trial])

In [None]:
def eye_features(df, features = "pupil", label_source = 'Steering_Wheel_Degree_Encoded', cleaned_up = False):
    """Select eye-tracking feature columns of ``df``.

    ``features == 'pupil'`` selects the left and right pupil-diameter columns;
    any other value is used directly as the column selection.

    With ``cleaned_up`` False the selected DataFrame is returned. With
    ``cleaned_up`` True the label column is joined on, rows containing any NaN
    are dropped, and ``(X, y)`` is returned as numpy arrays (the label being
    the last joined column).
    """
    chosen_columns = ['Left Pupil Diameter', 'Right Pupil Diameter'] if features == 'pupil' else features
    eye_df = df[chosen_columns]

    if not cleaned_up:
        return eye_df

    labelled = eye_df.join(df[label_source]).dropna()
    feature_values = np.asarray(labelled.iloc[:, :-1])
    label_values = np.asarray(labelled.iloc[:, -1])
    return feature_values, label_values

In [None]:
def ecg_features(df, features = "all", label_source = 'Steering_Wheel_Degree_Encoded', cleaned_up = False):
    """Select ECG feature columns of ``df``.

    ``features == 'all'`` takes the positional column range that starts at
    ``bpm`` and stops (exclusively) two positions before ``breathingrate``, so
    ``breathingrate`` and the two columns preceding it are left out.
    NOTE(review): presumably intentional -- confirm which columns should be excluded.
    Any other value of ``features`` is used directly as the column selection.

    With ``cleaned_up`` False the selected DataFrame is returned. With
    ``cleaned_up`` True the label column is joined on, rows containing any NaN
    are dropped, and ``(X, y)`` is returned as numpy arrays.
    """
    # Both columns are looked up unconditionally, so they must exist in `df`.
    start_pos = df.columns.get_loc("bpm")
    breathing_pos = df.columns.get_loc("breathingrate")

    ecg_df = df.iloc[:, start_pos:breathing_pos - 2] if features == 'all' else df[features]

    if not cleaned_up:
        return ecg_df

    labelled = ecg_df.join(df[label_source]).dropna()
    return np.asarray(labelled.iloc[:, :-1]), np.asarray(labelled.iloc[:, -1])

In [None]:
def multimodal_features(df, label_source = 'Steering_Wheel_Degree_Encoded'):
    """Build one feature matrix from the EEG, eye and ECG columns of ``df``.

    The default selections of ``eeg_features``, ``eye_features`` and
    ``ecg_features`` are concatenated column-wise with the label column; rows
    containing any NaN are dropped.

    NOTE(review): unlike ``eeg_features(..., cleaned_up=True)``, no autoreject
    filtering is applied here -- confirm that is intended.

    Returns
    -------
    (X, y) : tuple of numpy.ndarray
        All feature columns, and the label column (last concatenated column).
    """
    # Forward the requested label column so the EEG selection does not insist
    # on the default label column being present when another one is asked for.
    all_features_list = [eeg_features(df, label_source = label_source), eye_features(df),
                         ecg_features(df), df[label_source]]
    all_features_df = pd.concat(all_features_list, axis = 1).dropna()

    return np.asarray(all_features_df.iloc[:,0:-1]), np.asarray(all_features_df.iloc[:,-1])

### Feature Normalization

In [None]:
def norm_features(x_train, x_test):
    """Min-max scale both sets using statistics of the training set only.

    The scaler is fitted on ``x_train`` and then applied to ``x_train`` and
    ``x_test``; returns ``(x_train_norm, x_test_norm)``.
    """
    fitted_scaler = MinMaxScaler().fit(x_train)
    return fitted_scaler.transform(x_train), fitted_scaler.transform(x_test)

### Feature Normalization with Train/Test Split

In [None]:
def feature_normalization(x_data, y_label, train_percentage=0.8):
    """Drop samples containing NaN, split into train/test, and min-max scale.

    Parameters
    ----------
    x_data : array-like
        Feature array; the first axis indexes samples.
    y_label : array-like
        One label per sample.
    train_percentage : float
        Passed to ``train_test_split`` as ``train_size``.

    Returns
    -------
    x_train_norm, x_test_norm, y_train, y_test
        Features are scaled with a ``MinMaxScaler`` fitted on the training part
        only. The split uses the notebook-level seed ``rs``.
    """
    # Flag every sample that has a NaN anywhere in its features. The vectorised
    # check replaces a Python-level sum(sum(...)) that only worked for 2-D input.
    nan_mask = np.isnan(x_data)
    rows_with_nan = nan_mask.any(axis=tuple(range(1, nan_mask.ndim)))

    if rows_with_nan.any():
        keep = ~rows_with_nan
        x_data_corrected = np.asarray(x_data)[keep]
        y_label_corrected = np.asarray(y_label)[keep]
    else:
        x_data_corrected = x_data
        y_label_corrected = y_label

    x_train, x_test, y_train, y_test = train_test_split(x_data_corrected, y_label_corrected,
                                                        train_size = train_percentage, random_state=rs)

    norm_data = MinMaxScaler().fit(x_train)
    x_train_norm = norm_data.transform(x_train)
    x_test_norm = norm_data.transform(x_test)

    return x_train_norm, x_test_norm, y_train, y_test


### Classification Function

In [None]:
def trial_classification(train_data, test_data, train_label, test_label, classfier,
                         data_type = None, save_plots = True, plot_fig = True):
    """Fit one classifier and report its train/test ROC AUC.

    Parameters
    ----------
    train_data, test_data : array-like
        Feature matrices handed directly to the estimator.
    train_label, test_label : array-like
        Class labels. ``label - 1`` is passed to the ROC routines, so labels are
        presumably coded 1 (easy) / 2 (hard) -- confirm against the caller.
    classfier : {'logistic', 'svm', 'knn', 'random_forest'}
        Estimator to fit (parameter name kept as-is for existing callers).
    data_type : str, optional
        Only used as a prefix in the file names of saved figures.
    save_plots : bool
        Save the figures under ``../output/classification_result/``
        (only has an effect when ``plot_fig`` is True).
    plot_fig : bool
        Draw the ROC curves, confusion matrices and coefficient plot.

    Returns
    -------
    AUC_train, AUC_test, importance
        ``importance`` is the coefficient vector (logistic, svm), the feature
        importances (random forest), or ``None`` for kNN, which has neither.

    Raises
    ------
    ValueError
        If ``classfier`` is not one of the supported names.
    """
    # kNN exposes no coefficients. Previously `importance` was left unbound for
    # it and the final return raised UnboundLocalError.
    importance = None

    if classfier == 'logistic':
        model = LogisticRegression(solver="liblinear", penalty = 'l1', random_state=0).fit(train_data, train_label)
        score_train = model.decision_function(train_data)
        score_test = model.decision_function(test_data)
        importance = model.coef_[0]

    elif classfier == 'svm':
        model = SGDClassifier(penalty = 'l1',l1_ratio=1, random_state=0).fit(train_data, train_label)
        score_train = model.decision_function(train_data)
        score_test = model.decision_function(test_data)
        importance = model.coef_[0]

    elif classfier == 'knn':
        model = KNeighborsClassifier(n_neighbors = 3).fit(train_data, train_label)
        score_train = model.predict_proba(train_data)[:,1]
        score_test = model.predict_proba(test_data)[:,1]

    elif classfier == 'random_forest':
        model = RandomForestClassifier(n_estimators=100, max_features = 'sqrt', random_state = rs,
                                       bootstrap=True, class_weight = 'balanced_subsample',
                                       max_samples=None, n_jobs = -1).fit(train_data, train_label)
        score_train = model.predict_proba(train_data)[:,1]
        score_test = model.predict_proba(test_data)[:,1]
        importance = model.feature_importances_

    else:
        raise ValueError(f"Unknown classifier {classfier!r}; expected 'logistic', 'svm', 'knn' or 'random_forest'")

    # Always predict in the label coding of the data. The kNN branch used to
    # threshold the probability into 0/1, which never matched the labels.
    train_pred = model.predict(train_data)
    test_pred = model.predict(test_data)

    fpr_train, tpr_train, thresholds_train = metrics.roc_curve(train_label-1, score_train)
    AUC_train = metrics.roc_auc_score (train_label-1, score_train)

    fpr_test, tpr_test, thresholds_test = metrics.roc_curve(test_label-1, score_test)
    AUC_test = metrics.roc_auc_score (test_label-1, score_test)

    if plot_fig:
        # ROC Curve
        sns.set(font_scale=2)
        # matplotlib >= 3.6 renamed the seaborn style sheets; use whichever name exists.
        style_name = 'seaborn-v0_8-white' if 'seaborn-v0_8-white' in plt.style.available else 'seaborn-white'
        plt.style.use(style_name)
        fig = plt.figure(figsize = [25,7])

        axe = fig.add_subplot(1,2,1)
        axe.plot(fpr_train,tpr_train)
        axe.set_xlabel("False Positive Rate")
        axe.set_ylabel("True Positive Rate")
        axe.set_title("Training ROC Curve")
        axe.text(0.6,0.2,"AUC = {:.2f}".format(AUC_train))

        axe = fig.add_subplot(1,2,2)
        axe.plot(fpr_test,tpr_test)
        axe.set_xlabel("False Positive Rate")
        axe.set_ylabel("True Positive Rate")
        axe.set_title("Testing ROC Curve")
        axe.text(0.6,0.2,"AUC = {:.2f}".format(AUC_test))

        if save_plots:
            fig.savefig(f"../output/classification_result/{data_type}_data_Training_Testing_ROC_Curve.png", dpi=300)

        # Confusion Matrix
        fig_cnf = plt.figure(figsize = [20, 5])
        ax1 = fig_cnf.add_subplot(1,2,1)
        ax2 = fig_cnf.add_subplot(1,2,2)

        cnf_matrix_train = metrics.confusion_matrix(train_label, train_pred)
        cnf_matrix_test = metrics.confusion_matrix(test_label, test_pred)

        sns.heatmap(cnf_matrix_train, fmt = 'g', annot = True, xticklabels = ['Easy','Hard'], yticklabels = ['Easy','Hard'],ax=ax1)
        ax1.set_title("Training Confusion Matrix")
        sns.heatmap(cnf_matrix_test, fmt = 'g', annot = True, xticklabels = ['Easy','Hard'], yticklabels = ['Easy','Hard'],ax=ax2)
        ax2.set_title("Testing Confusion Matrix")

        if save_plots:
            fig_cnf.savefig(f"../output/classification_result/{data_type}_data_Training_Testing_Confusion_Matrix.png", dpi=300)

        # Features Importance (skipped for estimators that provide none)
        if importance is not None:
            fig_importance = plt.figure(figsize = [10 ,3])
            axe = fig_importance.add_subplot(1,1,1)

            markerline, stemline, baseline = axe.stem(list(range(len(importance))), importance,
                                                      linefmt='k-',markerfmt='ko',basefmt='k.')
            plt.setp(stemline, linewidth = 1)
            plt.setp(markerline, markersize = 1)
            axe.set_xlabel("Feature")
            axe.set_ylabel("Importance")
            axe.set_title("Coefficient for Each Features")
            if save_plots:
                fig_importance.savefig(f"../output/classification_result/{data_type}_data_coefficient.png")

        plt.show()

    return AUC_train, AUC_test, importance

### Cross validation function

In [None]:
def modality_cv(x_modality, y_modality, n_folds = 10, classifier = 'logistic'):
    """Stratified k-fold cross-validation of one classifier on one modality.

    For every fold the features are normalised with ``norm_features`` and the
    classifier is evaluated with ``trial_classification`` (plots disabled).
    Folds are shuffled with the notebook-level seed ``rs``.

    Returns
    -------
    numpy.ndarray of shape (2,)
        Mean training AUC and mean test AUC over the folds, in that order.
    """
    splitter = StratifiedKFold(n_splits = n_folds, random_state=rs, shuffle=True)
    # Row 0 collects the training AUC of each fold, row 1 the test AUC.
    auc_list = np.empty((2, n_folds))

    for fold, (train_index, test_index) in enumerate(splitter.split(x_modality, y_modality)):
        x_train_norm, x_test_norm = norm_features(x_modality[train_index], x_modality[test_index])
        train_auc, test_auc, _ = trial_classification(x_train_norm, x_test_norm,
                                                      y_modality[train_index], y_modality[test_index],
                                                      classifier, plot_fig = False)
        auc_list[:, fold] = (train_auc, test_auc)

    return auc_list.mean(axis = 1)


### Root Mean Square Error (RMSE) Calculation

In [None]:
def calculate_rmse(df, modality, true_val_col = 'Steering_Wheel_Degree', features_list = 'all'):
    """Fit the global regressor ``regr`` on one modality and return its test RMSE.

    Parameters
    ----------
    df : pandas.DataFrame
    modality : {'EEG', 'Eye', 'ECG', 'All'}
        Which feature extractor to use. For ``'All'`` the ``features_list``
        argument is ignored.
    true_val_col : str
        Column holding the regression target.
    features_list : 'all' or list of str
        Feature selection forwarded to the modality's extractor. For ``'Eye'``,
        ``'all'`` expands to the raw and evoked pupil-diameter columns.

    Returns
    -------
    y_test, y_pred, modality_rmse

    Raises
    ------
    ValueError
        If ``modality`` is not one of the supported names.

    Notes
    -----
    ``regr`` is a notebook-level estimator and is re-fitted in place on every
    call. The 70/30 split uses the notebook-level seed ``rs``.
    """
    if modality == "EEG":
        x_modality, y_modality = eeg_features(df, features = features_list, label_source = true_val_col,
                                              cleaned_up = True)
    elif modality == "Eye":
        if features_list == 'all':
            features_list = ["Left Pupil Diameter", "Right Pupil Diameter",
                            "Left Evoked Pupil Diameter", "Right Evoked Pupil Diameter"]
        x_modality, y_modality = eye_features(df, features = features_list, label_source = true_val_col,
                                              cleaned_up = True)
    elif modality == "ECG":
        x_modality, y_modality = ecg_features(df, features = features_list, label_source = true_val_col,
                                              cleaned_up = True)
    elif modality == "All":
        x_modality, y_modality = multimodal_features(df, label_source = true_val_col)
    else:
        # Previously an unknown modality surfaced as an UnboundLocalError below.
        raise ValueError(f"Unknown modality {modality!r}; expected 'EEG', 'Eye', 'ECG' or 'All'")

    X_train, X_test, y_train, y_test = train_test_split(x_modality, y_modality, test_size=0.3, random_state=rs)
    regr.fit(X_train, y_train)

    y_pred = regr.predict(X_test)
    # sqrt(MSE) gives the RMSE on every scikit-learn version. The `squared=False`
    # keyword was deprecated and later removed. The target here is one column.
    modality_rmse = np.sqrt(mean_squared_error(y_test, y_pred))

    return y_test, y_pred, modality_rmse

### Process and Save All Data

In [None]:
# all_dfs = None

# ica_epochs_dict = {}
# ica_dict = {}
# eog_idx_dict = {}
# events_dict = {}

# for each_file in onlyfiles:
#     input_path = data_dir + each_file
    
#     sbj_id = each_file[each_file.find('Sbj_')+4:each_file.find('-Ssn')]
#     ssn_no = each_file[each_file.find('Ssn_')+4:each_file.find('.dats')]

#     if len(sbj_id) < 2: sbj = "sbj0"+sbj_id
#     else: sbj = "sbj"+sbj_id
#     if len(ssn_no) < 2: ssn = "ssn0"+ssn_no
#     else: ssn = "ssn"+ssn_no
    
#     if sbj+ssn == "sbj20ssn03":
#         ref_ica = None
#     else: 
#         ref_ica = ica_dict['sbj20ssn03']
    
#     with open(input_path, 'rb') as handle:
#         rns_data = pickle.load(handle)

#     ## Add metadata to data

#     for key in rns_data.keys():
#         rns_data[key].append(return_metadata_from_name(key, metadata_jsons))

#     event_df = event_data_from_data(rns_data, interrupted_id_sessions=[(13,1), (22,1)])
#     event_df['trial_damage'] = event_df.damage.diff().fillna(0)
#     event_df['trial_duration'] = event_df.trial_end_time - event_df.trial_start_time

#     percent_missing = event_df.notnull().sum() / len(event_df)
#     summary_statistics = {}
#     summary_statistics['voice_success_rate'] = percent_missing['voice_timestamp']
#     if 'chunk_timestamp' in percent_missing:
#         summary_statistics['chunk_success_rate'] = percent_missing['chunk_timestamp']
#     else:
#         summary_statistics['chunk_success_rate'] = 0

#     # temporary fix for pilot phase where we had some incomplete data
#     if 'block_condition' not in event_df:
#         event_df['block_condition'] = 'practice'
#         event_df.loc[5:,'block_condition'] = 'voice'

#     event_df['spoken_difficulty_encoded'] = event_df.spoken_difficulty.replace(to_replace=['easy', 'hard', 'unknown'],
#                                                                           value=[1, 2, None])

#     # ecg
#     post_processed_event_df = process_session_ecg(rns_data, event_df,plot_frequency=20,plot_ecg_snippet=40)

#     # eye
#     # post_processed_event_df = process_session_eye(rns_data, post_processed_event_df,detect_blink=True,pretrial_period=0,
#     #                                               posttrial_period=0,plot_frequency=20, plot_eye_snippet=40, classifiers=['NSLR'])
#     if 'Unity_ViveSREyeTracking' in rns_data:
#         post_processed_event_df = process_session_eye(rns_data, post_processed_event_df,detect_blink=True,
#                                                       pretrial_period=0, posttrial_period=0, plot_frequency=20, 
#                                                       plot_eye_snippet=40, classifiers=['NSLR'])

#     # eeg
#     post_processed_event_df, epochs, events, info, reject_log, ica, eog_idx= process_session_eeg(rns_data, post_processed_event_df,
#                                 run_autoreject=True, run_ica=True, save_raw_eeg = True, sbj_session = sbj+ssn, 
#                                 template_ica = ref_ica, analyze_pre_ica = True)
    
#     # motor
#     post_processed_event_df, turns_df = process_session_motor(rns_data, post_processed_event_df, motor_channel='Unity_MotorInput',
#                                                 plot_motor_result = True, plot_motor_snippet = 30, plot_frequency = 10)
#     if motor_events:
#         post_processed_event_df = turns_df
    
#     events_dict[sbj+ssn] = events
#     ica_epochs_dict[sbj+ssn] = epochs
#     ica_dict[sbj+ssn] = ica
#     eog_idx_dict[sbj+ssn] = eog_idx
    
#     # save data for later use
#     if save_data_pkl:
        
#         with open('../output/saved_files/pickle_files/all_events.pickle', 'wb') as handle_events:
#             pickle.dump(events_dict, handle_events, protocol=pickle.HIGHEST_PROTOCOL)
#         with open('../output/saved_files/pickle_files/ica_epochs.pickle', 'wb') as handle_ica_eps:
#             pickle.dump(ica_epochs_dict, handle_ica_eps, protocol=pickle.HIGHEST_PROTOCOL)
#         with open('../output/saved_files/pickle_files/ica.pickle', 'wb') as handle_ica:
#             pickle.dump(ica_dict, handle_ica, protocol=pickle.HIGHEST_PROTOCOL)
#         with open('../output/saved_files/pickle_files/eog_comp.pickle', 'wb') as handle_eog:
#             pickle.dump(eog_idx_dict, handle_eog, protocol=pickle.HIGHEST_PROTOCOL)

#     # save
#     post_processed_event_df.to_csv(f"{output_dir}ppid_{post_processed_event_df.iloc[0].ppid}_session_{post_processed_event_df.iloc[0].session}.csv")
#     if not type(all_dfs)==pd.core.frame.DataFrame:
#         all_dfs = post_processed_event_df
#     else:
#         all_dfs = pd.concat([all_dfs, post_processed_event_df], ignore_index=True)
    

In [None]:
# Process every recording in `onlyfiles`: build the per-trial event table, run
# the ECG / eye / EEG / motor pipelines on it, and collect the per-session
# results (CSV per session, pickled dictionaries, one combined DataFrame).
# NOTE(review): `onlyfiles`, `data_dir`, `metadata_jsons`, `motor_events` and
# `save_data_pkl` are not defined in the cells shown here -- confirm they are
# set before this cell runs.
all_dfs = None

# Per-session outputs, keyed by "<sbj><ssn>" (e.g. "sbj20ssn03").
ica_epochs_dict = {}
ica_dict = {}
eog_idx_dict = {}
events_dict = {}

for each_file in onlyfiles:
    input_path = data_dir + each_file
    
    # Subject and session numbers are sliced out of the file name
    # (between 'Sbj_' and '-Ssn', and between 'Ssn_' and '.dats').
    sbj_id = each_file[each_file.find('Sbj_')+4:each_file.find('-Ssn')]
    ssn_no = each_file[each_file.find('Ssn_')+4:each_file.find('.dats')]

    # Zero-pad single-digit ids so the keys have a fixed width.
    if len(sbj_id) < 2: sbj = "sbj0"+sbj_id
    else: sbj = "sbj"+sbj_id
    if len(ssn_no) < 2: ssn = "ssn0"+ssn_no
    else: ssn = "ssn"+ssn_no
    
    # Session sbj20ssn03 is the ICA template for every other session.
    # NOTE(review): the lookup raises KeyError unless sbj20ssn03 has already been
    # processed, so this depends on the order of `onlyfiles` -- confirm.
    if sbj+ssn == "sbj20ssn03":
        ref_ica = None
    else: 
        ref_ica = ica_dict['sbj20ssn03']
    
    # NOTE: pickle executes arbitrary code on load; only open trusted recordings.
    with open(input_path, 'rb') as handle:
        rns_data = pickle.load(handle)

    ## Add metadata to data

    for key in rns_data.keys():
        rns_data[key].append(return_metadata_from_name(key, metadata_jsons))

    event_df = read_event_data(rns_data) # typically only 15_1 and 22_1 will be used here, change below too

    if event_df.empty:
        continue
    
    # Keep only the 'voice' block trials.
    # NOTE(review): the columns assigned below are written onto this filtered
    # frame, which may trigger pandas' SettingWithCopyWarning.
    event_df = event_df[event_df.block_condition == 'voice']
    event_df['trial_damage'] = event_df.damage.diff().fillna(0)
    event_df['trial_duration'] = event_df.trial_end_time - event_df.trial_start_time

    # Despite its name this is the fraction of NON-null values per column.
    percent_missing = event_df.notnull().sum() / len(event_df)
    # NOTE(review): `summary_statistics` is filled but not read again in this cell.
    summary_statistics = {}
    summary_statistics['voice_success_rate'] = percent_missing['spoken_difficulty']
    # Missing answers become 'unknown', which is encoded as 0 (easy = 1, hard = 2).
    event_df['spoken_difficulty'] = event_df['spoken_difficulty'].fillna("unknown")
    event_df['spoken_difficulty_encoded'] = event_df.spoken_difficulty.replace(to_replace=['easy', 'hard', 'unknown'],
                                                                          value=[1, 2, 0])
    
#     event_df = event_data_from_data(rns_data, interrupted_id_sessions=[(13,1), (22,1)])
#     event_df = event_data_from_data(rns_data, ts_fixer, remove_id_sessions=remove_sessions, interrupted_id_sessions=interrupted_sessions)
#     if event_df.empty:
#         continue
    
#     event_df['trial_damage'] = event_df.damage.diff().fillna(0)
#     event_df['trial_duration'] = event_df.trial_end_time - event_df.trial_start_time

#     percent_missing = event_df.notnull().sum() / len(event_df)
#     summary_statistics = {}
#     summary_statistics['voice_success_rate'] = percent_missing['voice_timestamp']
#     if 'chunk_timestamp' in percent_missing:
#         summary_statistics['chunk_success_rate'] = percent_missing['chunk_timestamp']
#     else:
#         summary_statistics['chunk_success_rate'] = 0

#     # temporary fix for pilot phase where we had some incomplete data
#     if 'block_condition' not in event_df:
#         event_df['block_condition'] = 'practice'
#         event_df.loc[5:,'block_condition'] = 'voice'

#     event_df['spoken_difficulty_encoded'] = event_df.spoken_difficulty.replace(to_replace=['easy', 'hard', 'unknown'],
#                                                                           value=[1, 2, None])

    # ecg
    post_processed_event_df = process_session_ecg(rns_data, event_df,plot_frequency=20,plot_ecg_snippet=40)

    # eye
    # post_processed_event_df = process_session_eye(rns_data, post_processed_event_df,detect_blink=True,pretrial_period=0,
    #                                               posttrial_period=0,plot_frequency=20, plot_eye_snippet=40, classifiers=['NSLR'])
    # Eye processing only runs when the recording contains the eye-tracking stream.
    if 'Unity_ViveSREyeTracking' in rns_data:
        post_processed_event_df = process_session_eye(rns_data, post_processed_event_df,detect_blink=True,
                                                      pretrial_period=0, posttrial_period=0, plot_frequency=20, 
                                                      plot_eye_snippet=40, classifiers=['NSLR'])

    # eeg
    # NOTE: `info` returned here rebinds the notebook-level name `info`.
    post_processed_event_df, epochs, events, info, reject_log, ica, eog_idx= process_session_eeg(rns_data, post_processed_event_df,
                                run_autoreject=True, run_ica=True, save_raw_eeg = True, sbj_session = sbj+ssn, 
                                template_ica = ref_ica, analyze_pre_ica = True)
    
    # motor
    post_processed_event_df, turns_df = process_session_motor(rns_data, post_processed_event_df, motor_channel='Unity_MotorInput',
                                                plot_motor_result = True, plot_motor_snippet = 30, plot_frequency = 10)
    # When `motor_events` is set, the turn-level table replaces the trial-level one.
    if motor_events:
        post_processed_event_df = turns_df
    
    events_dict[sbj+ssn] = events
    ica_epochs_dict[sbj+ssn] = epochs
    ica_dict[sbj+ssn] = ica
    eog_idx_dict[sbj+ssn] = eog_idx
    
    # save data for later use
    # The four dictionaries are re-written in full on every iteration.
    # NOTE(review): these files go to 'corrected_timestamp/', whereas the load
    # cell further down reads ica_epochs / ica / eog_comp from
    # 'corrected_voice_timestamp/' -- confirm which directory is intended.
    if save_data_pkl:
        
        with open('../output/saved_files/corrected_timestamp/all_events.pickle', 'wb') as handle_events:
            pickle.dump(events_dict, handle_events, protocol=pickle.HIGHEST_PROTOCOL)
        with open('../output/saved_files/corrected_timestamp/ica_epochs.pickle', 'wb') as handle_ica_eps:
            pickle.dump(ica_epochs_dict, handle_ica_eps, protocol=pickle.HIGHEST_PROTOCOL)
        with open('../output/saved_files/corrected_timestamp/ica.pickle', 'wb') as handle_ica:
            pickle.dump(ica_dict, handle_ica, protocol=pickle.HIGHEST_PROTOCOL)
        with open('../output/saved_files/corrected_timestamp/eog_comp.pickle', 'wb') as handle_eog:
            pickle.dump(eog_idx_dict, handle_eog, protocol=pickle.HIGHEST_PROTOCOL)

    # save
    # NOTE: `output_dir` is concatenated with the file name without a separator,
    # so it must end with one for the CSV to land inside that directory.
    post_processed_event_df.to_csv(f"{output_dir}ppid_{post_processed_event_df.iloc[0].ppid}_session_{post_processed_event_df.iloc[0].session}.csv")
    # Grow the combined table; the first processed session initialises it.
    if not type(all_dfs)==pd.core.frame.DataFrame:
        all_dfs = post_processed_event_df
    else:
        all_dfs = pd.concat([all_dfs, post_processed_event_df], ignore_index=True)
    

In [None]:
# Progress marker: printed once execution reaches this cell.
print('done')

In [None]:
# from pivottablejs import pivot_ui
# Write the combined per-trial table twice: as CSV and as an Excel workbook.
all_dfs.to_csv(f"../output/saved_files/corrected_voice_timestamp/all_results.csv")
all_dfs.to_excel(f"../output/saved_files/corrected_voice_timestamp/all_results.xlsx")
# all_dfs.to_excel(f"{output_dir}all_results.xlsx")
# pivot_ui(all_dfs, outfile_path=f"{output_dir}all_results.html");

### Epoching Raw EEG Data

In [None]:
# Epoch the filtered raw EEG of every session and pickle the results.

if epoch_raw_eeg:

    # Event arrays saved by the processing loop, keyed by "<sbj><ssn>".
    with open('../output/saved_files/corrected_timestamp/all_events.pickle', 'rb') as handle:
        all_events = pickle.load(handle)

    raw_eeg_dir = '../output/saved_files/raw_eeg/'
    event_dict = {'easy': 1, 'hard': 2}

    # Autoreject settings (identical for every session).
    autoreject_epochs = 20   # fit on the first few epochs only, to save time
    run_autoreject = True

    raw_eeg_dict = {}
    raw_epochs_dict = {}

    for sbj_ssn in all_events:

        each_raw_eeg = f"{sbj_ssn}_eeg_filt_raw.fif"
        raw_eeg_path = f"{raw_eeg_dir}{each_raw_eeg}"
        raw_eeg = mne.io.read_raw_fif(raw_eeg_path, preload=True)
        raw_eeg_dict[sbj_ssn] = raw_eeg

        # Epochs run from -0.2 s to 3 s and are baseline-corrected on the
        # pre-stimulus interval.
        epochs_raw = mne.Epochs(raw_eeg, all_events[sbj_ssn], event_id=event_dict,
                                tmin=-.2, tmax=3, baseline=(None, 0),
                                preload=True, on_missing='warn')

        if len(epochs_raw) < 10:
            # autoreject cross-validates internally and needs at least 10 epochs
            reject_log = None
        elif run_autoreject:
            ar_raw = autoreject.AutoReject(random_state=rs, n_jobs=1, verbose=False)
            ar_raw.fit(epochs_raw[:autoreject_epochs])
            epochs_ar, reject_log = ar_raw.transform(epochs_raw, return_log=True)
            epochs_raw = epochs_ar

        raw_epochs_dict[sbj_ssn] = epochs_raw

    with open('../output/saved_files/corrected_voice_timestamp/raw_epochs.pickle', 'wb') as handle_raw_eps:
        pickle.dump(raw_epochs_dict, handle_raw_eps, protocol=pickle.HIGHEST_PROTOCOL)
    with open('../output/saved_files/corrected_voice_timestamp/raw_eeg.pickle', 'wb') as handle_raw_eeg:
        pickle.dump(raw_eeg_dict, handle_raw_eeg, protocol=pickle.HIGHEST_PROTOCOL)


### Load All Processed Data and Dataframe

In [None]:
# Reload the combined per-trial table written by the export cell.
all_dfs = pd.read_csv("../output/saved_files/corrected_voice_timestamp/all_results.csv")

# Reload the pickled per-session dictionaries.
with open('../output/saved_files/corrected_voice_timestamp/ica_epochs.pickle', 'rb') as handle:
    all_proc_epochs = pickle.load(handle)
with open('../output/saved_files/corrected_voice_timestamp/ica.pickle', 'rb') as handle:
    all_ica = pickle.load(handle)
with open('../output/saved_files/corrected_voice_timestamp/eog_comp.pickle', 'rb') as handle:
    all_eog_comps = pickle.load(handle)
with open('../output/saved_files/corrected_voice_timestamp/raw_epochs.pickle', 'rb') as handle:
    all_raw_epochs = pickle.load(handle)

# Optionally save a figure of the first 20 ICA components of every session.
if save_ica_plts:
    ica_comp_dir = "../output/plots/ica_comps/"
    os.makedirs(ica_comp_dir, exist_ok=True)

    for sbj_ssn in all_ica:
        all_ica[sbj_ssn].plot_components(picks=list(range(20)),
                                         title=f"{sbj_ssn}_ICA_Components",
                                         show=False)
        plt.savefig(f"{ica_comp_dir}{sbj_ssn}_ica_comps.png")
        plt.close()

In [None]:
# Save, per session, a figure of the ICA components that were removed.
# Disabled by default; set the flag to True to regenerate the figures.

show_removed_comp = False

if show_removed_comp:
    for sbj in all_ica:
        # Sessions with no removed component produce no figure.
        if all_eog_comps[sbj] != []:
            all_ica[sbj].plot_components(picks=all_eog_comps[sbj], title=sbj, show=False)
            plt.savefig(f"../output/plots/Removed_Components_Corrmap/{sbj}_removed_components.png")
            plt.close()
    
# all_eog_comps.values()

In [None]:
# Raw epochs: merge all sessions, then pull out the data array and event ids.
raw_epochs_concat = mne.concatenate_epochs(list(all_raw_epochs.values()))
raw_epochs_eeg = raw_epochs_concat.get_data()
raw_labels = raw_epochs_concat.events[:, 2]

# Processed (ICA-cleaned) epochs: same three steps.
proc_epochs_concat = mne.concatenate_epochs(list(all_proc_epochs.values()))
proc_epochs_eeg = proc_epochs_concat.get_data()
proc_labels = proc_epochs_concat.events[:, 2]

In [None]:
# raw_epochs_concat['easy'].plot(n_epochs=5, n_channels=3)
# raw_epochs_concat['hard'].plot(n_epochs=5, n_channels=3)
# proc_epochs_concat['easy'].plot(n_epochs=5, n_channels=3)
# proc_epochs_concat['hard'].plot(n_epochs=5, n_channels=3)

In [None]:
# sorted_raw_epoch = dict(sorted(all_raw_epochs.items()))
# sorted_proc_epoch = dict(sorted(all_proc_epochs.items()))

# raw_epochs_train = mne.concatenate_epochs(list(sorted_raw_epoch.values())[:-4]).get_data()
# raw_epochs_test = mne.concatenate_epochs(list(sorted_raw_epoch.values())[-4:]).get_data()
# proc_epochs_train = mne.concatenate_epochs(list(sorted_proc_epoch.values())[:-4]).get_data()
# proc_epochs_test = mne.concatenate_epochs(list(sorted_proc_epoch.values())[-4:]).get_data()
# all_raw_epochs

### Time-Frequency Analysis

In [None]:
from mne.time_frequency import tfr_morlet, psd_multitaper, psd_welch

# One epoch (index 20) from each difficulty condition
epochs_easy = raw_epochs_concat['easy'][20]
epochs_hard = raw_epochs_concat['hard'][20]

# 28 linearly spaced frequencies from 4 to 56 Hz; the number of wavelet cycles
# grows with frequency (half the frequency value).
# freq_range = np.logspace(*np.log10([4, 55]), num=15)
freq_range = np.linspace(4, 56, 28)
n_cycles = freq_range / 2.

power_easy, itc_easy = tfr_morlet(epochs_easy, freqs=freq_range, n_cycles=n_cycles, use_fft=True, return_itc=True, n_jobs=1)
power_hard, itc_hard = tfr_morlet(epochs_hard, freqs=freq_range, n_cycles=n_cycles, use_fft=True, return_itc=True, n_jobs=1)

sel_chan = 15  # index of the channel to plot

# power_easy.plot_topo(baseline=(-0.5, -.2), mode='mean', title='Average power')
# power_easy.plot([sel_chan], baseline=(-3.2, -3), mode='mean', title=power_easy.ch_names[sel_chan])
# The previous call here had a truncated `baseline=(-` argument (a syntax error).
# baseline=None is used so both conditions are plotted the same way.
# NOTE(review): restore a baseline window (e.g. the commented (-3.2, -3)) on both plots if one was intended.
power_easy.plot([sel_chan], baseline=None, mode='mean', title=power_easy.ch_names[sel_chan])

# power_hard.plot_topo(baseline=None, mode='mean', title='Average power')
power_hard.plot([sel_chan], baseline=None, mode='mean', title=power_hard.ch_names[sel_chan])

### Feature Extraction and Classification (All Participants)

In [None]:
# Keep only trials whose spoken difficulty code is present and non-zero
all_dfs_cleaned_up = all_dfs.copy()
all_dfs_cleaned_up = all_dfs_cleaned_up[(all_dfs_cleaned_up.spoken_difficulty_encoded != 0) & 
                                        (all_dfs_cleaned_up.spoken_difficulty_encoded.notnull())]
all_dfs_cleaned_up = clean_up_adadrive_trials(all_dfs_cleaned_up)

pupil_df = pd.read_csv(f"../output/pupil_exposure/participant_level_exposure_fits.csv")
# Preserve the uncorrected measurement before adjusting it in place below
all_dfs_cleaned_up['Raw Left Pupil Diameter'] = all_dfs_cleaned_up['Left Pupil Diameter']
p_val_criteria = 0.05

pupil_col = all_dfs_cleaned_up.columns.get_loc('Left Pupil Diameter')

# reset_index makes the iteration label equal to the row position, so `index`
# can be used with .iloc on the original frame.
for index, row in all_dfs_cleaned_up.reset_index(drop=True).iloc[1:].iterrows():
    previous_row = all_dfs_cleaned_up.iloc[index-1]
    last_ppid = previous_row.ppid
    last_session = previous_row.session
    last_trial = previous_row.trial
    last_opacity = previous_row.density
    if (row.ppid == last_ppid) and (row.session == last_session) and (row.trial == last_trial+1): # if continuous
        participant_fit = pupil_df.loc[pupil_df['sub']==last_ppid]
        if participant_fit.empty:
            # no fit available for this participant -> nothing to correct
            continue
        # if there is a significant effect of opacity on pupil
        # (scalars are extracted explicitly; the first matching fit row is used)
        if participant_fit['p_opacities'].iloc[0] < p_val_criteria:
            weight = participant_fit['w_opacities'].iloc[0]
            adjustment = (row.density-last_opacity)*weight
            all_dfs_cleaned_up.iloc[index, pupil_col] -= adjustment
            
all_dfs_cleaned_up = all_dfs_cleaned_up[all_dfs_cleaned_up['Left Pupil Diameter'].notnull()]

In [None]:
## EEG Features Extraction
# Both EEG variants and the pupil feature share the same label column and cleaned_up flag.
shared_kwargs = dict(label_source='spoken_difficulty_encoded', cleaned_up=False)

raw_eeg_features = eeg_features(all_dfs_cleaned_up, 'raw', **shared_kwargs)
processed_eeg_features = eeg_features(all_dfs_cleaned_up, 'processed', **shared_kwargs)

pupil_dia = eye_features(all_dfs_cleaned_up, features=['Left Pupil Diameter'], **shared_kwargs)

In [None]:
# Feature matrices: EEG features with the pupil feature(s) appended column-wise
x_raw_data = np.asarray(pd.concat((raw_eeg_features, pupil_dia), axis=1))
x_proc_data = np.asarray(pd.concat((processed_eeg_features, pupil_dia), axis=1))
# Label vector taken from the same cleaned-up trial table
y_labels = np.asarray(all_dfs_cleaned_up.spoken_difficulty_encoded)

In [None]:
## EEG Features Normalization and Data Split
# Processed features first, then raw features; both use the same label vector.
(proc_train_norm, proc_test_norm,
 y_train_proc, y_test_proc) = feature_normalization(x_proc_data, y_labels)
(raw_train_norm, raw_test_norm,
 y_train_raw, y_test_raw) = feature_normalization(x_raw_data, y_labels)

In [None]:
# Trial Difficulty Classification - Raw EEG
# Logistic classifier on the raw-EEG split; returns train/test AUC and coefficients.
raw_split = (raw_train_norm, raw_test_norm, y_train_raw, y_test_raw)
AUC_train_raw, AUC_test_raw, coefs_raw = trial_classification(*raw_split, 'logistic', 'raw')

In [None]:
# Trial Difficulty Classification - Artifacts Removed EEG
# Same classifier as the raw-EEG cell, applied to the processed split.
proc_split = (proc_train_norm, proc_test_norm, y_train_proc, y_test_proc)
AUC_train_proc, AUC_test_proc, coefs_proc = trial_classification(*proc_split, 'logistic', 'processed')

# print(f"Training Accuracy with Artifacts Removal: {train_acc_proc:.2f} \n"
#       f"Training Label:      {y_train_proc} \n"
#       f"Training Prediction: {train_pred_proc} \n"
#       f"Test Accuracy with Artifacts Removal: {test_acc_proc:.2f} \n"
#       f"Test Label:      {y_test_proc} \n"
#       f"Test Prediction: {test_pred_proc}")

### Important Features Table Generation

In [None]:
# Generate table for feature importance

def _rank_features(coefs, names, k=10):
    """Format the k smallest and k largest coefficients as "<name> - <value>" strings.

    Parameters
    ----------
    coefs : array-like
        Model coefficients; flattened first, so a (1, n) array is accepted too.
    names : list of str
        Feature names, positionally aligned with ``coefs``.
    k : int
        Number of entries to report at each end.

    Returns
    -------
    (bottom_k, top_k) : tuple of lists of str
        Both lists are in ascending coefficient order.
    """
    coefs = np.ravel(coefs)
    order = np.argsort(coefs)  # sort once and reuse for both ends

    def describe(i):
        return names[i] + f" - {coefs[i]:.2E}"

    return [describe(i) for i in order[:k]], [describe(i) for i in order[-k:]]


# features_list = list(processed_selected_channel_bp.columns)
features_list = list(processed_eeg_features.columns)
# NOTE(review): the visible extraction cell requests only 'Left Pupil Diameter' from
# eye_features, so 'Right Pupil Diameter' may not correspond to any coefficient — confirm.
features_list.extend(["Left Pupil Diameter","Right Pupil Diameter"])

bottom_10_raw_features, top_10_raw_features = _rank_features(coefs_raw, features_list)
bottom_10_proc_features, top_10_proc_features = _rank_features(coefs_proc, features_list)

features_ranking = {
    'Top 10 features for raw filtered data': top_10_raw_features,
    'Bottom 10 features for raw filtered data': bottom_10_raw_features,
    'Top 10 features for ICA processed raw filtered data': top_10_proc_features,
    'Bottom 10 features for ICA processed raw filtered data': bottom_10_proc_features
}

# Row labels run from the low end to the high end of each ascending list
df_features = pd.DataFrame(features_ranking, index = ['Low End','2','3','4','5','6','7','8','9','High End'])
# df_features.to_excel(f"{output_dir}saved_files/ranked_features_all.xlsx")
# NOTE(review): the path is built by plain concatenation, so output_dir must end with a path separator — confirm.
df_features.to_csv(f"{output_dir}saved_files/ranked_features_all.csv")

# print (len(top_10_raw_features),len(bottom_10_raw_features), len(top_10_proc_features), len(bottom_10_proc_features))

In [None]:
# df_features

### Within participant classification (Spoken Difficulty)

In [None]:
# Per-participant classification: one row of train/test AUCs (raw and processed EEG) per participant.
# NOTE(review): eeg_features / feature_normalization are unpacked here with a different number of
# return values than in the all-participants cells above — confirm which helper version is current.
selected_clf = 'random_forest'
participants_id = np.sort(all_dfs_cleaned_up.ppid.unique().astype(int))

participant_AUC = np.empty([len(participants_id), 4])

for i, participant in enumerate(participants_id):
    single_sbj_df = all_dfs_cleaned_up[all_dfs_cleaned_up.ppid == participant]

    # Trials labelled either 'easy' or 'hard'
    n_labelled = single_sbj_df['spoken_difficulty'].isin(['easy', 'hard']).sum()

    if n_labelled >= 10:
        raw_eeg_features, raw_data_features, raw_labels = eeg_features(single_sbj_df, 'raw')
        processed_eeg_features, processed_data_features, processed_labels = eeg_features(single_sbj_df, 'processed')

        (raw_train_norm, raw_test_norm, proc_train_norm, proc_test_norm,
         y_train_raw, y_test_raw, y_train_proc, y_test_proc) = feature_normalization(
            processed_data_features, processed_labels, raw_data_features, raw_labels,
            train_percentage = 0.9)

        AUC_train_raw, AUC_test_raw, coefs_raw = trial_classification(
            raw_train_norm, raw_test_norm, y_train_raw, y_test_raw,
            selected_clf, 'raw', save_plots=False)
        AUC_train_proc, AUC_test_proc, coefs_proc = trial_classification(
            proc_train_norm, proc_test_norm, y_train_proc, y_test_proc,
            selected_clf, 'processed', save_plots=False)
    else:
        # Fewer than 10 labelled trials: record NaN for every AUC
        AUC_train_raw = AUC_test_raw = AUC_train_proc = AUC_test_proc = float('NaN')

    participant_AUC[i, :] = [AUC_train_raw, AUC_test_raw, AUC_train_proc, AUC_test_proc]

auc_columns = ['Raw Train AUC', 'Raw Test AUC', 'ICA Processed Train AUC', 'ICA Processed Test AUC']
participant_AUC_df = pd.DataFrame(participant_AUC, index = participants_id, columns = auc_columns)

participant_AUC_df

### Data Processing and Extraction

In [None]:
# Folder searched for the per-participant motor CSV files
motor_output_dir = "../output/batch_analysis/"

In [None]:
import glob
import re

def str_list_to_list(lst):
    """Parse the text form of a numeric list/array into a list of floats.

    Accepts whitespace-separated values (e.g. ``"[1. 2.5 -3.]"``, possibly
    wrapped over several lines) as well as comma-separated values
    (e.g. ``"[1, 2, 3]"``).

    Parameters
    ----------
    lst : str
        String representation of the list; square brackets are optional.

    Returns
    -------
    list of float
        Parsed values in order; empty when the string holds no numbers.

    Raises
    ------
    ValueError
        If a token cannot be converted to ``float``.
    """
    # Drop the brackets, then treat commas as plain separators; split() with no
    # argument already collapses any run of whitespace, including newlines.
    str_no_brackets = re.sub(r"[\[\]]", "", lst).replace(",", " ")
    return [float(n) for n in str_no_brackets.split()]

# loop over the list of csv files
def read_motor_csvs():
    """Load every ``ppid*_motor.csv`` in ``motor_output_dir`` into one DataFrame.

    For each file, three columns are derived from ``post_steer_event_raw``:

    * ``Steering_Wheel_Degree`` - absolute difference between the last and the
      first value of the parsed steering trace,
    * ``Steering_Wheel_Degree_Categorical`` - "Low"/"High" from a two-bin
      ``pd.qcut`` computed within that file,
    * ``Steering_Wheel_Degree_Encoded`` - the same category encoded as 1 (Low) / 2 (High).

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of all files (index reset), with every column
        whose name matches ``Unnamed`` removed.

    Raises
    ------
    FileNotFoundError
        If no matching CSV file exists.
    """
    # Sorted so the row order does not depend on the file system's listing order
    csv_files = sorted(glob.glob(os.path.join(motor_output_dir, "ppid*_motor.csv")))
    if not csv_files:
        raise FileNotFoundError(f"No 'ppid*_motor.csv' files found in {motor_output_dir!r}")

    frames = []
    for f in csv_files:
        # read the csv file and add column for labels
        temp_df = pd.read_csv(f)

        steer_traces = temp_df['post_steer_event_raw'].apply(str_list_to_list)

        temp_df['Steering_Wheel_Degree'] = steer_traces.apply(lambda trace: trace[-1] - trace[0]).abs()
        temp_df['Steering_Wheel_Degree_Categorical'] = pd.qcut(temp_df['Steering_Wheel_Degree'], 2, labels=["Low", "High"]) #2=High, 1 =Low
        # Encode the column created on the previous line (the old code read a
        # differently spelled attribute, `Steer_Wheel_Degree_Categorical`).
        temp_df['Steering_Wheel_Degree_Encoded'] = temp_df['Steering_Wheel_Degree_Categorical'].map({'High': 2, 'Low': 1})

        frames.append(temp_df)

    # Concatenate once instead of growing the frame inside the loop
    all_dfs = pd.concat(frames, ignore_index=True)

    unnamed_columns = list(all_dfs.filter(regex='Unnamed').columns)
    return all_dfs.drop(columns=unnamed_columns)

In [None]:
# Load all motor trials and work on a copy
all_dfs_all_pp_trials = read_motor_csvs()
motor_all_dfs = all_dfs_all_pp_trials.copy()

# "<ppid>_<session>" key per row, then drop every row whose key is listed in remove_sessions
motor_all_dfs['sub_sess'] = motor_all_dfs['ppid'].astype(str).str.cat(motor_all_dfs['session'].astype(str), sep="_")
excluded_sub_sess = [f"{es[0]}.0_{es[1]}.0" for es in remove_sessions]
motor_all_dfs = motor_all_dfs.loc[~motor_all_dfs['sub_sess'].isin(excluded_sub_sess)]

In [None]:
motor_all_dfs = clean_up_adadrive_trials(motor_all_dfs)
# Constant column holding the mean steering value over all remaining rows
motor_all_dfs['Mean_Steering_Wheel_Degree'] = motor_all_dfs['Steering_Wheel_Degree'].mean()

In [None]:
# Distribution of the steering value across all trials
sns.histplot(motor_all_dfs, x="Steering_Wheel_Degree")
# plt.savefig(f"../output/plots/steering_wheel_turned_deg.png", dpi=300)

In [None]:
# Persist the labelled motor table
motor_all_dfs.to_csv("../output/batch_analysis/motor_df_label.csv")
# motor_all_dfs.to_excel(f"../output/batch_analysis/motor_df_label.xlsx")

### Feature Extraction and Classification - 10-fold CV (Pupil, ECG, EEG, All)

In [None]:
# Feature matrix / label pair for each modality, plus the multimodal combination
x_eeg, y_eeg = eeg_features(motor_all_dfs, cleaned_up=True)
x_eye, y_eye = eye_features(motor_all_dfs, cleaned_up=True)
x_ecg, y_ecg = ecg_features(motor_all_dfs, cleaned_up=True)
x_all, y_all = multimodal_features(motor_all_dfs)

# Lookup used by the cross-validation cells: modality name -> (features, labels)
modality_dict = dict(
    EEG=(x_eeg, y_eeg),
    Eye=(x_eye, y_eye),
    ECG=(x_ecg, y_ecg),
    All=(x_all, y_all),
)

In [None]:
# x_all.shape, x_eeg.shape, x_eye.shape, x_ecg.shape

In [None]:
# Cross-validated result per modality with the logistic classifier
modality_auc_logreg = {
    modality: modality_cv(features, labels, classifier='logistic')
    for modality, (features, labels) in modality_dict.items()
}

In [None]:
# One column per modality, rows labelled as train / test AUC
auc_df = pd.DataFrame(data=modality_auc_logreg, index=['Train AUC', 'Test AUC'])
auc_df

In [None]:
# Cross-validated result per modality with the random-forest classifier
modality_auc_rf = {
    modality: modality_cv(features, labels, classifier='random_forest')
    for modality, (features, labels) in modality_dict.items()
}

pd.DataFrame(data=modality_auc_rf, index=['Train AUC', 'Test AUC'])

In [None]:
# modality_cv(x_ecg, y_ecg, classifier = 'random_forest')

### Feature Extraction and Classification (All Participants)

In [None]:
# alpha_power, beta_power, theta_hjorth_activity, theta_hfd, theta_power, theta_sample_entropy,
# theta_hjorth_mobility, alpha_hjorth_activity, alpha_sample_entropy, alpha_hfd, beta_hjorth_activity,
# beta_hjorth_activity, gamma_power, gamma_hjorth_activity, beta_hfd, beta_hjorth_complexity,
# beta_hjorth_mobility, alpha_hjorth_mobility, gamma_hfd, alpha_hjorth_complexity,
# theta_hjorth_complexity, beta_sample_entropy, gamma_hjorth_mobility, gamma_hjorth_complexity,
# gamma_sample_entropy

In [None]:
# '|'-separated list of EEG feature-name fragments (not passed to the call below, which uses features='all').
# The last alternative previously started with a stray space (' 8-13_Hz_Higuchi_FD').
# NOTE(review): the first entry uses an 8-15 Hz band while the other entries in that range use 8-13 Hz — confirm.
selected_feature = '8-15_Hz_Power|15-32_Hz_Power|4-8_Hz_Hjorth_Activity|4-8_Hz_Higuchi_FD|4-8_Hz_Power|4-8_Hz_Sample_entropy|4-8_Hz_Hjorth_Mobility|8-13_Hz_Hjorth_Activity|8-13_Hz_Sample_entropy|8-13_Hz_Higuchi_FD'

# NOTE(review): read_motor_csvs creates 'Steering_Wheel_Degree_Encoded'; the label column requested
# here is spelled 'Steer_Wheel_Degree_Encoded' — confirm that column exists in the CSV files.
processed_eeg_features, processed_data_features, processed_labels = eeg_features(motor_all_dfs, 'processed', 
                                                                                 selected_chans = False, features = 'all',
                                                                                 label_source = 'Steer_Wheel_Degree_Encoded')

In [None]:
# Only the processed-data outputs are kept; no raw inputs are supplied, and the raw slots are discarded.
(_, _, proc_train_norm, proc_test_norm,
 _, _, y_train_proc, y_test_proc) = feature_normalization(
    processed_data_features,
    processed_labels,
    raw_feature=None,
    raw_labels=None,
)

In [None]:
# Logistic classifier on the processed motor-label split
proc_split = (proc_train_norm, proc_test_norm, y_train_proc, y_test_proc)
AUC_train_proc, AUC_test_proc, coefs_proc = trial_classification(*proc_split, 'logistic', 'processed')

In [None]:
# Random-forest classifier on the same split; overwrites the AUC / coefficient variables of the previous cell
proc_split = (proc_train_norm, proc_test_norm, y_train_proc, y_test_proc)
AUC_train_proc, AUC_test_proc, coefs_proc = trial_classification(*proc_split, 'random_forest', 'processed')

### Within Participant Classification (Steering Wheel Position Difference)

In [None]:
# Per-participant classification of the encoded steering label: train/test AUC per participant.
selected_clf = 'random_forest'
participants_id = np.sort(motor_all_dfs.ppid.unique().astype(int))

participant_AUC = np.empty([len(participants_id), 2])

for i, participant in enumerate(participants_id):
    single_sbj_df = motor_all_dfs[motor_all_dfs.ppid == participant]

    processed_eeg_features, processed_data_features, processed_labels = eeg_features(
        single_sbj_df, 'processed', label_source = 'Steer_Wheel_Degree_Encoded')

    # Only the processed-data outputs are kept; the raw slots are discarded
    (_, _, proc_train_norm, proc_test_norm,
     _, _, y_train_proc, y_test_proc) = feature_normalization(
        processed_data_features, processed_labels, raw_feature = None, raw_labels = None)

    AUC_train_proc, AUC_test_proc, coefs_proc = trial_classification(
        proc_train_norm, proc_test_norm, y_train_proc, y_test_proc,
        selected_clf, 'processed', save_plots=False)

    participant_AUC[i, :] = [AUC_train_proc, AUC_test_proc]


auc_columns = ['ICA Processed Train AUC', 'ICA Processed Test AUC']
participant_AUC_df = pd.DataFrame(participant_AUC, index = participants_id, columns = auc_columns)

In [None]:
participant_AUC_df

In [None]:
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor

# Random-forest regressor seeded with `rs`.
# NOTE(review): `rs` is not defined in the visible cells — presumably a module-level seed; confirm it exists on a fresh kernel.
regr = RandomForestRegressor(random_state=rs)

In [None]:
# def cal_rmse(df, true_val_col = 'Steer_Wheel_Degree', eeg_features_list = 'all', 
#              eye_features_list = 'all', ecg_features_list = 'all'):
    
#     if eeg_features == 'all':
#         # all eeg features rmse calculation
#         x_eeg, _, _ = eeg_features(df, 'processed', label_source = 'Steer_Wheel_Degree_Encoded')
#     else:
#         # selected eeg features rmse calculation
#         # selected_feature = '8-15_Hz_Power|15-32_Hz_Power|4-8_Hz_Hjorth_Activity|4-8_Hz_Higuchi_FD|4-8_Hz_Power|4-8_Hz_Sample_entropy|4-8_Hz_Hjorth_Mobility|8-13_Hz_Hjorth_Activity|8-13_Hz_Sample_entropy| 8-13_Hz_Higuchi_FD'
#         selected_feature = "|".join(map(str,eeg_features_list))
#         x_eeg, _, _ = eeg_features(df, 'processed', features = selected_feature, label_source = 'Steer_Wheel_Degree_Encoded')
#         y_eeg = df[true_val_col]
        
#     X_eeg_train, X_eeg_test, y_eeg_train, y_eeg_test = train_test_split(x_eeg, y_eeg, test_size=0.3, random_state=rs)
#     regr.fit(X_eeg_train, y_eeg_train)

#     y_eeg_pred = regr.predict(X_eeg_test)
#     eeg_rmse = mean_squared_error(y_eeg_test, y_eeg_pred, squared=False)
    
#     # eye rmse calculation
#     if eye_features_list == 'all':
#         eye_features = ["Left Pupil Diameter", "Right Pupil Diameter",
#                    "Left Evoked Pupil Diameter", "Right Evoked Pupil Diameter", true_val_col]
#     else:
#         eye_features = eye_features_list
#         eye_features.append(true_val_col)
     
#     eye_df = df[eye_features].dropna()
#     x_eye = eye_df.iloc[:,0:-1]
#     y_eye = eye_df.iloc[:,-1]
#     X_eye_train, X_eye_test, y_eye_train, y_eye_test = train_test_split(x_eye, y_eye, test_size=0.3, random_state=rs)
#     regr.fit(X_eye_train, y_eye_train)

#     y_eye_pred = regr.predict(X_eye_test)
#     eye_rmse = mean_squared_error(y_eye_test, y_eye_pred, squared=False)
    
#     #ecg emse calculation
#     if ecg_features_list == 'all':
#         ecg_feature_first = df.columns.get_loc("bpm")
#         ecg_feature_last = df.columns.get_loc("breathingrate")
#         ecg_feature = df.iloc[:,ecg_feature_first:ecg_feature_last-2].join(df[true_val_col])
#         ecg_df = ecg_feature.dropna()
#     else:
#         ecg_feature = ecg_features_list
#         ecg_feature.append(true_val_col)
#         ecg_df = df[ecg_feature].dropna()
        
#     x_ecg = ecg_df.iloc[:,0:-1]
#     y_ecg = ecg_df.iloc[:,-1]
#     X_ecg_train, X_ecg_test, y_ecg_train, y_ecg_test = train_test_split(x_ecg, y_ecg, test_size=0.3, random_state=rs)

#     regr.fit(X_ecg_train, y_ecg_train)
#     y_ecg_pred = regr.predict(X_ecg_test)
#     ecg_rmse = mean_squared_error(y_ecg_test, y_ecg_pred, squared=False)
    
#     return y_eeg_test, y_eeg_pred, eeg_rmse, 
#         y_eye_test, y_eye_pred, eye_rmse, 
#         y_ecg_test, y_ecg_pred, ecg_rmse


# y_eeg_test, y_eeg_pred, eeg_rmse, 
#             y_eye_test, y_eye_pred, eye_rmse, 
#             y_ecg_test, y_ecg_pred, ecg_rmse = cal_rmse(motor_all_dfs, true_val_col = 'Steering_Wheel_Degree',
#                                                         eeg_features_list = ['F1_15-32_Hz_Power'],
#                                                         eye_features_list = ['Left Pupil Diameter'],
#                                                         ecg_features_list = ['sdnn'])

In [None]:
# Baseline RMSE: error of predicting the overall mean steering value for every trial.
# The square root is taken explicitly because the `squared=False` argument of
# mean_squared_error is deprecated and removed in recent scikit-learn releases.
rmse_mean = np.sqrt(mean_squared_error(motor_all_dfs['Steering_Wheel_Degree'],
                                       motor_all_dfs['Mean_Steering_Wheel_Degree']))
rmse_mean

In [None]:
# RMSE per modality; calculate_rmse returns a 3-tuple whose last element is the RMSE
# _,_,eeg_rmse = calculate_rmse(motor_all_dfs, 'EEG', features_list = ['F1_15-32_Hz_Power'])
modality_rmse_dict = {
    modality: calculate_rmse(motor_all_dfs, modality)[2]
    for modality in ("EEG", "Eye", "ECG", "All")
}


In [None]:
# Single-row RMSE table stacked on top of the train/test AUC table
rmse_df = pd.DataFrame(data=modality_rmse_dict, index=['rmse'])

results_clf_reg = pd.concat([rmse_df, auc_df])
# results_clf_reg.to_csv(f'../output/saved_files/all_modality_rmse_auc.csv')
results_clf_reg.to_excel('../output/saved_files/all_modality_rmse_auc.xlsx')

In [None]:
# Display the three per-modality RMSE values.
# NOTE(review): these names are only assigned inside commented-out code in the visible cells —
# this cell presumably relies on leftover kernel state; confirm it runs after Restart & Run All.
eeg_rmse, eye_rmse, ecg_rmse

In [None]:
# Values, column labels and row labels for the RMSE table: first row 'Mean', second row 'Raw'.
# NOTE(review): none of the *_rmse_* names used here are assigned in the visible live cells —
# presumably leftover kernel state; confirm where they are computed.
rmse_data = [[eeg_rmse_all_mean, eeg_rmse_selected_mean, eye_rmse_mean, ecg_rmse_mean],
        [eeg_rmse_all, eeg_rmse_selected, eye_rmse, ecg_rmse]]
rmse_column = ['EEG prediction RMSE - all features', 'EEG prediction RMSE - selected features', 'Eye prediction RMSE', 'ECG prediction RMSE']
rmse_row = ['Mean', 'Raw']

In [None]:
# Build the RMSE table (this rebinds rmse_df) and save it as CSV and Excel
rmse_df = pd.DataFrame(data=rmse_data, index=rmse_row, columns=rmse_column)
rmse_df.to_csv('../output/saved_files/all_modality_rmse.csv')
rmse_df.to_excel('../output/saved_files/all_modality_rmse.xlsx')

In [None]:
# Collect the prediction arrays under descriptive keys and pickle them
pred_dict = {
    "EEG_all_Mean_pred": y_eeg_all_pred_mean,
    "EEG_selected_Mean_pred": y_eeg_pred_mean,
    "Eye_Mean_pred": y_eye_pred_mean,
    "ECG_Mean_pred": y_ecg_pred_mean,
    "EEG_all_pred": y_eeg_all_pred,
    "EEG_selected_pred": y_eeg_pred,
    "Eye_pred": y_eye_pred,
    "ECG_pred": y_ecg_pred,
}

with open('../output/saved_files/pickle_files/modality_pred.pickle', 'wb') as handle_pred:
    pickle.dump(pred_dict, handle_pred, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Collect the matching test-target arrays under descriptive keys and pickle them
test_dict = {
    "EEG_all_Mean_test": y_eeg_all_test_mean,
    "EEG_selected_Mean_test": y_eeg_test_mean,
    "Eye_Mean_test": y_eye_test_mean,
    "ECG_Mean_test": y_ecg_test_mean,
    "EEG_all_test": y_eeg_all_test,
    "EEG_selected_test": y_eeg_test,
    "Eye_test": y_eye_test,
    "ECG_test": y_ecg_test,
}

with open('../output/saved_files/pickle_files/modality_test.pickle', 'wb') as handle_test:
    pickle.dump(test_dict, handle_test, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# New classification label based on the ratio of throttle-input change to brake-input change

wheel_input_metric = (all_dfs_cleaned_up['abs_sum_delta_throttle_input']
                      / all_dfs_cleaned_up['abs_sum_delta_brake_input'])

# Two quantile bins; the lower half of the ratio is labelled "High", the upper half "Low".
# NOTE(review): this label order is the reverse of the steering-degree binning (["Low", "High"]) — confirm it is intended.
driving_metric = pd.qcut(wheel_input_metric, 2, labels=["High", "Low"])
driving_metric_df = driving_metric.to_frame('driving_metric')

# Encode High -> 2, Low -> 1 and attach the column. assign() returns a new frame and simply
# overwrites an existing 'driving_metric' column, so re-running this cell no longer fails
# the way DataFrame.join did on overlapping column names.
all_dfs_cleaned_up = all_dfs_cleaned_up.assign(
    driving_metric=driving_metric.map({'High': 2, 'Low': 1}))

In [None]:
# !pip install EMD_signal
import PyEMD
from PyEMD import EMD, Visualisation

# eeg_comps = ica.get_sources(raw).get_data() #eeg componenets for all epochs
# # eeg_comps = ica.get_sources(epochs).get_data() #eeg componenets for all epochs
# # comps_epoch_concat = np.empty([eeg_comps.shape[1],eeg_comps.shape[2]*eeg_comps.shape[0]]) #initiate empty array
# # for i in range(eeg_comps.shape[0]):
# #     comps_epoch_concat[:,i*eeg_comps.shape[2]:eeg_comps.shape[2]*(i+1)] = eeg_comps[i]

# component_no = 5
# test_comps = eeg_comps[component_no]

# emd = EMD() # EMD instantiation
# emd.emd(test_comps) # decompose signal into IMFs and residue
# imfs, res = emd.get_imfs_and_residue()

# # # imfs = emd(np.squeeze(eeg_comps[0])[0])

In [None]:
# scipy.stats.kurtosis(test_comps)

In [None]:
# # Visualization
# t = np.arange(0, 3+1/freq, 1/freq)
# vis = Visualisation()
# vis.plot_imfs(imfs=imfs, residue=res, t=t, include_residue=True)
# # vis.plot_instant_freq(t, imfs=imfs)
# vis.show()