In [1]:
import os
from tqdm.notebook import tqdm

import numpy as np
import pandas as pd
import scipy.io as sio
import matplotlib.pyplot as plt

from utils import constants

Load pre-processed data file and isolate phase data

In [60]:
# Directory that holds the parquet files used throughout this notebook
data_dir = constants.PYTHON_DIR

# Load the pre-processed data file
all_data_path = os.path.join(data_dir, 'all_filt_chan.parquet')
all_data = pd.read_parquet(all_data_path)

In [42]:
# Filter phase features only: discard every column whose name contains 'ac_' or 'dc_'
phase_cols = [col for col in all_data.columns
              if not ('ac_' in col or 'dc_' in col)]
ph_data = all_data[phase_cols]

# Save the reduced table so that it can be reloaded without the full file
phase_path = os.path.join(data_dir, 'phase_filt_chan.parquet')
ph_data.to_parquet(phase_path, index=False)

We can either work with the RT window or the full signal

In [61]:
# Reload the phase-only table
ph_data = pd.read_parquet(os.path.join(data_dir, 'phase_filt_chan.parquet'))

# The trial metadata columns exist once per suffix (_04, _08, _13): keep the
# _04 copy under its plain name and discard the _08 and _13 copies.
info_fields = ['trial_num', 'subject_id', 'trial_type', 'montage']
ph_data = ph_data.rename(columns={f'{field}_04': field for field in info_fields})
ph_data = ph_data.drop(columns=[f'{field}_{suffix}'
                                for suffix in ('08', '13')
                                for field in info_fields])

In [63]:
# Utilize only the full signal.
# The trial metadata columns are kept next to the 'ph' columns, exactly as for the
# other windows, because the feature-extraction code reads 'trial_num',
# 'subject_id', 'trial_type' and 'montage' back from this file.
full_cols = [c for c in ph_data.columns if c.split('_')[0] == 'ph']
ph_all = ph_data[full_cols + ['trial_num', 'subject_id', 'trial_type', 'montage']]

ph_all.to_parquet(os.path.join(data_dir, 'phase_all_filt_chan.parquet'), index=False)

In [62]:
# Filter RT window for phase data only: 'ph-rt' columns plus the trial metadata
rt_cols = [col for col in ph_data.columns if col.partition('_')[0] == 'ph-rt']
rt_cols += ['trial_num', 'subject_id', 'trial_type', 'montage']
ph_RT = ph_data[rt_cols]

rt_path = os.path.join(data_dir, 'phase_RT_filt_chan.parquet')
ph_RT.to_parquet(rt_path, index=False)

In [None]:
# Filter pre-stim window for phase data only: 'ph-pre-stim' columns plus the trial metadata
pre_stim_cols = [col for col in ph_data.columns if col.partition('_')[0] == 'ph-pre-stim']
pre_stim_cols += ['trial_num', 'subject_id', 'trial_type', 'montage']
ph_pre_stim = ph_data[pre_stim_cols]

pre_stim_path = os.path.join(data_dir, 'phase_pre_stim_filt_chan.parquet')
ph_pre_stim.to_parquet(pre_stim_path, index=False)

In [None]:
# Filter initial response window for phase data only: 'ph-init' columns plus the trial metadata
init_cols = [col for col in ph_data.columns if col.partition('_')[0] == 'ph-init']
init_cols += ['trial_num', 'subject_id', 'trial_type', 'montage']
ph_init = ph_data[init_cols]

init_path = os.path.join(data_dir, 'phase_init_filt_chan.parquet')
ph_init.to_parquet(init_path, index=False)

In [None]:
# Filter pre-RT window for phase data only: 'ph-pre-rt' columns plus the trial metadata
pre_rt_cols = [col for col in ph_data.columns if col.partition('_')[0] == 'ph-pre-rt']
pre_rt_cols += ['trial_num', 'subject_id', 'trial_type', 'montage']
ph_pre_RT = ph_data[pre_rt_cols]

pre_rt_path = os.path.join(data_dir, 'phase_pre_RT_filt_chan.parquet')
ph_pre_RT.to_parquet(pre_rt_path, index=False)

In [None]:
# Filter post-RT window for phase data only: 'ph-post-rt' columns plus the trial metadata
post_rt_cols = [col for col in ph_data.columns if col.partition('_')[0] == 'ph-post-rt']
post_rt_cols += ['trial_num', 'subject_id', 'trial_type', 'montage']
ph_post_RT = ph_data[post_rt_cols]

post_rt_path = os.path.join(data_dir, 'phase_post_RT_filt_chan.parquet')
ph_post_RT.to_parquet(post_rt_path, index=False)

# Extract band power features

In [None]:
# Directory of the parquet files and the montage labels that appear in column names
data_dir = constants.PYTHON_DIR
montages = list('abcdefgh')

# Maps a window key to the column-name prefix of that window: the full signal
# uses the bare 'ph' prefix, every other window uses 'ph-<key>'.
window_mapping = {'all': 'ph'}
window_mapping.update({window: f'ph-{window}'
                       for window in ('rt', 'pre-stim', 'init', 'pre-rt', 'post-rt')})

def extract_features(data_dir, input_fname, key, output_fname):
    """Compute simple summary features for every signal column of one window.

    Reads ``input_fname`` from ``data_dir``, copies the signal stored in each
    ``{prefix}_{montage}_{index}_{suffix}`` column into a dense 3-D array and
    reduces it along the sample axis into seven statistics: max, min, mean,
    range, average power, number of samples greater than zero and number of
    sign changes.  The features are written, together with the trial metadata
    columns, to ``output_fname`` in ``data_dir``.  Columns that are entirely
    NaN are dropped from the output.

    Relies on the module-level ``window_mapping`` and ``montages``.

    Parameters
    ----------
    data_dir : str
        Directory containing the input file; the output is written there too.
    input_fname : str
        Parquet file holding the signal columns as well as the ``trial_num``,
        ``subject_id``, ``trial_type`` and ``montage`` columns.
    key : str
        Key of ``window_mapping`` that selects the column-name prefix.
    output_fname : str
        Name of the parquet file to write.

    Returns
    -------
    None
    """
    df = pd.read_parquet(os.path.join(data_dir, input_fname))

    prefix = window_mapping[key]
    # Signal column names; every feature family reuses them in this order.
    signal_cols = [f'{prefix}_{m}_{i}_{f}' for m in montages for i in range(128) for f in ['04', '08', '13']]

    orig = df[signal_cols].values
    # Each cell is expected to hold a sequence of samples (the length is taken
    # from the first cell); copy them into a dense (row, column, sample) array.
    signals = np.zeros((orig.shape[0], orig.shape[1], len(orig[0, 0])))
    for i in range(orig.shape[0]):
        for j in range(orig.shape[1]):
            signals[i, j, :] = orig[i, j]

    # None of the reductions below modifies `signals`, so no defensive copies
    # are needed; max and min are computed once and reused for the range.
    sig_max = np.max(signals, axis=-1)
    sig_min = np.min(signals, axis=-1)

    # The two counting features are set to NaN for signals containing any NaN.
    nan_mask = np.isnan(signals).any(axis=-1)

    samp_gt_zero = np.sum(signals > 0, axis=-1).astype('float64')
    samp_gt_zero[nan_mask] = np.nan

    # np.diff on a boolean array marks the positions where neighbouring sign
    # bits differ, so counting the True entries gives the number of sign changes.
    sign_flips = np.diff(np.signbit(signals), axis=-1)
    zero_cross = np.count_nonzero(sign_flips, axis=-1).astype('float64')
    zero_cross[nan_mask] = np.nan

    # (name prefix, values) pairs in the order the columns appear in the output
    features = [
        ('max', sig_max),
        ('min', sig_min),
        ('mean', np.mean(signals, axis=-1)),
        ('range', sig_max - sig_min),
        ('avg_pwr', np.sum(signals ** 2, axis=-1) / signals.shape[2]),
        ('samp_gt_zero', samp_gt_zero),
        ('zero_cross', zero_cross),
    ]
    feats_df = pd.concat(
        [pd.DataFrame(values, columns=[f'{stat}_{col}' for col in signal_cols])
         for stat, values in features],
        axis=1)

    # The feature frames carry a fresh 0..n-1 index; resetting the metadata index
    # keeps both sides aligned row by row even if the file restored another index.
    info_df = df[['trial_num', 'subject_id', 'trial_type', 'montage']].reset_index(drop=True)
    final_df = pd.concat([info_df, feats_df], axis=1)
    final_df = final_df.dropna(axis=1, how='all')

    print('Writing output file...')
    final_df.to_parquet(os.path.join(data_dir, output_fname), index=False)

In [None]:
# Fill in the three placeholders before running this cell:
#   input_fname  - parquet file inside data_dir to read
#   key          - one of the keys of window_mapping
#   output_fname - parquet file inside data_dir to write
input_fname = None
key = None
output_fname = None

# Fail with an explicit message instead of an obscure TypeError from os.path.join
if None in (input_fname, key, output_fname):
    raise ValueError('Set input_fname, key and output_fname before running this cell')

extract_features(data_dir, input_fname, key, output_fname)

# Extract band power features from full signal

In [3]:
# 'phase_all_filt_chan.parquet' is written to constants.PYTHON_DIR earlier in this
# notebook, so read it from there rather than from a machine-specific absolute path.
data_dir = constants.PYTHON_DIR
montages = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']

df = pd.read_parquet(os.path.join(data_dir, 'phase_all_filt_chan.parquet'))

In [5]:
# Names of the full-signal columns, built once and reused for every feature family
ph_cols = [f'ph_{m}_{i}_{f}' for m in montages for i in range(128) for f in ['04', '08', '13']]

orig = df[ph_cols].values
# Each cell is expected to hold a sequence of samples (the length is taken from the
# first cell); copy them into a dense (row, column, sample) float array.
empty = np.zeros((orig.shape[0], orig.shape[1], len(orig[0, 0])))

for i in range(orig.shape[0]):
    for j in range(orig.shape[1]):
        empty[i, j, :] = orig[i, j]

# The reductions never modify `empty`, so they run on it directly instead of on
# fresh copies; max and min are computed once and reused for the range.
sig_max = np.max(empty, axis=-1)
sig_min = np.min(empty, axis=-1)

max_feats = pd.DataFrame(sig_max, columns=[f'max_{c}' for c in ph_cols])
min_feats = pd.DataFrame(sig_min, columns=[f'min_{c}' for c in ph_cols])
mean_feats = pd.DataFrame(np.mean(empty, axis=-1), columns=[f'mean_{c}' for c in ph_cols])
range_feats = pd.DataFrame(sig_max - sig_min, columns=[f'range_{c}' for c in ph_cols])
# Average power: mean of the squared samples
avg_pwr_feats = pd.DataFrame(np.sum(empty ** 2, axis=-1) / empty.shape[2],
                             columns=[f'avg_pwr_{c}' for c in ph_cols])

In [6]:
# Number of samples greater than zero per signal.  Neither np.isnan nor the
# comparison modifies `empty`, so no copies are taken.
nan_mask = np.isnan(empty).any(axis=-1)
new_values = np.sum(empty > 0, axis=-1).astype('float64')
# Signals that contain any NaN get NaN instead of a count
new_values[nan_mask] = np.nan

samp_gt_zero_feats = pd.DataFrame(new_values,
                                  columns=[f'samp_gt_zero_ph_{m}_{i}_{f}' for m in montages for i in range(128) for f in ['04', '08', '13']])

In [7]:
def num_zero_crossing(arr):
    """Count the positions where the sign bit changes between consecutive samples."""
    sign_flips = np.diff(np.signbit(arr))
    return int(np.count_nonzero(sign_flips))

# Number of sign changes per signal.  `empty` is only read here, so it is passed
# directly rather than copied.
nan_mask = np.isnan(empty).any(axis=-1)
new_values = np.apply_along_axis(num_zero_crossing, axis=-1, arr=empty).astype('float64')
# Signals that contain any NaN get NaN instead of a count
new_values[nan_mask] = np.nan

zero_cross_feats = pd.DataFrame(new_values,
                                columns=[f'zero_cross_ph_{m}_{i}_{f}' for m in montages for i in range(128) for f in ['04', '08', '13']])

In [8]:
# Place all feature families side by side
feats_df = pd.concat([max_feats, min_feats, mean_feats, range_feats, avg_pwr_feats, samp_gt_zero_feats, zero_cross_feats], axis=1)
# The feature frames carry a fresh 0..n-1 index; resetting the metadata index keeps
# both sides aligned row by row even if `df` was loaded with a different index.
info_df = df[['trial_num', 'subject_id', 'trial_type', 'montage']].reset_index(drop=True)
# Drop the columns that contain nothing but NaN
final_df = pd.concat([info_df, feats_df], axis=1).dropna(axis=1, how='all')

In [10]:
# Write the full-signal feature table (row index not stored)
all_features_path = os.path.join(data_dir, 'all_simple_bandpower_features.parquet')
final_df.to_parquet(all_features_path, index=False)

# With Common Spatial Pattern

## Extract band power features from RT-window

In [107]:
# NOTE(review): previously hard-coded as '/shared/rsaas/nschiou2/EROS/python/'; this
# assumes constants.PYTHON_DIR points at that same directory, as the earlier cells
# of this notebook imply - confirm.
data_dir = constants.PYTHON_DIR

df = pd.read_parquet(os.path.join(data_dir, 'CSP_filt_8_RT.parquet'))

In [108]:
# Show every column whose name contains 'csp'
list(filter(lambda name: 'csp' in name, df.columns))

['csp_0',
 'csp_1',
 'csp_10',
 'csp_11',
 'csp_12',
 'csp_13',
 'csp_14',
 'csp_15',
 'csp_2',
 'csp_3',
 'csp_4',
 'csp_5',
 'csp_6',
 'csp_7',
 'csp_8',
 'csp_9',
 'csp_feat_0',
 'csp_feat_1',
 'csp_feat_10',
 'csp_feat_11',
 'csp_feat_12',
 'csp_feat_13',
 'csp_feat_14',
 'csp_feat_15',
 'csp_feat_2',
 'csp_feat_3',
 'csp_feat_4',
 'csp_feat_5',
 'csp_feat_6',
 'csp_feat_7',
 'csp_feat_8',
 'csp_feat_9']

In [109]:
# Every column whose name contains 'csp', collected once and reused below.
# NOTE(review): the substring also matches the 'csp_feat_*' columns listed in the
# output above - confirm they are meant to be summarised like the 'csp_*' ones.
csp_cols = [c for c in df.columns if 'csp' in c]

orig = df[csp_cols].values
# Each cell is expected to hold a sequence of samples (the length is taken from the
# first cell); copy them into a dense (row, column, sample) float array.
empty = np.zeros((orig.shape[0], orig.shape[1], len(orig[0, 0])))

for i in range(orig.shape[0]):
    for j in range(orig.shape[1]):
        empty[i, j, :] = orig[i, j]

# The reductions never modify `empty`, so they run on it directly instead of on
# fresh copies; max and min are computed once and reused for the range.
sig_max = np.max(empty, axis=-1)
sig_min = np.min(empty, axis=-1)

max_feats = pd.DataFrame(sig_max, columns=[f'max_ph_rt_{c}' for c in csp_cols])
min_feats = pd.DataFrame(sig_min, columns=[f'min_ph_rt_{c}' for c in csp_cols])
mean_feats = pd.DataFrame(np.mean(empty, axis=-1), columns=[f'mean_ph_rt_{c}' for c in csp_cols])
range_feats = pd.DataFrame(sig_max - sig_min, columns=[f'range_ph_rt_{c}' for c in csp_cols])
# Average power: mean of the squared samples
avg_pwr_feats = pd.DataFrame(np.sum(empty ** 2, axis=-1) / empty.shape[2],
                             columns=[f'avg_pwr_ph_rt_{c}' for c in csp_cols])

In [110]:
# Number of samples greater than zero per signal.  Neither np.isnan nor the
# comparison modifies `empty`, so no copies are taken.
nan_mask = np.isnan(empty).any(axis=-1)
new_values = np.sum(empty > 0, axis=-1).astype('float64')
# Signals that contain any NaN get NaN instead of a count
new_values[nan_mask] = np.nan

samp_gt_zero_feats = pd.DataFrame(new_values,
                                  columns=[f'samp_gt_zero_ph_rt_{c}' for c in df.columns if 'csp' in c])

In [111]:
def num_zero_crossing(arr):
    """Return how many neighbouring sample pairs of `arr` differ in sign bit."""
    sign_bits = np.signbit(arr)
    flip_positions = np.flatnonzero(np.diff(sign_bits))
    return flip_positions.size

# Number of sign changes per signal.  `empty` is only read here, so it is passed
# directly rather than copied.
nan_mask = np.isnan(empty).any(axis=-1)
new_values = np.apply_along_axis(num_zero_crossing, axis=-1, arr=empty).astype('float64')
# Signals that contain any NaN get NaN instead of a count
new_values[nan_mask] = np.nan

zero_cross_feats = pd.DataFrame(new_values,
                                columns=[f'zero_cross_ph_rt_{c}' for c in df.columns if 'csp' in c])

In [112]:
# Place all feature families side by side
feats_df = pd.concat([max_feats, min_feats, mean_feats, range_feats, avg_pwr_feats, samp_gt_zero_feats, zero_cross_feats], axis=1)
# The feature frames carry a fresh 0..n-1 index; resetting the metadata index keeps
# both sides aligned row by row even if `df` was loaded with a different index.
info_df = df[['trial_num', 'subject_id', 'trial_type', 'montage']].reset_index(drop=True)
final_df = pd.concat([info_df, feats_df], axis=1)

In [113]:
# Write the RT-window CSP feature table (row index not stored)
csp_rt_features_path = os.path.join(data_dir, 'simple_bandpower_features_csp_8_rt.parquet')
final_df.to_parquet(csp_rt_features_path, index=False)

## Extract band power features from full signal

In [114]:
# NOTE(review): previously hard-coded as '/shared/rsaas/nschiou2/EROS/python/'; this
# assumes constants.PYTHON_DIR points at that same directory, as the earlier cells
# of this notebook imply - confirm.
data_dir = constants.PYTHON_DIR

df = pd.read_parquet(os.path.join(data_dir, 'CSP_filt_8_all.parquet'))

In [115]:
# Show every column whose name contains 'csp'
list(filter(lambda name: 'csp' in name, df.columns))

['csp_0',
 'csp_1',
 'csp_10',
 'csp_11',
 'csp_12',
 'csp_13',
 'csp_14',
 'csp_15',
 'csp_2',
 'csp_3',
 'csp_4',
 'csp_5',
 'csp_6',
 'csp_7',
 'csp_8',
 'csp_9',
 'csp_feat_0',
 'csp_feat_1',
 'csp_feat_10',
 'csp_feat_11',
 'csp_feat_12',
 'csp_feat_13',
 'csp_feat_14',
 'csp_feat_15',
 'csp_feat_2',
 'csp_feat_3',
 'csp_feat_4',
 'csp_feat_5',
 'csp_feat_6',
 'csp_feat_7',
 'csp_feat_8',
 'csp_feat_9']

In [116]:
# Every column whose name contains 'csp', collected once and reused below.
# NOTE(review): the substring also matches the 'csp_feat_*' columns listed in the
# output above - confirm they are meant to be summarised like the 'csp_*' ones.
csp_cols = [c for c in df.columns if 'csp' in c]

orig = df[csp_cols].values
# Each cell is expected to hold a sequence of samples (the length is taken from the
# first cell); copy them into a dense (row, column, sample) float array.
empty = np.zeros((orig.shape[0], orig.shape[1], len(orig[0, 0])))

for i in range(orig.shape[0]):
    for j in range(orig.shape[1]):
        empty[i, j, :] = orig[i, j]

# The reductions never modify `empty`, so they run on it directly instead of on
# fresh copies; max and min are computed once and reused for the range.
sig_max = np.max(empty, axis=-1)
sig_min = np.min(empty, axis=-1)

max_feats = pd.DataFrame(sig_max, columns=[f'max_ph_{c}' for c in csp_cols])
min_feats = pd.DataFrame(sig_min, columns=[f'min_ph_{c}' for c in csp_cols])
mean_feats = pd.DataFrame(np.mean(empty, axis=-1), columns=[f'mean_ph_{c}' for c in csp_cols])
range_feats = pd.DataFrame(sig_max - sig_min, columns=[f'range_ph_{c}' for c in csp_cols])
# Average power: mean of the squared samples
avg_pwr_feats = pd.DataFrame(np.sum(empty ** 2, axis=-1) / empty.shape[2],
                             columns=[f'avg_pwr_ph_{c}' for c in csp_cols])

In [117]:
# Number of samples greater than zero per signal.  Neither np.isnan nor the
# comparison modifies `empty`, so no copies are taken.
nan_mask = np.isnan(empty).any(axis=-1)
new_values = np.sum(empty > 0, axis=-1).astype('float64')
# Signals that contain any NaN get NaN instead of a count
new_values[nan_mask] = np.nan

samp_gt_zero_feats = pd.DataFrame(new_values,
                                  columns=[f'samp_gt_zero_ph_{c}' for c in df.columns if 'csp' in c])

In [118]:
def num_zero_crossing(arr):
    """Count sign-bit changes between each sample of `arr` and the next one."""
    changed = np.diff(np.signbit(arr))
    return int(changed.sum())

# Number of sign changes per signal.  `empty` is only read here, so it is passed
# directly rather than copied.
nan_mask = np.isnan(empty).any(axis=-1)
new_values = np.apply_along_axis(num_zero_crossing, axis=-1, arr=empty).astype('float64')
# Signals that contain any NaN get NaN instead of a count
new_values[nan_mask] = np.nan

zero_cross_feats = pd.DataFrame(new_values,
                                columns=[f'zero_cross_ph_{c}' for c in df.columns if 'csp' in c])

In [119]:
# Place all feature families side by side
feats_df = pd.concat([max_feats, min_feats, mean_feats, range_feats, avg_pwr_feats, samp_gt_zero_feats, zero_cross_feats], axis=1)
# The feature frames carry a fresh 0..n-1 index; resetting the metadata index keeps
# both sides aligned row by row even if `df` was loaded with a different index.
info_df = df[['trial_num', 'subject_id', 'trial_type', 'montage']].reset_index(drop=True)
final_df = pd.concat([info_df, feats_df], axis=1)

In [120]:
# Write the full-signal CSP feature table (row index not stored)
csp_all_features_path = os.path.join(data_dir, 'simple_bandpower_features_csp_8_all.parquet')
final_df.to_parquet(csp_all_features_path, index=False)