# Setup

## Imports

In [1]:
import h5py 
import mat73
import numpy as np
import pandas as pd
import seaborn as sns
import csv
from matplotlib import pyplot as plt
from sklearn.model_selection import RepeatedKFold, cross_validate
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from scipy.stats import ttest_1samp, ttest_ind

## Load File Paths

In [2]:
# Legacy locations, kept for reference:
# ncsl_share = '/run/user/1000/gvfs/smb-share:server=10.162.37.21,share=main'
# data_path = f'Data/Subject06_snapshot_normalized.npy'

# Mount point of the shared drive; every remote path below is rooted here.
ncsl_share = '/mnt/ncsl_share'

subs = ['06']
file_paths = {}

# Build one dictionary of input/output locations per subject ID.
for sub in subs:
    file_paths[sub] = dict(
        setup_path=f'{ncsl_share}/Public/EFRI/1_formatted/SUBJECT{sub}/EFRI{sub}_WAR_SES1_Setup.mat',
        raw_path=f'{ncsl_share}/Public/EFRI/1_formatted/SUBJECT{sub}/EFRI{sub}_WAR_SES1_Raw.mat',
        # Snapshots with movement onset as the event.
        data_path=f'{ncsl_share}/Daniel/Data/Trial_by_Chan_by_Freq_by_Time_Snapshots/Subject{sub}_snapshot_normalized.npy',
        # Alternative snapshots with the visual cue as the event:
        # data_path=f'{ncsl_share}/Daniel/Data/Trial_by_Chan_by_Freq_by_Time_Snapshots/show-card_pre-2sec_post-4sec/Subject{sub}_snapshot_normalized.npy',
        out_path_metrics=f'Metrics/Subject{sub}',
        out_path_plots=f'Plots/Subject{sub}',
    )

In [3]:
# Open the raw recording explicitly read-only: older h5py releases default to
# read/write ('a') when no mode is given, which could modify (or create) the
# file on the shared drive.
# NOTE(review): the handle is not closed in the cells visible here — close it
# (or use a `with` block) once the raw data is no longer needed.
raw_file = h5py.File(file_paths['06']['raw_path'], 'r')

# Load the session setup .mat file; it is used as a dict below.
setup_data = mat73.loadmat(file_paths['06']['setup_path'])

# Output locations for figures and metrics for this subject.
out_path_plots = file_paths['06']['out_path_plots']
out_path_metrics = file_paths['06']['out_path_metrics']

In [4]:
# Show which top-level fields the loaded setup file provides.
setup_data.keys()

dict_keys(['elec_area', 'elec_ind', 'elec_name', 'filters', 'trial_times', 'trial_words'])

## Instantiate variables

In [5]:
trial_filters = setup_data['filters']
bets = trial_filters['bets']

# Indices of the trials whose bet value is not NaN.
good_trials = np.where(~np.isnan(bets))[0]

# Restrict the bets and the subject's card values to those trials.
bets = bets[good_trials]
subject_cards = trial_filters['card1'][good_trials]

In [6]:
# Electrode names and electrode areas from the setup file, as NumPy arrays.
elec_names, elec_areas = (
    np.array(setup_data[field]) for field in ('elec_name', 'elec_area')
)

In [7]:
# Load the precomputed, normalized snapshot array for this subject.
data = np.load(file_paths['06']['data_path'])

# Binary label per good trial: 0 = low bet ($5), 1 = any other bet (high, $20).
y = np.asarray([int(bet != 5) for bet in bets])

## Matplotlib Settings

In [8]:
import matplotlib as mpl

# Larger default font sizes for axis titles, axis labels, and tick labels.
mpl.rcParams.update({
    'axes.titlesize': 22,
    'axes.labelsize': 18,
    'xtick.labelsize': 18,
    'ytick.labelsize': 18,
})

## Create Frequency Bands

In [9]:
# Frequencies of the 63 wavelet scales, log2-spaced from 2 to 150
# (presumably Hz — TODO confirm against the wavelet transform settings).
wavelet_freqs = np.logspace(np.log2(2), np.log2(150), num=63, base=2)

# Half-open [low, high) edges of each band. Dict order matters: code that
# enumerates the bands relies on Delta, Theta, Alpha, Beta, Gamma ordering.
band_edges = {
    "Delta": (0.5, 4),
    "Theta": (4, 8),
    "Alpha": (8, 14),
    "Beta": (14, 30),
    "Gamma": (30, np.inf),  # no upper limit
}

# For each band, the positions in `wavelet_freqs` that fall inside it.
frequency_band_indices = {
    band: np.flatnonzero((wavelet_freqs >= low) & (wavelet_freqs < high)).tolist()
    for band, (low, high) in band_edges.items()
}

In [10]:
# Collapse the frequency axis (axis 2) of `data` into one slot per band.
# The number of slots is derived from `frequency_band_indices` instead of a
# hard-coded 5, so adding or removing a band cannot desynchronise the shape.
# Axis order presumably (trial, channel, frequency, time), per the
# "Trial_by_Chan_by_Freq_by_Time" snapshot directory name — TODO confirm.
f_band_data = np.zeros(
    (data.shape[0], data.shape[1], len(frequency_band_indices), data.shape[3])
)

for i, key in enumerate(frequency_band_indices):
    # Mean over all wavelet scales that belong to this band.
    f_band_data[:, :, i, :] = data[:, :, frequency_band_indices[key], :].mean(axis=2)

# Classes

# Model Training

Each feature is the power per wavelet scale for a particular channel and timepoint.

# Multiprocessing

In [49]:
def get_shuffled_t_stats(data, y, setup_data,  time_resolution, threshold, ref_estimator):
    """Fit a fresh ``ShuffledLDA`` and return its t-statistic clusters.

    Written as a module-level function so it can be handed to a
    ``multiprocessing`` pool as the per-task worker.

    NOTE(review): ``ShuffledLDA`` is defined elsewhere in the notebook;
    presumably it trains on shuffled labels — confirm against the class.

    Parameters
    ----------
    data, y
        Feature array and labels, forwarded unchanged to
        ``train_per_channel_and_timestep``.
    setup_data
        Setup dictionary; passed both to the ``ShuffledLDA`` constructor and
        to ``train_per_channel_and_timestep``.
    time_resolution
        Forwarded to ``train_per_channel_and_timestep``.
    threshold
        Forwarded to ``compute_t_stat_clusters``.
    ref_estimator
        Reference estimator forwarded to ``compute_t_stat_clusters``.

    Returns
    -------
    Whatever ``ShuffledLDA.compute_t_stat_clusters`` returns.
    """
    permuted_model = ShuffledLDA(setup_data)
    permuted_model.train_per_channel_and_timestep(
        data,
        y,
        setup_data=setup_data,
        time_resolution=time_resolution,
    )
    cluster_stats = permuted_model.compute_t_stat_clusters(ref_estimator, threshold)
    return cluster_stats

In [50]:
from multiprocessing import Pool

n_processes = 20              # worker processes in the pool
N_SHUFFLES = 100              # number of shuffled runs to collect
SHUFFLE_TIME_RESOLUTION = 5   # `time_resolution` passed to get_shuffled_t_stats
SHUFFLE_THRESHOLD = 12        # `threshold` passed to get_shuffled_t_stats


def _reseed_worker_rng():
    """Reseed NumPy's global RNG from OS entropy inside a pool worker.

    With the fork start method every worker begins with a copy of the
    parent's NumPy global RNG state, so workers drawing from it would produce
    the same "random" sequence. NOTE(review): the shuffling code is not
    visible here; this matters only if it uses NumPy's global RNG.
    """
    np.random.seed()


if __name__ == '__main__':
    # `lda` is the reference estimator; it must already exist in the kernel.
    # Every task receives the same argument tuple.
    shuffle_args = (
        f_band_data, y, setup_data,
        SHUFFLE_TIME_RESOLUTION, SHUFFLE_THRESHOLD, lda,
    )
    # starmap blocks until all tasks finish, and leaving the `with` block
    # shuts the pool down, so no explicit close() is needed.
    with Pool(n_processes, initializer=_reseed_worker_rng) as p:
        results = p.starmap(get_shuffled_t_stats, [shuffle_args] * N_SHUFFLES)

Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!
Shuffling!

# Visualization of Model Performance

# Extraneous Code