> __Purpose:__ Create a couple of dataframes containing relevant information (eg one for EMG training data with EMG vectors, condition number, and update number), and then save them for later use so each subsequent NB doesn't have to go through this process.

In [1]:
import numpy as np
import time
import os
import pandas as pd
import pickle
from experiment_params import *

# Load In Data

In [2]:
with open('Data\continuous_full_data_block1.pickle', 'rb') as handle:
    #refs_block1, poss_block1, dec_vels_block1, int_vel_block1, emgs_block1, Ws_block1, Hs_block1, alphas_block1, pDs_block1, times_block1, conditions_block1 = pickle.load(handle)
    _, _, _, _, emgs_block1, Ws_block1, _, _, _, times_block1, _ = pickle.load(handle)

with open('Data\continuous_full_data_block2.pickle', 'rb') as handle:
    #refs_block2, poss_block2, dec_vels_block2, int_vel_block2, emgs_block2, Ws_block2, Hs_block2, alphas_block2, pDs_block2, times_block2, conditions_block2 = pickle.load(handle)
    _, _, _, _, emgs_block2, Ws_block2, _, _, _, times_block2, _ = pickle.load(handle)

In [3]:
# trying to figure out how often decoder updates -- every 1202 samples

W = Ws_block1[keys[0]][0]
W[1:,:,:].shape # 7199 time points x (decoder dimensions is 2 x 6)
dold = W[0]
update_ix = []
for ix,d in enumerate(W[1:]):
    if (np.array_equal(dold,d)==False):
        update_ix.append(ix)
        dold = d

update_ix.append(len(W) - 1) 
update_ix = np.asarray(update_ix)
update_ix = np.hstack([[0],update_ix])
print("update index in time indices")
print(update_ix)

# only go up to 20432

update_times = times_block1[keys[0]][0][update_ix]
print("")
print("update times in seconds")
print(update_times)

update_mins = update_times/60
print("")
print("update times in minutes")
print(update_mins)

tscale = update_ix[-1]/update_times[-1]
print("")
print("time scale conversion (index --> seconds): ", tscale)

update index in time indices
[    0  1200  2402  3604  4806  6008  7210  8412  9614 10816 12018 13220
 14422 15624 16826 18028 19230 20432 20769]

update times in seconds
[  0.          16.81372571  33.70942521  50.44435     67.45188546
  84.74962473 101.82288647 118.90209508 136.32002926 153.7001555
 170.77472734 188.02292895 205.35715556 222.46085095 239.64881945
 256.75217056 274.14669037 291.41761136 296.34003878]

update times in minutes
[0.         0.28022876 0.56182375 0.84073917 1.12419809 1.41249375
 1.69704811 1.98170158 2.27200049 2.56166926 2.84624546 3.13371548
 3.42261926 3.70768085 3.99414699 4.27920284 4.56911151 4.85696019
 4.93900065]

time scale conversion (index --> seconds):  70.08502828627614


In [4]:
#np.save(r"Data\update_ix.npy", update_ix)

# Make Input and Label DFs

In [5]:
print("The subject keys are the labels")
print(keys)

The subject keys are the labels
['METACPHS_S106', 'METACPHS_S107', 'METACPHS_S108', 'METACPHS_S109', 'METACPHS_S110', 'METACPHS_S111', 'METACPHS_S112', 'METACPHS_S113', 'METACPHS_S114', 'METACPHS_S115', 'METACPHS_S116', 'METACPHS_S117', 'METACPHS_S118', 'METACPHS_S119']


In [6]:
print("Processed EMG Data:")
print("(number of conditions, all data points, number of channels)")
print(emgs_block1[keys[0]].shape)

Processed EMG Data:
(number of conditions, all data points, number of channels)
(8, 20770, 64)


In [7]:
print("Decoder AKA Wiener Filter:")
print("(number of conditions, all data points, XY?, number of channels)")
print(Ws_block1[keys[0]].shape)

Decoder AKA Wiener Filter:
(number of conditions, all data points, XY?, number of channels)
(8, 20770, 2, 64)


In [8]:
emg_labels_df1 = pd.DataFrame(columns=["Subject", "Condition", "Channel"])
emg_data_df1 = pd.DataFrame()
dec_norms_df1 = pd.DataFrame(columns=["Subject", "Condition", "Update Number", "Frobenius Norm"])
dec_flattened_df1 = pd.DataFrame(columns=["Subject", "Condition", "Update Number", "Flattened Decoder"])

In [9]:
t0 = time.time()

for key in keys:
    patient_emgs = emgs_block1[key]
    patient_dec = Ws_block1[key]
    
    for my_cond in range(num_conds):
        for my_channel in range(num_channels):
            emg_labels_df1.loc[len(emg_labels_df1)] = [key, my_cond, my_channel]
            # Ought to make sure all of these are the same length...
            emg_data_df1 = pd.concat([emg_data_df1, pd.DataFrame(patient_emgs[my_cond, :, my_channel]).transpose()], ignore_index=True)
            
        for update_number, update_idx in enumerate(update_ix):
            dec_norms_df1.loc[len(dec_norms_df1)] = [key, my_cond, update_number, np.linalg.norm(patient_dec[my_cond, update_idx, :, :])]
            dec_flattened_df1.loc[len(dec_flattened_df1)] = [key, my_cond, update_number, np.ravel(patient_dec[my_cond, update_idx, :, :])]

t1 = time.time()
total = t1-t0  
print(total)

1269.2716588974


In [10]:
print(emg_data_df1.shape)
emg_data_df1.head()

(7168, 20770)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20760,20761,20762,20763,20764,20765,20766,20767,20768,20769
0,0.0,0.0,0.0,10.778148,10.778148,10.778148,26.874088,43.189501,43.189501,38.594581,...,54.343173,54.343173,53.363208,53.363208,53.363208,53.363208,59.557374,59.557374,59.557374,55.634152
1,0.0,0.0,0.0,9.891218,9.891218,9.891218,23.589349,36.477933,36.477933,31.296507,...,79.894291,79.894291,86.860329,86.860329,86.860329,86.860329,71.319955,71.319955,71.319955,56.606641
2,0.0,0.0,0.0,1.606057,1.606057,1.606057,8.623857,15.845217,15.845217,17.957593,...,89.500295,89.500295,84.266738,84.266738,84.266738,84.266738,71.979639,71.979639,71.979639,65.918534
3,0.0,0.0,0.0,3.668527,3.668527,3.668527,7.165376,11.62829,11.62829,15.308951,...,68.943668,68.943668,66.983974,66.983974,66.983974,66.983974,64.104558,64.104558,64.104558,61.848159
4,0.0,0.0,0.0,1.41181,1.41181,1.41181,9.707134,15.677262,15.677262,18.92241,...,43.565918,43.565918,42.34359,42.34359,42.34359,42.34359,42.235306,42.235306,42.235306,41.818073


In [11]:
print(emg_labels_df1.shape)
emg_labels_df1.head()

(7168, 3)


Unnamed: 0,Subject,Condition,Channel
0,METACPHS_S106,0,0
1,METACPHS_S106,0,1
2,METACPHS_S106,0,2
3,METACPHS_S106,0,3
4,METACPHS_S106,0,4


In [12]:
print(dec_norms_df1.shape)
dec_norms_df1.head()

(2128, 4)


Unnamed: 0,Subject,Condition,Update Number,Frobenius Norm
0,METACPHS_S106,0,0,0.06636
1,METACPHS_S106,0,1,0.06636
2,METACPHS_S106,0,2,9.70939
3,METACPHS_S106,0,3,8.20908
4,METACPHS_S106,0,4,10.406943


## Repeat for Block2

In [13]:
emg_data_df2 = pd.DataFrame()
emg_labels_df2 = pd.DataFrame(columns=["Subject", "Condition", "Channel"])
dec_norms_df2 = pd.DataFrame(columns=["Subject", "Condition", "Update Number", "Frobenius Norm"])
dec_flattened_df2 = pd.DataFrame(columns=["Subject", "Condition", "Update Number", "Flattened Decoder"])

In [14]:
t0 = time.time()

for key in keys:
    patient_emgs = emgs_block2[key]
    patient_dec = Ws_block2[key]
    
    for my_cond in range(num_conds):
        for my_channel in range(num_channels):
            emg_labels_df2.loc[len(emg_labels_df2)] = [key, my_cond, my_channel]
            # Ought to make sure all of these are the same length...
            emg_data_df2 = pd.concat([emg_data_df2, pd.DataFrame(patient_emgs[my_cond, :, my_channel]).transpose()], ignore_index=True)
            
        for update_number, update_idx in enumerate(update_ix):
            dec_norms_df2.loc[len(dec_norms_df2)] = [key, my_cond, update_number, np.linalg.norm(patient_dec[my_cond, update_idx, :, :])]
            dec_flattened_df2.loc[len(dec_flattened_df2)] = [key, my_cond, update_number, np.ravel(patient_dec[my_cond, update_idx, :, :])]

t1 = time.time()
total = t1-t0  
print(total)

1816.7099251747131


## Save Data

In [15]:
emg_data_df1.to_csv("Data\emg_full_data1.csv")
emg_labels_df1.to_csv("Data\emg_full_labels1.csv")
dec_norms_df1.to_csv("Data\decoder_full_norms1.csv")

emg_data_df2.to_csv("Data\emg_full_data2.csv")
emg_labels_df2.to_csv("Data\emg_full_labels2.csv")
dec_norms_df2.to_csv("Data\decoder_full_norms2.csv")

dec_flattened_df1.to_csv("Data\dec_full_flattened_df1.csv")
dec_flattened_df2.to_csv("Data\dec_full_flattened_df2.csv")