> __Purpose:__ Implement an initial privacy attack to quantify how much re-identiifcation and linkability risk exists from both filtered EMG data and for personalization parameters (the decoder). Thus, implement a basic ML model to link the EMG data back to the corresponding subject, and likewise to link the decoder matrices back to the corresponding subject.  

In [1]:
import numpy as np
import matplotlib.pyplot as plt
# import matplotlib.image as mpimg
# import seaborn
import scipy
from scipy.optimize import minimize,least_squares
# from scipy.io import loadmat
import copy as copy
import time
import glob
import os
import pandas as pd
import pickle
from scipy.stats import wilcoxon as wilcoxon

# Load In Data

In [2]:
keys = ['METACPHS_S106', 'METACPHS_S107','METACPHS_S108', 'METACPHS_S109', 'METACPHS_S110', 'METACPHS_S111', 'METACPHS_S112']

with open('cphs_data_block1.pickle', 'rb') as handle:
    #refs_block1, poss_block1, dec_vels_block1, int_vel_block1, emgs_block1, Ws_block1, Hs_block1, alphas_block1, pDs_block1, times_block1, conditions_block1 = pickle.load(handle)
    _, _, _, _, emgs_block1, Ws_block1, _, _, _, times_block1, _ = pickle.load(handle)

with open('cphs_data_block2.pickle', 'rb') as handle:
    #refs_block2, poss_block2, dec_vels_block2, int_vel_block2, emgs_block2, Ws_block2, Hs_block2, alphas_block2, pDs_block2, times_block2, conditions_block2 = pickle.load(handle)
    _, _, _, _, emgs_block2, Ws_block2, _, _, _, times_block2, _ = pickle.load(handle)

In [3]:
# trying to figure out how often decoder updates -- every 1202 samples

W = Ws_block1[keys[0]][0]
W[1:,:,:].shape # 7199 time points x (decoder dimensions is 2 x 6)
dold = W[0]
update_ix = []
for ix,d in enumerate(W[1:]):
  if (np.array_equal(dold,d)==False):
    update_ix.append(ix)
    dold = d

update_ix.append(len(W) - 1) 
update_ix = np.asarray(update_ix)
update_ix = np.hstack([[0],update_ix])
print("update index in time indices")
print(update_ix)

# only go up to 20432

update_times = times_block1[keys[0]][0][update_ix]
print("")
print("update times in seconds")
print(update_times)

update_mins = update_times/60
print("")
print("update times in minutes")
print(update_mins)

tscale = update_ix[-1]/update_times[-1]
print("")
print("time scale conversion (index --> seconds): ", tscale)

update index in time indices
[    0  1200  2402  3604  4806  6008  7210  8412  9614 10816 12018 13220
 14422 15624 16826 18028 19230 20432 20769]

update times in seconds
[  0.          16.81372571  33.70942521  50.44435     67.45188546
  84.74962473 101.82288647 118.90209508 136.32002926 153.7001555
 170.77472734 188.02292895 205.35715556 222.46085095 239.64881945
 256.75217056 274.14669037 291.41761136 296.34003878]

update times in minutes
[0.         0.28022876 0.56182375 0.84073917 1.12419809 1.41249375
 1.69704811 1.98170158 2.27200049 2.56166926 2.84624546 3.13371548
 3.42261926 3.70768085 3.99414699 4.27920284 4.56911151 4.85696019
 4.93900065]

time scale conversion (index --> seconds):  70.08502828627614


# Make Input and Label DFs

In [4]:
print("The subject keys are the labels")
print(keys)

The subject keys are the labels
['METACPHS_S106', 'METACPHS_S107', 'METACPHS_S108', 'METACPHS_S109', 'METACPHS_S110', 'METACPHS_S111', 'METACPHS_S112']


In [5]:
print("Processed EMG Data:")
print("(number of conditions, all data points, number of channels)")
print(emgs_block1[keys[0]].shape)

Processed EMG Data:
(number of conditions, all data points, number of channels)
(8, 20770, 64)


In [6]:
print("Decoder AKA Wiener Filter:")
print("(number of conditions, all data points, XY?, number of channels)")
print(Ws_block1[keys[0]].shape)

Decoder AKA Wiener Filter:
(number of conditions, all data points, XY?, number of channels)
(8, 20770, 2, 64)


In [7]:
emg_labels_df = pd.DataFrame(columns=["Subject", "Condition", "Channel"])
emg_labels_df.head()

Unnamed: 0,Subject,Condition,Channel


In [8]:
emg_data_df = pd.DataFrame()
emg_data_df.head()

In [9]:
print("I don't think I can actually just pass in the matrix...")

dec_df = pd.DataFrame(columns=["Subject", "Condition", "Update Number", "????"])
dec_df.head()

I don't think I can actually just pass in the matrix...


Unnamed: 0,Subject,Condition,Update Number,????


In [10]:
dec_norms_df = pd.DataFrame(columns=["Subject", "Condition", "Update Number", "Frobenius Norm"])
dec_norms_df.head()

Unnamed: 0,Subject,Condition,Update Number,Frobenius Norm


In [11]:
num_conds = 8
num_channels = 64 

In [12]:
t0 = time.time()

for key in keys:
    patient_emgs = emgs_block1[key]
    patient_dec = Ws_block1[key]
    
    for my_cond in range(num_conds):
        for my_channel in range(num_channels):
            emg_labels_df.loc[len(emg_labels_df)] = [key, my_cond, my_channel]
            #emg_data_df = pd.concat([emg_data_df, pd.Series(patient_emgs[my_cond, :, my_channel])], ignore_index=True)
            
        #for update_number, update_idx in enumerate(update_ix):
        #    emg_labels_df.loc[len(emg_labels_df)] = [key, my_cond, my_channel]

t1 = time.time()
total = t1-t0   
print(total)

10.267606019973755


In [13]:
t0 = time.time()

for key in keys:
    patient_emgs = emgs_block1[key]
    patient_dec = Ws_block1[key]
    
    for my_cond in range(num_conds):
        for my_channel in range(num_channels):
            #emg_labels_df.loc[len(emg_labels_df)] = [key, my_cond, my_channel]
            emg_data_df = pd.concat([emg_data_df, pd.Series(patient_emgs[my_cond, :, my_channel])], ignore_index=True)
            
        #for update_number, update_idx in enumerate(update_ix):
        #    emg_labels_df.loc[len(emg_labels_df)] = [key, my_cond, my_channel]
        
t1 = time.time()
total = t1-t0  
print(total)

536.1957488059998


In [14]:
t0 = time.time()

for key in keys:
    patient_emgs = emgs_block1[key]
    patient_dec = Ws_block1[key]
    
    for my_cond in range(num_conds):
        #for my_channel in range(num_channels):
            #emg_labels_df.loc[len(emg_labels_df)] = [key, my_cond, my_channel]
            #emg_data_df = pd.concat([emg_data_df, pd.Series(patient_emgs[my_cond, :, my_channel])], ignore_index=True)
            
        for update_number, update_idx in enumerate(update_ix):
            emg_labels_df.loc[len(emg_labels_df)] = [key, my_cond, my_channel]
        
t1 = time.time()
total = t1-t0  
print(total)

4.600125789642334


In [20]:
74439680/20770

3584.0

In [18]:
print(emg_data_df.shape)
emg_data_df.head()

(74439680, 1)


Unnamed: 0,0
0,0.0
1,0.0
2,0.0
3,10.778148
4,10.778148


In [21]:
4648/64

72.625

In [19]:
print(emg_labels_df.shape)
emg_labels_df.head()

(4648, 3)


Unnamed: 0,Subject,Condition,Channel
0,METACPHS_S106,0,0
1,METACPHS_S106,0,1
2,METACPHS_S106,0,2
3,METACPHS_S106,0,3
4,METACPHS_S106,0,4
