In [126]:
# explore stimulation data
import os
from cmlreaders import CMLReader, get_data_index
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [140]:
# get subject and experiment data
# Load the "r1" data index and keep only the rows whose experiment is the target
# experiment or its "cat"-prefixed variant (e.g. FR5 and catFR5).
experiment = 'FR5'
rootdir = '/'
experiment_names = [experiment, 'cat' + experiment]

df = get_data_index("r1", rootdir=rootdir)
df = df[df['experiment'].isin(experiment_names)]

# Cell output: number of distinct subjects with at least one matching session.
len(df['subject'].unique())


39

In [141]:
# get item-level data
# Read every session-summary file for the experiment from the report database and
# combine them into one item-level frame.
import os
import glob

from ramutils.reports.summary import FRStimSessionSummary

report_db_location = "/scratch/report_database/"

# glob returns matches in arbitrary, filesystem-dependent order; sorting makes the
# row order of the combined frame (and of anything exported from it) reproducible.
# The pattern has wildcards on both sides of the experiment name, so files for
# 'cat' + experiment are matched as well.
fr_session_summary_locations = sorted(
    glob.glob(os.path.join(report_db_location, '*' + experiment + '*session_summary*'))
)
session_summaries = [
    FRStimSessionSummary.from_hdf(summary_loc)
    for summary_loc in fr_session_summary_locations
]

combined_item_df = FRStimSessionSummary.combine_sessions(session_summaries)

# Both flags start as False for every row; later cells switch them on.
combined_item_df['is_tbi'] = False
combined_item_df['is_LTC'] = False
# Force a float dtype so numeric comparisons on amplitude behave predictably.
combined_item_df['amplitude'] = combined_item_df['amplitude'].astype('float')



In [129]:
# Sanity check: the report database and the data index should contain the same
# number of distinct subjects (missing values, if any, count as one entry).
combined_item_df['subject'].nunique(dropna=False) == df['subject'].nunique(dropna=False)

True

In [122]:
# Preview the first five rows of the combined item-level frame.
combined_item_df.head(n=5)

Unnamed: 0,serialpos,session,subject,experiment,mstime,type,recalled,list,is_stim_list,phase,...,thresh,classifier_output,location,amplitude,pulse_freq,stim_duration,stimAnodeTag,stimCathodeTag,is_tbi,is_LTC
0,1,0,R1124J,FR3,1453231867650,WORD,1,1,0,,...,0.5,0.614445,,,,,,,False,False
1,2,0,R1124J,FR3,1453231870151,WORD,0,1,0,,...,0.5,0.654978,,,,,,,False,False
2,3,0,R1124J,FR3,1453231872584,WORD,0,1,0,,...,0.5,0.563749,,,,,,,False,False
3,4,0,R1124J,FR3,1453231875019,WORD,1,1,0,,...,0.5,0.558127,,,,,,,False,False
4,5,0,R1124J,FR3,1453231877635,WORD,0,1,0,,...,0.5,0.510759,,,,,,,False,False


In [132]:
# Collect the distinct subject codes, in order of first appearance, and show them.
subject_column = combined_item_df['subject']
subjects = subject_column.unique()
print(subjects)

['R1275D' 'R1292E' 'R1304N' 'R1308T' 'R1315T' 'R1317D' 'R1320D' 'R1321M'
 'R1323T' 'R1328E' 'R1330D' 'R1334T' 'R1339D' 'R1341T' 'R1345D' 'R1351M'
 'R1374T' 'R1375C' 'R1378T' 'R1379E' 'R1380D' 'R1383J' 'R1384J' 'R1385E'
 'R1387E' 'R1390M' 'R1395M' 'R1389J' 'R1401J' 'R1406M' 'R1398J' 'R1441T'
 'R1465D' 'R1487T' 'R1488T' 'R1489E' 'R1491T' 'R1477J' 'R1498D']


In [142]:
# identify stim location
# For every (subject, experiment, session), look at the first stimulation location
# and set is_LTC on that session's rows of combined_item_df when the location text
# contains both 'left' and 'temporal'.

# Sessions whose largest stimulation amplitude reaches this value are skipped and
# keep is_LTC == False.  NOTE(review): amplitude units are presumably mA -- confirm.
MAX_STIM_AMPLITUDE = 1.0

for subject in subjects:
    # Plain boolean arrays are used as masks so the assignment below is positional
    # and never depends on the frame's index labels being unique.
    subject_mask = (combined_item_df['subject'] == subject).values
    df_subject = combined_item_df[subject_mask]

    for (exp, session), df_subject_session in df_subject.groupby(['experiment', 'session']):
        # Errors are handled per session so one bad session does not hide the
        # remaining sessions of the same subject.
        try:
            # Series.max() ignores NaN; a session with no amplitude at all yields
            # NaN, which fails the comparison and is therefore skipped.
            if not df_subject_session['amplitude'].max() < MAX_STIM_AMPLITUDE:
                continue

            stim_items = df_subject_session[df_subject_session['is_stim_item'] == 1]

            # Drop missing locations whether they are real NaN or the string 'nan'.
            stim_locations = stim_items['location'].dropna().astype(str)
            stim_locations = stim_locations[stim_locations.str.lower() != 'nan']
            if stim_locations.empty:
                print("{}: no stim location found for {} session {}".format(subject, exp, session))
                continue

            # Only the first location is used: hard coded for fr3/fr5, for fr6,
            # please change.
            location = stim_locations.iloc[0].lower()
            is_LTC = ('left' in location) and ('temporal' in location)  # LTC stimulation

            # Match on experiment as well as session number, so sessions that share
            # a number across experiments cannot overwrite each other's flag.
            session_mask = (
                subject_mask
                & (combined_item_df['experiment'] == exp).values
                & (combined_item_df['session'] == session).values
            )
            combined_item_df.loc[session_mask, 'is_LTC'] = is_LTC
        except Exception as err:
            # Best effort: report the real failure and move on to the next session.
            print("{}: could not tag {} session {} ({!r})".format(subject, exp, session, err))

In [143]:
# Keep only TBI subjects (definite or possible) that are not on the exclusion list
# and that received left-temporal stimulation, drop the early lists, and export.
tbi_frame = pd.read_csv('TBI_info.csv')
tbi_definite_subjects = tbi_frame['Subject'].unique()

tbi_possibly_subjects = ['R1008J', 'R1014D', 'R1016M', 'R1019J', 'R1047D', 'R1067E', 'R1156D', 'R1162N', 'R1195E', 'R1201P', 'R1212P', 'R1337E', 'R1339D', 'R1345D', 'R1379E', 'R1498D', 'R1491T']

# excluded subjects in Youssef's paper
excluded_subjects = ['R1124J','R1145J','R1154D','R1161E','R1163T','R1166D','R1170J','R1195E','R1200T','R1201P','R1202M','R1204T','R1217T','R1223E','R1226D','R1230J','R1235E','R1236J','R1243T','R1247P','R1260D','R1264P','R1274T','R1277J','R1286J']

tbi_subjects = np.append(tbi_definite_subjects, tbi_possibly_subjects)

# Vectorised membership tests; .values keeps both masks as plain boolean arrays.
tbi_mask = combined_item_df['subject'].isin(tbi_subjects).values
excluded_mask = (~combined_item_df['subject'].isin(excluded_subjects)).values
tbi_mask = tbi_mask & excluded_mask

# .copy() makes the filtered frame independent of its parent, so the column
# assignment below does not raise SettingWithCopyWarning.
combined_item_df = combined_item_df[tbi_mask & combined_item_df.is_LTC].copy()
combined_item_df['is_tbi'] = True
combined_item_df = combined_item_df[combined_item_df['list'] > 3]  # exclude first 3 lists

# The export file name carries the experiment and today's date.
today = pd.Timestamp.today().date()  # get date
combined_item_df.to_csv(experiment + '_all_amp_' +  str(today) + '.csv')

In [144]:
# Show which subjects are left in combined_item_df at this point.
combined_item_df.loc[:, 'subject'].unique()

array(['R1374T', 'R1379E', 'R1465D', 'R1487T', 'R1488T', 'R1489E',
       'R1491T', 'R1498D'], dtype=object)