In [1]:
'''
Created by Brandon Katerman on March 13th, 2022

Last Modified: 04/17/22 by Ricardo Adrogue

Current stats to run on encoding events for RepFR1 -- successor to sme_tstat.py
'''

print("loading modules")
from time import time
import glob
import csv
import os
import numpy as np 
import pandas as pd
pd.set_option("display.max_columns", 200)

import xarray as xr
import scipy.stats as stats
import scipy.spatial as spatial
from sklearn import preprocessing

import cmlreaders as cml
from cmlreaders import CMLReader, get_data_index 

import ptsa 
#from ptsa.data.TimeSeriesX import TimeSeries
from ptsa.data.timeseries import TimeSeries
from ptsa.data import timeseries
from ptsa.data.readers import BaseEventReader

from ptsa.data.filters import MorletWaveletFilter
from ptsa.data.filters import ButterworthFilter

print("functions ... ", end = ' ')
# imports all functions needed for this to work
from run_matched_deliberations import *

loading modules




functions ...  imports imported <3 
functions imported


In [2]:
# Collects the localization table of every RepFR1 session so that subjects
# can later be selected by the atlas label of their electrodes.
data = cml.get_data_index(kind='r1')
data = data[data['experiment'] == 'RepFR1']

# one localization frame per (subject, session), tagged with where it came from
loc = []
for subject, subject_sessions in data.groupby('subject'):
    for session in pd.unique(subject_sessions['session']):
        reader = cml.CMLReader(subject=subject, experiment='RepFR1', session=session)
        localization = reader.load('localization')
        # scalars broadcast down the whole column
        localization['subject'] = subject
        localization['session'] = session
        loc.append(localization)

all_loc = pd.concat(loc)
# keep only the rows stored under the 'pairs' key of the localization index
all_loc_p = all_loc.loc['pairs']
# to list the available atlas labels: all_loc_p['atlases.whole_brain'].unique()


  json_normalize(flat_contact_data).set_index('name'),
  json_normalize(flat_pairs_data).set_index('names')


In [3]:
# describes regions of interest; each entry is matched as a plain substring
# of the 'atlases.whole_brain' label
regions = ['parietal', 'Hippocampus', 'entorhinal', 'Amygdala',
           'parahippocampal', 'frontal gyrus', 'inferior frontal gyrus', 'middle frontal gyrus',
           'superior frontal gyrus', 'temporal gyrus', 'MTG',
           'inferior temporal gyrus', 'superior temporal gyrus', 'MTL']
# '' matches every label, i.e. "either hemisphere"
hemispheres = ['Right', 'Left', '']

# stores subjects by ROI in a dataframe: one row per hemisphere/region combination.
# Rows are named '<hemisphere> <region>', or just '<region>' for the '' hemisphere.
atlas_labels = all_loc_p['atlases.whole_brain']
roi_records = []
for hemi in hemispheres:
    # regex=False: the names are literal text; na=False: unlabeled pairs never match
    in_hemisphere = atlas_labels.str.contains(hemi, regex=False, na=False)
    for roi in regions:
        in_roi = in_hemisphere & atlas_labels.str.contains(roi, regex=False, na=False)
        roi_subs = all_loc_p[in_roi].subject.unique()
        roi_records.append(dict(region=hemi + ' ' + roi if hemi else roi,
                                num_subs=len(roi_subs),
                                subjects=roi_subs))

# build the frame once instead of appending row by row (DataFrame.append is
# quadratic here and no longer exists in pandas >= 2.0)
results = pd.DataFrame(roi_records, columns=['region', 'num_subs', 'subjects'])

# display(results.set_index('region'))


# path = '/scratch/radrogue/RepFR1/'
# print('processed regions:')
# for file in os.listdir(path):
#     d = os.path.join(path, file)
#     if os.path.isdir(d):
#         words = d.split('/')[-1].split('_')
#         print(words[0] + ' ' + words[1])

In [12]:
# set your hemisphere and region here
# hemisphere is 'Right', 'Left', or '' for either hemisphere
hemisphere = 'Right'
region = 'MTL'

# 'MTL' is not looked up as a single label: it is the union of these atlas labels
# NOTE(review): 'Hippocampus' is not in this list -- confirm that is intentional
mtl_subregions = ['parahippocampal', 'Amygdala', 'entorhinal']
roi_labels = mtl_subregions if region == 'MTL' else [region]

# selects the subjects with electrodes in your selected region
# rows of `results` are named '<hemisphere> <region>', or just '<region>' when the
# hemisphere is '', so strip() keeps the lookup valid for hemisphere = '' as well
roi_subjects = []
for roi in roi_labels:
    roi_name = (hemisphere + ' ' + roi).strip()
    roi_rows = results[results.region == roi_name]
    if roi_rows.empty:
        raise ValueError(f"no row named {roi_name!r} in results")
    roi_subjects.append(roi_rows.subjects.iloc[0])
# np.unique drops subjects that appear under more than one label (and sorts them)
subs = np.unique(np.concatenate(roi_subjects))
print(len(subs), 'with electrodes in localization')


# checks that the pairs in that region were actually recorded from
# We only record 128 channels for most of this data
# Localization includes all electrodes (up to 256)
# So this checks that the electrodes are also in pairs, which only shows pairs where
# data was recorded

# the data index does not depend on the subject, so load it once
repfr1_index = get_data_index('r1')
repfr1_index = repfr1_index[repfr1_index.experiment == 'RepFR1']

pairs = []
recorded_subs = []
for sub in subs:
    # pairs are read from the subject's first listed session only
    first_session = repfr1_index[repfr1_index.subject == sub].session.iloc[0]
    r = cml.CMLReader(subject=sub, experiment='RepFR1', session=first_session)
    loc = r.load("localization")
    t_pairs = r.load('pairs')
    loc_p = loc.loc['pairs']

    # plain-substring match on the atlas label; unlabeled pairs (NaN) never match
    atlas = loc_p['atlases.whole_brain']
    in_hemisphere = atlas.str.contains(hemisphere, regex=False, na=False).to_numpy(dtype=bool)
    in_region = np.logical_or.reduce(
        [atlas.str.contains(roi, regex=False, na=False).to_numpy(dtype=bool) for roi in roi_labels])
    f_loc_p = loc_p[in_hemisphere & in_region]

    # localization indexes a pair by its two contact names; `pairs` labels it 'A-B'
    pairs_filter = [labels[0] + '-' + labels[1] for labels in f_loc_p.index]
    t_pairs = t_pairs[t_pairs.label.isin(pairs_filter)]
    # keep the subject only if at least one pair in the region was recorded,
    # so that `subs` and `pairs` stay aligned position by position
    if not t_pairs.empty:
        recorded_subs.append(sub)
        pairs.append(t_pairs)
subs = np.array(recorded_subs, dtype=object)
print(len(subs), 'with region in localization & pairs')

# makes lists of hemi and reg as same length as subs array
# this is because Dask requires all of your parameters to have the same shape
hemispheres = [hemisphere] * len(subs)
regions = [region] * len(subs)
print(subs)

11 with electrodes in localization


  json_normalize(flat_contact_data).set_index('name'),
  json_normalize(flat_pairs_data).set_index('names')


9 with region in localization & pairs
['R1501J' 'R1514E' 'R1516E' 'R1528E' 'R1534D' 'R1566D' 'R1582E' 'R1587J'
 'R1604J']


In [13]:
# import Dask and Dask functions to run script on the cluster
import CMLDask
from dask.distributed import wait, as_completed, progress
from dask import config

# Dask reads its comm timeouts from 'distributed.comm.timeouts.*'; a root-level
# 'timeouts' key is not one of its settings, so the defaults would stay in force
config.set({'distributed.comm.timeouts.connect': '90s',
            'distributed.comm.timeouts.tcp': '120s'})

# shut down a client left over from an earlier run of the cells below, if any
try:
    client.shutdown()
except NameError:
    # `client` has not been created yet in this kernel
    print('no client')
except Exception as err:
    # best effort: an already-closed or unreachable client must not stop the notebook
    print('no client', '(shutdown failed: ' + repr(err) + ')')

In [14]:
# creates cluster jobs, 1 for each subject, each with 10 GB limit of
# memory to calculate powers for subject and region
# client.map(function, p1, p2, p3) calls function once per position of the
# parameter lists, which is why they must all have the same length

client = CMLDask.new_dask_client("iEEG_powers", "10GB")
try:
    futures = client.map(get_rec_powers, subs, pairs, hemispheres, regions)
    # waits until the cluster job is complete
    wait(futures)
    # gathers what get_rec_powers returned for each subject (its status messages)
    power_errors = client.gather(futures)
finally:
    # shuts down the cluster even if a job raised, so no cluster jobs are left behind
    client.shutdown()
# displays the per-subject status messages
power_errors

Unique port for radrogue is 51417
{'dashboard_address': ':51417'}
To view the dashboard, run: 
`ssh -fN radrogue@rhino2.psych.upenn.edu -L 8000:192.168.86.145:51417` in your local computer's terminal (NOT rhino) 
and then navigate to localhost:8000 in your browser


Future exception was never retrieved
future: <Future finished exception=CommClosedError('in <TCP (closed) Scheduler Broadcast local=tcp://192.168.86.145:54756 remote=tcp://192.168.86.135:40950>: Stream is closed')>
Traceback (most recent call last):
  File "/home1/radrogue/.local/lib/python3.7/site-packages/distributed/comm/tcp.py", line 205, in read
    frames_nbytes = await stream.read_bytes(fmt_size)
tornado.iostream.StreamClosedError: Stream is closed

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/global/Anaconda/2019-10/lib/python3.7/site-packages/tornado/gen.py", line 742, in run
    yielded = self.gen.throw(*exc_info)  # type: ignore
  File "/home1/radrogue/.local/lib/python3.7/site-packages/distributed/utils.py", line 221, in quiet
    yield task
  File "/usr/global/Anaconda/2019-10/lib/python3.7/site-packages/tornado/gen.py", line 735, in run
    value = future.result()
  File "/home1/radrogue/.local/lib/p

[['R1501J_0 worked :)', 'R1501J_1 worked :)', 'R1501J_2 worked :)'],
 ['R1514E_0 worked :)'],
 ['R1516E_0 worked :)', 'R1516E_1 worked :)'],
 ['R1528E_0 worked :)', 'R1528E_1 worked :)'],
 ['R1534D_0 worked :)'],
 ['R1566D_1 worked :)', 'R1566D_3 worked :)', 'R1566D_4 worked :)'],
 ['R1582E_0 worked :)', 'R1582E_1 worked :)', 'R1582E_2 worked :)'],
 ['R1587J_1 worked :)', 'R1587J_2 worked :)', 'R1587J_3 worked :)'],
 ['R1604J_0 worked :)']]

In [15]:
# creates new cluster jobs, 1 for each subject, 25GB memory to calculate t-stats
client = CMLDask.new_dask_client("iEEG_stats", "25GB")
try:
    futures = client.map(rec_power_statistics, subs, pairs, hemispheres, regions)

    # gathers report on how this function ran
    # good means it was completed, otherwise shows error message
    wait(futures)
    ahh = client.gather(futures)
finally:
    # shuts down client even if a job raised, so no cluster jobs are left behind
    client.shutdown()
# displays one status message per subject
ahh

Unique port for radrogue is 51417
{'dashboard_address': ':51417'}
To view the dashboard, run: 
`ssh -fN radrogue@rhino2.psych.upenn.edu -L 8000:192.168.86.145:51417` in your local computer's terminal (NOT rhino) 
and then navigate to localhost:8000 in your browser


distributed.client - ERROR - Failed to reconnect to scheduler after 30.00 seconds, closing client
distributed.client - ERROR - Failed to reconnect to scheduler after 30.00 seconds, closing client
_GatheringFuture exception was never retrieved
future: <_GatheringFuture finished exception=CancelledError()>
concurrent.futures._base.CancelledError
_GatheringFuture exception was never retrieved
future: <_GatheringFuture finished exception=CancelledError()>
concurrent.futures._base.CancelledError


['R1501J <3',
 'R1514E <3',
 'R1516E <3',
 'R1528E <3',
 'R1534D <3',
 'R1566D failed :( all the input array dimensions for the concatenation axis must match exactly, but along dimension 3, the array at index 0 has size 590 and the array at index 1 has size 2356',
 'R1582E <3',
 'R1587J <3',
 'R1604J <3']

Exception in thread WorkerMemory:
ConnectionRefusedError: [Errno 111] Connection refused

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/home1/radrogue/.local/lib/python3.7/site-packages/distributed/comm/core.py", line 286, in connect
    timeout=min(intermediate_cap, time_left()),
  File "/home1/radrogue/.conda/envs/environmentname/lib/python3.7/asyncio/tasks.py", line 442, in wait_for
    return fut.result()
  File "/home1/radrogue/.local/lib/python3.7/site-packages/distributed/comm/tcp.py", line 410, in connect
    convert_stream_closed_error(self, e)
  File "/home1/radrogue/.local/lib/python3.7/site-packages/distributed/comm/tcp.py", line 126, in convert_stream_closed_error
    raise CommClosedError(f"in {obj}: {exc.__class__.__name__}: {exc}") from exc
distributed.comm.core.CommClosedError: in <distributed.comm.tcp.TCPConnector object at 0x2b1a738a5a90>: ConnectionRefusedError: [Errno 111] Connection refused

The a