In [157]:
import torch
from torch_geometric.data import download_url, extract_zip

In [158]:
import os
import shutil
import mat73
import pickle
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy.io import loadmat
from sklearn import preprocessing

This notebook only needs to be run once!

In [159]:
#@title Download the curated open-source worm datasets from host server
#@markdown Downloading can take up to 8 minutes depending on your network speed!

# `root` is where the processed pickles are written by the cells below;
# `data_path` is where the raw .mat files are extracted to.
root = os.getcwd()
url = 'https://www.dropbox.com/s/5dv8ezn8ehjwyid/opensource_data.zip?dl=1'
filename = 'opensource_data.zip'
data_path = os.path.join(root, 'opensource_data')
# Single absolute location of the archive, used for extracting and deleting it.
zip_path = os.path.join(root, filename)

# `root` is the current working directory, so it needs no existence check.
# Only download when the extracted folder is not already present.
if not os.path.exists(data_path):
    download_url(url=url, folder=root, filename=filename)
    extract_zip(zip_path, folder=data_path) # extract zip file
    os.unlink(zip_path) # remove zip file

Downloading https://www.dropbox.com/s/5dv8ezn8ehjwyid/opensource_data.zip?dl=1
Extracting opensource_data.zip


In [160]:
#@title Kato et al., Cell Reports 2015, *Global Brain Dynamics Embed the Motor Command Sequence of Caenorhabditis elegans* 

# One entry per .mat file: (file stem, which is also the top-level struct name,
# field holding the neuron IDs, field holding the bleach-corrected traces).
# The two files store the same kind of data under different field names.
kato_sources = [
    ('WT_Stim', 'IDs', 'traces'),
    ('WT_NoStim', 'NeuronNames', 'deltaFOverF_bc'),
]

data_dict = dict()
worm_count = 0 # running total so worm numbering continues across files

for struct_name, ids_field, traces_field in kato_sources:
    # load the .mat file
    arr = mat73.loadmat(os.path.join(data_path, 'Kato2015', struct_name + '.mat'))[struct_name]
    print(list(arr.keys()))
    print()
    # get data for all worms
    all_IDs = arr[ids_field] # identified neuron IDs (only subset have neuron names)
    all_traces = arr[traces_field] # neural activity traces corrected for bleaching
    print('num. worms:', len(all_IDs))
    print()

    for worm_idx, real_data in enumerate(all_traces):
        worm_count += 1
        worm = "worm" + str(worm_count)
        # An ID entry may be wrapped in a list; keep only its first element.
        raw_IDs = [(j[0] if isinstance(j, list) else j) for j in all_IDs[worm_idx]]
        # Entries that are None or an ndarray get their 1-based position as a
        # placeholder name; everything else is kept as a string.
        neuron_IDs = {nid+1: (str(nid+1) if (j is None or isinstance(j, np.ndarray)) else str(j))
                      for nid, j in enumerate(raw_IDs)}
        # Drop '0' characters from non-numeric names that do not end in '0'
        # (presumably to turn zero-padded names such as 'VB01' into 'VB1').
        neuron_IDs = {nid: (name.replace('0','') if not name.endswith('0') and not name.isnumeric() else name)
                      for nid, name in neuron_IDs.items()}
        max_time, num_neurons = real_data.shape
        num_named = len([v for v in neuron_IDs.values() if not v.isnumeric()]) # number of neurons that were ID'd
        print("len. Ca recording %s, total num. neurons %s, num. ID'd neurons %s"%(
            max_time, num_neurons, num_named))
        sc = preprocessing.MinMaxScaler() # normalize data (each column is scaled independently)
        real_data = sc.fit_transform(real_data)
        real_data = np.expand_dims(real_data, axis=-1) # add a feature dimension
        real_data = torch.tensor(real_data, dtype=torch.float64) # convert to tensor
        data_dict[worm] = {'data': real_data, 'neuron_ids': neuron_IDs,
                           'max_time': max_time, 'num_neurons': num_neurons, 'num_named': num_named}

# pickle the data; context managers make sure both file handles are closed
file = os.path.join(root, "Kato2015.pickle")
with open(file, "wb") as pickle_out:
    pickle.dump(data_dict, pickle_out)
# read the file back in as a check that it was written correctly
with open(file, "rb") as pickle_in:
    Kato2015 = pickle.load(pickle_in)
print()
print(Kato2015.keys())
print()

['IDs', 'States', 'dataset', 'fps', 'stimulus', 'timeVectorSeconds', 'traces', 'tracesDif', 'traces_raw']

num. worms: 7

len. Ca recording 2198, total num. neurons 107, num. ID'd neurons 43
len. Ca recording 2017, total num. neurons 122, num. ID'd neurons 44
len. Ca recording 2197, total num. neurons 124, num. ID'd neurons 37
len. Ca recording 2018, total num. neurons 134, num. ID'd neurons 51
len. Ca recording 2201, total num. neurons 123, num. ID'd neurons 48
len. Ca recording 2017, total num. neurons 151, num. ID'd neurons 44
len. Ca recording 2019, total num. neurons 146, num. ID'd neurons 51
['NeuronNames', 'Opts', 'States', 'dataset', 'deltaFOverF', 'deltaFOverF_bc', 'derivs', 'fps', 'stateParams', 'tv']

num. worms: 5

len. Ca recording 3137, total num. neurons 109, num. ID'd neurons 38
len. Ca recording 3134, total num. neurons 135, num. ID'd neurons 44
len. Ca recording 3059, total num. neurons 131, num. ID'd neurons 32
len. Ca recording 3311, total num. neurons 125, num. ID'

In [161]:
#@title Nichols et al., Science 2017, *A global brain state underlies C. elegans sleep behavior* 

# Each name is both the .mat file stem and the top-level struct inside it.
# All four files use the 'IDs' and 'traces' fields.
nichols_sources = ['n2_let', 'n2_prelet', 'npr1_let', 'npr1_prelet']

data_dict = dict()
worm_count = 0 # running total so worm numbering continues across files

for struct_name in nichols_sources:
    # load the .mat file
    arr = mat73.loadmat(os.path.join(data_path, 'Nichols2017', struct_name + '.mat'))[struct_name]
    print(list(arr.keys()))
    print()
    # get data for all worms
    all_IDs = arr['IDs'] # identified neuron IDs (only subset have neuron names)
    all_traces = arr['traces'] # neural activity traces corrected for bleaching
    print('num. worms:', len(all_IDs))
    print()

    for worm_idx, real_data in enumerate(all_traces):
        worm_count += 1
        worm = "worm" + str(worm_count)
        # An ID entry may be wrapped in a list; keep only its first element.
        raw_IDs = [(j[0] if isinstance(j, list) else j) for j in all_IDs[worm_idx]]
        # Entries that are None or an ndarray get their 1-based position as a
        # placeholder name; everything else is kept as a string.
        neuron_IDs = {nid+1: (str(nid+1) if (j is None or isinstance(j, np.ndarray)) else str(j))
                      for nid, j in enumerate(raw_IDs)}
        # Drop '0' characters from non-numeric names that do not end in '0'
        # (presumably to turn zero-padded names such as 'VB01' into 'VB1').
        neuron_IDs = {nid: (name.replace('0','') if not name.endswith('0') and not name.isnumeric() else name)
                      for nid, name in neuron_IDs.items()}
        max_time, num_neurons = real_data.shape
        num_named = len([v for v in neuron_IDs.values() if not v.isnumeric()]) # number of neurons that were ID'd
        print("len. Ca recording %s, total num. neurons %s, num. ID'd neurons %s"%(
            max_time, num_neurons, num_named))
        sc = preprocessing.MinMaxScaler() # normalize data (each column is scaled independently)
        real_data = sc.fit_transform(real_data)
        real_data = np.expand_dims(real_data, axis=-1) # add a feature dimension
        real_data = torch.tensor(real_data, dtype=torch.float64) # convert to tensor
        data_dict[worm] = {'data': real_data, 'neuron_ids': neuron_IDs,
                           'max_time': max_time, 'num_neurons': num_neurons, 'num_named': num_named}

# pickle the data; context managers make sure both file handles are closed
file = os.path.join(root, "Nichols2017.pickle")
with open(file, "wb") as pickle_out:
    pickle.dump(data_dict, pickle_out)
# read the file back in as a check that it was written correctly
with open(file, "rb") as pickle_in:
    Nichols2017 = pickle.load(pickle_in)
print()
print(Nichols2017.keys())
print()

['FiveStates', 'FourStates', 'IDs', 'dataset', 'fps', 'stimulus', 'timeVectorSeconds', 'traces', 'tracesDif']

num. worms: 12

len. Ca recording 4224, total num. neurons 117, num. ID'd neurons 22
len. Ca recording 3445, total num. neurons 130, num. ID'd neurons 37
len. Ca recording 3792, total num. neurons 121, num. ID'd neurons 35
len. Ca recording 3793, total num. neurons 119, num. ID'd neurons 36
len. Ca recording 4212, total num. neurons 104, num. ID'd neurons 33
len. Ca recording 3642, total num. neurons 121, num. ID'd neurons 26
len. Ca recording 4214, total num. neurons 124, num. ID'd neurons 36
len. Ca recording 3190, total num. neurons 116, num. ID'd neurons 35
len. Ca recording 3646, total num. neurons 113, num. ID'd neurons 40
len. Ca recording 4212, total num. neurons 108, num. ID'd neurons 32
len. Ca recording 3154, total num. neurons 127, num. ID'd neurons 29
len. Ca recording 3034, total num. neurons 104, num. ID'd neurons 36
['FiveStates', 'FourStates', 'IDs', 'dataset'

In [162]:
#@title Nguyen et al., PLOS CompBio 2017, *Automatically tracking neurons in a moving and deforming brain* 

data_dict = dict()

# This dataset ships one .mat file per worm: heatData_worm1.mat and heatData_worm2.mat.
for worm_num in (1, 2):
    worm = "worm" + str(worm_num)
    # load .mat file for this worm
    arr = loadmat(os.path.join(data_path, 'Nguyen2017', 'heatData_' + worm + '.mat'))
    print(list(arr.keys()))
    print()
    # NOTE(review): the original comment here described the *ratio* signal
    # (gPhotoCorr/rPhotoCorr, normalized as delta R / R0 "in the same way as R2
    # and G2"), which appears to be the 'Ratio2' field. This cell uses 'G2' -
    # confirm that G2 rather than Ratio2 is the intended activity signal.
    G2 = arr['G2']
    cgIdx = arr['cgIdx'].squeeze() # ordered indices derived from hierarchically clustering the correlation matrix
    # cgIdx comes from MATLAB, so it is shifted by one to index from zero;
    # the transpose puts time on the first axis and neurons on the second.
    real_data = G2[cgIdx-1, :].T
    real_data = np.nan_to_num(real_data) # replace NaNs
    max_time, num_neurons = real_data.shape
    num_named = 0 # every neuron gets a numeric placeholder name in this dataset
    neuron_IDs = {idx+1: str(idx+1) for idx in range(num_neurons)}
    print("len. Ca recording %s, total num. neurons %s, num. ID'd neurons %s"%(
            max_time, num_neurons, num_named))
    print()
    # normalize the data (each column is scaled independently)
    sc = preprocessing.MinMaxScaler()
    real_data = sc.fit_transform(real_data)
    # add a feature dimension and convert to tensor
    real_data = np.expand_dims(real_data, axis=-1)
    real_data = torch.tensor(real_data, dtype=torch.float64)
    data_dict[worm] = {'data': real_data, 'neuron_ids': neuron_IDs, 'max_time': max_time,
                       'num_neurons': num_neurons, 'num_named': num_named}

# pickle the data; context managers make sure both file handles are closed
file = os.path.join(root, "Nguyen2017.pickle")
with open(file, "wb") as pickle_out:
    pickle.dump(data_dict, pickle_out)
# read the file back in as a check that it was written correctly
with open(file, "rb") as pickle_in:
    Nguyen2017 = pickle.load(pickle_in)
print(Nguyen2017.keys())
print()

['__header__', '__version__', '__globals__', 'hasPointsTime', 'ethoTrack', 'R2', 'G2', 'Ratio2', 'acorr', 'cgIdx', 'cgIdxRev', 'DmatAll']

len. Ca recording 1516, total num. neurons 77, num. ID'd neurons 0

['__header__', '__version__', '__globals__', 'hasPointsTime', 'ethoTrack', 'R2', 'G2', 'Ratio2', 'acorr', 'cgIdx', 'cgIdxRev', 'rRaw', 'gRaw', 'rPhotoCorr', 'gPhotoCorr']

len. Ca recording 2849, total num. neurons 156, num. ID'd neurons 0

dict_keys(['worm1', 'worm2'])



In [163]:
#@title Skora et al., Cell Reports 2018, *Energy Scarcity Promotes a Brain-wide Sleep State Modulated by Insulin Signaling in C. elegans* 

# Each name is both the .mat file stem and the top-level struct inside it.
# Both files use the 'IDs' and 'traces' fields.
skora_sources = ['WT_fasted', 'WT_starved']

data_dict = dict()
worm_count = 0 # running total so worm numbering continues across files

for struct_name in skora_sources:
    # load the .mat file
    arr = mat73.loadmat(os.path.join(data_path, 'Skora2018', struct_name + '.mat'))[struct_name]
    print(list(arr.keys()))
    print()
    # get data for all worms
    all_IDs = arr['IDs'] # identified neuron IDs (only subset have neuron names)
    all_traces = arr['traces'] # neural activity traces corrected for bleaching
    print('num. worms:', len(all_IDs))
    print()

    for worm_idx, real_data in enumerate(all_traces):
        worm_count += 1
        worm = "worm" + str(worm_count)
        # An ID entry may be wrapped in a list; keep only its first element.
        raw_IDs = [(j[0] if isinstance(j, list) else j) for j in all_IDs[worm_idx]]
        # Entries that are None or an ndarray get their 1-based position as a
        # placeholder name; everything else is kept as a string.
        neuron_IDs = {nid+1: (str(nid+1) if (j is None or isinstance(j, np.ndarray)) else str(j))
                      for nid, j in enumerate(raw_IDs)}
        # Drop '0' characters from non-numeric names that do not end in '0'
        # (presumably to turn zero-padded names such as 'VB01' into 'VB1').
        neuron_IDs = {nid: (name.replace('0','') if not name.endswith('0') and not name.isnumeric() else name)
                      for nid, name in neuron_IDs.items()}
        max_time, num_neurons = real_data.shape
        num_named = len([v for v in neuron_IDs.values() if not v.isnumeric()]) # number of neurons that were ID'd
        print("len. Ca recording %s, total num. neurons %s, num. ID'd neurons %s"%(
            max_time, num_neurons, num_named))
        sc = preprocessing.MinMaxScaler() # normalize data (each column is scaled independently)
        real_data = sc.fit_transform(real_data)
        real_data = np.expand_dims(real_data, axis=-1) # add a feature dimension
        real_data = torch.tensor(real_data, dtype=torch.float64) # convert to tensor
        data_dict[worm] = {'data': real_data, 'neuron_ids': neuron_IDs,
                           'max_time': max_time, 'num_neurons': num_neurons, 'num_named': num_named}

# pickle the data; context managers make sure both file handles are closed
file = os.path.join(root, "Skora2018.pickle")
with open(file, "wb") as pickle_out:
    pickle.dump(data_dict, pickle_out)
# read the file back in as a check that it was written correctly
with open(file, "rb") as pickle_in:
    Skora2018 = pickle.load(pickle_in)
print()
print(Skora2018.keys())
print()

['FourStateKey', 'FourStates', 'IDs', 'dataset', 'fps', 'stimulus', 'timeVectorSeconds', 'traces']

num. worms: 6

len. Ca recording 2597, total num. neurons 139, num. ID'd neurons 40
len. Ca recording 2397, total num. neurons 143, num. ID'd neurons 51
len. Ca recording 2585, total num. neurons 131, num. ID'd neurons 55
len. Ca recording 2209, total num. neurons 114, num. ID'd neurons 48
len. Ca recording 2204, total num. neurons 123, num. ID'd neurons 45
len. Ca recording 2810, total num. neurons 127, num. ID'd neurons 42
['FourStateKey', 'FourStates', 'IDs', 'dataset', 'fps', 'stimulus', 'timeVectorSeconds', 'traces']

num. worms: 6

len. Ca recording 2366, total num. neurons 128, num. ID'd neurons 48
len. Ca recording 2362, total num. neurons 147, num. ID'd neurons 52
len. Ca recording 2353, total num. neurons 127, num. ID'd neurons 45
len. Ca recording 1893, total num. neurons 125, num. ID'd neurons 39
len. Ca recording 1894, total num. neurons 126, num. ID'd neurons 47
len. Ca rec

In [164]:
#@title Kaplan et al., Neuron 2020, *Nested Neuronal Dynamics Orchestrate a Behavioral Hierarchy across Timescales*

# One entry per .mat file: (file stem, name of the top-level struct inside it).
# All three files use the 'neuron_ID' and 'traces_bleach_corrected' fields.
kaplan_sources = [
    ('Neuron2019_Data_RIShisCl', 'RIShisCl_Neuron2019'),
    ('Neuron2019_Data_MNhisCl_RIShisCl', 'MNhisCl_RIShisCl_Neuron2019'),
    ('Neuron2019_Data_SMDhisCl_RIShisCl', 'SMDhisCl_RIShisCl_Neuron2019'),
]

data_dict = dict()
worm_count = 0 # running total so worm numbering continues across files

for file_stem, struct_name in kaplan_sources:
    # load the .mat file
    arr = mat73.loadmat(os.path.join(data_path, 'Kaplan2020', file_stem + '.mat'))[struct_name]
    print(list(arr.keys()))
    print()
    # get data for all worms
    all_IDs = arr['neuron_ID'] # identified neuron IDs (only subset have neuron names)
    all_traces = arr['traces_bleach_corrected'] # neural activity traces corrected for bleaching
    print('num. worms:', len(all_IDs))
    print()

    for worm_idx, real_data in enumerate(all_traces):
        worm_count += 1
        worm = "worm" + str(worm_count)
        # Key each ID by its 1-based position; every entry is stringified as-is.
        neuron_IDs = {nid+1: str(j) for nid, j in enumerate(all_IDs[worm_idx])}
        # Drop '0' characters from non-numeric names that do not end in '0'
        # (presumably to turn zero-padded names such as 'VB01' into 'VB1').
        neuron_IDs = {nid: (name.replace('0','') if not name.endswith('0') and not name.isnumeric() else name)
                      for nid, name in neuron_IDs.items()}
        max_time, num_neurons = real_data.shape
        num_named = len([v for v in neuron_IDs.values() if not v.isnumeric()]) # number of neurons that were ID'd
        print("len. Ca recording %s, total num. neurons %s, num. ID'd neurons %s"%(
            max_time, num_neurons, num_named))
        sc = preprocessing.MinMaxScaler() # normalize data (each column is scaled independently)
        real_data = sc.fit_transform(real_data)
        real_data = np.expand_dims(real_data, axis=-1) # add a feature dimension
        real_data = torch.tensor(real_data, dtype=torch.float64) # convert to tensor
        data_dict[worm] = {'data': real_data, 'neuron_ids': neuron_IDs,
                           'max_time': max_time, 'num_neurons': num_neurons,
                           'num_named': num_named}

# pickle the data; context managers make sure both file handles are closed
file = os.path.join(root, "Kaplan2020.pickle")
with open(file, "wb") as pickle_out:
    pickle.dump(data_dict, pickle_out)
# read the file back in as a check that it was written correctly
with open(file, "rb") as pickle_in:
    Kaplan2020 = pickle.load(pickle_in)
print()
print(Kaplan2020.keys())
print()

ERROR:root:ERROR: MATLAB type not supported: function_handle_workspace, (uint32)
ERROR:root:ERROR: MATLAB type not supported: function_handle_workspace, (uint32)
ERROR:root:ERROR: MATLAB type not supported: function_handle_workspace, (uint32)
ERROR:root:ERROR: MATLAB type not supported: function_handle_workspace, (uint32)
ERROR:root:ERROR: MATLAB type not supported: function_handle_workspace, (uint32)
ERROR:root:ERROR: MATLAB type not supported: function_handle_workspace, (uint32)
ERROR:root:ERROR: MATLAB type not supported: function_handle_workspace, (uint32)
ERROR:root:ERROR: MATLAB type not supported: function_handle_workspace, (uint32)
ERROR:root:ERROR: MATLAB type not supported: function_handle_workspace, (uint32)
ERROR:root:ERROR: MATLAB type not supported: function_handle_workspace, (uint32)


['five_state_annotation', 'neuron_ID', 'parameters', 'peaks', 'raw_traces', 'time_vector', 'traces_bleach_corrected', 'traces_bleach_corrected_detrended', 'traces_derivatives', 'volumes_per_second']

num. worms: 10

len. Ca recording 5455, total num. neurons 103, num. ID'd neurons 45
len. Ca recording 5455, total num. neurons 129, num. ID'd neurons 53
len. Ca recording 5455, total num. neurons 122, num. ID'd neurons 48
len. Ca recording 5455, total num. neurons 119, num. ID'd neurons 53
len. Ca recording 5455, total num. neurons 124, num. ID'd neurons 53
len. Ca recording 3578, total num. neurons 111, num. ID'd neurons 40
len. Ca recording 3269, total num. neurons 128, num. ID'd neurons 37
len. Ca recording 2941, total num. neurons 127, num. ID'd neurons 48
len. Ca recording 4010, total num. neurons 129, num. ID'd neurons 45
len. Ca recording 3692, total num. neurons 124, num. ID'd neurons 44


ERROR:root:ERROR: MATLAB type not supported: function_handle_workspace, (uint32)
ERROR:root:ERROR: MATLAB type not supported: function_handle_workspace, (uint32)
ERROR:root:ERROR: MATLAB type not supported: function_handle_workspace, (uint32)
ERROR:root:ERROR: MATLAB type not supported: function_handle_workspace, (uint32)


['five_state_annotation', 'neuron_ID', 'parameters', 'peaks', 'raw_traces', 'time_vector', 'traces_bleach_corrected', 'traces_bleach_corrected_detrended', 'traces_derivatives', 'volumes_per_second']

num. worms: 4

len. Ca recording 2812, total num. neurons 102, num. ID'd neurons 28
len. Ca recording 2917, total num. neurons 99, num. ID'd neurons 29
len. Ca recording 3163, total num. neurons 107, num. ID'd neurons 28
len. Ca recording 2795, total num. neurons 97, num. ID'd neurons 26


ERROR:root:ERROR: MATLAB type not supported: function_handle_workspace, (uint32)
ERROR:root:ERROR: MATLAB type not supported: function_handle_workspace, (uint32)
ERROR:root:ERROR: MATLAB type not supported: function_handle_workspace, (uint32)
ERROR:root:ERROR: MATLAB type not supported: function_handle_workspace, (uint32)
ERROR:root:ERROR: MATLAB type not supported: function_handle_workspace, (uint32)


['five_state_annotation', 'neuron_ID', 'parameters', 'peaks', 'raw_traces', 'time_vector', 'traces_bleach_corrected', 'traces_bleach_corrected_detrended', 'traces_derivatives', 'volumes_per_second']

num. worms: 5

len. Ca recording 3805, total num. neurons 105, num. ID'd neurons 25
len. Ca recording 3803, total num. neurons 114, num. ID'd neurons 30
len. Ca recording 3657, total num. neurons 123, num. ID'd neurons 27
len. Ca recording 2808, total num. neurons 97, num. ID'd neurons 28
len. Ca recording 3157, total num. neurons 117, num. ID'd neurons 23

dict_keys(['worm1', 'worm2', 'worm3', 'worm4', 'worm5', 'worm6', 'worm7', 'worm8', 'worm9', 'worm10', 'worm11', 'worm12', 'worm13', 'worm14', 'worm15', 'worm16', 'worm17', 'worm18', 'worm19'])



In [165]:
#@title Uzel et al 2022., Cell CurrBio 2022, *A set of hub neurons and non-local connectivity features support global brain dynamics in C. elegans*

data_dict = dict()

# load .mat file
arr = mat73.loadmat(os.path.join(data_path, 'Uzel2022', 'Uzel_WT.mat'))['Uzel_WT']
print(list(arr.keys()))
print()

# get data for all worms
all_IDs = arr['IDs'] # identified neuron IDs (only subset have neuron names)
all_traces = arr['traces'] # neural activity traces corrected for bleaching
print('num. worms:', len(all_IDs))
print()
for i, real_data in enumerate(all_traces):
    worm = "worm" + str(i+1)
    # Unwrap every ID entry to a plain Python scalar (a string or a number).
    i_IDs = [np.array(j).item() for j in all_IDs[i]]
    # Strings are kept as names; numeric entries become integer strings.
    neuron_IDs = {nid+1: (j if isinstance(j, str) else str(int(j))) for nid, j in enumerate(i_IDs)}
    # Drop '0' characters from non-numeric names that do not end in '0'
    # (presumably to turn zero-padded names such as 'VB01' into 'VB1').
    neuron_IDs = {nid: (name.replace('0','') if not name.endswith('0') and not name.isnumeric() else name)
                  for nid, name in neuron_IDs.items()}
    max_time, num_neurons = real_data.shape
    num_named = len([v for v in neuron_IDs.values() if not v.isnumeric()]) # number of neurons that were ID'd
    print("len. Ca recording %s, total num. neurons %s, num. ID'd neurons %s"%(
        max_time, num_neurons, num_named))
    sc = preprocessing.MinMaxScaler() # normalize data (each column is scaled independently)
    real_data = sc.fit_transform(real_data)
    real_data = np.expand_dims(real_data, axis=-1) # add a feature dimension
    real_data = torch.tensor(real_data, dtype=torch.float64) # convert to tensor
    data_dict[worm] = {'data': real_data, 'neuron_ids': neuron_IDs,
                       'max_time': max_time, 'num_neurons': num_neurons, 'num_named': num_named}

# pickle the data; context managers make sure both file handles are closed
file = os.path.join(root, "Uzel2022.pickle")
with open(file, "wb") as pickle_out:
    pickle.dump(data_dict, pickle_out)
# read the file back in as a check that it was written correctly
with open(file, "rb") as pickle_in:
    Uzel2022 = pickle.load(pickle_in)
print(Uzel2022.keys())
print()

['IDs', 'derivatives', 'fps', 'options', 'states', 'statesKey', 'traces', 'tv']

num. worms: 6

len. Ca recording 3312, total num. neurons 154, num. ID'd neurons 58
len. Ca recording 3749, total num. neurons 154, num. ID'd neurons 54
len. Ca recording 4126, total num. neurons 133, num. ID'd neurons 48
len. Ca recording 5450, total num. neurons 124, num. ID'd neurons 46
len. Ca recording 3313, total num. neurons 136, num. ID'd neurons 51
len. Ca recording 3311, total num. neurons 133, num. ID'd neurons 47
dict_keys(['worm1', 'worm2', 'worm3', 'worm4', 'worm5', 'worm6'])



In [166]:
#@title Delete the downloaded raw datasets.
#@markdown The files are too large to push to GitHub.

# Guarded so that re-running this cell after the folder has already been
# removed does nothing instead of raising FileNotFoundError.
if os.path.isdir(data_path):
    shutil.rmtree(data_path)