In [1]:
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell runs, so edits to the local `preprocess` package are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [54]:
from preprocess._utils import Flavell2023Preprocessor, Nichols2017Preprocessor, Kato2015Preprocessor
from preprocess._pkg import *
import matplotlib.pyplot as plt

In [55]:
# Load the preprocessing configuration and build the Flavell2023 preprocessor from it.
# NOTE(review): the absolute path below is machine-specific — consider moving it to a
# configurable project-root constant.
config = OmegaConf.load("/home/lrvnc/Projects/worm-graph/conf/preprocess.yaml")

# The smoothing method and resampling step both come from the `preprocess` section
# of the config; the traces are transformed with a fresh StandardScaler.
dataset = Flavell2023Preprocessor(
    transform=StandardScaler(),
    smooth_method=config.preprocess.smooth,
    resample_dt=config.preprocess.resample_dt,
)

In [56]:
# Load a single Flavell2023 recording (one JSON file) through the preprocessor's loader.
# NOTE(review): absolute, machine-specific path — presumably this should be configurable.
filename = '/home/lrvnc/Projects/worm-graph/opensource_data/Flavell2023/2023-03-07-01.json'
data = dataset.load_data(filename)

In [57]:
# Inspect which top-level fields the loaded recording provides.
data.keys()

dict_keys(['avg_timestep', 'rel_enc_str_θh', 'dorsalness', 'head_curvature', 'trace_array', 'angular_velocity', 'rel_enc_str_P', 'reversal_events', 'encoding_changing_neurons', 'feedingness', 'uid', 'ranges', 'labeled', 'velocity', 'body_curvature', 'forwardness', 'tau_vals', 'num_neurons', 'rel_enc_str_v', 'max_t', 'dataset_type', 'pumping', 'neuron_categorization'])

In [58]:
# Pull out the fields needed downstream: neuron IDs, traces, and the two values
# from which the time vector is built.

avg_time = data['avg_timestep']  # used as the step of the time vector
max_t = data['max_t']            # compared against each trace's length below
raw_traces = data['trace_array'] # one sequence of samples per neuron
ids = data['labeled']            # maps a neuron index (as a key) to an entry holding its 'label'

In [62]:
# Sanity check: count how many traces do NOT have exactly `max_t` samples.
seq_len = np.array([len(trace) for trace in raw_traces])
# Count the boolean mask directly. Wrapping it in np.where() would count the non-zero
# *indices* of the mismatching traces, so a mismatch at trace index 0 would be missed.
print(np.count_nonzero(seq_len != max_t))
# All traces have the same length !

0


In [63]:
# Create time vector
# One timestamp per sample: 0, avg_time, 2*avg_time, ...
# Scale an integer arange instead of calling np.arange with a float step, whose output
# length is not numerically stable and can come out as max_t + 1.
raw_timeVectorSeconds = np.arange(max_t) * avg_time

In [196]:
# Convert every trace into its own NumPy array (the outer container stays a list).
raw_traces = list(map(np.array, raw_traces))
raw_traces

[array([-0.47278601, -0.39202656, -0.54950041, ..., -0.33404995,
        -0.33378048, -0.33351097]),
 array([-0.91852817, -0.91840091, -1.00843827, ...,  0.02763032,
         0.02789118,  0.02815208]),
 array([ 1.60647478,  2.65702913,  1.99517219, ..., -1.20265624,
        -1.20224991, -1.20184353]),
 array([ 2.59043728,  2.36485264,  2.12907464, ..., -0.54228451,
        -0.54213745, -0.54199037]),
 array([ 2.04013279,  0.66039735,  1.80568331, ..., -0.16290591,
        -0.16235193, -0.16179788]),
 array([ 2.48168336,  2.9590653 ,  3.24338898, ..., -0.49181899,
        -0.49142208, -0.49102512]),
 array([ 2.08016614,  1.92797078,  1.70829216, ..., -0.89315372,
        -0.8928968 , -0.89263984]),
 array([-1.32941659, -1.16032617, -1.20949135, ...,  3.01322474,
         3.01412987,  3.01503512]),
 array([-1.55128705, -1.45177398, -1.56107087, ...,  0.93188998,
         0.93272511,  0.93356035]),
 array([-0.02768623, -0.35267365, -1.29838946, ...,  1.70100857,
         1.70246064,  1.70

In [195]:
# Build one label per trace so that neuron_IDs[k] is the name of raw_traces[k].
# Traces with no entry in `ids` keep a positional placeholder string.
neuron_IDs = [str(i) for i in range(len(raw_traces))]

# Keys of `ids` are neuron indices; the `- 1` presumably converts from 1-based
# indexing to list positions — TODO confirm against the dataset description.
for key, entry in ids.items():
    neuron_IDs[int(key) - 1] = entry['label']

print(neuron_IDs)

# Resolve ambiguous labels (those containing '?', e.g. 'RMDD?'): replace each one
# with a not-yet-used name from NEURONS_302 that starts with the text before the '?'.
random.seed(0)  # fixed seed so the resolved labels are reproducible across runs

for i, label in enumerate(neuron_IDs):
    if '?' not in label:
        continue

    # Text before the '?' identifies the group the neuron belongs to.
    prefix = label.split('?')[0]
    if not prefix:
        # Nothing to match on; an empty prefix would match every neuron.
        continue

    # Prefix match (not substring) so that e.g. 'DA' cannot match 'ADAL';
    # names already assigned to another trace are excluded.
    candidates = [
        neuron_name
        for neuron_name in NEURONS_302
        if neuron_name.startswith(prefix) and neuron_name not in neuron_IDs
    ]

    # Leave the label untouched when no candidate remains, rather than letting
    # random.choice raise IndexError on an empty list.
    if candidates:
        neuron_IDs[i] = random.choice(candidates)

print(neuron_IDs)

['RID', 'RIMR', 'URYDR', 'RIH', 'RMDD?', '5', 'OLQVL', 'ADLL', '8', 'RMGL', 'RIML', 'IL2VR', '12', '13', '14', 'I1L', '16', '17', 'ADAL', '19', 'RMDVR', '21', '22', '23', 'OLLL', '25', 'URBR', 'ALA', '28', '29', 'CEPDR', 'SAAD?', 'RMDVL', 'AVAL', '34', '35', 'IL2VL', '37', '38', '39', '40', 'ASHR', 'IL1DR', 'URYVL', '44', 'ASHL', '46', 'M3R', 'RIVL', 'I1R', 'ASEL', 'CEPVL', 'IL1L', 'CEPVR', 'URADL', '55', 'I2R', 'AUAL', 'I2L', 'RMER', 'OLQDL', 'SMDDL', 'AVAR', 'AUAR', '64', 'URXL', 'AWCR', '67', 'BAGR', '69', '70', '71', '72', 'M3L', 'SAAD?', '75', '76', 'AVHL', '78', 'OLQDR', '80', '81', '82', '83', 'RMDD?', '85', 'AIBR', '87', '88', '89', 'URYVR', 'OLLR', 'URXR', '93', 'URBL', 'ASGL', '96', '97', 'NSML', 'RMDR', '100', '101', 'RMDL', 'AWBL', '104', '105', 'AVL', 'RICL', '108', 'AWAR', 'SMDDR', '111', 'IL2DL', 'VB02', 'RMEL', 'AVER', '116', 'AIY?', '118', '119', '120', 'CEPDL', 'URAVL', '123', '124', 'OLQVR', 'AQR', 'RIAL', '128', 'AVDL', '130', '131', 'IL1R', '133', '134', 'MI', 'FLP

In [144]:
# Show the reference list of neuron names that ambiguous labels are resolved against.
print(NEURONS_302)

['ADAL', 'ADAR', 'ADEL', 'ADER', 'ADFL', 'ADFR', 'ADLL', 'ADLR', 'AFDL', 'AFDR', 'AIAL', 'AIAR', 'AIBL', 'AIBR', 'AIML', 'AIMR', 'AINL', 'AINR', 'AIYL', 'AIYR', 'AIZL', 'AIZR', 'ALA', 'ALML', 'ALMR', 'ALNL', 'ALNR', 'AQR', 'AS1', 'AS10', 'AS11', 'AS2', 'AS3', 'AS4', 'AS5', 'AS6', 'AS7', 'AS8', 'AS9', 'ASEL', 'ASER', 'ASGL', 'ASGR', 'ASHL', 'ASHR', 'ASIL', 'ASIR', 'ASJL', 'ASJR', 'ASKL', 'ASKR', 'AUAL', 'AUAR', 'AVAL', 'AVAR', 'AVBL', 'AVBR', 'AVDL', 'AVDR', 'AVEL', 'AVER', 'AVFL', 'AVFR', 'AVG', 'AVHL', 'AVHR', 'AVJL', 'AVJR', 'AVKL', 'AVKR', 'AVL', 'AVM', 'AWAL', 'AWAR', 'AWBL', 'AWBR', 'AWCL', 'AWCR', 'BAGL', 'BAGR', 'BDUL', 'BDUR', 'CANL', 'CANR', 'CEPDL', 'CEPDR', 'CEPVL', 'CEPVR', 'DA1', 'DA2', 'DA3', 'DA4', 'DA5', 'DA6', 'DA7', 'DA8', 'DA9', 'DB1', 'DB2', 'DB3', 'DB4', 'DB5', 'DB6', 'DB7', 'DD1', 'DD2', 'DD3', 'DD4', 'DD5', 'DD6', 'DVA', 'DVB', 'DVC', 'FLPL', 'FLPR', 'HSNL', 'HSNR', 'I1L', 'I1R', 'I2L', 'I2R', 'I3', 'I4', 'I5', 'I6', 'IL1DL', 'IL1DR', 'IL1L', 'IL1R', 'IL1VL', 'IL

In [100]:
# Peek at the first trace (note: this displays every sample of the trace).
raw_traces[0]

[-0.47278600799588,
 -0.39202655683483384,
 -0.5495004106689969,
 -0.6749240443218124,
 -0.7375346708976944,
 -0.7711430481730215,
 -0.9357103386585586,
 -0.9213488225889649,
 -0.9042836541544742,
 -0.8867327230088176,
 -0.8526244937766984,
 -0.9474635251555126,
 -1.133318933532301,
 -1.1460165448813922,
 -1.1593095268512823,
 -1.066503787160187,
 -1.1056622047276654,
 -1.103160398121442,
 -1.0586602006191315,
 -1.0646615667470687,
 -1.0878625642504827,
 -1.0626662616783336,
 -1.0715910696090396,
 -1.164616469325134,
 -1.1571606152009861,
 -1.1365157960686858,
 -1.194993465428579,
 -1.3513442986733801,
 -1.2619901340026611,
 -1.235245589975193,
 -1.3087697044596196,
 -1.3741606525497696,
 -1.4326978095164282,
 -1.432101701073338,
 -1.3866093114922193,
 -1.5023459097065512,
 -1.4947758115824765,
 -1.493546562558477,
 -1.5156689023813197,
 -1.539816810415137,
 -1.5664205306969392,
 -1.570904026458195,
 -1.5750278948857739,
 -1.5269516746428193,
 -1.487226363159615,
 -1.5139164793689495,


In [112]:
# Rebuild the Flavell2023 preprocessor from the config's `preprocess` section,
# then run its full preprocessing pipeline.
preprocess_cfg = config.preprocess
dataset = Flavell2023Preprocessor(
    transform=StandardScaler(),
    smooth_method=preprocess_cfg.smooth,
    resample_dt=preprocess_cfg.resample_dt,
)
preprocessed_Data = dataset.preprocess()

TypeError: list indices must be integers or slices, not tuple

In [111]:
# Display whatever is currently bound to raw_timeVectorSeconds.
raw_timeVectorSeconds

[array([0.00000000e+00, 1.00264839e-02, 2.00529678e-02, ...,
        1.60122948e+01, 1.60223213e+01, 1.60323477e+01]),
 array([0.00000000e+00, 1.00264839e-02, 2.00529678e-02, ...,
        1.60122948e+01, 1.60223213e+01, 1.60323477e+01]),
 array([0.00000000e+00, 1.00264839e-02, 2.00529678e-02, ...,
        1.60122948e+01, 1.60223213e+01, 1.60323477e+01]),
 array([0.00000000e+00, 1.00264839e-02, 2.00529678e-02, ...,
        1.60122948e+01, 1.60223213e+01, 1.60323477e+01]),
 array([0.00000000e+00, 1.00264839e-02, 2.00529678e-02, ...,
        1.60122948e+01, 1.60223213e+01, 1.60323477e+01]),
 array([0.00000000e+00, 1.00264839e-02, 2.00529678e-02, ...,
        1.60122948e+01, 1.60223213e+01, 1.60323477e+01]),
 array([0.00000000e+00, 1.00264839e-02, 2.00529678e-02, ...,
        1.60122948e+01, 1.60223213e+01, 1.60323477e+01]),
 array([0.00000000e+00, 1.00264839e-02, 2.00529678e-02, ...,
        1.60122948e+01, 1.60223213e+01, 1.60323477e+01]),
 array([0.00000000e+00, 1.00264839e-02, 2.005296

In [51]:
# Build the Nichols2017 preprocessor with the smoothing and resampling settings
# taken from the config's `preprocess` section.
nichols_cfg = config.preprocess
nichols = Nichols2017Preprocessor(
    transform=StandardScaler(),
    smooth_method=nichols_cfg.smooth,
    resample_dt=nichols_cfg.resample_dt,
)

In [108]:
# Load the "n2_let" entry of the Nichols .mat file and extract per-worm
# IDs, traces and time vectors from it.
raw_data_nichols = nichols.load_data("n2_let.mat")["n2_let"]
all_neuron_IDs, all_traces, all_time_vectors = nichols.extract_data(raw_data_nichols)

# Keep only the first worm of each extracted sequence.
neuron_IDs, traces, raw_timeVectorSeconds = (
    all_neuron_IDs[0],
    all_traces[0],
    all_time_vectors[0],
)

In [109]:
# Summarize the sizes of the first worm's labels, traces and time vector.
n_ids = len(neuron_IDs)
n_timestamps = len(raw_timeVectorSeconds)
print(f'neuron_IDs: len = {n_ids}, traces: shape = {traces.shape}, raw_timeVectorSeconds: len = {n_timestamps}')

neuron_IDs: len = 117, traces: shape = (4224, 117), raw_timeVectorSeconds: len = 4224


In [48]:
# Display the labels currently bound to neuron_IDs; in the recorded output,
# entries are either None or a list of candidate names.
neuron_IDs

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 ['IL2DL'],
 None,
 None,
 None,
 None,
 None,
 None,
 ['URYDR'],
 [None, 'OLQVL'],
 None,
 None,
 ['URYDL'],
 None,
 None,
 ['BAGL'],
 None,
 ['BAGR'],
 ['RMED'],
 None,
 None,
 ['RID'],
 None,
 None,
 None,
 ['ALA'],
 ['AVAL'],
 None,
 ['URXL'],
 None,
 None,
 ['RMEV'],
 None,
 ['ASKL'],
 None,
 None,
 None,
 None,
 None,
 ['AFDR'],
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 ['RIS'],
 None,
 ['VB02'],
 None,
 None,
 ['AVFL'],
 None,
 ['AVFR'],
 None,
 ['VB01'],
 None,
 None,
 None,
 None,
 None,
 ['DB02'],
 None,
 ['AQR'],
 None,
 ['AUAL'],
 ['AUAR'],
 None]