In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
import os
from tqdm.notebook import tqdm
import networkx as nx

In [2]:
# data
data_folder = '../../data/'
files = os.listdir(data_folder)
for f in files:
    if f.endswith('.csv.gz'):
        file_name = f.split('.')[0]
        command = file_name+"= pd.read_csv('../../data/" + f + "')"
        exec(command)
        print(command)

coordinates= pd.read_csv('../../data/coordinates.csv.gz')
names= pd.read_csv('../../data/names.csv.gz')
neurons= pd.read_csv('../../data/neurons.csv.gz')
classification= pd.read_csv('../../data/classification.csv.gz')
synapse_coordinates= pd.read_csv('../../data/synapse_coordinates.csv.gz')
neuropil_synapse_table= pd.read_csv('../../data/neuropil_synapse_table.csv.gz')
labels= pd.read_csv('../../data/labels.csv.gz')
connections= pd.read_csv('../../data/connections.csv.gz')


In [3]:
# replace nan with previous non nan value in the column
synapse_coordinates['pre_root_id'].ffill(inplace=True)
synapse_coordinates['post_root_id'].ffill(inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  synapse_coordinates['pre_root_id'].ffill(inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  synapse_coordinates['post_root_id'].ffill(inplace=True)


In [4]:
def find_neuron_by_hemibrain_label(label):
    return classification[classification['hemibrain_type'].apply(lambda x: label in x if x is not np.nan else False)]['root_id'].values

def root_to_props(root_id):
    input_regions = connections[connections['post_root_id']==root_id]['neuropil']
    if len(input_regions) == 0:
        input_regions = 'UNASGD'
    else:
        input_regions = input_regions.value_counts().idxmax().split('_')[0]
    output_regions = connections[connections['pre_root_id']==root_id]['neuropil']
    if len(output_regions) == 0:
        output_regions = 'UNASGD'
    else:
        output_regions = output_regions.value_counts().idxmax().split('_')[0]
    hemicell_type = classification[classification['root_id']==root_id]['hemibrain_type'].values[0]
    flywirecell_type = classification[classification['root_id']==root_id]['cell_type'].values[0]
    if hemicell_type == '' and flywirecell_type == '':
        celltype = 'unknown'
    elif hemicell_type == '':
        celltype = flywirecell_type
    else:
        celltype = hemicell_type
    flow, super_class, class_, subclass = classification[classification['root_id']==root_id][['flow','super_class','class','sub_class']].values[0]
    return input_regions, output_regions, celltype, flow, super_class, class_, subclass

In [7]:
def find_connections(connections, input_root_ids, output_root_ids, skip_root_ids, max_depth=2, value_type='syn_strength', propagation_mode='multiplicative'):
    """ 
    Find all paths between input and output neurons upto max_depth removing traversed paths
    """

    paths = []

    # for each input neuron find the downstream neurons that are connected
    direct_connections = connections[connections['pre_root_id'].isin(input_root_ids) & connections['post_root_id'].isin(output_root_ids)]

    if len(direct_connections) > 0:
        print('Found {} direct connections at depth {}'.format(len(direct_connections), max_depth))

        for i, row in direct_connections.iterrows():
            paths.append(
                {
                    'path': [row['pre_root_id'], row['post_root_id']],
                    'value': row[value_type]
                }
            )

        # if max_depth is reached return the paths
        if max_depth == 1:
            return paths

        # remove the direct connections
        connections = connections[~connections.index.isin(direct_connections.index)]
        
        # find the downstream neurons
        downstream_neurons = connections[connections['pre_root_id'].isin(output_root_ids)]['post_root_id'].unique()

        # remove the skip root ids
        downstream_neurons = np.setdiff1d(downstream_neurons, skip_root_ids)

        # add the input root ids to skip root ids
        skip_root_ids = np.append(skip_root_ids, input_root_ids)

        # find the paths from downstream neurons to output neurons
        paths_downstream = find_connections(connections, downstream_neurons, output_root_ids, skip_root_ids, max_depth-1, value_type, propagation_mode)

        print('Found {} downstream connections at depth {}'.format(len(paths_downstream), max_depth-1))

        # for each path from downstream neuron to output neuron
        for path in tqdm(paths_downstream):
            # find the connections from input neurons to downstream neurons
            direct_connections = connections[connections['pre_root_id'].isin(input_root_ids) & connections['post_root_id'].isin([path['path'][0]])]
            for i, row in direct_connections.iterrows():
                if propagation_mode == 'multiplicative':
                    value = row[value_type] * path['value']
                elif propagation_mode == 'additive':
                    value = row[value_type] + path['value']
                paths.append(
                    {
                        'path': [row['pre_root_id']] + path['path'],
                        'value': value
                    }
                )
    
    # if no direct connections are found
    else:

        # if max_depth is reached return the paths
        if max_depth == 1:
            return paths
            
        # find the downstream neurons
        downstream_neurons = connections[connections['pre_root_id'].isin(input_root_ids)]['post_root_id'].unique()

        # remove the skip root ids
        downstream_neurons = np.setdiff1d(downstream_neurons, skip_root_ids)

        # add the input root ids to skip root ids
        skip_root_ids = np.append(skip_root_ids, input_root_ids)

        # find the paths from downstream neurons to output neurons
        paths_downstream = find_connections(connections, downstream_neurons, output_root_ids, skip_root_ids, max_depth-1, value_type, propagation_mode)

        print('Found {} downstream connections at depth {}'.format(len(paths_downstream), max_depth))

        # for each path from downstream neuron to output neuron
        for path in tqdm(paths_downstream):
            # find the connections from input neurons to downstream neurons
            direct_connections = connections[connections['pre_root_id'].isin(input_root_ids) & connections['post_root_id'].isin([path['path'][0]])]
            for i, row in direct_connections.iterrows():
                if propagation_mode == 'multiplicative':
                    value = row[value_type] * path['value']
                elif propagation_mode == 'additive':
                    value = row[value_type] + path['value']
                paths.append(
                    {
                        'path': [row['pre_root_id']] + path['path'],
                        'value': value
                    }
                )

    # sort the paths by value
    paths = sorted(paths, key=lambda x: x['value'], reverse=True)
    
    return paths

In [8]:
PFNs = find_neuron_by_hemibrain_label('PFN')
PNs = classification[classification['class']=='ALPN'].root_id.values
MBONs = classification[classification['class']=='MBON'].root_id.values
KCs = classification[classification['class']=='Kenyon_Cell'].root_id.values

In [67]:
# find the connections between PNs and PFNs ignoring the KCs
synapse_connections = connections.copy()
PN_PFN_noKC = find_connections(synapse_connections, PNs, PFNs, KCs, 3, 'syn_strength', 'multiplicative')

Found 154 direct connections at depth 2
Found 3327 direct connections at depth 1
Found 3327 downstream connections at depth 1


  0%|          | 0/3327 [00:00<?, ?it/s]

Found 13561 downstream connections at depth 3


  0%|          | 0/13561 [00:00<?, ?it/s]

In [9]:
synapse_connections = connections.copy()
MBON_PFN = find_connections(synapse_connections, MBONs, PFNs, [], 2, 'syn_strength', 'multiplicative')

Found 772 direct connections at depth 1
Found 772 downstream connections at depth 2


  0%|          | 0/772 [00:00<?, ?it/s]

In [10]:
MBON_PFN

[{'path': [720575940637934308, 720575940625967546, 720575940610403826],
  'value': 0.0070546737213403},
 {'path': [720575940637934308, 720575940625967546, 720575940630602444],
  'value': 0.0070546737213403},
 {'path': [720575940637934308, 720575940625967546, 720575940633946045],
  'value': 0.0070546737213403},
 {'path': [720575940637934308, 720575940625967546, 720575940624039370],
  'value': 0.0070546737213403},
 {'path': [720575940637934308, 720575940625967546, 720575940619069093],
  'value': 0.0070546737213403},
 {'path': [720575940637934308, 720575940625967546, 720575940629448463],
  'value': 0.006294939628272884},
 {'path': [720575940637934308, 720575940625967546, 720575940620476748],
  'value': 0.006212324620284742},
 {'path': [720575940637934308, 720575940625967546, 720575940621292996],
  'value': 0.006208112874779464},
 {'path': [720575940637934308, 720575940625967546, 720575940616136733],
  'value': 0.0061229243619179965},
 {'path': [720575940637934308, 720575940625967546, 7205

In [None]:
root_to_props(720575940606483657)

In [72]:
def propagate(input_root_ids, input_values, value_type='syn_strength'):
    # find all downstream connections
    downstream_connections = connections[connections['pre_root_id'].isin(input_root_ids)]
    # find all downstream neurons
    downstream_neurons = np.unique(downstream_connections['post_root_id'])
    # create a mapping from root id to index
    root_to_input = {r:i for i,r in enumerate(input_root_ids)}
    root_to_output = {r:i for i,r in enumerate(downstream_neurons)}
    # create a connectivity matrix
    connection_matrix = np.zeros((len(downstream_neurons), len(input_root_ids)))
    # fill the matrix
    for i in tqdm(downstream_connections.index):
        pre_id = downstream_connections.loc[i, 'pre_root_id']
        post_id = downstream_connections.loc[i, 'post_root_id']
        pre_index = root_to_input[pre_id]
        post_index = root_to_output[post_id]
        connection_matrix[post_index, pre_index] += downstream_connections.loc[i, value_type]
    # propagate the values
    output_values = np.dot(connection_matrix, input_values)
    return downstream_neurons, output_values

In [74]:
PNs = classification[classification['class']=='ALPN']
left_PNs = PNs[PNs['side']=='left']['root_id'].unique()
right_PNs = PNs[PNs['side']=='right']['root_id'].unique()
len_lPNs, len_rPNs = len(left_PNs), len(right_PNs)
all_PNs = list(left_PNs) + list(right_PNs)

# input values
input_values = np.zeros((len(all_PNs), 1))
input_values[:len_lPNs] = 1
input_values[len_lPNs:] = -1

N_steps = 3
neurons = []
values = []

output_values = input_values
downstream_neurons = all_PNs

neurons.append(downstream_neurons)
values.append(output_values)

for i in range(N_steps):
    print('Step', i, end=': ')
    if os.path.exists('../../figures/ALPN_connectivity/ALPN_output_values_step_{}.npy'.format(i)):
        print('Data exists. Loading... ', end='')
        output_values = np.load('../../figures/ALPN_connectivity/ALPN_output_values_step_{}.npy'.format(i))
        downstream_neurons = np.load('../../figures/ALPN_connectivity/ALPN_downstream_neurons_step_{}.npy'.format(i))
        print('N={}'.format(len(downstream_neurons)), end=' ')
        print('Done.')
    else:
        print('Propagating... ', end='')
        downstream_neurons, output_values = propagate(downstream_neurons, output_values, value_type='syn_strength')
        np.save('../../figures/ALPN_connectivity/ALPN_output_values_step_{}.npy'.format(i), output_values)
        np.save('../../figures/ALPN_connectivity/ALPN_downstream_neurons_step_{}.npy'.format(i), downstream_neurons)
        print('N={}'.format(len(downstream_neurons)), end=' ')
        print('Done.')
    neurons.append(downstream_neurons)
    values.append(output_values)

Step 0: Data exists. Loading... N=11502 Done.
Step 1: Data exists. Loading... N=36589 Done.
Step 2: Data exists. Loading... N=96568 Done.


In [None]:
# remove neurons in later steps that are already present in earlier steps
for i in range(1, N_steps):
    values[i] = np.delete(values[i], np.isin(neurons[i], neurons[i-1]))
    neurons[i] = np.delete(neurons[i], np.isin(neurons[i], neurons[i-1]))



In [None]:
# step 1 neurons
step1_neurons = neurons[1]
step1_values = values[1]

# find the properties of the neurons
step1_props = pd.DataFrame([root_to_props(r) for r in tqdm(step1_neurons)], columns=['input_region', 'output_region', 'celltype', 'flow', 'super_class', 'class', 'subclass'])

step1_props['value'] = step1_values
step1_props['abs_value'] = np.abs(step1_values)
step1_props['neuron'] = step1_neurons

step1_props.sort_values('abs_value', ascending=False, inplace=True)

  0%|          | 0/10919 [00:00<?, ?it/s]

In [85]:
# find overlap with PFNs
np.any(step1_props['neuron'].apply(lambda x: x in PFNs))


False

In [86]:
# step 2 neurons
step2_neurons = neurons[2]
step2_values = values[2]

# find the properties of the neurons
step2_props = pd.DataFrame([root_to_props(r) for r in tqdm(step2_neurons)], columns=['input_region', 'output_region', 'celltype', 'flow', 'super_class', 'class', 'subclass'])

step2_props['value'] = step2_values
step2_props['abs_value'] = np.abs(step2_values)
step2_props['neuron'] = step2_neurons

step2_props.sort_values('abs_value', ascending=False, inplace=True)

  0%|          | 0/25680 [00:00<?, ?it/s]

In [95]:
# find overlap with PFNs
values = step2_props[step2_props['neuron'].apply(lambda x: x in PFNs)]['value'].values
for v in values:
    print(v)

-0.017482172638671065
-0.006107491856677497
-0.004519599264819699
-0.0032573289902279963
-0.003199402124942663
-0.0030537459283387497
-0.0030226466522970353
-0.002979264320330485
-0.0029564344126577517
-0.0023463400960766945
-0.0021448811419452632
-0.002054733229214368
-0.001894815475557597
-0.0018294770108831966
-0.0016814376131085544
-0.001650673474777701
-0.0016499728922041036
-0.0016132997357261294
-0.001561767921286894
-0.001555979658632762
-0.00152239075503946
-0.001491206404510525
-0.001399928134190005
-0.0013811889596016566
-0.0012659477107890642
-0.0012214983713355
-0.0011532058478191662
-0.001106457911304402
-0.0010232272587741718
-0.0010059398352174683
-0.000945296969491935
-0.0009396141317965369
-0.0008424126698865503
-0.0008089393187652304
-0.000744059921625681
-0.0007020105582387919
-0.0007013757755597498
-0.0006930487213251031
-0.0006602693899110804
-0.0006542208930115161
-0.0006158815317657972
-0.00061074918566775
-0.0006035306543278153
-0.000544469550540385
-0.00053951