# Fetching Pre- and Postsynaptic Neuron Locations

## Libraries

In [1]:
! pip install standard-transform
! pip install --upgrade caveclient
! pip install tqdm

Defaulting to user installation because normal site-packages is not writeable
Collecting standard-transform
  Downloading standard-transform-1.4.0.tar.gz (27 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: standard-transform
  Building wheel for standard-transform (setup.py) ... [?25ldone
[?25h  Created wheel for standard-transform: filename=standard_transform-1.4.0-py3-none-any.whl size=22356 sha256=2a168c27c0e5126a86ef33878748015b943ffb1e687a1ef6682f96c13b17f690
  Stored in directory: /home/vscode/.cache/pip/wheels/ad/3f/3f/2d6f4450cc3c31eb60e2dc6c4a24648a2e9563b610b26c9b67
Successfully built standard-transform
Installing collected packages: standard-transform
Successfully installed standard-transform-1.4.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --u

In [1]:
import os
from getpass import getpass

import numpy as np
import pandas as pd
from caveclient import CAVEclient
from microns_phase3 import nda
from standard_transform import minnie_transform_vx
from tqdm.notebook import tqdm_notebook

Connecting microns@db.datajoint.com:3306


Could not create the ~log table
Could not access table `microns_phase3_nda`.`~log`


## Preparation

Function that transforms voxel coordinates into depth coordinates (provided to us by the authors of the MICrONS project).

In [3]:
# Build the Minnie voxel-coordinate transform; it is applied later through
# `tform_vx.apply(...)` to convert `pt_position` values.
tform_vx = minnie_transform_vx()

### Querying the data from CAVEclient

In [4]:
# Client created without a datastack name.
# NOTE(review): presumably only needed to store the auth token — confirm.
client = CAVEclient()

In [5]:
# SECURITY: do not hard-code the CAVE auth token in the notebook. Any token that
# has been stored in a shared or committed notebook should be considered leaked
# and regenerated.
# The token is taken from the CAVE_TOKEN environment variable, falling back to
# an interactive (non-echoing) prompt when the variable is unset or empty.
cave_token = os.environ.get("CAVE_TOKEN") or getpass("CAVE auth token: ")
client.auth.save_token(cave_token)

We will be using the latest public release of the dataset, i.e. version 661

In [2]:
# Re-create the client, this time bound to the 'minnie65_public' datastack.
client = CAVEclient('minnie65_public')

In [3]:
# Pin all materialization queries to version 661.
client.materialize.version = 661

In [8]:
# Display the names of the tables available through the materialization client.
client.materialize.get_tables()

['synapses_pni_2',
 'baylor_gnn_cell_type_fine_model_v2',
 'nucleus_alternative_points',
 'connectivity_groups_v507',
 'proofreading_status_public_release',
 'allen_column_mtypes_v1',
 'allen_v1_column_types_slanted_ref',
 'aibs_column_nonneuronal_ref',
 'nucleus_ref_neuron_svm',
 'aibs_soma_nuc_exc_mtype_preds_v117',
 'baylor_log_reg_cell_type_coarse_v1',
 'apl_functional_coreg_forward_v5',
 'nucleus_detection_v0',
 'aibs_soma_nuc_metamodel_preds_v117',
 'coregistration_manual_v3']

In [9]:
# Keep the list of table names so each table can be queried by name.
tables = client.materialize.get_tables()

In [9]:
# Query every table except the first entry of `tables` (position 0 is skipped).
# NOTE(review): the skip is positional, so it depends on the order returned by
# get_tables() — confirm which table is meant to be excluded.
tables_dfs = [
    client.materialize.query_table(table_name)
    for table_name in tables[1:]
]

Table Owner Notice on nucleus_ref_neuron_svm: Please cite https://doi.org/10.1101/2022.07.20.499976 when using this table.


In [10]:
# Stack all queried tables and keep only the first row seen per pt_root_id.
df = pd.concat(tables_dfs).drop_duplicates(subset='pt_root_id')
# df.set_index('pt_root_id', inplace=True)

In [11]:
# Preview the first rows of the combined table.
df.head()

Unnamed: 0,id_ref,created_ref,valid_ref,volume,pt_supervoxel_id,pt_root_id,id,created,valid,target_id,...,superceded_id,valid_id,status_dendrite,status_axon,session,scan_idx,unit_id,field,residual,score
0,18023.0,2020-09-28 22:43:00.306675+00:00,t,264.791327,75934266147628505,864691135207734905,4490,2022-12-16 22:26:46.784878+00:00,t,18023.0,...,,,,,,,,,,
1,18312.0,2020-09-28 22:44:09.407821+00:00,t,221.584753,75441272688753483,864691135758479438,28785,2022-12-16 22:28:23.869072+00:00,t,18312.0,...,,,,,,,,,,
2,255686.0,2020-09-28 22:40:42.632533+00:00,t,297.846047,88954888800920543,864691135568539372,2439,2022-12-16 22:26:45.373463+00:00,t,255686.0,...,,,,,,,,,,
3,204945.0,2020-09-28 22:44:25.115874+00:00,t,250.471875,84466820245155764,864691135208560505,31608,2022-12-16 22:28:25.882301+00:00,t,204945.0,...,,,,,,,,,,
4,20997.0,2020-09-28 22:44:49.318428+00:00,t,285.783163,75161652802404739,864691136085125484,31231,2022-12-16 22:28:25.616224+00:00,t,20997.0,...,,,,,,,,,,


In [12]:
# Collect the index labels of `df` as a plain list.
# NOTE(review): the set_index('pt_root_id') call in the cell that builds `df` is
# commented out, so these labels are the row labels carried over from
# pd.concat, not root IDs — confirm this is intended.
root_idxs = list(df.index)

These are all the available tables, but we are only interested in two of them: *coregistration_manual_v3*, which contains the information about the manually proofread neurons, and *synapses_pni_2*, which contains all the traced synapses.

In [4]:
# Load the 'coregistration_manual_v3' table; this replaces any earlier `df`.
df = client.materialize.query_table('coregistration_manual_v3')

In [11]:
# Show one randomly selected row (no seed is set, so the row differs per run).
df.sample(1)

Unnamed: 0,id_ref,created_ref,valid_ref,volume,pt_supervoxel_id,pt_root_id,id,created,valid,target_id,session,scan_idx,unit_id,field,residual,score,pt_position,bb_start_position,bb_end_position
8013,425900,2020-09-28 22:45:17.471883+00:00,t,359.609139,99936193001094277,864691135697891349,575,2023-04-05 22:38:09.698548+00:00,t,425900,9,3,5464,4,0.523058,12.0251,"[255568, 154512, 19935]","[nan, nan, nan]","[nan, nan, nan]"


We start by setting as the index the ID associated with the root of the synapse (`pt_root_id`).

In [11]:
# Use pt_root_id as the row index; the column itself is removed from the frame.
# Not idempotent: re-running this cell fails because the column is gone.
df = df.set_index('pt_root_id')

In [6]:
# Preview the first rows of the coregistration table.
df.head()

Unnamed: 0,id_ref,created_ref,valid_ref,volume,pt_supervoxel_id,pt_root_id,id,created,valid,target_id,session,scan_idx,unit_id,field,residual,score,pt_position,bb_start_position,bb_end_position
0,582276,2020-09-28 22:41:42.536121+00:00,t,274.291589,111332012547320963,864691135738685297,13434,2023-04-05 22:39:53.474202+00:00,t,582276,7,5,4909,4,6.10722,9.56869,"[338720, 125232, 19589]","[nan, nan, nan]","[nan, nan, nan]"
1,194144,2020-09-28 22:42:01.511773+00:00,t,213.307228,83542405709639148,864691135614842827,3045,2023-04-05 22:39:03.110911+00:00,t,194144,7,4,9575,6,8.55708,-0.7149,"[136400, 170640, 17951]","[nan, nan, nan]","[nan, nan, nan]"
2,293318,2020-09-28 22:45:23.572432+00:00,t,430.10818,90783515419343652,864691135502985397,1474,2023-04-05 22:36:19.477499+00:00,t,293318,9,4,8066,6,5.63758,8.915025,"[189472, 119296, 26037]","[nan, nan, nan]","[nan, nan, nan]"
3,332833,2020-09-28 22:44:41.864456+00:00,t,274.418729,93535317407139914,864691135387371905,7915,2023-04-05 22:39:15.451639+00:00,t,332833,6,7,6413,6,5.27806,7.808064,"[209328, 174304, 20004]","[nan, nan, nan]","[nan, nan, nan]"
4,295460,2020-09-28 22:41:24.368712+00:00,t,208.616653,90225718084406516,864691135661410544,11589,2023-04-05 22:39:47.420305+00:00,t,295460,9,3,7748,6,5.20837,2.999543,"[185088, 157776, 15691]","[nan, nan, nan]","[nan, nan, nan]"


In [5]:
# (rows, columns) of the coregistration table.
df.shape

(13925, 19)

In [12]:
# One dict per row with the session / scan_idx / unit_id values of that row.
unit_keys = df[['session', 'scan_idx', 'unit_id']].to_dict(orient='records')

In [7]:
# Split the (session, scan_idx, unit_id) keys into three chunks of at most
# 5000 / 5000 / remaining rows, so each restriction sent to the database stays
# reasonably small.
# The last slice is open-ended instead of stopping at a hard-coded row count,
# so no rows are silently dropped if the table size changes.
key_columns = ['session', 'scan_idx', 'unit_id']
unit_keys_1 = df.iloc[:5000][key_columns].to_dict(orient='records')
unit_keys_2 = df.iloc[5000:10000][key_columns].to_dict(orient='records')
unit_keys_3 = df.iloc[10000:][key_columns].to_dict(orient='records')

In [8]:
# Look up the brain area of every unit in `df`.
#
# Restricting a DataJoint table by a list of key dicts is an OR-restriction, and
# the fetched rows are not returned in the order of the supplied keys. Fetching
# only 'brain_area' and concatenating the chunks therefore does not guarantee
# that value k belongs to row k of `df`. The key attributes are fetched
# alongside 'brain_area' so every value is matched back to its row explicitly.
area_key_columns = ['session', 'scan_idx', 'unit_id']
fetched_chunks = []
for chunk_number, chunk_keys in enumerate((unit_keys_1, unit_keys_2, unit_keys_3), start=1):
    chunk_rows = (nda.AreaMembership & chunk_keys).fetch(
        *area_key_columns, 'brain_area', as_dict=True
    )
    fetched_chunks.append(
        pd.DataFrame(chunk_rows, columns=area_key_columns + ['brain_area'])
    )
    print(f'{chunk_number}/3 Completed!')

# Map (session, scan_idx, unit_id) -> brain_area; duplicates are dropped so the
# lookup index is unique and can be used with reindex.
area_lookup = (
    pd.concat(fetched_chunks, ignore_index=True)
    .drop_duplicates(subset=area_key_columns)
    .set_index(area_key_columns)['brain_area']
)

# One value per row of `df`, in `df` row order.
area_membership = area_lookup.reindex(
    pd.MultiIndex.from_frame(df[area_key_columns])
).to_numpy()
assert len(area_membership) == df.shape[0]
# reindex yields NaN for keys that were not found; fail loudly instead of
# exporting rows without an area.
assert not pd.isna(area_membership).any(), 'some units have no AreaMembership entry'

1/3 Completed!
2/3 Completed!
3/3 Completed!


In [9]:
# Attach the fetched areas as a new column (modifies `df` in place); the
# assignment is positional, so `area_membership` must be in `df` row order.
df['Area Membership'] = area_membership
df.head()

Unnamed: 0,id_ref,created_ref,valid_ref,volume,pt_supervoxel_id,pt_root_id,id,created,valid,target_id,session,scan_idx,unit_id,field,residual,score,pt_position,bb_start_position,bb_end_position,Area Membership
0,582276,2020-09-28 22:41:42.536121+00:00,t,274.291589,111332012547320963,864691135738685297,13434,2023-04-05 22:39:53.474202+00:00,t,582276,7,5,4909,4,6.10722,9.56869,"[338720, 125232, 19589]","[nan, nan, nan]","[nan, nan, nan]",V1
1,194144,2020-09-28 22:42:01.511773+00:00,t,213.307228,83542405709639148,864691135614842827,3045,2023-04-05 22:39:03.110911+00:00,t,194144,7,4,9575,6,8.55708,-0.7149,"[136400, 170640, 17951]","[nan, nan, nan]","[nan, nan, nan]",V1
2,293318,2020-09-28 22:45:23.572432+00:00,t,430.10818,90783515419343652,864691135502985397,1474,2023-04-05 22:36:19.477499+00:00,t,293318,9,4,8066,6,5.63758,8.915025,"[189472, 119296, 26037]","[nan, nan, nan]","[nan, nan, nan]",V1
3,332833,2020-09-28 22:44:41.864456+00:00,t,274.418729,93535317407139914,864691135387371905,7915,2023-04-05 22:39:15.451639+00:00,t,332833,6,7,6413,6,5.27806,7.808064,"[209328, 174304, 20004]","[nan, nan, nan]","[nan, nan, nan]",V1
4,295460,2020-09-28 22:41:24.368712+00:00,t,208.616653,90225718084406516,864691135661410544,11589,2023-04-05 22:39:47.420305+00:00,t,295460,9,3,7748,6,5.20837,2.999543,"[185088, 157776, 15691]","[nan, nan, nan]","[nan, nan, nan]",V1


In [12]:
# Export the root-ID -> area mapping.
# reset_index() turns the index back into a column first: when 'pt_root_id' has
# already been set as the index, selecting it as a column would raise KeyError.
# With a default integer index, 'pt_root_id' is still a regular column and the
# selection works unchanged.
df.reset_index()[['pt_root_id', 'Area Membership']].to_csv('root_area_membership.csv', index=False)

In [14]:
# Index labels of `df` as a list (root IDs once pt_root_id is the index),
# followed by how many there are.
root_ids = df.index.tolist()
len(root_ids)

13925

In [16]:
# IDs to process in the synapse loop; currently all of them (alias, not a copy).
subset_ids = root_ids

In [19]:
# Accumulators for the export table. The four lists must stay index-aligned:
# one entry per matched (presynaptic neuron, postsynaptic neuron) row.
input_values = []
output_values = []
v_area_root = []
v_area_post = []

for cont, i in enumerate(subset_ids):

    if cont % 500 == 0:
        print(cont)

    # All synapses whose postsynaptic side is neuron `i`. The presynaptic root
    # column is renamed so it can be joined against `df` on 'pt_root_id'.
    input_df = client.materialize.synapse_query(post_ids=i).rename(
        columns={'pre_pt_root_id': 'pt_root_id'}
    )

    # Keep only synapses whose presynaptic neuron is also present in `df`.
    df_matched = pd.merge(input_df, df, on='pt_root_id', how='inner')
    if df_matched.empty:
        continue

    # This is the correct depth value, retrieved by the transform provided by
    # the MICrONS team; column 1 of the transformed positions is used.
    input_vertices_list = tform_vx.apply(df_matched['pt_position'])
    graph_x_values = np.vstack(input_vertices_list)[:, 1]

    output_vertices = tform_vx.apply(df.at[i, 'pt_position'])
    graph_y_value = output_vertices[1]
    # Skip neurons whose depth is not a single np.float64 value.
    # NOTE(review): presumably this happens when `i` occurs more than once in
    # the index, so `df.at` returns several positions — confirm.
    if not isinstance(graph_y_value, np.float64):
        continue

    num_points = len(graph_x_values)
    input_values.extend(graph_x_values)
    output_values.extend([graph_y_value] * num_points)
    # Take the presynaptic area straight from the merged frame: it has exactly
    # one value per row of `df_matched`, so it always lines up with
    # `graph_x_values`. Looking the IDs up again with `df.loc[...]` can return
    # a different number of rows when the index has duplicates, which would
    # silently misalign the exported columns.
    v_area_root.extend(df_matched['Area Membership'])
    v_area_post.extend([df.at[i, 'Area Membership']] * num_points)
    


0


500
1000
1500
2000
2500
3000
3500


In [24]:
# Assemble the export table: one row per collected synapse match. zip() stops at
# the shortest list, so the lists are expected to have equal length.
export_columns = ['Input Depth', 'Output Depth', 'Area Membership Root', 'Area Membership Post']
export_rows = list(zip(input_values, output_values, v_area_root, v_area_post))
df_export = pd.DataFrame(export_rows, columns=export_columns)
df_export.head()

Unnamed: 0,Input Depth,Output Depth,Area Membership Root,Area Membership Post
0,335.410876,349.834836,RL,V1
1,295.587597,349.834836,V1,V1
2,309.880951,349.834836,RL,V1
3,344.816976,349.834836,V1,V1
4,403.543122,349.834836,RL,V1


In [21]:
# Write this batch to disk; the row index is written as an unnamed first column.
# NOTE(review): the "_3" suffix suggests the export is produced in numbered
# batches — confirm the file name before re-running so nothing is overwritten.
df_export.to_csv('graph_values_3.csv')

In [22]:
# df_list = [pd.read_csv(f"/workspaces/microns_phase3_nda/personal_notebooks/graph_values_{n}.csv") for n in range(1,17)]
# complete_df = pd.concat(df_list, ignore_index=True)
# complete_df.drop(['Unnamed: 0'], axis=1, inplace=True)
# complete_df.head()

In [23]:
# complete_df.to_csv('all_functional_synapses_with_area_membership.csv')

## Trying to extract all the synapses