# Fetching Pre- and Postsynaptic Neuron Locations

## Libraries

In [1]:
! pip install standard-transform
! pip install --upgrade caveclient

Defaulting to user installation because normal site-packages is not writeable
Collecting standard-transform
  Downloading standard-transform-1.4.0.tar.gz (27 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: standard-transform
  Building wheel for standard-transform (setup.py) ... [?25ldone
[?25h  Created wheel for standard-transform: filename=standard_transform-1.4.0-py3-none-any.whl size=22356 sha256=11d0360b083583c68c5c82045f06bd9d122f92a87f60ef7d42491bf41d306a60
  Stored in directory: /home/vscode/.cache/pip/wheels/ad/3f/3f/2d6f4450cc3c31eb60e2dc6c4a24648a2e9563b610b26c9b67
Successfully built standard-transform
Installing collected packages: standard-transform
Successfully installed standard-transform-1.4.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install -

In [2]:
import os
from getpass import getpass

import numpy as np
import pandas as pd
from caveclient import CAVEclient
from microns_phase3 import nda
from standard_transform import minnie_transform_vx

Connecting microns@db.datajoint.com:3306


Could not create the ~log table
Could not access table `microns_phase3_nda`.`~log`


## Preparation

Function that transforms voxel coordinates into depth coordinates (provided to us by the authors of the MICrONS project).

In [3]:
# Build the voxel-space transform object; its `.apply` method is used further down
# to convert `pt_position` values, and the second component of the result is taken as depth.
tform_vx = minnie_transform_vx()

### Querying the data from CAVEclient

In [4]:
# Client created without a datastack name; the following cell uses it only to store the API token.
client = CAVEclient()

In [5]:
# The CAVE API token is a personal credential and must not be committed with the
# notebook. Read it from the CAVE_TOKEN environment variable, falling back to an
# interactive prompt that does not echo the value.
# NOTE(review): the token that was previously hard-coded here should be revoked.
cave_token = os.environ.get("CAVE_TOKEN") or getpass("CAVE API token: ")
client.auth.save_token(cave_token)

We will be using the latest public release of the dataset, i.e. version 661

In [6]:
# Re-create the client, this time bound to the 'minnie65_public' datastack.
client = CAVEclient('minnie65_public')

In [7]:
# Select materialization version 661 for the table and synapse queries made through this client.
client.materialize.version = 661

In [8]:
# Display the names of the tables available through the materialization client.
client.materialize.get_tables()

['synapses_pni_2',
 'baylor_gnn_cell_type_fine_model_v2',
 'nucleus_alternative_points',
 'connectivity_groups_v507',
 'proofreading_status_public_release',
 'allen_column_mtypes_v1',
 'allen_v1_column_types_slanted_ref',
 'aibs_column_nonneuronal_ref',
 'nucleus_ref_neuron_svm',
 'aibs_soma_nuc_exc_mtype_preds_v117',
 'baylor_log_reg_cell_type_coarse_v1',
 'apl_functional_coreg_forward_v5',
 'nucleus_detection_v0',
 'aibs_soma_nuc_metamodel_preds_v117',
 'coregistration_manual_v3']

These are all the available tables. We are interested in two of them: *coregistration_manual_v3*, which contains the information about the manually proofread neurons, and *synapses_pni_2*, which contains all the traced synapses.

In [9]:
# Download the 'coregistration_manual_v3' table; the result is kept in `df` and reused throughout the notebook.
df = client.materialize.query_table('coregistration_manual_v3')

We start by setting the index to the root ID (`pt_root_id`), which is the ID that the synapse table uses to identify each neuron.

In [10]:
# Index the coregistration table by root ID. The guard keeps this cell idempotent:
# once 'pt_root_id' is the index it is no longer a column, so running the cell a
# second time without the check would raise a KeyError.
if df.index.name != 'pt_root_id':
    df = df.set_index('pt_root_id')

In [11]:
# Preview the first rows of the table after re-indexing.
df.head()

Unnamed: 0_level_0,id_ref,created_ref,valid_ref,volume,pt_supervoxel_id,id,created,valid,target_id,session,scan_idx,unit_id,field,residual,score,pt_position,bb_start_position,bb_end_position
pt_root_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
864691135738685297,582276,2020-09-28 22:41:42.536121+00:00,t,274.291589,111332012547320963,13434,2023-04-05 22:39:53.474202+00:00,t,582276,7,5,4909,4,6.10722,9.56869,"[338720, 125232, 19589]","[nan, nan, nan]","[nan, nan, nan]"
864691135614842827,194144,2020-09-28 22:42:01.511773+00:00,t,213.307228,83542405709639148,3045,2023-04-05 22:39:03.110911+00:00,t,194144,7,4,9575,6,8.55708,-0.7149,"[136400, 170640, 17951]","[nan, nan, nan]","[nan, nan, nan]"
864691135502985397,293318,2020-09-28 22:45:23.572432+00:00,t,430.10818,90783515419343652,1474,2023-04-05 22:36:19.477499+00:00,t,293318,9,4,8066,6,5.63758,8.915025,"[189472, 119296, 26037]","[nan, nan, nan]","[nan, nan, nan]"
864691135387371905,332833,2020-09-28 22:44:41.864456+00:00,t,274.418729,93535317407139914,7915,2023-04-05 22:39:15.451639+00:00,t,332833,6,7,6413,6,5.27806,7.808064,"[209328, 174304, 20004]","[nan, nan, nan]","[nan, nan, nan]"
864691135661410544,295460,2020-09-28 22:41:24.368712+00:00,t,208.616653,90225718084406516,11589,2023-04-05 22:39:47.420305+00:00,t,295460,9,3,7748,6,5.20837,2.999543,"[185088, 157776, 15691]","[nan, nan, nan]","[nan, nan, nan]"


In [12]:
# (number of rows, number of columns) of the coregistration table.
df.shape

(13925, 18)

In [13]:
# Turn the (session, scan_idx, unit_id) triplet of every row into a list of dicts,
# split into three batches that are used one at a time to restrict the
# AreaMembership table in the next cell.
unit_key_columns = ['session', 'scan_idx', 'unit_id']
unit_keys_1 = df.iloc[:5000][unit_key_columns].to_dict(orient='records')
unit_keys_2 = df.iloc[5000:10000][unit_key_columns].to_dict(orient='records')
# Open-ended slice: the last batch takes every remaining row, so no rows are
# silently dropped if the table no longer has exactly 13925 rows.
unit_keys_3 = df.iloc[10000:][unit_key_columns].to_dict(orient='records')

In [14]:
# Look up the brain area of every coregistered unit.
#
# A restricted DataJoint fetch does not return rows in the order of the keys used
# in the restriction, so the fetched areas cannot simply be concatenated and
# assigned to `df` by position. The key columns are therefore fetched together
# with 'brain_area', and the result is aligned to `df` by merging on those keys.
area_key_columns = ['session', 'scan_idx', 'unit_id']
area_batches = []
for batch_number, batch_keys in enumerate((unit_keys_1, unit_keys_2, unit_keys_3), start=1):
    batch_records = (nda.AreaMembership & batch_keys).fetch(
        *area_key_columns, 'brain_area', as_dict=True
    )
    # Explicit columns keep the frame mergeable even if a batch returns no rows.
    area_batches.append(pd.DataFrame(batch_records, columns=area_key_columns + ['brain_area']))
    print(f'{batch_number}/3 Completed!')

# One row per unit key (the same unit may be requested by more than one batch).
area_lookup = pd.concat(area_batches, ignore_index=True).drop_duplicates(subset=area_key_columns)

# A left merge preserves the row order of `df`, so the resulting array lines up
# with `df` row by row.
area_aligned = df[area_key_columns].merge(
    area_lookup, on=area_key_columns, how='left', validate='many_to_one'
)
area_membership = area_aligned['brain_area'].to_numpy()
assert len(area_membership) == df.shape[0]
assert not pd.isna(area_membership).any(), 'some units have no AreaMembership entry'

1/3 Completed!
2/3 Completed!
3/3 Completed!


In [15]:
# Attach the brain areas as a new column. The array is assigned by position, so
# `area_membership` must be in the same row order as `df`.
df['Area Membership'] = area_membership
df.head()

Unnamed: 0_level_0,id_ref,created_ref,valid_ref,volume,pt_supervoxel_id,id,created,valid,target_id,session,scan_idx,unit_id,field,residual,score,pt_position,bb_start_position,bb_end_position,Area Membership
pt_root_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
864691135738685297,582276,2020-09-28 22:41:42.536121+00:00,t,274.291589,111332012547320963,13434,2023-04-05 22:39:53.474202+00:00,t,582276,7,5,4909,4,6.10722,9.56869,"[338720, 125232, 19589]","[nan, nan, nan]","[nan, nan, nan]",V1
864691135614842827,194144,2020-09-28 22:42:01.511773+00:00,t,213.307228,83542405709639148,3045,2023-04-05 22:39:03.110911+00:00,t,194144,7,4,9575,6,8.55708,-0.7149,"[136400, 170640, 17951]","[nan, nan, nan]","[nan, nan, nan]",V1
864691135502985397,293318,2020-09-28 22:45:23.572432+00:00,t,430.10818,90783515419343652,1474,2023-04-05 22:36:19.477499+00:00,t,293318,9,4,8066,6,5.63758,8.915025,"[189472, 119296, 26037]","[nan, nan, nan]","[nan, nan, nan]",V1
864691135387371905,332833,2020-09-28 22:44:41.864456+00:00,t,274.418729,93535317407139914,7915,2023-04-05 22:39:15.451639+00:00,t,332833,6,7,6413,6,5.27806,7.808064,"[209328, 174304, 20004]","[nan, nan, nan]","[nan, nan, nan]",V1
864691135661410544,295460,2020-09-28 22:41:24.368712+00:00,t,208.616653,90225718084406516,11589,2023-04-05 22:39:47.420305+00:00,t,295460,9,3,7748,6,5.20837,2.999543,"[185088, 157776, 15691]","[nan, nan, nan]","[nan, nan, nan]",V1


In [16]:
# Collect the index values (the root IDs) as a plain Python list and show how many there are.
root_ids = df.index.tolist()
len(root_ids)

13925

In [17]:
# Restrict this run to one batch of root IDs (list positions 7000 to 7999).
# NOTE(review): presumably this batch corresponds to the `_8` suffix of the CSV written at the end — confirm.
subset_ids = root_ids[7000:8000]

In [18]:
# Accumulators with one entry per matched synapse; the export cell zips them into a table.
input_values = []   # transformed depth of the presynaptic (input) neuron
output_values = []  # transformed depth of the postsynaptic (output) neuron
v_area = []         # 'Area Membership' of the presynaptic neuron

for i in subset_ids:
    # Resolve the postsynaptic depth first, so root IDs that are discarded
    # anyway do not cost a remote synapse query.
    # This is the correct depth value that is retrieved by a special function created by the MICrONS team
    output_vertices = np.asarray(tform_vx.apply(df.at[i, 'pt_position']))
    # Only a single transformed (x, y, z) point is usable here. A root ID that
    # appears more than once in the index presumably makes `df.at` return several
    # positions, which shows up as a result that is not one-dimensional.
    if output_vertices.ndim != 1 or output_vertices.shape[0] < 2:
        continue
    graph_y_value = output_vertices[1]
    # isinstance accepts any NumPy float width rather than float64 exactly.
    if not isinstance(graph_y_value, np.floating):
        continue

    # All synapses onto neuron `i`; renaming the presynaptic root ID column lets
    # it be merged against the coregistration table, whose index is 'pt_root_id'.
    input_df = client.materialize.synapse_query(post_ids=i).rename(
        columns={'pre_pt_root_id': 'pt_root_id'}
    )

    # Keep only synapses whose presynaptic partner is in the coregistration table.
    df_matched = pd.merge(input_df, df, on='pt_root_id', how='inner')
    if df_matched.empty:
        continue

    # Transform every presynaptic position and keep the second component (depth).
    input_vertices_list = tform_vx.apply(df_matched['pt_position'])
    graph_x_values = np.vstack(input_vertices_list)[:, 1]

    input_values.extend(graph_x_values)
    # Repeat the postsynaptic depth once per matched synapse so the lists stay parallel.
    output_values.extend([graph_y_value] * len(graph_x_values))
    v_area.extend(df_matched['Area Membership'])

In [19]:
# Assemble the per-synapse export table from the three parallel accumulator lists.
# NOTE(review): the 'Inputh Depth' header is misspelled; it is left unchanged here
# because files written with this header may already be consumed elsewhere.
export_columns = ['Inputh Depth', 'Output Depth', 'Area Membership']
export_rows = list(zip(input_values, output_values, v_area))
df_export = pd.DataFrame(export_rows, columns=export_columns)
df_export.head()

Unnamed: 0,Inputh Depth,Output Depth,Area Membership
0,181.079174,222.435788,V1
1,261.10116,222.435788,RL
2,350.274572,222.435788,V1
3,350.274572,222.435788,V1
4,350.274572,222.435788,V1


In [20]:
# Write this batch's results to a CSV file in the current working directory.
# With pandas' default settings the DataFrame index is written as the first, unnamed column.
df_export.to_csv('graph_values_8.csv')