In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors
from copy import deepcopy
import utils as ut

In [2]:
# Load the cell table and the full synapse table.
# Each CSV is read with its first column as a throwaway index, then re-keyed
# on its own ID column ('pt_root_id' for cells, 'synapse_id' for synapses).
cells = pd.read_csv('data/cells_no_repeats.csv', index_col=0).set_index('pt_root_id')
print(cells.shape)
display(cells.head())

synapses_all = pd.read_csv('data/synapses_w_ids.csv', index_col=0).set_index('synapse_id')
print(synapses_all.shape)
display(synapses_all.head())

(56209, 4)


Unnamed: 0_level_0,cell_type,pt_x,pt_y,pt_z
pt_root_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
864691135639004475,23P,71136,110528,20220
864691135771677771,23P,72576,108656,20291
864691135864089470,23P,79632,121456,16754
864691135560505569,23P,80128,124000,16563
864691136315868311,23P,80144,126928,16622


(5421809, 17)


Unnamed: 0_level_0,pre_pt_root_id,post_pt_root_id,size,cell_type_pre,cb_x_pre,cb_y_pre,cb_z_pre,cell_type_post,cb_x_post,cb_y_post,cb_z_post,cb_x_diff,cb_y_diff,cb_z_diff,ctr_pt_x,ctr_pt_y,ctr_pt_z
synapse_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
0,864691135564683351,864691136811959667,960,23P,557.248,570.56,732.52,23P,791.744,483.968,780.44,-234.496,86.592,-47.92,637.608,371.352,720.2
1,864691135614119115,864691135508912649,7576,23P,774.72,504.96,895.68,23P,807.936,459.584,870.28,-33.216,45.376,25.4,805.376,488.376,863.6
3,864691135113147801,864691136022555784,684,23P,883.072,451.456,817.84,23P,851.52,483.84,759.96,31.552,-32.384,57.88,858.328,516.648,775.88
4,864691135194393642,864691135341362885,23172,23P,781.248,449.984,696.88,23P,798.72,465.152,758.56,-17.472,-15.168,-61.68,789.4,478.04,691.0
5,864691136272938174,864691135683554546,3660,23P,762.368,473.792,773.68,23P,820.352,446.784,719.08,-57.984,27.008,54.6,756.624,440.928,710.6


In [3]:
# Keep only a subset of the synapse-table columns, in this order.
key_columns = [
    'pre_pt_root_id',
    'post_pt_root_id',
    'cell_type_pre',
    'ctr_pt_x',
    'ctr_pt_y',
    'ctr_pt_z',
    'size',
]
synapses = synapses_all[key_columns]
display(synapses.head())

Unnamed: 0_level_0,pre_pt_root_id,post_pt_root_id,cell_type_pre,ctr_pt_x,ctr_pt_y,ctr_pt_z,size
synapse_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,864691135564683351,864691136811959667,23P,637.608,371.352,720.2,960
1,864691135614119115,864691135508912649,23P,805.376,488.376,863.6,7576
3,864691135113147801,864691136022555784,23P,858.328,516.648,775.88,684
4,864691135194393642,864691135341362885,23P,789.4,478.04,691.0,23172
5,864691136272938174,864691135683554546,23P,756.624,440.928,710.6,3660


In [4]:
# Group the synapse rows by 'post_pt_root_id' and summarise the distribution
# of group sizes (number of synapse rows per group).
synapses_by_cell = synapses.groupby(by='post_pt_root_id')
synapse_counts = synapses_by_cell.size()
print(synapse_counts.describe())

count    56179.000000
mean        96.509532
std         74.177980
min          1.000000
25%         53.000000
50%         84.000000
75%        122.000000
max       1952.000000
dtype: float64


In [5]:
# Distinct 'cell_type' labels found in the cell table, and the percentile
# values of interest; report how many of each there are (one count per line).
percentiles = [5, 50, 95]
cell_types = [*cells['cell_type'].unique()]
print(len(cell_types), len(percentiles), sep='\n')

7
3


Here's the vision:

For each corner:
1. Generate an MST from all the cells
2. Generate an MST from only the excitatory cells
3. Retrieve the sequences from both MSTs
4. Evaluate the similarity between the retrieved sequences


In [46]:
# Read one corner table per (cell type, percentile) pair and store it in a
# dict keyed by that pair; each table is re-indexed on 'synapse_id'.
corners = {}
for cell_type in cell_types:
    for percentile in percentiles:
        corner_path = f'data/corners/w_ct_synid/{cell_type}_corner_{percentile}.csv'
        corner = pd.read_csv(corner_path, index_col=0).set_index('synapse_id')
        corners[cell_type, percentile] = corner
print(len(corners))
display(corners['23P', 5].head())

21


Unnamed: 0_level_0,id,pre_pt_root_id,post_pt_root_id,size,ctr_pt_x,ctr_pt_y,ctr_pt_z,cell_type_pre
synapse_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
-424773884,424773884,864691133277109275,864691135358817112,344,1352.904,402.832,884.48,Unknown
-434139270,434139270,864691135641361891,864691135358817112,536,1361.344,416.128,841.92,Unknown
-445164164,445164164,864691136215043518,864691135358817112,4184,1395.048,402.864,841.64,Unknown
-425011376,425011376,864691136329585756,864691135358817112,4644,1348.564,374.184,845.52,Unknown
-391849847,391849847,864691134415834633,864691135358817112,2876,1261.456,341.928,788.6,Unknown


In [38]:
# NOTE: every line of this cell is commented out, so running it does nothing.
# Any "matches / misses" printout stored with this cell therefore comes from
# an earlier run, made while the code was still active.
#
# What the disabled code does, for each ((cell_type, percentile), corner) in
# `corners`:
#   * takes the first 'post_pt_root_id' value of the corner and fetches that
#     cell's rows from `synapses_by_cell`;
#   * for each corner row, queries those rows for an exact match on
#     ctr_pt_x / ctr_pt_y / ctr_pt_z, pre_pt_root_id and size;
#   * no match         -> sets synapse_id to -id and cell_type_pre to 'Unknown';
#   * exactly one      -> copies the matched row's index into synapse_id and
#                         its cell_type_pre value into the corner row;
#   * more than one    -> prints an error and breaks out of both loops.
# NOTE(review): "misses" is computed as len(syn_group) - matches, i.e. rows of
# the synapse group that were not matched, not corner rows without a match.
# NOTE(review): presumably this code produced the 'synapse_id' and
# 'cell_type_pre' columns of the corner files under 'w_ct_synid' -- confirm
# before deleting the cell.

# for (cell_type, percentile), corner in corners.items():
#     cell_id = corner['post_pt_root_id'].values[0]
#     syn_group = synapses_by_cell.get_group(cell_id)
#     error = False

#     matches = 0

#     for corner_indx, row in corner.iterrows():
#         pos_cond = f'(ctr_pt_x == {row["ctr_pt_x"]} and ctr_pt_y == {row["ctr_pt_y"]} and ctr_pt_z == {row["ctr_pt_z"]})'
#         pre_id_cond = f'(pre_pt_root_id == {row["pre_pt_root_id"]})'
#         size_cond = f'(size == {row["size"]})'

#         match = syn_group.query(pos_cond + ' and ' + pre_id_cond + ' and ' + size_cond)
        
#         if len(match) == 0:
#             corner.loc[corner_indx, 'synapse_id'] = int(-corner.loc[corner_indx, 'id'])
#             corner.loc[corner_indx, 'cell_type_pre'] = 'Unknown'
#         elif len(match) == 1:
#             syn_id = match.index[0]
#             corner.loc[corner_indx, 'synapse_id'] = int(syn_id)
#             corner.loc[corner_indx, 'cell_type_pre'] = match['cell_type_pre'].values[0]
#         elif len(match) > 1:
#             print(f'Error: multiple matches for {corner_indx}')
#             error = True
#             break

#         matches += len(match)
    
#     if error:
#         break
        
#     print(f'{cell_type} {percentile}: {matches} matches, {len(syn_group) - matches} misses')



23P 5: 18 matches, 0 misses
23P 50: 74 matches, 0 misses
23P 95: 165 matches, 0 misses
4P 5: 22 matches, 0 misses
4P 50: 88 matches, 0 misses
4P 95: 185 matches, 0 misses
6P-IT 5: 15 matches, 0 misses
6P-IT 50: 78 matches, 0 misses
6P-IT 95: 154 matches, 0 misses
6P-CT 5: 24 matches, 0 misses
6P-CT 50: 86 matches, 0 misses
6P-CT 95: 157 matches, 0 misses
5P-IT 5: 21 matches, 0 misses
5P-IT 50: 108 matches, 0 misses
5P-IT 95: 256 matches, 0 misses
5P-NP 5: 4 matches, 0 misses
5P-NP 50: 36 matches, 0 misses
5P-NP 95: 81 matches, 0 misses
5P-ET 5: 41 matches, 0 misses
5P-ET 50: 272 matches, 0 misses
5P-ET 95: 645 matches, 0 misses
