In [76]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from caveclient import CAVEclient
from requests.exceptions import HTTPError

Here's what I'm going to do:

1. I'm going to identify the root IDs of the post-synaptic cells I want to observe.
2. I'm going to query the entire synapse table for each of these IDs.
3. I'm going to extract the synapse locations from the synapse table.
4. I'm going to generate a minimum spanning tree from the synapse locations.
5. I'm going to extract the branch sequences from the minimum spanning tree.
6. I'm going to calculate the error between these branch sequences and those I find from just the excitatory synapses.

Need:
1. Corner IDs (the cells at the 5th, 50th, and 95th percentiles of input degree for each cell type)
2. Synapse table for each corner
3. Algorithm for extracting branch sequences from MST
4. Algorithm for calculating error between branch sequences

In [77]:
# Versions noted by the author: 117, 343 and 661 (this notebook uses 343).
datastack_name = 'minnie65_public_v343'
client = CAVEclient(datastack_name)

In [78]:
# Show the table names exposed by the materialization client.
client.materialize.get_tables()

['synapses_pni_2',
 'nucleus_neuron_svm',
 'nucleus_detection_v0',
 'functional_coreg',
 'allen_v1_column_types_slanted',
 'aibs_column_nonneuronal',
 'func_unit_em_match_release',
 'proofreading_status_public_release',
 'aibs_soma_nuc_metamodel_preds_v117']

In [91]:
# Annotation count of the 'nucleus_neuron_svm' table.
client.materialize.get_annotation_count('nucleus_neuron_svm')

171815

In [92]:
# Annotation count of the 'nucleus_detection_v0' table.
client.materialize.get_annotation_count('nucleus_detection_v0')

144120

In [79]:
# Inspect the metadata (schema, description, permissions, ...) of the
# 'nucleus_detection_v0' table.
client.materialize.get_table_metadata('nucleus_detection_v0')

{'schema': 'nucleus_detection',
 'valid': True,
 'id': 8214,
 'aligned_volume': 'minnie65_phase3',
 'table_name': 'nucleus_detection_v0__minnie3_v1',
 'created': '2020-11-02T18:56:35.530100',
 'schema_type': 'nucleus_detection',
 'user_id': '121',
 'description': 'A table of nuclei detections from a nucleus detection model developed by Shang Mu, Leila Elabbady, Gayathri Mahalingam and Forrest Collman. Pt is the centroid of the nucleus detection. id corresponds to the flat_segmentation_source segmentID. Only included nucleus detections of volume>25 um^3, below which detections are false positives, though some false positives above that threshold remain. ',
 'notice_text': None,
 'reference_table': None,
 'flat_segmentation_source': 'precomputed://https://bossdb-open-data.s3.amazonaws.com/iarpa_microns/minnie/minnie65/nuclei',
 'write_permission': 'PRIVATE',
 'read_permission': 'PUBLIC',
 'last_modified': '2022-10-25T19:24:28.559914',
 'segmentation_source': '',
 'pcg_table_name': 'minni

In [80]:
# Load the synapse table from CSV and key it by synapse ID.
synapses = (
    pd.read_csv('data/synapses_w_ids.csv', index_col=0)
    .set_index('synapse_id')
)
display(synapses.head())

Unnamed: 0_level_0,pre_pt_root_id,post_pt_root_id,size,cell_type_pre,cb_x_pre,cb_y_pre,cb_z_pre,cell_type_post,cb_x_post,cb_y_post,cb_z_post,cb_x_diff,cb_y_diff,cb_z_diff,ctr_pt_x,ctr_pt_y,ctr_pt_z
synapse_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
0,864691135564683351,864691136811959667,960,23P,557.248,570.56,732.52,23P,791.744,483.968,780.44,-234.496,86.592,-47.92,637.608,371.352,720.2
1,864691135614119115,864691135508912649,7576,23P,774.72,504.96,895.68,23P,807.936,459.584,870.28,-33.216,45.376,25.4,805.376,488.376,863.6
3,864691135113147801,864691136022555784,684,23P,883.072,451.456,817.84,23P,851.52,483.84,759.96,31.552,-32.384,57.88,858.328,516.648,775.88
4,864691135194393642,864691135341362885,23172,23P,781.248,449.984,696.88,23P,798.72,465.152,758.56,-17.472,-15.168,-61.68,789.4,478.04,691.0
5,864691136272938174,864691135683554546,3660,23P,762.368,473.792,773.68,23P,820.352,446.784,719.08,-57.984,27.008,54.6,756.624,440.928,710.6


In [81]:
# Group the synapses by the value of their 'cell_type_post' column.
synapses_by_ct = synapses.groupby('cell_type_post')

In [82]:
# Percentiles (in %) of input degree at which one "corner" cell is picked
# per cell type, in the order sparse / mid / dense.
corner_percentiles = (5, 50, 95)

corners = {}
for cell_type, type_synapses in synapses_by_ct:
    # Input degree of a cell = number of synapse rows sharing its post_pt_root_id.
    degree_by_cell = type_synapses.groupby('post_pt_root_id').size()

    # (cell_id, input_degree) pairs in ascending order of input degree.
    # sorted() is stable, so ties keep the order produced by the groupby.
    ranked = sorted(degree_by_cell.items(), key=lambda pair: pair[1])

    # Pick the cell sitting at each percentile position of the ranked list.
    picks = [ranked[len(ranked) * pct // 100] for pct in corner_percentiles]

    corners[cell_type] = {
        'input_degrees': [degree for _, degree in picks],
        'cell_ids': [cell_id for cell_id, _ in picks],
    }

print(len(corners))

7


In [83]:
# For each cell type, print its name followed by the corner cell IDs and
# their input degrees on the next line.
for cell_type, corner in corners.items():
    print(f"{cell_type}\n{corner['cell_ids']} {corner['input_degrees']}")

23P
[864691135358817112, 864691135771609595, 864691135490583527] [18, 74, 165]
4P
[864691136123745830, 864691135162227501, 864691135888625289] [22, 88, 185]
5P-ET
[864691135472337458, 864691136296648219, 864691136370941832] [41, 272, 645]
5P-IT
[864691136272995518, 864691136877143406, 864691135012425718] [21, 108, 256]
5P-NP
[864691135467833424, 864691135338006502, 864691134884768250] [4, 36, 81]
6P-CT
[864691135753666509, 864691136109199032, 864691136418869655] [24, 86, 157]
6P-IT
[864691135739594644, 864691134884753914, 864691135621575108] [15, 78, 154]


In [84]:
# Query the complete synapse table for each corner
# percentiles = [5, 50, 95]
# for cell_type, corner in corners.items():
#     for i, cell_id in enumerate(corner['cell_ids']):
#         print(cell_id, corner['input_degrees'][i])
#         try:
#             syns = client.materialize.query_table('synapses_pni_2',
#                                                   filter_equal_dict={'post_pt_root_id': cell_id})
#             syns.to_csv(f'data/corners/{cell_type}_corner_{percentiles[i]}.csv')
#         except HTTPError as e:
#             print(e)
# print('Done')

In [85]:
# Load the excitatory-cell table from CSV, then show its shape and first rows.
cells_csv_path = 'data/cells_no_repeats.csv'
excitatory_cells = pd.read_csv(cells_csv_path, index_col=0)
print(excitatory_cells.shape)
display(excitatory_cells.head())

(56209, 5)


Unnamed: 0_level_0,cell_type,pt_root_id,pt_x,pt_y,pt_z
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,23P,864691135639004475,71136,110528,20220
1,23P,864691135771677771,72576,108656,20291
2,23P,864691135864089470,79632,121456,16754
3,23P,864691135560505569,80128,124000,16563
4,23P,864691136315868311,80144,126928,16622


In [86]:
# Distinct root IDs of the excitatory cells, and how many there are.
exc_root_ids = excitatory_cells['pt_root_id']
unique_exc_ids = exc_root_ids.unique()
print(len(unique_exc_ids))

56209


In [100]:
# Query the nucleus_detection_v0 table for the root ID and volume of every
# nucleus detection. No filter is applied here, so the result covers the whole
# table, not only excitatory cells.
nucleus_columns = ['pt_root_id', 'volume']
nuclei = client.materialize.query_table('nucleus_detection_v0',
                                        select_columns=nucleus_columns)
print(nuclei.shape)

(144120, 2)


In [126]:
# Keep only the nucleus rows whose root ID belongs to an excitatory cell.
is_excitatory = nuclei['pt_root_id'].isin(unique_exc_ids)
exc_nuclei = nuclei.loc[is_excitatory]
print(exc_nuclei.shape)

(59372, 2)


In [127]:
# Group the excitatory nucleus rows by root ID and report the number of groups.
exc_nuclei_by_id = exc_nuclei.groupby('pt_root_id')
print(exc_nuclei_by_id.ngroups)

56209


In [128]:
# Materialize the (root_id, rows) groups and order them from most to fewest
# nucleus rows; the sort is stable, so equal counts keep their groupby order.
exc_nuclei_by_id_sorted = list(exc_nuclei_by_id)
exc_nuclei_by_id_sorted.sort(key=lambda group: len(group[1]), reverse=True)
display(exc_nuclei_by_id_sorted[:5])

[(864691135233407577,
                  pt_root_id     volume
  25603   864691135233407577  48.723886
  25746   864691135233407577  58.168771
  26939   864691135233407577  55.692165
  31165   864691135233407577  60.822815
  31930   864691135233407577  46.284964
  ...                    ...        ...
  142032  864691135233407577  49.735107
  142033  864691135233407577  45.048627
  142034  864691135233407577  48.877404
  142035  864691135233407577  49.579295
  142036  864691135233407577  46.421934
  
  [170 rows x 2 columns]),
 (864691134988798074,
                  pt_root_id     volume
  77300   864691134988798074  52.994703
  77749   864691134988798074  37.316362
  78040   864691134988798074  60.976497
  78041   864691134988798074  51.292242
  78353   864691134988798074  49.246700
  ...                    ...        ...
  141943  864691134988798074  32.885473
  141944  864691134988798074  68.346020
  141960  864691134988798074  45.695468
  141961  864691134988798074  48.072294
  1419

In [145]:
# One (root ID, number of nucleus rows) row per group, in the sorted order.
exc_nuclei_counts = np.array(
    [(root_id, len(group_rows)) for root_id, group_rows in exc_nuclei_by_id_sorted]
)

# Row positions of the root IDs that have more than one nucleus row.
multi_nucleus_rows = np.where(exc_nuclei_counts[:, 1] > 1)[0]
print(multi_nucleus_rows.shape)
print(exc_nuclei_counts[multi_nucleus_rows])

(2055,)
[[864691135233407577                170]
 [864691134988798074                106]
 [864691135864698076                 32]
 ...
 [864691137196913729                  2]
 [864691137196953409                  2]
 [864691137197179713                  2]]


In [144]:
# Fraction of excitatory root IDs that have more than one nucleus row.
# The numerator is computed from exc_nuclei_counts rather than typed in from
# another cell's printed output, so it stays correct if the data changes.
np.count_nonzero(exc_nuclei_counts[:, 1] > 1) / len(exc_nuclei_counts)

0.03655998149762493

In [131]:
# Reduce to one nucleus row per root ID. keep='first' (the pandas default)
# retains whichever row appears first in exc_nuclei for each root ID, so for
# root IDs with several nucleus rows the kept volume is simply the first one
# listed, not a chosen or aggregated value.
exc_nuclei_no_repeats = exc_nuclei.drop_duplicates(subset='pt_root_id', keep='first')
print(exc_nuclei_no_repeats.shape)
display(exc_nuclei_no_repeats.head())

(56209, 2)


Unnamed: 0,pt_root_id,volume
219,864691135639004475,343.540531
222,864691135771677771,301.426115
368,864691135864089470,256.422216
369,864691135560505569,263.637074
370,864691136315868311,233.121055


In [132]:
def calc_radius(volume):
    """Return the radius of a sphere that has the given volume.

    Inverts V = (4/3) * pi * r**3, i.e. r = (3V / (4*pi)) ** (1/3).

    Parameters
    ----------
    volume : scalar or array-like supporting arithmetic
        Sphere volume(s). The result is in the corresponding length unit.

    Returns
    -------
    Same kind as ``volume``
        The radius (element-wise for array input).
    """
    scaled_volume = 3 * volume
    sphere_constant = 4 * np.pi
    return (scaled_volume / sphere_constant) ** (1 / 3)