# Setup

In [None]:
############################################################################################################################ 
# Get the latest CREST files for each ID within the target folder (dirname)

from pathlib import Path
import json
from sqlite3 import connect as sqlite3_connect
from sqlite3 import DatabaseError
from igraph import Graph as ig_Graph
from igraph import plot as ig_plot
from scipy.spatial.distance import cdist
from random import choice as random_choice
from itertools import combinations
from numpy import array, unravel_index, argmin, mean
import random
import numpy as np
from copy import deepcopy
import itertools
from datetime import datetime
from time import time
import neuroglancer
from webbrowser import open as wb_open
from webbrowser import open_new as wb_open_new
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from tqdm import tqdm

In [2]:
import sys
# Add a local eCREST folder to the module search path.
# NOTE(review): this absolute path is machine-specific and must be edited per user.
sys.path.append('/Users/kperks/Documents/ell-connectome/eCREST/eCREST')

In [3]:
# from eCREST_cli_beta import ecrest, import_settings
from eCREST_cli import ecrest, import_settings, get_cell_filepaths

The 'ecrest' class has been imported from eCREST_cli.py

An instance of this object will be able to:
- open a neuroglancer viewer for proofreading (see "Proofread using CREST")
    - add-remove segments (using graph feature for efficiency)
    - format itself and save itself as a CREST-style .json
- convert from neuroglancer json (see "Convert From Neuroglancer to eCREST")
    - format itself and save itself as a CREST-style .json
    


### Import settings

If you save a copy of settings_dict.json (found in the "under construction" directory of eCREST repo) locally somewhere outside the repo (like in your save_dir), then you can use the following code cell to import. This avoids needing to re-type the save_dir and db_path each time you "git pull" updates from the repo to this notebook.

In [4]:
path_to_settings_json = '/Users/kperks/Documents/ell-connectome/eCREST-local-files/settings_dict.json'
settings_dict = import_settings(path_to_settings_json)

In [5]:
dirpath = Path(settings_dict['save_dir'])
# dirpath = "/Users/kperks/Documents/gdrive/.shortcut-targets-by-id/16q1BuOMfD2ta0Cwq8CjMlRe4rDvbuWC5/ELL_connectome/CREST_reconstructions/mg-network"

# Get all cells info

## Cell Types

In [21]:
nodefiles = get_cell_filepaths(dirpath)

## manual type

In [22]:
# Collect the manually assigned cell type of every reconstructed cell.
# Cells whose 'manual' type is empty ([] or '') are stored as NaN and reported.
cell_type = {}
not_typed = []
for x, f in nodefiles.items():
    cell = ecrest(settings_dict, filepath=f, launch_viewer=False)
    manual_type = cell.get_ctype('manual')  # read once instead of up to three times per cell
    if manual_type == [] or manual_type == '':
        # np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0
        manual_type = np.nan
        not_typed.append(x)
    cell_type[int(x)] = manual_type

print('the following cells are not typed in the main network')
print(not_typed)

df_type = pd.DataFrame(cell_type.items(), columns=['id', 'cell_type'])

# Collapse sub-type labels onto the label used in the rest of the notebook.
for alias, canonical in {'dml': 'mli', 'grc-d': 'grc', 'grc-s': 'smpl', 'pfm': 'pf'}.items():
    df_type.loc[df_type['cell_type'].isin([alias]), 'cell_type'] = canonical

the following cells are not typed in the main network
[]


## auto type

This field is populated when the auto-typing of a cell differed from its manual typing.

In [23]:
# Collect the 'auto' cell type of every cell; an empty value ([] or '') is stored as NaN.
# NOTE: this rebinds cell_type, so from here on the dict holds auto types, not manual ones.
cell_type = {}
for x, f in nodefiles.items():
    cell = ecrest(settings_dict, filepath=f, launch_viewer=False)
    auto_type = cell.get_ctype('auto')  # read once instead of up to three times per cell
    if auto_type == [] or auto_type == '':
        # np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0
        auto_type = np.nan
    cell_type[int(x)] = auto_type

df_type_auto = pd.DataFrame(cell_type.items(), columns=['id', 'cell_type'])


In [24]:
# Keep only the cells that actually carry an auto type (rows containing NaN are removed).
df_type_auto = df_type_auto.dropna()

In [25]:
df_type_auto.head()

Unnamed: 0,id,cell_type
104,127607174,mg2
106,127672247,mg2
154,129572524,mg1
744,213501262,mg1
769,214440956,mg2


## combine manual and auto df_type

In [26]:
# Overwrite the manual type with the auto type wherever an auto type exists.
# Rows are matched on the cell id instead of the positional index, so the result does not
# depend on both frames having been built from the same nodefiles ordering, and rows whose
# auto type is NaN can never erase a manual type.
auto_type_by_id = (
    df_type_auto.dropna(subset=['cell_type'])
    .drop_duplicates('id', keep='last')
    .set_index('id')['cell_type']
)
has_auto_type = df_type['id'].isin(auto_type_by_id.index)
df_type.loc[has_auto_type, 'cell_type'] = df_type.loc[has_auto_type, 'id'].map(auto_type_by_id)

In [27]:
# for i,r in df_type.iterrows():
#     df_type.loc[i,'fsize'] = nodefiles[str(r['id'])].stat().st_size

# df_type[df_type['type'].isin(['sg2'])].sort_values('fsize').head(20)

In [28]:
df_type.head()

Unnamed: 0,id,cell_type
0,102463116,pf
1,103366246,uk
2,103549879,mli
3,107201352,mli
4,109348216,mli


### save to file

In [29]:
df_type.to_csv(dirpath / 'metadata/df_type.csv')

## Base Segments

In [30]:
nodefiles = get_cell_filepaths(dirpath)

In [31]:
# Create a base_segments dictionary of all reconstructed cells,
# keyed by the main base segment id stored in each cell's json.
base_segments = {}
for x, f in nodefiles.items():
    # if cell_type[x] in network_types: # if do this, you can't check if the post-syn segments exist as a reconstruction
    cell = ecrest(settings_dict, filepath=f)  # launch_viewer is left at its default here
    main_base = cell.cell_data['metadata']['main_seg']['base']
    base_segments[main_base] = cell.cell_data['base_segments']

    # Report files whose key in nodefiles disagrees with the id stored in the json.
    # An explicit comparison is used because an assert wrapped in a bare except is
    # stripped under `python -O` and also swallows unrelated errors.
    if main_base != x:
        print(x, main_base)

## Cell structure labeling checks

In [117]:
print('need labeling for:')

# Types are looked up in df_type by integer id. The cell_type dict cannot be used here:
# it is keyed by int ids (base_segments keys are the ids stored in the json) and it is
# rebound to the auto types in the auto-typing cell.
type_by_id = df_type.drop_duplicates('id').set_index('id')['cell_type'].to_dict()

for x, segs in base_segments.items():
    n_segments = sum(len(seg_ids) for seg_ids in segs.values())
    ctype = type_by_id.get(int(x))
    # a cell needs labeling when every one of its segments is still filed under 'unknown'
    if len(segs['unknown']) == n_segments and ctype in ['lf', 'lg']:
        print(f'{x} {ctype}')

need labeling for:


# Build Graph

In [32]:
# Only cells whose type in df_type is listed here are opened as pre-synaptic cells when edge_list is built.
network_types = ['mli']#set([v for v in df_type['cell_type'].unique()])#['tsd']# ['mg1','mg2','lg','lf','sg1','sg2','sgx','grc','aff']

In [33]:
synanno_type = 'post-synaptic'
vx_sizes = [16, 16, 30]  # nm per voxel (x, y, z); only used to report a location in voxels

# Cell type per integer cell id, built once instead of filtering df_type for every cell.
type_by_id = df_type.drop_duplicates('id').set_index('id')['cell_type'].to_dict()

# Reverse index built once: segment id -> [(post cell id, structure label), ...].
# It is filled in the same order the per-synapse nested scan used to visit segments, so the
# order of the resulting edges is unchanged (a segment listed twice still yields two edges).
# Assumes segment ids are hashable (e.g. str) -- TODO confirm.
targets_by_segment = {}
for x_post in nodefiles.keys():
    for structure, seg_ids in base_segments[x_post].items():
        for seg_id in seg_ids:
            targets_by_segment.setdefault(seg_id, []).append((x_post, structure))

## find edges and set the cell-structure attribute of the edge based on which part of the cell the edge goes to
edge_list = []

for x_pre in tqdm(nodefiles.keys(), total=len(nodefiles)):
    # ids missing from df_type (or typed NaN) are simply not part of the network
    if type_by_id.get(int(x_pre)) not in network_types:
        continue

    # the current cell is assumed pre-synaptic; its post-synaptic annotations are the synapses
    pre = ecrest(settings_dict, filepath=nodefiles[x_pre])  # launch_viewer is left at its default
    for syn_ in pre.cell_data['end_points'][synanno_type]:
        # assumes that the annotation is a point annotation stored in the list as
        # ([x,y,z,segment_id],'annotatePoint'); prior to Jan 25 2024 it was just [x,y,z,segment_id]
        syn_ = syn_[0]
        try:
            post_seg = syn_[3]
            # synapse annotations are exported as nanometers, so no conversion is needed
            xyz = array([int(syn_[i]) for i in range(3)])
        except IndexError as msg:
            print(msg, f'for cell {x_pre} synapse at {array([int(syn_[i]/vx_sizes[i]) for i in range(3)])} voxels has no segment id')
            continue

        # one edge per (cell, structure) entry that contains the annotated segment
        for x_post, structure in targets_by_segment.get(post_seg, []):
            edge_list.append([x_pre, x_post, structure, xyz[0], xyz[1], xyz[2]])


100%|████████████████████████████████████████████████████████████████████████████████████████████████| 5710/5710 [03:28<00:00, 27.43it/s]


## Specific cell(s)

In [46]:
# Rebuild edge_list from scratch using only the pre-synaptic cells named in cell_list.
edge_list = []
cell_list = ['295969355']
synanno_type = 'post-synaptic'

for x_pre in cell_list:
    pre = ecrest(settings_dict, filepath=nodefiles[x_pre])
    annotations = pre.cell_data['end_points'][synanno_type]
    if annotations == []:
        # nothing annotated for this cell
        continue

    for annotation in annotations:
        # Each annotation is expected to be ([x,y,z,segment_id],'annotatePoint');
        # before Jan 25 2024 it was stored as just [x,y,z,segment_id].
        point = annotation[0]
        try:
            post_seg = point[3]
            # coordinates are exported in nanometers, so they are used as-is
            point = array([int(point[axis]) for axis in range(3)])

            # scan every reconstructed cell for structures containing the annotated segment
            for x_post in nodefiles.keys():
                for structure, seg_ids in base_segments[x_post].items():
                    # the same segment id may appear more than once; each hit adds an edge
                    hits = [seg_id for seg_id in list(seg_ids) if post_seg == seg_id]
                    edge_list.extend(
                        [x_pre, x_post, structure, point[0], point[1], point[2]] for _ in hits
                    )

        except IndexError as msg:
            print(msg, f'for cell {x_pre} synapse at {array([int(point[i]/vx_sizes[i]) for i in range(3)])} voxels has no segment id')



# Synapses dataframe

In [34]:
df_syn = pd.DataFrame(edge_list,columns = ['pre','post','structure','x','y','z'])

# for i,r in df_syn.iterrows():
#     df_syn.loc[i,'pre_type']=cell_type[df_syn.loc[i,'pre']]
#     df_syn.loc[i,'post_type']=cell_type[df_syn.loc[i,'post']]

In [None]:
# df_syn

## If you want to peek at df_edges

In [61]:
# Attach the cell type of the pre- and post-synaptic cell to every synapse.
# A vectorised lookup replaces the row-by-row .loc writes; ids without an entry in df_type
# get NaN rather than being skipped by a bare except, and both columns always exist.
type_by_id = df_type.drop_duplicates('id').set_index('id')['cell_type']
df_syn['pre_type'] = df_syn['pre'].astype('int64').map(type_by_id)
df_syn['post_type'] = df_syn['post'].astype('int64').map(type_by_id)

In [60]:
df_type['id'].values

array([109461049, 109502641, 110418632, ..., 669195832, 670337293,
        91387157])

In [62]:
df_edges = df_syn[['pre','post','pre_type','post_type']].value_counts().reset_index(name='weight') #'structure',

In [None]:
# df_edges#[df_edges['post_type'].isin(['mg2','lg'])]

In [63]:
display(df_edges[['pre','post_type']].groupby('post_type')['pre'].count())

post_type
lf     2
lg     1
mg2    1
sg1    3
sg2    2
Name: pre, dtype: int64

In [None]:
# Look up the soma diameter of both partners of every edge.
# NOTE(review): soma_diam is not defined in the cells shown here; it is indexed with
# str(cell id), so it presumably is a dict-like keyed by id strings -- confirm it is loaded first.
# Whole-column assignment keeps the KeyError for unknown ids and, unlike row-wise .loc
# writes, still creates the columns when df_edges is empty.
df_edges['pre_diam'] = [soma_diam[str(cell_id)] for cell_id in df_edges['pre']]
df_edges['post_diam'] = [soma_diam[str(cell_id)] for cell_id in df_edges['post']]

# relative size difference of the post-synaptic soma with respect to the pre-synaptic one
df_edges['diam_diff'] = (df_edges['post_diam'] - df_edges['pre_diam']) / df_edges['pre_diam']

In [None]:
# df_edges

# For the cells that appear as post-synaptic partners in df_edges, count how many distinct
# target cells of each type they contact in turn.
# .unique() returns an array, so membership must be tested with isin(); comparing the
# column to the array with == is element-wise and raises unless the lengths happen to match.
focal_cell_id = df_edges['post'].unique()
display(df_syn[df_syn['pre'].isin(focal_cell_id)][['post','post_type']].value_counts().reset_index(
    name='weight')['post_type'].value_counts().reset_index(name='ncells'))

# save df_syn

In [35]:
# Write the synapse table to the shared graphs folder.
# NOTE(review): the folder is an absolute, machine-specific path and the file name is fixed
# to 'mli' regardless of what network_types contains -- adjust both before re-running.
savepath = Path('/Users/kperks/Library/CloudStorage/GoogleDrive-sawtelllab@gmail.com/My Drive/ELL_connectome/CREST_reconstructions/mg-network/graphs')

df_syn.to_csv(savepath / 'df_postsyn_mli.csv')

In [None]:
df_syn

In [64]:
df_syn[df_syn['pre'].isin(['463095118','461948885'])]

Unnamed: 0,pre,post,structure,x,y,z
1043,461948885,299405462,apical dendrite,251584,191232,55740
1044,461948885,213545590,apical dendrite,250896,190960,52830
1045,461948885,214581797,apical dendrite,247792,190224,53430
1046,461948885,132342130,apical dendrite,245328,191232,63120
1047,461948885,386363638,apical dendrite,246512,191504,65460
...,...,...,...,...,...,...
1170,463095118,643848637,apical dendrite,196576,214000,95220
1171,463095118,559041561,multiple,185664,214448,94470
1172,463095118,198648129,unknown,182128,213584,94380
1173,463095118,49314829,unknown,183088,212992,94200
