# setup

In [1]:
############################################################################################################################ 
# Get the latest CREST files for each ID within the target folder (dirname)

from pathlib import Path
import json
from sqlite3 import connect as sqlite3_connect
from sqlite3 import DatabaseError
from random import choice as random_choice
from itertools import combinations
from copy import deepcopy
from datetime import datetime
from time import time
from webbrowser import open as wb_open
from webbrowser import open_new as wb_open_new

from igraph import Graph as ig_Graph
from igraph import plot as ig_plot
from scipy.spatial.distance import cdist
from numpy import array, unravel_index, argmin, mean,unique,nan
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns  # used by the heatmap cells in the analysis sections
from tqdm import tqdm  # used for the progress bar when building the synapses dataframe
import neuroglancer

In [4]:
import sys
# Make the local eCREST checkout importable so that eCREST_cli can be imported in the next cell.
# NOTE(review): machine-specific absolute path — change it to the eCREST folder on your computer.
sys.path.append('/Users/kperks/Documents/ell-connectome/eCREST/eCREST')

In [5]:
# from eCREST_cli_beta import ecrest, import_settings
from eCREST_cli import ecrest, import_settings, get_cell_filepaths

In [6]:
def get_base_segments_dict(dirpath):
    '''
    Collect the base segments of every reconstruction file in a directory.

    dirpath: pathlib.Path of a folder containing CREST-style .json files.
        Hidden files (name starting with '.'), sub-directories and any file
        with "desktop" in its name are ignored.

    Returns a dictionary mapping each file name to the set of all segment ids
    found under that file's 'base_segments' entry (pooled over all of its keys).
    '''
    base_segments = {}

    for child in sorted(dirpath.iterdir()):
        filename = child.name
        if filename[0] == '.' or not child.is_file() or "desktop" in filename:
            continue

        with open(dirpath / filename, 'r') as myfile:
            cell_data = json.load(myfile)

        # pool the segment ids listed under every key into one set for this file
        segment_ids = set()
        for segment_list in cell_data['base_segments'].values():
            segment_ids.update(segment_list)
        base_segments[filename] = segment_ids

    return base_segments

def check_duplicates(base_segments):
    '''
    Report pairwise base-segment overlap between the cells in base_segments.

    base_segments: dictionary mapping a cell identifier to the set of its base segment ids.

    Returns a DataFrame with one row per ordered pair of different cells that
    share at least one segment:
        "self"            - the cell being checked
        "dups"            - the other cell
        "overlap-percent" - shared segments as a fraction (0-1) of the segments in "dups"
        "number_seg_lap"  - number of shared segments
    An empty DataFrame is returned when nothing overlaps.
    '''
    cell_ids = list(base_segments.keys())
    frames = []
    for this_id, this_cell in base_segments.items():
        overlap = []
        num_dup = []
        for other_id in cell_ids:
            other_cell = base_segments[other_id]
            n_shared = len(this_cell & other_cell)
            # a cell without any segments cannot overlap; avoid dividing by zero
            overlap.append(n_shared / len(other_cell) if other_cell else 0)
            num_dup.append(n_shared)

        # zeros become NaN so that dropna() removes every pair without shared segments
        df = pd.DataFrame({
            "self": this_id,
            "dups": cell_ids,
            "overlap-percent": overlap,
            "number_seg_lap": num_dup
            }).replace(0, nan, inplace=False).dropna()
        df = df[df['dups'] != this_id]  # a cell always overlaps fully with itself
        if not df.empty:
            frames.append(df)

    if not frames:
        return pd.DataFrame()
    # concatenate once instead of growing a DataFrame inside the loop
    return pd.concat(frames)

The 'ecrest' class has been imported from eCREST_cli.py

An instance of this object will be able to do things like:
- open a neuroglancer viewer for proofreading (see "Proofread using CREST")
    - add-remove segments (using graph feature for efficiency)
    - format itself and save itself as a CREST-style .json
- add or remove annotation layers (see "Annotation Layers")
- check for overlap with other .json files in a directory folder (see "check duplicates")
- label cell structures
- add base_segments from a list (see "add segments")
- import annotations from another file (see "Annotation Import")
- convert from neuroglancer json (see "Convert From Neuroglancer to eCREST")
    - format itself and save itself as a CREST-style .json
    


# Settings definitions

Whether you are converting from neuroglancer or creating a new reconstruction, the settings_dict parameter is needed to create CREST json files with the correct formatting. 
- 'save_dir' : the default directory where eCREST reconstructions are saved as JSON files
- 'db_path' : the path to the agglomeration database file on the local computer (a copy is in the Google Drive). 

In [8]:
# Template for users who keep their settings file on a locally-synced Google Drive.
# It is immediately overridden by the assignment on the next line.
path_to_settings_json = '<path to google drive on the local computer>/My Drive/ELL_connectome/Ela/settings_dict_Ela.json'
# Path that is actually used. NOTE(review): machine-specific — edit (or remove) this line for your own computer.
path_to_settings_json = '/Users/kperks/Documents/ell-connectome/eCREST-local-files/settings_dict.json'
# import_settings comes from eCREST_cli; the resulting dictionary is displayed in the next cell.
settings_dict = import_settings(path_to_settings_json)

In [9]:
settings_dict

{'save_dir': '/Users/kperks/Library/CloudStorage/GoogleDrive-sawtelllab@gmail.com/My Drive/ELL_connectome/CREST_reconstructions/mg-network',
 'max_num_base_added': 1000,
 'cell_structures': ['unknown',
  'axon',
  'basal dendrite',
  'apical dendrite',
  'dendrite',
  'multiple'],
 'annotation_points': ['exit volume',
  'natural end',
  'uncertain',
  'pre-synaptic',
  'post-synaptic'],
 'db_path': '/Users/kperks/Documents/ell-connectome/eCREST-local-files/Mariela_bigquery_exports_agglo_v230111c_16_crest_proofreading_database.db'}

# Get base_segments dictionaries 

## Read from file (created with [this Colab notebook](https://colab.research.google.com/drive/19N8taRKeTt_Bgx_yF5AD5ntkU7zy7unc?usp=sharing)) 

In [None]:
# Read the pre-computed dictionaries from a JSON file.
# The file is looked up two folder levels above settings_dict['save_dir'], under Ela/dictionary_jsons.
with open(Path(settings_dict['save_dir']).parent.parent / 'Ela/dictionary_jsons/dictionaries.json', 'r') as json_file: # NOTE(review): confirm that save_dir on your computer sits two levels below the folder containing 'Ela'
    dict_all = json.load(json_file)
  
# names of the dictionaries stored in the file (a live view of dict_all's keys)
dict_list = dict_all.keys()

# Convert lists to sets within each dictionary in data (JSON cannot store sets)
for key, dictionary in dict_all.items():
    for sub_key, value in dictionary.items():
        if isinstance(value, list):
            dictionary[sub_key] = set(value)

# Dynamically create one notebook-level variable per top-level JSON key.
# NOTE(review): this writes into globals(), so a key in the file silently replaces any
# existing variable with the same name — presumably the keys are base_segments_* names; verify.
for key, value in dict_all.items():
    globals()[key] = value

print(dict_list)

## Create locally

On some computers this can take a long time (for example, if the computer has little RAM or little free memory). If it takes more than 5 minutes, use the Colab notebook linked above to create the dictionaries instead.

In [40]:
# Build the base-segment dictionaries that the duplicate checks compare against,
# using get_base_segments_dict (defined in the setup section) on three folders.
# NOTE(review): the 'Ela' folders are resolved two levels above save_dir while the "main"
# dictionary is read from save_dir itself — confirm this matches your settings file.
save_dir = Path(settings_dict['save_dir'])
ela_dir = save_dir.parent.parent / 'Ela'

base_segments_Ela = get_base_segments_dict(ela_dir / 'reconstructions')
base_segments_main = get_base_segments_dict(save_dir)
base_segments_todo = get_base_segments_dict(ela_dir / 'PE_post')

# Proofread using (e)CREST

The ```ecrest``` class defined in eCREST_cli.py can be used to proofread base_segment reconstructions enhanced by the agglomeration database.

An instance of this class can be initialized with either:
- ecrest(segment_id): a "main_base_id" in *int* format
- ecrest(filepath): an existing CREST .json file
- ecrest(segment_id, segment_list): the main_base_id from the neuroglancer file you are converting and a list of base_segments.

The ```launch_viewer``` flag default is "False" so that you can interact with the contents of a reconstruction without actually opening it visually in a neuroglancer tab. **NOTE**: Some ecrest functions require that the ecrest instance is created with ```launch_viewer==True```.

## NEW reconstruction from segment ID

If you wanted to start reconstructing a new cell from a main base segment, you would use the following code block to launch.

In [None]:
# Main base segment that the new reconstruction starts from (given as a string here)
segment_id = '199762907'
# launch_viewer=True opens the reconstruction in a neuroglancer viewer for proofreading
crest = ecrest(settings_dict,segment_id = segment_id, launch_viewer=True)

# The following is optional and can be commented out. It enables adding segments by alt+left-mouse instead of left-mouse double click. 
crest.change_key_binding({"alt+mousedown0" : "add-or-remove-seg"})

### Check for duplicates against main network, your todo file, and your reconstruction folder

In [None]:
# Compare the open reconstruction (crest) with each base-segment dictionary built above.
# To save time, this cell can be re-run on its own while those dictionaries are still in memory.
overlap_checks = [
    ('overlap in Ela reconstruction folder:', base_segments_Ela),
    ('overlap in main network folder:', base_segments_main),
    ('overlap in todo folder:', base_segments_todo),
]
for heading, segments_dict in overlap_checks:
    print(heading)
    df = crest.check_duplicates(segments_dict)
    display(df)

### Add soma annotations 

Only add soma annotations if it is a new reconstruction with a soma in the volume.  
    - 4 annotations if the soma is fully in the volume, only 3 if partially out  
    - annotations get added along the x plane and y plane

In [None]:
crest.add_endpoint_annotation_layers(['soma'],link=True) # spine_inputs

### define cell type

In [41]:
cell_type = 'mg2' # Assign the cell type then run the code cell

crest.define_ctype(cell_type,"manual")

### save reconstruction

default location should be Ela_reconstructions (specified in settings_dict.json)

In [42]:
crest.save_cell_graph()

Saved cell 199762907 reconstruction locally at 2025-01-13 12.11.20



## EDIT reconstruction from file

If you wanted to edit a reconstruction from an existing file, you would use the following code block to launch.

Specify the cell_id and the path to the directory that cell is in. 

> NOTE: You can also directly copy paste the full filepath to the cell you want to open and pass it to the ```filepath``` flag.  
In that case, the only code you need is crest = ecrest(settings_dict,filepath= [*paste filepath here*], launch_viewer=True)


### get reconstruction files in cell's directory

In [163]:
# Reference paths only: nothing is assigned in this cell, and only the value of the LAST expression is displayed.

# if overlap is with the main network, then you want:
Path(settings_dict['save_dir']).parent.parent / 'CREST_reconstructions/mg_network'

# if overlap is with Ela reconstruction folder, then you want:
Path(settings_dict['save_dir'])

# if overlap is with todo folder, then open the todo folder cell and make sure that the blue segment is included in your reconstruction, then delete the other todo cell (or start from the other todo and include the one you are working on, etc):
Path(settings_dict['save_dir']).parent / 'PE_post'

PosixPath('/Users/kperks/Library/CloudStorage/GoogleDrive-sawtelllab@gmail.com/My Drive/ELL_connectome/CREST_reconstructions')

In [5]:
directory_path = Path(settings_dict['save_dir'])

cell_filepaths = get_cell_filepaths(directory_path) # gets filepaths for all cells in a directory

### Open cell for adding todo segment(s) to existing reconstruction

In [None]:
cell_id = '387415866' # specify the cell id

crest = ecrest(settings_dict,filepath= cell_filepaths[cell_id], launch_viewer=True)
print(cell_filepaths[cell_id])

# optional:
crest.change_key_binding({"alt+mousedown0" : "add-or-remove-seg"})

### Check for duplicates against main network, your todo file, and your reconstruction folder

In [None]:
# Compare the open reconstruction (crest) with each base-segment dictionary built above.
# To save time, this cell can be re-run on its own while those dictionaries are still in memory.
overlap_checks = [
    ('overlap in Ela reconstruction folder:', base_segments_Ela),
    ('overlap in main network folder:', base_segments_main),
    ('overlap in todo folder:', base_segments_todo),
]
for heading, segments_dict in overlap_checks:
    print(heading)
    df = crest.check_duplicates(segments_dict)
    display(df)

### SAVE edited reconstruction

Save to the default location (your reconstruction folder). If you later find a duplicate in both the main network and your reconstruction folder, keep adding to the newer version in your reconstruction folder rather than going back to the original in the main network.

In [None]:
crest.save_cell_graph() # Default location is Path(settings_dict['save_dir'])

Saved cell 299439953 reconstruction locally at 2025-01-14 20.20.34


# Get all postsynaptic PE partners in network

## dictionary all reconstruction files

In [3]:
dirpath_list = [Path(settings_dict['save_dir']).parent.parent / 'CREST_reconstructions/mg_network', Path(settings_dict['save_dir'])]
# if your directory is last in this list, then any newer versions that you have done should overwrite the older versions in the dictionary -- we can check this manually

In [13]:
# NOTE(review): this replaces the dirpath_list built from settings_dict in the previous cell with
# machine-specific absolute paths — run only one of the two cells, whichever matches your computer.
dirpath_list = [Path('/Users/kperks/Library/CloudStorage/GoogleDrive-sawtelllab@gmail.com/My Drive/ELL_connectome/CREST_reconstructions/mg-network'),Path('/Users/kperks/Library/CloudStorage/GoogleDrive-sawtelllab@gmail.com/My Drive/ELL_connectome/Ela/reconstructions')]

In [17]:
# One {cell id: filepath} dictionary over all directories; because update() overwrites
# existing keys, a file in a later directory replaces the same cell id from an earlier one.
nodefiles = {}
for dirpath in dirpath_list:
    nodefiles.update(get_cell_filepaths(dirpath))

## Base Segments of reconstructions

In [13]:
# Rebuild nodefiles from EVERY directory in dirpath_list (later directories override earlier ones).
# Calling get_cell_filepaths(dirpath) with the leftover loop variable would keep only the files of the
# last directory, silently discarding the merged dictionary when the notebook is run top to bottom.
nodefiles = {}
for reconstruction_dir in dirpath_list:
    nodefiles.update(get_cell_filepaths(reconstruction_dir))

In [14]:
# Create a base_segments dictionary of all reconstructed cells,
# keyed by the main base segment id stored inside each file.

base_segments = {}
for x,f in nodefiles.items():
    # if cell_type[x] in network_types: # if do this, you can't check if the post-syn segments exist as a reconstruction
    cell = ecrest(settings_dict,filepath = f)#,launch_viewer=False)
    main_base_id = cell.cell_data['metadata']['main_seg']['base']
    base_segments[main_base_id] = cell.cell_data['base_segments']

    # Report files whose nodefiles key differs from the main base segment stored in the file.
    # (An explicit comparison is used instead of assert + bare except, which would be skipped
    # when Python runs with -O and would hide the real cause of any other error.)
    if main_base_id != x:
        print(x,main_base_id)

## Build synapses dataframe

In [21]:
# NOTE(review): `dirpath` is not defined in this cell — it is whatever the last
# `for dirpath in dirpath_list` loop (or a later `dirpath = ...` cell) left behind, so the
# file that gets read depends on which cells were run before this one. Confirm the resolved path.
df_type = pd.read_csv(dirpath.parent.parent / 'CREST_reconstructions/mg_network/metadata/df_type.csv')
# df_type = pd.read_csv(Path('/Users/kperks/Library/CloudStorage/GoogleDrive-sawtelllab@gmail.com/My Drive/ELL_connectome/CREST_reconstructions/mg-network/metadata/df_type.csv'))

In [23]:
pe_id_list = df_type[df_type['cell_type'].isin(['pe'])]['id'].values

In [16]:
synanno_type = 'post-synaptic'
vx_sizes = [16, 16, 30]

# Reverse lookup built once: base segment id -> [(post cell id, cell structure), ...].
# The same segment id can be listed under several structures or cells, so every owner is kept
# (in the same order in which a cell-by-cell search would have found them).
segment_owners = {}
for x_post, post in base_segments.items():
    for k, v in post.items():
        for v_ in list(v):
            segment_owners.setdefault(v_, []).append((x_post, k))

## find edges and set the cell-structure attribute of the edge based on which part of the cell the edge goes to
edge_list = []

for x_pre in tqdm(pe_id_list):
    # ids read from df_type.csv are numeric, while nodefiles may be keyed by strings; accept either
    pre_key = x_pre if x_pre in nodefiles else str(x_pre)
    if pre_key not in nodefiles:
        print(f'no reconstruction file found for cell {x_pre}')
        continue

    # the current cell is assumed pre-synaptic; a cell without post-synaptic annotations adds no edges
    pre = ecrest(settings_dict,filepath = nodefiles[pre_key])#,launch_viewer=False)

    # for each synapse
    for syn_ in pre.cell_data['end_points'][synanno_type]:
        # assumes that the annotation is a point annotation stored in the list as ([x,y,z,segment_id],'annotatePoint')
        # previous to Jan 25 2024, it was just [x,y,z,segment_id]
        syn_ = syn_[0]
        try:
            post_seg = syn_[3]
        except IndexError as msg:
            print(msg, f'for cell {pre_key} synapse at {array([int(syn_[i]/vx_sizes[i]) for i in range(3)])} voxels has no segment id')
            continue

        syn_ = array([int(syn_[i]) for i in range(3)]) # synapses annotations exported as nanometers, so do not need to convert

        # add one edge per (post cell, structure) that contains the annotated segment
        for x_post, k in segment_owners.get(post_seg, []):
            edge_list.append([pre_key,x_post,k,syn_[0],syn_[1],syn_[2]])


100%|████████████████████████████████████████████████████████████████████████████████████████████████| 5705/5705 [02:54<00:00, 32.74it/s]


## Synapses dataframe

In [17]:
df_syn = pd.DataFrame(edge_list,columns = ['pre','post','structure','x','y','z'])

## SAVE

In [18]:
# The synapse table is written into the parent folder of save_dir.
savepath = Path(settings_dict['save_dir']).parent

df_syn.to_csv(savepath / 'df_pe_postsyn.csv')

# Cell Types combined

combine cell types from your reconstruction folder with types in main network

## get cell types from reconstructions

In [4]:
dirpath = Path(settings_dict['save_dir'])
nodefiles_Ela = get_cell_filepaths(dirpath)

In [None]:
# Read the manually-assigned cell type of every reconstruction listed in nodefiles_Ela.
cell_type = {}
not_typed = []
for x,f in nodefiles_Ela.items():
    cell = ecrest(settings_dict,filepath = f,launch_viewer=False)
    manual_type = cell.get_ctype('manual')
    if manual_type in ([], ''):
        # an empty list or empty string means "no type assigned"; store NaN instead
        # (np.nan is used because the np.NaN alias no longer exists in NumPy 2.0)
        manual_type = np.nan
        not_typed.append(x)# print(f'cell {x} is not cell-typed in json')
    cell_type[int(x)] = manual_type

# NOTE(review): the cells checked here come from nodefiles_Ela (settings_dict['save_dir']) — confirm the wording of this message.
print('the following cells are not typed in the main network')
print(not_typed)

df_type_Ela = pd.DataFrame(cell_type.items(),columns = ['id','cell_type'])


## get cell types from saved main network df_type

In [21]:
df_type_main = pd.read_csv(dirpath.parent.parent / 'CREST_reconstructions/mg_network/metadata/df_type.csv')


## combine df_type from Ela and main network

In [None]:
df_type = pd.concat([df_type_Ela, df_type_main],ignore_index=True)

# PE postsynaptic network analysis

In [86]:
df_syn = pd.read_csv(Path(Path(settings_dict['save_dir']).parent) / 'df_pe_postsyn.csv')
syn = 'post-synaptic'

In [87]:
len(df_syn)

1069

## add cell type to df_syn

In [90]:
# Lookup table id -> cell_type. When an id is listed more than once in df_type the first entry wins.
type_by_id = df_type.drop_duplicates(subset='id').set_index('id')['cell_type']

# Add lower-cased 'pre_type' and 'post_type' columns. Ids that are missing from df_type, or that
# have no type, become NaN instead of raising when the type is lower-cased.
for side in ['pre', 'post']:
    df_syn[f'{side}_type'] = df_syn[side].map(type_by_id).map(
        lambda t: t.lower() if isinstance(t, str) else t)

# report the synapses for which the pre- or post-synaptic cell could not be typed
untyped = df_syn[df_syn[['pre_type', 'post_type']].isna().any(axis=1)]
for pre_id, post_id in zip(untyped['pre'], untyped['post']):
    print(pre_id, post_id)

## Connection weights


In [62]:
df_edges = df_syn[['pre','post','pre_type','post_type']].value_counts().reset_index(name='weight') #'structure',

## Connection patterns

In [328]:
# Build df_map: one row per pre-synaptic cell, one column per post-synaptic type.
# count_type selects what is counted: 'nsyn' = summed synapse weights, 'ncells' = number of df_edges rows (pre/post pairs).
# NOTE(review): any other value leaves df_grouped undefined (NameError below).
count_type = 'nsyn' # 'ncells' #
# keep only edges onto these post-synaptic types
mask = (df_edges['post_type'].isin(['sg1','sg2','mg1','mg2','lg','lf','mli','grc']))#,'smpl','mli','tsd','h'])) # df_edges['pre'].isin([290552453,27220895,31694533,102463116,188296613,15401313,17877032,187151336,117041378,122039969,36165549]) & 
#[295969348,295969442,295969134,295969355,295968777,282228761,283375247, 283391297,283390956,282230475,268614458,268614383,273086215,187230424]

# NOTE(review): only pre_type 'smpl' is selected here, while this section analyses 'pe' cells
# (see pre_groups further down) — confirm that 'smpl' is intended, otherwise df_map will be empty.
types_ = ['smpl'] #should be all that is in df_pfsyn anyway
df_map = pd.DataFrame()
for t in types_:
    if count_type == 'nsyn':
        # sum the weights per (pre, post_type) and spread the post types into columns
        df_grouped = df_edges[(df_edges['pre_type']==t) & mask].groupby(
            ['pre','pre_type','post_type']).sum(numeric_only=True).reset_index().pivot(
            index='pre', columns='post_type', values='weight').fillna(0).reset_index()
    
    if count_type == 'ncells':
        # count the rows per (pre, post_type) instead of summing their weights
        df_grouped = df_edges[(df_edges['pre_type']==t) & mask].groupby(
            ['pre','pre_type','post_type']).count().reset_index().pivot(
            index='pre', columns='post_type', values='post').fillna(0).reset_index()
    
    df_grouped['pre_type']=t
    df_map = pd.concat([df_map,df_grouped])
    
# post types missing for one of the pre types become 0 after the concat
df_map = df_map.fillna(0)
df_map = df_map.set_index('pre')
df_map = df_map.drop(['pre_type'],axis=1)

In [None]:
df_map.head()

### connectivity heatmap

In [330]:
target_order = ['lg','mg1','sg1','lf','mg2','sg2','mli']#,'smpl','mli','h','tsd']
df_map = df_map.loc[:,[t for t in target_order if t in df_map.columns.values]]

In [331]:
df_map.sum()

post_type
lg       1.0
mg1      8.0
sg1      6.0
lf     175.0
mg2    162.0
sg2    250.0
mli     81.0
grc     50.0
dtype: float64

In [333]:
sort1 = 'mg1'
sort1_map = df_map[df_map[sort1]>0].sort_values([sort1],ascending=False)

In [334]:
sort2 = 'mg2'
# All rows that are not in sort1_map, sorted by the sort2 column in ascending order.
# NOTE(review): unlike sort1_map there is no `> 0` filter here, so every remaining row ends up in
# sort2_map and the `other_map` selection in the next cell (rows in neither map) is always empty — confirm intent.
sort2_map = df_map[~df_map.index.isin(sort1_map.index)].sort_values([sort2])

In [335]:
other_map = df_map[(~df_map.index.isin(sort1_map.index)) & ~df_map.index.isin(sort2_map.index)]#.sort_values(['mg2'],ascending=False)

In [336]:
sorted_full = pd.concat([sort1_map,other_map,sort2_map])

In [None]:
# Plot the heatmap: one row per pre-synaptic cell (in the order of sorted_full), one column per post-synaptic type.
# NOTE(review): `cmap` is not defined anywhere in the visible notebook — define it before running this cell.
fig, axes = plt.subplots(1, figsize=(5,10))
sns.heatmap(data=sorted_full, annot=False, fmt="0.0f", cmap=cmap, 
            ax=axes, vmin=1)  # vmin=1 puts the bottom of the color scale at 1 synapse, so cells with 0 fall below it
axes.set_title('total syn per pre cell')
axes.set_yticks([])  # too many pre cells to label individually

In [None]:
# Plotting
fig, ax = plt.subplots(figsize=(5, 5))
# Create a stacked bar plot: one bar per pre-synaptic cell, stacked by post-synaptic type.
# NOTE(review): `syn_colors` is not defined anywhere in the visible notebook, and selecting
# `target_order` raises a KeyError if any of those types is not a column of sorted_full.
sorted_full.loc[:,target_order].plot.bar(stacked=True,color = syn_colors,ax=ax, width=0.9)
# ax.set_xticks([])
ax.legend(title='Postsynaptic \n Type', bbox_to_anchor=(1.35, 1), loc='upper right')


## conditional OUTPUT analysis

In [93]:
def get_conditional_output(df_edges,normalize=False):
    '''
    Mean output profile of pre-synaptic cells, conditioned on each post-synaptic type.

    df_edges: DataFrame with the columns 'pre', 'pre_type', 'post_type' and 'weight'.
    normalize: if truthy, the weights of each pre cell are divided by that cell's total,
        and a cell counts as targeting a type when more than 0.05 of its output goes there.
        Otherwise the summed weights are used and a cell counts as targeting a type
        when its weight onto that type is greater than 1.

    Returns (result, order):
        result - list of lists; result[i] holds the column-wise means of the pre x post_type
                 table over the pre cells that target order[i] (all NaN if no cell qualifies).
        order  - the post_type column labels, which label both axes of result.
    '''
    # pre x post_type table of summed weights; combinations that never occur are 0
    df_map = (
        df_edges.groupby(['pre','pre_type','post_type']).sum(numeric_only=True).reset_index()
        .pivot(index='pre', columns='post_type', values='weight')
        .fillna(0)
    )

    # a single truthiness test keeps `result` defined for any flag value (None, 0, 1, ...)
    if normalize:
        df_map = df_map.div(df_map.sum(axis=1),axis=0)
        threshold = 0.05
    else:
        threshold = 1

    # group data: for each post type, average the rows of the cells that target it
    result = [list(df_map[df_map[g] > threshold].mean().values) for g in df_map.columns]

    order = df_map.columns

    return result,order

In [94]:
pre_groups = ['pe']
post_groups = ['mg1','mg2','lg','lf','sg1','sg2','mli']

### shuffle result

In [96]:
n_shuffles = 100
# fixed seed so that the null distribution (and the z-scores derived from it) can be reproduced
shuffle_rng = np.random.RandomState(0)

# Filter out rows with post_type not in post_groups and pre_type not in pre_groups
mask = df_syn['pre_type'].isin(pre_groups) & df_syn['post_type'].isin(post_groups)
df_syn_masked = df_syn[mask]

# The post-synaptic side of every synapse (partner id, its type and the synapse location) is
# permuted as a unit so that 'post' and 'post_type' stay consistent with each other.
# Columns that do not exist in df_syn (e.g. 'y_adj') are skipped instead of raising a KeyError.
shuffle_cols = [c for c in ['post','x','y','z','y_adj','post_type'] if c in df_syn_masked.columns]

result_shuff = []

# Iterate the specified number of times
for i in range(n_shuffles):
    df_syn_shuff = df_syn_masked.copy()
    # Shuffle the dataframe
    # (shuffling 'pre' alone does not work unless the pre_type column is re-typed afterwards)
    shuff_rows = df_syn_shuff[shuffle_cols].sample(frac = 1, random_state = shuffle_rng)
    for col in shuffle_cols:
        # .values drops the index so the shuffled order is kept on assignment
        df_syn_shuff[col] = shuff_rows[col].values

    df_edges_shuff=df_syn_shuff[['pre','post','pre_type','post_type']].value_counts().reset_index(name='weight')

    result_,order = get_conditional_output(df_edges_shuff,normalize=True)

    # Append the result of this shuffle
    result_shuff.append(result_)

result_shuff = np.asarray(result_shuff)

# mean and standard deviation over the shuffles
u_mat = result_shuff.mean(axis=0)

std_mat = result_shuff.std(axis=0)

### data

In [97]:
# Same filtering as for the shuffles, applied to the observed (unshuffled) synapses
df_syn_data = deepcopy(df_syn)
mask = df_syn_data['pre_type'].isin(pre_groups) & df_syn_data['post_type'].isin(post_groups)  # Filter out rows with post_type not in post_groups and pre_type not in pre_groups
df_syn_data = df_syn_data[mask]

# one row per (pre, post) pair; weight = number of synapses between them
df_edges_data=df_syn_data[['pre','post','pre_type','post_type']].value_counts().reset_index(name='weight')
#df_syn_data.drop(['Unnamed: 0','x','y','z','structure'],axis=1).value_counts().reset_index(name='weight')

result_data,order = get_conditional_output(df_edges_data,normalize=True)

# Calculate the z-scores of the observed values relative to the shuffle mean (u_mat) and standard deviation (std_mat)
z_scores = (result_data - u_mat) / std_mat
z_scores[np.isclose(std_mat, 0)] = 0  # Replace z-scores with 0 where the shuffle std is 0 (the division above gives inf/nan there)


# rows and columns are both labelled with the post types returned by get_conditional_output
cond_input_mat = pd.DataFrame(z_scores,columns = order, index = order)

In [None]:

# Define the desired order
order = ['sg1','mg1','lg','sg2','mg2','lf','mli']

# Reorder rows and columns (types in `order` that are missing from cond_input_mat become all-NaN rows/columns)
df_reordered=cond_input_mat.reindex(index=order, columns=order)

# Ensure the color range is centered around 0 (symmetric limits from the largest absolute value)
vmin = -max(abs(df_reordered.min().min()), abs(df_reordered.max().max()))
vmax = -vmin

sns.set_context("paper",font_scale=1)
hfig,ax = plt.subplots(1,figsize=(2,2))
# no ax= is passed: the heatmap is drawn on the current axes, i.e. the one created on the line above
sns.heatmap(df_reordered,
    cmap="RdBu_r",  # Diverging colormap from red to blue
    vmin=vmin,
    vmax=vmax,
    center=0)



### an example of shuffle result

In [None]:

df_syn_shuff = deepcopy(df_syn)
# Filter out rows with post_type not in the listed types and pre_type not in pre_groups.
# NOTE(review): pre_groups (defined with post_groups above) is used because `included_groups`
# is not defined anywhere in the visible notebook — confirm this is the intended selection.
mask = df_syn_shuff['pre_type'].isin(pre_groups) & df_syn_shuff['post_type'].isin(['sg1','mg1','lg','sg2','mg2','lf'])
df_syn_shuff = df_syn_shuff[mask].copy()

# Shuffle the dataframe: the pre-synaptic side (cell id and its type) is permuted together with the
# synapse location so that 'pre' and 'pre_type' stay consistent with each other.
# Only columns that exist are shuffled ('y_adj' is optional).
shuffle_cols = [c for c in ['pre','x','y','z','y_adj','pre_type'] if c in df_syn_shuff.columns]
shuff_rows = df_syn_shuff[shuffle_cols].sample(frac = 1)
for col in shuffle_cols:
    # .values drops the index so the shuffled order is kept on assignment
    df_syn_shuff[col] = shuff_rows[col].values

# count synapses per (pre, post) pair in the SHUFFLED synapse table
df_edges_shuff=df_syn_shuff[['pre','post','pre_type','post_type']].value_counts().reset_index(name='weight')

result_,order = get_conditional_output(df_edges_shuff)

# label both axes with the post types returned together with result_
cond_input_mat = pd.DataFrame(result_,columns = order, index = order)

# Define the desired order
order = ['sg1','mg1','lg','sg2','mg2','lf']

# Reorder rows and columns
df_reordered=cond_input_mat.reindex(index=order, columns=order)

# Ensure the color range is centered around 0
vmin = -max(abs(df_reordered.min().min()), abs(df_reordered.max().max()))
vmax = -vmin

sns.heatmap(df_reordered,
    cmap="RdBu_r",  # Diverging colormap from red to blue
    vmin=vmin,
    vmax=vmax,
    center=0)

# TODO reconstruction files from synapses

In [17]:
syn_to_find = set()       

syn_type = 'post-synaptic'#'spine_inputs' #

vx_sizes = [16, 16, 30]

## dictionary all reconstruction files

In [3]:
dirpath_list = [Path(settings_dict['save_dir']).parent.parent / 'CREST_reconstructions/mg_network', Path(settings_dict['save_dir'])]
# if your directory is last in this list, then any newer versions that you have done should overwrite the older versions in the dictionary -- we can check this manually

In [13]:
# NOTE(review): this replaces the dirpath_list built from settings_dict in the previous cell with
# machine-specific absolute paths — run only one of the two cells, whichever matches your computer.
dirpath_list = [Path('/Users/kperks/Library/CloudStorage/GoogleDrive-sawtelllab@gmail.com/My Drive/ELL_connectome/CREST_reconstructions/mg-network'),Path('/Users/kperks/Library/CloudStorage/GoogleDrive-sawtelllab@gmail.com/My Drive/ELL_connectome/Ela/reconstructions')]

In [17]:
# One {cell id: filepath} dictionary over all directories; because update() overwrites
# existing keys, a file in a later directory replaces the same cell id from an earlier one.
nodefiles = {}
for dirpath in dirpath_list:
    nodefiles.update(get_cell_filepaths(dirpath))

## Base Segments of reconstructions

In [13]:
# Rebuild nodefiles from EVERY directory in dirpath_list (later directories override earlier ones).
# Calling get_cell_filepaths(dirpath) with the leftover loop variable would keep only the files of the
# last directory, silently discarding the merged dictionary when the notebook is run top to bottom.
nodefiles = {}
for reconstruction_dir in dirpath_list:
    nodefiles.update(get_cell_filepaths(reconstruction_dir))

pfs reconstructed from each type
299496636 mg1 5/22, 
214581797 mg2 1/22, 
301787806 lg, 11/11
393325331 lf, 8/16

In [18]:
cells_todo = ['447127759'] 

In [19]:
# Collect the segment id of every syn_type annotation of the cells in cells_todo.
# A set is used, so a segment annotated more than once is only listed once.
syn_to_find = set()

for c_id in cells_todo:
    crest = ecrest(settings_dict,filepath= nodefiles[c_id], launch_viewer=False)
    for syn_ in crest.cell_data['end_points'][syn_type]:
        try:
            # syn_[0] is indexed as [x, y, z, segment_id]; index 3 is the annotated segment
            syn_to_find.add(syn_[0][3])

        except IndexError as msg:
            # the annotation has no segment id: report it (coordinates divided by vx_sizes) and carry on
            cellid = crest.cell_data['metadata']['main_seg']['base']
            print(msg, f'for cell {cellid} synapse at {array([int(syn_[0][i]/vx_sizes[i]) for i in range(3)])} has no segment id')

In [20]:
len(crest.cell_data['end_points'][syn_type])

111

In [21]:
len(syn_to_find)

99

First, find out whether any of these post-synaptic segments are already part of a completed reconstruction.

In [14]:
# Create a base_segments dictionary of all reconstructed cells,
# keyed by the main base segment id stored inside each file.
# base_segments = crest.get_base_segments_dict(Path(settings_dict['save_dir']))

base_segments = {}
for x,f in nodefiles.items():
    # if cell_type[x] in network_types: # if do this, you can't check if the post-syn segments exist as a reconstruction
    cell = ecrest(settings_dict,filepath = f)#,launch_viewer=False)
    main_base_id = cell.cell_data['metadata']['main_seg']['base']
    base_segments[main_base_id] = cell.cell_data['base_segments']

    # Report files whose nodefiles key differs from the main base segment stored in the file.
    # (An explicit comparison is used instead of assert + bare except, which would be skipped
    # when Python runs with -O and would hide the real cause of any other error.)
    if main_base_id != x:
        print(x,main_base_id)

In [23]:
# Segments from syn_to_find that already belong to an existing reconstruction.
topop=set()
for k,v in base_segments.items():
    # base_segments values are either {structure: segments} dictionaries (as built from
    # cell.cell_data['base_segments']) or plain collections of segment ids; a set cannot be
    # intersected with a dictionary, so the per-structure segments are pooled first.
    segments = set().union(*v.values()) if isinstance(v, dict) else set(v)
    # print(f'use reconstruction {k}')
    topop |= syn_to_find & segments

len(topop)

42

Adjust "syn_to_find" to eliminate these base segments from the todo list

In [24]:
syn_to_find = syn_to_find.difference(topop)

len(syn_to_find)

57

create crest files for each of the unidentified post-synaptic partners

Save reconstructed_segs as a json to go through manually

In [None]:
# Folder that receives one new reconstruction file per still-unidentified post-synaptic segment
todo_folder_path = Path(settings_dict['save_dir']).parent / 'PE_post'

# sorted so that the files are always created in the same order
for segment_id in sorted(syn_to_find):
    cell = ecrest(settings_dict, segment_id=segment_id, launch_viewer=False)
    cell.save_cell_graph(directory_path=todo_folder_path)