# setup

In [None]:
############################################################################################################################ 
# Get the latest CREST files for each ID within the target folder (dirname)

from pathlib import Path
import json
from sqlite3 import connect as sqlite3_connect
from sqlite3 import DatabaseError
from igraph import Graph as ig_Graph
from igraph import plot as ig_plot
from scipy.spatial.distance import cdist
from random import choice as random_choice
from itertools import combinations
from numpy import array, unravel_index, argmin, mean,unique,nan
import pandas as pd
from copy import deepcopy
from datetime import datetime
from time import time
import neuroglancer
from webbrowser import open as wb_open
from webbrowser import open_new as wb_open_new
import neuroglancer
import numpy as np
import matplotlib.pyplot as plt

In [3]:
import sys

# sys.path entries must be directories (or zip archives), not module files.
# Add the folder that contains eCREST_cli.py so that
# `from eCREST_cli import ...` can be resolved from any working directory.
sys.path.append('/Users/kperks/Documents/ell-connectome/eCREST/eCREST')

In [None]:
# from eCREST_cli_beta import ecrest, import_settings
from eCREST_cli import ecrest, import_settings, get_cell_filepaths

In [2]:
def get_base_segments_dict(dirpath):
    """Collect the base segments of every reconstruction file in a directory.

    Every regular file directly inside ``dirpath`` is parsed as JSON, except
    hidden files (name starting with '.') and files whose name contains
    "desktop". The lists stored under each file's 'base_segments' key are
    flattened into a single set.

    Parameters
    ----------
    dirpath : str or pathlib.Path
        Directory holding the reconstruction JSON files.

    Returns
    -------
    dict
        Maps each file name to the set of all base segments listed in that
        file. Entries are inserted in sorted path order.
    """
    dirpath = Path(dirpath)  # accept plain string paths as well as Path objects

    base_segments = {}
    for child in sorted(dirpath.iterdir()):
        name = child.name
        # Skip hidden files, "desktop" system files and sub-directories.
        if name.startswith('.') or 'desktop' in name or not child.is_file():
            continue
        # JSON is UTF-8 by specification; do not rely on the platform default.
        with open(child, 'r', encoding='utf-8') as myfile:
            cell_data = json.load(myfile)
        base_segments[name] = {
            seg
            for seg_list in cell_data['base_segments'].values()
            for seg in seg_list
        }

    return base_segments

def check_duplicates(base_segments):
    '''
    Report pairwise base-segment overlap between all cells in a dictionary.

    base_segments is a dictionary mapping a cell name to the set of its base
    segments; every cell is compared against every other cell.

    Returns a DataFrame with one row per ordered pair of distinct cells that
    share at least one segment:
      - "self": the cell being checked
      - "dups": the other cell it overlaps with
      - "overlap-percent": shared segments as a fraction (0-1) of the
        segments of the "dups" cell
      - "number_seg_lap": number of shared segments
    An empty DataFrame (no columns) is returned when nothing overlaps.
    '''
    cell_names = list(base_segments.keys())
    frames = []
    for this_name, this_cell in base_segments.items():
        overlap = []
        num_dup = []
        for other_cell in base_segments.values():
            n_shared = len(this_cell & other_cell)
            # A cell without segments cannot overlap with anything; guard the
            # division so an empty entry does not raise ZeroDivisionError.
            overlap.append(n_shared / len(other_cell) if other_cell else 0.0)
            num_dup.append(n_shared)

        # Zeros are turned into NaN so that dropna() removes the rows of
        # cells that share no segment with this one.
        df = pd.DataFrame({
            "self": this_name,
            "dups": cell_names,
            "overlap-percent": overlap,
            "number_seg_lap": num_dup
            }).replace(0, nan, inplace=False).dropna()
        df = df[df['dups'] != this_name]  # a cell always overlaps itself
        if not df.empty:
            frames.append(df)

    # Concatenate once at the end instead of growing a DataFrame in the loop.
    return pd.concat(frames) if frames else pd.DataFrame()

The 'ecrest' class has been imported from eCREST_cli.py

An instance of this object will be able to do things like:
- open a neuroglancer viewer for proofreading (see "Proofread using CREST")
    - add-remove segments (using graph feature for efficiency)
    - format itself and save itself as a CREST-style .json
- add or remove annotation layers (see "Annotation Layers")
- check for overlap with other .json files in a directory folder (see "check duplicates")
- label cell structures
- add base_segments from a list (see "add segments")
- import annotations from another file (see "Annotation Import")
- convert from neuroglancer json (see "Convert From Neuroglancer to eCREST")
    - format itself and save itself as a CREST-style .json
    


# Settings definitions

Whether you are converting from neuroglancer or creating a new reconstruction, the settings_dict parameter is needed to create CREST json files with the correct formatting. 
- 'save_dir' : the default directory where eCREST reconstructions are saved as JSON files
- 'db_path' : the path to the agglomeration database file on the local computer (a copy is in the Google Drive). 

In [3]:
# Location of this user's settings JSON. Replace the <...> placeholder with the
# Google Drive path on the local computer before running.
path_to_settings_json = '<path to google drive on the local computer>/My Drive/ELL_connectome/Ela/settings_dict_Ela.json'
# Load the settings; the cells below read settings_dict['save_dir'] from the result.
settings_dict = import_settings(path_to_settings_json)

In [None]:
# Display the loaded settings dictionary as the cell output.
settings_dict

# Get base_segments dictionaries 

## Read from file (created with [this Colab notebook](https://colab.research.google.com/drive/19N8taRKeTt_Bgx_yF5AD5ntkU7zy7unc?usp=sharing)) 

In [None]:
# Load the precomputed base-segment dictionaries from a single JSON file
# (assumes your save_dir is /mg_network/Rachel/ on your working computer)
dictionaries_json = Path(settings_dict['save_dir']).parent.parent / 'Ela/dictionary_jsons/dictionaries.json'
with open(dictionaries_json, 'r') as json_file:
    dict_all = json.load(json_file)

dict_list = dict_all.keys()

# JSON has no set type: turn every list value back into a set, in place
for inner_dict in dict_all.values():
    inner_dict.update({
        name: set(entry)
        for name, entry in inner_dict.items()
        if isinstance(entry, list)
    })

# Expose each top-level JSON key as a notebook-level variable of the same name
globals().update(dict_all)

print(dict_list)

## Create locally

On some computers this can take a long time (for example, if little RAM or memory is available). If it takes more than 5 minutes, use the Colab notebook instead.

In [40]:
# Build the base-segment dictionaries that the duplicate checks compare against.
# Uses get_base_segments_dict (defined in the setup section) on each folder.
main_dir = Path(settings_dict['save_dir'])
ela_dir = main_dir.parent.parent / 'Ela'

base_segments_Ela = get_base_segments_dict(ela_dir / 'reconstructions')
base_segments_main = get_base_segments_dict(main_dir)
base_segments_todo = get_base_segments_dict(ela_dir / 'PE_post')

# Proofread using (e)CREST

The ```ecrest``` class defined in eCREST_cli.py can be used to proofread base_segment reconstructions enhanced by the agglomeration database.

An instance of this class can be initialized with either:
- ecrest(segment_id): a "main_base_id" in *int* format
- ecrest(filepath): an existing CREST .json file
- ecrest(segment_id, segment_list): the main_base_id from the neuroglancer file you are converting and a list of base_segments.

The ```launch_viewer``` flag defaults to "False" so that you can interact with the contents of a reconstruction without actually opening it visually in a neuroglancer tab. **NOTE**: Some ecrest functions require that the ecrest instance is created with ```launch_viewer=True```.

## NEW reconstruction from segment ID

If you wanted to start reconstructing a new cell from a main base segment, you would use the following code block to launch.

In [None]:
# Main base segment ID of the cell to reconstruct (passed here as a string).
segment_id = '199762907'
# Create the reconstruction object with the viewer launched.
crest = ecrest(settings_dict,segment_id = segment_id, launch_viewer=True)

# The following is optional and can be commented out. It enables adding segments by alt+left-mouse instead of left-mouse double click. 
crest.change_key_binding({"alt+mousedown0" : "add-or-remove-seg"})

### Check for duplicates against main network, your todo file, and your reconstruction folder

In [None]:
# Compare the current reconstruction against each base_segments dictionary.
# To save time, this cell can be re-run on its own once the dictionaries above exist.
print('overlap in Ela reconstruction folder:')
df = crest.check_duplicates(base_segments_Ela)
display(df)

print('overlap in main network folder:')
df = crest.check_duplicates(base_segments_main)
display(df)

print('overlap in todo folder:')
df = crest.check_duplicates(base_segments_todo)
display(df)

### Add soma annotations 

Only add soma annotations if it is a new reconstruction with a soma in the volume.  
    - 4 annotations if the soma is fully in the volume, only 3 if partially out  
    - annotations get added along the x plane and y plane

In [None]:
# Add a 'soma' endpoint annotation layer to the current reconstruction.
# NOTE(review): the trailing 'spine_inputs' looks like another layer name that could be added to the list — confirm.
crest.add_endpoint_annotation_layers(['soma'],link=True) # spine_inputs

### define cell type

In [41]:
cell_type = 'mg2' # Assign the cell type then run the code cell

# Store the cell type on the reconstruction.
# NOTE(review): the second argument "manual" presumably records how the type was assigned — confirm in eCREST_cli.
crest.define_ctype(cell_type,"manual")

### save reconstruction

default location should be Ela_reconstructions (specified in settings_dict.json)

In [42]:
# Save the current reconstruction; a confirmation line with the cell ID and timestamp is printed.
crest.save_cell_graph()

Saved cell 199762907 reconstruction locally at 2025-01-13 12.11.20
