# Convert json from neuroglancer to CREST

## Setup

### Imports

In [1]:
############################################################################################################################ 
# Imports used by the conversion code in this notebook

from pathlib import Path
import json
from sqlite3 import connect as sqlite3_connect
from sqlite3 import DatabaseError
from igraph import Graph as ig_Graph
from igraph import plot as ig_plot
from scipy.spatial.distance import cdist
from random import choice as random_choice
from itertools import combinations
from numpy import array, unravel_index, argmin, mean
from copy import deepcopy
from datetime import datetime

### Define a 'crest_json' class using functions from CREST.py

An instance of this object will be able to format itself and save itself as a CREST-style .json

In [119]:
class crest_json:
    '''
    CREST-style cell record built from a neuroglancer segment list and a local agglomeration database.

    An instance keeps the cell's data in self.cell_data and its base-segment graph in self.pr_graph
    (created by create_pr_graph), and can save itself as a CREST-style .json with save_cell_graph.
    '''

    def __init__(self, main_base_id, db_path):

        '''
        At some point, store these initialization values (ie addresses, keys lists) as a 'params' file
        that can be provided to the init function instead of hard-coding

        main_base_id : base segment ID from neuroglancer list

        db_path : filepath to agglomeration database sql file locally on computer

        '''

        print(f'Creating CREST file for {main_base_id}', 'Cell Reconstruction')

        modes = ['Cell Reconstruction']
        self.db_cursors = {key: {} for key in modes}
        self.db_paths = {key: {} for key in modes}
        self.db_path_labels = {key: {} for key in modes}
        self.settings_dict = {key: {} for key in modes}

        # Create the connection to the database (right now just for 'Cell Reconstruction')
        self.update_selected_db(db_path)

        # Set up addresses; they are stored in the agglomeration SQL file
        # (an em address looks like 'brainmaps://10393113184:ell:roi450um_xyz')
        required_addresses = ['agglo_address', 'base_address', 'em_address', 'cloud_storage_address']
        (self.agglo_seg, self.base_seg, self.em, self.cloud_storage_address) = self.get_addresses(required_addresses, 'Cell Reconstruction')

        keys_base_segments = ['unknown', 'axon', 'basal dendrite', 'apical dendrite', 'dendrite', 'multiple']
        keys_end_points = ['exit volume', 'natural end', 'uncertain', 'pre-synaptic', 'post-synaptic']

        agglo_seg_id = self.get_agglo_seg_of_base_seg(main_base_id)

        self.cell_data = {
            'graph_edges': [],
            'graph_nodes': [],
            'base_locations': {},
            'added_graph_edges': [],
            'added_graph_edges_pre_proofreading': [],
            'end_points': {key: [] for key in keys_end_points},
            'base_seg_merge_points': [],
            'removed_base_segs': set(),
            'anchor_seg': str(main_base_id),
            'metadata': {
                'main_seg': {'agglo': {self.agglo_seg: agglo_seg_id}, 'base': str(main_base_id)},
                'data_sources': {
                    'em': self.em,
                    'base': self.base_seg,
                    'agglo': self.agglo_seg,
                    },
                'timing': [],
                'completion': []
                },
            'base_segments': {dtype: set() for dtype in keys_base_segments}
        }

        self.get_vx_sizes('Cell Reconstruction')

    def get_addresses(self, required_addresses, mode):

        '''
        Return the requested address columns from the first row of addresses_table.

        required_addresses : list of column names, e.g.
            ['agglo_address', 'base_address', 'em_address', 'cloud_storage_address']

        mode : key into self.db_cursors ('Cell Reconstruction')

        Returns one value per requested column, in the requested order.
        Raises ValueError if addresses_table has no rows.
        '''
        columns = ', '.join(required_addresses)
        cursor = self.db_cursors[mode]

        # Column names cannot be bound as SQL parameters, so they are interpolated into the query
        cursor.execute(f'''SELECT {columns} FROM addresses_table LIMIT 1''')

        row = cursor.fetchone()

        if row is None:
            raise ValueError('addresses_table is empty; cannot read the data source addresses')

        return row

    def update_selected_db(self, db_path, mode = 'Cell Reconstruction'):
        '''
        Open the sqlite database at db_path and remember its cursor and path under the given mode.

        mode: 'Cell Reconstruction'
        '''

        # check_same_thread=False is kept from the original connection call
        db_connection = sqlite3_connect(db_path, check_same_thread=False)

        self.db_cursors[mode] = db_connection.cursor()
        self.db_paths[mode] = db_path
        self.settings_dict[mode]['cred'] = db_path
        self.db_path_labels[mode]['text'] = db_path

    def get_vx_sizes(self, mode):

        '''
        Read voxel_sizes_table into self.vx_sizes and set self.starting_location.

        mode : "Network Exploration" or "Cell Reconstruction"

        Each row is unpacked as (dtype, x, y, z, x_size, y_size, z_size). self.vx_sizes[dtype]
        becomes [x, y, z]; for the 'em' row, self.starting_location is set to half of each *_size.
        '''

        cursor = self.db_cursors[mode]
        cursor.execute('SELECT * FROM voxel_sizes_table')

        self.vx_sizes = {}

        for dtype, x, y, z, x_size, y_size, z_size in cursor.fetchall():

            self.vx_sizes[dtype] = [x, y, z]

            if dtype == 'em':
                self.starting_location = [int(x_size/2), int(y_size/2), int(z_size/2)]

    def import_base_segments(self, base_segments):

        '''
        base_segments is the list of segments from neuroglancer json (which is why they get put in "unknown")
        '''
        # Stored as a set of strings; this replaces whatever was in 'unknown' before
        self.cell_data['base_segments']['unknown'] = {str(x) for x in base_segments}

    def create_pr_graph(self):
        '''
        Build self.pr_graph from every segment listed in cell_data['base_segments'].

        Looks up segment locations, collects the agglomeration edges whose two ends are both in
        the cell, joins the remaining connected components, and finally checks that the graph
        vertices match the listed segments.
        '''

        seg_id = self.cell_data['metadata']['main_seg']['base']

        print(f'Creating base segment graph for cell {seg_id}', 'Cell Reconstruction')

        all_base_segs = [str(a) for b in self.cell_data['base_segments'].values() for a in b]

        self.update_base_locations(all_base_segs)

        # Segments still missing here have no row in base_location. The earlier workaround passed
        # a single ID string to the batch lookup, which split it into digits and copied another
        # segment's location; such segments are now left without a location and are attached to
        # the graph by attach_noloc_segs below.
        no_loc_base_segs = [x for x in all_base_segs if x not in self.cell_data['base_locations']]
        if no_loc_base_segs:
            print(f'{len(no_loc_base_segs)} base segments have no location in the database')

        possible_edges = []
        agglo_segs_done = set()
        base_segs_done = set()

        # Many base segments share one agglomeration segment, so each agglomeration is queried once
        for base_seg in all_base_segs:

            if base_seg in base_segs_done:
                continue

            agglo_seg = self.get_agglo_seg_of_base_seg(base_seg)
            base_segs_done.update(self.get_base_segs_of_agglo_seg(agglo_seg))

            if agglo_seg not in agglo_segs_done:
                agglo_segs_done.add(agglo_seg)
                possible_edges.extend(self.get_edges_from_agglo_seg(agglo_seg))

        # Keep only the edges with both ends inside this cell
        all_bs_set = set(all_base_segs)
        chosen_edges = [x for x in possible_edges if x[0] in all_bs_set and x[1] in all_bs_set]

        self.pr_graph = ig_Graph(directed=False)
        self.pr_graph.add_vertices(all_base_segs)
        self.pr_graph.add_edges(chosen_edges)

        self.add_cc_bridging_edges_pairwise()
        self.attach_noloc_segs()

        # No assertion that the graph has exactly one cluster: importing from a neuroglancer json
        # might "break" this and it is ok
        n_clusters = len(self.pr_graph.clusters(mode='weak'))

        print(f'{n_clusters} clusters in graph (note should/would be only 1 if loaded base ID from agglomo fresh)')

        self.assert_segs_in_sync()

        print('successful assertion that graph segments and segments listed in base_segments match')

    def save_cell_graph(self, directory_path = None, file_name=None, save_to_cloud=False):
        '''
        Write a copy of cell_data, with the graph nodes and edges filled in, to a .json file.

        directory_path : folder to save into (str or Path); defaults to the current working directory

        file_name : name of the file; defaults to
            cell_graph_<main base id>_<completion list>_<timestamp>.json

        save_to_cloud : accepted for compatibility, not used here
        '''

        cell_data = deepcopy(self.cell_data)
        cell_data['graph_nodes'] = [x['name'] for x in self.pr_graph.vs]
        cell_data['graph_edges'] = [(self.pr_graph.vs[x.source]['name'], self.pr_graph.vs[x.target]['name']) for x in self.pr_graph.es]

        # Convert sets to lists for saving in json file:
        for dtype in cell_data['base_segments']:
            cell_data['base_segments'][dtype] = list(cell_data['base_segments'][dtype])

        cell_data['removed_base_segs'] = list(cell_data['removed_base_segs'])

        # Formatted explicitly: slicing str(datetime.now()) cuts real digits when microseconds are 0
        timestamp = datetime.now().strftime('%Y-%m-%d %H.%M.%S')
        main_base_id = cell_data['metadata']['main_seg']['base']

        completion_list = sorted(set(cell_data['metadata']['completion']))
        completion_string = ','.join(completion_list).replace('_', ' ')

        # Accept plain strings as well as Path objects
        if directory_path is None:
            directory_path = Path().absolute()
        else:
            directory_path = Path(directory_path)

        if file_name is None:
            file_name = f'cell_graph_{main_base_id}_{completion_string}_{timestamp}.json'

        cell_data['metadata']['data_sources']['agglo'] = self.agglo_seg

        with open(directory_path / file_name, 'w') as fp:
            json.dump(cell_data, fp)

        print(f'Saved cell {main_base_id} locally', 'Cell Reconstruction')

    def update_base_locations(self, seg_list):
        '''
        Look up and store locations for the segments of seg_list not yet in cell_data['base_locations'].

        Segments that the database returns no location for are left out.
        '''

        known = self.cell_data['base_locations']
        seg_list = [x for x in seg_list if x not in known]

        result_dict = self.get_locations_from_base_segs(seg_list)

        for r in result_dict:
            known[r] = self.get_corrected_xyz(result_dict[r], 'seg')

    def get_locations_from_base_segs(self, base_segs, batch_size = 1000):
        '''
        Return {segment ID (str): (x, y, z)} for the given base segments, as stored in base_location.

        base_segs : list of segment IDs; a single ID (str or int) is also accepted

        batch_size : maximum number of IDs per query

        Segments without a row in base_location are absent from the result.
        '''

        # A bare ID must be wrapped, otherwise a string would be split into its digits
        if isinstance(base_segs, (str, int)):
            base_segs = [base_segs]

        base_segs = list(base_segs)
        cursor = self.db_cursors['Cell Reconstruction']
        results = {}

        for start in range(0, len(base_segs), batch_size):

            q = ','.join([str(x) for x in base_segs[start:start + batch_size]])

            query = f"""SELECT seg_id, x, y, z FROM base_location WHERE seg_id IN ({q})"""

            cursor.execute(query)

            results.update({str(x[0]): (int(x[1]), int(x[2]), int(x[3])) for x in cursor.fetchall()})

        return results

    def get_corrected_xyz(self, xyz, adj_key, rel_to_em=False):
        '''
        Multiply each coordinate of xyz by the matching entry of self.vx_sizes[adj_key].

        rel_to_em : if true, divide the result by self.vx_sizes['em'] and truncate to int

        Returns a list of three values.
        '''

        result = [coord * self.vx_sizes[adj_key][pos] for pos, coord in enumerate(xyz)]

        if rel_to_em:
            result = [int(result[x] / self.vx_sizes['em'][x]) for x in range(3)]

        return result

    def get_agglo_seg_of_base_seg(self, base_seg):
        '''
        Return the agglomeration ID (str) of base_seg, or base_seg itself when the table has no row for it.

        Asserts that at most one agglomeration ID is found.
        '''

        base_seg = str(base_seg)

        query = f"""SELECT agglo_id FROM agglo_base_resolved WHERE base_id = {base_seg}"""

        cursor = self.db_cursors['Cell Reconstruction']
        cursor.execute(query)
        agglo_segs = [str(x[0]) for x in cursor.fetchall()]

        assert len(agglo_segs) <= 1

        if not agglo_segs:
            return base_seg

        return agglo_segs[0]

    def get_base_segs_of_agglo_seg(self, agglo_seg):
        '''
        Return the base segment IDs (str) listed under agglo_seg, with agglo_seg itself appended last.
        '''

        agglo_seg = str(agglo_seg)

        query = f"""SELECT base_id FROM agglo_base_resolved WHERE agglo_id = {agglo_seg}"""

        cursor = self.db_cursors['Cell Reconstruction']
        cursor.execute(query)
        base_segs = [str(x[0]) for x in cursor.fetchall()]
        base_segs.append(agglo_seg)

        return base_segs

    def get_edges_from_agglo_seg(self, agglo_seg):
        '''
        Return the (label_a, label_b) edges stored for agglo_seg, as tuples of strings.
        '''

        agglo_seg = str(agglo_seg)

        query = f"""SELECT label_a, label_b FROM agglo_to_edges WHERE agglo_id = {agglo_seg}"""

        cursor = self.db_cursors['Cell Reconstruction']
        cursor.execute(query)

        return [(str(x[0]), str(x[1])) for x in cursor.fetchall()]

    def add_cc_bridging_edges_pairwise(self):

        '''
        Join connected components by repeatedly adding an edge between the two closest segments.

        con_comms = "connected components" abbreviation

        Only segments with a known location are considered. Stops when one component is left or
        when no pair of components can be compared. Each added edge is recorded in
        cell_data['added_graph_edges_pre_proofreading'] as [origin, target, dist].
        '''

        base_locations = self.cell_data['base_locations']
        con_comms = list(self.pr_graph.clusters(mode='weak'))

        while len(con_comms) > 1:

            candidate_edges = []

            for cc1, cc2 in combinations(con_comms, 2): # all pairs of components

                # Names of the located base segments in each component
                cc1_list = [self.pr_graph.vs[x]['name'] for x in cc1 if self.pr_graph.vs[x]['name'] in base_locations]
                cc2_list = [self.pr_graph.vs[x]['name'] for x in cc2 if self.pr_graph.vs[x]['name'] in base_locations]

                if not cc1_list or not cc2_list:
                    continue

                sel_cc1, sel_cc2, dist = self.get_closest_dist_between_ccs(cc1_list, cc2_list)
                candidate_edges.append([sel_cc1, sel_cc2, dist])

            if not candidate_edges:
                return

            origin, target, dist = min(candidate_edges, key = lambda x: x[2])

            self.pr_graph.add_edges([(origin, target)])
            self.cell_data['added_graph_edges_pre_proofreading'].append([origin, target, dist])

            con_comms = list(self.pr_graph.clusters(mode='weak'))

    def get_closest_dist_between_ccs(self, cc1_node_list, cc2_node_list):
        '''
        Return (segment from cc1, segment from cc2, distance) for the closest pair between two lists.

        Both lists hold segment names that have an entry in cell_data['base_locations'].
        The distance is Euclidean, truncated to int.
        '''

        cc1_node_locs = [self.cell_data['base_locations'][x] for x in cc1_node_list]
        cc2_node_locs = [self.cell_data['base_locations'][x] for x in cc2_node_list]

        f = cdist(cc1_node_locs, cc2_node_locs, 'euclidean')

        # Row and column of the smallest entry of the distance matrix
        min_indices = unravel_index(argmin(f, axis=None), f.shape)

        sel_cc1 = cc1_node_list[min_indices[0]]
        sel_cc2 = cc2_node_list[min_indices[1]]
        dist = int(f[min_indices])

        return sel_cc1, sel_cc2, dist

    def attach_noloc_segs(self):
        '''
        Attach components that contain segments without a location to the largest component.

        NOTE that this does nothing when the graph already has a single connected component.

        For every other component holding at least one segment without a location, an edge is
        added between a randomly chosen such segment and a randomly chosen segment of the largest
        component, and recorded in cell_data['added_graph_edges_pre_proofreading'] with distance
        'unknown'.
        '''

        remaining_cc = list(self.pr_graph.clusters(mode='weak'))

        if len(remaining_cc) <= 1:
            return

        base_locations = self.cell_data['base_locations']
        largest_cc = max(remaining_cc, key = len)

        # Components are lists of vertex indices; edges are added and recorded by segment name
        largest_cc_names = [self.pr_graph.vs[x]['name'] for x in largest_cc]

        for cc in remaining_cc:

            if cc == largest_cc:
                continue

            no_loc_this_cc = [self.pr_graph.vs[x]['name'] for x in cc if self.pr_graph.vs[x]['name'] not in base_locations]

            if not no_loc_this_cc:
                continue

            rand_seg1 = random_choice(no_loc_this_cc)
            rand_seg2 = random_choice(largest_cc_names)
            self.pr_graph.add_edges([(rand_seg1, rand_seg2)])
            self.cell_data['added_graph_edges_pre_proofreading'].append([rand_seg1, rand_seg2, 'unknown'])

    def assert_segs_in_sync(self, return_segs=False):
        '''
        Assert that the graph vertices are exactly the segments listed in cell_data['base_segments'].

        return_segs : if true, return the set of graph segment names; otherwise return None

        (There is no neuroglancer viewer here, so displayed segments are not checked.)
        '''

        graph_segs = {x['name'] for x in self.pr_graph.vs}
        listed_segs = {a for b in self.cell_data['base_segments'].values() for a in b}

        assert listed_segs == graph_segs

        if return_segs:
            return graph_segs

        return None

    def import_annotations(self, neuroglancer_data, neuroglancer_layer_name, crest_layer_name):
        '''
        Copy point annotations from neuroglancer layers into cell_data['end_points'].

        neuroglancer_data : the loaded neuroglancer json (a dict with a 'layers' list)

        neuroglancer_layer_name : list of neuroglancer layer names to read

        crest_layer_name : list of 'end_points' keys to fill, paired by position with
            neuroglancer_layer_name

        Each annotation becomes its 'point' scaled by the 'seg' voxel size; when the annotation has
        'segments', the first entry of 'segments' is appended to the location list.
        Raises ValueError if the two name lists differ in length or a layer name is not found.
        '''

        if len(neuroglancer_layer_name) != len(crest_layer_name):
            raise ValueError('neuroglancer_layer_name and crest_layer_name must have the same length')

        for n, c in zip(neuroglancer_layer_name, crest_layer_name):

            # get the 'layers' dictionary that has that name
            neuroglancer_layer = next((item for item in neuroglancer_data['layers'] if item.get('name') == n), None)

            if neuroglancer_layer is None:
                raise ValueError(f"No layer named '{n}' in the neuroglancer data")

            # create the annotation list for CREST and put it into cell_data
            annotation_list = []

            for v in neuroglancer_layer['annotations']:

                corrected_location = self.get_corrected_xyz(v['point'], 'seg')

                if 'segments' in v:
                    # presumably v['segments'][0] is a list of segment IDs; verify against the neuroglancer export
                    annotation_list.append(corrected_location + v['segments'][0])
                else:
                    annotation_list.append(corrected_location)

            self.cell_data['end_points'][c] = annotation_list
 

### Path definitions and filename extraction

In [4]:
# Folder that holds the neuroglancer json files to convert
dirname = r"D:\electric-fish\eCREST_local-files\neuroglancer-json"

In [128]:
# Collect the names of all files in the directory that start with "cell_"
p = Path(dirname)
names = [entry.name for entry in p.glob('cell_*')]

display(names)

['cell_216129202_type_MG2_v2_NS.json',
 'cell_300316308_type_MG2_v2_NS.json',
 'cell_300380579_type_MG1_NS.json',
 'cell_387368998_type_MG1_v8_NS_MP.json',
 'cell_42802314_type_MG2_NS.json',
 'cell_472175645_type_MG1_mpg.json',
 'cell_472409584_type_MG1_v2_NS.json']

### Set path to database

In [175]:
# Local copy of the agglomeration database read by crest_json
db_path = Path(r"D:\electric-fish\eCREST_local-files\Mariela_bigquery_exports_agglo_v230111c_16_crest_proofreading_database.db")


## Get info from the base neuroglancer json you want converted

In [183]:
# Load the neuroglancer json of one cell and read the cell's base segment ID from the file name

# f = names[2] # alternative: pick the file by its position in the 'names' list
f = 'cell_472409584_type_MG1_v2_NS.json'

# 'p' is the directory path and 'f' is the file name
data = (p / f).read_text()
neuroglancer_data = json.loads(data)

# File names look like 'cell_<base segment ID>_...', so the ID is the second '_'-separated field
base_segment_ID = f.split('_', 2)[1]

print(f'you have selected cell {base_segment_ID} to convert')

you have selected cell 472409584 to convert


### Obtain the list of base_segments from the neuroglancer json

In [184]:
# NOTE(review): reads the 'segments' of the second entry in 'layers' — confirm that this is the segmentation layer in every neuroglancer file
base_segment_list_ng = neuroglancer_data['layers'][1]['segments']
# The ID parsed from the file name is used as the main base segment of the cell
main_base_id = base_segment_ID

## Use agglomeration database to get info needed for CREST.json format

## Create an instance of the crest_json class

Initialize with the main_base_id from the neuroglancer file you are converting.

Use the db_path to the agglomeration dataset (must be local on computer)

In [185]:
# Opens the database at db_path and sets up an empty cell_data for main_base_id
crest = crest_json(main_base_id, db_path)

## Note: the cloud storage address is read from the database as well ;)
# crest.cloud_storage_address

Creating CREST file for 472409584 Cell Reconstruction


## Initialize base segments and graph

### Import all the neuroglancer-defined base segments into crest_json object

In [186]:
# All imported segments are stored under the 'unknown' cell structure
crest.import_base_segments(base_segment_list_ng)

### Create the graph from the base segments

Note that this step also populates 'base_locations'.

> Note: this script was modified to handle base segments whose locations are not correctly associated with their base_seg_id in the SQL database (the cause is not yet understood).

In [187]:
# Builds crest.pr_graph and fills cell_data['base_locations']; queries the local database
crest.create_pr_graph()


Creating base segment graph for cell 472409584 Cell Reconstruction
1 clusters in graph (note should/would be only 1 if loaded base ID from agglomo fresh)
successful assertion that graph segments and segments listed in base_segments match


## Get annotations from neuroglancer


### First, specify the annotation layer names in neuroglancer and crest

In [188]:
# Specify the original neuroglancer layer name
neuroglancer_layer_name = ['synapses','annotations']

# Define the 'end_points' annotation layer names to populate for CREST
crest_layer_name = ['post-synaptic','natural end']

### Then, transfer from neuroglancer to crest

In [189]:
# Fills cell_data['end_points'] for each crest layer name from the matching neuroglancer layer
crest.import_annotations(neuroglancer_data, neuroglancer_layer_name, crest_layer_name)

## Save new json

This step populates the cell_data graph data.

In [181]:
# Folder the converted CREST json is written to
directory_path = Path(r"D:\electric-fish\eCREST_local-files\neuroglancer-json\to-crest-json")

In [190]:
# If file_name is not given, save_cell_graph auto-generates one like CREST produces
crest.save_cell_graph(directory_path = directory_path)

Saved cell 472409584 locally Cell Reconstruction


## Other...

Crest does something similar when reading from list of dicts

In [None]:

# NOTE(review): this cell refers to `self` outside of any method, so it looks like an excerpt kept as reference notes rather than code to run — confirm
# When cells_todo is a dict, convert its keys and all listed segment IDs to strings and each segment list to a set
if type(self.cells_todo) == dict:
    self.cells_todo = {str(seg_id): {cell_struc: set([str(a) for a in self.cells_todo[seg_id][cell_struc]]) for cell_struc in self.cells_todo[seg_id]} for seg_id in self.cells_todo.keys()}
    '''
    ###... from above (to read easier)
    for seg_id in self.cells_todo.keys():
        for cell_struc in self.cells_todo[seg_id]}:
            for a in self.cells_todo[seg_id][cell_struc]]):
                self.cells_todo = {str(seg_id): {cell_struc: set([str(a) # creates a set() of all segments in the cell structure
    ###
    
    so, self.cells_todo is a dictionary of {'main_base_segment_ID'}{'unknown' : set(base_segments), ...'other cell structures' : set(base_segments)}
    '''

# Get all file names from cloud for seg_ids of interest:
# (file names are split on '_' and the third field is matched against the IDs in cells_todo)
all_cloud_file_names = [x.name for x in self.proofread_files_bucket.list_blobs() if x.name.split('_')[2] in self.cells_todo]
all_local_file_names = [x for x in listdir(self.save_dir) if 'cell' in x and x.split('_')[2] in self.cells_todo]

#self.completion_message_dict = {}

# IDs that have a file in the cloud or locally, and the cells_todo entries that have neither
all_cloud_seg_ids = set([x.split('_')[2] for x in all_cloud_file_names])
all_local_seg_ids = set([x.split('_')[2] for x in all_local_file_names])
cells_with_files = all_cloud_seg_ids.union(all_local_seg_ids)
cells_without_files = [x for x in self.cells_todo if x not in cells_with_files]

num_fileless_cells = len(cells_without_files)

if num_fileless_cells > 0:
    self.update_mtab(f'No starting file found locally or in cloud for {num_fileless_cells} cells', 'Cell Reconstruction')
else:
    self.update_mtab(f'Starting files found for all cells, checking completion status of each file ...', 'Cell Reconstruction')

complete_cells = []

# NOTE(review): reference excerpt; the branch for cells that already have a file is kept only as a string literal below
for seg_id in self.cells_todo: # if cells_todo is a dict, then seg_id will be the dict key

    '''
    This is just for if the cells already have a file... if import from neuroglancer dict, they will not
    
    # If a seg ID already has a file, we need to choose which one to use
    if seg_id in cells_with_files:

        if self.most_recent_file_complete(seg_id, ['local']) and specific_file == None:
            complete_cells.append(seg_id)
            #msg = f'Cell {seg_id} has already been completed for the selected cell structures locally'
            #self.update_mtab(msg, 'Cell Reconstruction')
            #self.completion_message_dict[seg_id] = msg
            continue

        # If no complete cell locally, start most recent file, whether it originates from cloud or local:

        if specific_file != None:
            most_recent_file = file_name
        else:
            most_recent_file = self.get_most_recent_cell_files(seg_id, ['cloud', 'local'])[0]

        if most_recent_file in listdir(self.save_dir):

            msg = f'Cell {seg_id} not completed for all the selected cell structures in the most recent (local) version'

            file_source = 'local'

        else:

            this_seg_cloud_file_names = [x.name for x in self.proofread_files_bucket.list_blobs() if x.name.split('_')[2] == str(seg_id)]

            assert most_recent_file in this_seg_cloud_file_names

            file_source = 'cloud'

            if self.most_recent_file_complete(seg_id, ['cloud']):
                msg = f'Cell {seg_id} completed for all the selected cell structures in the most recent (cloud) version'
            else:
                msg = f'Cell {seg_id} not completed for all the selected cell structures in the most recent (cloud) version'

        #self.update_mtab(msg, 'Cell Reconstruction')
        #self.completion_message_dict[seg_id] = msg

        if file_source == 'cloud':

            try:
                blob = self.proofread_files_bucket.blob(most_recent_file)
                blob.download_to_filename(f'{self.save_dir}/{most_recent_file}')

            except ConnectionError:
                self.create_cloud_storage_client()
                blob = self.proofread_files_bucket.blob(most_recent_file)
                blob.download_to_filename(f'{self.save_dir}/{most_recent_file}')

            self.update_mtab(f'Proofread cell file {most_recent_file} downloaded from cloud', 'Cell Reconstruction')

        with open(f'{self.save_dir}/{most_recent_file}', 'r') as fp:
            self.cell_data = json_load(fp)

        # If agglo_id has changed from last time, add new base segments - currently disabled:
        last_agglo_id = self.cell_data['metadata']['data_sources']['agglo']
        changed_agglo_id = (last_agglo_id != self.agglo_seg)

        changed_agglo_id = False

        if changed_agglo_id:

            self.add_new_base_segs_from_new_agglo(seg_id)

            # Wipe clean the stored graph:
            self.cell_data['graph_edges'] = []
            self.cell_data['graph_nodes'] = []

            self.create_pr_graph()
            self.save_cell_graph()
    '''

    # Otherwise, it depends on whether the input cells todo is a list or dictionary:
    # NOTE(review): the matching `if seg_id in cells_with_files:` is inside the string literal above, so this `else:` has no `if` and the cell cannot be parsed as-is
    else:
        self.making_starting_cell_data(seg_id)
        '''basically, 
            - sets up self.cell_data with default keys
            - imports addresses
            - takes 'unknown' key:value pair dictionary in self.cells_todo dictionary and puts that into self.cell_data['base_segments']
                 -- self.cell_data['base_segments'] = self.cells_todo[main_base_id]
        '''

        
        ''' 
        # self.pre_load_edges seems set to 0 and not changed
        # and self.get_new_gen_dict_entries function definition seems commented out
        
        if self.pre_load_edges == 1:
            all_base_segs = [a for b in self.cell_data['base_segments'].values() for a in b]
            self.get_new_gen_dict_entries(all_base_segs, 0)
        '''
        
        self.create_pr_graph()
        '''
        - gets all base segments from ['base_segments'][all keys]
        - self.update_base_locations(all_base_segs)
            -- populates cell_data['base_locations']
            -- self.get_locations_from_base_segs(seg_list not already in 'base_locations')
            -- self.cell_data['base_locations'][r] = self.get_corrected_xyz(result_dict[r], 'seg') 
                --- adjusts xyz based on resolution per voxel? coord*self.vx_sizes['seg'][x,y, or z]

        possible_edges = []
        agglo_segs_done = set()
        base_segs_done = set()
        
        for base_seg in all_base_segs: # getting all of the agglomeration segments with base segments in them and getting their edges
                                        # (keeping a list of base_seg_done to not double-do it because many base segments per agglo seg)

            if base_seg in base_segs_done: continue

            agglo_seg = self.get_agglo_seg_of_base_seg(base_seg) 
            children_base_segs = self.get_base_segs_of_agglo_seg(agglo_seg)
            base_segs_done.update(children_base_segs)

            if not agglo_seg in agglo_segs_done:

                edges = self.get_edges_from_agglo_seg(agglo_seg) # so edges are among base_segments within an agglomeration segment?
                                                                # query = f"""SELECT label_a, label_b FROM agglo_to_edges WHERE agglo_id = {agglo_seg}"""
                
                agglo_segs_done.add(agglo_seg)
                possible_edges.extend(edges)

        all_bs_set = set(all_base_segs)
        possible_edges = [x for x in possible_edges if x[0] in all_bs_set]  # first, make sure the first vertex is in all_base_segments
        chosen_edges = [x for x in possible_edges if x[1] in all_bs_set]    # second, make sure the second vertex is in all_base_segments

        self.pr_graph = ig_Graph(directed=False) # create a graph object
        self.pr_graph.add_vertices(all_base_segs) # add base segments as vertices
        self.pr_graph.add_edges(chosen_edges)  # add edges

        self.add_cc_bridging_edges_pairwise()
            -- if there is more than one connected component (so if more than one agglomo in reconstruction maybe?) then it connects them between the closes base segments among them
            -- calculates distance between base segments in each cluster from list(self.pr_graph.clusters(mode='weak'))\
            -- repeats until only one cluster in graph
            
        self.attach_noloc_segs()

        assert len(self.pr_graph.clusters(mode='weak')) == 1
        
        '''
        
        self.save_cell_graph()
        '''
        populates 'graph_nodes' and 'graph_edges'
        '''


    # Record the base segments of the cell that was just set up
    self.cells_todo_d[seg_id] = self.cell_data['base_segments']

# Save new settings file for quick completion lookup next time:
with open(f'{self.script_directory}/CREST_settings.json', 'w') as fp:
    json_dump(self.settings_dict, fp)

# Make sure cells todo is a list (cells found complete above are dropped):
self.cells_todo = [x for x in self.cells_todo if x not in complete_cells]

# NOTE(review): specific_file is not defined anywhere in this notebook — presumably a parameter of the CREST method this excerpt comes from
if specific_file == None:
    self.remove_skipped_cells()
