In [16]:
#import cudf  (use if you are on linux with a recent Nvidia GPU)
import pandas as pd
import numpy as np
from pathlib import Path

### Define local data folder paths 

Here we use the FlyWire reconstruction (v783), which can be downloaded from [https://codex.flywire.ai/api/download](https://codex.flywire.ai/api/download).

In [17]:
# All inputs and outputs live below <current working directory>/data/FLY.
data_folder = Path.cwd().joinpath("data", "FLY")
meta_folder = data_folder.joinpath("meta")
synapses_file = data_folder.joinpath("synapses_flat_v783.csv")

# Figures go to their own sub-folder, which is created if it does not exist yet.
plot_folder = data_folder.joinpath("visualization")
plot_folder.mkdir(parents=True, exist_ok=True)

# Overlap-volume grid size (left disabled):
# grid_size =  25000
# Label of the neuron selection; it is embedded in the names of derived files.
selection_descriptor = "optical-lobes"

synapses_file_curated = data_folder.joinpath(f"synapses_flat_{selection_descriptor}.csv")

### Select neurons and cell types of interest  

Load the cell-type metadata into lookup dictionaries (root ID → type ID, type ID → type name, type name → type ID).

In [18]:
# Cell-type table. Root IDs are converted to strings so that they can be used
# as string keys in the lookups below.
df_celltypes = pd.read_csv(meta_folder.joinpath("cell_types.csv"))
df_celltypes["root_id"] = df_celltypes["root_id"].astype(str)

# Lookups: neuron -> type ID, type ID -> type name, type name -> type ID.
# For duplicate keys the last row wins.
celltypes_rootid_typeid = dict(zip(df_celltypes["root_id"], df_celltypes["primary_type_id"]))
celltypes_typeid_name = dict(zip(df_celltypes["primary_type_id"], df_celltypes["primary_type"]))
celltypes_name_typeid = dict(zip(df_celltypes["primary_type"], df_celltypes["primary_type_id"]))

Here we select the neurons that have an optic-lobe column assignment, together with their cell types.

In [19]:
# Table assigning neurons (root_id) to columns (column_id).
df_optical_columns = pd.read_csv(meta_folder.joinpath("column_assignment.csv"))

# Root ID (as stored in the table, i.e. not converted to a string) -> column ID.
map_rootid_columnid = dict(zip(df_optical_columns["root_id"], df_optical_columns["column_id"]))

# String form of every root ID that has a column assignment. Sorting before
# building the set is unnecessary because a set carries no order.
enabled_ids = {str(root_id) for root_id in df_optical_columns["root_id"].tolist()}

Write the selected neurons, their zero-based mapped neuron IDs, their column IDs, and their cell types to a separate dataframe and save it as CSV.

In [20]:
# Parallel lists, one entry per selected neuron; they become the columns of
# df_selected below.
sel_id = []
sel_mapped_id = []
sel_celltype = []
sel_celltype_name = []
sel_columns = []

# The position of a root ID in the sorted (string) order is its zero-based
# mapped ID.
for mapped_id, original_id in enumerate(sorted(enabled_ids)):
    sel_id.append(original_id)
    sel_mapped_id.append(mapped_id)

    # Every selected ID originates from the column-assignment table, so its
    # column is always known. Record it regardless of whether a cell type is
    # available; a -1 here would also clash with the unsigned dtype used for
    # column_id when the curated synapse file is read back.
    sel_columns.append(map_rootid_columnid[int(original_id)])

    if original_id in celltypes_rootid_typeid:
        celltype_id = celltypes_rootid_typeid[original_id]
        sel_celltype.append(celltype_id)
        sel_celltype_name.append(celltypes_typeid_name[celltype_id])
    else:
        # Neurons missing from the cell-type table get the sentinel -1 / "-1".
        sel_celltype.append(-1)
        sel_celltype_name.append("-1")

df_selected = pd.DataFrame({
    'root_id': sel_id,
    'mapped_id': sel_mapped_id,
    'column_id': sel_columns,
    'celltype_id': sel_celltype,
    'celltype_name': sel_celltype_name,
})
df_selected.to_csv(meta_folder/f"selected_neurons_{selection_descriptor}.csv", index=False)

In [21]:
# Preview the first five rows of the selection table.
df_selected.head(n=5)

Unnamed: 0,root_id,mapped_id,column_id,celltype_id,celltype_name
0,720575940596125868,0,93,0,T5c
1,720575940599333574,1,355,4,Tm1
2,720575940599457990,2,247,5,T4b
3,720575940599459782,3,513,6,T5b
4,720575940599704006,4,331,7,T5a


In [22]:
# Lookups keyed by the original root ID (a string in df_selected):
# root ID -> zero-based mapped ID, and root ID -> column ID.
id_mapping = dict(zip(df_selected["root_id"], df_selected["mapped_id"]))
id_column = dict(zip(df_selected["root_id"], df_selected["column_id"]))

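Read the raw synapse file line by line, attach metadata (cell-type IDs and the column ID), and replace the root IDs of selected neurons with their zero-based mapped IDs. Missing values are set to -1, and synapses for which neither the pre- nor the postsynaptic neuron is selected are dropped.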

In [23]:
def curate_annotate_synapses(filename_in, filename_out,
                             mapped_ids=None, column_ids=None, celltype_ids=None):
    """Stream the raw synapse CSV into a curated, annotated CSV.

    The first input line is treated as a header and skipped. Each further line
    is expected to hold, in this order: presynaptic root ID, postsynaptic root
    ID, x, y, z (additional fields are ignored). Synapses for which neither
    neuron is part of the selection are dropped. Blank lines are skipped.

    Output columns:
    x, y, z, pre_id_mapped, post_id_mapped, pre_celltype, post_celltype, column_id.
    Unknown mapped IDs and cell types are written as -1. The column is taken
    from the postsynaptic neuron when it is selected, otherwise from the
    presynaptic neuron.

    Parameters
    ----------
    filename_in : path-like
        Raw synapse CSV to read.
    filename_out : path-like
        Curated CSV to write (overwritten if it exists).
    mapped_ids : dict, optional
        Root ID (str) -> mapped neuron ID. Defaults to the notebook-level
        ``id_mapping``.
    column_ids : dict, optional
        Root ID (str) -> column ID. Defaults to the notebook-level
        ``id_column``. Must contain every key of ``mapped_ids``.
    celltype_ids : dict, optional
        Root ID (str) -> cell-type ID. Defaults to the notebook-level
        ``celltypes_rootid_typeid``.

    Raises
    ------
    IndexError
        If a non-blank data line has fewer than five comma-separated fields.
    """
    # Fall back to the lookup tables built earlier in the notebook.
    mapped_ids = id_mapping if mapped_ids is None else mapped_ids
    column_ids = id_column if column_ids is None else column_ids
    celltype_ids = celltypes_rootid_typeid if celltype_ids is None else celltype_ids

    def get_mapped_id(value):
        """Return the mapped ID of a root ID as a string, or "-1" if unknown."""
        if value == "" or value not in mapped_ids:
            return "-1"
        return str(mapped_ids[value])

    def get_celltype(root_id):
        """Return the cell-type ID of a root ID as a string, or "-1" if unknown."""
        if root_id in celltype_ids:
            return str(celltype_ids[root_id])
        return "-1"

    with open(filename_in, "r") as f_in, open(filename_out, "w") as f_out:
        # Always emit the header so that the result is a readable CSV even
        # when the input is empty or contains no selected neuron.
        f_out.write("x,y,z,pre_id_mapped,post_id_mapped,pre_celltype,post_celltype,column_id\n")

        # Skip the input header (no-op for an empty file).
        next(f_in, None)

        for line in f_in:
            if not line.strip():
                # A blank line would otherwise fail on parts[1] below.
                continue

            parts = line.rstrip().split(",")
            pre_id_orig = parts[0]
            post_id_orig = parts[1]

            pre_id = get_mapped_id(pre_id_orig)
            post_id = get_mapped_id(post_id_orig)

            # Keep only synapses that touch at least one selected neuron.
            if pre_id == "-1" and post_id == "-1":
                continue

            # Prefer the postsynaptic neuron's column; at this point at least
            # one of the two neurons is selected, so the lookup cannot miss.
            column_owner = post_id_orig if post_id != "-1" else pre_id_orig
            column = str(column_ids[column_owner])

            x = parts[2]
            y = parts[3]
            z = parts[4]

            pre_celltype = get_celltype(pre_id_orig)
            post_celltype = get_celltype(post_id_orig)

            parts_new = [x, y, z, pre_id, post_id, pre_celltype, post_celltype, column]
            f_out.write(",".join(parts_new) + "\n")

In [24]:
# Write the curated synapse file for the current selection to disk.
curate_annotate_synapses(filename_in=synapses_file, filename_out=synapses_file_curated)

### Apply local overlap volumes 

In [25]:
# Load the curated synapses with compact integer dtypes. Mapped IDs and cell
# types are signed because they may hold -1; the column ID is read as unsigned.
synapses_df = pd.read_csv(
    synapses_file_curated,
    dtype={
        "column_id": "uint32",
        **dict.fromkeys(
            ["pre_id_mapped", "post_id_mapped", "pre_celltype", "post_celltype"],
            "int32",
        ),
    },
)
synapses_df

Unnamed: 0,x,y,z,pre_id_mapped,post_id_mapped,pre_celltype,post_celltype,column_id
0,705988,275676,221800,0,-1,0,500,93
1,804940,353138,204840,-1,1393,-1,4,3
2,799768,358756,189820,-1,1998,-1,4,454
3,810024,346582,188400,-1,11746,-1,18,44
4,810096,346526,188320,-1,11918,-1,4,44
...,...,...,...,...,...,...,...,...
232231,736808,280330,105740,-1,15831,-1,107,110
232232,799926,279330,137200,-1,7037,-1,69,447
232233,798604,271060,141100,-1,7798,-1,69,236
232234,715262,260734,183440,-1,19419,-1,420,550


In [26]:
# The column ID serves as the overlap volume from here on. Rebinding the name
# instead of using inplace=True keeps the cell safe to re-run.
synapses_df = synapses_df.rename(columns={"column_id": "overlap_volume"})

In [27]:
# Count the synapses for every combination of overlap volume, pre-/postsynaptic
# mapped neuron ID and pre-/postsynaptic cell type.
df_synapses_aggregated = (
    synapses_df
    .groupby(by=[
        "overlap_volume",
        "pre_id_mapped",
        "post_id_mapped",
        "pre_celltype",
        "post_celltype",
    ])
    .size()
    .reset_index(name="synapse_count")
)

# Plain pandas frame, so it can be written directly (an earlier version of
# this cell went through .to_pandas() first).
df_synapses_aggregated.to_csv(
    data_folder.joinpath(f"synapses_{selection_descriptor}_aggregated.csv"),
    index=False,
)

# Total number of synapses covered by the aggregation.
print(df_synapses_aggregated["synapse_count"].sum())
df_synapses_aggregated

232236


Unnamed: 0,overlap_volume,pre_id_mapped,post_id_mapped,pre_celltype,post_celltype,synapse_count
0,1,-1,36,-1,77,1
1,1,-1,36,131,77,1
2,1,-1,36,444,77,2
3,1,-1,36,747,77,3
4,1,-1,36,864,77,1
...,...,...,...,...,...,...
47537,796,17948,-1,213,1483,1
47538,796,18112,5880,60,248,1
47539,796,18438,1019,60,37,1
47540,796,18445,-1,7,24,1
