In [None]:
#import cudf  (use if you are on linux with a recent Nvidia GPU)
import pandas as pd
import numpy as np
from pathlib import Path

### Define local data folder paths 

Here we use the FlyWire reconstruction (v783), which can be downloaded from [https://codex.flywire.ai/api/download](https://codex.flywire.ai/api/download).

In [None]:
# Root of the local data tree, resolved relative to the current working directory
data_folder = Path.cwd().joinpath("data", "FLY")
meta_folder = data_folder.joinpath("meta")
plot_folder = data_folder.joinpath("visualization")
# Only the visualization folder is created here (parents included); the meta folder
# is read from by later cells and therefore has to exist already.
plot_folder.mkdir(parents=True, exist_ok=True)

# Raw synapse table (input) and its curated, annotated counterpart (written by this notebook)
synapses_file = data_folder.joinpath("synapses_flat_v783.csv")
synapses_file_curated = data_folder.joinpath("synapses_flat_v783_ext.csv")

# Select overlap volume size (passed to Gridder.computeGrid and used in output file names)
grid_size = 25000
grid_size_descriptor = f"grid-{grid_size}"

### Select neurons and cell types of interest  

Load cell types meta information into dictionaries

In [None]:
def _column_lookup(frame, key_column, value_column):
    """Return a dict that maps the values of ``key_column`` to those of ``value_column``."""
    return frame.set_index(key_column)[value_column].to_dict()


# Cell type table; root IDs are converted to strings because they are used
# as string keys in the lookups below and in the rest of the notebook.
df_celltypes = pd.read_csv(meta_folder / "cell_types.csv").astype({"root_id": str})

# root ID -> type ID, type ID -> type name, type name -> type ID
celltypes_rootid_typeid = _column_lookup(df_celltypes, "root_id", "primary_type_id")
celltypes_typeid_name = _column_lookup(df_celltypes, "primary_type_id", "primary_type")
celltypes_name_typeid = _column_lookup(df_celltypes, "primary_type", "primary_type_id")

Here we select the neurons assigned to one optic lobe column, together with their cell types.

In [None]:
# Table that assigns neurons (root_id, type) to optic lobe columns (column_id)
df_optical_columns = pd.read_csv(meta_folder / "column_assignment.csv")

# IDs of the columns whose neurons are selected for the analysis
column_ids = [628]
df_optical_selected = df_optical_columns[df_optical_columns["column_id"].isin(column_ids)]

# Distinct cell type names and root IDs (as strings) of the selected neurons.
# Sets are unordered, so no sorting is needed here; the IDs are sorted where
# the zero-based mapping is built.
enabled_celltypes = set(df_optical_selected["type"].tolist())
enabled_ids = {str(root_id) for root_id in df_optical_selected["root_id"].tolist()}

In [None]:
# enabled_ids

Write the selected cells, their zero-index-based neuron IDs, and their cell types to a separate dataframe

In [None]:
# Sorted root IDs (strings) define the order; the position of a neuron in this
# order is its zero-based mapped ID.
sel_id = sorted(enabled_ids)
sel_mapped_id = list(range(len(sel_id)))

# Neurons without an entry in the cell type table get the type ID -1 and the name "-1".
_has_celltype = [root_id in celltypes_rootid_typeid for root_id in sel_id]
sel_celltype = [
    celltypes_rootid_typeid[root_id] if known else -1
    for root_id, known in zip(sel_id, _has_celltype)
]
sel_celltype_name = [
    celltypes_typeid_name[type_id] if known else "-1"
    for type_id, known in zip(sel_celltype, _has_celltype)
]

df_selected = pd.DataFrame({
    'root_id': sel_id,
    'mapped_id': sel_mapped_id,
    'celltype_id': sel_celltype,
    'celltype_name': sel_celltype_name,
})
df_selected.to_csv(meta_folder / "selected_neurons.csv", index=False)

In [None]:
# Preview the first rows of the selection table (root ID, mapped ID, cell type ID and name)
df_selected.head()

In [None]:
# Lookup from original root ID (string) to zero-based mapped ID
id_mapping = pd.Series(
    df_selected["mapped_id"].values,
    index=df_selected["root_id"],
).to_dict()
#id_mapping

Read the raw synapses file, attach meta information (e.g. type labels), set zero-index-based neuron IDs for selected neurons, and assign missing data to -1.  

In [None]:
def curate_annotate_synapses(filename_in, filename_out, enabled_ct_set, id_mapping,
                             rootid_typeid=None, typeid_name=None):
    """Stream the raw synapse CSV and write a curated, annotated copy.

    The input is read line by line. Its first line is treated as a header and
    discarded; every following non-blank line is split on "," and expected to
    hold, in this order: pre root ID, post root ID, x, y, z. The output file
    starts with the header
    ``x,y,z,pre_id_mapped,post_id_mapped,pre_celltype,post_celltype``.

    Parameters
    ----------
    filename_in : path-like
        Raw synapse CSV to read.
    filename_out : path-like
        Curated CSV to write (overwritten if it exists).
    enabled_ct_set : collection of str
        Cell type names that are kept; the type ID of any other type is
        written as "-1".
    id_mapping : dict
        Maps root ID (string) to zero-based mapped ID. Root IDs that are empty
        or missing from the mapping are written as "-1".
    rootid_typeid : dict, optional
        Maps root ID (string) to cell type ID. Defaults to the notebook-level
        ``celltypes_rootid_typeid``.
    typeid_name : dict, optional
        Maps cell type ID to cell type name. Defaults to the notebook-level
        ``celltypes_typeid_name``.

    Raises
    ------
    IndexError
        If a non-blank data line has fewer than five comma-separated fields.
    KeyError
        If a type ID found in ``rootid_typeid`` is missing from ``typeid_name``.
    """
    # Fall back to the notebook-level lookup tables when none are passed explicitly.
    if rootid_typeid is None:
        rootid_typeid = celltypes_rootid_typeid
    if typeid_name is None:
        typeid_name = celltypes_typeid_name

    def get_mapped_id(value):
        """Return the mapped ID of ``value`` as a string, or "-1" if it is empty or not selected."""
        if value == "" or value not in id_mapping:
            return "-1"
        return str(id_mapping[value])

    def get_celltype(root_id):
        """Return the cell type ID of ``root_id`` as a string, or "-1" if unknown or not enabled."""
        if root_id not in rootid_typeid:
            return "-1"
        celltype_id = rootid_typeid[root_id]
        if typeid_name[celltype_id] in enabled_ct_set:
            return str(celltype_id)
        return "-1"

    with open(filename_in, "r") as f_in, open(filename_out, "w") as f_out:
        # Consume the input header; an empty input yields an empty output file.
        if next(f_in, None) is None:
            return
        f_out.write("x,y,z,pre_id_mapped,post_id_mapped,pre_celltype,post_celltype\n")

        for line in f_in:
            line = line.rstrip()
            if not line:
                # Blank lines (e.g. a trailing newline at the end of the file) carry no synapse.
                continue

            parts = line.split(",")
            pre_root = parts[0]
            post_root = parts[1]
            x = parts[2]
            y = parts[3]
            z = parts[4]

            parts_new = [
                x, y, z,
                get_mapped_id(pre_root), get_mapped_id(post_root),
                get_celltype(pre_root), get_celltype(post_root),
            ]
            f_out.write(",".join(parts_new) + "\n")

In [None]:
# Stream the raw synapse file line by line and write the curated file
# (mapped neuron IDs and cell type IDs, with -1 for everything that is not selected).
curate_annotate_synapses(synapses_file, synapses_file_curated, enabled_celltypes, id_mapping)

### Apply local overlap volumes 

In [None]:
# The dtype keys must match the header of the curated file exactly
# (x, y, z, pre_id_mapped, post_id_mapped, pre_celltype, post_celltype).
# The ID and cell type columns are signed because -1 marks "not selected / unknown".
synapses_df = pd.read_csv(synapses_file_curated, dtype={
    "x": "uint32",
    "y": "uint32",
    "z": "uint32",
    "pre_id_mapped": "int32",
    "post_id_mapped": "int32",
    "pre_celltype": "int32",
    "post_celltype": "int32",
})
synapses_df

In [None]:
# Placeholder column of zeros; it is overwritten with the grid indices once the grid is computed.
synapses_df["overlap_volume"] = np.zeros(len(synapses_df), dtype="uint64")

In [None]:
from lib.gridder import Gridder

gridder = Gridder()

# Synapse coordinates as a plain array with the columns x, y, z
xyz = synapses_df[["x", "y", "z"]].to_numpy()

gridder.setPositions(xyz)
# NOTE(review): presumably `indices` holds one grid cell (overlap volume) index per row of `xyz`
# and `df_grid_meta` describes the grid cells — confirm against lib/gridder.
indices, df_grid_meta = gridder.computeGrid(grid_size)
# Replace the placeholder column with the computed indices
synapses_df["overlap_volume"] = indices.astype("uint64")

In [None]:
# Persist the grid meta table returned by the gridder, named after the chosen grid size
df_grid_meta.to_csv(data_folder/f"{grid_size_descriptor}_meta.csv", index=False)

In [None]:
# 5 % random subsample of all synapses for plotting. The seed is fixed so that
# re-running the notebook writes the same file.
# Note: a later cell writes a subsample of df_synapses_range to this same path.
synapses_df.sample(frac=0.05, random_state=42).to_csv(plot_folder/f"synapses_{grid_size_descriptor}_5pct.csv", index=False)

In [None]:
#for neuron_id, mapped_id in mapping_dict.items():
#    synapses_df.loc[synapses_df.pre_id == neuron_id, "pre_id_mapped"] = mapped_id
#    synapses_df.loc[synapses_df.post_id == neuron_id, "post_id_mapped"] = mapped_id

In [None]:
# Synapses in which the pre- or the postsynaptic neuron (or both) is a selected neuron,
# i.e. has a mapped ID other than the -1 placeholder
_touches_selected = synapses_df["pre_id_mapped"].gt(-1) | synapses_df["post_id_mapped"].gt(-1)
synapses_mapped_neurons = synapses_df[_touches_selected]
synapses_mapped_neurons.to_csv(plot_folder/f"synapses_{grid_size_descriptor}_mapped-neurons.csv", index=False)
synapses_mapped_neurons

In [None]:
# Synapses in which both the pre- and the postsynaptic neuron are selected neurons
_both_selected = synapses_df["pre_id_mapped"].gt(-1) & synapses_df["post_id_mapped"].gt(-1)
synapses_btw_mapped_neurons = synapses_df[_both_selected]
synapses_btw_mapped_neurons.to_csv(plot_folder/f"synapses_{grid_size_descriptor}_between-mapped-neurons.csv", index=False)
synapses_btw_mapped_neurons

In [None]:
# Distinct overlap volumes that contain at least one synapse of a selected neuron
selected_overlap_volumes = synapses_mapped_neurons["overlap_volume"].unique()
len(selected_overlap_volumes)

In [None]:
# When enabled, keep only the synapses that lie in an overlap volume containing
# at least one synapse of a selected neuron; otherwise keep all synapses.
range_filter_enabled = False

if range_filter_enabled:
    in_selected_volume = synapses_df["overlap_volume"].isin(selected_overlap_volumes)
    # reset_index returns a new frame, so its result has to be assigned.
    df_synapses_range = synapses_df[in_selected_volume].reset_index(drop=True)
else:
    df_synapses_range = synapses_df.copy()

In [None]:
# Full (optionally range-filtered) synapse table for downstream processing
df_synapses_range.to_csv(data_folder/f"synapses_{grid_size_descriptor}.csv", index=False)
# 5 % random subsample for plotting. The seed is fixed so that re-running the
# notebook writes the same file.
df_synapses_range.sample(frac=0.05, random_state=42).to_csv(plot_folder/f"synapses_{grid_size_descriptor}_5pct.csv", index=False)

In [None]:
# Display the synapse table that is aggregated in the next step
df_synapses_range

In [None]:
# Count the synapses for every combination of overlap volume, pre/post neuron and pre/post cell type
_group_columns = [
    "overlap_volume",
    "pre_id_mapped",
    "post_id_mapped",
    "pre_celltype",
    "post_celltype",
]
df_synapses_aggregated = (
    df_synapses_range
    .groupby(_group_columns)
    .size()
    .rename("synapse_count")
    .reset_index()
)

df_synapses_aggregated.to_csv(data_folder/f"synapses_{grid_size_descriptor}_aggregated.csv", index=False)
# Sanity check: the total should equal the number of rows of df_synapses_range
print(df_synapses_aggregated["synapse_count"].sum())
df_synapses_aggregated