In [1]:
#import cudf  (use if you are on linux with a recent Nvidia GPU)
import pandas as pd
import numpy as np
from pathlib import Path

### Define local data folder paths 

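Here we use the FlyWire reconstruction (v783), which can be downloaded from [https://codex.flywire.ai/api/download](https://codex.flywire.ai/api/download). Note that the code cell below currently points at the `synapses_flat_Lion.CSV` file in `data/VNC`; the v783 file names are kept only as commented-out alternatives.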

In [48]:
# All inputs and outputs of this notebook live below <cwd>/data/VNC.
data_folder = Path.cwd().joinpath("data", "VNC")

# Output sub-folders; created on first run, left untouched afterwards.
meta_folder = data_folder.joinpath("meta")
meta_folder.mkdir(parents=True, exist_ok=True)
plot_folder = data_folder.joinpath("visualization")
plot_folder.mkdir(parents=True, exist_ok=True)

# Active dataset: the "Lion" synapse table and its curated/extended copy.
# (The FlyWire v783 equivalents would be synapses_flat_v783.csv and
# synapses_flat_v783_ext.csv in the same folder.)
synapses_file = data_folder.joinpath("synapses_flat_Lion.CSV")
synapses_file_curated = data_folder.joinpath("synapses_flat_Lion_ext.CSV")

# Overlap volume size; handed to Gridder.computeGrid further down and
# embedded in the names of the generated output files.
grid_size = 10_000
grid_size_descriptor = f"grid-{grid_size}"

### Select neurons and cell types of interest  

Here we select some neurons and their respective cell types from an optic lobe column.

In [16]:
# Load only the columns used downstream: synapse position, the two partner
# body IDs and the integer cell type column. The file is semicolon-separated.
synapse_columns = ["x", "y", "z", "bodyId_pre", "bodyId_post", "celltype_int"]
df_synapses = pd.read_csv(synapses_file, usecols=synapse_columns, sep=";")
df_synapses

Unnamed: 0,x,y,z,bodyId_pre,bodyId_post,celltype_int
0,24446,23081,47906,163893,10014,18
1,24970,22692,46813,163893,10014,18
2,24763,23284,47174,163893,10014,18
3,23950,21090,47386,163893,10014,18
4,24890,22518,46762,163893,10014,18
...,...,...,...,...,...,...
55815,31057,24630,47459,10334,169914,35
55816,29609,25764,47159,10334,169914,35
55817,29600,25755,47175,10334,169914,35
55818,29445,25496,47114,10334,169914,35


In [37]:
# Switch from the source column names to the pre/post naming used by the
# rest of the notebook; the raw frame itself is left unchanged.
column_renames = {
    "bodyId_pre": "pre_id",
    "bodyId_post": "post_id",
    "celltype_int": "pre_celltype",
}
df_synapses_renamed = df_synapses.rename(columns=column_renames)
df_synapses_renamed

Unnamed: 0,x,y,z,pre_id,post_id,pre_celltype
0,24446,23081,47906,163893,10014,18
1,24970,22692,46813,163893,10014,18
2,24763,23284,47174,163893,10014,18
3,23950,21090,47386,163893,10014,18
4,24890,22518,46762,163893,10014,18
...,...,...,...,...,...,...
55815,31057,24630,47459,10334,169914,35
55816,29609,25764,47159,10334,169914,35
55817,29600,25755,47175,10334,169914,35
55818,29445,25496,47114,10334,169914,35


In [38]:
# Give every neuron ID a dense zero-based index in order of first appearance.
# Flattening the two ID columns row-major yields pre_0, post_0, pre_1, post_1, ...
# which is exactly the order a row-by-row scan (pre before post) would visit,
# and pd.unique keeps first-occurrence order -- so the resulting mapping matches
# the row loop while avoiding a slow Python-level iterrows() pass.
ordered_ids = pd.unique(df_synapses_renamed[["pre_id", "post_id"]].to_numpy().ravel())
id_mapping = {neuron_id: mapped_idx for mapped_idx, neuron_id in enumerate(ordered_ids.tolist())}

In [39]:
# Pre-allocate both mapped-ID columns as integer zeros, one entry per synapse
# row; the real values are filled in by the following cell.
for mapped_column in ("pre_id_mapped", "post_id_mapped"):
    df_synapses_renamed[mapped_column] = np.zeros(len(df_synapses_renamed), dtype=int)

In [40]:
# Translate the original neuron IDs into their dense indices with one
# vectorised lookup per column instead of a Python-level row loop with
# per-cell .at writes. Every ID in these columns is a key of id_mapping
# (it was built from the same two columns), so no lookup comes back missing;
# astype(int) keeps the columns as plain integers.
df_synapses_renamed["pre_id_mapped"] = df_synapses_renamed["pre_id"].map(id_mapping).astype(int)
df_synapses_renamed["post_id_mapped"] = df_synapses_renamed["post_id"].map(id_mapping).astype(int)

In [42]:
# Persist the extended table to the exact path that is read back later via
# synapses_file_curated. A separately spelled file name here (".csv" vs the
# configured ".CSV") would point at a different file on case-sensitive
# filesystems, so the read would fail or pick up stale data.
df_synapses_renamed.to_csv(synapses_file_curated, index=False)
df_synapses_renamed

Unnamed: 0,x,y,z,pre_id,post_id,pre_celltype,pre_id_mapped,post_id_mapped
0,24446,23081,47906,163893,10014,18,0,1
1,24970,22692,46813,163893,10014,18,0,1
2,24763,23284,47174,163893,10014,18,0,1
3,23950,21090,47386,163893,10014,18,0,1
4,24890,22518,46762,163893,10014,18,0,1
...,...,...,...,...,...,...,...,...
55815,31057,24630,47459,10334,169914,35,410,138
55816,29609,25764,47159,10334,169914,35,410,138
55817,29600,25755,47175,10334,169914,35,410,138
55818,29445,25496,47114,10334,169914,35,410,138


Read the raw synapses file, attach meta information (e.g. type labels), assign zero-based neuron IDs to the selected neurons, and set missing data to -1.

### Apply local overlap volumes 

In [49]:
# Explicit dtypes for the columns used below: unsigned 32-bit positions and
# signed 32-bit mapped IDs / cell type. Columns not listed here (pre_id,
# post_id) keep the dtype pandas infers.
curated_dtypes = {
    "x": "uint32",
    "y": "uint32",
    "z": "uint32",
    "pre_id_mapped": "int32",
    "post_id_mapped": "int32",
    "pre_celltype": "int32",
}
synapses_df = pd.read_csv(synapses_file_curated, dtype=curated_dtypes)
synapses_df

Unnamed: 0,x,y,z,pre_id,post_id,pre_celltype,pre_id_mapped,post_id_mapped
0,24446,23081,47906,163893,10014,18,0,1
1,24970,22692,46813,163893,10014,18,0,1
2,24763,23284,47174,163893,10014,18,0,1
3,23950,21090,47386,163893,10014,18,0,1
4,24890,22518,46762,163893,10014,18,0,1
...,...,...,...,...,...,...,...,...
55815,31057,24630,47459,10334,169914,35,410,138
55816,29609,25764,47159,10334,169914,35,410,138
55817,29600,25755,47175,10334,169914,35,410,138
55818,29445,25496,47114,10334,169914,35,410,138


In [50]:
# Per-column summary statistics as a quick sanity check of the curated table.
synapse_stats = synapses_df.describe()
synapse_stats

Unnamed: 0,x,y,z,pre_id,post_id,pre_celltype,pre_id_mapped,post_id_mapped
count,55820.0,55820.0,55820.0,55820.0,55820.0,55820.0,55820.0,55820.0
mean,24424.788033,22765.866374,46780.304085,3599947.0,35879.296059,11.921426,98.901362,118.941222
std,3524.96351,1691.876054,1738.834419,344742700.0,56619.266024,20.546131,95.833544,41.813229
min,14867.0,17590.0,41298.0,10016.0,10014.0,-1.0,0.0,1.0
25%,21996.0,21588.0,45818.75,14502.0,10088.0,0.0,43.0,131.0
50%,24517.0,22865.0,47102.0,18105.0,10589.0,4.0,77.0,133.0
75%,26709.0,23829.0,47893.0,21929.0,18309.0,16.0,114.0,136.0
max,35053.0,36694.0,52224.0,34716370000.0,169914.0,217.0,652.0,138.0


In [51]:
# pre_id_mapped / post_id_mapped are already present in the curated file, so
# they are not re-initialised here (an earlier version reset them to -1).
# Only the overlap_volume column is created, as a uint64 placeholder of zeros
# built with a pandas Series (this replaced a cudf.Series).
n_synapses = len(synapses_df)
synapses_df["overlap_volume"] = pd.Series(np.zeros(n_synapses), dtype="uint64")

In [52]:
# Assign every synapse to an overlap volume using the project-local Gridder.
from lib.gridder import Gridder

gridder = Gridder()

# Synapse coordinates as a NumPy array with one row per synapse and the
# columns x, y, z (in that order).
xyz = synapses_df[["x", "y", "z"]].to_numpy()

gridder.setPositions(xyz)
# computeGrid returns two values: `indices`, which is stored per synapse row
# below, and `df_grid_meta`, which a later cell writes to CSV.
# NOTE(review): presumably `indices` holds one grid-cell index per row of
# `xyz`, in the same order -- confirm against lib/gridder.
indices, df_grid_meta = gridder.computeGrid(grid_size)
# Overwrites the zero placeholder column with the computed indices as uint64.
synapses_df["overlap_volume"] = indices.astype("uint64")

In [53]:
# Store the grid metadata returned by the gridder next to the synapse data,
# with the grid size encoded in the file name.
grid_meta_file = data_folder / f"{grid_size_descriptor}_meta.csv"
df_grid_meta.to_csv(grid_meta_file, index=False)

In [59]:
# Distinct overlap volumes that contain at least one synapse, and how many
# of them there are.
selected_overlap_volumes = synapses_df["overlap_volume"].unique()
len(selected_overlap_volumes)

6

In [60]:
# Count synapses per combination of overlap volume, pre neuron, post neuron
# and pre cell type.
group_keys = ["overlap_volume", "pre_id_mapped", "post_id_mapped", "pre_celltype"]
df_synapses_aggregated = (
    synapses_df
    .groupby(group_keys)
    .size()
    .reset_index(name="synapse_count")
)

# Written with plain pandas (this replaced a cudf to_pandas().to_csv call).
aggregated_file = data_folder / f"synapses_{grid_size_descriptor}_aggregated.csv"
df_synapses_aggregated.to_csv(aggregated_file, index=False)

# Total synapse count summed over all groups.
print(df_synapses_aggregated["synapse_count"].sum())
df_synapses_aggregated

55820


Unnamed: 0,overlap_volume,pre_id_mapped,post_id_mapped,pre_celltype,synapse_count
0,0,0,1,18,15
1,0,0,130,18,9
2,0,0,131,18,60
3,0,0,132,18,16
4,0,0,133,18,12
...,...,...,...,...,...
4246,12,643,138,143,1
4247,12,644,138,192,1
4248,12,649,138,217,1
4249,21,628,138,143,1
