In [1]:
#import cudf
import pandas as pd
import numpy as np
from pathlib import Path

In [2]:
# Input/output locations, all rooted at <cwd>/data/FLY.
data_folder = Path.cwd().joinpath("data", "FLY")
plot_folder = data_folder.joinpath("visualization")
plot_folder.mkdir(parents=True, exist_ok=True)  # parents=True also creates data_folder if missing

synapses_file = data_folder.joinpath("synapses_flat_v783.csv")
synapses_file_curated = data_folder.joinpath("synapses_flat_v783_ext.csv")
soma_file = data_folder.joinpath("meta", "soma.csv")

# Grid size passed to Gridder.computeGrid; the descriptor is embedded in output file names.
grid_size = 25000  # 50000 25000
grid_size_descriptor = f"grid-{grid_size}"

In [None]:
def curate_annotate_synapses(filename_in, filename_out):
    """Copy a comma-separated file, writing "0" into empty leading id fields.

    Every line of ``filename_in`` is split on ``","``; if the first and/or
    second field is an empty string it is replaced by ``"0"``. All other
    fields, the header line included, are written back unchanged.

    Args:
        filename_in: Path of the file to read.
        filename_out: Path of the file to write (overwritten if it exists).

    Notes:
        * Blank lines are copied through unchanged and lines with a single
          field are handled without raising ``IndexError``.
        * The line terminator is detached before splitting, so an empty
          field that is also the last field of the line is still replaced.
    """

    def replace_empty_with_zero(value):
        return "0" if value == "" else value

    with open(filename_in, "r") as f_in, open(filename_out, "w") as f_out:
        for line in f_in:
            # Split off the terminator so it cannot hide an empty last field.
            content = line.rstrip("\r\n")
            terminator = line[len(content):]

            if not content:
                # Nothing to curate on a blank line; keep it as it is.
                f_out.write(line)
                continue

            parts = content.split(",")
            # Only the first two fields are curated; shorter lines are tolerated.
            for position in range(min(2, len(parts))):
                parts[position] = replace_empty_with_zero(parts[position])
            f_out.write(",".join(parts) + terminator)

In [None]:
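# One-time preprocessing step, disabled by default: uncomment to (re)generate
# synapses_file_curated from synapses_file.
# curate_annotate_synapses(synapses_file, synapses_file_curated)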

In [3]:
# Coordinates are read as uint32 and root ids as uint64.
synapse_dtypes = dict.fromkeys(("x", "y", "z"), "uint32")
synapse_dtypes.update(pre_root_id="uint64", post_root_id="uint64")

synapses_df = pd.read_csv(synapses_file_curated, dtype=synapse_dtypes)
synapses_df

Unnamed: 0,pre_root_id,post_root_id,x,y,z
0,720575940596125868,720575940608552405,705988,275676,221800
1,0,0,706908,275084,222160
2,0,0,707848,275456,222100
3,0,0,708050,275176,222200
4,0,0,711344,267664,223360
...,...,...,...,...,...
34156315,0,0,436160,163856,81040
34156316,0,0,436370,165082,78360
34156317,0,0,438644,167636,77860
34156318,0,0,442126,167062,78700


In [None]:
# Only the two id columns of the soma table are needed.
soma_columns = {"neuron_id": "uint64", "neuron_id_mapped": "int16"}
soma_df = pd.read_csv(soma_file, dtype=soma_columns, usecols=list(soma_columns))

# neuron_id -> (neuron_id_mapped - 1000), stored as int16.
# NOTE(review): the fixed offset of 1000 is presumably the base of the mapped-id numbering — confirm.
mapping_dict = {
    np.uint64(raw_id): np.int16(mapped_id - 1000)
    for raw_id, mapped_id in zip(soma_df["neuron_id"], soma_df["neuron_id_mapped"])
}

In [None]:
# Display the soma table (columns neuron_id and neuron_id_mapped) for inspection.
soma_df

In [None]:
# Number of synapses per (pre_celltype, post_celltype) pair.
# NOTE(review): the frame displayed after loading only shows pre_root_id, post_root_id, x, y, z;
# unless the cell-type columns are added elsewhere this groupby raises KeyError — confirm.
synapses_df.groupby(["pre_celltype", "post_celltype"]).size()

In [5]:
# Add the derived columns with placeholder values: -1 marks "no mapped neuron",
# 0 is the initial overlap volume (overwritten once the grid is computed).
n_rows = len(synapses_df)
for mapped_column in ("pre_id_mapped", "post_id_mapped"):
    synapses_df[mapped_column] = pd.Series(np.full(n_rows, -1), dtype="int16")  # replaced cudf.Series
synapses_df["overlap_volume"] = pd.Series(np.full(n_rows, 0), dtype="uint64")  # replaced cudf.Series

In [6]:
# Assign every synapse to a grid cell using the project-local Gridder helper.
from lib.gridder import Gridder

gridder = Gridder()

# One row per synapse holding its x, y, z values.
xyz = synapses_df[["x", "y", "z"]].to_numpy()

# Positions must be registered before the grid is computed.
gridder.setPositions(xyz)
# NOTE(review): presumably `indices` holds one grid-cell index per row of xyz and
# `df_grid_meta` describes the cells — confirm against lib.gridder.
indices, df_grid_meta = gridder.computeGrid(grid_size)
# Replaces the placeholder overlap_volume values with the computed indices.
synapses_df["overlap_volume"] = indices.astype("uint64")

In [7]:
# Persist the grid metadata next to the input data.
grid_meta_path = data_folder / f"{grid_size_descriptor}_meta.csv"
df_grid_meta.to_csv(grid_meta_path, index=False)

In [8]:
# Export a 1 % subsample for plotting. The seed is fixed so that a
# Restart & Run All writes the same rows every time.
synapses_df.sample(frac=0.01, random_state=0).to_csv(plot_folder/f"synapses_{grid_size_descriptor}_1pct.csv", index=False)

In [None]:
# Translate root ids into mapped neuron ids in a single vectorised pass.
# The loaded frame names its id columns pre_root_id / post_root_id, so those are
# the columns looked up here. Ids without an entry in mapping_dict become -1,
# which also makes this cell idempotent when it is re-run.
id_lookup = pd.Series(
    list(mapping_dict.values()),
    # Explicit uint64 index so the 64-bit ids are never routed through floats.
    index=pd.Index(list(mapping_dict.keys()), dtype="uint64"),
    dtype="int16",
)
for side in ("pre", "post"):
    synapses_df[f"{side}_id_mapped"] = (
        synapses_df[f"{side}_root_id"].map(id_lookup).fillna(-1).astype("int16")
    )

In [None]:
# Synapses where at least one side (pre or post) belongs to a mapped neuron.
pre_is_mapped = synapses_df["pre_id_mapped"] > -1
post_is_mapped = synapses_df["post_id_mapped"] > -1
synapses_mapped_neurons = synapses_df[pre_is_mapped | post_is_mapped]

mapped_neurons_path = plot_folder / f"synapses_{grid_size_descriptor}_mapped-neurons.csv"
synapses_mapped_neurons.to_csv(mapped_neurons_path, index=False)
synapses_mapped_neurons

In [None]:
# Synapses where both the pre and the post side belong to mapped neurons.
pre_is_mapped = synapses_df["pre_id_mapped"] > -1
post_is_mapped = synapses_df["post_id_mapped"] > -1
synapses_btw_mapped_neurons = synapses_df[pre_is_mapped & post_is_mapped]

between_mapped_path = plot_folder / f"synapses_{grid_size_descriptor}_between-mapped-neurons.csv"
synapses_btw_mapped_neurons.to_csv(between_mapped_path, index=False)
synapses_btw_mapped_neurons

In [None]:
# Distinct overlap volumes that contain at least one synapse of a mapped neuron.
selected_overlap_volumes = synapses_mapped_neurons["overlap_volume"].unique()
len(selected_overlap_volumes)

In [None]:
# Optionally restrict the synapses to the overlap volumes touched by mapped neurons.
range_filter_enabled = False

if range_filter_enabled:

    # Padded x/y bounds of the mapped-neuron synapses. They are only used by the
    # bounding-box filter, which is currently disabled (see below).
    offset = 5000

    x_min = synapses_mapped_neurons.x.min() - offset
    y_min = synapses_mapped_neurons.y.min() - offset
    #z_min = synapses_mapped_neurons.z.min() - offset

    x_max = synapses_mapped_neurons.x.max() + offset
    y_max = synapses_mapped_neurons.y.max() + offset
    #z_max = synapses_mapped_neurons.z.max() + offset

    # Disabled alternative: bounding-box filter on the coordinates.
    #   (synapses_df.x >= x_min) & (synapses_df.x <= x_max) &
    #   (synapses_df.y >= y_min) & (synapses_df.y <= y_max) &
    #   (synapses_df.z >= z_min) & (synapses_df.z <= z_max)
    in_selected_volume = synapses_df.overlap_volume.isin(selected_overlap_volumes)

    # reset_index returns a new frame; the result has to be assigned, otherwise
    # the filtered frame keeps the row labels of synapses_df.
    df_synapses_range = synapses_df[in_selected_volume].reset_index(drop=True)

else:

    df_synapses_range = synapses_df.copy()

In [None]:
# Write the complete selection, plus a 10 % subsample for plotting.
df_synapses_range.to_csv(data_folder/f"synapses_{grid_size_descriptor}.csv", index=False) # replaced df_synapses_range.to_pandas().to_csv
# Fixed seed so the exported subsample is identical on every run.
df_synapses_range.sample(frac=0.10, random_state=0).to_csv(plot_folder/f"synapses_{grid_size_descriptor}_10pct.csv", index=False)

In [None]:
# Display the selected synapses (pandas truncates the rendering of large frames).
df_synapses_range

In [None]:
# Count synapses per overlap volume and pre/post mapped neuron, additionally
# split by cell type / compartment annotations when those columns are available.
group_columns = ["overlap_volume", "pre_id_mapped", "post_id_mapped"]

# The annotation columns are not among the columns read from the synapse CSV in
# this notebook; grouping by a missing column would raise KeyError, so only the
# ones actually present are used. With all three present the grouping (and the
# key order) is the same as before.
annotation_columns = ["pre_celltype", "post_celltype", "post_compartment"]
missing_columns = [name for name in annotation_columns if name not in df_synapses_range.columns]
if missing_columns:
    print(f"Not grouping by missing columns: {missing_columns}")
group_columns += [name for name in annotation_columns if name in df_synapses_range.columns]

df_synapses_aggregated = (
    df_synapses_range
    .groupby(group_columns)
    .size()
    .reset_index(name="synapse_count")
)

df_synapses_aggregated.to_csv(data_folder/f"synapses_{grid_size_descriptor}_aggregated.csv", index=False) # replaced df_synapses_aggregated.to_pandas().to_csv
# Total number of synapses covered by the aggregation.
print(df_synapses_aggregated.synapse_count.sum())
df_synapses_aggregated