In [1]:
#import cudf
import pandas as pd
import numpy as np
from pathlib import Path

In [2]:
# Grid size handed to Gridder.computeGrid further down; the descriptor is
# embedded in the names of all generated files. Values used so far: 50000, 25000.
grid_size = 25000
grid_size_descriptor = f"grid-{grid_size}"

# All inputs and outputs live below <current working directory>/data/FLY.
data_folder = Path.cwd().joinpath("data", "FLY")

# Input tables.
synapses_file = data_folder.joinpath("synapses_flat.csv")
soma_file = data_folder.joinpath("meta", "soma.csv")

# Output folder for the visualization exports; created if it does not exist yet.
plot_folder = data_folder.joinpath("visualization")
plot_folder.mkdir(parents=True, exist_ok=True)

In [3]:
# Explicit per-column dtypes for the synapse table, grouped by dtype.
synapse_dtypes = {
    **dict.fromkeys(["x", "y", "z"], "uint32"),
    **dict.fromkeys(["pre_id", "post_id"], "uint64"),
    **dict.fromkeys(["pre_celltype", "post_celltype", "post_compartment"], "int8"),
}

# pandas is used instead of cudf because cudf is not supported on Windows.
synapses_df = pd.read_csv(synapses_file, dtype=synapse_dtypes)

# Show (rows, columns) of the loaded table.
synapses_df.shape

(3238715, 8)

In [4]:
# Load only the original neuron id and its mapped id from the soma table.
soma_df = pd.read_csv(
    soma_file,
    usecols=["neuron_id", "neuron_id_mapped"],
    dtype={"neuron_id": "uint64", "neuron_id_mapped": "int16"},
)

# Lookup: original neuron id -> (neuron_id_mapped - 1000).
mapping_dict = {
    np.uint64(original_id): np.int16(mapped_id - 1000)
    for original_id, mapped_id in zip(soma_df["neuron_id"], soma_df["neuron_id_mapped"])
}

In [5]:
# Display the soma table (columns neuron_id and neuron_id_mapped) for inspection.
soma_df

Unnamed: 0,neuron_id,neuron_id_mapped
0,648518346341382533,1000
1,648518346349477155,1001
2,648518346349477331,1002
3,648518346349487432,1003
4,648518346349487752,1004
...,...,...
445,648518346349540048,1445
446,648518346349540051,1446
447,648518346349540053,1447
448,648518346349540055,1448


In [6]:
# Number of synapses for every (pre_celltype, post_celltype) combination.
(
    synapses_df
    .groupby(["pre_celltype", "post_celltype"])
    .size()
)

pre_celltype  post_celltype
-1            -1               2221498
               1                944380
               20                22518
               21                10423
               22                15283
               23                 3813
               24                 2269
               25                 2749
 1            -1                  7421
               1                  2189
               20                  460
               21                  114
               22                  264
               23                   43
               24                  175
               25                   12
 20           -1                   117
               1                    81
               20                    5
               21                    6
               22                    8
               23                    1
               25                    2
 21           -1                   368
               1                    

In [7]:
# Placeholder columns (plain pandas Series; these replaced cudf.Series).
# -1 in the *_id_mapped columns means "no mapped neuron assigned";
# overlap_volume starts at 0 and is filled from the grid indices afterwards.
n_synapses = len(synapses_df)
synapses_df["pre_id_mapped"] = pd.Series(np.full(n_synapses, -1), dtype="int16")
synapses_df["post_id_mapped"] = pd.Series(np.full(n_synapses, -1), dtype="int16")
synapses_df["overlap_volume"] = pd.Series(np.zeros(n_synapses), dtype="uint64")

In [8]:
from lib.gridder import Gridder

# Synapse coordinates as an (n_rows, 3) array in x, y, z column order.
xyz = synapses_df.loc[:, ["x", "y", "z"]].to_numpy()

# Hand the positions to the project's Gridder and compute the grid for the
# configured grid_size. computeGrid returns one index per position plus a
# metadata frame (exact semantics are defined in lib.gridder).
gridder = Gridder()
gridder.setPositions(xyz)
indices, df_grid_meta = gridder.computeGrid(grid_size)

# Store the returned per-synapse index as the overlap_volume column.
synapses_df["overlap_volume"] = indices.astype("uint64")

In [9]:
# Persist the grid metadata in the data folder, tagged with the grid size.
grid_meta_file = data_folder / f"{grid_size_descriptor}_meta.csv"
df_grid_meta.to_csv(grid_meta_file, index=False)

In [10]:
# Export a 10% random sample of the synapse table for plotting.
# NOTE: at this point pre_id_mapped / post_id_mapped still hold their -1
# placeholders (the id mapping runs in a later cell), and the same file name is
# written again further down from df_synapses_range, which overwrites this file.
# A fixed random_state makes the sample reproducible across kernel restarts.
synapses_df.sample(frac=0.10, random_state=0).to_csv(
    plot_folder / f"synapses_{grid_size_descriptor}_10pct.csv",
    index=False,
)

In [11]:
# Translate original neuron ids into mapped ids for both synapse partners.
#
# The previous per-neuron loop ran two full-table boolean scans for every entry
# of mapping_dict. A single vectorized lookup per column gives the same result:
# ids present in mapping_dict receive their mapped id, all others receive -1.
# The lookup index is built explicitly as uint64 so the large ids are matched
# exactly (no detour through floating point).
id_lookup = pd.Series(
    list(mapping_dict.values()),
    index=pd.Index(list(mapping_dict.keys()), dtype="uint64"),
    dtype="int16",
)

for id_column, mapped_column in (("pre_id", "pre_id_mapped"), ("post_id", "post_id_mapped")):
    synapses_df[mapped_column] = (
        synapses_df[id_column]
        .map(id_lookup)   # NaN for ids without a mapping
        .fillna(-1)       # -1 marks "not a mapped neuron"
        .astype("int16")
    )

In [12]:
# Synapses where at least one partner (pre or post) has a mapped id (> -1).
has_mapped_pre = synapses_df["pre_id_mapped"].gt(-1)
has_mapped_post = synapses_df["post_id_mapped"].gt(-1)
synapses_mapped_neurons = synapses_df.loc[has_mapped_pre | has_mapped_post]

# Export the selection for plotting, then display it.
synapses_mapped_neurons.to_csv(
    plot_folder / f"synapses_{grid_size_descriptor}_mapped-neurons.csv",
    index=False,
)
synapses_mapped_neurons

Unnamed: 0,x,y,z,pre_id,post_id,pre_celltype,post_celltype,post_compartment,pre_id_mapped,post_id_mapped,overlap_volume
0,442184,191008,68080,648518346349368558,648518346349532006,-1,1,2,-1,112,2186
1,277176,178544,8320,648518346341406765,648518346349537692,-1,1,2,-1,236,994
2,349024,279152,25440,648518346342407933,648518346349517783,-1,20,4,-1,29,1638
5,273144,263016,41520,648518346341367494,648518346349537814,-1,1,2,-1,244,841
7,471388,263340,57160,648518346342489852,648518346349532796,-1,1,2,-1,121,2410
...,...,...,...,...,...,...,...,...,...,...,...
3238701,266424,208592,84280,648518346346307512,648518346349538157,-1,1,2,-1,281,815
3238703,352608,170528,72600,648518346349368344,648518346349531362,-1,1,2,-1,102,1584
3238706,358136,223536,81920,648518346346298709,648518346349538278,-1,1,2,-1,297,1613
3238708,278072,240560,1800,648518346346883758,648518346349538387,-1,1,2,-1,311,1022


In [13]:
# Synapses where both partners (pre and post) have a mapped id (> -1).
has_mapped_pre = synapses_df["pre_id_mapped"].gt(-1)
has_mapped_post = synapses_df["post_id_mapped"].gt(-1)
synapses_btw_mapped_neurons = synapses_df.loc[has_mapped_pre & has_mapped_post]

# Export the selection for plotting, then display it.
synapses_btw_mapped_neurons.to_csv(
    plot_folder / f"synapses_{grid_size_descriptor}_between-mapped-neurons.csv",
    index=False,
)
synapses_btw_mapped_neurons

Unnamed: 0,x,y,z,pre_id,post_id,pre_celltype,post_celltype,post_compartment,pre_id_mapped,post_id_mapped,overlap_volume
159,329096,189928,38960,648518346349538298,648518346349538056,1,1,2,302,274,1387
305,267948,282944,69760,648518346349538791,648518346349536159,22,1,4,338,155,856
500,413880,202128,85160,648518346349528994,648518346349537255,22,1,4,82,198,1991
1184,220096,287048,58760,648518346349538462,648518346349538179,1,24,2,322,282,464
1224,428080,281152,15800,648518346349539333,648518346349530359,1,1,2,354,92,2226
...,...,...,...,...,...,...,...,...,...,...,...
3236348,412148,228624,31320,648518346349539215,648518346349532796,22,1,2,353,121,2003
3236450,241784,157376,40560,648518346349538179,648518346349533227,24,1,2,282,123,589
3236800,348312,262416,53760,648518346349539333,648518346349539846,1,22,2,354,421,1430
3237970,209920,208500,71880,648518346349538791,648518346349532086,22,1,4,338,116,422


In [14]:
# Distinct overlap_volume values that occur among the synapses touching a
# mapped neuron, followed by how many there are.
selected_overlap_volumes = synapses_mapped_neurons["overlap_volume"].unique()
len(selected_overlap_volumes)

352

In [15]:
# Optional restriction of the synapse table before export and aggregation.
# When disabled, df_synapses_range is simply a copy of the full table.
range_filter_enabled = False

if range_filter_enabled:

    # Margin and x/y bounds around the synapses of mapped neurons. They belong
    # to a bounding-box filter on x/y (and optionally z) that is currently not
    # applied; only the overlap_volume filter below is active.
    # The coordinates are unsigned, so convert to int before subtracting to
    # avoid wraparound when a minimum is smaller than the offset.
    offset = 5000

    x_min = int(synapses_mapped_neurons.x.min()) - offset
    y_min = int(synapses_mapped_neurons.y.min()) - offset

    x_max = int(synapses_mapped_neurons.x.max()) + offset
    y_max = int(synapses_mapped_neurons.y.max()) + offset

    # Keep every synapse whose overlap_volume also occurs among the synapses
    # of mapped neurons.
    in_selected_volume = synapses_df.overlap_volume.isin(selected_overlap_volumes)

    # reset_index returns a new frame, so its result has to be assigned;
    # previously the call was a no-op and the old row labels were kept.
    df_synapses_range = synapses_df[in_selected_volume].reset_index(drop=True)

else:

    df_synapses_range = synapses_df.copy()

In [16]:
# Write the complete (optionally range-filtered) synapse table.
# Plain pandas export; this replaced df_synapses_range.to_pandas().to_csv.
df_synapses_range.to_csv(
    data_folder / f"synapses_{grid_size_descriptor}.csv",
    index=False,
)

# Write a 10% random sample for plotting. A fixed random_state makes the
# exported sample reproducible across kernel restarts.
df_synapses_range.sample(frac=0.10, random_state=0).to_csv(
    plot_folder / f"synapses_{grid_size_descriptor}_10pct.csv",
    index=False,
)

In [17]:
# Display the table that was just exported (full table unless the range filter is enabled).
df_synapses_range

Unnamed: 0,x,y,z,pre_id,post_id,pre_celltype,post_celltype,post_compartment,pre_id_mapped,post_id_mapped,overlap_volume
0,442184,191008,68080,648518346349368558,648518346349532006,-1,1,2,-1,112,2186
1,277176,178544,8320,648518346341406765,648518346349537692,-1,1,2,-1,236,994
2,349024,279152,25440,648518346342407933,648518346349517783,-1,20,4,-1,29,1638
3,219576,292024,62920,648518346343903120,648518346341862848,-1,-1,-1,-1,-1,478
4,268360,152968,60720,648518346342805166,648518346346304577,-1,-1,-1,-1,-1,786
...,...,...,...,...,...,...,...,...,...,...,...
3238710,335680,183552,27800,648518346349380560,648518346349538055,-1,-1,-1,-1,-1,1387
3238711,188712,255640,30560,648518346344323784,648518346344322355,-1,-1,-1,-1,-1,253
3238712,462984,156928,56280,648518346349430658,648518346342795643,-1,-1,-1,-1,-1,2354
3238713,201708,278808,18560,648518346342769846,648518346343461632,-1,-1,-1,-1,-1,462


In [18]:
# Columns that together identify one aggregation bucket.
aggregation_keys = [
    "overlap_volume",
    "pre_id_mapped",
    "post_id_mapped",
    "pre_celltype",
    "post_celltype",
    "post_compartment",
]

# Count the synapses in every bucket; the count ends up in "synapse_count".
df_synapses_aggregated = (
    df_synapses_range
    .groupby(aggregation_keys)
    .size()
    .reset_index(name="synapse_count")
)

# Plain pandas export; this replaced df_synapses_aggregated.to_pandas().to_csv.
df_synapses_aggregated.to_csv(
    data_folder / f"synapses_{grid_size_descriptor}_aggregated.csv",
    index=False,
)

# Print the sum of all bucket counts (total number of aggregated synapses),
# then display the aggregated table.
print(df_synapses_aggregated.synapse_count.sum())
df_synapses_aggregated

3238715


Unnamed: 0,overlap_volume,pre_id_mapped,post_id_mapped,pre_celltype,post_celltype,post_compartment,synapse_count
0,43,-1,-1,-1,-1,-1,2
1,44,-1,-1,-1,-1,-1,347
2,44,-1,44,-1,1,2,2
3,44,-1,130,-1,1,2,1
4,44,-1,268,-1,1,2,3
...,...,...,...,...,...,...,...
43799,2635,-1,294,-1,1,2,2
43800,2635,-1,373,-1,1,2,4
43801,2635,-1,376,-1,1,2,1
43802,2635,-1,442,-1,1,2,4
