In [6]:
import pandas as pd
import navis
import os
import trimesh
import rtree
from joblib import Parallel, delayed
from joblib import parallel_backend
from tqdm import tqdm


# Read 2N IDs from CSV
twoN_ids = pd.read_csv('../2N_ids.csv')

# Read previouslyfiltered synapses from CSV
syn_df = pd.read_csv('./input/filtered_1N_synapses.csv')

# Read ALPN IDs from CSV
alpn_ids = pd.read_csv('alpn_ids.csv')

# Subtract ALPN IDs from 2N IDs
other_ids = twoN_ids[~twoN_ids['root_id'].isin(alpn_ids['ids'])] # Non ALPN 2N ids


In [8]:
# Load all .ply mesh files
meshes = {}
glom_names = []
mesh_dir = './glom_meshes_proc'
for mesh_file in os.listdir(mesh_dir):
    if mesh_file.endswith('.ply'):
        glom_name = mesh_file.replace('.ply', '')
        glom_names.append(glom_name)
        meshes[glom_name] = navis.read_mesh(os.path.join(mesh_dir, mesh_file))

# Filter synapses to only include those where post_pt_root_id matches alpn_ids
other_syn_df = syn_df[syn_df['pre_pt_root_id'].isin(other_ids['root_id'])]
# Initialize new columns for each glomerulus with 0s
for glom in glom_names:
    other_syn_df.loc[:, glom] = 0

print(other_syn_df.columns)

Index(['id', 'pre_pt_root_id', 'post_pt_root_id', 'connection_score',
       'cleft_score', 'gaba', 'ach', 'glut', 'oct', 'ser',
       ...
       'VP2_L', 'VP2_R', 'VP3_L', 'VP3_R', 'VP4_L', 'VP4_R', 'VP5_L', 'VP5_R',
       'V_L', 'V_R'],
      dtype='object', length=134)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  other_syn_df.loc[:, glom] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  other_syn_df.loc[:, glom] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  other_syn_df.loc[:, glom] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value 

In [10]:
# Function to process a single row
def process_row(idx, row, meshes):
    result = {glom_name: 0 for glom_name in meshes.keys()}  # Initialize all glomerulus columns to 0
    try:
        point = row[['pre_pt_position_x', 'pre_pt_position_y', 'pre_pt_position_z']].values
        for glom_name, mesh in meshes.items():
            trimesh_mesh = mesh.trimesh
            if trimesh_mesh.contains([point])[0]:
                result[glom_name] = 1
    except IndexError:
        print(f"Index error occurred at idx {idx}")
    return idx, result

# Parallel processing with progress bar
with parallel_backend("loky", inner_max_num_threads=1):  # Use loky backend for better thread management
    results = Parallel(n_jobs=-1)(
        delayed(process_row)(idx, row, meshes)
        for idx, row in tqdm(other_syn_df.iterrows(), total=len(other_syn_df), desc="Processing rows")
    )

# Update the DataFrame
for idx, result in results:
    for glom_name, value in result.items():
        other_syn_df.loc[idx, glom_name] = value

Processing rows: 100%|██████████| 58972/58972 [1:46:40<00:00,  9.21it/s]


In [11]:
other_syn_df.to_csv('other2N_glomeruli_output.csv', index=False)

# Group by 'post_pt_root_id' and sum all other columns.
summed_df = other_syn_df.groupby('pre_pt_root_id').sum(numeric_only=True).reset_index()

# Display the resulting dataframe.
summed_df.head()
summed_df.to_csv('other2N_glomeruli_output_sum.csv', index=False)