# 1.0 Import immunolabelled neurons from Eckstein 

In [1]:
import pandas as pd 
# Load the table of neurons with a confirmed neurotransmitter class.
# The rendered output below shows the columns used later in this notebook:
# id, known_nt, cell_type and dataset.
gt=pd.read_csv('~/Documents/synapse_ntclassification/confirmedntclass.csv')

In [2]:
# Keep only the rows whose `dataset` column is the hemibrain v1.2.1 release,
# then show the filtered frame as the cell output.
gt_hemi = gt.loc[gt['dataset'].eq('hemibrain:v1.2.1')]
gt_hemi

Unnamed: 0,id,known_nt,cell_type,dataset
1679,480918798,glutamate,LHPD3a4_c,hemibrain:v1.2.1
1680,915960391,dopamine,FB6H,hemibrain:v1.2.1
1681,759810119,serotonin,CSD,hemibrain:v1.2.1
1682,267214250,acetylcholine,pC1b,hemibrain:v1.2.1
1683,851459972,serotonin,CSD,hemibrain:v1.2.1
...,...,...,...,...
5598,882400424,acetylcholine,unknown,hemibrain:v1.2.1
5599,1041253084,acetylcholine,unknown,hemibrain:v1.2.1
5600,2095549849,acetylcholine,unknown,hemibrain:v1.2.1
5601,1131988003,acetylcholine,unknown,hemibrain:v1.2.1


# 2.0 The type list can be retrieved via

## 2.1 cell_type

In [10]:
# Rows of the hemibrain ground truth whose known neurotransmitter is serotonin.
df_nt = gt_hemi.loc[gt_hemi['known_nt'].eq('serotonin')]

# Distinct cell types among those rows, in order of first appearance,
# without the 'unknown' placeholder label.
type_list = [
    cell_type
    for cell_type in df_nt['cell_type'].unique().tolist()
    if cell_type != 'unknown'
]
print(type_list)

['CSD', '5-HTPMPD01', 'ExR3', 'DPM', '5-HTPMPV01', '5-HTPMPV03', 'ER4d', '5-HTPLP01']


## 2.2 BodyID 

In [None]:
# Alternative to the cell-type list: collect the `id` column of the serotonin
# rows as a plain Python list, which is passed to fetch_neurons() below.
bodyID=df_nt['id'].tolist()

In [29]:
import os

from neuprint import Client
from neuprint import fetch_neurons, NeuronCriteria as NC, SynapseCriteria as SC

# SECURITY: never paste an auth token into the notebook - it ends up in version
# control and in every shared copy. Export it before starting Jupyter instead:
#     export NEUPRINT_APPLICATION_CREDENTIALS=<your token>
# The token that was previously hard-coded here should be treated as leaked and
# regenerated from the neuPrint account page.
TOKEN = os.environ.get("NEUPRINT_APPLICATION_CREDENTIALS")
if not TOKEN:
    raise RuntimeError(
        "Set the NEUPRINT_APPLICATION_CREDENTIALS environment variable to your "
        "neuPrint auth token before running this cell."
    )

# Connect to the hemibrain v1.2.1 dataset on the public neuPrint server.
c = Client('neuprint.janelia.org', 'hemibrain:v1.2.1', TOKEN)

# Look up the neurons whose body IDs were collected from the ground-truth table.
# NOTE(review): per the neuprint-python docs the second return value of
# fetch_neurons() is a per-ROI synapse-count table rather than connectivity,
# so the name `conn_df` may be misleading - confirm before using it.
neuron_df, conn_df = fetch_neurons(bodyID)

In [30]:
import numpy as np 
# Unique neuPrint `type` labels of the fetched neurons, in order of first
# appearance. dropna() discards both None and every NaN value; the previous
# `x is not np.nan` identity test only recognised the np.nan singleton, so a
# NaN created elsewhere (e.g. float('nan')) would have leaked into the list.
type_list = neuron_df['type'].dropna().unique().tolist()
print("List of unique types after removing 'None':", type_list)

List of unique types after removing 'None': ['5-HTPMPD01', 'DPM', '5-HTPMPV01', '5-HTPMPV03', 'CSD', 'ExR3', 'ER4d', '5-HTPLP01']


In [22]:
from neuprint import fetch_synapse_connections, SynapseCriteria


# Select neurons by the cell types gathered above.
neuron_criteria = NC(type=type_list)
# Create a SynapseCriteria object with the specified confidence level (0.99),
# with no ROI restriction.
synapse_criteria = SC(rois=None, confidence=0.99)
# Fetch individual synapse connections. The neuron criteria are passed as the
# first positional argument and None as the second.
# NOTE(review): presumably first = source (presynaptic) side and second =
# target side, which matches the later use of `bodyId_pre` - confirm against
# the neuprint-python documentation.
# NOTE(review): the result is stored in a variable called `neuprint`, the same
# name as the package; a later cell reads it under that name.
neuprint=fetch_synapse_connections(neuron_criteria,None,synapse_criteria)

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/193805 [00:00<?, ?it/s]

# 3.0 Functions for sampling neurons and building bounding boxes

In [18]:

def sample_top_neurons_by_synapse_count(df, num_neurons=10, samples_per_neuron=5, gt_neurotransmitter=None, seed=42):
    """Sample a fixed number of synapse rows from the neurons with the most rows.

    Neurons are identified by the ``bodyId_pre`` column; every row of ``df``
    counts as one synapse of that neuron.

    Parameters
    ----------
    df : pandas.DataFrame
        Synapse table containing at least a ``bodyId_pre`` column.
    num_neurons : int
        How many of the most frequent ``bodyId_pre`` values to consider.
    samples_per_neuron : int
        Number of rows drawn (without replacement) from each considered neuron.
        Neurons with fewer rows than this are skipped.
    gt_neurotransmitter : object
        Value written into the ``gt_neurotransmitter`` column of every sampled row.
    seed : int
        ``random_state`` passed to ``DataFrame.sample`` for every neuron.

    Returns
    -------
    pandas.DataFrame or None
        The sampled rows with ``gt_neurotransmitter`` and ``neuron_class`` as
        the first two columns, followed by the columns of ``df``.
        ``neuron_class`` is ``'neuron1'``, ``'neuron2'``, ... in the order the
        neurons were successfully sampled. ``None`` when no neuron had enough rows.
    """
    print(f"Number of unique bodyId_pre (neurons) in dataset: {df['bodyId_pre'].nunique()}")

    # value_counts() sorts by frequency, so head() yields the busiest neurons.
    synapse_counts = df['bodyId_pre'].value_counts()
    top_neurons = synapse_counts.head(num_neurons).index.tolist()
    print(f"Top {len(top_neurons)} neurons selected by synapse count.")

    sampled_groups = []
    for neuron_id in top_neurons:
        synapses = df[df['bodyId_pre'] == neuron_id]
        n_synapses = len(synapses)
        print(f"Neuron {neuron_id} has {n_synapses} synapses.")

        if n_synapses < samples_per_neuron:
            print(f"Neuron {neuron_id} skipped (only {n_synapses} synapses available).")
            continue

        picked = synapses.sample(n=samples_per_neuron, random_state=seed).copy()
        # Labels are numbered by successful samples, so skipped neurons leave no gap.
        picked['neuron_class'] = f'neuron{len(sampled_groups) + 1}'
        picked['gt_neurotransmitter'] = gt_neurotransmitter
        sampled_groups.append(picked)

    if not sampled_groups:
        print("No neurons were sampled successfully.")
        return None

    final_df = pd.concat(sampled_groups)

    # Sanity check on the total row count.
    expected_rows = len(sampled_groups) * samples_per_neuron
    if len(final_df) != expected_rows:
        print(f"Warning: Final dataframe has {len(final_df)} rows, expected {expected_rows}.")

    # Move the two annotation columns to the front, keep the rest in place.
    leading = ['gt_neurotransmitter', 'neuron_class']
    remaining = [col for col in final_df.columns if col not in leading]
    return final_df[leading + remaining]


In [26]:
# Draw 15 synapses from each of the 20 neurons with the most synapse rows and
# tag every sampled row with the ground-truth neurotransmitter 'serotonin'.
sampled_df = sample_top_neurons_by_synapse_count(
    df=neuprint,
    num_neurons=20,
    samples_per_neuron=15,
    gt_neurotransmitter='serotonin'
)

# The sampler returns None when no neuron had enough synapses.
if sampled_df is not None:
    display(sampled_df.head())
    # NOTE(review): this writes relative to the current working directory,
    # while the bounding-box cell reads
    # '~/Documents/synapse_ntclassification/serotonin_neurons.csv' - confirm
    # that both resolve to the same file.
    sampled_df.to_csv("serotonin_neurons.csv", index=False)

Number of unique bodyId_pre (neurons) in dataset: 38
Top 20 neurons selected by synapse count.
Neuron 5813105172 has 1736 synapses.
Neuron 297230760 has 202 synapses.
Neuron 1036637638 has 198 synapses.
Neuron 669325882 has 186 synapses.
Neuron 791527493 has 156 synapses.
Neuron 425276848 has 126 synapses.
Neuron 297908801 has 125 synapses.
Neuron 919763043 has 119 synapses.
Neuron 1324365879 has 109 synapses.
Neuron 759810119 has 92 synapses.
Neuron 541697718 has 87 synapses.
Neuron 5813078603 has 60 synapses.
Neuron 851459972 has 57 synapses.
Neuron 1167632513 has 39 synapses.
Neuron 1261000309 has 36 synapses.
Neuron 1261056809 has 35 synapses.
Neuron 1167295916 has 34 synapses.
Neuron 1167295872 has 32 synapses.
Neuron 1167295856 has 32 synapses.
Neuron 1167891559 has 31 synapses.


Unnamed: 0,gt_neurotransmitter,neuron_class,bodyId_pre,bodyId_post,roi_pre,roi_post,x_pre,y_pre,z_pre,x_post,y_post,z_post,confidence_pre,confidence_post
3525,serotonin,neuron1,5813105172,425790257,gL(R),gL(R),18106,33674,17302,18105,33684,17310,0.994,0.999597
3637,serotonin,neuron1,5813105172,1515418525,b'L(R),b'L(R),21914,28523,19487,21902,28526,19499,0.991,0.992355
3620,serotonin,neuron1,5813105172,332340236,aL(R),aL(R),17499,27917,5847,17484,27922,5841,0.993,0.992191
3837,serotonin,neuron1,5813105172,1048854479,b'L(R),b'L(R),16061,32466,16639,16064,32476,16633,0.994,0.999913
3744,serotonin,neuron1,5813105172,1170400248,,b'L(R),23940,29929,18948,23924,29921,18954,0.994,0.998461


In [25]:
# Bounding box according to the centroid of each pre/post synapse pair
import pandas as pd

# Load the sampled synapses from CSV; the code below reads the
# x/y/z_pre and x/y/z_post coordinate columns.
centroid = pd.read_csv('~/Documents/synapse_ntclassification/serotonin_neurons.csv')

# Function to calculate the centroid, bounding box, and check if any coordinate is out of range
def calculate_centroid_and_check(row, bbox_expansion):
    """Build a cube around the midpoint of one pre/post synapse pair.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide ``x_pre``, ``y_pre``, ``z_pre``, ``x_post``, ``y_post``
        and ``z_post``.
    bbox_expansion : number
        Distance added to and subtracted from the midpoint along every axis.

    Returns
    -------
    pandas.Series
        ``centroid_x/y/z`` (midpoint of the pre and post coordinates),
        ``bbox_{x,y,z}min`` / ``bbox_{x,y,z}max`` (midpoint -/+ expansion) and
        ``any_out_of_range``, which is True when the pre or post coordinate
        falls outside the box on at least one axis.
    """
    def _axis_span(pre, post):
        # Midpoint, box limits and "does not fit" flag for a single axis.
        mid = (pre + post) / 2
        low, high = mid - bbox_expansion, mid + bbox_expansion
        fits = low <= pre <= high and low <= post <= high
        return mid, low, high, not fits

    cx, x_lo, x_hi, x_outside = _axis_span(row['x_pre'], row['x_post'])
    cy, y_lo, y_hi, y_outside = _axis_span(row['y_pre'], row['y_post'])
    cz, z_lo, z_hi, z_outside = _axis_span(row['z_pre'], row['z_post'])

    # Key order matters: the caller assigns these values to columns by position.
    return pd.Series({
        'centroid_x': cx,
        'centroid_y': cy,
        'centroid_z': cz,
        'bbox_xmin': x_lo,
        'bbox_xmax': x_hi,
        'bbox_ymin': y_lo,
        'bbox_ymax': y_hi,
        'bbox_zmin': z_lo,
        'bbox_zmax': z_hi,
        'any_out_of_range': any((x_outside, y_outside, z_outside)),
    })

# Half-width of the bounding box: the box spans centroid +/- this value along
# every axis, in the same units as the x/y/z synapse coordinates.
bbox_expansion_value = 15 

# Output files. The out-of-range file used to be 'out_of_range_octopamine.csv'
# (a leftover from another neurotransmitter's analysis that would overwrite that
# analysis' results), and the status messages named files that were never
# written. Paths are now defined once and the messages report the real paths.
out_of_range_path = 'out_of_range_serotonin.csv'
bbox_path = 'serotonin_bbox.csv'

# Apply the function to each row, passing the custom bbox expansion value
centroid[['centroid_x', 'centroid_y', 'centroid_z', 
          'bbox_xmin', 'bbox_xmax', 'bbox_ymin', 'bbox_ymax', 'bbox_zmin', 'bbox_zmax', 
          'any_out_of_range']] = centroid.apply(calculate_centroid_and_check, axis=1, bbox_expansion=bbox_expansion_value)

# Specify the columns you want to save
columns_to_save = [
    'neuron_class','gt_neurotransmitter', 'bodyId_pre', 'bodyId_post',
    'bbox_xmin', 'bbox_xmax', 'bbox_ymin', 'bbox_ymax', 'bbox_zmin', 'bbox_zmax',
    'x_pre', 'y_pre', 'z_pre', 'x_post', 'y_post', 'z_post', 
    'any_out_of_range'  # Single column to indicate out of range
]

# Rows where the pre or post coordinate does not fit inside the bounding box
out_of_range_data = centroid[centroid['any_out_of_range']]

# If there are rows with out of range values, save them to a new CSV file (only the selected columns)
if not out_of_range_data.empty:
    out_of_range_data[columns_to_save].to_csv(out_of_range_path, index=False)
    print(f"Out of range data saved to '{out_of_range_path}'.")
else:
    print("No out of range data found.")

# Save the full DataFrame back to CSV with the selected columns
centroid[columns_to_save].to_csv(bbox_path, index=False)
print(f"Full data with centroids and bounding boxes saved to '{bbox_path}'.")

Out of range data saved to 'out_of_range_neurons.csv'.
Full data with centroids and bounding boxes saved to 'bbox.csv'.
