In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

In [2]:
synapses = pd.read_csv('data/raw/all_synapses.csv', index_col=[0])
cells = pd.read_csv('data/cells_split_xyz_no_duplicates.csv', index_col=[0])

In [3]:
# Step 1: Merge the dataframes
merged_df = pd.merge(synapses, cells, how='left', left_on='pre_pt_root_id', right_on='pt_root_id', suffixes=('', ''))
merged_df = pd.merge(merged_df, cells, how='left', left_on='post_pt_root_id', right_on='pt_root_id', suffixes=('_pre', '_post'))

# Step 2: Calculate the displacement
col_map = dict(zip(['pt_x_post', 'pt_y_post', 'pt_z_post'], 
                   ['pt_x_pre', 'pt_y_pre', 'pt_z_pre']))

diff_df = merged_df[['pt_x_pre', 'pt_y_pre', 'pt_z_pre']].subtract(
          merged_df[['pt_x_post', 'pt_y_post', 'pt_z_post']].rename(columns=col_map))

merged_df[['x_diff', 'y_diff', 'z_diff']] = diff_df

# Step 3: Access the displacement values
displacements = merged_df[['x_diff', 'y_diff', 'z_diff']].to_numpy()

# Step 4: Remove redundant column labels 'pt_root_id_pre' and 'pt_root_id_post'
merged_df = merged_df.drop(columns=['pt_root_id_pre', 'pt_root_id_post'])

# Step 5: Rescale the displacement values from 4x4x40 nm to 1x1x1 um
merged_df[['x_diff', 'y_diff']] *= 4 / 1000
merged_df['z_diff'] *= 40 / 1000

# Step 6: Save the merged dataframe
# merged_df.to_csv('data/combined_synapse_data_rescaled.csv')

In [4]:
print(merged_df.shape)
print(synapses.shape)
display(merged_df.head())

(6822344, 15)
(6822344, 4)


Unnamed: 0,pre_pt_root_id,post_pt_root_id,size,ctr_pt_position,cell_type_pre,pt_x_pre,pt_y_pre,pt_z_pre,cell_type_post,pt_x_post,pt_y_post,pt_z_post,x_diff,y_diff,z_diff
0,864691135564683351,864691136811959667,960,[159402 92838 18005],23P,139312,142640,18313,23P,197936,120992,19511,-234.496,86.592,-47.92
1,864691135614119115,864691135508912649,7576,[201344 122094 21590],23P,193680,126240,22392,23P,201984,114896,21757,-33.216,45.376,25.4
2,864691135683937719,864691135272192017,22576,[244156 202052 26596],6P-CT,227408,221344,21511,23P,202720,113200,22602,98.752,432.576,-43.64
3,864691135113147801,864691136022555784,684,[214582 129162 19397],23P,220768,112864,20446,23P,212880,120960,18999,31.552,-32.384,57.88
4,864691135194393642,864691135341362885,23172,[197350 119510 17275],23P,195312,112496,17422,23P,199680,116288,18964,-17.472,-15.168,-61.68


In [78]:
print(synapses.shape)
test_df = pd.merge(synapses, cells, how='left', left_on='pre_pt_root_id', right_on='pt_root_id', suffixes=('', ''))
print(test_df.shape)
test_df1 = pd.merge(test_df, cells, how='left', left_on='post_pt_root_id', right_on='pt_root_id', suffixes=('_pre', '_post'))
print(test_df1.shape)

(10000, 4)
(10000, 9)
(10000, 14)


In [79]:
print(test_df.columns)

Index(['pre_pt_root_id', 'post_pt_root_id', 'size', 'ctr_pt_position',
       'cell_type', 'pt_root_id', 'pt_x', 'pt_y', 'pt_z'],
      dtype='object')


In [80]:
test_df.drop(['pt_root_id'], axis=1, inplace=True)
test_df1.drop(['pt_root_id_post', 'pt_root_id_pre'], axis=1, inplace=True)

In [73]:
display(synapses)

Unnamed: 0,pre_pt_root_id,post_pt_root_id,size,ctr_pt_position
0,864691135564683351,864691136811959667,960,[159402 92838 18005]
1,864691135614119115,864691135508912649,7576,[201344 122094 21590]
2,864691135683937719,864691135272192017,22576,[244156 202052 26596]
3,864691135113147801,864691136022555784,684,[214582 129162 19397]
4,864691135194393642,864691135341362885,23172,[197350 119510 17275]
5,864691136272938174,864691135683554546,3660,[189156 110232 17765]
6,864691135858682472,864691135925929998,12096,[244034 201844 23242]
7,864691135155554404,864691135155554404,864,[202384 112270 18044]
8,864691135865001948,864691136050998515,13756,[197250 128596 19046]
9,864691135837192851,864691137197197121,25272,[224516 197026 25563]


In [74]:
display(test_df1)

Unnamed: 0,pre_pt_root_id,post_pt_root_id,size,ctr_pt_position,cell_type_pre,pt_x_pre,pt_y_pre,pt_z_pre,cell_type_post,pt_x_post,pt_y_post,pt_z_post
0,864691135564683351,864691136811959667,960,[159402 92838 18005],23P,139312,142640,18313,23P,197936,120992,19511
1,864691135614119115,864691135508912649,7576,[201344 122094 21590],23P,193680,126240,22392,23P,201984,114896,21757
2,864691135683937719,864691135272192017,22576,[244156 202052 26596],6P-CT,227408,221344,21511,23P,202720,113200,22602
3,864691135113147801,864691136022555784,684,[214582 129162 19397],23P,220768,112864,20446,23P,212880,120960,18999
4,864691135194393642,864691135341362885,23172,[197350 119510 17275],23P,195312,112496,17422,23P,199680,116288,18964
5,864691136272938174,864691135683554546,3660,[189156 110232 17765],23P,190592,118448,19342,23P,205088,111696,17977
6,864691135858682472,864691135925929998,12096,[244034 201844 23242],6P-IT,203824,222816,26742,5P-IT,227952,207568,21998
7,864691135155554404,864691135155554404,864,[202384 112270 18044],23P,202192,113280,18032,23P,202192,113280,18032
8,864691135865001948,864691136050998515,13756,[197250 128596 19046],23P,193776,122976,18938,23P,202848,122080,20297
9,864691135837192851,864691137197197121,25272,[224516 197026 25563],4P,219312,147296,25809,5P-ET,226736,203776,22834


In [75]:
# Display the cells that are in synapses
display(cells[cells['pt_root_id'].isin(synapses['pre_pt_root_id'])])

Unnamed: 0,cell_type,pt_root_id,pt_x,pt_y,pt_z
5771,23P,864691135564683351,139312,142640,18313
20026,23P,864691135194393642,195312,112496,17422
20205,23P,864691136272938174,190592,118448,19342
20241,23P,864691135865001948,193776,122976,18938
20292,23P,864691135614119115,193680,126240,22392
23667,23P,864691135155554404,202192,113280,18032
24084,6P-CT,864691135683937719,227408,221344,21511
26255,6P-IT,864691135858682472,203824,222816,26742
27565,23P,864691135113147801,220768,112864,20446
28433,4P,864691135837192851,219312,147296,25809


In [76]:
display(cells[cells['pt_root_id'].isin(synapses['post_pt_root_id'])])

Unnamed: 0,cell_type,pt_root_id,pt_x,pt_y,pt_z
23667,23P,864691135155554404,202192,113280,18032
23687,23P,864691136811959667,197936,120992,19511
23706,23P,864691136022555784,212880,120960,18999
23722,23P,864691135683554546,205088,111696,17977
23753,23P,864691136050998515,202848,122080,20297
23797,23P,864691135272192017,202720,113200,22602
23833,23P,864691135341362885,199680,116288,18964
23854,23P,864691135508912649,201984,114896,21757
23857,5P-ET,864691137197197121,226736,203776,22834
23891,5P-IT,864691135925929998,227952,207568,21998


In [44]:
# NOTE: Really outdated and slow code

# Use synapse and cell data to calculate displacement between the cell bodies of connected cells
displacements = []
total_synapses = synapses.shape[0]

test_df = synapses[:10]


pre_cells = synapses.isin(cells['pt_root_id'].to_numpy())


for i,synapse in tqdm(enumerate(synapses[['pre_pt_root_id', 'post_pt_root_id']].to_numpy()[:10])):
    pre_cell_id = synapse[0]
    post_cell_id = synapse[1]
    
    pre_cell_xyz = cells[cells['pt_root_id'] == pre_cell_id]['pt_position'].to_numpy()[0]
    pre_cell_xyz = np.fromstring(pre_cell_xyz[1:-1], dtype=int, sep=' ')

    post_cell_xyz = cells[cells['pt_root_id'] == post_cell_id]['pt_position'].to_numpy()[0]
    post_cell_xyz = np.fromstring(post_cell_xyz[1:-1], dtype=int, sep=' ')

    displacement = pre_cell_xyz - post_cell_xyz
    displacements.append(displacement)

displacements = np.array(displacements)
res_transform = np.diag([4 / 1000, 4 / 1000, 40 / 1000])  # 4x4x40 nm^3 to um^3

displacements = np.matmul(displacements, res_transform)

test_df['post_cell_type'] = cells[cells['pt_root_id'] == test_df['post_pt_root_id'].tolist()]['cell_type'].to_numpy()

test_df[['root_diff_x', 'root_diff_y', 'root_diff_z']] = displacements
test_df['dist_magnitude'] = np.linalg.norm(displacements, axis=1)

test_df.to_csv('data/test.csv')

10it [00:00, 199.14it/s]


ValueError: ('Lengths must match to compare', (60296,), (10,))