# Derive mito to mito distance dataframes by cellid

In [1]:
import pandas as pd
import numpy as np

In [2]:
# from https://github.com/shandran/layer23-volume/tree/main/mitochondria_analytics/data

mito = pd.read_csv('data/mito_above_soma.csv', index_col=[0])
mito

Unnamed: 0,mito_id,mito_vx,ctr_pos_x_vx,ctr_pos_y_vx,ctr_pos_z_vx,bbox_beg_x_vx,bbox_beg_y_vx,bbox_beg_z_vx,bbox_end_x_vx,bbox_end_y_vx,...,centroid_z,cell_type,cell_subtype,ctr_distance_to_soma,bbox_dir_x,bbox_dir_y,bbox_dir_z,bbox_distance,cellid_str,centroid_y-ctr_y
0,1783325,527936,75186,43844,1430,74824,43668,1323,75472,44120,...,24,pyramidal,pyramidal cell,4778.608166,-648,-452,-238,825.137564,648518346349523993,False
1,1780086,1597632,75086,43678,1032,74168,43342,675,76458,44298,...,24,pyramidal,pyramidal cell,4534.032201,-2290,-956,-687,2574.879609,648518346349523993,False
2,2026719,141924,79378,47892,742,79192,47734,738,79532,48034,...,24,pyramidal,pyramidal cell,10174.394134,-340,-300,-11,453.564769,648518346349523993,False
3,1540216,2780,69738,42838,21,69708,42816,21,69770,42862,...,24,pyramidal,pyramidal cell,1220.157777,-62,-46,-2,77.226938,648518346349523993,False
4,1547760,211468,70706,45080,1364,70248,44874,1328,71030,45284,...,24,pyramidal,pyramidal cell,3029.110100,-782,-410,-65,885.352472,648518346349523993,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
210970,1649818,287444,72610,37220,2135,72356,37078,2095,72840,37450,...,2038,pyramidal,pyramidal cell,1314.834210,-484,-372,-62,613.582920,648518346349492130,False
210971,2397704,336092,84174,56050,1975,83712,55254,1936,84612,56830,...,2038,pyramidal,pyramidal cell,23022.738173,-900,-1576,-56,1815.740070,648518346349492130,False
210972,1667802,337984,72740,46280,1200,72594,45918,1153,72916,46642,...,2038,pyramidal,pyramidal cell,10363.949247,-322,-724,-95,798.050750,648518346349492130,False
210973,1654227,495816,72104,39630,1908,72006,38894,1826,72178,40170,...,2038,pyramidal,pyramidal cell,3774.941589,-172,-1276,-114,1292.577270,648518346349492130,False


In [3]:
# generate a df with pyr only
pyr = mito[mito['cell_type'] == 'pyramidal']
pyr

Unnamed: 0,mito_id,mito_vx,ctr_pos_x_vx,ctr_pos_y_vx,ctr_pos_z_vx,bbox_beg_x_vx,bbox_beg_y_vx,bbox_beg_z_vx,bbox_end_x_vx,bbox_end_y_vx,...,centroid_z,cell_type,cell_subtype,ctr_distance_to_soma,bbox_dir_x,bbox_dir_y,bbox_dir_z,bbox_distance,cellid_str,centroid_y-ctr_y
0,1783325,527936,75186,43844,1430,74824,43668,1323,75472,44120,...,24,pyramidal,pyramidal cell,4778.608166,-648,-452,-238,825.137564,648518346349523993,False
1,1780086,1597632,75086,43678,1032,74168,43342,675,76458,44298,...,24,pyramidal,pyramidal cell,4534.032201,-2290,-956,-687,2574.879609,648518346349523993,False
2,2026719,141924,79378,47892,742,79192,47734,738,79532,48034,...,24,pyramidal,pyramidal cell,10174.394134,-340,-300,-11,453.564769,648518346349523993,False
3,1540216,2780,69738,42838,21,69708,42816,21,69770,42862,...,24,pyramidal,pyramidal cell,1220.157777,-62,-46,-2,77.226938,648518346349523993,False
4,1547760,211468,70706,45080,1364,70248,44874,1328,71030,45284,...,24,pyramidal,pyramidal cell,3029.110100,-782,-410,-65,885.352472,648518346349523993,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
210970,1649818,287444,72610,37220,2135,72356,37078,2095,72840,37450,...,2038,pyramidal,pyramidal cell,1314.834210,-484,-372,-62,613.582920,648518346349492130,False
210971,2397704,336092,84174,56050,1975,83712,55254,1936,84612,56830,...,2038,pyramidal,pyramidal cell,23022.738173,-900,-1576,-56,1815.740070,648518346349492130,False
210972,1667802,337984,72740,46280,1200,72594,45918,1153,72916,46642,...,2038,pyramidal,pyramidal cell,10363.949247,-322,-724,-95,798.050750,648518346349492130,False
210973,1654227,495816,72104,39630,1908,72006,38894,1826,72178,40170,...,2038,pyramidal,pyramidal cell,3774.941589,-172,-1276,-114,1292.577270,648518346349492130,False


In [4]:
pyr.cellid.value_counts()

648518346349538440    969
648518346349539853    945
648518346349539900    902
648518346349533482    901
648518346349538527    871
                     ... 
648518346349539506    100
648518346349522862    100
648518346349493472     48
648518346349517132     48
648518346349493874     40
Name: cellid, Length: 363, dtype: int64

# Generate 363 dfs; one for each cellid

In [5]:
pyrdf = pyr[['cellid', 'mito_id', 'ctr_pos_x_vx', 'ctr_pos_y_vx', 'ctr_pos_z_vx']]
pyrdf

Unnamed: 0,cellid,mito_id,ctr_pos_x_vx,ctr_pos_y_vx,ctr_pos_z_vx
0,648518346349523993,1783325,75186,43844,1430
1,648518346349523993,1780086,75086,43678,1032
2,648518346349523993,2026719,79378,47892,742
3,648518346349523993,1540216,69738,42838,21
4,648518346349523993,1547760,70706,45080,1364
...,...,...,...,...,...
210970,648518346349492130,1649818,72610,37220,2135
210971,648518346349492130,2397704,84174,56050,1975
210972,648518346349492130,1667802,72740,46280,1200
210973,648518346349492130,1654227,72104,39630,1908


In [6]:
# make a list of unique cellids (no repeats)
cellid_array = pyrdf.cellid.unique()
cellid_array

array([648518346349523993, 648518346349538711, 648518346349538102,
       648518346349538730, 648518346349532295, 648518346349537835,
       648518346349523981, 648518346349533350, 648518346349540057,
       648518346349539401, 648518346349538475, 648518346349538070,
       648518346349539510, 648518346349539961, 648518346349539884,
       648518346349537509, 648518346349538440, 648518346349537007,
       648518346349537489, 648518346349531230, 648518346349534794,
       648518346349532006, 648518346349539464, 648518346349539368,
       648518346349539414, 648518346349523667, 648518346349538387,
       648518346349537996, 648518346349540053, 648518346349538053,
       648518346349539100, 648518346349537718, 648518346349539076,
       648518346349538033, 648518346349539934, 648518346349539590,
       648518346349538278, 648518346349538188, 648518346349537075,
       648518346349539834, 648518346349538426, 648518346349538395,
       648518346349539865, 648518346349539575, 648518346349537

In [7]:
len(pyrdf.cellid.unique())

363

In [8]:
cellid_array[0]

648518346349523993

In [9]:
# Generate the dfs; there will be a SettingWithCopyWarning returned (ignore) 
# The i loop creates 363 dataframes, one for each pyr neuron/cellid
# The j loop calculates the mito distance magnitude vector, one column per mito in that neuron
# The last step in the j loop saves the df to /distance_dfs folder with name cellid_[cellid number].csv
# The loop returns to the next i value and repeats the process again for the next neuron/cellid
# The final result is 363 csv files in the distance_dfs folder (these range in size from 32 KB to 17 MB each)

for i in range(len(pyrdf.cellid.unique())):
    cellid_df = pyrdf[pyrdf['cellid'] == cellid_array[i]]
    for j in range(len(cellid_df)):
        var_x = 'mito_id_'+str(cellid_df.mito_id.iloc[j])+'_ctr_pos_x_vx'
        var_y = 'mito_id_'+str(cellid_df.mito_id.iloc[j])+'_ctr_pos_y_vx'
        var_z = 'mito_id_'+str(cellid_df.mito_id.iloc[j])+'_ctr_pos_z_vx'
        var_x_value = cellid_df.ctr_pos_x_vx.iloc[j]
        var_y_value = cellid_df.ctr_pos_y_vx.iloc[j]
        var_z_value = cellid_df.ctr_pos_z_vx.iloc[j]
        var_dist = 'mito_id_'+str(cellid_df.mito_id.iloc[j])+'_ctr_dist'
        cellid_df[var_dist] = np.sqrt((cellid_df.ctr_pos_x_vx - var_x_value)**2 + (cellid_df.ctr_pos_y_vx - var_y_value)**2 +(cellid_df.ctr_pos_z_vx - var_z_value)**2)
    cellid_df.to_csv('distance_dfs/'+'cellid_'+str(cellid_array[i])+'.csv')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
