In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from morphomics.io.io import load_obj, save_obj
from morphomics.persistent_homology.ph_analysis import get_lengths
from kxa_analysis import plot_hist
from morphomics.view.view import neuron
from morphomics import pipeline



In [44]:
# Load the saved object at mf_path (used as a DataFrame below) and show how
# many rows fall into each (Model, Sex) group.
mf_path = 'results/vectorization/Morphomics.PID_v1.pi_lm'
mf = load_obj(mf_path)
mf.groupby(['Model', 'Sex']).size()

Model                      Sex
1xKXA+ADRENALECTOMY_4h     F      119
1xKXA+FKBP5KO_4h           F      153
                           M      136
1xKXA+SAFIT2_4h            F      839
                           M      912
1xKXA_4h                   F      231
                           M      120
1xSaline+ADRENALECTOMY_4h  F      135
1xSaline+FKBP5KO_4h        F      154
                           M      133
1xSaline+SAFIT2_4h         F      968
                           M      765
1xSaline_4h                F      131
                           M      118
dtype: int64

In [41]:
len(mf)

4914

In [45]:
import numpy as np
import pandas as pd

def find_fuzzy_duplicate_indices(df, column='Model', tolerance=0.1):
    """List, for every row, the other rows whose array in ``column`` is nearly equal.

    Two rows are fuzzy duplicates when their arrays have the same shape and
    every pair of corresponding elements differs by at most ``tolerance``
    in absolute value (no relative tolerance is applied).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding one array-like value per row in ``column``.
    column : str, default 'Model'
        Name of the column whose values are compared. The default is kept for
        backward compatibility; pass the column that holds the numeric arrays
        (this notebook uses ``'barcodes'``).
    tolerance : float, default 0.1
        Maximum allowed absolute element-wise difference.

    Returns
    -------
    list of list of int
        One list per row, in row order. Entry ``i`` contains, in ascending
        order, the 0-based row positions of the rows matching row ``i``.
        Positions coincide with index labels only when ``df`` has the default
        0..n-1 index.
    """
    # Read the values by position so a filtered / non-default index works,
    # and coerce to ndarray so plain lists are accepted as well.
    arrays = [np.asarray(value) for value in df[column]]
    n = len(arrays)
    duplicates_list = [[] for _ in range(n)]

    for i in range(n):
        arr_i = arrays[i]
        # With rtol=0 the closeness test is symmetric, so each unordered pair
        # is compared once and recorded in both directions.
        for j in range(i + 1, n):
            arr_j = arrays[j]
            # Arrays of different shape can never be duplicates.
            if arr_i.shape != arr_j.shape:
                continue
            if np.allclose(arr_i, arr_j, rtol=0, atol=tolerance):
                duplicates_list[i].append(j)
                duplicates_list[j].append(i)

    return duplicates_list


In [46]:
mf['DuplicateIndices'] = find_fuzzy_duplicate_indices(mf, column='barcodes', tolerance=0.1)


In [47]:
def remove_higher_index_duplicates(df, duplicate_col='DuplicateIndices'):
    """Drop every row that is listed as a duplicate of a row with a smaller index.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose ``duplicate_col`` holds, per row, an iterable of index
        labels of that row's duplicates.
    duplicate_col : str, default 'DuplicateIndices'
        Name of the column with the duplicate labels.

    Returns
    -------
    pandas.DataFrame
        A new frame without the dropped rows and with a fresh 0..n-1 index.
        ``df`` itself is not modified, and ``duplicate_col`` is kept as-is.
    """
    # A label is dropped as soon as any row with a smaller label names it.
    doomed = {
        other
        for row_label, others in df[duplicate_col].items()
        for other in others
        if other > row_label
    }

    survivors = df.drop(index=doomed)
    return survivors.reset_index(drop=True)


In [48]:
mf_cleaned = remove_higher_index_duplicates(mf, duplicate_col='DuplicateIndices')


In [40]:
len(mf_cleaned)

3260

In [49]:
# Persist the de-duplicated frame.
# NOTE(review): mf_path is the same path the frame was loaded from, so this
# overwrites the input file; re-running the notebook then starts from the
# already-cleaned data. Consider a separate output path.
# NOTE(review): mf_cleaned still carries the 'DuplicateIndices' helper column,
# whose values refer to row labels from before reset_index — confirm it should
# be saved.
mf_path = 'results/vectorization/Morphomics.PID_v1.pi_lm'
save_obj(mf_cleaned, mf_path)

In [36]:
mf.iloc[0]

Layer                                                      All_Layers
Model                                                 1xKXA+SAFIT2_4h
Sex                                                                 M
Animal              BL6_M_KXA+SAFIT2_VGLUT2488_IBA568_CD68647_DAPI...
file_name           Filament_001_Filaments1_Trace_0021_nl_correcte...
file_path           ../data_v1/All_Layers/1xKXA+SAFIT2_4h/M/BL6_M_...
swc_array           [[1.0, 1.0, 45996.094, 20777.736, 15.529, 0.66...
cells               <morphomics.cells.neuron.neuron.Neuron object ...
barcodes            [[36.6312370300293, 36.374752044677734], [19.0...
trees               <morphomics.cells.tree.tree.Tree object at 0x7...
nb_children         [2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, ...
nb_trunks                                                         2.0
max_length_bar                                              65.343277
nb_bars                                                            67
HasDuplicate        

In [35]:
mf.iloc[56]

Layer                                                      All_Layers
Model                                                 1xKXA+SAFIT2_4h
Sex                                                                 M
Animal              BL6_M_KXA+SAFIT2_VGLUT2488_IBA568_CD68647_DAPI...
file_name            Filament_003_LAYER23_Trace_0006_nl_corrected.swc
file_path           ../data_v1/All_Layers/1xKXA+SAFIT2_4h/M/BL6_M_...
swc_array           [[1.0, 1.0, 45996.094, 20777.736, 15.529, 0.66...
cells               <morphomics.cells.neuron.neuron.Neuron object ...
barcodes            [[36.6312370300293, 36.374752044677734], [19.0...
trees               <morphomics.cells.tree.tree.Tree object at 0x7...
nb_children         [2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, ...
nb_trunks                                                         2.0
max_length_bar                                              65.343277
nb_bars                                                            67
HasDuplicate        

In [33]:
mf.iloc[:-100]

Unnamed: 0,Layer,Model,Sex,Animal,file_name,file_path,swc_array,cells,barcodes,trees,nb_children,nb_trunks,max_length_bar,nb_bars,HasDuplicate,DuplicateIndices
0,All_Layers,1xKXA+SAFIT2_4h,M,BL6_M_KXA+SAFIT2_VGLUT2488_IBA568_CD68647_DAPI...,Filament_001_Filaments1_Trace_0021_nl_correcte...,../data_v1/All_Layers/1xKXA+SAFIT2_4h/M/BL6_M_...,"[[1.0, 1.0, 45996.094, 20777.736, 15.529, 0.66...",<morphomics.cells.neuron.neuron.Neuron object ...,"[[36.6312370300293, 36.374752044677734], [19.0...",<morphomics.cells.tree.tree.Tree object at 0x7...,"[2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, ...",2.0,65.343277,67,True,[56]
1,All_Layers,1xKXA+SAFIT2_4h,M,BL6_M_KXA+SAFIT2_VGLUT2488_IBA568_CD68647_DAPI...,Filament_003_LAYER23_Trace_0004_nl_corrected.swc,../data_v1/All_Layers/1xKXA+SAFIT2_4h/M/BL6_M_...,"[[1.0, 1.0, 45921.652, 20605.705, 16.435, 0.68...",<morphomics.cells.neuron.neuron.Neuron object ...,"[[35.974395751953125, 35.436771392822266], [33...",<morphomics.cells.tree.tree.Tree object at 0x7...,"[5.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",5.0,40.031033,47,True,[90]
2,All_Layers,1xKXA+SAFIT2_4h,M,BL6_M_KXA+SAFIT2_VGLUT2488_IBA568_CD68647_DAPI...,Filament_001_Filaments1_Trace_0065_nl_correcte...,../data_v1/All_Layers/1xKXA+SAFIT2_4h/M/BL6_M_...,"[[1.0, 1.0, 46280.547, 20725.084, 30.928, 1.17...",<morphomics.cells.neuron.neuron.Neuron object ...,"[[18.82964324951172, 20.38500213623047], [19.7...",<morphomics.cells.tree.tree.Tree object at 0x7...,"[3.0, 1.0, 4.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",3.0,45.321716,52,True,[]
3,All_Layers,1xKXA+SAFIT2_4h,M,BL6_M_KXA+SAFIT2_VGLUT2488_IBA568_CD68647_DAPI...,Filament_004_LAYER4_Trace_0028_nl_corrected.swc,../data_v1/All_Layers/1xKXA+SAFIT2_4h/M/BL6_M_...,"[[1.0, 1.0, 46182.828, 20928.957, 0.208, 1.774...",<morphomics.cells.neuron.neuron.Neuron object ...,"[[38.67806625366211, 40.02254867553711], [38.8...",<morphomics.cells.tree.tree.Tree object at 0x7...,"[2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",2.0,46.971172,32,True,[29]
4,All_Layers,1xKXA+SAFIT2_4h,M,BL6_M_KXA+SAFIT2_VGLUT2488_IBA568_CD68647_DAPI...,Filament_001_Filaments1_Trace_0064_nl_correcte...,../data_v1/All_Layers/1xKXA+SAFIT2_4h/M/BL6_M_...,"[[1.0, 1.0, 46191.117, 20768.512, 24.688, 0.96...",<morphomics.cells.neuron.neuron.Neuron object ...,"[[23.935894012451172, 21.586734771728516], [42...",<morphomics.cells.tree.tree.Tree object at 0x7...,"[4.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, ...",4.0,45.954884,39,True,[47]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4809,All_Layers,1xSaline+SAFIT2_4h,F,BL6_F_SAL+SAFIT2_VGLUT2488_IBA568_CD68647_DAPI...,Filament_001_Filaments1_Trace_0087_nl_correcte...,../data_v1/All_Layers/1xSaline+SAFIT2_4h/F/BL6...,"[[1.0, 1.0, 56106.168, 20810.094, 14.847, 1.27...",<morphomics.cells.neuron.neuron.Neuron object ...,"[[11.65161418914795, 7.394983768463135], [5.99...",<morphomics.cells.tree.tree.Tree object at 0x7...,"[5.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",5.0,43.533615,54,True,[4778]
4810,All_Layers,1xSaline+SAFIT2_4h,F,BL6_F_SAL+SAFIT2_VGLUT2488_IBA568_CD68647_DAPI...,Filament_001_Filaments1_Trace_0033_nl_correcte...,../data_v1/All_Layers/1xSaline+SAFIT2_4h/F/BL6...,"[[1.0, 1.0, 56039.555, 20806.439, 14.771, 0.66...",<morphomics.cells.neuron.neuron.Neuron object ...,"[[29.36760902404785, 29.878097534179688], [36....",<morphomics.cells.tree.tree.Tree object at 0x7...,"[3.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",3.0,53.405918,82,True,[4805]
4811,All_Layers,1xSaline+SAFIT2_4h,F,BL6_F_SAL+SAFIT2_VGLUT2488_IBA568_CD68647_DAPI...,Filament_004_LAYER4_Trace_0021_nl_corrected.swc,../data_v1/All_Layers/1xSaline+SAFIT2_4h/F/BL6...,"[[1.0, 1.0, 56292.574, 20860.063, 33.722, 0.66...",<morphomics.cells.neuron.neuron.Neuron object ...,"[[36.619075775146484, 38.496986389160156], [37...",<morphomics.cells.tree.tree.Tree object at 0x7...,"[2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",2.0,41.076477,23,True,[4758]
4812,All_Layers,1xSaline+SAFIT2_4h,F,BL6_F_SAL+SAFIT2_VGLUT2488_IBA568_CD68647_DAPI...,Filament_001_Filaments1_Trace_0018_nl_correcte...,../data_v1/All_Layers/1xSaline+SAFIT2_4h/F/BL6...,"[[1.0, 1.0, 56247.246, 20748.594, 6.253, 1.166...",<morphomics.cells.neuron.neuron.Neuron object ...,"[[49.06952667236328, 52.567562103271484], [52....",<morphomics.cells.tree.tree.Tree object at 0x7...,"[5.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",5.0,54.060230,66,True,[4828]


In [None]:
mf.keys()

In [None]:
mf_path = 'results/vectorization/Morphomics.PID_v1.pi_lm'
mf = load_obj(mf_path)
print(len(mf))
mf.groupby(['Model', 'Sex']).size()

In [None]:
len(mf)

In [None]:
# Build a morphomics Pipeline with the current frame registered under the
# name 'lm_test'; the Morphometrics step is pointed at that same frame name.
# NOTE(review): the effect of "concatenate": True is defined inside morphomics
# and is not visible here — confirm against the library.
my_pip = pipeline.Pipeline(Parameters_ID='lm_test', morphoframe={'lm_test':mf}, 
                           parameters={'Morphometrics':{"concatenate" : True,
                                                        "morphoframe_name":"lm_test"}})

In [None]:
my_pip.Morphometrics()

In [None]:
# Remove the row(s) whose file_path equals this one specific SWC file.
# The result is a new frame; mf itself is unchanged.
# NOTE(review): the cells visible after this one keep using mf, not mf_clean —
# confirm which frame is meant to be analysed.
mf_clean = mf.drop(mf.loc[mf['file_path'] == '../data_v1/All_Layers/1xKXA+SAFIT2_4h/M/BL6_M_KXA+SAFIT2_VGLUT2488_IBA568_CD68647_DAPI_V1_3/Filament_001_Filaments1_Trace_0023_nl_corrected.swc'].index)


In [None]:
mf.keys()

In [None]:
mf.groupby(['Model', 'Sex']).size()

Check conditions

In [None]:
# List each distinct (Layer, Model, Sex) combination present in the frame.
unique_combinations = mf[['Layer', 'Model', 'Sex']].drop_duplicates()
print(unique_combinations)


Transform cells into trees

In [None]:
# For every cell, call combine_neurites() and keep the first entry of the
# resulting .neurites as that cell's tree.
# NOTE(review): presumably combine_neurites() merges all neurites into one —
# confirm in morphomics.
mf['trees'] = mf['cells'].apply(lambda cell: cell.combine_neurites().neurites[0])

Number of trunks

In [None]:
# nb_children: whatever get_node_children_number() returns for each tree
# (indexed below, so it is a sequence).
# nb_trunks: the first entry of that sequence.
# NOTE(review): presumably entry 0 is the root/soma node, so its child count is
# the number of trunks — confirm the node ordering in morphomics.
mf['nb_children'] = mf['trees'].apply(lambda tree: tree.get_node_children_number())
mf['nb_trunks'] = mf['nb_children'].apply(lambda nb_children: nb_children[0])

In [None]:
plot_hist(mf, 'nb_trunks', is_log=True)

In [None]:
# Inspect outliers: pass every cell with more than 9 trunks to the morphomics
# viewer function neuron() (presumably draws the cell — confirm).
# The threshold 9 is hard-coded here.
extreme_df = mf[mf['nb_trunks'] > 9]
for i, row in extreme_df.iterrows():
    neuron(row['cells'])

Longest Bar

In [None]:
# Longest bar per cell: the maximum of the values get_lengths() returns for
# the cell's barcode.
# NOTE(review): max() raises ValueError if get_lengths() returns an empty
# sequence — confirm no cell has an empty barcode.
mf['max_length_bar'] = mf['barcodes'].apply(lambda barcode: max(get_lengths(barcode)))

In [None]:
plot_hist(mf, 'max_length_bar')

In [None]:
# Inspect outliers: for every cell whose longest bar exceeds 90, pass the cell
# to the morphomics viewer function neuron() and print the animal it came from.
# The threshold 90 is hard-coded here; extreme_df is reused from the previous
# outlier cell and overwritten.
extreme_df = mf[mf['max_length_bar'] >90]
for i, row in extreme_df.iterrows():
    neuron(row['cells'])
    print(row['Animal'])

Number of Bars

In [None]:
plot_hist(mf, 'nb_bars', is_log=True)

In [None]:
# Inspect outliers: for every cell with more than 240 bars, pass the cell to
# the morphomics viewer function neuron() and print the animal it came from.
# NOTE(review): 'nb_bars' is not computed in the cells visible here; it is
# expected to already be a column of the loaded frame.
extreme_df = mf[mf['nb_bars'] > 240]
for i, row in extreme_df.iterrows():
    neuron(row['cells'])
    print(row['Animal'])

Duplicates

In [None]:
# First, apply the logic to find duplicates and create the 'duplicate' column
mf['duplicate'] = mf['trees'].apply(lambda tree: any(tree.is_equal(other_tree) for other_tree in mf['trees'] if other_tree is not tree))

# Filter the dataframe to return rows where 'duplicate' is True
duplicates_df = mf[mf['duplicate'] == True]

In [None]:
# List to store groups of indices for identical trees
groups = []
# Set to track rows that have already been grouped
visited = set()

# Iterate over the duplicate rows
for idx_i, row_i in duplicates_df.iterrows():
    if idx_i in visited:
        continue
    # Start a new group with the current row
    group_indices = [idx_i]
    # Compare current tree with all other trees in duplicates_df
    for idx_j, row_j in duplicates_df.iterrows():
        if idx_j in visited or idx_j == idx_i:
            continue
        if row_i['trees'].is_equal(row_j['trees']):
            group_indices.append(idx_j)
            visited.add(idx_j)
    # Mark the current row as visited
    visited.add(idx_i)
    groups.append(group_indices)

# Now, create a list of dataframes, one for each group
group_dfs = [duplicates_df.loc[indices] for indices in groups]

In [None]:
len(group_dfs)