In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from morphomics.io.io import load_obj, save_obj
from morphomics.persistent_homology.ph_analysis import get_lengths
from kxa_analysis import plot_hist
from morphomics.view.view import neuron
from morphomics import pipeline

In [None]:
# Load the object stored at mf_path into `mf` and show how many rows fall
# into each (Model, Sex) combination.
mf_path = 'results/vectorization/Morphomics.PID_v1.pi_lm'
mf = load_obj(mf_path)
mf.groupby(['Model', 'Sex']).size()

In [None]:
# Number of rows in the frame as loaded, before any de-duplication.
len(mf)

In [None]:
import numpy as np
import pandas as pd

def find_fuzzy_duplicate_indices(df, column='Model', tolerance=0.1):
    """List, for every row, the other rows whose array in ``column`` is nearly equal.

    Two entries match when they have the same shape and every pair of
    elements differs by at most ``tolerance`` in absolute value
    (``np.allclose`` with ``rtol=0``; NaN never matches anything).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding one array-like value per row in ``column``.
    column : str
        Name of the column whose values are compared.
    tolerance : float
        Largest absolute element-wise difference still counted as a match.

    Returns
    -------
    list of list
        One list per row, in row order. Each inner list holds the index
        labels of the other matching rows, also in row order. With the
        default ``RangeIndex`` the labels coincide with row positions.
    """
    # Rows are walked by position so the function works for any index;
    # labels are what gets reported so the result can be fed to ``df.drop``.
    labels = list(df.index)
    # Convert every entry once instead of once per comparison.
    arrays = [np.asarray(value) for value in df[column]]
    n = len(arrays)
    duplicates_list = [[] for _ in range(n)]

    for i in range(n):
        arr_i = arrays[i]
        # With rtol=0 the closeness test is symmetric, so each unordered
        # pair is checked once and recorded for both rows.
        for j in range(i + 1, n):
            arr_j = arrays[j]
            if arr_i.shape != arr_j.shape:
                continue
            if np.allclose(arr_i, arr_j, rtol=0, atol=tolerance):
                duplicates_list[i].append(labels[j])
                duplicates_list[j].append(labels[i])

    return duplicates_list


In [None]:
# For every row, record which other rows have a same-shaped `barcodes` array
# that agrees element-wise within an absolute tolerance of 0.1. The result is
# stored on `mf` itself as the `DuplicateIndices` column.
mf['DuplicateIndices'] = find_fuzzy_duplicate_indices(mf, column='barcodes', tolerance=0.1)


In [None]:
def remove_higher_index_duplicates(df, duplicate_col='DuplicateIndices'):
    """Keep the lowest-index row of each set of duplicates and drop the rest.

    A row is dropped only when it is listed as a duplicate of a lower-index
    row that is itself kept. A row that merely matches an already-dropped
    row is therefore kept, so chains of near-matches (A~B, B~C, but not A~C)
    do not remove C.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``duplicate_col`` column holding, for each row, an
        iterable of index labels of its duplicates.
    duplicate_col : str
        Name of that column.

    Returns
    -------
    pandas.DataFrame
        A new frame without the dropped rows and with a fresh 0..n-1 index.
        ``df`` is not modified. ``duplicate_col`` is carried over unchanged,
        so its labels still refer to the index before the reset.
    """
    indices_to_drop = set()

    # Visit labels in ascending order: every row that could cause row ``i``
    # to be dropped has a smaller label, so whether ``i`` survives is
    # settled before ``i`` is used to discard others.
    for i, duplicates in sorted(df[duplicate_col].items(), key=lambda item: item[0]):
        if i in indices_to_drop:
            # An already-removed row must not cause further removals.
            continue
        indices_to_drop.update(dup_idx for dup_idx in duplicates if dup_idx > i)

    # Drop those rows and renumber the survivors.
    return df.drop(index=list(indices_to_drop)).reset_index(drop=True)


In [None]:
# Build a de-duplicated copy of `mf` from the `DuplicateIndices` column;
# `mf` itself keeps all of its rows.
mf_cleaned = remove_higher_index_duplicates(mf, duplicate_col='DuplicateIndices')


In [None]:
# Number of rows left after de-duplication.
len(mf_cleaned)

In [None]:
# Persist the de-duplicated frame.
# NOTE(review): this is the same path the frame was loaded from, so the
# original file is overwritten. The saved frame also still contains the
# `DuplicateIndices` helper column. Confirm both are intended.
mf_path = 'results/vectorization/Morphomics.PID_v1.pi_lm'
save_obj(mf_cleaned, mf_path)

In [None]:
# Inspect the first row of `mf` (the frame that has not been de-duplicated).
mf.iloc[0]

In [None]:
# Inspect the row at position 56 of `mf`.
mf.iloc[56]

In [None]:
# Show every row of `mf` except the last 100.
mf.iloc[:-100]

In [None]:
# Column labels currently present on `mf`.
mf.keys()

In [None]:
# Reload `mf` from disk (replacing the in-memory frame), print its row count
# and show the number of rows per (Model, Sex) combination.
mf_path = 'results/vectorization/Morphomics.PID_v1.pi_lm'
mf = load_obj(mf_path)
print(len(mf))
mf.groupby(['Model', 'Sex']).size()

In [None]:
# Row count of the reloaded frame (the same value the previous cell prints).
len(mf)

In [None]:
# Build a morphomics Pipeline that holds `mf` under the name 'lm_test' and
# carries Morphometrics parameters pointing at that same name, with
# "concatenate" switched on.
# NOTE(review): the exact meaning of "concatenate" is defined by morphomics — confirm.
my_pip = pipeline.Pipeline(Parameters_ID='lm_test', morphoframe={'lm_test':mf}, 
                           parameters={'Morphometrics':{"concatenate" : True,
                                                        "morphoframe_name":"lm_test"}})

In [None]:
# Run the pipeline's Morphometrics step with the parameters given above.
# NOTE(review): presumably this computes morphometric features for 'lm_test' — confirm against morphomics.
my_pip.Morphometrics()

In [None]:
# Build a copy of `mf` without the row(s) that come from one specific trace file.
# NOTE(review): the cells that follow keep working on `mf`, not `mf_clean`,
# so this exclusion does not reach them — confirm that is intended.
is_excluded_file = mf['file_path'] == '../data_v1/All_Layers/1xKXA+SAFIT2_4h/M/BL6_M_KXA+SAFIT2_VGLUT2488_IBA568_CD68647_DAPI_V1_3/Filament_001_Filaments1_Trace_0023_nl_corrected.swc'
excluded_labels = mf.loc[is_excluded_file].index
mf_clean = mf.drop(excluded_labels)


In [None]:
# Column labels currently present on `mf`.
mf.keys()

In [None]:
# Number of rows per (Model, Sex) combination.
mf.groupby(['Model', 'Sex']).size()

Check conditions

In [None]:
# List each distinct (Layer, Model, Sex) combination that occurs in `mf`.
unique_combinations = mf[['Layer', 'Model', 'Sex']].drop_duplicates()
print(unique_combinations)


Transform cells into trees

In [None]:
# Store one tree per cell in a new `trees` column: combine the cell's
# neurites and keep the first entry of the resulting `neurites` sequence.
# NOTE(review): assumes combine_neurites() leaves the merged tree at neurites[0] — confirm.
mf['trees'] = mf['cells'].apply(lambda cell: cell.combine_neurites().neurites[0])

Number of trunks

In [None]:
# `nb_children` holds whatever get_node_children_number() returns for each tree;
# `nb_trunks` is its first entry.
# NOTE(review): presumably entry 0 is the child count of the root node, i.e. the
# number of trunks leaving the soma — confirm against morphomics.
mf['nb_children'] = mf['trees'].apply(lambda tree: tree.get_node_children_number())
mf['nb_trunks'] = mf['nb_children'].apply(lambda nb_children: nb_children[0])

In [None]:
# Histogram of the `nb_trunks` column.
# NOTE(review): is_log=True presumably selects a logarithmic scale — confirm in kxa_analysis.plot_hist.
plot_hist(mf, 'nb_trunks', is_log=True)

In [None]:
# Pass every cell with more than 9 trunks to `neuron` for visual inspection
# (`neuron` comes from morphomics.view.view; presumably it draws the cell).
has_many_trunks = mf['nb_trunks'] > 9
extreme_df = mf.loc[has_many_trunks]
for _, extreme_row in extreme_df.iterrows():
    neuron(extreme_row['cells'])

Longest Bar

In [None]:
def _longest_bar(barcode):
    """Return the largest of the values `get_lengths` yields for one barcode."""
    return max(get_lengths(barcode))


# One value per row: the longest bar of that row's barcode.
mf['max_length_bar'] = mf['barcodes'].apply(_longest_bar)

In [None]:
# Histogram of the `max_length_bar` column.
plot_hist(mf, 'max_length_bar')

In [None]:
# Pass every cell whose longest bar exceeds 90 to `neuron`, and print the
# animal each one belongs to.
has_long_bar = mf['max_length_bar'] > 90
extreme_df = mf.loc[has_long_bar]
for _, extreme_row in extreme_df.iterrows():
    neuron(extreme_row['cells'])
    print(extreme_row['Animal'])

Number of Bars

In [None]:
# Histogram of the `nb_bars` column.
# NOTE(review): `nb_bars` is not created in any cell shown here; presumably it
# comes from the loaded file or from the Morphometrics step — confirm.
plot_hist(mf, 'nb_bars', is_log=True)

In [None]:
# Pass every cell with more than 240 bars to `neuron`, and print the animal
# each one belongs to.
has_many_bars = mf['nb_bars'] > 240
extreme_df = mf.loc[has_many_bars]
for _, extreme_row in extreme_df.iterrows():
    neuron(extreme_row['cells'])
    print(extreme_row['Animal'])

Duplicates

In [None]:
# First, apply the logic to find duplicates and create the 'duplicate' column
mf['duplicate'] = mf['trees'].apply(lambda tree: any(tree.is_equal(other_tree) for other_tree in mf['trees'] if other_tree is not tree))

# Filter the dataframe to return rows where 'duplicate' is True
duplicates_df = mf[mf['duplicate'] == True]

In [None]:
# List to store groups of indices for identical trees
groups = []
# Set to track rows that have already been grouped
visited = set()

# Iterate over the duplicate rows
for idx_i, row_i in duplicates_df.iterrows():
    if idx_i in visited:
        continue
    # Start a new group with the current row
    group_indices = [idx_i]
    # Compare current tree with all other trees in duplicates_df
    for idx_j, row_j in duplicates_df.iterrows():
        if idx_j in visited or idx_j == idx_i:
            continue
        if row_i['trees'].is_equal(row_j['trees']):
            group_indices.append(idx_j)
            visited.add(idx_j)
    # Mark the current row as visited
    visited.add(idx_i)
    groups.append(group_indices)

# Now, create a list of dataframes, one for each group
group_dfs = [duplicates_df.loc[indices] for indices in groups]

In [None]:
# Number of groups of identical trees found above.
len(group_dfs)