In [None]:
from pathlib import Path
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import scipy
import random

## Define brain, find trace data folder

In [None]:
# Select the dataset to analyse: "brain1" or "brain2".
brain = "brain1"

# The root directory is taken two levels above the current working directory.
notebook_dir = Path(os.path.abspath(''))
root_dir = notebook_dir.parents[1]
experiment_dir = os.path.join(root_dir, "axon_geometry")
data_dir = os.path.join(experiment_dir, "data", brain)
segments_swc_dir = os.path.join(data_dir, "segments_swc")
# Trace files are read from the "14" subfolder of trace_data.
trace_data_dir = os.path.join(data_dir, "trace_data", "14")
print("Directory where swcs reside: {}".format(segments_swc_dir))

## Read trace data

In [None]:
# Exclusive upper bound on the segment ids probed for "<id>.npy" trace files
# (collect_data iterates over ids 0 .. max_id - 1).
max_id = 300


def classify_height(row):
    """Bucket a row by its "height": values up to 2 are kept, anything else becomes 3."""
    h = row["height"]
    return h if h <= 2 else 3

def numerical_class(row):
    """Encode the row's "class" label as an integer code.

    "axon" -> 0, "collateral" -> 1, "terminal" -> 2; any other label
    yields None.
    """
    label = row["class"]
    for name, code in (("axon", 0), ("collateral", 1), ("terminal", 2)):
        if label == name:
            return code
    return None

def collect_data(verbose=False):
    """Sample one node per class from every available trace file and tabulate it.

    For each segment id in ``range(max_id)`` whose ``<id>.npy`` file exists in
    ``trace_data_dir``, one node of each class ("axon", "collateral",
    "terminal") is drawn with ``random.choice``. Every drawn node contributes
    two rows: one for its mean curvature and one for its mean torsion.
    A class that has no node in a given file is skipped for that file
    instead of aborting the whole collection.

    Parameters
    ----------
    verbose : bool, default False
        Print a line for every trace file that is loaded.

    Returns
    -------
    pandas.DataFrame
        One row per (sampled node, measure) with columns ``seg_id``,
        ``class`` (integer code from ``numerical_class``), ``height``,
        ``log_seg_length`` (base-10 log), ``measure`` ("curvature" or
        "torsion"), ``value``, ``log_value`` (base-10 log) and
        ``height_class`` (from ``classify_height``).

    Notes
    -----
    The sampling uses the global ``random`` state, so repeated calls return
    different rows unless ``random.seed`` is set by the caller.
    """
    columns = ["seg_id", "class", "height", "log_seg_length", "measure", "value", "log_value"]
    # Rows are accumulated in a list and turned into a frame once:
    # DataFrame.append is gone in pandas 2.0 and was quadratic anyway.
    records = []

    for i in range(max_id):
        trace_data_path = os.path.join(trace_data_dir, "{}.npy".format(i))
        if not os.path.exists(trace_data_path):
            continue

        # NOTE(review): allow_pickle=True unpickles arbitrary objects; only
        # load trace files from a trusted source.
        trace_data = np.load(trace_data_path, allow_pickle=True)
        if verbose:
            print("Loaded segment {}".format(i))

        # Node indices grouped by class; insertion order fixes the order of
        # the random draws below (axon, collateral, terminal).
        idxs_by_class = {"axon": [], "collateral": [], "terminal": []}
        for node_num, node in enumerate(trace_data):
            _class = node["class"]
            if _class in idxs_by_class:
                idxs_by_class[_class].append(node_num)

        # random.choice raises IndexError on an empty sequence, so only draw
        # from classes that actually occur in this file.
        sampled_idxs = [random.choice(idxs) for idxs in idxs_by_class.values() if idxs]

        for node_num in sampled_idxs:
            node = trace_data[node_num]
            log_seg_length = np.log10(node["seg_length"])

            for measure, key in (("curvature", "mean_curvature"), ("torsion", "mean_torsion")):
                value = node[key]
                records.append({
                    "seg_id": i,
                    "height": node["height"],
                    "class": node["class"],
                    "log_seg_length": log_seg_length,
                    "measure": measure,
                    "value": value,
                    "log_value": np.log10(value),
                })

    df = pd.DataFrame(records, columns=columns)
    # The helpers only index their argument by key, so they are applied to the
    # record dicts directly; this also works when no file was found, where
    # df.apply(..., axis=1) on an empty frame would not return a Series.
    df["class"] = [numerical_class(record) for record in records]
    df["height_class"] = [classify_height(record) for record in records]
    return df
# Draw one random sample of nodes (collect_data uses random.choice) and log
# every loaded segment. NOTE(review): no random seed is set in the visible
# cells, so this frame differs between runs.
df = collect_data(verbose=True)


# Density of `log` segment lengths as a function of spline class

In [None]:
# Global seaborn styling for all figures that follow.
sns.set_theme()
sns.set_context("paper")
sns.set(font_scale=1.2)

# One KDE panel per measure, one curve per segment class.
g = sns.FacetGrid(df, col="measure", hue="class", height=4)
g.map(sns.kdeplot, "log_seg_length", bw_adjust=1.8)
g.set_axis_labels(r"$\log$ segment length", "density")
g.add_legend(title="Segment class")

# Swap the numeric class codes in the legend for readable names
# (assumes the legend entries come in the order 0, 1, 2).
legend_labels = ["primary", "collateral", "terminal"]
for legend_text, label in zip(g._legend.texts, legend_labels):
    legend_text.set_text(label)

# Leave room above the panels for the figure-level title.
plt.subplots_adjust(top=0.85)
if brain == "brain1":
    brain_title = "Brain 1"
else:
    brain_title = "Brain 2"
g.fig.suptitle(brain_title)

# Panel titles by position (assumes the first facet is curvature, the second torsion).
axes = g.axes.flatten()
for panel_idx, panel_title in enumerate(["curvature", "torsion"]):
    axes[panel_idx].set_title(panel_title)

# Save in both formats, eps first.
figures_dir = os.path.join(experiment_dir, "figures")
for ext in ("eps", "jpg"):
    g.savefig(os.path.join(figures_dir, f"{brain}_seg_length_class_density.{ext}"))

# Density of curvature/torsion as a function of spline class

In [None]:
# One KDE panel per measure showing the raw mean values, one curve per segment class.
g = sns.FacetGrid(df, col="measure", hue="class", height=4)
g.map(sns.kdeplot, "value", bw_adjust=2)
g.set_axis_labels(r"value", "density")
g.add_legend(title="Segment class")

# Swap the numeric class codes in the legend for readable names
# (assumes the legend entries come in the order 0, 1, 2).
legend_labels = ["primary", "collateral", "terminal"]
for legend_text, label in zip(g._legend.texts, legend_labels):
    legend_text.set_text(label)

#g.set(xlim=[-3, 0], xticks=[-3, -2, -1, 0], yticks=[0, 1, 2, 3])
g.set(xlim=[0, .2], xticks=[0, .1, .2])

# Leave room above the panels for the figure-level title.
plt.subplots_adjust(top=0.85)
if brain == "brain1":
    brain_title = "Brain 1"
else:
    brain_title = "Brain 2"
g.fig.suptitle(brain_title)

# Panel titles by position (assumes the first facet is curvature, the second torsion).
axes = g.axes.flatten()
for panel_idx, panel_title in enumerate(["curvature", "torsion"]):
    axes[panel_idx].set_title(panel_title)

# Save in both formats, eps first.
figures_dir = os.path.join(experiment_dir, "figures")
for ext in ("eps", "jpg"):
    g.savefig(os.path.join(figures_dir, f"{brain}_mean_value_class_density.{ext}"))

# Evaluate mean and variance of curvature/torsion distributions based on spline class

In [None]:
measures = ["curvature", "torsion"]
class_dict = {0: "primary", 1: "collateral", 2: "terminal"}

# Print mean and variance (pandas' default .var()) of "value" for every
# measure / class combination.
for measure in measures:
    measure_df = df.loc[df["measure"] == measure]
    for class_id, class_name in class_dict.items():
        in_class = measure_df["class"] == class_id
        class_values = measure_df.loc[in_class, "value"]
        class_mean, class_var = class_values.mean(), class_values.var()
        print(f"{class_name} {measure}, mu = {class_mean}, var = {class_var}")

# Scatter plots of `log` curvature/torsion against `log` segment length as a function of spline class

In [None]:
# Grid of scatter plots: one row per measure, one column per segment class.
g = sns.FacetGrid(df, row="measure", col="class", hue="class", height=4)
g.map(sns.scatterplot, "log_seg_length", "log_value", marker='.', s=50)
g.set_axis_labels(r"$\log$ segment length", r"$\log$ mean value")
g.set(ylim=[-4, 1])

# Leave room above the panels for the figure-level title.
plt.subplots_adjust(top=0.9)
if brain == "brain1":
    brain_title = "Brain 1"
else:
    brain_title = "Brain 2"
g.fig.suptitle(brain_title)

# Title every panel by grid position (assumes rows are curvature, torsion
# and columns are classes 0, 1, 2).
axes = g.axes
row_names = ["curvature", "torsion"]
col_names = ["primary", "collateral", "terminal"]
for row_idx, row_name in enumerate(row_names):
    for col_idx, col_name in enumerate(col_names):
        axes[row_idx, col_idx].set_title(f"{row_name} | {col_name}")

# Save in both formats, eps first.
figures_dir = os.path.join(experiment_dir, "figures")
for ext in ("eps", "jpg"):
    g.savefig(os.path.join(figures_dir, f"{brain}_mean_value_class_scatter.{ext}"))

## Pairwise Wilcoxon rank-sum test and Kolmogorov-Smirnov test

In [None]:
def run_tests(df, verbose=False):
    """Run a two-sample KS test between every pair of segment classes.

    For each measure ("curvature", "torsion") the "value" samples of the
    class pairs (0, 1), (0, 2) and (1, 2) are compared with
    ``scipy.stats.kstest``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns "class" (integer codes 0, 1, 2),
        "measure" and "value".
    verbose : bool, default False
        Print the p-value of every comparison.

    Returns
    -------
    pandas.DataFrame
        One row per (measure, class pair) with columns "measure",
        "first_sample", "second_sample", "wilcoxon_p" and "ks_p".
        Only the KS test is run here, so "wilcoxon_p" is left as NaN.
    """
    # Local import: itertools is not among the notebook's top-level imports.
    from itertools import combinations

    columns = ["measure", "first_sample", "second_sample", "wilcoxon_p", "ks_p"]

    test_name = "ks"
    test_f = scipy.stats.kstest
    test_column = f"{test_name}_p"

    measures = ["curvature", "torsion"]
    classes = {0: "primary", 1: "collateral", 2: "terminal"}

    # Rows are collected in a list and converted once; DataFrame.append no
    # longer exists in pandas 2.0.
    records = []
    for measure in measures:
        measure_df = df.loc[df["measure"] == measure]
        # Unordered class pairs in ascending order: (0, 1), (0, 2), (1, 2).
        for x, y in combinations(sorted(classes), 2):
            first_sample = measure_df.loc[measure_df["class"] == x, "value"].to_numpy(dtype=float)
            second_sample = measure_df.loc[measure_df["class"] == y, "value"].to_numpy(dtype=float)

            # Passing an array as the second argument makes kstest perform
            # the two-sample test.
            _, test_p = test_f(first_sample, second_sample)
            if verbose:
                print(f"{test_name} test, measure = {measure}, {classes[x]} vs {classes[y]}, p_value = {test_p}")

            records.append({"measure": measure, "first_sample": x, "second_sample": y, test_column: test_p})

    return pd.DataFrame(records, columns=columns)
# Run the pairwise tests on the current sample and print every p-value.
p_df = run_tests(df, verbose=True)

In [None]:
from tqdm import tqdm

# Repeat the random node sampling and the pairwise KS tests num_iters times,
# then report the median p-value of every (measure, class pair) comparison.
num_iters = 100
pval_columns = []

for iteration in tqdm(range(num_iters)):
    # Use dedicated names so the notebook-level `df` / `p_df` that earlier
    # cells plot and report are not silently replaced by the last resample.
    resampled_df = collect_data()
    resampled_p_df = run_tests(resampled_df)
    # Coerce explicitly: the column may come back with object dtype.
    pval_columns.append(resampled_p_df["ks_p"].to_numpy(dtype=float))

# Shape (number of comparisons, num_iters); the row count follows from
# run_tests instead of being hard-coded.
pvals = np.column_stack(pval_columns)
print(np.median(pvals, axis=1))