In [None]:
import pickle
import itertools as it
import numpy as np
import pandas as pd
import networkx as nx
import os
import subprocess as sp
from pathlib import Path
import copy
import re
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import more_itertools as itx

In [None]:
# Diagnostic group code per subject: the list position is the integer subject
# number (see `categories[sub]` in the cells below). Codes 1-4 are named in
# `cats`; code 0 has no entry there and is reported by `subject_graphs` as
# "no diagnosis assigned".
categories = [
    0,
    1,
    1,
    1,
    1,
    1,
    3,
    2,
    2,
    3,
    1,
    1,
    2,
    2,
    2,
    3,
    2,
    2,
    2,
    2,
    2,
    3,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    3,
    3,
    2,
    2,
    1,
    1,
    2,
    2,
    2,
    1,
    1,
    2,
    1,
    2,
    2,
    1,
    2,
    2,
    1,
    2,
    2,
    2,
    1,
    1,
    1,
    1,
    2,
    2,
    2,
    2,
    1,
    1,
    1,
    1,
    1,
    1,
    2,
    1,
    1,
    2,
    3,
    2,
    2,
    2,
    2,
    2,
    1,
    2,
    2,
    3,
    2,
    2,
    2,
    2,
    1,
    2,
    2,
    1,
    2,
    4,
    1,
    1,
    2,
    2,
    2,
    4,
    2,
    4,
    2,
    4,
    2,
    2,
    2,
    2,
    2,
    2,
    4,
    4,
    4,
    4,
    4,
    2,
    4,
    2,
    4,
    2,
    2,
    4,
    4,
    2,
    3,
    3,
    2,
    2,
    2,
    2,
    4,
    3,
    3,
    3,
    3,
    4,
    1,
    3,
    4,
    2,
    0,
    4,
    1,
    1,
    3,
    1,
    1,
    2,
    3,
    2,
    4,
    4,
    4,
    2,
    1,
    0,
    1,
    1,
    2,
    3,
]

In [None]:
# Display name for each diagnostic group code used in `categories`.
cats = {1: "HC", 2: "FEP", 3: "Treatment 3+ yr", 4: "High risk"}

In [None]:
from bids import BIDSLayout

# Alternative dataset root, kept for reference:
# layout = BIDSLayout("results/prepdwi_recon", validate=False)
# Query object for the dataset two directories above the notebook; BIDS
# validation is switched off and `database_path` points at ../../.pybids.
layout = BIDSLayout("../..", validate=False, database_path="../../.pybids")

In [None]:
import fury.io as fio
import fury.utils as futil
import fury.lib as flib
import nibabel as nib
def node_sizes(path):
    """Count how many cells of a parcellated surface belong to each parcel.

    Args:
        path: Polydata file readable by ``fury.io.load_polydata`` whose cell
            data carries a ``parcel_idx`` array.

    Returns:
        A ``collections.Counter`` (a ``dict`` subclass) mapping each parcel
        index to the number of cells labelled with it. Like the mapping it
        replaces, looking up an unseen index yields 0.
    """
    # Imported here so the function does not depend on a later notebook cell
    # having imported a counting container first.
    from collections import Counter

    pld = fio.load_polydata(path)
    cluster_id = flib.numpy_support.vtk_to_numpy(pld.GetCellData().GetArray("parcel_idx"))
    # tolist() turns the numpy scalars into plain Python numbers for the keys.
    return Counter(cluster_id.tolist())

def node_sizes_relative(path):
    """Return per-parcel cell counts divided by the spread of those counts.

    The result is a list indexed by parcel index (0 .. max index); each entry
    is that parcel's cell count divided by (largest count - smallest count).
    Raises ZeroDivisionError when every parcel has the same count.
    """
    polydata = fio.load_polydata(path)
    parcel_ids = flib.numpy_support.vtk_to_numpy(polydata.GetCellData().GetArray("parcel_idx"))
    tallies = [0] * (max(parcel_ids)+1)
    for parcel in parcel_ids:
        tallies[parcel] += 1
    spread = max(tallies) - min(tallies)
    return [tally/spread for tally in tallies]


def degree_sizes_relative(degrees):
    """Divide every degree by the spread (max - min) of all degrees.

    Args:
        degrees: Mapping of node -> degree. It is rescaled in place.

    Returns:
        The same mapping object, with every value divided by the spread.

    Raises:
        ValueError: If ``degrees`` is empty.
        ZeroDivisionError: If all degrees are equal (spread of 0).
    """
    spread = max(degrees.values()) - min(degrees.values())
    # Walk the real keys: node ids are not guaranteed to be 0..n-1 (graphs
    # built from labelled adjacency frames use region names as nodes).
    for node in degrees:
        degrees[node] = degrees[node]/spread
    return degrees

def mesh_size(path):
    """Return the number of triangles in a surface mesh file.

    Files ending in .vtk or .vtp are read with fury; anything else is loaded
    with nibabel and its 'triangle' data array is counted.
    """
    is_vtk = Path(path).suffix in [".vtk", ".vtp"]
    if not is_vtk:
        surface = nib.load(path)
        return len(surface.agg_data('triangle'))
    polydata = fio.load_polydata(path)
    return len(futil.get_polydata_triangles(polydata))

def node_size_at_points(df: pd.DataFrame, points, column):
    """Sample the sorted values of ``column`` at fractional positions.

    Each entry of ``points`` is a fraction in [0, 1]; the value returned for it
    is the element at position int((n - 1) * point) of the ascending-sorted
    column. The result has one row per point, with columns ``column`` and
    "point".
    """
    ordered = df.sort_values(column).reset_index(drop=True)[column]
    last = len(ordered) - 1
    sampled = [ordered[int(last * point)] for point in points]
    return pd.DataFrame({column: sampled, "point": points})


In [None]:
def betweenness(b_vals, threshold=.5):
    """Keep the entries whose score reaches a fraction of the score range.

    The cut-off is min + threshold * (max - min); entries with a score at or
    above it are returned as a one-column DataFrame ("betweenness") indexed by
    the original keys.
    """
    scores = b_vals.values()
    score_range = max(scores) - min(scores)
    margin = score_range*threshold + min(scores)
    above = {key: score for key, score in b_vals.items() if score >= margin}
    return pd.DataFrame({"betweenness": above})

In [None]:
# Read the list of hemispheric cluster file names, one per line.
with open("resources/tract-assignments/hemispheric", 'r') as f:
    data = f.readlines()

# Keep only the numeric id of each "cluster_<digits>.vtp" entry (as a string).
# A line that does not match the pattern makes re.search return None, so the
# [1] lookup raises TypeError.
all_bundles = [re.search(r"^cluster_(\d+)\.vtp$", s)[1] for s in data]

### Utility Functions

In [None]:
from typing import Any
def filter_logile(matrix, bin: int, num_bins: int = 10):
    """Zero out the weakest entries of a weight matrix on a log10 scale.

    The log10 range of the non-zero entries is divided into ``num_bins`` equal
    slices. Every entry whose log10 value is at or below
    ``min + (max - min) * bin / num_bins`` is set to 0, so ``bin=0`` removes
    only the entries equal to the minimum.

    Args:
        matrix: Array of weights; zeros are treated as "no connection".
        bin: Index of the cut-off slice; must be smaller than ``num_bins``.
        num_bins: Number of slices the log10 range is divided into.

    Returns:
        A deep copy of ``matrix`` with the filtered entries set to 0. The
        input is not modified.

    Raises:
        ValueError: If ``bin >= num_bins``.
    """
    if bin >= num_bins:
        raise ValueError("bin must be less than num_bins")
    masked = np.ma.masked_equal(matrix, 0)
    cp = copy.deepcopy(matrix)
    if masked.count() == 0:
        # No non-zero entries: there is no log range to threshold against.
        return cp
    log = np.ma.log10(masked)
    lowest = log.min()
    threshold = ((log.max() - lowest) * bin / num_bins) + lowest
    # Resolve masked positions to False so the boolean index is a plain
    # array and entries excluded from the log scale are never touched.
    cp[(log <= threshold).filled(False)] = 0
    return cp

def hex_to_rgb(hex_color: str) -> tuple:
    """Convert a hex colour string to an ``(r, g, b)`` tuple of ints.

    Accepts six-digit ("#rrggbb") and three-digit shorthand ("#rgb") forms,
    with or without the leading "#".
    """
    hex_color = hex_color.lstrip("#")
    if len(hex_color) == 3:
        # Shorthand doubles each digit ("abc" -> "aabbcc"); repeating the
        # whole string would pair digits from different channels.
        hex_color = "".join(digit * 2 for digit in hex_color)
    return tuple(int(hex_color[start:start + 2], 16) for start in (0, 2, 4))

def get_lut(path):
    """Read a whitespace-separated lookup table into ``{int id: name}``.

    Only the first two fields of each line are used. Blank lines, lines with
    fewer than two fields and lines starting with "#" are skipped.

    Raises:
        ValueError: If the first field of a data line is not an integer.
    """
    lut = {}
    with open(path) as f:
        for line in f:
            fields = line.split()
            # Parse line by line so one short or empty line cannot truncate
            # the whole table.
            if len(fields) < 2 or fields[0].startswith("#"):
                continue
            lut[int(fields[0])] = fields[1]
    return lut

def lut_label(data, path):
    """Wrap ``data`` in a DataFrame whose row and column labels come from a LUT.

    Integer labels found in the lookup table at ``path`` are replaced by the
    corresponding names on both axes; other labels are left as they are.
    """
    names = get_lut(path)
    frame = pd.DataFrame(data)
    return frame.rename(index=names, columns=names)



import plotly.express as px
import plotly.graph_objs as go
def distribution_plot(df, x, y: str):
    """Plot the per-category mean of ``y`` against ``x`` with a +/- 1 std band.

    Args:
        df: Frame with a "category" column or index level, plus ``x`` and ``y``.
        x: Name of the column (or index level) used for the horizontal axis.
        y: Name of the column whose mean and standard deviation are plotted.

    Returns:
        A plotly figure with one mean line per category, a shaded band between
        mean - std and mean + std, and one button per category that toggles
        that category's line and band.
    """
    std_col = y+"_std"
    # Aggregate only `y`: averaging every column fails on non-numeric columns
    # (such as list-valued ones) in recent pandas, and nothing else is plotted.
    stats = df.groupby(["category", x])[y].agg(["mean", "std"])
    ddf = (
        stats.rename(columns={"mean": y, "std": std_col})
        .reset_index()
        .set_index("category")
    )
    fig = px.line(
        ddf,
        x=x,
        y=y,
        color=ddf.index,
        width=800,
        height=600,
        labels={
            "x": "Nodes sorted by increasing degree",
            "node_size": "Node Size (# triangles)",
            "betweenness": "Betweenness",
            "degree": "Degree"
        },
        title=f"{y.capitalize().replace('_', ' ')} distribution",
    )
    palette = px.colors.qualitative.Plotly
    shown = ddf.index.unique()
    # The mean lines occupy trace slots 0..num_traces-1; each category's two
    # band traces are appended after them in the same order.
    num_traces = len(shown)
    buttons = []
    for i, cat in enumerate(shown):
        # Wrap around the palette so extra categories cannot raise IndexError.
        band_rgb = hex_to_rgb(palette[i % len(palette)])
        mean = ddf.loc[cat, y]
        std = ddf.loc[cat, std_col]
        fig.add_traces([
            go.Scatter(
                x=ddf.loc[cat, x],
                y=std+mean,
                mode="lines",
                line=dict(width=0),
            ),
            go.Scatter(
                x=ddf.loc[cat, x],
                y=mean-std,
                mode="lines",
                line=dict(width=0),
                # Fill up to the previously added (upper) trace.
                fill='tonexty',
                fillcolor=f'rgba{(*band_rgb, 0.2)}'
            )
        ])
        trace_ids = [i, num_traces + i*2, num_traces + i*2 + 1]
        buttons.append({
            "method": 'restyle',
            "visible": True,
            "label": cat,
            "args": [{
                "visible": False
            }, trace_ids],
            "args2": [{
                "visible": True,
            }, trace_ids]
        })
    fig.update_layout(
        margin=dict(l=50, r=50, t=50, b=50),
        showlegend=False,
        updatemenus=[
            dict(
                type="buttons",
                direction="right",
                x=1,
                y=-0.2,
                showactive=True,
                buttons=buttons,
            )
        ]
    )
    return fig

def figures_to_html(figs, filename="dashboard.html"):
    """Write the body content of several figures into one HTML file.

    Each figure's ``to_html()`` output is cut down to what lies between its
    first "<body>" and the following "</body>", and the pieces are written one
    after another inside a single html/body wrapper.
    """
    with open(filename, 'w') as page:
        page.write("<html><head></head><body>" + "\n")
        for figure in figs:
            after_open = figure.to_html().split('<body>')[1]
            page.write(after_open.split('</body>')[0])
        page.write("</body></html>" + "\n")

def nb_cache(name, root=".", reset_cache=False):
    """Decorator factory that pickles a function's result on disk.

    Results are stored under ``<root>/.ipynb_cache``. A call without
    arguments uses ``<name>.pyc``; a call with arguments uses a file whose
    name also contains a digest of the pickled arguments, so different
    arguments never share a cached result. Calls whose arguments cannot be
    pickled (for example locally defined functions) are not cached at all.

    Args:
        name: Base name of the cache file.
        root: Directory that holds the ``.ipynb_cache`` folder.
        reset_cache: When true, ignore existing cache files and recompute.

    Returns:
        A decorator; the decorated function has the same call signature.

    NOTE: cached results are read with ``pickle.load``, which can execute
    arbitrary code; only point ``root`` at directories you trust.
    """
    import functools
    import hashlib

    base_file = Path(root, ".ipynb_cache", name).with_suffix(".pyc")

    def cache_path(args, kwargs):
        # None means "these arguments cannot be turned into a cache key".
        if not args and not kwargs:
            return base_file
        try:
            payload = pickle.dumps((args, sorted(kwargs.items())))
        except Exception:
            return None
        digest = hashlib.sha1(payload).hexdigest()[:16]
        return base_file.with_name(f"{base_file.stem}-{digest}{base_file.suffix}")

    def do_cache(func, *args, **kwargs):
        cache_file = cache_path(args, kwargs)
        if cache_file is None:
            return func(*args, **kwargs)
        if cache_file.exists() and not reset_cache:
            with cache_file.open('rb') as f:
                return pickle.load(f)
        result = func(*args, **kwargs)
        # The cache folder does not exist on a fresh checkout.
        cache_file.parent.mkdir(parents=True, exist_ok=True)
        with cache_file.open('wb') as f:
            pickle.dump(result, f)
        return result

    def wrapper(func) -> Any:
        @functools.wraps(func)
        def inner(*args, **kwargs):
            return do_cache(func, *args, **kwargs)
        return inner
    return wrapper

### Single Subject

Here, test a single subject and view the connection density as a histogram

In [None]:
# Load the comma-separated connectome matrix of subject 034 (the first file the
# layout query returns) as a numpy array.
c = np.genfromtxt(layout.get(subject="034", datatype="dwi", suffix="connectome")[0].path, delimiter=",")

In [None]:
# Drop the weakest connections, clear self-connections and build the graph.
filtered = filter_logile(c, 1)
np.fill_diagonal(filtered, 0)
# from_numpy_array replaces from_numpy_matrix, which NetworkX 3.0 removed.
G = nx.from_numpy_array(filtered)
import math
for edge in G.edges:
    # NOTE(review): the other cells use 1/weight as the distance; -log10 goes
    # negative for weights above 1 — confirm which definition is intended.
    G.edges[edge]["distance"] = -math.log10(G.edges[edge]["weight"])

In [None]:
# Small-world coefficient of the single-subject graph.
# NOTE(review): nx.sigma is called without a seed, so the value presumably
# differs between runs — confirm and pass a seed if so.
sigma = nx.sigma(G)
sigma

In [None]:
# Label the rows and columns of the unfiltered matrix `c` using the atlas
# lookup table, then draw it as a heatmap.
lut = get_lut("resources/BN_Atlas_freesurfer/BN_Atlas_210_LUT.txt")
df = pd.DataFrame(c).rename(index=lut, columns=lut)
plt.figure(figsize=(20,20))
sns.heatmap(df, cmap="coolwarm", square=True)

In [None]:
# Weight distribution plot
# One bin edge per matrix row (sqrt of the number of entries), rescaled to [0, 1].
bins = np.arange(np.sqrt(len(np.concatenate(filtered))))
bins = (bins - np.min(bins))/np.ptp(bins)
fig, axes = plt.subplots(1,2, figsize=(15,5))

# Distribution of raw weights
rawdist = sns.histplot(filtered.flatten(), bins=bins, kde=False, ax=axes[0])
rawdist.set(xlabel='Correlation Values', ylabel = 'Density Frequency')

# Distribution of log10 weights. Zeros (filtered entries and the cleared
# diagonal) are excluded: log10(0) is -inf, which gives the histogram a
# non-finite range.
positive_weights = filtered[filtered > 0]
log10dist = sns.histplot(np.log10(positive_weights), kde=False, ax=axes[1])
log10dist.set(xlabel='log(weights)')

### Multiple Subjects

Loop through all subjects and gather various metrics

In [None]:
def subject_graphs(filter_level = 1, drop = []):
    """Yield a thresholded, region-labelled graph for every diagnosed subject.

    Args:
        filter_level: ``bin`` argument handed to ``filter_logile``.
        drop: Region labels to remove from every graph, or a callable that
            takes the subject number and returns the labels for that subject.

    Yields:
        Tuples ``(subject, category, graph, dropped)``. Every edge carries a
        "distance" attribute equal to 1 / weight. Subjects outside the range
        of ``categories`` or with a category missing from ``cats`` are skipped
        with a printed message.
    """
    lut_path = "resources/BN_Atlas_freesurfer/BN_Atlas_210_LUT.txt"
    for bidsfile in layout.get(datatype="dwi", suffix="connectome"):
        sub = int(bidsfile.entities['subject'])
        if sub >= len(categories):
            print(f"Dropped {sub} (out of range)")
            continue
        cat = categories[sub]
        if cat not in cats:
            print(f"Dropped {sub} (no diagnosis assigned)")
            continue
        weights = np.genfromtxt(bidsfile.path, delimiter=",")
        np.fill_diagonal(weights, 0)
        labelled = lut_label(filter_logile(weights, filter_level), lut_path)
        labelled = labelled.drop(index="Unknown", columns="Unknown")
        dropped = drop(sub) if callable(drop) else drop
        labelled = labelled.drop(index=dropped, columns=dropped)
        G = nx.from_pandas_adjacency(labelled)
        for _, _, attrs in G.edges(data=True):
            attrs["distance"] = 1/attrs["weight"]
        yield sub, cat, G, dropped


In [None]:
def subject_properties(sub, cat, G, drop_regions = []):
    """Summarise one subject's graph as a dict of whole-network measures.

    The dict holds the subject id, the category name, mean degree, node
    count, the dropped regions, component statistics, density, transitivity
    and global efficiency. A KeyError prints the subject id before being
    re-raised.
    """
    try:
        props = {"subject": sub, "category": cats[cat]}
        props["degree"] = np.mean([*zip(*G.degree)][1])
        props["num_regions"] = len(G.nodes)
        props["dropped_regions"] = list(drop_regions)
        props["num_connected_comps"] = nx.number_connected_components(G)
        props["largest_connected_comp"] = len(max(nx.connected_components(G), key=len))
        props["density"] = nx.density(G)
        props["transitivity"] = nx.transitivity(G)
        props["efficiency"] = nx.global_efficiency(G)
        return props
    except KeyError:
        print(sub)
        raise

@nb_cache("subject_properties")
def subject_df(drop_regions = []):
    """Build one row of network measures per subject and return them as a frame.

    ``drop_regions`` is forwarded to ``subject_graphs`` as its ``drop``
    argument. A KeyError prints the offending subject before propagating.
    """
    rows = []
    for sub, cat, G, dropped in subject_graphs(drop=drop_regions):
        try:
            row = subject_properties(sub, cat, G, dropped)
        except KeyError:
            print(sub)
            raise
        rows.append(row)

    return pd.DataFrame(rows)

In [None]:
import plotly.express as px
# Per-group distribution of network density.
df = subject_df()
fig = px.violin(
    df,
    x="category",
    color="category",
    y="density",
    points="outliers",
    width=584,
    height=400,
    labels={
        "num_regions": "# Regions",
        "density": "Density",
        "category": "Group"
    },
    # The title names the column that is actually plotted on the y axis.
    title="Network density",
    hover_data=["subject"]
)
fig.update_layout(
    margin=dict(l=50, r=50, t=50, b=50),
    showlegend=False
)

In [None]:
@nb_cache("nodal_properties")
def nodal_properties():
    """Collect per-node graph measures for every subject.

    Returns:
        A DataFrame with one row per (subject, node) holding the node label,
        subject, category name, degree, clustering coefficient, mean
        distance-weighted shortest-path length from the node, and
        distance-weighted betweenness centrality.
    """
    rows = []
    # subject_graphs yields (subject, category, graph, dropped_regions); the
    # dropped regions are not needed here.
    for sub, cat, G, _dropped in subject_graphs(2):
        b = nx.betweenness_centrality(G, weight="distance")
        # Computed once per graph rather than once per node.
        clustering = nx.clustering(G)
        for node in G:
            rows.append({
                "node": node,
                "subject": sub,
                "category": cats[cat],
                "degree": G.degree[node],
                "clust_coeff": clustering[node],
                "path_length": np.mean(list(nx.shortest_path_length(G, source=node, weight="distance").values())),
                "betweenness": b[node],
            })
    df = pd.DataFrame(rows)
    return df

def property_rank(df, columns=[], inverse_columns=[]):
    """Add a ``<column>_rank`` column for each requested column.

    Rows are ranked within each group formed by all index levels except the
    last one. A rank is the row's position in the sorted group divided by the
    group size, so it lies in [0, 1). ``columns`` are ranked in ascending
    order (smallest value gets 0); ``inverse_columns`` in descending order
    (largest value gets 0).

    ``df`` is sorted and extended in place; the same object is returned.
    """
    # Every index level but the last identifies a group; the last level
    # identifies the row being ranked.
    names = df.index.names[:-1]
    def rank(df, column, inverse=False):
        # The same direction is applied to the group levels and to `column`.
        df.sort_values(by=[*names, column], ascending=not inverse, inplace=True)
        for cat in df.reset_index().set_index(names).index.unique():
            nodes = df.loc[cat].index
            for i, node in enumerate(nodes):
                # `cat` is a scalar for a single grouping level and a tuple
                # otherwise; always_iterable lets both be unpacked.
                df.loc[
                    (*itx.always_iterable(cat), node), f"{column}_rank"
                ] = i/len(nodes)
    for column in columns:
        rank(df, column)
    for column in inverse_columns:
        rank(df, column, inverse=True)
    return df

def hubness(df, threshold = None, ivars=["category"]):
    """Score every node by combining the ranks of four nodal measures.

    Measures are averaged per (``ivars``..., node) and ranked with
    ``property_rank``: betweenness and degree ascending, path length and
    clustering coefficient descending. Without a ``threshold`` the hubness is
    the sum of the four ranks; with one it is the number of ranks that exceed
    the threshold.
    """
    direct = ["betweenness", "degree"]
    inverted = ["path_length", "clust_coeff"]
    per_node = df.groupby([*ivars, "node"]).mean()
    ranked = property_rank(per_node, columns=direct, inverse_columns=inverted)
    ranked["hubness"] = 0
    for measure in direct + inverted:
        rank_values = ranked[f"{measure}_rank"]
        if threshold is None:
            ranked["hubness"] += rank_values
        else:
            ranked["hubness"] += (rank_values > threshold).astype(int)
    return ranked
    

In [None]:
# Rank nodes by clustering coefficient within each (category, subject) and plot
# the distribution. The name must match the "clust_coeff" column produced by
# nodal_properties().
col = "clust_coeff"
indexed = property_rank(nodal_properties().set_index(["category", "subject", "node"]), [col])
distribution_plot(indexed, x=col+"_rank", y=col)

In [None]:
@nb_cache("hubness")
def get_hubs():
    """Return hubness scores computed per (category, subject, node)."""
    return hubness(nodal_properties(), ivars=["category", "subject"])
# Module-level table read by the drop-function and attack-analysis cells below.
hubs = get_hubs()

In [None]:
# cols = ["degree", "clust_coeff", "path_length", "betweenness"]

# Node-by-subject hubness heatmaps: "HC" subjects in the first third of the
# figure, "FEP" subjects in the remaining two thirds.
plt.figure(figsize=(30, 50))
plt.subplot(1, 3, 1)
table1 = (
    hubs
    .loc["HC"]
    .reset_index()
    .pivot(index="node", columns="subject", values="hubness")
)
sns.heatmap(table1, cmap="viridis", square=True, cbar=False)
plt.subplot(1, 3, (2,3))
table2 = (
    hubs
    .loc["FEP"]
    .reset_index()
    .pivot(index="node", columns="subject", values="hubness")
)
sns.heatmap(table2, cmap="viridis", square=True, cbar=False)

In [None]:
def get_drop_func(i, hub_df):
    """Build a function that returns a subject's ``i`` highest-hubness nodes.

    ``hub_df`` must expose "subject", "node" and "hubness" after
    ``reset_index()``. The returned callable takes a subject id and returns
    an index holding that subject's first ``i`` nodes in descending hubness
    order.
    """
    by_subject = hub_df.reset_index().set_index(["subject", "node"])
    ranked = by_subject.sort_values(["subject", "hubness"], ascending=False)

    def drop_func(sub):
        return ranked.loc[sub].index[:i]

    return drop_func
    

def _get_subject_df(x):
    """Return the subject summary frame with each subject's top ``x`` hubs removed.

    Defined at module level so it can be handed to ``multiprocessing.Pool.map``.
    """
    drop_top_hubs = get_drop_func(x, hubs)
    return subject_df(drop_regions=drop_top_hubs)
import multiprocessing as mp
@nb_cache("attack_analysis")
def attack_analysis():
    """Recompute the subject summaries while removing more and more hubs.

    For k = 0 .. (largest node count of any subject) - 1, the summaries are
    rebuilt with every subject's k highest-hubness nodes removed. The runs are
    distributed over a process pool and stacked into one DataFrame.
    """
    # Largest number of rows (nodes) any single subject has in `hubs`.
    # size() avoids positional [0] indexing on a label-indexed Series.
    max_nodes = int(hubs.reset_index().groupby("subject").size().max())
    with mp.Pool(processes=32) as pool:
        dfs = pool.map(_get_subject_df, range(max_nodes))
    return pd.concat(dfs, axis=0)

In [None]:
# Plot each whole-network measure against the number of removed regions and
# write all figures into one HTML page (default file name "dashboard.html").
df = attack_analysis()
# Regions removed = largest region count seen minus the run's region count.
df["num_dropped"] = df["num_regions"].max() - df["num_regions"]
figures_to_html([
    distribution_plot(df, x="num_dropped", y=col) 
    for col in [
        "transitivity",
        "efficiency",
        "density",
        "num_connected_comps",
        "largest_connected_comp",
        "degree",
    ]
])

In [None]:
# cols = ["degree", "clust_coeff", "path_length", "betweenness"]
df = nodal_properties()

# Hubness per node and category (default ivars), one column per group in a
# fixed display order.
table = (
    hubness(df)
    .reset_index()
    .pivot(index="node", columns="category", values="hubness")
    .reindex(columns=["HC", "FEP", "Treatment 3+ yr", "High risk"])
)
plt.figure(figsize=(20,40))
sns.heatmap(table, cmap="viridis", square=True)

In [None]:
# For every left-hemisphere connectome, find the edges with the highest edge
# betweenness and count, per diagnostic group, the bundles that make up more
# than 10% of any such edge.
bundle_counts = [dict(zip(all_bundles, it.repeat(0))) for _ in range(4)]
major_edges = {}
for bidsfile in layout.get(suffix="connectome", hemi="L"):
    sub = int(bidsfile.entities['subject'])
    cat = categories[sub]
    if cat not in cats:
        # Category 0 (no diagnosis) would otherwise index bundle_counts[-1]
        # and be counted with the last group.
        continue
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(bidsfile.path, 'rb') as f:
        d = pickle.load(f)
    connectome = d["connectome"]
    data = copy.deepcopy(connectome.matrix)
    np.fill_diagonal(data, 0)
    # from_numpy_array replaces from_numpy_matrix, which NetworkX 3.0 removed.
    G = nx.from_numpy_array(data)
    for edge in G.edges:
        G.edges[edge]["distance"] = 1/G.edges[edge]["weight"]
    edges = betweenness(nx.edge_betweenness_centrality(G, weight="distance"), threshold=0.9)
    # `betweenness` is defined twice in this notebook (returning a DataFrame
    # or a list of keys); take the edge keys from whichever form is in scope.
    edge_ids = edges.index if isinstance(edges, pd.DataFrame) else edges
    fractions = {}
    for x in edge_ids:
        p = connectome.get_bundles_of_edge(x)
        total = sum(p.values())
        for k in p:
            frac = p[k] / total
            if frac > 0.1:
                fractions[k] = frac
    major_edges[sub] = set(fractions.keys())

    bundles = [re.search(r"^cluster_(\d+)$", str(Path(d["bundle_paths"][x]).stem))[1] for x in fractions.keys()]
    for bundle in bundles:
        bundle_counts[cat-1][bundle] += 1

In [None]:
import plotly.express as px
# Stack the four per-group count tables into one frame (index = bundle id) and
# draw them as a bar chart coloured by group.
df = pd.concat([
    pd.DataFrame({"counts": bundle_counts[x], "category": cats[x+1]}) for x in range(4)
])
fig = px.bar(
    df,
    y="counts",
    color="category",
    labels={
        "index": "Bundle ID"
    },
    title="Most central bundles across subjects",
    width=1178,
    height=400,
)

fig.update_layout(
    margin=dict(l=50, r=50, t=50, b=50),
    showlegend=False
)

In [None]:
# Map every tract file name in resources/tract-assignments to the list of
# bundle ids (digit strings) it contains. The "commissural" and "hemispheric"
# files are skipped.
tract_assignments = {}
for path in Path("resources/tract-assignments").iterdir():
    if path.name in ["commissural", "hemispheric"]:
        continue
    with path.open('r') as f:
        # A line not shaped like "cluster_<digits>.vtp" raises TypeError here.
        bundles = [re.search(r"^cluster_(\d+)\.vtp$", s)[1] for s in f.readlines()]
    tract_assignments[path.name] = bundles


In [None]:
from collections import defaultdict
# Total count per bundle, summed over the rows that share an index label.
agg = df.set_index("category", append=True).groupby(level=0).sum()["counts"]
# Keep only bundles that were counted at least once.
agg = agg.where(agg > 0).dropna()
key_tracts = defaultdict(list)
# Bundles above the 70th percentile of the remaining totals, grouped by the
# tract(s) that list them.
for bundle in agg.where(agg > agg.quantile(0.7)).dropna().index:
    for name, tract in tract_assignments.items():
        if bundle in tract:
            key_tracts[name].append(bundle)

In [None]:
# Zero-based position of bundle "00408" within the "T_Sup-F" tract list
# (the last position if the id occurs more than once).
dict(zip(tract_assignments["T_Sup-F"], it.count()))["00408"]

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

subject = "008"

# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open(layout.get(subject=subject, suffix="connectome", hemi="L")[0].path, 'rb') as f:
    connectome = pickle.load(f)
matrix = copy.deepcopy(connectome["connectome"].matrix)
np.fill_diagonal(matrix, 0)
# from_numpy_array replaces from_numpy_matrix, which NetworkX 3.0 removed.
G = nx.from_numpy_array(matrix)
for edge in G.edges:
    G.edges[edge]["distance"] = 1/G.edges[edge]["weight"]

# Betweenness of every node, built directly so the cell does not depend on
# which of the notebook's two `betweenness` helpers is currently defined.
df = pd.DataFrame({"betweenness": nx.betweenness_centrality(G, weight="distance")})
path = layout.get(subject=subject, suffix="parcellation", extension=".vtk", hemi="L")[0].path

df = pd.concat([
    df,
    # node_sizes returns a mapping; wrap it in a named Series so it becomes
    # the "node_size" column.
    pd.Series(node_sizes(path), name="node_size"),
    pd.DataFrame({"degree": list(dict(G.degree).values())})
], axis=1)
df = df.sort_values(["betweenness"], ascending=True).reset_index(drop=True)

# Node size (secondary axis) and betweenness (primary axis) for the nodes in
# order of increasing betweenness.
fig  = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Scatter(x=df.index, y=df["node_size"], mode="markers"),
    secondary_y=True
)
fig.add_trace(
    go.Scatter(x=df.index, y=df["betweenness"], mode="markers"),
    secondary_y=False
)


In [None]:
# NOTE(review): `raw_layout` is not defined in any cell visible in this
# notebook; presumably a second BIDSLayout over the raw surfaces — confirm and
# define it before this cell.
raw_layout.get(subject="001", suffix="smoothwm", extension=".surf.gii", hemi="L")

In [None]:
def graphs():
    """Yield one graph per pickled connectome matrix found by ``layout``.

    Each file is unpickled, its diagonal is zeroed, and the result is
    converted to a NetworkX graph.

    NOTE(review): this treats the unpickled object itself as the matrix, while
    other cells read ``obj["connectome"].matrix`` — confirm the file format.
    NOTE: pickle.load can execute arbitrary code; only load trusted files.
    """
    for bidsfile in layout.get(suffix="connectome"):
        with open(bidsfile.path, 'rb') as f:
            data = pickle.load(f)
        np.fill_diagonal(data, 0)
        # from_numpy_array replaces from_numpy_matrix, which NetworkX 3.0
        # removed.
        yield nx.from_numpy_array(data)

In [None]:
def betweenness(b_vals, threshold=.5):
    """Return the keys whose score lies strictly above a fraction of the range.

    The cut-off is min + threshold * (max - min). The result is a list of
    keys in their original order; it is empty when no score exceeds the
    cut-off (for example when all scores are equal).

    Raises:
        ValueError: If ``b_vals`` is empty.
    """
    vals = b_vals.values()
    lowest = min(vals)
    margin = (max(vals) - lowest)*threshold + lowest
    return [key for key, val in b_vals.items() if val > margin]

In [None]:
# For each subject and betweenness threshold, record the smallest relative
# node size and lowest relative degree among the nodes above the threshold.
rows = []
for bidsfile in layout.get(suffix="connectome", hemi="L"):
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(bidsfile.path, 'rb') as f:
        connectome = pickle.load(f)
    data = copy.deepcopy(connectome["connectome"].matrix)
    np.fill_diagonal(data, 0)
    # from_numpy_array replaces from_numpy_matrix, which NetworkX 3.0 removed.
    G = nx.from_numpy_array(data)
    sub = int(bidsfile.entities['subject'])
    cat = categories[sub]
    for edge in G.edges:
        G.edges[edge]["distance"] = 1/G.edges[edge]["weight"]
    b_vals = nx.betweenness_centrality(G, weight="distance")

    sizes = node_sizes_relative(layout.get(subject=bidsfile.entities['subject'], suffix="parcellation", hemi="L")[0].path)
    degrees = degree_sizes_relative(dict(G.degree(weight="weight")))
    for threshold in np.arange(0, 1, 0.1):
        # Expects the list-returning `betweenness` helper defined just above.
        b_nodes = betweenness(b_vals, threshold)
        # No node clears the threshold when all betweenness values are equal;
        # record NaN instead of letting min() raise on an empty sequence.
        smallest = min((sizes[i] for i in b_nodes), default=np.nan)
        lowest_degree = min((degrees[i] for i in b_nodes), default=np.nan)

        rows.append({
            "sub": sub,
            "category": cat,
            "threshold": threshold,
            "smallest_node": smallest,
            "lowest_degree": lowest_degree
        })

df = pd.DataFrame(rows)
df["category"] = df["category"].astype("uint32")

In [None]:
import plotly.express as px

# Mean smallest-node size per category and threshold, with the standard
# deviation across subjects as error bars.
grouped = df.groupby(["category", "threshold"]).mean()
dev = df.groupby(["category", "threshold"]).std()
fig = px.line(
    grouped,
    x=grouped.index.get_level_values("threshold"),
    y="smallest_node",
    color=grouped.index.get_level_values("category"),
    width=584,
    height=400,
    labels={
        "x": "Betweenness (percentile)",
        "smallest_node": "Smallest Node (percentile)",
        "degree": "Degree",
        "lowest_degree": "Lowest Degree (percentile)",
    },
    title="Minimum size of nodes of given betweenness",
    # Error bars must come from the same column that is plotted on y.
    error_y=dev["smallest_node"],
    markers=True
)
fig.update_layout(
    margin=dict(l=50, r=50, t=50, b=50),
    showlegend=False
)

In [None]:
# One row per node and subject: node size, weighted degree and betweenness.
rows = []
for bidsfile in layout.get(suffix="connectome", hemi="L"):
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(bidsfile.path, 'rb') as f:
        loaded = pickle.load(f)
    # Other cells read the pickle as a dict holding the object under
    # "connectome"; accept both that layout and a bare connectome object.
    connectome = loaded["connectome"] if isinstance(loaded, dict) else loaded
    data = copy.deepcopy(connectome.matrix)
    np.fill_diagonal(data, 0)
    # from_numpy_array replaces from_numpy_matrix, which NetworkX 3.0 removed.
    G = nx.from_numpy_array(data)
    sub = int(bidsfile.entities['subject'])
    cat = categories[sub]
    for edge in G.edges:
        G.edges[edge]["distance"] = 1/G.edges[edge]["weight"]
    b_vals = nx.betweenness_centrality(G, weight="distance")

    sizes = node_sizes(layout.get(subject=bidsfile.entities['subject'], suffix="parcellation", hemi="L")[0].path)
    ddf = pd.concat([
        # node_sizes returns a mapping; pd.concat needs a Series or DataFrame.
        pd.Series(sizes, name="node_size"),
        pd.DataFrame({
            "degree": [d for _, d in G.degree(weight="weight")],
            "betweenness": list(b_vals.values()),
        })
    ], axis=1)
    ddf["subject"] = sub
    ddf["category"] = cat
    rows.append(ddf)

df = pd.concat(rows)


In [None]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots


# Degree against betweenness for category 1, with an OLS trend line.
axis_labels = {
    "x": "Betweenness (proportion)",
    "smallest_node": "Smallest Node (proportion)",
    "node_size": "Node size",
    "degree": "Degree",
    "betweenness": "Betweenness",
}
fig = px.scatter(
    df[df["category"] == 1],
    x="degree",
    y="betweenness",
    width=584,
    height=400,
    labels=axis_labels,
    title="Correlation between degree and betweenness",
    trendline="ols",
    color_discrete_sequence=["darkslategray"],
    opacity=0.6,
    trendline_color_override="blue",
)
fig.update_layout(
    margin=dict(l=50, r=50, t=50, b=50),
    showlegend=False,
    width=584,
    height=400,
)

In [None]:
# R-squared of the first trend line fitted for the scatter figure above.
px.get_trendline_results(fig).px_fit_results.iloc[0].rsquared

In [None]:
def get_spaced_elements(arr, count):
    """Pick ``count`` elements of ``arr`` at evenly spaced positions.

    Positions run from the first to the last index inclusive and are rounded
    to the nearest integer with numpy's rounding.
    """
    positions = np.round(np.linspace(0, len(arr) - 1, count)).astype(int)
    return [arr[position] for position in positions]

In [None]:
from collections import defaultdict
# Sample each subject's sorted betweenness, node-size and weighted-degree
# values at `num_points` evenly spaced positions.
num_points = 20
rows = []
for bidsfile in layout.get(suffix="connectome", hemi="L"):
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(bidsfile.path, 'rb') as f:
        connectome = pickle.load(f)
    data = copy.deepcopy(connectome["connectome"].matrix)
    np.fill_diagonal(data, 0)
    # from_numpy_array replaces from_numpy_matrix, which NetworkX 3.0 removed.
    G = nx.from_numpy_array(data)
    sub = int(bidsfile.entities['subject'])
    cat = categories[sub]
    for edge in G.edges:
        G.edges[edge]["distance"] = 1/G.edges[edge]["weight"]

    b_vals = sorted(nx.betweenness_centrality(G, weight="distance").values())
    sizes = sorted(node_sizes(layout.get(subject=bidsfile.entities['subject'], suffix="parcellation", hemi="L")[0].path).values())
    degrees = sorted(dict(G.degree(weight="weight")).values())
    rows.append(
        pd.DataFrame(
            {
                "betweenness": get_spaced_elements(b_vals, num_points),
                "node_size": get_spaced_elements(sizes, num_points),
                "degree": get_spaced_elements(degrees, num_points),
            },
            index=range(num_points),
        ).assign(subject=sub, category=cat)
    )

df = pd.concat(rows)

In [None]:
# Column plotted by the cells below.
column = "degree"
# Turn the sample position (0 .. num_points-1) into a fraction in [0, 1] and
# group by category and that fraction.
gb = (
    df.assign(index=df.index.to_series() / (num_points - 1))
    .groupby(["category", "index"])
)
# Mean of every column next to its standard deviation (suffix "_std").
ddf = pd.concat([gb.mean(), gb.std().rename(lambda n: str(n)+"_std", axis=1)], axis=1)
ddf = ddf.sort_values(column).reset_index()

In [None]:
import plotly.express as px
import plotly.graph_objs as go

# Mean of `column` per category along the sample fraction, with a shaded
# mean +/- std band added per category.
# NOTE(review): the title is fixed to "Degree distribution" although `column`
# is a variable, and the "x" label key does not match the plotted "index"
# column — confirm.
fig = px.line(
    ddf,
    x="index",
    y=column,
    color="category",
    width=584,
    height=400,
    labels={
        "x": "Nodes sorted by increasing betweeness",
        "node_size": "Node Size (# triangles)",
        "betweenness": "Betweenness",
        "degree": "Degree"
    },
    title="Degree distribution",
)
fig.update_layout(
    margin=dict(l=50, r=50, t=50, b=50),
    showlegend=False
)
for i in ddf["category"].unique():
    dddf = ddf[ddf["category"] == i]

    fig.add_traces([
        # Invisible upper bound of the band (mean + std).
        go.Scatter(
            x=dddf["index"],
            y=dddf[column+"_std"]+dddf[column],
            mode="lines",
            line=dict(width=0),
        ),
        # Invisible lower bound (mean - std), filled up to the trace above.
        go.Scatter(
            x=dddf["index"],
            y=dddf[column]-dddf[column+"_std"],
            mode="lines",
            line=dict(width=0),
            fill='tonexty',
            # The category code minus one selects the palette colour; code 0
            # would select the last palette entry.
            fillcolor=f'rgba{(*hex_to_rgb(px.colors.qualitative.Plotly[i-1]), 0.2)}'
        )
    ])
fig

In [None]:
import plotly.express as px
import plotly.graph_objs as go

# NOTE(review): this groups by a "point" column, which none of the frames
# assembled in the visible cells contain; presumably it expects rows produced
# by node_size_at_points — confirm, or remove this cell.
grouped = df.groupby(["category", "point"]).mean()
dev = df.groupby(["category", "point"]).std()
# dev_up, dev_down, x and color are assigned but not used below.
dev_up = grouped + dev
dev_down = grouped - dev
x = grouped.index.get_level_values("point")
color = grouped.index.get_level_values("category")
fig = px.line(
    grouped,
    x=grouped.index.get_level_values("point"),
    y=column,
    color=grouped.index.get_level_values("category"),
    width=584,
    height=400,
    labels={
        "x": "Nodes sorted by increasing betweeness",
        "node_size": "Node Size (# triangles)",
        "degree": "Degree"
    },
    title="Node size distribution",
    error_y=dev[column],
    markers=True
)
fig.update_layout(
    margin=dict(l=50, r=50, t=50, b=50),
    showlegend=False
)

In [None]:
# Per subject: the share of surface triangles covered by the parcellation, the
# number of lost fibers, and the sum of the connectome matrix.
rows = []
for bidsfile in layout.get(suffix="connectome", hemi="L"):
    sub = int(bidsfile.entities['subject'])
    cat = categories[sub]
    if cat not in cats:
        # No group name for this code (e.g. 0 = no diagnosis); skip the
        # subject, as subject_graphs does, instead of raising KeyError below.
        continue
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(bidsfile.path, 'rb') as f:
        loaded = pickle.load(f)
    # Other cells read the pickle as a dict holding the object under
    # "connectome"; accept both that layout and a bare connectome object.
    connectome = loaded["connectome"] if isinstance(loaded, dict) else loaded
    data = copy.deepcopy(connectome.matrix)
    np.fill_diagonal(data, 0)

    size = mesh_size(layout.get(subject=bidsfile.entities['subject'], suffix="parcellation", hemi="L")[0].path)
    # NOTE(review): `raw_layout` is not defined in any visible cell — confirm.
    total = mesh_size(raw_layout.get(subject=bidsfile.entities['subject'], suffix='smoothwm', hemi="L")[0].path)

    rows.append({
        "sub": sub,
        "category": cats[cat],
        "used": size/total,
        # np.nan is the spelling that still exists in NumPy 2.0 (np.NaN was
        # removed).
        "lost_fibers": connectome.lost_fibers if connectome.lost_fibers > 0 else np.nan,
        "num_fibers": np.sum(data),
    })

df = pd.DataFrame(rows)

In [None]:
# Display the per-subject table built in the previous cell.
df

In [None]:
import plotly.express as px

# Per-group distribution of the "num_fibers" column, outliers shown as points.
fig = px.violin(
    df,
    x="category",
    color="category",
    y="num_fibers",
    points="outliers",
    width=584,
    height=400,
    labels={
        "num_regions": "# Regions",
        "category": "Group",
        "used": "proportion of triangles",
        "num_fibers": "Number of fibers",
    },
    title="Number of fibers in connectome"
)
fig.update_layout(
    margin=dict(l=50, r=50, t=50, b=50),
    showlegend=False
)