In [None]:
### Import Libraries.

import os
from glob import glob

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
from networkx.algorithms import community

In [None]:
### Load Network Files.
# Find all Excel files matching the pattern (e.g., "C9ALS*.xlsx").
# `glob` is the function from the standard-library `glob` module.

folder_path = "/folder/"
# Sort the matches: glob returns paths in an arbitrary, filesystem-dependent order,
# and everything built from this list (dict order, merged row order) follows it.
excel_files = sorted(glob(os.path.join(folder_path, "C9ALS*.xlsx")))  # or "sALS*.xlsx", "Control*.xlsx"

In [None]:
### Load, Annotate, and Store Each File.

# Maps the cleaned file label -> the table read from that file.
data_list = {}

for file_path in excel_files:
    # Label = file name with the shared prefix and the ".xlsx" text removed.
    file_name = os.path.basename(file_path)
    file_name_clean = file_name
    for token in ("Filtered_CellOracle_", ".xlsx"):
        file_name_clean = file_name_clean.replace(token, "")

    # Read the workbook and stamp every row with the label it came from.
    df = pd.read_excel(file_path)
    df["Cluster_Column"] = file_name_clean
    data_list[file_name_clean] = df

# Show the first rows of every loaded table as a quick sanity check.
for name in data_list:
    df = data_list[name]
    print(f"\nPreview of {name}:")
    print(df.head())

In [None]:
### Merge All into a Single Dataframe and Clean Up.

# Stack the per-file tables row-wise under a fresh 0..n-1 index.
merged_data = pd.concat(list(data_list.values()), ignore_index = True)
# Strip any literal ".xlsx" text from the labels (plain substring match, not a regex).
cluster_labels = merged_data["Cluster_Column"].str.replace(".xlsx", "", regex = False)
merged_data["Cluster_Column"] = cluster_labels

In [None]:
### Save Merged File.

# The workbook is written into the same folder the inputs were read from.
output_name = "Filtered_CellOracle_All_By_Cluster.xlsx"
output_path = os.path.join(folder_path, output_name)
merged_data.to_excel(output_path, index = False)
print(f"\nMerged dataset saved to: {output_path}")

In [None]:
### Function to Filter Genes Based on pct of Cells Expressing the Gene.

def filter_genes_by_subcluster_expression(adata: "anndata.AnnData", gene_list: list, subcluster_key: str = 'Subclusters', min_frac: float = 0.10) -> list:
    """Keep the genes that are expressed in enough cells of every subcluster.

    A gene is kept only if, in each group of `adata.obs[subcluster_key]`, the
    fraction of cells with a value > 0 in `.X` is at least `min_frac`.

    Parameters
    ----------
    adata
        Annotated data object exposing `var_names`, `obs`, `obs_names` and
        `adata[cells, gene].X` indexing. The annotation is a string so that
        defining the function does not require `anndata` to be imported here.
    gene_list
        Candidate gene names; names missing from `adata.var_names` are ignored.
    subcluster_key
        Column of `adata.obs` holding the group label of each cell.
    min_frac
        Minimum fraction of expressing cells required in every group.

    Returns
    -------
    list
        The genes of `gene_list` (original order) that pass in all groups.

    Raises
    ------
    ValueError
        If none of the requested genes is present in `adata.var_names`.
    """
    genes_present = [g for g in gene_list if g in adata.var_names]
    if len(genes_present) == 0:
        raise ValueError("None of the genes in gene_list are present in adata.var_names")

    # Resolve the cells of each group once; this does not depend on the gene.
    labels = adata.obs[subcluster_key]
    groups = []
    for sub in labels.unique():
        cells = adata.obs_names[labels == sub]
        # A label that matches no cell (e.g. NaN, which never compares equal)
        # carries no information; skipping it avoids a 0/0 division below.
        if len(cells) > 0:
            groups.append(cells)

    filtered_genes = []

    for gene in genes_present:
        for cells in groups:
            expr = adata[cells, gene].X

            # Densify sparse matrices; either way end up with a flat 1-D array.
            if hasattr(expr, "toarray"):
                expr = expr.toarray().ravel()
            else:
                expr = np.asarray(expr).ravel()

            frac_expressing = np.count_nonzero(expr > 0) / expr.size
            if frac_expressing < min_frac:
                # One failing group is enough to reject the gene.
                break
        else:
            # Loop finished without a break: the gene passed in every group.
            filtered_genes.append(gene)

    return filtered_genes

# Genes to screen. With the list left empty, the call below raises ValueError
# (no requested gene is found in adata.var_names), so fill this in before running.
gene_list = [

]
# NOTE(review): `adata` is not created in the visible cells — presumably an AnnData
# object loaded elsewhere; confirm it exists before this cell runs.
filtered_genes = filter_genes_by_subcluster_expression(adata, gene_list, subcluster_key = 'Cluster_Column', min_frac = 0.10)
print("Genes kept:", filtered_genes)

# Kept genes that are not already in `genes`.
# NOTE(review): `genes` is not defined in the visible cells either — TODO confirm its source.
filtered_unique = [g for g in filtered_genes if g not in genes]
print(filtered_unique)
len(filtered_genes)

In [None]:
## Function: Create "Edge" Identifier.

def prepare_edges(df):
    """Return a copy of `df` with an order-independent 'edge' key column.

    The key is the sorted (source, target) pair as a tuple, so A->B and B->A
    share the same key, i.e. edge direction is deliberately ignored.

    Parameters
    ----------
    df : pandas.DataFrame
        Edge table with 'source' and 'target' columns.

    Returns
    -------
    pandas.DataFrame
        Copy of `df` with the extra 'edge' column; the input is not modified.
    """
    df = df.copy()
    # Build the keys with zip instead of a row-wise DataFrame.apply: it is much
    # faster and also works on an empty table, where apply(axis=1) hands back a
    # DataFrame that cannot be assigned to a single column.
    edge_keys = [tuple(sorted(pair)) for pair in zip(df['source'], df['target'])]
    df['edge'] = pd.Series(edge_keys, index = df.index, dtype = object)
    return df

# Add the 'edge' key to the control and disease edge tables.
# NOTE(review): neither input table is created in the visible cells — presumably
# edge tables with 'source'/'target' columns loaded elsewhere; confirm before running.
control = prepare_edges(network_mc_sc_control)
disease = prepare_edges(network_sc_c9als_sals) ### Or network_mc_c9als/network_mc_sals

In [None]:
### Merge Edges and Compute Diffs.

# Inner join on the edge key: only edges present in both tables survive.
edge_stats = ["edge", "coef_mean", "p", "-logp"]
merged = control[edge_stats].merge(
    disease[edge_stats],
    on = "edge",
    suffixes = ("_ctrl", "_dis"),
)

# Disease-minus-control change of the coefficient and of the "-logp" column.
merged["delta_coef"] = merged["coef_mean_dis"] - merged["coef_mean_ctrl"]
merged["delta_logp"] = merged["-logp_dis"] - merged["-logp_ctrl"]
# Standardise delta_coef against its own mean and standard deviation.
centered_delta = merged["delta_coef"] - merged["delta_coef"].mean()
merged["z_diff"] = centered_delta / merged["delta_coef"].std()

# Keep edges where both the coefficient and the -logp moved beyond the cut-offs.
large_coef_shift = merged["delta_coef"].abs() > 0.1
large_logp_shift = merged["delta_logp"].abs() > 1
diff_edges = merged[large_coef_shift & large_logp_shift]
diff_edges.sort_values('delta_coef', ascending = False).head(10)

In [None]:
### Edge Level DNA.

# Undirected graph of the differential edges, weighted by the coefficient change.
G = nx.Graph()
G.add_weighted_edges_from(
    (u, v, delta) for (u, v), delta in zip(diff_edges["edge"], diff_edges["delta_coef"])
)

if G.number_of_edges() == 0:
    # min()/max() below would fail on an empty edge list; report it instead.
    print("No differential edges passed the thresholds; nothing to draw.")
else:
    pos_kamada = nx.kamada_kawai_layout(G)
    weights = [G[u][v]["weight"] for u, v in G.edges()]
    weight_min, weight_max = min(weights), max(weights)

    fig, ax = plt.subplots(figsize = (8, 8))
    nx.draw(
        G,
        pos_kamada,
        ax = ax,
        with_labels=True,
        node_color="lightgray",
        edge_color=weights,
        width = [abs(w) * 5 for w in weights],
        edge_cmap = plt.cm.coolwarm,
        # Same limits as the colourbar below, so edge colours and bar agree.
        edge_vmin = weight_min,
        edge_vmax = weight_max,
        font_size = 10
    )

    sm = plt.cm.ScalarMappable(
        cmap=plt.cm.coolwarm,
        norm=plt.Normalize(vmin = weight_min, vmax = weight_max)
    )
    sm.set_array([])
    # The mappable is not attached to any Axes, so the target Axes must be given
    # explicitly; recent Matplotlib versions refuse to guess it.
    cbar = fig.colorbar(sm, ax = ax, shrink = 0.7)
    cbar.set_label("Δ Coefficient (Disease - Control)", fontsize = 12)

    ax.set_title("Differential Network (Disease vs Control)", fontsize = 14)
    plt.show()

In [None]:
### Lost / Gained Edges.

# Edge keys with p < 0.05 in each condition.
sig_ctrl = set(control.loc[control["p"] < 0.05, "edge"])
sig_dis = set(disease.loc[disease["p"] < 0.05, "edge"])

# Significant in one condition but not in the other.
lost_edges = sig_ctrl.difference(sig_dis)
gained_edges = sig_dis.difference(sig_ctrl)

In [None]:
### Node Level DNA.

# One graph per condition; every edge carries its "coef_mean" as an attribute.
G_ctrl = nx.from_pandas_edgelist(control, source = "source", target = "target", edge_attr = ["coef_mean"])
G_dis = nx.from_pandas_edgelist(disease, source = "source", target = "target", edge_attr = ["coef_mean"])

# Degree of every node in each graph, indexed by node name.
deg_ctrl = pd.Series(dict(G_ctrl.degree()), name = "degree_ctrl")
deg_dis = pd.Series(dict(G_dis.degree()), name = "degree_dis")

# Align on node name; a node absent from one graph gets degree 0 there.
deg_change = pd.concat([deg_ctrl, deg_dis], axis = 1).fillna(0)
deg_change["delta_degree"] = deg_change["degree_dis"].sub(deg_change["degree_ctrl"])

In [None]:
### Betweenness & Centrality.

# Betweenness centrality of every node in each graph, using "coef_mean" as weight.
# NOTE(review): NetworkX treats this weight as a distance when finding shortest
# paths; "coef_mean" looks like an interaction strength and may be negative —
# confirm that using it as a distance is intended.
centrality_ctrl = nx.betweenness_centrality(G_ctrl, weight = "coef_mean")
centrality_dis = nx.betweenness_centrality(G_dis, weight = "coef_mean")

# Align both conditions on node name (a missing node counts as 0) and take the
# disease-minus-control difference. The table was previously built twice with
# identical code; it is built once here.
centrality_df = pd.DataFrame({"ctrl": centrality_ctrl, "dis": centrality_dis}).fillna(0)
centrality_df["delta_centrality"] = centrality_df["dis"] - centrality_df["ctrl"]
centrality_df.sort_values('delta_centrality', ascending = False).head(10)

In [None]:
### Network Topology Summary.

# Greedy-modularity community partition of each graph.
communities_ctrl = community.greedy_modularity_communities(G_ctrl)
communities_dis  = community.greedy_modularity_communities(G_dis)


# Size and density of each graph, collected as [control, disease] pairs.
metrics = {"num_nodes": [], "num_edges": [], "density": []}
for graph in (G_ctrl, G_dis):
    metrics["num_nodes"].append(graph.number_of_nodes())
    metrics["num_edges"].append(graph.number_of_edges())
    metrics["density"].append(nx.density(graph))

topology_summary = pd.DataFrame(metrics, index=["Control", "Disease"])
print(topology_summary)

In [None]:
### Degree Change Plot.

# Order the nodes from the lowest to the highest change in degree.
deg_change_sorted = deg_change.sort_values(by = "delta_degree")

# One bar per node on the current axes of a new 10x5 figure.
plt.figure(figsize = (10, 5))
axis = plt.gca()
axis.bar(deg_change_sorted.index, deg_change_sorted["delta_degree"])
plt.xticks(rotation = 90)
axis.set_title("Change in Node Degree (Disease - Control)")
axis.set_ylabel("Δ Degree")
plt.show()

In [None]:
### Filter Top Change Nodes.

# Every node that appears as a source in either edge table.
source_nodes = set(control["source"]) | set(disease["source"])

# Keep source nodes whose degree changed by more than the cut-off in either direction.
is_large_change = deg_change["delta_degree"].abs() > 2 ## Change the Parameter.
is_source = deg_change.index.isin(source_nodes)
deg_change_top = deg_change[is_large_change & is_source].sort_values("delta_degree")

# One bar per retained node on the current axes of a new 10x3 figure.
plt.figure(figsize=(10, 3))
axis = plt.gca()
axis.bar(deg_change_top.index, deg_change_top["delta_degree"], color = "steelblue")
plt.xticks(rotation=90)
axis.set_title("Change in Node Degree — Source Nodes Only")
axis.set_ylabel("Δ Degree")
plt.tight_layout()
plt.show()

In [None]:
### Combined Control vs Disease Edges.

# Tag each edge table with its condition (sets the column on the tables themselves).
for frame, label in ((control, "Control"), (disease, "Disease")):
    frame["condition"] = label

# Stack both tables so the condition can serve as the grouping variable.
combined = pd.concat([control, disease], ignore_index = True)

plt.figure(figsize = (8, 5))
sns.boxplot(x = "condition", y = "coef_mean", data = combined, palette = "Set2")
plt.title("Distribution of Edge Coefficients (Control vs Disease)")
plt.show()

In [None]:
### Centrality Summary.

# Per-node degree and weighted betweenness for both graphs, aligned on node name.
degree_columns = {
    "degree_ctrl": pd.Series(dict(G_ctrl.degree())),
    "degree_dis": pd.Series(dict(G_dis.degree())),
}
betweenness_columns = {
    "betweenness_ctrl": pd.Series(nx.betweenness_centrality(G_ctrl, weight = "coef_mean")),
    "betweenness_dis": pd.Series(nx.betweenness_centrality(G_dis, weight = "coef_mean")),
}
# Nodes missing from one graph are filled with 0.
metrics_df = pd.DataFrame({**degree_columns, **betweenness_columns}).fillna(0)

# Long format: one row per (column, node) value, then split "<metric>_<condition>".
metrics_melted = pd.melt(metrics_df, var_name = "metric_condition", value_name = "value")
name_parts = metrics_melted["metric_condition"].str.split("_", expand = True)
metrics_melted[["metric", "condition"]] = name_parts

# Box plot of the degree values only, one box per condition.
degree_rows = metrics_melted[metrics_melted["metric"] == "degree"]

plt.figure(figsize = (8, 5))
sns.boxplot(
    x = "condition",
    y = "value",
    data = degree_rows,
    palette = "Set2"
)
plt.title("Node Degree Distribution (Control vs Disease)")
plt.show()

In [None]:
### Monte Carlo Test.

def monte_carlo_network_test(G_ctrl, G_dis, n_iter = 1000, seed = None):
    """Compare mean centralities of two graphs against randomly rewired copies.

    The observed statistics are the disease-minus-control differences in mean
    degree centrality and in mean betweenness centrality (weight "coef_mean").
    In each iteration both graphs are copied and rewired with
    `nx.double_edge_swap` (20% of the edge count as swaps, at most 5x the edge
    count as attempts), and the same two differences are recorded. Each p-value
    is the fraction of iterations whose absolute difference is at least the
    absolute observed difference.

    Parameters
    ----------
    G_ctrl, G_dis : networkx.Graph
        Control and disease graphs; they are copied, never modified.
    n_iter : int
        Number of rewiring iterations.
    seed : int or None
        Seed of the random generator that drives every edge swap. The same seed
        gives the same null distributions; None gives a different run each time.

    Returns
    -------
    dict
        "obs_deg_diff", "p_deg", "obs_bet_diff", "p_bet", plus the null
        distributions "null_deg" and "null_bet" as NumPy arrays.

    Notes
    -----
    NOTE(review): NetworkX documents double_edge_swap as keeping node degrees
    fixed, so the rewired mean degree centrality presumably never changes and
    p_deg cannot be informative — confirm the intended null model.
    NOTE(review): swapped edges appear to be created without a "coef_mean"
    attribute, in which case betweenness uses its default weight for them —
    confirm.
    """
    rng = np.random.default_rng(seed)

    def mean_degree(graph):
        return np.mean(list(nx.degree_centrality(graph).values()))

    def mean_betweenness(graph):
        return np.mean(list(nx.betweenness_centrality(graph, weight = "coef_mean").values()))

    def rewired_copy(graph):
        # Draw an integer seed from `rng` for each swap call. `rng` used to be
        # created but never used, so `seed` had no effect on the result.
        n_edges = graph.number_of_edges()
        shuffled = graph.copy()
        nx.double_edge_swap(
            shuffled,
            nswap = int(n_edges * 0.2),
            max_tries = n_edges * 5,
            seed = int(rng.integers(2**32)),
        )
        return shuffled

    obs_deg_diff = mean_degree(G_dis) - mean_degree(G_ctrl)
    obs_bet_diff = mean_betweenness(G_dis) - mean_betweenness(G_ctrl)

    deg_diffs = []
    bet_diffs = []

    for _ in range(n_iter):
        Gc = rewired_copy(G_ctrl)
        Gd = rewired_copy(G_dis)

        deg_diffs.append(mean_degree(Gd) - mean_degree(Gc))
        bet_diffs.append(mean_betweenness(Gd) - mean_betweenness(Gc))

    # Two-sided: share of null differences at least as extreme as the observed one.
    p_deg = np.mean(np.abs(deg_diffs) >= np.abs(obs_deg_diff))
    p_bet = np.mean(np.abs(bet_diffs) >= np.abs(obs_bet_diff))

    return {
        "obs_deg_diff": obs_deg_diff,
        "p_deg": p_deg,
        "obs_bet_diff": obs_bet_diff,
        "p_bet": p_bet,
        "null_deg": np.array(deg_diffs),
        "null_bet": np.array(bet_diffs)
    }


# Run the Monte Carlo comparison of the two graphs (1000 iterations, seed argument 42).
result = monte_carlo_network_test(G_ctrl, G_dis, n_iter = 1000, seed = 42)

# Report each observed difference together with its Monte Carlo p-value.
print(f"Observed Δ Degree Centrality: {result['obs_deg_diff']:.4f}, Monte Carlo p = {result['p_deg']:.4f}")
print(f"Observed Δ Betweenness: {result['obs_bet_diff']:.4f}, Monte Carlo p = {result['p_bet']:.4f}")

In [None]:
### Plots Δ Degree

# One colour for positive changes, another for everything else.
colors = ['#7F1734' if x > 0 else '#556B2F' for x in deg_change_top['delta_degree']]

sns.set(style = "white")

fig, ax = plt.subplots(figsize = (7, 4))

bars = ax.bar(
    deg_change_top.index, 
    deg_change_top['delta_degree'], ### or delta_betweenness
    color = colors, 
    alpha = 0.7,
    edgecolor = 'none'
)

# Dashed zero line separating gains from losses.
ax.axhline(0, color = '#404040', linewidth = 1.5, linestyle = '--')

# Seeded generator so the jittered markers land in the same place on every run;
# the global NumPy RNG used before made the saved figure differ between runs.
jitter_rng = np.random.default_rng(42)
for i, val in enumerate(deg_change_top['delta_degree']):
    jitter_x = jitter_rng.normal(i, 0.08, 1)
    ax.scatter(
        jitter_x, val,
        color = colors[i],
        alpha = 0.1,
        s = 8,
        edgecolor = 'none'
    )

# Open frame: hide the top/right spines, emphasise bottom/left.
for spine in ['top', 'right']:
    ax.spines[spine].set_visible(False)
for spine in ['bottom', 'left']:
    ax.spines[spine].set_linewidth(1.5)
    ax.spines[spine].set_color("#404040")

ax.set_xticks(range(len(deg_change_top.index)))
ax.set_xticklabels(deg_change_top.index, rotation = 45, ha = 'right', fontsize = 8, fontweight = 'bold', color = '#404040')

# Pin the y tick positions before relabelling them: fixed labels on automatic
# ticks trigger a Matplotlib warning and can end up next to the wrong tick once
# the layout changes. Pinning ticks may widen the view, so restore the limits.
ylim = ax.get_ylim()
yticks = ax.get_yticks()
ax.set_yticks(yticks)
ax.set_yticklabels([f"{y:.2f}" for y in yticks], fontsize = 10, fontweight = 'bold', color = '#404040')
ax.set_ylim(ylim)

ax.set_xlabel('Source Nodes', fontsize = 10, fontweight = 'bold', color = '#404040')
ax.set_ylabel('Δ Degree (Disease - Control)', fontsize = 10, fontweight = 'bold', color = '#404040')
ax.set_title('Change in Node Degree — Source Nodes', fontsize = 12, fontweight = 'bold', color = '#404040')

plt.tight_layout()
fig.savefig("Delta_Degree_Source_Nodes.png", dpi = 800, bbox_inches = "tight")
plt.show()