In [None]:
### Import Libraries.

import os
import scanpy as sc
import anndata as ad
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

In [None]:
### Load Data.

# Location of the input and the file name, kept as named constants so the
# two literals are easy to spot and adjust.
DATA_DIR = "/folder/"
H5AD_FILE = "adata.h5ad"

# Switch the working directory first so the relative file name resolves there.
os.chdir(DATA_DIR)
adata = ad.read_h5ad(H5AD_FILE)

In [None]:
### Define Helper Functions.

def aggregate_cluster(df, cluster_col, clusters_of_interest, region_col = 'Region', sample_col = 'Sample_ID'):
    """Compute per-sample, per-region proportions for each cluster of interest.

    Rows whose ``cluster_col`` value is not in ``clusters_of_interest`` are
    discarded first, so every proportion is relative to the retained clusters
    only (not to all rows of ``df``).

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing ``cluster_col``, ``region_col`` and ``sample_col``.
        It is not modified.
    cluster_col : str
        Column holding the cluster label of each row.
    clusters_of_interest : iterable
        Cluster labels to keep and to compute proportions for.
    region_col : str, default 'Region'
        Column holding the region label.
    sample_col : str, default 'Sample_ID'
        Column holding the sample identifier.

    Returns
    -------
    dict
        Maps each cluster label to a DataFrame with columns ``sample_col``,
        ``region_col``, ``total`` (retained rows in the group), ``successes``
        (rows of that cluster) and ``prop`` (successes / total). Groups whose
        proportion is exactly 0 or 1 are dropped.
    """
    # Materialise once so a one-shot iterable is not exhausted by `isin`.
    clusters = list(clusters_of_interest)

    # Filter first and copy afterwards: the indicator columns added below are
    # then written to an independent frame rather than to a filtered slice
    # (which triggers SettingWithCopyWarning on pandas without copy-on-write).
    subset = df.loc[df[cluster_col].isin(clusters)].copy()

    results = {}
    for cluster in clusters:
        # str() keeps non-string labels (e.g. integer cluster ids) working.
        col_name = f'is_{str(cluster).lower()}'
        subset[col_name] = (subset[cluster_col] == cluster).astype(int)

        # observed=True: with categorical grouping columns, only the
        # sample/region pairs that actually occur are aggregated. The
        # unobserved pairs would have zero successes and be removed by the
        # filter below anyway.
        agg = subset.groupby([sample_col, region_col], observed=True).agg(
            total=(col_name, 'count'),
            successes=(col_name, 'sum')
        ).reset_index()
        agg['prop'] = agg['successes'] / agg['total']

        # Keep only groups with a proportion strictly between 0 and 1.
        agg = agg[(agg['successes'] > 0) & (agg['successes'] < agg['total'])]
        results[cluster] = agg
    return results



def check_normality(agg, region_col = 'Region', cluster_name = 'Cluster', show_plots = True):
    """Report a Shapiro–Wilk normality check of per-sample proportions, per region.

    For every region label present in ``agg[cluster_name][region_col]`` the
    function prints the sample count, the mean ± sample standard deviation of
    the ``'prop'`` column and the Shapiro–Wilk result, and optionally draws a
    histogram with a KDE overlay.

    Parameters
    ----------
    agg : dict
        Maps cluster names to DataFrames that contain at least ``region_col``
        and a ``'prop'`` column (the structure returned by
        ``aggregate_cluster``).
    region_col : str, default 'Region'
        Column holding the region label.
    cluster_name : str, default 'Cluster'
        Key of ``agg`` to analyse; also used (lower-cased) in printed text.
    show_plots : bool, default True
        Draw and show one histogram per region. Set to False for headless use.

    Returns
    -------
    pandas.DataFrame
        One row per region with columns ``region_col``, ``n_samples``,
        ``mean_prop``, ``sd_prop``, ``shapiro_W`` and ``shapiro_p``. The
        Shapiro–Wilk columns are NaN for regions with fewer than 3 samples.
    """
    cluster_df = agg[cluster_name]
    label = str(cluster_name).lower()
    nan = float('nan')

    rows = []
    # Iterate over the region labels in the data. Calling `.keys()` on the
    # DataFrame would yield its column names and leave every subset empty.
    for region in cluster_df[region_col].dropna().unique():
        props = cluster_df.loc[cluster_df[region_col] == region, 'prop'].to_numpy(dtype=float)
        n_samples = len(props)
        mean_prop = props.mean()
        # The sample standard deviation is undefined for a single value.
        sd_prop = props.std(ddof = 1) if n_samples > 1 else nan

        print(f"\nRegion: {region}")
        print(f"n samples = {n_samples}")
        print(f"Mean proportion {label} = {mean_prop:.3f} ± {sd_prop:.3f}")

        if n_samples >= 3:
            stat, p = stats.shapiro(props)
            print(f"Shapiro–Wilk W = {stat:.3f}, p = {p:.3g}")
            if p < 0.05:
                print("→ Data deviate from normality (use Wilcoxon).")
            else:
                print("→ Data approximately normal (t-test could also be used).")
        else:
            # Shapiro–Wilk needs at least three observations.
            stat, p = nan, nan
            print("→ Too few samples for Shapiro–Wilk (need at least 3); normality not assessed.")

        if show_plots:
            sns.histplot(props, kde=True)
            plt.title(f"{region}: per-sample {label} proportion")
            plt.xlabel(f"Proportion {label}")
            plt.show()

        rows.append({
            region_col: region,
            'n_samples': n_samples,
            'mean_prop': mean_prop,
            'sd_prop': sd_prop,
            'shapiro_W': stat,
            'shapiro_p': p,
        })

    return pd.DataFrame(
        rows,
        columns=[region_col, 'n_samples', 'mean_prop', 'sd_prop', 'shapiro_W', 'shapiro_p'],
    )

        

def mann_whitney_between_regions(agg, cluster_name, region_col = 'Region'):
    """Compare per-sample proportions between 'Brain' and 'Spinal_Cord'.

    Runs a two-sided Mann–Whitney U test on the ``'prop'`` values of the two
    regions, prints the outcome and returns a per-region summary table.

    Parameters
    ----------
    agg : dict
        Maps cluster names to DataFrames that contain at least ``region_col``
        and a ``'prop'`` column (the structure returned by
        ``aggregate_cluster``).
    cluster_name : hashable
        Key of ``agg`` to analyse; also used in the printed messages.
    region_col : str, default 'Region'
        Column holding the region label. Rows are selected by the labels
        ``'Brain'`` and ``'Spinal_Cord'``.

    Returns
    -------
    pandas.DataFrame
        Two rows (Brain, Spinal_Cord) with columns ``region_col``,
        ``n_samples``, ``mean_prop`` and ``median_prop``. Mean and median are
        NaN for a region without samples.
    """
    agg_cluster = agg[cluster_name]
    brain_props = agg_cluster.loc[agg_cluster[region_col] == 'Brain', 'prop'].to_numpy(dtype=float)
    spinal_props = agg_cluster.loc[agg_cluster[region_col] == 'Spinal_Cord', 'prop'].to_numpy(dtype=float)

    def _safe(reducer, values):
        # Avoid NumPy's empty-slice RuntimeWarning; an empty group has no statistic.
        return reducer(values) if len(values) else float('nan')

    brain_mean = _safe(np.mean, brain_props)
    spinal_mean = _safe(np.mean, spinal_props)

    print(f"\nBrain samples: n = {len(brain_props)}, mean = {brain_mean:.3f}")
    print(f"Spinal cord samples: n = {len(spinal_props)}, mean = {spinal_mean:.3f}")

    summary = pd.DataFrame({
        region_col: ['Brain', 'Spinal_Cord'],
        'n_samples': [len(brain_props), len(spinal_props)],
        'mean_prop': [brain_mean, spinal_mean],
        'median_prop': [_safe(np.median, brain_props), _safe(np.median, spinal_props)]
    })

    # Without observations in both regions no test can be run. Say so
    # explicitly instead of letting a NaN p-value read as "not significant".
    if len(brain_props) == 0 or len(spinal_props) == 0:
        print(f"\n→ Mann–Whitney U test skipped for {cluster_name}: at least one region has no samples.")
        return summary

    u_stat, p_value = stats.mannwhitneyu(brain_props, spinal_props, alternative='two-sided')
    print(f"\nMann–Whitney U = {u_stat:.3f}, p = {p_value:.3g}")

    if p_value < 0.05:
        # The direction is reported from the group means, as in the printout above.
        direction = "higher" if brain_mean > spinal_mean else "lower"
        print(f"→ Significant difference: {cluster_name} in brain is {direction} than in spinal cord.")
    else:
        print(f"→ No significant difference for {cluster_name} between regions.")

    return summary

In [None]:
### Activation_State Analysis.

ACT = 'Activation_States'

# Select rows with Status "Control" and Enrichment "No", keep only the three
# columns the helpers read, then restrict to the two labels of interest.
df_sc = (
    adata.obs
    .loc[
        lambda obs: (obs["Status"] == "Control") & (obs["Enrichment"] == "No"),
        [ACT, 'Region', 'Sample_ID'],
    ]
    .loc[lambda obs: obs[ACT].isin(['Reactive_Oligos', 'Homeostatic_Oligos'])]
    .copy()
)

# Per-sample, per-region proportions for each label.
agg_sc = aggregate_cluster(
    df_sc,
    cluster_col=ACT,
    clusters_of_interest=['Reactive_Oligos', 'Homeostatic_Oligos'],
)

# Normality diagnostics first, then the between-region comparison.
check_normality(agg_sc, cluster_name='Reactive_Oligos')
check_normality(agg_sc, cluster_name='Homeostatic_Oligos')

summary_reactive = mann_whitney_between_regions(agg_sc, cluster_name='Reactive_Oligos')
summary_homeostatic = mann_whitney_between_regions(agg_sc, cluster_name='Homeostatic_Oligos')

print(summary_reactive, summary_homeostatic, sep="\n")

In [None]:
### Basic_Clusters Analysis.

ACT = 'Basic_Clusters'

# Select rows with Status "Control" and Enrichment "No", keep only the three
# columns the helpers read, then restrict to the two clusters of interest.
df_bc = (
    adata.obs
    .loc[
        lambda obs: (obs["Status"] == "Control") & (obs["Enrichment"] == "No"),
        [ACT, 'Region', 'Sample_ID'],
    ]
    .loc[lambda obs: obs[ACT].isin(['Oligos_OPALIN+', 'Oligos_RBFOX1+'])]
    .copy()
)

# Per-sample, per-region proportions for each cluster.
agg_bc = aggregate_cluster(
    df_bc,
    cluster_col=ACT,
    clusters_of_interest=['Oligos_OPALIN+', 'Oligos_RBFOX1+'],
)

# Normality diagnostics first, then the between-region comparison.
check_normality(agg_bc, cluster_name='Oligos_OPALIN+')
check_normality(agg_bc, cluster_name='Oligos_RBFOX1+')

summary_opalin = mann_whitney_between_regions(agg_bc, cluster_name='Oligos_OPALIN+')
summary_rbfox1 = mann_whitney_between_regions(agg_bc, cluster_name='Oligos_RBFOX1+')

print(summary_opalin, summary_rbfox1, sep="\n")