In [None]:
### Import Libraries.

import os
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import anndata as ad
import scanpy as sc
import matplotlib.pyplot as plt

from sccoda.util import comp_ana as mod
from sccoda.util import cell_composition_data as dat
from sccoda.util import data_visualization as viz

In [3]:
#### Load Data.

# Switch the working directory to the data folder so that the relative
# file name below resolves inside it.
DATA_DIR = "/folder/"
os.chdir(DATA_DIR)

# Read the full AnnData object from disk.
adata_all = ad.read_h5ad("adata.h5ad")

In [None]:
### Function for scCODA Analysis.

def run_sccoda_analysis(
    adata,
    region,
    cluster_key,
    status_pair = ("Control", "sALS"),
    reference_cell_type = "Fibroblasts",
    n_iter = 20000
):
    """Run a scCODA reference-cycling analysis for one region and status contrast.

    The function keeps cells with ``Enrichment == "No"`` in the requested
    ``Region``, counts cells per ``Sample_ID`` and cluster, builds a scCODA
    data object, restricts it to the samples whose ``Status`` is in
    ``status_pair``, shows a boxplot of the composition, and then fits one
    scCODA model per cell type, using each cell type in turn as the
    reference. For every cell type it tallies in how many of those runs the
    effect was reported by ``credible_effects()``.

    Parameters
    ----------
    adata
        AnnData object whose ``obs`` has the columns ``Enrichment``,
        ``Region``, ``Sample_ID``, ``Status`` and ``cluster_key``.
    region
        Value of ``obs["Region"]`` to analyse.
    cluster_key
        Name of the ``obs`` column holding the cell-type / cluster labels.
    status_pair
        The ``Status`` values to compare. The model formula is plain
        ``"Status"``, so the baseline level is whatever the formula backend
        picks (NOTE(review): presumably the first category - confirm).
    reference_cell_type
        Currently unused: every cell type is cycled through as the
        reference. Kept so existing callers that pass it keep working.
    n_iter
        Number of HMC results requested from ``sample_hmc`` for each run.

    Returns
    -------
    pandas.DataFrame
        Indexed by cell type, with columns ``times_credible`` (number of
        reference runs in which the cell type was flagged), ``pct_credible``
        (``times_credible`` divided by the number of cell types) and
        ``is_credible`` (``pct_credible >= 0.5``).

    Raises
    ------
    ValueError
        If no cells match the region filter, or if one of the statuses in
        ``status_pair`` has no samples. ``pandas.errors.MergeError`` (a
        ``ValueError`` subclass) is raised when a sample carries more than
        one ``Status``.
    """
    print(f"\nRunning scCODA for region = {region}, contrast = {status_pair}, clusters = {cluster_key}")

    ### Subset to Non-Enriched Cells of the Requested Region.
    adata_sub = adata[
        (adata.obs["Enrichment"] == "No") &
        (adata.obs["Region"] == region)
    ].copy()
    if adata_sub.n_obs == 0:
        raise ValueError(f"No non-enriched cells found for region {region!r}.")

    ### Count Cells per Sample and Cluster (Samples in Rows, Clusters in Columns).
    counts_df = (
        adata_sub.obs
        .groupby(["Sample_ID", cluster_key])
        .size()
        .unstack(fill_value = 0)
        .reset_index()
    )

    ### Create scCODA Data Object.
    data = dat.from_pandas(counts_df, covariate_columns = ["Sample_ID"])

    ### Attach the Status of Each Sample.
    # A left merge keeps the row order of data.obs, so no pre-sorting of the
    # metadata is needed. validate= fails loudly if a sample has more than
    # one Status instead of silently duplicating rows.
    meta = adata_sub.obs[["Sample_ID", "Status"]].drop_duplicates()
    obs_index = data.obs.index
    obs_with_status = data.obs.merge(
        meta, on = "Sample_ID", how = "left", validate = "many_to_one"
    )
    # merge() returns a fresh RangeIndex; restore the original obs names.
    obs_with_status.index = obs_index
    data.obs = obs_with_status

    ### Subset to Selected Diagnostic Groups.
    # Copy so the models work on a standalone object rather than a view.
    data_sub = data[data.obs["Status"].isin(status_pair)].copy()

    present_statuses = set(data_sub.obs["Status"])
    missing_statuses = [s for s in status_pair if s not in present_statuses]
    if missing_statuses:
        raise ValueError(
            f"No samples with Status in {missing_statuses} for region {region!r}."
        )

    ### Initial visualization
    viz.boxplots(data_sub, feature_name = "Status")
    plt.show()

    ### Cycle Through all Possible Reference Cell Types.
    cell_types = data_sub.var.index
    # Integer-typed counter (a scalar broadcast over the index) so that the
    # in-place additions below stay numeric.
    results_cycle = pd.DataFrame({"times_credible": 0}, index = cell_types)

    for ref_ct in cell_types:
        print(f"Reference: {ref_ct}")
        model_temp = mod.CompositionalAnalysis(
            data_sub, formula = "Status", reference_cell_type = ref_ct
        )
        temp_results = model_temp.sample_hmc(num_results = n_iter)
        cred_eff = temp_results.credible_effects()
        # Drop the first index level so the remaining labels line up with
        # the cell-type index of results_cycle.
        cred_eff.index = cred_eff.index.droplevel(level = 0)
        results_cycle["times_credible"] += cred_eff.astype("int")

    ### Compute Proportion Credible
    # Done once, after every reference has been tried.
    results_cycle["pct_credible"] = results_cycle["times_credible"] / len(cell_types)
    results_cycle["is_credible"] = results_cycle["pct_credible"] >= 0.5

    print("\nFinal credible cell types:")
    print(results_cycle[results_cycle["is_credible"]])

    return results_cycle

In [None]:
### Run Example.

# Brain samples, Control vs. sALS, with composition measured over the
# "Superclusters" annotation.
results_brain_sals = run_sccoda_analysis(
    adata = adata_all,
    region = "Brain",
    cluster_key = "Superclusters",
    status_pair = ("Control", "sALS"),
)