##  scCODA - Compositional analysis of labeled single-cell data

## Imports

In [3]:
#import warnings
#warnings.filterwarnings("ignore")

#import mudata as mu
#import matplotlib.pyplot as plt
#import scanpy as sc
#import pertpy as pt
#import pandas as pd
#import os
#plt.rcParams['figure.figsize'] = (12, 10)

In [28]:
import warnings
import pickle as pkl
import pandas as pd
import scanpy as sc
import numpy as np
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages
from matplotlib import pyplot as plt
from sccoda.util import cell_composition_data as dat
from sccoda.util import comp_ana as mod
from sccoda.util import data_visualization as viz

## Input

In [39]:
# path of partly preprocessed HLCA file used in this notebook:
adata_path = "/home/h/hollenberg/MaPra/adata_radiation_processed.h5ad"

# add annotation when not in adata
# None if not 
add_annotation = "/home/h/hollenberg/MaPra/radiation_obs_annotation.csv"

# specify columns 
cell_type_identifier="annotation"
sample_identifier="SAMP_ID"
covariate_obs="SAMP_condition"
group1 = "RAD"
group2 = "SHAM"
reference_cell_type="automatic"
fdr = 0.4
lfc_thresh = 0.5

# path to results directory: (has to exist)
dir_out = "/home/h/hollenberg/MaPra/sccoda/"


## Code

In [70]:
adata = sc.read(adata_path)
# load annotation if not in adata
if add_annotation is not None:
    adata.obs = pd.read_csv(add_annotation)
    

meta = adata.obs[[covariate_obs, sample_identifier]].drop_duplicates().set_index(sample_identifier)
scc_dat = dat.from_scanpy(
    adata,
    cell_type_identifier=cell_type_identifier,
    sample_identifier=sample_identifier,
    covariate_df=meta
)
scc_dat
sccoda_mod = mod.CompositionalAnalysis(
    scc_dat,
    formula=covariate_obs,
    reference_cell_type=reference_cell_type,
)
sccoda_res = sccoda_mod.sample_hmc()
#scc_dat.write(output_adata)

Automatic reference selection! Reference cell type set to NK cells
Zero counts encountered in data! Added a pseudocount of 0.5.


100%|████████████████████████████████████| 20000/20000 [02:23<00:00, 139.04it/s]


MCMC sampling finished. (182.777 sec)
Acceptance rate: 42.9%


In [71]:
# load adata
#adata = sc.read(adata_path)

# load annotation if not in adata
#if add_annotation is not None:
#    adata.obs = pd.read_csv(add_annotation)

# build sccoda model
sccoda_model = pt.tl.Sccoda()
sccoda_data = sccoda_model.load(adata, type="cell_level", generate_sample_level=True, 
                                cell_type_identifier=cell_type_identifier,
                                sample_identifier=sample_identifier, covariate_obs=[covariate_obs])

# new combi 
group1_group2 = group1 + "_" + group2
sccoda_data.mod[group1_group2] = sccoda_data["coda"][sccoda_data["coda"].obs[covariate_obs].isin([group1, group2])].copy()

In [72]:
sc.settings.set_figure_params(
    dpi=100,
    color_map='plasma',
    dpi_save=200,
    vector_friendly=True,
    frameon=False,
    fontsize=10,
    figsize=(8,6),
    format='png',
)
# set thresholds
sccoda_res.set_fdr(est_fdr=fdr)

In [73]:
# write model summary
with open(os.path.join(dir_out,"sccoda_summary.txt"), 'w') as f:
    print(sccoda_res.summary_extended(), file=f)
    cred_effects = sccoda_res.credible_effects()
    print(cred_effects, file=f)

    # plot significant results
    with PdfPages(os.path.join(dir_out, "sccoda_" + adata_path.split("/")[-1].split(".")[0] + ".pdf")) as pdf:

        plt.rcParams['figure.figsize'] = (16, 10)
        pt.pl.coda.boxplots(sccoda_data, modality_key=group1_group2, feature_name=covariate_obs, add_dots=True)
        pdf.savefig(bbox_inches='tight')
        plt.close()

        covariates = cred_effects.index.get_level_values('Covariate').unique()
        for covariate in covariates:
            plt.rcParams['figure.figsize'] = (8,6)
            effects_df = sccoda_res.effect_df.loc[covariate]
            effects_df = effects_df[np.abs(effects_df["log2-fold change"]) >= lfc_thresh]
            effects_df = effects_df.loc[cred_effects[covariate]].reset_index()
            effects_df = effects_df.sort_values("log2-fold change", ascending=False)

            print(effects_df, file=f)

            err_min = np.abs(effects_df['HDI 3%'] - effects_df['Final Parameter'])
            err_max = np.abs(effects_df['HDI 97%'] - effects_df['Final Parameter'])
    
            if effects_df.shape[0] == 0:
                print(f'skip {covariate}')
                continue
    
            # Final parameter
            # plt.bar(
            #     data=effects_df,
            #     x="Cell Type",
            #     height="Final Parameter",
            # )
            plt.errorbar(
                data=effects_df,
                x="Cell Type",
                y="Final Parameter",
                yerr=[err_min, err_max],
                fmt='o',
            )
            plt.axhline(y=0, color='black', linestyle='-')
            plt.title(f'Final Parameter {covariate} FDR={fdr}')
            plt.xticks(rotation=90)
            plt.ylabel ('Final Parameter')
            pdf.savefig(bbox_inches='tight')
            plt.close()

            # significance
            sns.barplot(
                data=effects_df,
                x="Cell Type",
                y="log2-fold change",
                order=effects_df['Cell Type'],
            ).set(title=f'{covariate} FDR={fdr}')
            plt.xticks(rotation=90)
            plt.ylabel ('Log2 Fold Change')
            pdf.savefig(bbox_inches='tight')
            plt.close()
    
    
        # add metadata
        d = pdf.infodict()
        d['Title'] = f'scCODA on {adata_path.split("/")[-1]}'

Compositional Analysis summary (extended):

Data: 71 samples, 19 cell types
Reference index: 14
Formula: SAMP_condition
Spike-and-slab threshold: 0.261

MCMC Sampling: Sampled 20000 chain states (5000 burnin samples) in 182.777 sec. Acceptance rate: 42.9%

Intercepts:
                         Final Parameter  HDI 3%  HDI 97%     SD   
Cell Type                                                          
AT1                                0.334   0.150    0.510  0.099  \
AT2                               -0.187  -0.423    0.052  0.130   
Adventitial fibroblast            -0.024  -0.263    0.195  0.121   
Aerocytes                          0.962   0.762    1.142  0.106   
Alveolar macrophages               2.723   2.607    2.843  0.064   
B-cells                            3.113   3.014    3.215  0.056   
Dendritic cell CCR7                0.451   0.236    0.663  0.122   
Dendritic cells                    1.714   1.582    1.832  0.067   
Dendritic cells pDC                1.203   1.080   