# Breast cancer data showcase

In [1]:
%load_ext autoreload
%autoreload 2
from multi_condition_comparisions.tl.de import StatsmodelsDE

import decoupler as dc
import scanpy as sc

  from .autonotebook import tqdm as notebook_tqdm
OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


The breast cancer single nuclei dataset was downloaded from cellxgene.

In [2]:
import cellxgene_census

cellxgene_census.download_source_h5ad("cd74e95e-6583-4875-a0ba-f2eae5a1e5a6.h5ad", to_path="breast_cancer.h5ad")

adata = sc.read_h5ad("./cd74e95e-6583-4875-a0ba-f2eae5a1e5a6.h5ad")

In [3]:
adata.layers["counts"] = adata.raw.X
adata.layers["normalised_counts"] = adata.X

In [4]:
adata.obs_keys

<bound method AnnData.obs_keys of AnnData object with n_obs × n_vars = 117346 × 33234
    obs: 'mapped_reference_assembly', 'mapped_reference_annotation', 'alignment_software', 'donor_id', 'self_reported_ethnicity_ontology_term_id', 'donor_living_at_sample_collection', 'donor_menopausal_status', 'organism_ontology_term_id', 'sample_uuid', 'sample_preservation_method', 'tissue_ontology_term_id', 'development_stage_ontology_term_id', 'sample_derivation_process', 'sample_source', 'donor_BMI_at_collection', 'suspension_derivation_process', 'suspension_dissociation_reagent', 'suspension_uuid', 'suspension_type', 'library_uuid', 'assay_ontology_term_id', 'library_starting_quantity', 'sequencing_platform', 'is_primary_data', 'cell_type_ontology_term_id', 'author_cell_type', 'disease_ontology_term_id', 'sex_ontology_term_id', 'procedure_group', 'tissue_location', 'nCount_RNA', 'nFeature_RNA', 'percent.mito', 'sample_id', 'age_group', 'bmi_group', 'breast_density', 'cell_type', 'assay', 'diseas

In [5]:
pbulk = dc.get_pseudobulk(
    adata,
    sample_col="donor_id",
    groups_col="cell_type",
    layer="normalised_counts",
    mode="sum",
    min_cells=10,
    min_counts=1000,
)

pbulk

  cols = obs.groupby([sample_col, groups_col]).apply(lambda x: x.apply(lambda y: len(y.unique()) == 1)).all(0)


AnnData object with n_obs × n_vars = 183 × 30735
    obs: 'mapped_reference_assembly', 'mapped_reference_annotation', 'alignment_software', 'donor_id', 'self_reported_ethnicity_ontology_term_id', 'donor_living_at_sample_collection', 'donor_menopausal_status', 'organism_ontology_term_id', 'sample_preservation_method', 'tissue_ontology_term_id', 'development_stage_ontology_term_id', 'sample_derivation_process', 'sample_source', 'donor_BMI_at_collection', 'suspension_derivation_process', 'suspension_dissociation_reagent', 'suspension_type', 'assay_ontology_term_id', 'library_starting_quantity', 'sequencing_platform', 'is_primary_data', 'cell_type_ontology_term_id', 'disease_ontology_term_id', 'sex_ontology_term_id', 'procedure_group', 'age_group', 'bmi_group', 'breast_density', 'cell_type', 'assay', 'disease', 'organism', 'sex', 'tissue', 'self_reported_ethnicity', 'development_stage', 'psbulk_n_cells', 'psbulk_counts'
    var: 'feature_is_filtered', 'vst.mean', 'vst.variance', 'vst.varia

In [6]:
mod = StatsmodelsDE(pbulk, "~ disease:cell_type")

In [7]:
mod.fit()

100%|██████████| 30735/30735 [01:42<00:00, 298.45it/s]


In [8]:
mod.design

Unnamed: 0,Intercept,cell_type[T.adipocyte of breast],cell_type[T.basal cell],cell_type[T.endothelial cell of lymphatic vessel],cell_type[T.endothelial cell of vascular tree],cell_type[T.fibroblast],cell_type[T.luminal epithelial cell of mammary gland],cell_type[T.mast cell],cell_type[T.myeloid cell],cell_type[T.pericyte],disease[T.normal]:cell_type[T.T cell],disease[T.normal]:cell_type[T.adipocyte of breast],disease[T.normal]:cell_type[T.basal cell],disease[T.normal]:cell_type[T.endothelial cell of lymphatic vessel],disease[T.normal]:cell_type[T.endothelial cell of vascular tree],disease[T.normal]:cell_type[T.fibroblast],disease[T.normal]:cell_type[T.luminal epithelial cell of mammary gland],disease[T.normal]:cell_type[T.mast cell],disease[T.normal]:cell_type[T.myeloid cell],disease[T.normal]:cell_type[T.pericyte]
P01_T cell,1.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
P12_T cell,1.0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
P133_T cell,1.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
P134_T cell,1.0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
P135_T cell,1.0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
P38_pericyte,1.0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1
P39_pericyte,1.0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1
P40_pericyte,1.0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1
P41_pericyte,1.0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1


In [9]:
mod.test_contrasts(
    {"T cells": mod.cond(disease="breast cancer", cell_type="T cell") - mod.cond(disease="normal", cell_type="T cell")},
    {
        "fibroblasts": mod.cond(disease="breast cancer", cell_type="fibroblast")
        - mod.cond(disease="normal", cell_type="fibroblast")
    },
)

TypeError: BaseMethod.test_contrasts() takes 2 positional arguments but 3 were given