### reasoning:

1. It is unclear whether keeping the cycling TME populations is useful for the spatial data: for example, the cycling T cell cluster contains many different T cell subtypes.
2. Mast cells are only present in Che-Cells; they are not in the Wu data in the publication and not in our BCI data. Upon integration, 7 Wu cells and 1 BCI cell get annotated as Mast.

So maybe we should estimate the signatures without the cycling TME populations and without Mast cells.

In [1]:
import scanpy as sc
import pandas as pd
from pathlib import Path
import anndata as ad
import numpy as np
import os

%config InlineBackend.figure_format = 'retina'
%matplotlib inline

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

# Plot configuration constants used by the figure settings below.
DPI = 300  # passed to scanpy as `dpi_save`
FONTSIZE = 20  # 42

# Global scanpy figure parameters: `dpi` is set to 100 and `dpi_save` to DPI,
# with transparent, vector-friendly output.
sc.settings.set_figure_params(
    scanpy=True,
    dpi=100,
    dpi_save=DPI,
    transparent=True,
    vector_friendly=True,
)
from matplotlib import rcParams

rcParams["pdf.fonttype"] = 42

In [2]:
# Working directory for this notebook: the input .h5ad is read from here and
# the filtered .h5ad files are written back to it.
DIR2SAVE = Path("/data/BCI-CRC/nasrine/data/CRC/spatial/CRC_LM_VISIUM/CRC_LM_VISIUM_04_08_09_11/cell2location/prepareInputRNA/")
# Create the directory (and any missing parents); no error if it already exists.
DIR2SAVE.mkdir(exist_ok=True, parents=True)

In [3]:
# Load the annotated AnnData object from the working directory.
adata_liver = sc.read_h5ad(DIR2SAVE / "Multiome_Che_Wu_CRC_LM_annotations_raw.h5ad")

In [4]:
# Number of observations per detailed annotation label, before any filtering.
adata_liver.obs["Annotation_scVI_detailed"].value_counts()

CD4 Th               10325
CD8 Tem               8683
Treg                  7655
CD8 Tex               7531
CD4 Th HSP            7246
CD8 Tem HSP           6178
CD4 Tn                4778
SPP1 Mac              4619
MAIT                  4463
TA1                   4116
CD8 Tem GZMB          3269
NK1                   3178
CD4 Tfh               3010
Neutrophil            2892
Hypoxia               2481
NK2                   2321
B                     2116
NLRP3 Mac             2075
C1QC Mac              2006
CD4 Th17              1937
Stem (NOTUM high)     1933
Colonocyte            1913
T cycling             1774
gdT                   1761
Plasma                1610
cDC2                  1543
Stem                  1400
NKT                   1380
Intermediate          1319
FCN1 Mono              999
pEMT                   998
UPR                    759
Kupffer                600
PLTP LYVE1 Mac         574
IL1B Mac               550
Myeloid cycling        513
ipEMT                  469
E

In [5]:
# Dimensions of the loaded object as (n_obs, n_vars), before any filtering.
adata_liver.shape

(114321, 39559)

### Remove cycling TME

In [6]:
# Count observations whose detailed label contains the substring "cycling".
adata_liver.obs["Annotation_scVI_detailed"].str.contains("cycling").sum()

2287

In [7]:
# Preview (as a view, without copying) the observations labelled with either
# of the two cycling annotations that are removed in the next step.
adata_liver[adata_liver.obs["Annotation_scVI_detailed"].isin(["T cycling", "Myeloid cycling"])]

View of AnnData object with n_obs × n_vars = 2287 × 39559
    obs: 'Patient', 'Sample', 'Tissue', 'Therapy', 'cell_source', 'Annotation_scVI', 'Annotation_scVI_detailed', 'Annotation_source', 'technology'
    var: 'n_cells-Nasrine', 'n_cells'

In [8]:
# Keep every observation that is NOT labelled "T cycling" or "Myeloid cycling";
# .copy() materialises the subset as an independent object rather than a view.
adata_no_cycling = adata_liver[
    ~adata_liver.obs["Annotation_scVI_detailed"].isin(["T cycling", "Myeloid cycling"])
].copy()

In [9]:
# Save the object without the cycling labels to the working directory.
adata_no_cycling.write(DIR2SAVE / "Multiome_Che_Wu_CRC_LM_annotations_raw_no_cycling_TME.h5ad")

In [10]:
# Dimensions (n_obs, n_vars) after dropping the two cycling labels.
adata_no_cycling.shape

(112034, 39559)

### Remove Mast cells

In [11]:
# Count observations whose detailed label contains the substring "Mast".
adata_no_cycling.obs["Annotation_scVI_detailed"].str.contains("Mast").sum()

85

In [12]:
# Drop the observations labelled exactly "Mast" from the cycling-free object;
# .copy() materialises the subset as an independent object rather than a view.
adata_no_cycling_mast = adata_no_cycling[
    adata_no_cycling.obs["Annotation_scVI_detailed"] != "Mast"
].copy()

In [13]:
# Dimensions (n_obs, n_vars) after also dropping the "Mast" label.
adata_no_cycling_mast.shape

(111949, 39559)

In [14]:
# Number of observations per detailed annotation label after both filters.
adata_no_cycling_mast.obs["Annotation_scVI_detailed"].value_counts()

CD4 Th               10325
CD8 Tem               8683
Treg                  7655
CD8 Tex               7531
CD4 Th HSP            7246
CD8 Tem HSP           6178
CD4 Tn                4778
SPP1 Mac              4619
MAIT                  4463
TA1                   4116
CD8 Tem GZMB          3269
NK1                   3178
CD4 Tfh               3010
Neutrophil            2892
Hypoxia               2481
NK2                   2321
B                     2116
NLRP3 Mac             2075
C1QC Mac              2006
CD4 Th17              1937
Stem (NOTUM high)     1933
Colonocyte            1913
gdT                   1761
Plasma                1610
cDC2                  1543
Stem                  1400
NKT                   1380
Intermediate          1319
FCN1 Mono              999
pEMT                   998
UPR                    759
Kupffer                600
PLTP LYVE1 Mac         574
IL1B Mac               550
ipEMT                  469
ECM CAF                441
HSP Mono               387
H

In [15]:
# Save the object without the cycling and Mast labels to the working directory.
adata_no_cycling_mast.write(
    DIR2SAVE / "Multiome_Che_Wu_CRC_LM_annotations_raw_no_cycling_TME_no_Mast.h5ad"
)