In [3]:
from io import StringIO
import os

import numpy as np
import scanpy as sc
import pandas as pd

# Make minimal obs

## Read adata with no duplicates

In [4]:
h5ad = os.path.join(
    "/home/olga/data_lg/data_sm_copy/czb-tabula-muris-senis/Data-objects/",
    "tabula-muris-senis-droplet-official-raw-obj--no-duplicate-barcodes-per-seq-run--minimal-obs-unified-celltypes.h5ad",
) 
adata = sc.read(h5ad)
adata

AnnData object with n_obs × n_vars = 238915 × 20138
    obs: 'age', 'cell_id', 'cell_ontology_class', 'cell_ontology_id', 'free_annotation', 'n_genes', 'sex', 'subtissue', 'tissue', 'species', 'species_latin', 'channel', 'sequencing_run', 'cell_barcode', 'n_counts', 'individual', 'narrow_group', 'broad_group', 'compartment_group'
    var: 'n_cells'

## Read in re-annotated mouse data

In [5]:
# h5ad = "/home/olga/googledrive/TabulaMicrocebus/data/cross-species/TMS_limb_muscle_droplet_updated_annotations/tabula-muris-senis-droplet-processed-official-annotations-Limb_Muscle_updated-annotations.h5ad"
h5ad = "/home/olga/googledrive/TabulaMicrocebus/data/cross-species/unified_annotations/tabula-muris-senis_10X_Limb_Muscle_updated-annotations.h5ad"
muscle = sc.read(h5ad)
muscle

AnnData object with n_obs × n_vars = 28867 × 20138
    obs: 'age', 'cell', 'cell_ontology_class', 'cell_ontology_id', 'free_annotation', 'method', 'mouse.id', 'n_genes', 'sex', 'subtissue', 'tissue', 'tissue_free_annotation', 'n_counts', 'louvain', 'leiden', 'TMS_muscle_ADM_free_annotation', 'TMS_muscle_CE_compartment', 'TMS_muscle_CE_free_annotation'
    var: 'n_cells', 'means', 'dispersions', 'dispersions_norm', 'highly_variable'
    uns: 'TMS_muscle_CE_compartment_colors', 'TMS_muscle_CE_free_annotation_colors', 'age_colors', 'cell_ontology_class_colors', 'leiden', 'louvain', 'neighbors', 'pca'
    obsm: 'X_pca', 'X_tsne', 'X_umap'
    varm: 'PCs'
    obsp: 'connectivities', 'distances'

In [6]:
muscle.obs.age.value_counts()

24m    7178
1m     5757
18m    5569
3m     3550
21m    3528
30m    3285
Name: age, dtype: int64

In [44]:
muscle.obs.query('TMS_muscle_CE_free_annotation == "slow muscle cell"').TMS_muscle_CE_free_annotation.value_counts().head(1)

slow muscle cell    5
Name: TMS_muscle_CE_free_annotation, dtype: int64

In [39]:
muscle.obs.query('TMS_muscle_CE_free_annotation == "slow muscle cell"').age.value_counts()

3m     2
24m    1
21m    1
1m     1
30m    0
18m    0
Name: age, dtype: int64

### Remove 1m and 3m data

In [7]:
muscle_no1m_no3m = muscle[~muscle.obs.age.isin(('1m', '3m'))]
muscle_no1m_no3m

View of AnnData object with n_obs × n_vars = 19560 × 20138
    obs: 'age', 'cell', 'cell_ontology_class', 'cell_ontology_id', 'free_annotation', 'method', 'mouse.id', 'n_genes', 'sex', 'subtissue', 'tissue', 'tissue_free_annotation', 'n_counts', 'louvain', 'leiden', 'TMS_muscle_ADM_free_annotation', 'TMS_muscle_CE_compartment', 'TMS_muscle_CE_free_annotation'
    var: 'n_cells', 'means', 'dispersions', 'dispersions_norm', 'highly_variable'
    uns: 'TMS_muscle_CE_compartment_colors', 'TMS_muscle_CE_free_annotation_colors', 'age_colors', 'cell_ontology_class_colors', 'leiden', 'louvain', 'neighbors', 'pca'
    obsm: 'X_pca', 'X_tsne', 'X_umap'
    varm: 'PCs'
    obsp: 'connectivities', 'distances'

### Set `free_annotation` as the updated one to be consistent with lung

In [8]:
new_annotation_col = 'TMS_muscle_CE_free_annotation'

muscle_no1m_no3m.obs['free_annotation'] = muscle_no1m_no3m.obs[new_annotation_col]

Trying to set attribute `.obs` of view, copying.


In [36]:
muscle_no1m_no3m.obs.free_annotation.value_counts()

fibroadipogenic progenitor cell        6483
capillary cell                         2499
tendon cell                            1944
skeletal muscle satellite stem cell    1176
macrophage                              793
                                       ... 
T cell (PF CD4- CD8+)                     5
unknown_epithelial_stromal (KRT5+)        5
unknown_lymphoid (FCER1G+)                5
slow muscle cell                          2
tendon cell_chondrocyte (COL2A1+)         2
Name: free_annotation, Length: 61, dtype: int64

## Make new adata with muscle replaced

In [9]:
adata.obs.tissue.value_counts()

Marrow             39125
Spleen             34853
Limb_Muscle        28176
Lung               23802
Kidney             21038
Tongue             20271
Mammary_Gland      11954
Thymus              9082
Bladder             8752
Heart_and_Aorta     8253
Trachea             7901
Liver               7052
Fat                 6534
Pancreas            5969
Skin                4308
Large_Intestine     1845
Name: tissue, dtype: int64

In [10]:
adata_no_muscle = adata[adata.obs.tissue != "Limb_Muscle"]
adata_no_muscle

View of AnnData object with n_obs × n_vars = 210739 × 20138
    obs: 'age', 'cell_id', 'cell_ontology_class', 'cell_ontology_id', 'free_annotation', 'n_genes', 'sex', 'subtissue', 'tissue', 'species', 'species_latin', 'channel', 'sequencing_run', 'cell_barcode', 'n_counts', 'individual', 'narrow_group', 'broad_group', 'compartment_group'
    var: 'n_cells'

### Create muscle-only subset of original data and remove 1m and 3m

In [11]:
adata_muscle = adata[adata.obs.tissue == "Limb_Muscle"]
adata_muscle_no_1m_3m = adata_muscle[~adata_muscle.obs.age.isin(('1m', '3m'))]
adata_muscle_no_1m_3m

View of AnnData object with n_obs × n_vars = 19164 × 20138
    obs: 'age', 'cell_id', 'cell_ontology_class', 'cell_ontology_id', 'free_annotation', 'n_genes', 'sex', 'subtissue', 'tissue', 'species', 'species_latin', 'channel', 'sequencing_run', 'cell_barcode', 'n_counts', 'individual', 'narrow_group', 'broad_group', 'compartment_group'
    var: 'n_cells'

In [12]:
adata_muscle_no_1m_3m.obs['free_annotation'].head()

index
AAACCTGAGAAACCGC-1-14-0-0                 nan
AAACCTGCAATAACGA-1-14-0-0                 nan
AAACCTGCAGCGTAAG-1-14-0-0    chondrocyte-like
AAACCTGGTAGCTTGT-1-14-0-0    chondrocyte-like
AAAGCAAGTCTGGTCG-1-14-0-0                 nan
Name: free_annotation, dtype: category
Categories (3, object): ['chondrocyte-like', 'nan', 'unknown']

In [13]:
adata_muscle_no_1m_3m.obs['free_annotation'].value_counts()

nan                 14334
chondrocyte-like     3763
unknown              1067
Name: free_annotation, dtype: int64

### Update just the `free_annotation` column in the obs of the muscle data, no 1m or 3m

In [14]:
muscle_no1m_no3m.obs[new_annotation_col].value_counts()

fibroadipogenic progenitor cell        6483
capillary cell                         2499
tendon cell                            1944
skeletal muscle satellite stem cell    1176
macrophage                              793
                                       ... 
T cell (PF CD4- CD8+)                     5
unknown_epithelial_stromal (KRT5+)        5
unknown_lymphoid (FCER1G+)                5
slow muscle cell                          2
tendon cell_chondrocyte (COL2A1+)         2
Name: TMS_muscle_CE_free_annotation, Length: 61, dtype: int64

In [15]:
adata_muscle_no_1m_3m.obs['free_annotation'] = muscle_no1m_no3m.obs.loc[adata_muscle_no_1m_3m.obs.index, new_annotation_col]

Trying to set attribute `.obs` of view, copying.


In [16]:
muscle_no1m_no3m.obs.loc[adata_muscle_no_1m_3m.obs.index, new_annotation_col].value_counts()

fibroadipogenic progenitor cell        6349
capillary cell                         2458
tendon cell                            1899
skeletal muscle satellite stem cell    1161
macrophage                              773
                                       ... 
basophil                                  5
unknown_epithelial_stromal (KRT5+)        5
unknown_lymphoid (FCER1G+)                5
slow muscle cell                          2
tendon cell_chondrocyte (COL2A1+)         2
Name: TMS_muscle_CE_free_annotation, Length: 61, dtype: int64

### Concatenate

In [17]:
adata_updated_muscle = adata_no_muscle.concatenate(adata_muscle_no_1m_3m)
adata_updated_muscle

AnnData object with n_obs × n_vars = 229903 × 20138
    obs: 'age', 'cell_id', 'cell_ontology_class', 'cell_ontology_id', 'free_annotation', 'n_genes', 'sex', 'subtissue', 'tissue', 'species', 'species_latin', 'channel', 'sequencing_run', 'cell_barcode', 'n_counts', 'individual', 'narrow_group', 'broad_group', 'compartment_group', 'batch'
    var: 'n_cells'

## Add unified cell type groups: Muscle first

In [18]:
import unified_annotations

muscle_grouping = unified_annotations.get_celltype_converter(
    "Muscle",
    (
        "Mouse",
        "Tabula Muris Senis",
        f"{new_annotation_col} (remove 1 and 3 month old mice)",
    ),
)
print(muscle_grouping.shape)
muscle_grouping

(61, 4)


Unnamed: 0_level_0,narrow_group,broad_group,compartment_group,tissue
"(Mouse, Tabula Muris Senis, TMS_muscle_CE_free_annotation (remove 1 and 3 month old mice))",Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
B cell,B cell,B cell,lymphoid,Muscle
B cell (ZBTB32+),B cell,B cell,lymphoid,Muscle
B cell (PF ZBTB32+),B cell (PF),B cell,lymphoid,Muscle
plasma cell,plasma cell,plasma cell,lymphoid,Muscle
T cell (CD4- CD8- TMEM176A+),T cell,T cell,lymphoid,Muscle
...,...,...,...,...
unknown_epithelial_stromal (KRT5+),unknown_epithelial_stromal (KRT5+),unknown_epithelial_stromal (KRT5+),epithelial,Muscle
doublets_endothelial_stromal,doublet,doublet,,Muscle
doublets_lymphoid_endothelial,doublet,doublet,,Muscle
doublets_myeloid_endothelial,doublet,doublet,,Muscle


## Concatenate grouping

In [19]:
obs_muscle = adata_updated_muscle.obs.query('tissue == "Limb_Muscle"')
# obs_muscle.free_annotation.cat.remove_unused_categories(inplace=True)
obs_muscle.free_annotation.value_counts().sort_index()

B cell                                             617
B cell (PF ZBTB32+)                                  6
B cell (ZBTB32+)                                   193
Schwann cell (NGFR+)                                51
T cell (CD4+ CD8-)                                  94
                                                  ... 
unknown_stromal (CLDN1+ COL9A2-)                   135
unknown_stromal (CLDN1- COL9A2+)                   180
vascular associated smooth muscle cell (ACTG2+)    105
vascular associated smooth muscle cell (PLN+)      128
vein cell                                          758
Name: free_annotation, Length: 61, dtype: int64

### Make sure all muscle cell subtypes are there

In [31]:
obs_muscle.free_annotation[obs_muscle.free_annotation.str.contains('muscle cell')].unique()

array(['fast muscle cell',
       'vascular associated smooth muscle cell (PLN+)',
       'pericyte_vascular associated smooth muscle cell',
       'vascular associated smooth muscle cell (ACTG2+)',
       'slow muscle cell'], dtype=object)

### How many of each muscle cell?

In [32]:
obs_muscle.free_annotation[obs_muscle.free_annotation.str.contains('muscle cell')].value_counts()

pericyte_vascular associated smooth muscle cell    184
fast muscle cell                                   144
vascular associated smooth muscle cell (PLN+)      128
vascular associated smooth muscle cell (ACTG2+)    105
slow muscle cell                                     2
Name: free_annotation, dtype: int64

## Replace only the Muscle's `narrow_group`, `broad_group`, `comaprtment_group`

In [33]:
obs_muscle[muscle_grouping.columns] = 'nan'
obs_muscle.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


Unnamed: 0_level_0,age,cell_id,cell_ontology_class,cell_ontology_id,free_annotation,n_genes,sex,subtissue,tissue,species,species_latin,channel,sequencing_run,cell_barcode,n_counts,individual,narrow_group,broad_group,compartment_group,batch
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
AAACCTGAGAAACCGC-1-14-0-0-1,18m,MACA_18m_F_MUSCLE_50_pre_sort_AAACCTGAGAAACCGC,macrophage,,classical monocyte,2506.0,female,Pre-Sort,,Mouse,Mus musculus,MACA_18m_F_MUSCLE_50_pre_sort,171005_A00111_0072_AH3YLGDMXX,AAACCTGAGAAACCGC,1,mouse_10_18-F-50,,,,1
AAACCTGCAATAACGA-1-14-0-0-1,18m,MACA_18m_F_MUSCLE_50_pre_sort_AAACCTGCAATAACGA,endothelial cell,,capillary cell,1361.0,female,Pre-Sort,,Mouse,Mus musculus,MACA_18m_F_MUSCLE_50_pre_sort,171005_A00111_0072_AH3YLGDMXX,AAACCTGCAATAACGA,1,mouse_10_18-F-50,,,,1
AAACCTGCAGCGTAAG-1-14-0-0-1,18m,MACA_18m_F_MUSCLE_50_pre_sort_AAACCTGCAGCGTAAG,mesenchymal stem cell,,tendon cell,1482.0,female,Pre-Sort,,Mouse,Mus musculus,MACA_18m_F_MUSCLE_50_pre_sort,171005_A00111_0072_AH3YLGDMXX,AAACCTGCAGCGTAAG,1,mouse_10_18-F-50,,,,1
AAACCTGGTAGCTTGT-1-14-0-0-1,18m,MACA_18m_F_MUSCLE_50_pre_sort_AAACCTGGTAGCTTGT,mesenchymal stem cell,,fibroadipogenic progenitor cell,1652.0,female,Pre-Sort,,Mouse,Mus musculus,MACA_18m_F_MUSCLE_50_pre_sort,171005_A00111_0072_AH3YLGDMXX,AAACCTGGTAGCTTGT,1,mouse_10_18-F-50,,,,1
AAAGCAAGTCTGGTCG-1-14-0-0-1,18m,MACA_18m_F_MUSCLE_50_pre_sort_AAAGCAAGTCTGGTCG,mesenchymal stem cell,,fibroadipogenic progenitor cell,1069.0,female,Pre-Sort,,Mouse,Mus musculus,MACA_18m_F_MUSCLE_50_pre_sort,171005_A00111_0072_AH3YLGDMXX,AAAGCAAGTCTGGTCG,1,mouse_10_18-F-50,,,,1


In [34]:
for x in sorted(muscle_grouping.index.symmetric_difference(obs_muscle.free_annotation.unique())):
    print(x)

In [35]:
for free_annotation, df in obs_muscle.groupby("free_annotation"):
    for group_name, group_value in muscle_grouping.loc[free_annotation].items():
        obs_muscle.loc[df.index, group_name] = obs_muscle.loc[
            df.index, group_name
        ].replace({"nan": group_value}).astype(str)
        
obs_muscle.narrow_group.value_counts()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


fibroadipogenic progenitor cell                         6349
capillary cell                                          2458
tendon cell                                             1899
skeletal muscle satellite stem cell                     1161
B cell                                                   810
macrophage                                               773
vein cell                                                758
artery cell                                              752
doublet                                                  590
artery cell_capillary cell (RBP7+)                       279
fibroadipogenic progenitor cell_tendon cell (SFRP2+)     271
lymphatic cell                                           261
classical monocyte                                       239
vascular associated smooth muscle cell                   233
T cell (CD8+)                                            230
unknown_myeloid (CD209A+)                                224
pericyte_vascular associ

In [25]:
sorted(obs_muscle.narrow_group.unique())

['B cell',
 'B cell (PF)',
 'Schwann cell (NGFR+)',
 'T cell',
 'T cell (CD4+)',
 'T cell (CD8+)',
 'T cell (PF CD4+)',
 'T cell (PF CD8+)',
 'T cell (regulatory CD4+)',
 'artery cell',
 'artery cell_capillary cell (RBP7+)',
 'basophil',
 'capillary cell',
 'classical monocyte',
 'conventional dendritic cell',
 'conventional dendritic cell (PF)',
 'doublet',
 'erythroid lineage cell',
 'fast muscle cell',
 'fibroadipogenic progenitor cell',
 'fibroadipogenic progenitor cell_tendon cell (SFRP2+)',
 'innate lymphoid cell',
 'lymphatic cell',
 'macrophage',
 'mature dendritic cell',
 'myelinating Schwann cell',
 'natural killer cell',
 'neutrophil',
 'neutrophil (PF)',
 'neutrophil_monocyte',
 'non-classical monocyte',
 'pericyte',
 'pericyte_vascular associated smooth muscle cell',
 'plasma cell',
 'plasmacytoid dendritic cell',
 'skeletal muscle satellite stem cell',
 'skeletal muscle satellite stem cell (MYOG+)',
 'slow muscle cell',
 'tendon cell',
 'tendon cell_chondrocyte (COL2A1+)'

In [26]:
obs_muscle.head()

Unnamed: 0_level_0,age,cell_id,cell_ontology_class,cell_ontology_id,free_annotation,n_genes,sex,subtissue,tissue,species,species_latin,channel,sequencing_run,cell_barcode,n_counts,individual,narrow_group,broad_group,compartment_group,batch
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
AAACCTGAGAAACCGC-1-14-0-0-1,18m,MACA_18m_F_MUSCLE_50_pre_sort_AAACCTGAGAAACCGC,macrophage,,classical monocyte,2506.0,female,Pre-Sort,Muscle,Mouse,Mus musculus,MACA_18m_F_MUSCLE_50_pre_sort,171005_A00111_0072_AH3YLGDMXX,AAACCTGAGAAACCGC,1,mouse_10_18-F-50,classical monocyte,monocyte,myeloid,1
AAACCTGCAATAACGA-1-14-0-0-1,18m,MACA_18m_F_MUSCLE_50_pre_sort_AAACCTGCAATAACGA,endothelial cell,,capillary cell,1361.0,female,Pre-Sort,Muscle,Mouse,Mus musculus,MACA_18m_F_MUSCLE_50_pre_sort,171005_A00111_0072_AH3YLGDMXX,AAACCTGCAATAACGA,1,mouse_10_18-F-50,capillary cell,capillary cell,endothelial,1
AAACCTGCAGCGTAAG-1-14-0-0-1,18m,MACA_18m_F_MUSCLE_50_pre_sort_AAACCTGCAGCGTAAG,mesenchymal stem cell,,tendon cell,1482.0,female,Pre-Sort,Muscle,Mouse,Mus musculus,MACA_18m_F_MUSCLE_50_pre_sort,171005_A00111_0072_AH3YLGDMXX,AAACCTGCAGCGTAAG,1,mouse_10_18-F-50,tendon cell,tendon cell,stromal,1
AAACCTGGTAGCTTGT-1-14-0-0-1,18m,MACA_18m_F_MUSCLE_50_pre_sort_AAACCTGGTAGCTTGT,mesenchymal stem cell,,fibroadipogenic progenitor cell,1652.0,female,Pre-Sort,Muscle,Mouse,Mus musculus,MACA_18m_F_MUSCLE_50_pre_sort,171005_A00111_0072_AH3YLGDMXX,AAACCTGGTAGCTTGT,1,mouse_10_18-F-50,fibroadipogenic progenitor cell,fibroadipogenic progenitor cell,stromal,1
AAAGCAAGTCTGGTCG-1-14-0-0-1,18m,MACA_18m_F_MUSCLE_50_pre_sort_AAAGCAAGTCTGGTCG,mesenchymal stem cell,,fibroadipogenic progenitor cell,1069.0,female,Pre-Sort,Muscle,Mouse,Mus musculus,MACA_18m_F_MUSCLE_50_pre_sort,171005_A00111_0072_AH3YLGDMXX,AAAGCAAGTCTGGTCG,1,mouse_10_18-F-50,fibroadipogenic progenitor cell,fibroadipogenic progenitor cell,stromal,1


## Add new obs

In [27]:
adata_updated_muscle_new_obs = adata_updated_muscle.copy()
adata_updated_muscle_new_obs.obs.loc[obs_muscle.index] = obs_muscle

# Write mouse adata with minimal adata and new grouping

In [28]:
h5ad = os.path.join(
    "/home/olga/data_lg/data_sm_copy/czb-tabula-muris-senis/Data-objects/",
    "tabula-muris-senis-droplet-official-raw-obj--no-duplicate-barcodes-per-seq-run--minimal-obs-unified-celltypes--june2021.h5ad",
) 
adata_updated_muscle_new_obs.write(h5ad)

... storing 'age' as categorical
... storing 'cell_ontology_class' as categorical
... storing 'cell_ontology_id' as categorical
... storing 'free_annotation' as categorical
... storing 'subtissue' as categorical
... storing 'tissue' as categorical
... storing 'channel' as categorical
... storing 'sequencing_run' as categorical
... storing 'cell_barcode' as categorical
... storing 'individual' as categorical
... storing 'narrow_group' as categorical
... storing 'broad_group' as categorical
... storing 'compartment_group' as categorical


In [29]:
adata_updated_muscle_new_obs.obs.individual.value_counts()

mouse_20_30-M-2     24369
mouse_15_21-F-55    18359
mouse_12_18-M-52    17441
mouse_14_21-F-54    16285
mouse_3_3-F-56      13089
mouse_2_1-M-63      12977
mouse_23_30-M-5     12484
mouse_16_24-M-58    11883
mouse_17_24-M-59    11838
mouse_10_18-F-50    11808
mouse_11_18-F-51     9571
mouse_18_24-M-60     9497
mouse_21_30-M-3      9286
mouse_22_30-M-4      7768
mouse_1_1-M-62       6598
mouse_4_3-F-57       6497
mouse_7_3-M-8        6189
mouse_6_3-M-7/8      5235
mouse_9_3-M-9        4897
mouse_5_3-M-5/6      4880
mouse_13_18-M-53     4837
mouse_19_24-M-61     3665
mouse_8_3-M-8/9       450
Name: individual, dtype: int64

In [30]:
def check_celltype_presence(
    adata,
    celltypes=("fast muscle cell", "slow muscle cell"),
    celltype_col="narrow_group",
):
    for celltype in celltypes:
        assert celltype in set(adata.obs[celltype_col])


check_celltype_presence(adata_updated_muscle_new_obs)