# Aims
- Read in the 2 diseased heart objects and the atlas object (subset to just LV data)
- Ensure raw counts are in .X
- Ensure var names are ensemblIDs
- Ensure there `donor`, `sex`, `age`, `condition`, `publication`, `kit_10x`, `cell_or_nuclei` metadata at least
- Concatenate

In [1]:
import scanpy as sc
import os
import json
import sys
import anndata

In [2]:
sys.path.append("/lustre/scratch126/cellgen/team205/jc48/jupyter/jctk") # add the directory containing the cloned jctk package to Python's path
import inspect
from jctk import api, utils

print("API module function names:\n", [f for f in dir(api) if inspect.isfunction(getattr(api, f))], "\n")
print("Utils module function names:\n", [f for f in dir(utils) if inspect.isfunction(getattr(utils, f))], "\n")

API module function names:
 ['getClinVargenes', 'getHGNCgroup', 'getHGNCgroup_dict'] 

Utils module function names:
 ['add_genomic_coordinates', 'check_update', 'compute_median_count', 'downsample_adata_proportionately', 'downsample_adata_randomly', 'lognorm_to_counts_with_progress', 'rank_genes_vs_nearest', 'rename_adata_var_index', 'shapiro', 'test_normality'] 



# Prepare Chaffin22 data

In [3]:
Chaffin22=sc.read("/lustre/scratch126/cellgen/team205/heart/objects/hypersampling/diseased/RNA/Chaffin22/Chaffin22.h5ad")
Chaffin22

AnnData object with n_obs × n_vars = 592689 × 36601
    obs: 'biosample_id', 'donor_id', 'disease', 'sex', 'age', 'lvef', 'cell_type_leiden0.6', 'SubCluster', 'cellbender_ncount', 'cellbender_ngenes', 'cellranger_percent_mito', 'exon_prop', 'cellbender_entropy', 'cellranger_doublet_scores'
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'SubCluster_colors', 'biosample_id_colors', 'cell_type_leiden0.6_colors', 'disease_colors', 'donor_id_colors', 'log1p', 'sex_colors'
    obsm: 'X_umap'
    layers: 'cellbender_adjusted_counts', 'cellranger_raw'

In [4]:
Chaffin22.X.data[:10]

array([0.8174079 , 1.8014603 , 0.8174079 , 2.0738537 , 0.8174079 ,
       0.48999685, 0.8174079 , 0.48999685, 0.8174079 , 0.48999685],
      dtype=float32)

In [5]:
Chaffin22.X=Chaffin22.layers["cellbender_adjusted_counts"].copy()

In [6]:
Chaffin22.X.data[:10]

array([ 2.,  8.,  2., 11.,  2.,  1.,  2.,  1.,  2.,  1.], dtype=float32)

In [7]:
Chaffin22.obs['publication']="Chaffin22"

In [8]:
Chaffin22.obs['donor']=Chaffin22.obs['donor_id']

In [9]:
Chaffin22.obs['region']="LV"

In [10]:
Chaffin22.obs['disease'].value_counts()

HCM    235252
NF     185441
DCM    171996
Name: disease, dtype: int64

In [11]:
Chaffin22.obs['condition'] = Chaffin22.obs['disease'].apply(lambda x: 'healthy' if x == 'NF' else x)

In [12]:
Chaffin22.obs['cell_or_nuclei']="Nuclei"

In [13]:
Chaffin22.obs['kit_10x']="3prime-v3"

In [14]:
Chaffin22.obs['cell_type']=Chaffin22.obs['cell_type_leiden0.6']
Chaffin22.obs['cell_state']=Chaffin22.obs['SubCluster']

In [15]:
Chaffin22.obs

Unnamed: 0,biosample_id,donor_id,disease,sex,age,lvef,cell_type_leiden0.6,SubCluster,cellbender_ncount,cellbender_ngenes,...,cellbender_entropy,cellranger_doublet_scores,publication,donor,region,condition,cell_or_nuclei,kit_10x,cell_type,cell_state
TTCTTCCGTTCAACGT-1-0,LV_1622_2_nf,P1622,NF,male,56.0,65.0,Cardiomyocyte_I,CM-X1,15815.0,4632,...,7.503471,0.091892,Chaffin22,P1622,LV,healthy,Nuclei,3prime-v3,Cardiomyocyte_I,CM-X1
CATCCACCATCTAACG-1-0,LV_1622_2_nf,P1622,NF,male,56.0,65.0,Cardiomyocyte_I,CM-HHATL,15546.0,4673,...,7.582058,0.101727,Chaffin22,P1622,LV,healthy,Nuclei,3prime-v3,Cardiomyocyte_I,CM-HHATL
ACCCAAACAGCTAACT-1-0,LV_1622_2_nf,P1622,NF,male,56.0,65.0,Cardiomyocyte_I,CM-HHATL,14983.0,4432,...,7.466405,0.089618,Chaffin22,P1622,LV,healthy,Nuclei,3prime-v3,Cardiomyocyte_I,CM-HHATL
AAGGAATCAACTGGTT-1-0,LV_1622_2_nf,P1622,NF,male,56.0,65.0,Cardiomyocyte_I,CM-X1,14995.0,4483,...,7.452266,0.071875,Chaffin22,P1622,LV,healthy,Nuclei,3prime-v3,Cardiomyocyte_I,CM-X1
TACCCGTAGCGTGCTC-1-0,LV_1622_2_nf,P1622,NF,male,56.0,65.0,Cardiomyocyte_I,CM-HHATL,14797.0,4690,...,7.609082,0.063736,Chaffin22,P1622,LV,healthy,Nuclei,3prime-v3,Cardiomyocyte_I,CM-HHATL
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTATTGCGTCGGTGTC-1-79,LV_1561_2_nf,P1561,NF,male,65.0,60.0,Endothelial_I,EC-Lymphatic,403.0,291,...,6.691553,0.038961,Chaffin22,P1561,LV,healthy,Nuclei,3prime-v3,Endothelial_I,EC-Lymphatic
GTCACGGGTTGTATGC-1-79,LV_1561_2_nf,P1561,NF,male,65.0,60.0,Endothelial_I,EC-PKD1L1,494.0,349,...,6.791491,0.032258,Chaffin22,P1561,LV,healthy,Nuclei,3prime-v3,Endothelial_I,EC-PKD1L1
GTCATGATCTTTCGAT-1-79,LV_1561_2_nf,P1561,NF,male,65.0,60.0,Endothelial_I,EC-Lymphatic,502.0,404,...,7.409244,0.012023,Chaffin22,P1561,LV,healthy,Nuclei,3prime-v3,Endothelial_I,EC-Lymphatic
GCGATCGTCAGAGTGG-1-79,LV_1561_2_nf,P1561,NF,male,65.0,60.0,Endothelial_I,EC-Lymphatic,423.0,343,...,7.284163,0.009591,Chaffin22,P1561,LV,healthy,Nuclei,3prime-v3,Endothelial_I,EC-Lymphatic


In [16]:
Chaffin22.var

Unnamed: 0,gene_ids,feature_types,genome
MIR1302-2HG,ENSG00000243485,Gene Expression,GRCh38-2020-A_premrna
FAM138A,ENSG00000237613,Gene Expression,GRCh38-2020-A_premrna
OR4F5,ENSG00000186092,Gene Expression,GRCh38-2020-A_premrna
AL627309.1,ENSG00000238009,Gene Expression,GRCh38-2020-A_premrna
AL627309.3,ENSG00000239945,Gene Expression,GRCh38-2020-A_premrna
...,...,...,...
AC141272.1,ENSG00000277836,Gene Expression,GRCh38-2020-A_premrna
AC023491.2,ENSG00000278633,Gene Expression,GRCh38-2020-A_premrna
AC007325.1,ENSG00000276017,Gene Expression,GRCh38-2020-A_premrna
AC007325.4,ENSG00000278817,Gene Expression,GRCh38-2020-A_premrna


In [17]:
utils.rename_adata_var_index(Chaffin22,"gene_ids")

AnnData expects .var.index to contain strings, but got values like:
    ['ENSG00000243485', 'ENSG00000237613', 'ENSG00000186092', 'ENSG00000238009', 'ENSG00000239945']

    Inferred to be: categorical

  names = self._prep_dim_index(names, "var")


AnnData object with n_obs × n_vars = 592689 × 36601
    obs: 'biosample_id', 'donor_id', 'disease', 'sex', 'age', 'lvef', 'cell_type_leiden0.6', 'SubCluster', 'cellbender_ncount', 'cellbender_ngenes', 'cellranger_percent_mito', 'exon_prop', 'cellbender_entropy', 'cellranger_doublet_scores', 'publication', 'donor', 'region', 'condition', 'cell_or_nuclei', 'kit_10x', 'cell_type', 'cell_state'
    var: 'original_index', 'gene_ids', 'feature_types', 'genome'
    uns: 'SubCluster_colors', 'biosample_id_colors', 'cell_type_leiden0.6_colors', 'disease_colors', 'donor_id_colors', 'log1p', 'sex_colors'
    obsm: 'X_umap'
    layers: 'cellbender_adjusted_counts', 'cellranger_raw'

In [18]:
Chaffin22.var

Unnamed: 0_level_0,original_index,gene_ids,feature_types,genome
gene_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ENSG00000243485,MIR1302-2HG,ENSG00000243485,Gene Expression,GRCh38-2020-A_premrna
ENSG00000237613,FAM138A,ENSG00000237613,Gene Expression,GRCh38-2020-A_premrna
ENSG00000186092,OR4F5,ENSG00000186092,Gene Expression,GRCh38-2020-A_premrna
ENSG00000238009,AL627309.1,ENSG00000238009,Gene Expression,GRCh38-2020-A_premrna
ENSG00000239945,AL627309.3,ENSG00000239945,Gene Expression,GRCh38-2020-A_premrna
...,...,...,...,...
ENSG00000277836,AC141272.1,ENSG00000277836,Gene Expression,GRCh38-2020-A_premrna
ENSG00000278633,AC023491.2,ENSG00000278633,Gene Expression,GRCh38-2020-A_premrna
ENSG00000276017,AC007325.1,ENSG00000276017,Gene Expression,GRCh38-2020-A_premrna
ENSG00000278817,AC007325.4,ENSG00000278817,Gene Expression,GRCh38-2020-A_premrna


# Prepare Reichart22 data

In [19]:
Reichart22=sc.read("/lustre/scratch126/cellgen/team205/heart/objects/hypersampling/diseased/RNA/Reichart22/Reichart22.h5ad")
Reichart22

AnnData object with n_obs × n_vars = 881081 × 33234
    obs: 'Sample', 'donor_id', 'Region_x', 'Primary.Genetic.Diagnosis', 'n_genes', 'n_counts', 'percent_mito', 'percent_ribo', 'scrublet_score_z', 'scrublet_score_log', 'solo_score', 'cell_states', 'Assigned', 'self_reported_ethnicity_ontology_term_id', 'disease_ontology_term_id', 'cell_type_ontology_term_id', 'sex_ontology_term_id', 'assay_ontology_term_id', 'organism_ontology_term_id', 'is_primary_data', 'tissue_ontology_term_id', 'development_stage_ontology_term_id', 'suspension_type', 'cell_type', 'assay', 'disease', 'organism', 'sex', 'tissue', 'self_reported_ethnicity', 'development_stage'
    var: 'feature_is_filtered', 'feature_name', 'feature_reference', 'feature_biotype'
    uns: 'Primary.Genetic.Diagnosis_colors', 'Region_x_colors', 'cell_states_colors', 'cell_type_colors', 'cell_type_ontology_term_id_colors', 'leiden', 'neighbors', 'pca', 'schema_version', 'title', 'umap'
    obsm: 'X_pca', 'X_umap'
    layers: 'counts'

In [20]:
Reichart22.X.data[:10]

array([2.1988854, 1.3006421, 1.3006421, 1.8474003, 1.3006421, 1.3006421,
       2.1988854, 1.3006421, 1.3006421, 1.3006421], dtype=float32)

In [21]:
Reichart22.X=Reichart22.layers["counts"].copy()

In [22]:
Reichart22.X.data[:10]

array([1, 1, 1, 1, 1, 2, 1, 1, 1, 1], dtype=int32)

In [23]:
Reichart22.obs['publication']="Reichart22"

In [24]:
Reichart22.obs["tissue"].value_counts()

heart left ventricle       419113
heart right ventricle      230723
interventricular septum    206873
apex of heart               24372
Name: tissue, dtype: int64

In [25]:
Reichart22=Reichart22[Reichart22.obs.tissue!="heart right ventricle"]
Reichart22

View of AnnData object with n_obs × n_vars = 650358 × 33234
    obs: 'Sample', 'donor_id', 'Region_x', 'Primary.Genetic.Diagnosis', 'n_genes', 'n_counts', 'percent_mito', 'percent_ribo', 'scrublet_score_z', 'scrublet_score_log', 'solo_score', 'cell_states', 'Assigned', 'self_reported_ethnicity_ontology_term_id', 'disease_ontology_term_id', 'cell_type_ontology_term_id', 'sex_ontology_term_id', 'assay_ontology_term_id', 'organism_ontology_term_id', 'is_primary_data', 'tissue_ontology_term_id', 'development_stage_ontology_term_id', 'suspension_type', 'cell_type', 'assay', 'disease', 'organism', 'sex', 'tissue', 'self_reported_ethnicity', 'development_stage', 'publication'
    var: 'feature_is_filtered', 'feature_name', 'feature_reference', 'feature_biotype'
    uns: 'Primary.Genetic.Diagnosis_colors', 'Region_x_colors', 'cell_states_colors', 'cell_type_colors', 'cell_type_ontology_term_id_colors', 'leiden', 'neighbors', 'pca', 'schema_version', 'title', 'umap'
    obsm: 'X_pca', 'X_umap'


In [26]:
def add_region_column(adata):
    # Define a dictionary to map tissue values to region values
    tissue_to_region = {
        "heart left ventricle": "LV",
        "interventricular septum": "SP",
        "apex of heart": "AX"
    }

    # Create a new column 'region' by mapping the 'tissue' column using the dictionary
    adata.obs['region'] = adata.obs['tissue'].map(tissue_to_region)

    return adata

# Assuming Reichart22 is your anndata object, apply the function to add the 'region' column
Reichart22 = add_region_column(Reichart22)

  adata.obs['region'] = adata.obs['tissue'].map(tissue_to_region)


In [27]:
Reichart22.obs["region"].value_counts()

LV    419113
SP    206873
AX     24372
Name: region, dtype: int64

In [28]:
Reichart22.obs["disease"].value_counts()

dilated cardiomyopathy                             349797
normal                                             219149
arrhythmogenic right ventricular cardiomyopathy     73849
non-compaction cardiomyopathy                        7563
Name: disease, dtype: int64

In [29]:
def add_condition_column(adata):
    # Define a dictionary to map disease values to condition values
    disease_to_condition = {
        "normal": "healthy",
        "arrhythmogenic right ventricular cardiomyopathy": "ARVC",
        "dilated cardiomyopathy": "DCM",
        "non-compaction cardiomyopathy": "LVNC"
    }

    # Create a new column 'condition' by mapping the 'disease' column using the dictionary
    adata.obs['condition'] = adata.obs['disease'].map(disease_to_condition)

    return adata

# Assuming Reichart22 is your anndata object, apply the function to add the 'condition' column
Reichart22 = add_condition_column(Reichart22)

In [30]:
Reichart22.obs["condition"].value_counts()

DCM        349797
healthy    219149
ARVC        73849
LVNC         7563
Name: condition, dtype: int64

In [31]:
Reichart22=Reichart22[Reichart22.obs.condition.isin(["healthy","DCM"])]

In [32]:
def add_age_column(adata):
    # Define a dictionary to map development stages to the average age
    stage_to_age = {
        "fifth decade human stage": 45,
        "sixth decade human stage": 55,
        "seventh decade human stage": 65,
        "fourth decade human stage": 35,
        "eighth decade human stage": 75,
        "adolescent stage": 15,  # Assuming adolescent refers to ages 10-19
        "young adult stage": 25,  # Assuming young adult refers to ages 20-29
        "third decade human stage": 25,  # Overlaps with young adult stage
        "infant stage": 5,  # Assuming infant refers to ages 0-9
        "child stage": 5  # Overlaps with infant stage
    }
    
    # Create a new column 'age' by mapping the 'development_stage' column using the dictionary
    adata.obs['age'] = adata.obs['development_stage'].map(stage_to_age)
    
    return adata

# Assuming Reichart22 is your anndata object, apply the function to add the 'age' column
Reichart22 = add_age_column(Reichart22)

  adata.obs['age'] = adata.obs['development_stage'].map(stage_to_age)


In [33]:
Reichart22.obs['cell_type']=Reichart22.obs['cell_type']
Reichart22.obs['cell_state']=Reichart22.obs['cell_states']

In [34]:
Reichart22.obs

Unnamed: 0,Sample,donor_id,Region_x,Primary.Genetic.Diagnosis,n_genes,n_counts,percent_mito,percent_ribo,scrublet_score_z,scrublet_score_log,...,organism,sex,tissue,self_reported_ethnicity,development_stage,publication,region,condition,age,cell_state
2428,BS_H25_S00_premrna,H3,LV,control,649,907,0.004410,0.002205,0.017023,0.129609,...,Homo sapiens,male,interventricular septum,Asian,sixth decade human stage,Reichart22,SP,healthy,55,SMC1.2
2429,BS_H25_S00_premrna,H3,LV,control,1479,2659,0.000376,0.002256,0.033552,0.022799,...,Homo sapiens,male,interventricular septum,Asian,sixth decade human stage,Reichart22,SP,healthy,55,vCM1.0
2430,BS_H25_S00_premrna,H3,LV,control,716,1157,0.000864,0.000864,0.043906,0.000817,...,Homo sapiens,male,interventricular septum,Asian,sixth decade human stage,Reichart22,SP,healthy,55,vCM2
2431,BS_H25_S00_premrna,H3,LV,control,753,1055,0.007583,0.000000,0.094017,0.066094,...,Homo sapiens,male,interventricular septum,Asian,sixth decade human stage,Reichart22,SP,healthy,55,EC8.0
2432,BS_H25_S00_premrna,H3,LV,control,429,603,0.001658,0.001658,0.021382,0.021382,...,Homo sapiens,male,interventricular septum,Asian,sixth decade human stage,Reichart22,SP,healthy,55,PC1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
878977,IC_H04_LV0_premrna,IC_H04,LV,PVneg,2292,4195,0.003337,0.002145,0.017964,0.007822,...,Homo sapiens,female,heart left ventricle,European,seventh decade human stage,Reichart22,LV,DCM,65,EC2.0
878978,IC_H04_LV0_premrna,IC_H04,LV,PVneg,446,516,0.000000,0.000000,0.014629,0.006115,...,Homo sapiens,female,heart left ventricle,European,seventh decade human stage,Reichart22,LV,DCM,65,vFB3
878979,IC_H04_LV0_premrna,IC_H04,LV,PVneg,2756,6429,0.001400,0.001089,0.016749,0.007822,...,Homo sapiens,female,heart left ventricle,European,seventh decade human stage,Reichart22,LV,DCM,65,vFB2
878980,IC_H04_LV0_premrna,IC_H04,LV,PVneg,2618,6587,0.003492,0.001670,0.014629,0.010629,...,Homo sapiens,female,heart left ventricle,European,seventh decade human stage,Reichart22,LV,DCM,65,PC_lowQC


In [35]:
Reichart22.obs["donor"]=Reichart22.obs["donor_id"]

In [36]:
Reichart22.obs['cell_or_nuclei']="Nuclei"

In [37]:
Reichart22.obs['kit_10x']="3prime-v3"

In [38]:
Reichart22.obs

Unnamed: 0,Sample,donor_id,Region_x,Primary.Genetic.Diagnosis,n_genes,n_counts,percent_mito,percent_ribo,scrublet_score_z,scrublet_score_log,...,self_reported_ethnicity,development_stage,publication,region,condition,age,cell_state,donor,cell_or_nuclei,kit_10x
2428,BS_H25_S00_premrna,H3,LV,control,649,907,0.004410,0.002205,0.017023,0.129609,...,Asian,sixth decade human stage,Reichart22,SP,healthy,55,SMC1.2,H3,Nuclei,3prime-v3
2429,BS_H25_S00_premrna,H3,LV,control,1479,2659,0.000376,0.002256,0.033552,0.022799,...,Asian,sixth decade human stage,Reichart22,SP,healthy,55,vCM1.0,H3,Nuclei,3prime-v3
2430,BS_H25_S00_premrna,H3,LV,control,716,1157,0.000864,0.000864,0.043906,0.000817,...,Asian,sixth decade human stage,Reichart22,SP,healthy,55,vCM2,H3,Nuclei,3prime-v3
2431,BS_H25_S00_premrna,H3,LV,control,753,1055,0.007583,0.000000,0.094017,0.066094,...,Asian,sixth decade human stage,Reichart22,SP,healthy,55,EC8.0,H3,Nuclei,3prime-v3
2432,BS_H25_S00_premrna,H3,LV,control,429,603,0.001658,0.001658,0.021382,0.021382,...,Asian,sixth decade human stage,Reichart22,SP,healthy,55,PC1,H3,Nuclei,3prime-v3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
878977,IC_H04_LV0_premrna,IC_H04,LV,PVneg,2292,4195,0.003337,0.002145,0.017964,0.007822,...,European,seventh decade human stage,Reichart22,LV,DCM,65,EC2.0,IC_H04,Nuclei,3prime-v3
878978,IC_H04_LV0_premrna,IC_H04,LV,PVneg,446,516,0.000000,0.000000,0.014629,0.006115,...,European,seventh decade human stage,Reichart22,LV,DCM,65,vFB3,IC_H04,Nuclei,3prime-v3
878979,IC_H04_LV0_premrna,IC_H04,LV,PVneg,2756,6429,0.001400,0.001089,0.016749,0.007822,...,European,seventh decade human stage,Reichart22,LV,DCM,65,vFB2,IC_H04,Nuclei,3prime-v3
878980,IC_H04_LV0_premrna,IC_H04,LV,PVneg,2618,6587,0.003492,0.001670,0.014629,0.010629,...,European,seventh decade human stage,Reichart22,LV,DCM,65,PC_lowQC,IC_H04,Nuclei,3prime-v3


In [39]:
Reichart22.var

Unnamed: 0_level_0,feature_is_filtered,feature_name,feature_reference,feature_biotype
gene_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ENSG00000243485,False,MIR1302-2HG,NCBITaxon:9606,gene
ENSG00000237613,False,FAM138A,NCBITaxon:9606,gene
ENSG00000186092,False,OR4F5,NCBITaxon:9606,gene
ENSG00000238009,False,RP11-34P13.7,NCBITaxon:9606,gene
ENSG00000239945,False,RP11-34P13.8,NCBITaxon:9606,gene
...,...,...,...,...
ENSG00000277856,False,ENSG00000277856.1,NCBITaxon:9606,gene
ENSG00000275063,False,ENSG00000275063.1,NCBITaxon:9606,gene
ENSG00000271254,False,ENSG00000271254.6,NCBITaxon:9606,gene
ENSG00000277475,False,ENSG00000277475.1,NCBITaxon:9606,gene


# Prepare atlas

In [40]:
EightRegions=sc.read("/lustre/scratch126/cellgen/team205/heart/objects/hypersampling/RNA/8regions/RNA_adult-8reg_full_raw_cellstate-annotated.h5ad")
EightRegions

AnnData object with n_obs × n_vars = 704296 × 32732
    obs: 'sangerID', 'combinedID', 'donor', 'donor_type', 'region', 'region_finest', 'age', 'gender', 'facility', 'cell_or_nuclei', 'modality', 'kit_10x', 'flushed', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'scrublet_score', 'scrublet_leiden', 'cluster_scrublet_score', 'doublet_pval', 'doublet_bh_pval', 'batch_key', 'leiden_scVI', 'cell_type', 'cell_state_HCAv1', 'cell_state_scNym', 'cell_state_scNym_confidence', 'cell_state', 'latent_RT_efficiency', 'latent_cell_probability', 'latent_scale', 'n_counts', '_scvi_batch', '_scvi_labels', 'clus20', 'doublet_cls', 'original_or_new', 'batch', 'scANVI_predictions', 'leiden_scArches'
    var: 'gene_name-new', 'gene_name_scRNA-0-original', 'gene_name_snRNA-1-original', 'gene_name_multiome-2-original'
    uns: 'age_colors', 'cell_or_nuclei_colors', 'cell_state_colors', 'cell_type_colors', 'donor_colors', 'donor_t

In [41]:
EightRegions.obs.region.value_counts()

LV     148429
RV     104454
AX     102221
SP      99024
LA      74501
SAN     69153
RA      57495
AVN     49019
Name: region, dtype: int64

In [42]:
EightRegions=EightRegions[EightRegions.obs.region.isin(["LV","AX","SP"])]
EightRegions

View of AnnData object with n_obs × n_vars = 349674 × 32732
    obs: 'sangerID', 'combinedID', 'donor', 'donor_type', 'region', 'region_finest', 'age', 'gender', 'facility', 'cell_or_nuclei', 'modality', 'kit_10x', 'flushed', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'scrublet_score', 'scrublet_leiden', 'cluster_scrublet_score', 'doublet_pval', 'doublet_bh_pval', 'batch_key', 'leiden_scVI', 'cell_type', 'cell_state_HCAv1', 'cell_state_scNym', 'cell_state_scNym_confidence', 'cell_state', 'latent_RT_efficiency', 'latent_cell_probability', 'latent_scale', 'n_counts', '_scvi_batch', '_scvi_labels', 'clus20', 'doublet_cls', 'original_or_new', 'batch', 'scANVI_predictions', 'leiden_scArches'
    var: 'gene_name-new', 'gene_name_scRNA-0-original', 'gene_name_snRNA-1-original', 'gene_name_multiome-2-original'
    uns: 'age_colors', 'cell_or_nuclei_colors', 'cell_state_colors', 'cell_type_colors', 'donor_colors', 

In [43]:
EightRegions.X.data[:10]

array([1., 1., 1., 1., 2., 1., 1., 1., 5., 1.], dtype=float32)

In [44]:
EightRegions.obs['publication']="Kanemaru22"

  EightRegions.obs['publication']="Kanemaru22"


In [45]:
EightRegions.obs['condition']="healthy"

In [46]:
EightRegions.obs['sex']=EightRegions.obs['gender']

In [47]:
import numpy as np

def convert_age_to_numeric(adata):
    # Function to convert age value to numeric, taking the mean of age bands
    def age_to_numeric(age):
        if isinstance(age, str):
            # Split the string by '-' and take the mean of the two values
            age_values = age.split('-')
            return np.mean([float(val) for val in age_values])
        else:
            # If the age is already numeric, return it as is
            return age

    # Apply the function to each value in the 'age' column
    adata.obs['age'] = adata.obs['age'].apply(age_to_numeric)
    
    return adata

# Assuming adata is your anndata object, apply the function to convert age values to numeric
EightRegions = convert_age_to_numeric(EightRegions)

In [48]:
EightRegions.obs

Unnamed: 0_level_0,sangerID,combinedID,donor,donor_type,region,region_finest,age,gender,facility,cell_or_nuclei,...,_scvi_labels,clus20,doublet_cls,original_or_new,batch,scANVI_predictions,leiden_scArches,publication,condition,sex
barcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HCAHeart7606896_GATGAGGCACGGCTAC,HCAHeart7606896,na,D1,DBD,AX,AX,52.5,Female,Sanger,Cell,...,,No,,original,D1_Cell_3prime-v2,Endothelial cell,10,Kanemaru22,healthy,Female
HCAHeart7606896_CGCTTCACATTTGCCC,HCAHeart7606896,na,D1,DBD,AX,AX,52.5,Female,Sanger,Cell,...,,No,,original,D1_Cell_3prime-v2,Mural cell,18,Kanemaru22,healthy,Female
HCAHeart7606896_GTTAAGCAGAGACTAT,HCAHeart7606896,na,D1,DBD,AX,AX,52.5,Female,Sanger,Cell,...,,No,,original,D1_Cell_3prime-v2,Endothelial cell,10,Kanemaru22,healthy,Female
HCAHeart7606896_TCGCGTTGTAAGAGGA,HCAHeart7606896,na,D1,DBD,AX,AX,52.5,Female,Sanger,Cell,...,,No,,original,D1_Cell_3prime-v2,Mural cell,3,Kanemaru22,healthy,Female
HCAHeart7606896_GCTGCGAGTGTTGGGA,HCAHeart7606896,na,D1,DBD,AX,AX,52.5,Female,Sanger,Cell,...,,No,,original,D1_Cell_3prime-v2,Endothelial cell,10,Kanemaru22,healthy,Female
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST11064577_HCAHeartST11023242_CAATGACTCGATATTG-1,HCAHeartST11064577,HCAHeartST11064577_HCAHeartST11023242,AH1,DCD,SP,IVS MID LV,47.5,Female,Sanger,Nuclei,...,,No,,original,AH1_Nuclei_Multiome-v1,Endothelial cell,29,Kanemaru22,healthy,Female
HCAHeartST11064577_HCAHeartST11023242_AGCTACGTCCTGAATA-1,HCAHeartST11064577,HCAHeartST11064577_HCAHeartST11023242,AH1,DCD,SP,IVS MID LV,47.5,Female,Sanger,Nuclei,...,,No,,original,AH1_Nuclei_Multiome-v1,Myeloid,29,Kanemaru22,healthy,Female
HCAHeartST11064577_HCAHeartST11023242_CGTCATTGTTTGACCT-1,HCAHeartST11064577,HCAHeartST11064577_HCAHeartST11023242,AH1,DCD,SP,IVS MID LV,47.5,Female,Sanger,Nuclei,...,,No,,original,AH1_Nuclei_Multiome-v1,Ventricular Cardiomyocyte,13,Kanemaru22,healthy,Female
HCAHeartST11064577_HCAHeartST11023242_TCACTGACAACTAGCC-1,HCAHeartST11064577,HCAHeartST11064577_HCAHeartST11023242,AH1,DCD,SP,IVS MID LV,47.5,Female,Sanger,Nuclei,...,,No,,original,AH1_Nuclei_Multiome-v1,Myeloid,12,Kanemaru22,healthy,Female


In [49]:
EightRegions.var

Unnamed: 0,gene_name-new,gene_name_scRNA-0-original,gene_name_snRNA-1-original,gene_name_multiome-2-original
ENSG00000243485,MIR1302-2HG,MIR1302-2HG,MIR1302-2HG,MIR1302-2HG
ENSG00000237613,FAM138A,FAM138A,FAM138A,FAM138A
ENSG00000186092,OR4F5,OR4F5,OR4F5,OR4F5
ENSG00000238009,AL627309.1,AL627309.1,AL627309.1,AL627309.1
ENSG00000239945,AL627309.3,AL627309.3,AL627309.3,AL627309.3
...,...,...,...,...
ENSG00000277856,AC233755.2,AC233755.2,AC233755.2,AC233755.2
ENSG00000275063,AC233755.1,AC233755.1,AC233755.1,AC233755.1
ENSG00000271254,AC240274.1,AC240274.1,AC240274.1,AC240274.1
ENSG00000277475,AC213203.2,AC213203.1,AC213203.1,AC213203.2


# Concatenate into one large LV object

In [50]:
%%time
adata=anndata.concat([Chaffin22, Reichart22, EightRegions], join='outer')
adata

  warn(


CPU times: user 1min 10s, sys: 3min 9s, total: 4min 19s
Wall time: 4min 19s


AnnData object with n_obs × n_vars = 1511309 × 37171
    obs: 'biosample_id', 'donor_id', 'disease', 'sex', 'age', 'lvef', 'cell_type_leiden0.6', 'SubCluster', 'cellbender_ncount', 'cellbender_ngenes', 'cellranger_percent_mito', 'exon_prop', 'cellbender_entropy', 'cellranger_doublet_scores', 'publication', 'donor', 'region', 'condition', 'cell_or_nuclei', 'kit_10x', 'cell_type', 'cell_state', 'Sample', 'Region_x', 'Primary.Genetic.Diagnosis', 'n_genes', 'n_counts', 'percent_mito', 'percent_ribo', 'scrublet_score_z', 'scrublet_score_log', 'solo_score', 'cell_states', 'Assigned', 'self_reported_ethnicity_ontology_term_id', 'disease_ontology_term_id', 'cell_type_ontology_term_id', 'sex_ontology_term_id', 'assay_ontology_term_id', 'organism_ontology_term_id', 'is_primary_data', 'tissue_ontology_term_id', 'development_stage_ontology_term_id', 'suspension_type', 'assay', 'organism', 'tissue', 'self_reported_ethnicity', 'development_stage', 'sangerID', 'combinedID', 'donor_type', 'region_fine

In [51]:
adata.var

ENSG00000000003
ENSG00000000005
ENSG00000000419
ENSG00000000457
ENSG00000000460
...
ENSG00000288380
ENSG00000288398
ENSG00000288436
ENSG00000288459
ENSG00000288460


In [52]:
utils.add_genomic_coordinates(adata)

Ensembl gene IDs detected in index, merging.
36964 out of 37171 features (99.443114255737%) have updated information.


AnnData object with n_obs × n_vars = 1511309 × 37171
    obs: 'biosample_id', 'donor_id', 'disease', 'sex', 'age', 'lvef', 'cell_type_leiden0.6', 'SubCluster', 'cellbender_ncount', 'cellbender_ngenes', 'cellranger_percent_mito', 'exon_prop', 'cellbender_entropy', 'cellranger_doublet_scores', 'publication', 'donor', 'region', 'condition', 'cell_or_nuclei', 'kit_10x', 'cell_type', 'cell_state', 'Sample', 'Region_x', 'Primary.Genetic.Diagnosis', 'n_genes', 'n_counts', 'percent_mito', 'percent_ribo', 'scrublet_score_z', 'scrublet_score_log', 'solo_score', 'cell_states', 'Assigned', 'self_reported_ethnicity_ontology_term_id', 'disease_ontology_term_id', 'cell_type_ontology_term_id', 'sex_ontology_term_id', 'assay_ontology_term_id', 'organism_ontology_term_id', 'is_primary_data', 'tissue_ontology_term_id', 'development_stage_ontology_term_id', 'suspension_type', 'assay', 'organism', 'tissue', 'self_reported_ethnicity', 'development_stage', 'sangerID', 'combinedID', 'donor_type', 'region_fine

In [53]:
adata.var

Unnamed: 0_level_0,gene_id,gene_name,gene_biotype,gene_seq_start,gene_seq_end,seq_name,seq_strand,seq_coord_system,description,gene_id_version,canonical_transcript
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
ENSG00000000003,ENSG00000000003,TSPAN6,protein_coding,100627108.0,100639991.0,X,-1.0,chromosome,tetraspanin 6 [Source:HGNC Symbol;Acc:HGNC:11858],ENSG00000000003.15,ENST00000373020
ENSG00000000005,ENSG00000000005,TNMD,protein_coding,100584936.0,100599885.0,X,1.0,chromosome,tenomodulin [Source:HGNC Symbol;Acc:HGNC:17757],ENSG00000000005.6,ENST00000373031
ENSG00000000419,ENSG00000000419,DPM1,protein_coding,50934867.0,50959140.0,20,-1.0,chromosome,dolichyl-phosphate mannosyltransferase subunit...,ENSG00000000419.14,ENST00000371588
ENSG00000000457,ENSG00000000457,SCYL3,protein_coding,169849631.0,169894267.0,1,-1.0,chromosome,SCY1 like pseudokinase 3 [Source:HGNC Symbol;A...,ENSG00000000457.14,ENST00000367771
ENSG00000000460,ENSG00000000460,C1orf112,protein_coding,169662007.0,169854080.0,1,1.0,chromosome,chromosome 1 open reading frame 112 [Source:HG...,ENSG00000000460.17,ENST00000359326
...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000288380,,,,,,,,,,,
ENSG00000288398,ENSG00000288398,,lncRNA,16228674.0,16231335.0,1,-1.0,chromosome,novel transcript,ENSG00000288398.1,ENST00000671994
ENSG00000288436,,,,,,,,,,,
ENSG00000288459,ENSG00000288459,,lncRNA,104293678.0,104294921.0,14,1.0,chromosome,novel transcript,ENSG00000288459.1,ENST00000672686


In [54]:
utils.rename_adata_var_index(adata,"gene_name")

AnnData expects .var.index to contain strings, but got values like:
    ['TSPAN6', 'TNMD', 'DPM1', 'SCYL3', 'C1orf112']

    Inferred to be: categorical

  names = self._prep_dim_index(names, "var")


AnnData object with n_obs × n_vars = 1511309 × 37171
    obs: 'biosample_id', 'donor_id', 'disease', 'sex', 'age', 'lvef', 'cell_type_leiden0.6', 'SubCluster', 'cellbender_ncount', 'cellbender_ngenes', 'cellranger_percent_mito', 'exon_prop', 'cellbender_entropy', 'cellranger_doublet_scores', 'publication', 'donor', 'region', 'condition', 'cell_or_nuclei', 'kit_10x', 'cell_type', 'cell_state', 'Sample', 'Region_x', 'Primary.Genetic.Diagnosis', 'n_genes', 'n_counts', 'percent_mito', 'percent_ribo', 'scrublet_score_z', 'scrublet_score_log', 'solo_score', 'cell_states', 'Assigned', 'self_reported_ethnicity_ontology_term_id', 'disease_ontology_term_id', 'cell_type_ontology_term_id', 'sex_ontology_term_id', 'assay_ontology_term_id', 'organism_ontology_term_id', 'is_primary_data', 'tissue_ontology_term_id', 'development_stage_ontology_term_id', 'suspension_type', 'assay', 'organism', 'tissue', 'self_reported_ethnicity', 'development_stage', 'sangerID', 'combinedID', 'donor_type', 'region_fine

In [55]:
adata.var

Unnamed: 0_level_0,index,gene_id,gene_name,gene_biotype,gene_seq_start,gene_seq_end,seq_name,seq_strand,seq_coord_system,description,gene_id_version,canonical_transcript
gene_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
TSPAN6,ENSG00000000003,ENSG00000000003,TSPAN6,protein_coding,100627108.0,100639991.0,X,-1.0,chromosome,tetraspanin 6 [Source:HGNC Symbol;Acc:HGNC:11858],ENSG00000000003.15,ENST00000373020
TNMD,ENSG00000000005,ENSG00000000005,TNMD,protein_coding,100584936.0,100599885.0,X,1.0,chromosome,tenomodulin [Source:HGNC Symbol;Acc:HGNC:17757],ENSG00000000005.6,ENST00000373031
DPM1,ENSG00000000419,ENSG00000000419,DPM1,protein_coding,50934867.0,50959140.0,20,-1.0,chromosome,dolichyl-phosphate mannosyltransferase subunit...,ENSG00000000419.14,ENST00000371588
SCYL3,ENSG00000000457,ENSG00000000457,SCYL3,protein_coding,169849631.0,169894267.0,1,-1.0,chromosome,SCY1 like pseudokinase 3 [Source:HGNC Symbol;A...,ENSG00000000457.14,ENST00000367771
C1orf112,ENSG00000000460,ENSG00000000460,C1orf112,protein_coding,169662007.0,169854080.0,1,1.0,chromosome,chromosome 1 open reading frame 112 [Source:HG...,ENSG00000000460.17,ENST00000359326
...,...,...,...,...,...,...,...,...,...,...,...,...
nan-205,ENSG00000288380,,,,,,,,,,,
-11567,ENSG00000288398,ENSG00000288398,,lncRNA,16228674.0,16231335.0,1,-1.0,chromosome,novel transcript,ENSG00000288398.1,ENST00000671994
nan-206,ENSG00000288436,,,,,,,,,,,
-11568,ENSG00000288459,ENSG00000288459,,lncRNA,104293678.0,104294921.0,14,1.0,chromosome,novel transcript,ENSG00000288459.1,ENST00000672686


In [56]:
adata.obs

Unnamed: 0,biosample_id,donor_id,disease,sex,age,lvef,cell_type_leiden0.6,SubCluster,cellbender_ncount,cellbender_ngenes,...,latent_cell_probability,latent_scale,_scvi_batch,_scvi_labels,clus20,doublet_cls,original_or_new,batch,scANVI_predictions,leiden_scArches
TTCTTCCGTTCAACGT-1-0,LV_1622_2_nf,P1622,NF,male,56.0,65.0,Cardiomyocyte_I,CM-X1,15815.0,4632.0,...,,,,,,,,,,
CATCCACCATCTAACG-1-0,LV_1622_2_nf,P1622,NF,male,56.0,65.0,Cardiomyocyte_I,CM-HHATL,15546.0,4673.0,...,,,,,,,,,,
ACCCAAACAGCTAACT-1-0,LV_1622_2_nf,P1622,NF,male,56.0,65.0,Cardiomyocyte_I,CM-HHATL,14983.0,4432.0,...,,,,,,,,,,
AAGGAATCAACTGGTT-1-0,LV_1622_2_nf,P1622,NF,male,56.0,65.0,Cardiomyocyte_I,CM-X1,14995.0,4483.0,...,,,,,,,,,,
TACCCGTAGCGTGCTC-1-0,LV_1622_2_nf,P1622,NF,male,56.0,65.0,Cardiomyocyte_I,CM-HHATL,14797.0,4690.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HCAHeartST11064577_HCAHeartST11023242_CAATGACTCGATATTG-1,,,,Female,47.5,,,,,,...,,,,,No,,original,AH1_Nuclei_Multiome-v1,Endothelial cell,29
HCAHeartST11064577_HCAHeartST11023242_AGCTACGTCCTGAATA-1,,,,Female,47.5,,,,,,...,,,,,No,,original,AH1_Nuclei_Multiome-v1,Myeloid,29
HCAHeartST11064577_HCAHeartST11023242_CGTCATTGTTTGACCT-1,,,,Female,47.5,,,,,,...,,,,,No,,original,AH1_Nuclei_Multiome-v1,Ventricular Cardiomyocyte,13
HCAHeartST11064577_HCAHeartST11023242_TCACTGACAACTAGCC-1,,,,Female,47.5,,,,,,...,,,,,No,,original,AH1_Nuclei_Multiome-v1,Myeloid,12


In [57]:
# make a batch key
adata.obs["batch_key"]=adata.obs["donor"].astype(str)+"_"+adata.obs["kit_10x"].astype(str)+"_"+adata.obs["cell_or_nuclei"].astype(str)
adata.obs.batch_key.value_counts()

H5_3prime-v3_Nuclei        53434
H6_3prime-v3_Nuclei        38907
D11_3prime-v3_Cell         37446
H3_3prime-v3_Nuclei        34680
H4_3prime-v3_Nuclei        33982
                           ...  
DO1_3prime-v3_Nuclei        1163
IC_H03_3prime-v3_Nuclei     1066
IC_H04_3prime-v3_Nuclei      902
D4_3prime-v2_Cell            543
D1_3prime-v2_Cell            166
Name: batch_key, Length: 132, dtype: int64

In [58]:
adata.obs.condition.value_counts()

healthy    754264
DCM        521793
HCM        235252
Name: condition, dtype: int64

In [59]:
adata.obs=adata.obs[["donor", "sex", "age", "condition", "publication", 'kit_10x', 'cell_or_nuclei',"batch_key","cell_type","cell_state"]]
adata.obs

Unnamed: 0,donor,sex,age,condition,publication,kit_10x,cell_or_nuclei,batch_key,cell_type,cell_state
TTCTTCCGTTCAACGT-1-0,P1622,male,56.0,healthy,Chaffin22,3prime-v3,Nuclei,P1622_3prime-v3_Nuclei,Cardiomyocyte_I,CM-X1
CATCCACCATCTAACG-1-0,P1622,male,56.0,healthy,Chaffin22,3prime-v3,Nuclei,P1622_3prime-v3_Nuclei,Cardiomyocyte_I,CM-HHATL
ACCCAAACAGCTAACT-1-0,P1622,male,56.0,healthy,Chaffin22,3prime-v3,Nuclei,P1622_3prime-v3_Nuclei,Cardiomyocyte_I,CM-HHATL
AAGGAATCAACTGGTT-1-0,P1622,male,56.0,healthy,Chaffin22,3prime-v3,Nuclei,P1622_3prime-v3_Nuclei,Cardiomyocyte_I,CM-X1
TACCCGTAGCGTGCTC-1-0,P1622,male,56.0,healthy,Chaffin22,3prime-v3,Nuclei,P1622_3prime-v3_Nuclei,Cardiomyocyte_I,CM-HHATL
...,...,...,...,...,...,...,...,...,...,...
HCAHeartST11064577_HCAHeartST11023242_CAATGACTCGATATTG-1,AH1,Female,47.5,healthy,Kanemaru22,Multiome-v1,Nuclei,AH1_Multiome-v1_Nuclei,Fibroblast,unclassified
HCAHeartST11064577_HCAHeartST11023242_AGCTACGTCCTGAATA-1,AH1,Female,47.5,healthy,Kanemaru22,Multiome-v1,Nuclei,AH1_Multiome-v1_Nuclei,Ventricular Cardiomyocyte,vCM1
HCAHeartST11064577_HCAHeartST11023242_CGTCATTGTTTGACCT-1,AH1,Female,47.5,healthy,Kanemaru22,Multiome-v1,Nuclei,AH1_Multiome-v1_Nuclei,Ventricular Cardiomyocyte,vCM4
HCAHeartST11064577_HCAHeartST11023242_TCACTGACAACTAGCC-1,AH1,Female,47.5,healthy,Kanemaru22,Multiome-v1,Nuclei,AH1_Multiome-v1_Nuclei,Myeloid,unclassified


In [60]:
adata.obs.age.value_counts()

45.0    174759
55.0    152717
65.0    109549
62.5     94712
52.5     68309
63.0     65888
54.0     64093
47.5     58876
51.0     58029
67.5     57134
46.0     52740
25.0     44449
75.0     41436
35.0     41249
58.0     40068
49.0     37475
57.5     35996
29.0     28641
15.0     25865
42.0     23984
60.0     20536
42.5     19954
41.0     19513
62.0     19384
64.0     17986
56.0     15733
40.0     15378
72.5     14693
5.0      14613
59.0     13550
61.0     12009
72.0     11638
47.0     11076
69.0     10638
66.0      9361
33.0      9278
Name: age, dtype: int64

In [62]:
adata

AnnData object with n_obs × n_vars = 1511309 × 37171
    obs: 'donor', 'sex', 'age', 'condition', 'publication', 'kit_10x', 'cell_or_nuclei', 'batch_key', 'cell_type', 'cell_state'
    var: 'index', 'gene_id', 'gene_name', 'gene_biotype', 'gene_seq_start', 'gene_seq_end', 'seq_name', 'seq_strand', 'seq_coord_system', 'description', 'gene_id_version', 'canonical_transcript'
    obsm: 'X_umap', 'X_pca', 'X_scArches'
    layers: 'cellbender_adjusted_counts', 'cellranger_raw', 'counts'

In [64]:
del adata.obsm["X_umap"]

In [65]:
del adata.obsm["X_pca"]

In [66]:
del adata.obsm["X_scArches"]

In [69]:
del adata.layers["cellbender_adjusted_counts"]

In [70]:
del adata.layers["cellranger_raw"]

In [71]:
del adata.layers["counts"]

In [72]:
adata

AnnData object with n_obs × n_vars = 1511309 × 37171
    obs: 'donor', 'sex', 'age', 'condition', 'publication', 'kit_10x', 'cell_or_nuclei', 'batch_key', 'cell_type', 'cell_state'
    var: 'index', 'gene_id', 'gene_name', 'gene_biotype', 'gene_seq_start', 'gene_seq_end', 'seq_name', 'seq_strand', 'seq_coord_system', 'description', 'gene_id_version', 'canonical_transcript'

In [73]:
adata.write("/nfs/team205/jc48/ForHana/HeartData.h5ad")