### Notebook to create a merge object of diseased and healthy heart LVs.

- **Developed by**: Carlos Talavera-López Ph.D
- **Institute of AI for Health, HelmholtzZentrum münchen**
- v210830

### Load required modules

In [1]:
import anndata
import numpy as np
import pandas as pd
import scanpy as sc

### Set up working environment

In [2]:
sc.settings.verbosity = 3
sc.logging.print_versions()
sc.settings.set_figure_params(dpi = 200, color_map = 'RdPu', dpi_save = 300, vector_friendly = True, format = 'svg')



-----
anndata     0.7.6
scanpy      1.7.2
sinfo       0.3.1
-----
PIL                 8.0.1
anndata             0.7.6
appnope             0.1.0
backcall            0.2.0
bottleneck          1.3.2
cairo               1.19.1
cffi                1.14.3
cloudpickle         1.6.0
colorama            0.4.4
cycler              0.10.0
cython_runtime      NA
cytoolz             0.11.0
dask                2.30.0
dateutil            2.8.1
decorator           4.4.2
get_version         2.2
google              NA
h5py                2.10.0
igraph              0.9.1
ipykernel           5.3.4
ipython_genutils    0.2.0
jedi                0.17.1
joblib              0.17.0
kiwisolver          1.3.0
legacy_api_wrap     1.2
leidenalg           0.8.4
llvmlite            0.34.0
louvain             0.6.1
matplotlib          3.3.2
mkl                 2.3.0
mpl_toolkits        NA
natsort             7.1.1
numba               0.51.2
numexpr             2.7.1
numpy               1.19.2
packaging           20.4
p

### Read in healthy heart

In [9]:
healthy_heart = sc.read_h5ad('/Users/carlos.lopez/INBOX/heart/hca_heart_global_subset_ctl210808.h5ad') 
healthy_heart

AnnData object with n_obs × n_vars = 344621 × 22260
    obs: 'NRP', 'age_group', 'cell_source', 'cell_type', 'donor', 'gender', 'n_counts', 'n_genes', 'percent_mito', 'percent_ribo', 'region', 'sample', 'scrublet_score', 'source', 'type', 'version', 'cell_states', 'Used', 'Cells_Nuclei', 'combined'
    var: 'gene_ids-Harvard-Nuclei-full', 'feature_types-Harvard-Nuclei-full', 'gene_ids-Sanger-Nuclei-full', 'feature_types-Sanger-Nuclei-full', 'gene_ids-Sanger-Cells-full', 'feature_types-Sanger-Cells-full', 'gene_ids-Sanger-CD45-full', 'feature_types-Sanger-CD45-full', 'n_cells-myeloid', 'n_counts-myeloid'
    obsm: 'X_pca'

### Select only Left Ventricle (LV)

In [5]:
healthy_LV = healthy_heart[healthy_heart.obs['region'].isin(['LV'])]
healthy_LV

View of AnnData object with n_obs × n_vars = 99487 × 22260
    obs: 'NRP', 'age_group', 'cell_source', 'cell_type', 'donor', 'gender', 'n_counts', 'n_genes', 'percent_mito', 'percent_ribo', 'region', 'sample', 'scrublet_score', 'source', 'type', 'version', 'cell_states', 'Used', 'Cells_Nuclei', 'combined'
    var: 'gene_ids-Harvard-Nuclei-full', 'feature_types-Harvard-Nuclei-full', 'gene_ids-Sanger-Nuclei-full', 'feature_types-Sanger-Nuclei-full', 'gene_ids-Sanger-Cells-full', 'feature_types-Sanger-Cells-full', 'gene_ids-Sanger-CD45-full', 'feature_types-Sanger-CD45-full', 'n_cells-myeloid', 'n_counts-myeloid'
    obsm: 'X_pca'

### Select only nuclei

In [16]:
healthy_LV_sn = healthy_LV[healthy_LV.obs['cell_source'].isin(['Sanger-Nuclei', 'Harvard-Nuclei'])]
healthy_LV_sn

View of AnnData object with n_obs × n_vars = 82806 × 22260
    obs: 'NRP', 'age_group', 'cell_source', 'cell_type', 'donor', 'gender', 'n_counts', 'n_genes', 'percent_mito', 'percent_ribo', 'region', 'sample', 'scrublet_score', 'source', 'type', 'version', 'cell_states', 'Used', 'Cells_Nuclei', 'combined'
    var: 'gene_ids-Harvard-Nuclei-full', 'feature_types-Harvard-Nuclei-full', 'gene_ids-Sanger-Nuclei-full', 'feature_types-Sanger-Nuclei-full', 'gene_ids-Sanger-Cells-full', 'feature_types-Sanger-Cells-full', 'gene_ids-Sanger-CD45-full', 'feature_types-Sanger-CD45-full', 'n_cells-myeloid', 'n_counts-myeloid'
    obsm: 'X_pca'

### Read in damaged heart

In [10]:
damaged_heart = sc.read_h5ad('/Users/carlos.lopez/INBOX/heart/Heart_iCell8_GSE121893_HF_ctl200512.RAW.h5ad') 
damaged_heart



AnnData object with n_obs × n_vars = 4933 × 25742
    obs: ' ≈ß≈çID', 'Barcode', 'Type', 'Individual', 'Age', 'Gender', 'Dispense.Order', 'X384.Well.Plate.Location', 'Chip.Row.ID', 'Chip.Column.ID', 'Image.ID', 'Barcode.Read.Pairs', 'Distinct.UMIs', 'ERCC.Read.Pairs', 'Trimmed.Read.Pairs', 'NoContam.Read.Pairs', 'Mitochondria.Alignments', 'Mitochondria.Read.Pairs', 'Total.Barcode.Alignments', 'Distinct.Genes.w..Alignments', 'Distinct.Gene.UMI.Combos', 'Aligned', 'Assigned', 'Ambiguity', 'Chimera', 'Duplicate', 'FragementLength', 'MappingQuality', 'MultiMapping', 'NoFeatures', 'Nonjunction', 'Secondary', 'Unmapped', 'mito.perc', 'CellType'

In [12]:
damaged_heart.obs['Type'].cat.categories

Index(['HF_LA_CM', 'HF_LA_NCM', 'HF_LV_CM', 'HF_LV_NCM', 'N_LA_CM', 'N_LA_NCM',
       'N_LV_CM', 'N_LV_NCM'],
      dtype='object')

In [14]:
damaged_LV = damaged_heart[damaged_heart.obs['Type'].isin(['HF_LV_CM', 'HF_LV_NCM','N_LV_CM', 'N_LV_NCM'])]
damaged_LV

View of AnnData object with n_obs × n_vars = 1942 × 25742
    obs: ' ≈ß≈çID', 'Barcode', 'Type', 'Individual', 'Age', 'Gender', 'Dispense.Order', 'X384.Well.Plate.Location', 'Chip.Row.ID', 'Chip.Column.ID', 'Image.ID', 'Barcode.Read.Pairs', 'Distinct.UMIs', 'ERCC.Read.Pairs', 'Trimmed.Read.Pairs', 'NoContam.Read.Pairs', 'Mitochondria.Alignments', 'Mitochondria.Read.Pairs', 'Total.Barcode.Alignments', 'Distinct.Genes.w..Alignments', 'Distinct.Gene.UMI.Combos', 'Aligned', 'Assigned', 'Ambiguity', 'Chimera', 'Duplicate', 'FragementLength', 'MappingQuality', 'MultiMapping', 'NoFeatures', 'Nonjunction', 'Secondary', 'Unmapped', 'mito.perc', 'CellType'

### Merge both datasets

In [17]:
heart = healthy_LV_sn.concatenate(damaged_LV, batch_key = 'state', batch_categories = ['healthy', 'damaged'], join = 'inner')
heart

AnnData object with n_obs × n_vars = 84748 × 15224
    obs: 'NRP', 'age_group', 'cell_source', 'cell_type', 'donor', 'gender', 'n_counts', 'n_genes', 'percent_mito', 'percent_ribo', 'region', 'sample', 'scrublet_score', 'source', 'type', 'version', 'cell_states', 'Used', 'Cells_Nuclei', 'combined', ' ≈ß≈çID', 'Barcode', 'Type', 'Individual', 'Age', 'Gender', 'Dispense.Order', 'X384.Well.Plate.Location', 'Chip.Row.ID', 'Chip.Column.ID', 'Image.ID', 'Barcode.Read.Pairs', 'Distinct.UMIs', 'ERCC.Read.Pairs', 'Trimmed.Read.Pairs', 'NoContam.Read.Pairs', 'Mitochondria.Alignments', 'Mitochondria.Read.Pairs', 'Total.Barcode.Alignments', 'Distinct.Genes.w..Alignments', 'Distinct.Gene.UMI.Combos', 'Aligned', 'Assigned', 'Ambiguity', 'Chimera', 'Duplicate', 'FragementLength', 'MappingQuality', 'MultiMapping', 'NoFeatures', 'Nonjunction', 'Secondary', 'Unmapped', 'mito.perc', 'CellType', 'state'
    var: 'gene_ids-Harvard-Nuclei-full-healthy', 'feature_types-Harvard-Nuclei-full-healthy', 'gene_ids

### Export merged object

In [18]:
heart.write('/Users/carlos.lopez/INBOX/heart/heart_LV.10Xsn-iCell8.healthy-diseased.ctl210830.raw.h5ad')

... storing 'NRP' as categorical
... storing 'age_group' as categorical
... storing 'cell_source' as categorical
... storing 'cell_type' as categorical
... storing 'donor' as categorical
... storing 'gender' as categorical
... storing 'region' as categorical
... storing 'sample' as categorical
... storing 'source' as categorical
... storing 'type' as categorical
... storing 'version' as categorical
... storing 'cell_states' as categorical
... storing 'Used' as categorical
... storing 'Cells_Nuclei' as categorical
... storing 'combined' as categorical
... storing ' ≈ß≈çID' as categorical
... storing 'Barcode' as categorical
... storing 'Type' as categorical
... storing 'Individual' as categorical
... storing 'Gender' as categorical
... storing 'X384.Well.Plate.Location' as categorical
... storing 'CellType' as categorical
