# Process Kuppe et al. data

## Set up Env

In [1]:
import os
import numpy as np
import pandas as pd
import scanpy as sc
from scipy.sparse import csr_matrix

In [2]:
import liana as li

Params

In [3]:
# --- Dataset identity and AnnData column names ---
dataset = "kuppe"
groupby = "cell_type"  # obs column holding the cell-type labels
sample_key = "sample"  # obs column identifying each sample
condition_key = "patient_group"  # obs column holding the condition label
batch_key = "sex"  # stored in adata.uns alongside the other keys

# --- Sample-level thresholds, forwarded to filter_samples ---
# NOTE(review): the z-count bounds are presumably limits on a per-sample
# z-scored count statistic -- confirm against prep_utils.filter_samples.
min_cells_per_sample = 700
sample_zcounts_max = 3
sample_zcounts_min = -2

# --- Cell-type thresholds, forwarded to filter_celltypes ---
min_cells = 10  # min number of cells per cell type
min_samples = 5  # min number of samples that pass the threshold per cell type

In [4]:
from prep_utils import filter_samples, filter_celltypes

### Load data

In [None]:
# Open the input dataset in backed, read-only mode (backed='r').
# The file name is derived from `dataset` so the input path stays in sync
# with the f"{dataset}_processed.h5ad" path used when writing and re-loading.
adata = sc.read_h5ad(os.path.join('data', f"{dataset}.h5ad"), backed='r')

In [None]:
# Keep only observations whose condition label is 'ischemic' or 'myogenic'.
# The column is looked up via `condition_key` (instead of a hard-coded name)
# so it stays consistent with the Params cell, and the membership test is
# vectorized with Series.isin rather than a per-row Python loop.
msk = adata.obs[condition_key].isin(['ischemic', 'myogenic']).to_numpy()
adata = adata[msk]

In [None]:
# Number of distinct samples and distinct conditions among the unique
# (sample, condition) pairs remaining after subsetting.
(
    adata.obs[[sample_key, condition_key]]
    .drop_duplicates()
    .nunique()
)

In [None]:
# NOTE: from here on the object is rebuilt from .raw
# (presumably the unnormalized counts -- TODO confirm); normalization is
# applied later in this notebook, in the "Normalize" section.
adata = adata.raw.to_adata()
# Store X as a SciPy CSR sparse matrix.
adata.X = csr_matrix(adata.X)

In [None]:
# change to gene symbols: re-index var by its 'feature_name' column
# NOTE(review): assumes 'feature_name' values are unique; duplicated symbols
# would produce a non-unique var index -- confirm for this dataset.
adata.var.index = adata.var['feature_name']

Filter samples, cell types, and genes

In [None]:
# Sample-level filtering via prep_utils.filter_samples, using the
# thresholds defined in the Params cell (arguments are positional).
adata = filter_samples(
    adata,
    sample_key,
    condition_key,
    min_cells_per_sample,
    sample_zcounts_max,
    sample_zcounts_min,
)

In [None]:
# Cell-type-level filtering via prep_utils.filter_celltypes, using the
# min_cells / min_samples thresholds from the Params cell.
adata = filter_celltypes(
    adata=adata,
    groupby=groupby,
    sample_key=sample_key,
    min_cells=min_cells,
    min_samples=min_samples,
)

In [None]:
# Remove genes expressed in fewer than 30 cells.
# (No normalization happens here; that is done later, in the "Normalize" section.)
sc.pp.filter_genes(adata, min_cells=30)
# Display the AnnData summary after filtering.
adata

In [None]:
import gc
# Force a garbage-collection pass; presumably to release memory held by the
# objects that the loading/filtering steps above replaced.
gc.collect()

##### Abbreviate Cell types

In [None]:
# Full cell-type label -> short abbreviation, used to recode the
# 'cell_type' annotation into the 'cell_abbr' column.
cell_type_abbreviations = {
    "neuronal receptor cell": "NRC",
    "mast cell": "MC",
    "cardiac muscle myoblast": "CMM",
    "smooth muscle myoblast": "SMM",
    "pericyte": "PC",
    "lymphoid lineage restricted progenitor cell": "LLRPC",
    "immature innate lymphoid cell": "IILC",
    "fibroblast of cardiac tissue": "FCT",
    "cardiac endothelial cell": "CEC",
    "adipocyte of epicardial fat of left ventricle": "AEFLV",
    "native cell": "NC",
}

In [None]:
# Recode the 'cell_type' labels into their abbreviations and store the
# result in a new obs column, 'cell_abbr'.
adata.obs['cell_abbr'] = adata.obs['cell_type'].replace(
    to_replace=cell_type_abbreviations,
)

In [None]:
# From here on, group cells by the abbreviated cell-type column.
groupby = "cell_abbr"

### Normalize

In [None]:
# Normalize each cell to a total of 1e4 counts, then apply log(1 + x).
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

### Run LIANA

In [None]:
# Run LIANA's rank_aggregate method separately for every sample, grouping
# cells by `groupby` and reading expression from adata.X (use_raw=False).
# NOTE(review): n_perms=None presumably skips the permutation-based scores --
# confirm against the liana documentation.
li.mt.rank_aggregate.by_sample(
    adata,
    groupby=groupby,
    use_raw=False,
    sample_key=sample_key,
    verbose=True,
    n_perms=None,
)

### Add Metadata & Write

In [None]:
# Record which obs columns identify sample, batch and condition so that
# whatever reads the processed file can look them up from adata.uns.
for uns_name, uns_value in (
    ('sample_key', sample_key),
    ('batch_key', batch_key),
    ('condition_key', condition_key),
):
    adata.uns[uns_name] = uns_value

In [None]:
# Sanity check: all three metadata keys must be present in adata.uns.
assert {'sample_key', 'batch_key', 'condition_key'}.issubset(adata.uns_keys())

In [None]:
# Persist the processed object as data/<dataset>_processed.h5ad.
adata.write_h5ad(
    os.path.join('data', f"{dataset}_processed.h5ad"),
)

## Classify

In [5]:
import scanpy as sc
from classify_utils import classifier_pipe



In [6]:
# Re-open the processed file written by the processing section, in backed
# read-only mode.
adata = sc.read_h5ad(
    os.path.join('data', f"{dataset}_processed.h5ad"),
    backed='r',
)



In [7]:
# Run the classification pipeline from classify_utils on the processed data.
# NOTE(review): presumably this populates adata.uns['auc'], which the next
# cell reads -- confirm in classify_utils.classifier_pipe.
classifier_pipe(adata, dataset)

Creating views with: lr_means


100%|██████████| 119/119 [00:03<00:00, 33.66it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='CMM&NRC' group='group1' with N=23 samples and D=333 features...
Loaded view='NRC&CMM' group='group1' with N=23 samples and D=296 features...
Loaded view='CMM&CMM' group='group1' with N=23 samples and D=373 features...
Loaded view='NC&CMM' group='group1' with N=23 samples and D=324 features...
Loaded view='CMM&SMM' group='group1' with N=23 samples and D=369 features.

100%|██████████| 23/23 [01:24<00:00,  3.66s/it]


Running Tensor Factorization
Creating views with: expr_prod


100%|██████████| 119/119 [00:03<00:00, 33.93it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='CMM&NRC' group='group1' with N=23 samples and D=333 features...
Loaded view='NRC&CMM' group='group1' with N=23 samples and D=296 features...
Loaded view='CMM&CMM' group='group1' with N=23 samples and D=373 features...
Loaded view='NC&CMM' group='group1' with N=23 samples and D=324 features...
Loaded view='CMM&SMM' group='group1' with N=23 samples and D=369 features.

100%|██████████| 23/23 [01:28<00:00,  3.83s/it]


Running Tensor Factorization
Creating views with: lr_logfc


100%|██████████| 119/119 [00:03<00:00, 33.98it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='CMM&NRC' group='group1' with N=23 samples and D=333 features...
Loaded view='NRC&CMM' group='group1' with N=23 samples and D=296 features...
Loaded view='CMM&CMM' group='group1' with N=23 samples and D=373 features...
Loaded view='NC&CMM' group='group1' with N=23 samples and D=324 features...
Loaded view='CMM&SMM' group='group1' with N=23 samples and D=369 features.

100%|██████████| 23/23 [01:27<00:00,  3.82s/it]


Running Tensor Factorization
Creating views with: expr_prod


100%|██████████| 119/119 [00:03<00:00, 32.97it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='CMM&NRC' group='group1' with N=23 samples and D=333 features...
Loaded view='NRC&CMM' group='group1' with N=23 samples and D=296 features...
Loaded view='CMM&CMM' group='group1' with N=23 samples and D=373 features...
Loaded view='NC&CMM' group='group1' with N=23 samples and D=324 features...
Loaded view='CMM&SMM' group='group1' with N=23 samples and D=369 features.

100%|██████████| 23/23 [01:26<00:00,  3.77s/it]


Running Tensor Factorization
Creating views with: lrscore


100%|██████████| 119/119 [00:03<00:00, 32.39it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='CMM&NRC' group='group1' with N=23 samples and D=333 features...
Loaded view='NRC&CMM' group='group1' with N=23 samples and D=296 features...
Loaded view='CMM&CMM' group='group1' with N=23 samples and D=373 features...
Loaded view='NC&CMM' group='group1' with N=23 samples and D=324 features...
Loaded view='CMM&SMM' group='group1' with N=23 samples and D=369 features.

100%|██████████| 23/23 [01:26<00:00,  3.76s/it]


Running Tensor Factorization
Creating views with: lr_probs


100%|██████████| 119/119 [00:03<00:00, 32.80it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='CMM&NRC' group='group1' with N=23 samples and D=239 features...
Loaded view='NRC&CMM' group='group1' with N=23 samples and D=236 features...
Loaded view='CMM&CMM' group='group1' with N=23 samples and D=312 features...
Loaded view='NC&CMM' group='group1' with N=23 samples and D=264 features...
Loaded view='CMM&SMM' group='group1' with N=23 samples and D=273 features.

100%|██████████| 23/23 [01:29<00:00,  3.88s/it]


Running Tensor Factorization
Creating views with: magnitude_rank


100%|██████████| 119/119 [00:03<00:00, 32.09it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='CMM&NRC' group='group1' with N=23 samples and D=318 features...
Loaded view='NRC&CMM' group='group1' with N=23 samples and D=288 features...
Loaded view='CMM&CMM' group='group1' with N=23 samples and D=368 features...
Loaded view='NC&CMM' group='group1' with N=23 samples and D=315 features...
Loaded view='CMM&SMM' group='group1' with N=23 samples and D=359 features.

100%|██████████| 23/23 [01:28<00:00,  3.84s/it]


Running Tensor Factorization


In [8]:
# Display the AUC results table ordered by cross-validation fold.
adata.uns['auc'].sort_values(by='fold')

Unnamed: 0,reduction_name,score_key,fold,auc,tpr,fpr,train_split,test_split,dataset
0,mofa,lr_means,0,1.0,"[0.0, 0.2, 1.0, 1.0]","[0.0, 0.0, 0.0, 1.0]","[0, 1, 3, 5, 6, 8, 9, 13, 14, 15, 16, 18, 19, ...","[2, 4, 7, 10, 11, 12, 17, 21]",kuppe
18,mofa,expr_prod,0,1.0,"[0.0, 0.2, 1.0, 1.0]","[0.0, 0.0, 0.0, 1.0]","[0, 1, 3, 5, 6, 8, 9, 13, 14, 15, 16, 18, 19, ...","[2, 4, 7, 10, 11, 12, 17, 21]",kuppe
24,mofa,lrscore,0,1.0,"[0.0, 0.2, 1.0, 1.0]","[0.0, 0.0, 0.0, 1.0]","[0, 1, 3, 5, 6, 8, 9, 13, 14, 15, 16, 18, 19, ...","[2, 4, 7, 10, 11, 12, 17, 21]",kuppe
25,tensor,lrscore,0,1.0,"[0.0, 0.2, 1.0, 1.0]","[0.0, 0.0, 0.0, 1.0]","[0, 1, 3, 5, 6, 8, 9, 13, 14, 15, 16, 18, 19, ...","[2, 4, 7, 10, 11, 12, 17, 21]",kuppe
13,tensor,lr_logfc,0,1.0,"[0.0, 0.2, 1.0, 1.0]","[0.0, 0.0, 0.0, 1.0]","[0, 1, 3, 5, 6, 8, 9, 13, 14, 15, 16, 18, 19, ...","[2, 4, 7, 10, 11, 12, 17, 21]",kuppe
12,mofa,lr_logfc,0,1.0,"[0.0, 0.2, 1.0, 1.0]","[0.0, 0.0, 0.0, 1.0]","[0, 1, 3, 5, 6, 8, 9, 13, 14, 15, 16, 18, 19, ...","[2, 4, 7, 10, 11, 12, 17, 21]",kuppe
30,mofa,lr_probs,0,0.5,"[0.0, 1.0]","[0.0, 1.0]","[0, 1, 3, 5, 6, 8, 9, 13, 14, 15, 16, 18, 19, ...","[2, 4, 7, 10, 11, 12, 17, 21]",kuppe
19,tensor,expr_prod,0,1.0,"[0.0, 0.2, 1.0, 1.0]","[0.0, 0.0, 0.0, 1.0]","[0, 1, 3, 5, 6, 8, 9, 13, 14, 15, 16, 18, 19, ...","[2, 4, 7, 10, 11, 12, 17, 21]",kuppe
31,tensor,lr_probs,0,1.0,"[0.0, 0.2, 1.0, 1.0]","[0.0, 0.0, 0.0, 1.0]","[0, 1, 3, 5, 6, 8, 9, 13, 14, 15, 16, 18, 19, ...","[2, 4, 7, 10, 11, 12, 17, 21]",kuppe
7,tensor,expr_prod,0,1.0,"[0.0, 0.2, 1.0, 1.0]","[0.0, 0.0, 0.0, 1.0]","[0, 1, 3, 5, 6, 8, 9, 13, 14, 15, 16, 18, 19, ...","[2, 4, 7, 10, 11, 12, 17, 21]",kuppe
