# Process Kuppe et al. data

## Set up Env

In [1]:
import os
import numpy as np
import pandas as pd
import scanpy as sc
from scipy.sparse import csr_matrix

In [2]:
import liana as li

In [3]:
# load muon and mofax
import muon as mu
import mofax as mofa



In [4]:
from prep_utils import filter_samples, filter_celltypes

### Load data

Params

In [5]:
groupby = 'cell_type'
sample_key = 'sample'
condition_key = 'patient_group'
batch_key = 'sex'

min_cells_per_sample = 700
sample_zcounts_max = 3
sample_zcounts_min = -2

# set filtering parameters
min_cells = 10 # min number of cells per cell type
min_samples = 5 # min number of samples that pass the threshold per cell type

In [None]:
adata = sc.read_h5ad(os.path.join('data', "kuppe.h5ad"), backed='r')

In [None]:
msk = np.array([patient in ['ischemic', 'myogenic'] for patient in adata.obs['patient_group']])
adata = adata[msk]

In [None]:
# NOTE: I use .raw
adata = adata.raw.to_adata()
adata.X = csr_matrix(adata.X)

In [None]:
# change to gene symbols
adata.var.index = adata.var['feature_name']

Filter samples, cell types, and genes

In [None]:
adata = filter_samples(adata, sample_key, condition_key, min_cells_per_sample, sample_zcounts_max, sample_zcounts_min)

In [None]:
adata = filter_celltypes(adata=adata, groupby=groupby, sample_key=sample_key, min_cells=min_cells, min_samples=min_samples)

In [None]:
# Remove genes expressed in few cells, normalize
sc.pp.filter_genes(adata, min_cells=30)
adata

In [None]:
import gc
gc.collect()

##### Abbrevaite Cell types

In [None]:
cell_type_abbreviations = {'neuronal receptor cell': 'NRC',
    'mast cell': 'MC',
    'cardiac muscle myoblast': 'CMM',
    'smooth muscle myoblast': 'SMM',
    'pericyte': 'PC',
    'lymphoid lineage restricted progenitor cell': 'LLRPC',
    'immature innate lymphoid cell': 'IILC',
    'fibroblast of cardiac tissue': 'FCT',
    'cardiac endothelial cell': 'CEC',
    'adipocyte of epicardial fat of left ventricle': 'AEFLV',
    'native cell': 'NC'
}

In [None]:
# use the replace() method to recode the cell_type column
adata.obs['cell_abbr'] = adata.obs['cell_type'].replace(cell_type_abbreviations)

In [None]:
# change groupby to abbreviation
groupby = 'cell_abbr'

### Normalize

In [None]:
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

### Run LIANA

In [None]:
li.mt.rank_aggregate.by_sample(adata, groupby=groupby, use_raw=False, sample_key=sample_key, verbose=True, n_perms=None)

write adata with LIANA results

In [None]:
adata.write_h5ad(os.path.join('data', "kuppe_processed.h5ad"))

## Classify

In [6]:
from classify_utils import run_classifier, run_mofatalk, run_tensor_c2c, NestedDict

In [7]:
adata = sc.read_h5ad(os.path.join('data', "kuppe_processed.h5ad"), backed='r')



In [8]:
methods = li.mt.show_methods()
# in case a method is missing Magnitude Score, use Specificity Score
methods['score_key'] = methods["Magnitude Score"].fillna(methods["Specificity Score"])
# remove Geometric Mean	method
methods = methods[methods['Method Name'] != 'Geometric Mean']
# drop duplicated scores (expr_prod for NATMI & Connectome)
methods = methods.drop_duplicates(subset=['Method Name', 'score_key'])
methods = methods[['Method Name', 'score_key']]

## Run MOFA

In [13]:
adata.uns['mofa_res'] = NestedDict()
adata.uns['tensor_res'] = NestedDict()
adata.uns['auc'] = pd.DataFrame(columns=['reduction_name', 'score_key', 'fold', 'auc', 'tpr', 'fpr'])

In [14]:
for score_key in methods['score_key']: 
    print(f"Creating views with: {score_key}")

    run_mofatalk(adata=adata, score_key=score_key, sample_key=sample_key, condition_key=condition_key, batch_key=batch_key)
    run_classifier(adata=adata, reduction_name='mofa_res', score_key=score_key)
    
    run_tensor_c2c(adata=adata, score_key=score_key, sample_key=sample_key, condition_key=condition_key)
    run_classifier(adata=adata, reduction_name='tensor_res', score_key=score_key)
    
    

Creating views with: lr_means


100%|██████████| 121/121 [00:04<00:00, 28.29it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='CMM&NRC' group='group1' with N=23 samples and D=428 features...
Loaded view='NRC&CMM' group='group1' with N=23 samples and D=419 features...
Loaded view='CMM&CMM' group='group1' with N=23 samples and D=497 features...
Loaded view='NC&CMM' group='group1' with N=23 samples and D=477 features...
Loaded view='CMM&SMM' group='group1' with N=23 samples and D=447 features.

100%|██████████| 23/23 [00:10<00:00,  2.14it/s]


Running Tensor Factorization
Creating views with: expr_prod


100%|██████████| 121/121 [00:04<00:00, 28.65it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='CMM&NRC' group='group1' with N=23 samples and D=428 features...
Loaded view='NRC&CMM' group='group1' with N=23 samples and D=419 features...
Loaded view='CMM&CMM' group='group1' with N=23 samples and D=497 features...
Loaded view='NC&CMM' group='group1' with N=23 samples and D=477 features...
Loaded view='CMM&SMM' group='group1' with N=23 samples and D=447 features.

100%|██████████| 23/23 [00:10<00:00,  2.13it/s]


Running Tensor Factorization
Creating views with: lr_logfc


100%|██████████| 121/121 [00:04<00:00, 28.32it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='CMM&NRC' group='group1' with N=23 samples and D=428 features...
Loaded view='NRC&CMM' group='group1' with N=23 samples and D=419 features...
Loaded view='CMM&CMM' group='group1' with N=23 samples and D=497 features...
Loaded view='NC&CMM' group='group1' with N=23 samples and D=477 features...
Loaded view='CMM&SMM' group='group1' with N=23 samples and D=447 features.

100%|██████████| 23/23 [00:10<00:00,  2.23it/s]


Running Tensor Factorization
Creating views with: expr_prod


100%|██████████| 121/121 [00:04<00:00, 29.19it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='CMM&NRC' group='group1' with N=23 samples and D=428 features...
Loaded view='NRC&CMM' group='group1' with N=23 samples and D=419 features...
Loaded view='CMM&CMM' group='group1' with N=23 samples and D=497 features...
Loaded view='NC&CMM' group='group1' with N=23 samples and D=477 features...
Loaded view='CMM&SMM' group='group1' with N=23 samples and D=447 features.

100%|██████████| 23/23 [00:10<00:00,  2.20it/s]


Running Tensor Factorization
Creating views with: lrscore


100%|██████████| 121/121 [00:04<00:00, 27.65it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='CMM&NRC' group='group1' with N=23 samples and D=428 features...
Loaded view='NRC&CMM' group='group1' with N=23 samples and D=419 features...
Loaded view='CMM&CMM' group='group1' with N=23 samples and D=497 features...
Loaded view='NC&CMM' group='group1' with N=23 samples and D=477 features...
Loaded view='CMM&SMM' group='group1' with N=23 samples and D=447 features.

100%|██████████| 23/23 [00:10<00:00,  2.22it/s]


Running Tensor Factorization
Creating views with: lr_probs


100%|██████████| 121/121 [00:04<00:00, 27.44it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='CMM&NRC' group='group1' with N=23 samples and D=275 features...
Loaded view='NRC&CMM' group='group1' with N=23 samples and D=266 features...
Loaded view='CMM&CMM' group='group1' with N=23 samples and D=334 features...
Loaded view='NC&CMM' group='group1' with N=23 samples and D=292 features...
Loaded view='CMM&SMM' group='group1' with N=23 samples and D=292 features.

100%|██████████| 23/23 [00:10<00:00,  2.19it/s]


Running Tensor Factorization
Creating views with: magnitude_rank


100%|██████████| 121/121 [00:04<00:00, 28.66it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='CMM&NRC' group='group1' with N=23 samples and D=399 features...
Loaded view='NRC&CMM' group='group1' with N=23 samples and D=388 features...
Loaded view='CMM&CMM' group='group1' with N=23 samples and D=476 features...
Loaded view='NC&CMM' group='group1' with N=23 samples and D=439 features...
Loaded view='CMM&SMM' group='group1' with N=23 samples and D=417 features.

100%|██████████| 23/23 [00:10<00:00,  2.28it/s]


Running Tensor Factorization


In [40]:
adata.uns['auc'].to_csv(os.path.join('data', 'results', "kuppe_auc.csv"), index=False)