# Process Carraro et al.

Note: This dataset has a strong batch effect (by lab).

## Set up Env

In [1]:
import os
import pandas as pd
import numpy as np
import scanpy as sc
from scipy.sparse import csr_matrix

In [2]:
import liana as li

In [3]:
from prep_utils import filter_samples, filter_celltypes

In [4]:
# Dataset identifier; used to build the input/output .h5ad file names under data/
dataset = 'carraro'
# obs column used to group cells (passed as `groupby` to filter_celltypes and LIANA)
groupby = 'major'
# obs column identifying the sample each cell belongs to
sample_key = 'orig.ident'
# obs column holding the condition label (CF / CO in this dataset)
condition_key = 'type'
# obs column identifying the batch (lab); stored in adata.uns for downstream use
batch_key = 'lab'

# sample-level QC thresholds passed to filter_samples
# NOTE(review): exact semantics live in prep_utils; the z-count bounds presumably
# bound a per-sample z-score of total counts — confirm against filter_samples
min_cells_per_sample = 700
sample_zcounts_max = 3
sample_zcounts_min = -2

# cell-type filtering parameters passed to filter_celltypes
min_cells = 10 # min number of cells per cell type
min_samples = 5 # min number of samples that pass the threshold per cell type

## Preprocess

### Load data

In [5]:
# Read the raw AnnData object for this dataset from the local data directory
raw_path = os.path.join('data', f"{dataset}.h5ad")
adata = sc.read_h5ad(raw_path)
adata

AnnData object with n_obs × n_vars = 40709 × 31229
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'lab', 'type', 'mutation', 'percent.mt', 'percent.rp', 'major', 'minor', 'ident'
    uns: 'X_name'

### Filter samples, cell types, and genes

In [6]:
# Apply the sample-level QC thresholds from the config cell.
# Arguments stay positional, in the order filter_samples receives them.
adata = filter_samples(
    adata,
    sample_key,
    condition_key,
    min_cells_per_sample,
    sample_zcounts_max,
    sample_zcounts_min,
)

      orig.ident
type            
CF             8
CO            14


In [7]:
# Apply the cell-type filter using the min_cells / min_samples thresholds from the config cell
adata = filter_celltypes(
    adata=adata,
    groupby=groupby,
    sample_key=sample_key,
    min_cells=min_cells,
    min_samples=min_samples,
)

In [8]:
# Remove genes detected in fewer than 30 cells.
# (Normalization is NOT done here; it happens in the "Normalize" section below.)
sc.pp.filter_genes(adata, min_cells=30)
adata



AnnData object with n_obs × n_vars = 34274 × 20531
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'lab', 'type', 'mutation', 'percent.mt', 'percent.rp', 'major', 'minor', 'ident'
    var: 'n_cells'
    uns: 'X_name'

In [9]:
# Batch correction with ComBat (by `batch_key`) is disabled; the line is kept for reference only.
# NOTE(review): with scanpy's default `inplace=True`, `sc.pp.combat` returns None, so this
# assignment would need `inplace=False` if re-enabled — confirm against the installed scanpy version.
# adata.X = sc.pp.combat(adata, key=batch_key)

### Normalize

In [10]:
# Scale every cell to a total of 1e4 counts, then apply log(1 + x); both calls modify adata in place
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

## Run LIANA

In [11]:
# Run LIANA's rank aggregation separately for each sample, grouping cells by `groupby`.
# use_raw=False -> scores are computed from adata.X (the log-normalized matrix) rather than adata.raw.
# NOTE(review): n_perms=None presumably skips permutation testing — confirm in the liana docs.
li.mt.rank_aggregate.by_sample(
    adata,
    sample_key=sample_key,
    groupby=groupby,
    use_raw=False,
    n_perms=None,
    verbose=True,
)

Now running: ND15989: 100%|██████████| 22/22 [01:30<00:00,  4.12s/it]


### Add Metadata & Write

In [12]:
# Record which obs columns hold sample / batch / condition so that
# code reading the saved file can look them up from adata.uns
adata.uns.update({
    'sample_key': sample_key,
    'batch_key': batch_key,
    'condition_key': condition_key,
})

In [13]:
# Sanity check: all three metadata keys must be present in adata.uns before writing
assert {'sample_key', 'batch_key', 'condition_key'}.issubset(adata.uns_keys())

In [14]:
# Persist the processed object (including the uns metadata keys) next to the raw input
processed_path = os.path.join('data', f"{dataset}_processed.h5ad")
adata.write_h5ad(processed_path)

## Classify

In [15]:
from classify_utils import classifier_pipe



In [16]:
# Re-open the processed file written above in backed, read-only mode (backed='r'),
# replacing the in-memory `adata` object
adata = sc.read_h5ad(os.path.join('data', f"{dataset}_processed.h5ad"), backed='r')

In [17]:
# Run the classification pipeline from classify_utils on the processed data.
# NOTE(review): judging by the logged output and the next cell, this builds views per LIANA
# score key, runs MOFA and tensor factorization for each, and stores per-fold results in
# adata.uns['auc'] — confirm in classify_utils.
classifier_pipe(adata, dataset)

Creating views with: lr_means


100%|██████████| 16/16 [00:00<00:00, 216.19it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='Secretory&Ciliated' group='group1' with N=22 samples and D=199 features...
Loaded view='Secretory&Secretory' group='group1' with N=22 samples and D=253 features...
Loaded view='Secretory&Basal' group='group1' with N=22 samples and D=290 features...
Loaded view='Basal&Basal' group='group1' with N=22 samples and D=387 features...
Loaded view='Basal&Secretory' group='g

100%|██████████| 22/22 [00:19<00:00,  1.11it/s]


Running Tensor Factorization
Creating views with: expr_prod


100%|██████████| 16/16 [00:00<00:00, 164.44it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='Secretory&Ciliated' group='group1' with N=22 samples and D=199 features...
Loaded view='Secretory&Secretory' group='group1' with N=22 samples and D=253 features...
Loaded view='Secretory&Basal' group='group1' with N=22 samples and D=290 features...
Loaded view='Basal&Basal' group='group1' with N=22 samples and D=387 features...
Loaded view='Basal&Secretory' group='g

100%|██████████| 22/22 [00:19<00:00,  1.15it/s]


Running Tensor Factorization
Creating views with: lr_logfc


100%|██████████| 16/16 [00:00<00:00, 213.91it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='Secretory&Ciliated' group='group1' with N=22 samples and D=199 features...
Loaded view='Secretory&Secretory' group='group1' with N=22 samples and D=253 features...
Loaded view='Secretory&Basal' group='group1' with N=22 samples and D=290 features...
Loaded view='Basal&Basal' group='group1' with N=22 samples and D=387 features...
Loaded view='Basal&Secretory' group='g

100%|██████████| 22/22 [00:20<00:00,  1.06it/s]


Running Tensor Factorization
Creating views with: expr_prod


100%|██████████| 16/16 [00:00<00:00, 225.58it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='Secretory&Ciliated' group='group1' with N=22 samples and D=199 features...
Loaded view='Secretory&Secretory' group='group1' with N=22 samples and D=253 features...
Loaded view='Secretory&Basal' group='group1' with N=22 samples and D=290 features...
Loaded view='Basal&Basal' group='group1' with N=22 samples and D=387 features...
Loaded view='Basal&Secretory' group='g

100%|██████████| 22/22 [00:23<00:00,  1.06s/it]


Running Tensor Factorization
Creating views with: lrscore


100%|██████████| 16/16 [00:00<00:00, 200.32it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='Secretory&Ciliated' group='group1' with N=22 samples and D=199 features...
Loaded view='Secretory&Secretory' group='group1' with N=22 samples and D=253 features...
Loaded view='Secretory&Basal' group='group1' with N=22 samples and D=290 features...
Loaded view='Basal&Basal' group='group1' with N=22 samples and D=387 features...
Loaded view='Basal&Secretory' group='g

100%|██████████| 22/22 [00:22<00:00,  1.01s/it]


Running Tensor Factorization
Creating views with: lr_probs


100%|██████████| 16/16 [00:00<00:00, 217.54it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='Secretory&Ciliated' group='group1' with N=22 samples and D=144 features...
Loaded view='Secretory&Secretory' group='group1' with N=22 samples and D=156 features...
Loaded view='Secretory&Basal' group='group1' with N=22 samples and D=216 features...
Loaded view='Basal&Basal' group='group1' with N=22 samples and D=334 features...
Loaded view='Basal&Secretory' group='g

100%|██████████| 22/22 [00:21<00:00,  1.02it/s]


Running Tensor Factorization
Creating views with: magnitude_rank


100%|██████████| 16/16 [00:00<00:00, 223.72it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='Secretory&Ciliated' group='group1' with N=22 samples and D=163 features...
Loaded view='Secretory&Secretory' group='group1' with N=22 samples and D=215 features...
Loaded view='Secretory&Basal' group='group1' with N=22 samples and D=248 features...
Loaded view='Basal&Basal' group='group1' with N=22 samples and D=352 features...
Loaded view='Basal&Secretory' group='g

100%|██████████| 22/22 [00:20<00:00,  1.07it/s]


Running Tensor Factorization


In [18]:
# Display the per-fold AUC table (one row per reduction method, score key and fold)
adata.uns['auc']

Unnamed: 0,reduction_name,score_key,fold,auc,tpr,fpr,train_split,test_split,dataset
0,mofa,lr_means,0,0.666667,"[0.0, 0.3333333333333333, 0.6666666666666666, ...","[0.0, 0.0, 0.0, 1.0, 1.0]","[0, 1, 2, 3, 4, 6, 8, 9, 10, 12, 14, 15, 16, 1...","[5, 7, 11, 13, 19]",carraro
1,tensor,lr_means,0,0.666667,"[0.0, 0.3333333333333333, 0.6666666666666666, ...","[0.0, 0.0, 0.0, 1.0, 1.0]","[0, 1, 2, 3, 4, 6, 8, 9, 10, 12, 14, 15, 16, 1...","[5, 7, 11, 13, 19]",carraro
2,mofa,lr_means,1,1.000000,"[0.0, 0.3333333333333333, 1.0, 1.0]","[0.0, 0.0, 0.0, 1.0]","[0, 1, 3, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 1...","[2, 4, 10, 18, 20]",carraro
3,tensor,lr_means,1,1.000000,"[0.0, 0.3333333333333333, 1.0, 1.0]","[0.0, 0.0, 0.0, 1.0]","[0, 1, 3, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 1...","[2, 4, 10, 18, 20]",carraro
4,mofa,lr_means,2,1.000000,"[0.0, 0.5, 1.0, 1.0]","[0.0, 0.0, 0.0, 1.0]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[15, 16, 17, 21]",carraro
...,...,...,...,...,...,...,...,...,...
65,tensor,magnitude_rank,2,0.750000,"[0.0, 0.5, 0.5, 1.0, 1.0]","[0.0, 0.0, 0.5, 0.5, 1.0]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[15, 16, 17, 21]",carraro
66,mofa,magnitude_rank,3,0.000000,"[0.0, 0.0, 1.0]","[0.0, 1.0, 1.0]","[2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, ...","[0, 1, 3, 14]",carraro
67,tensor,magnitude_rank,3,0.000000,"[0.0, 0.0, 1.0]","[0.0, 1.0, 1.0]","[2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, ...","[0, 1, 3, 14]",carraro
68,mofa,magnitude_rank,4,0.000000,"[0.0, 0.0, 1.0]","[0.0, 1.0, 1.0]","[0, 1, 2, 3, 4, 5, 7, 10, 11, 13, 14, 15, 16, ...","[6, 8, 9, 12]",carraro
