# Process Carraro et al.

Note: This dataset has a brutal batch effect (by lab).

## Set up Env

In [1]:
import os
import pandas as pd
import numpy as np
import scanpy as sc
from scipy.sparse import csr_matrix

In [2]:
import liana as li

In [3]:
from prep_utils import filter_samples, filter_celltypes

In [4]:
# --- Dataset identity -------------------------------------------------------
# Used to build the input/output file names under the `data` directory.
dataset = "carraro"

# --- Metadata keys ----------------------------------------------------------
# `sample_key` and `condition_key` are columns of `adata.obs`; `groupby` and
# `batch_key` are presumably obs columns as well (cell-type label and lab).
sample_key = "orig.ident"
condition_key = "type"
batch_key = "lab"
groupby = "major"

# --- Sample-level filtering (passed to `filter_samples`) --------------------
min_cells_per_sample = 700
# Lower/upper z-count bounds; exact semantics are defined in `prep_utils`.
sample_zcounts_min, sample_zcounts_max = -2, 3

# --- Cell-type-level filtering (passed to `filter_celltypes`) ---------------
min_samples = 5  # min number of samples that pass the threshold per cell type
min_cells = 10  # min number of cells per cell type

## Preprocess

### Load data

In [None]:
# Read the raw AnnData file for this dataset from the local `data` directory,
# then display the object as the cell output.
raw_h5ad_path = os.path.join('data', f"{dataset}.h5ad")
adata = sc.read_h5ad(raw_h5ad_path)
adata

In [None]:
# Reduce obs to the unique (sample, condition) pairs, then count the distinct
# values in each of the two columns.
sample_condition_pairs = adata.obs[[sample_key, condition_key]].drop_duplicates()
sample_condition_pairs.nunique()

Filter samples, cell types, and genes

In [None]:
# Filter samples with the project helper, passing the minimum cells per sample
# and the z-count bounds configured earlier in the notebook.
adata = filter_samples(
    adata,
    sample_key,
    condition_key,
    min_cells_per_sample,
    sample_zcounts_max,
    sample_zcounts_min,
)

In [None]:
# Filter cell types with the project helper, using the configured per-cell-type
# minimum number of cells and minimum number of samples.
adata = filter_celltypes(
    adata=adata,
    groupby=groupby,
    sample_key=sample_key,
    min_cells=min_cells,
    min_samples=min_samples,
)

In [None]:
# Remove genes expressed in too few cells. Normalization is done separately in
# the "Normalize" cell further down, not here.
gene_min_cells = 30
sc.pp.filter_genes(adata, min_cells=gene_min_cells)
adata

In [None]:
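# ComBat batch correction is currently disabled.
# NOTE(review): with its default `inplace=True`, `sc.pp.combat` updates `adata.X`
# itself and returns None, so the assignment below would overwrite `adata.X` with
# None if re-enabled as written. Call `sc.pp.combat(adata, key=batch_key)` without
# assigning instead.
# adata.X = sc.pp.combat(adata, key=batch_key)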

### Normalize

In [None]:
# Normalize total counts per cell to a target sum of 1e4 ...
sc.pp.normalize_total(adata, target_sum=1e4)
# ... then log1p-transform the normalized values. Order matters: the log
# transform must come after the total-count normalization.
sc.pp.log1p(adata)

## Run LIANA

In [None]:
# Run LIANA's rank_aggregate method separately for each sample, grouping cells
# by `groupby` and reading expression from `adata.X` rather than `adata.raw`.
# NOTE(review): `n_perms=None` presumably skips the permutation step; confirm
# against the LIANA documentation.
li.mt.rank_aggregate.by_sample(
    adata,
    groupby=groupby,
    sample_key=sample_key,
    use_raw=False,
    n_perms=None,
    verbose=True,
)

### Add Metadata & Write

In [None]:
# Record which metadata column names were used, so that downstream code can
# look them up from `adata.uns` after the file is reloaded.
for uns_name, uns_value in (
    ('sample_key', sample_key),
    ('batch_key', batch_key),
    ('condition_key', condition_key),
):
    adata.uns[uns_name] = uns_value

In [None]:
# Sanity check: all three metadata entries must be present in `adata.uns`.
assert {'sample_key', 'batch_key', 'condition_key'}.issubset(adata.uns_keys())

In [None]:
# Write the processed object (including the LIANA results and the metadata
# keys in `uns`) next to the raw input file.
processed_h5ad_path = os.path.join('data', f"{dataset}_processed.h5ad")
adata.write_h5ad(processed_h5ad_path)

## Classify

In [5]:
import scanpy as sc
from classify_utils import classifier_pipe



In [6]:
# Re-open the processed file in backed read-only mode (`backed='r'`).
processed_h5ad_path = os.path.join('data', f"{dataset}_processed.h5ad")
adata = sc.read_h5ad(processed_h5ad_path, backed='r')

In [7]:
# Run the project's classification pipeline on the processed data.
# Judging by the output below, it builds views for each LIANA score key
# (lr_means, expr_prod, lr_logfc, ...), then runs MOFA and a tensor
# factorization for each; the AUC results are read from adata.uns['auc'] in
# the next cell.
classifier_pipe(adata, dataset)

Creating views with: lr_means


100%|██████████| 16/16 [00:00<00:00, 217.78it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='Secretory&Ciliated' group='group1' with N=22 samples and D=129 features...
Loaded view='Secretory&Secretory' group='group1' with N=22 samples and D=158 features...
Loaded view='Secretory&Basal' group='group1' with N=22 samples and D=177 features...
Loaded view='Basal&Basal' group='group1' with N=22 samples and D=258 features...
Loaded view='Basal&Secretory' group='g

100%|██████████| 22/22 [00:13<00:00,  1.59it/s]


Device is either not available or the backend used with tensorly does not support this device.                   Try changing it with tensorly.set_backend("<backend_name>") before.
Running Tensor Factorization
Creating views with: expr_prod


100%|██████████| 16/16 [00:00<00:00, 272.58it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='Secretory&Ciliated' group='group1' with N=22 samples and D=129 features...
Loaded view='Secretory&Secretory' group='group1' with N=22 samples and D=158 features...
Loaded view='Secretory&Basal' group='group1' with N=22 samples and D=177 features...
Loaded view='Basal&Basal' group='group1' with N=22 samples and D=258 features...
Loaded view='Basal&Secretory' group='g

100%|██████████| 22/22 [00:14<00:00,  1.51it/s]


Device is either not available or the backend used with tensorly does not support this device.                   Try changing it with tensorly.set_backend("<backend_name>") before.
Running Tensor Factorization
Creating views with: lr_logfc


100%|██████████| 16/16 [00:00<00:00, 257.38it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='Secretory&Ciliated' group='group1' with N=22 samples and D=129 features...
Loaded view='Secretory&Secretory' group='group1' with N=22 samples and D=158 features...
Loaded view='Secretory&Basal' group='group1' with N=22 samples and D=177 features...
Loaded view='Basal&Basal' group='group1' with N=22 samples and D=258 features...
Loaded view='Basal&Secretory' group='g

100%|██████████| 22/22 [00:15<00:00,  1.44it/s]


Device is either not available or the backend used with tensorly does not support this device.                   Try changing it with tensorly.set_backend("<backend_name>") before.
Running Tensor Factorization
Creating views with: expr_prod


100%|██████████| 16/16 [00:00<00:00, 219.14it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='Secretory&Ciliated' group='group1' with N=22 samples and D=129 features...
Loaded view='Secretory&Secretory' group='group1' with N=22 samples and D=158 features...
Loaded view='Secretory&Basal' group='group1' with N=22 samples and D=177 features...
Loaded view='Basal&Basal' group='group1' with N=22 samples and D=258 features...
Loaded view='Basal&Secretory' group='g

100%|██████████| 22/22 [00:14<00:00,  1.52it/s]


Device is either not available or the backend used with tensorly does not support this device.                   Try changing it with tensorly.set_backend("<backend_name>") before.
Running Tensor Factorization
Creating views with: lrscore


100%|██████████| 16/16 [00:00<00:00, 242.70it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='Secretory&Ciliated' group='group1' with N=22 samples and D=129 features...
Loaded view='Secretory&Secretory' group='group1' with N=22 samples and D=158 features...
Loaded view='Secretory&Basal' group='group1' with N=22 samples and D=177 features...
Loaded view='Basal&Basal' group='group1' with N=22 samples and D=258 features...
Loaded view='Basal&Secretory' group='g

100%|██████████| 22/22 [00:14<00:00,  1.48it/s]


Device is either not available or the backend used with tensorly does not support this device.                   Try changing it with tensorly.set_backend("<backend_name>") before.
Running Tensor Factorization
Creating views with: lr_probs


100%|██████████| 16/16 [00:00<00:00, 269.83it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='Secretory&Ciliated' group='group1' with N=22 samples and D=115 features...
Loaded view='Secretory&Secretory' group='group1' with N=22 samples and D=128 features...
Loaded view='Secretory&Basal' group='group1' with N=22 samples and D=165 features...
Loaded view='Basal&Basal' group='group1' with N=22 samples and D=243 features...
Loaded view='Basal&Secretory' group='g

100%|██████████| 22/22 [00:15<00:00,  1.43it/s]


Device is either not available or the backend used with tensorly does not support this device.                   Try changing it with tensorly.set_backend("<backend_name>") before.
Running Tensor Factorization
Creating views with: magnitude_rank


100%|██████████| 16/16 [00:00<00:00, 256.58it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='Secretory&Ciliated' group='group1' with N=22 samples and D=120 features...
Loaded view='Secretory&Secretory' group='group1' with N=22 samples and D=147 features...
Loaded view='Secretory&Basal' group='group1' with N=22 samples and D=173 features...
Loaded view='Basal&Basal' group='group1' with N=22 samples and D=249 features...
Loaded view='Basal&Secretory' group='g

100%|██████████| 22/22 [00:14<00:00,  1.52it/s]


Device is either not available or the backend used with tensorly does not support this device.                   Try changing it with tensorly.set_backend("<backend_name>") before.
Running Tensor Factorization


In [9]:
# Display the AUC table (one row per reduction method, score key and fold),
# presumably stored in `adata.uns` by `classifier_pipe`.
adata.uns['auc']

Unnamed: 0,reduction_name,score_key,fold,auc,tpr,fpr,train_split,test_split,dataset
0,mofa,lr_means,0,0.6,"[0.0, 0.2, 0.4, 0.4, 1.0, 1.0]","[0.0, 0.0, 0.0, 0.6666666666666666, 0.66666666...","[0, 1, 3, 6, 8, 9, 10, 12, 14, 15, 16, 17, 20,...","[2, 4, 5, 7, 11, 13, 18, 19]",carraro
1,tensor,lr_means,0,0.266667,"[0.0, 0.0, 0.2, 0.2, 0.6, 0.6, 1.0]","[0.0, 0.3333333333333333, 0.3333333333333333, ...","[0, 1, 3, 6, 8, 9, 10, 12, 14, 15, 16, 17, 20,...","[2, 4, 5, 7, 11, 13, 18, 19]",carraro
2,mofa,lr_means,1,0.5,"[0.0, 0.0, 0.5, 0.5, 1.0, 1.0]","[0.0, 0.3333333333333333, 0.3333333333333333, ...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 18,...","[10, 14, 15, 16, 17, 20, 21]",carraro
3,tensor,lr_means,1,0.5,"[0.0, 0.0, 1.0, 1.0]","[0.0, 0.5, 0.5, 1.0]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 18,...","[10, 14, 15, 16, 17, 20, 21]",carraro
4,mofa,lr_means,2,0.7,"[0.0, 0.2, 0.4, 0.4, 1.0, 1.0]","[0.0, 0.0, 0.0, 0.5, 0.5, 1.0]","[2, 4, 5, 7, 10, 11, 13, 14, 15, 16, 17, 18, 1...","[0, 1, 3, 6, 8, 9, 12]",carraro
5,tensor,lr_means,2,0.333333,"[0.0, 0.0, 0.0, 1.0, 1.0]","[0.0, 0.3333333333333333, 0.6666666666666666, ...","[2, 4, 5, 7, 10, 11, 13, 14, 15, 16, 17, 18, 1...","[0, 1, 3, 6, 8, 9, 12]",carraro
6,mofa,expr_prod,0,0.733333,"[0.0, 0.2, 0.4, 0.4, 0.8, 0.8, 1.0, 1.0]","[0.0, 0.0, 0.0, 0.3333333333333333, 0.33333333...","[0, 1, 3, 6, 8, 9, 10, 12, 14, 15, 16, 17, 20,...","[2, 4, 5, 7, 11, 13, 18, 19]",carraro
7,tensor,expr_prod,0,0.266667,"[0.0, 0.0, 0.2, 0.2, 0.6, 0.6, 1.0]","[0.0, 0.3333333333333333, 0.3333333333333333, ...","[0, 1, 3, 6, 8, 9, 10, 12, 14, 15, 16, 17, 20,...","[2, 4, 5, 7, 11, 13, 18, 19]",carraro
8,mofa,expr_prod,1,0.916667,"[0.0, 0.25, 0.75, 0.75, 1.0, 1.0]","[0.0, 0.0, 0.0, 0.3333333333333333, 0.33333333...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 18,...","[10, 14, 15, 16, 17, 20, 21]",carraro
9,tensor,expr_prod,1,0.5,"[0.0, 0.0, 1.0, 1.0]","[0.0, 0.5, 0.5, 1.0]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 18,...","[10, 14, 15, 16, 17, 20, 21]",carraro
