# Process Velmeshev et al.

## Set up environment

In [1]:
import os
import pandas as pd
import numpy as np
import scanpy as sc
from scipy.sparse import csr_matrix

In [2]:
import liana as li

In [3]:
from prep_utils import filter_samples, filter_celltypes, map_gene_symbols

In [4]:
# --- Dataset identity and AnnData keys ---------------------------------
dataset = "velmeshev"        # basename of the input file data/<dataset>.h5ad
sample_key = "individual"    # adata.obs column identifying each sample
condition_key = "diagnosis"  # adata.obs column holding the condition label
batch_key = "sex"            # recorded in adata.uns['batch_key'] before writing
groupby = "cluster"          # passed as `groupby` to filter_celltypes and LIANA

# --- Sample-level thresholds (handed to filter_samples) ----------------
min_cells_per_sample = 700
sample_zcounts_min, sample_zcounts_max = -2, 3

# --- Cell-type-level thresholds (handed to filter_celltypes) -----------
min_cells = 10   # min number of cells per cell type
min_samples = 5  # min number of samples that pass the threshold per cell type

## Preprocess

### Load data

In [None]:
# Read the input AnnData from data/<dataset>.h5ad, then display its summary.
raw_h5ad_path = os.path.join('data', f"{dataset}.h5ad")
adata = sc.read_h5ad(raw_h5ad_path)
adata

In [None]:
# Count distinct values per column among the unique (sample, condition) pairs.
sample_condition_pairs = adata.obs[[sample_key, condition_key]].drop_duplicates()
sample_condition_pairs.nunique()

### Convert to gene symbols

In [None]:
# The var names are split on '|' into an Ensembl part and a gene-symbol part.
# The Series is built from `adata.var_names` instead of
# `adata.var.reset_index()['index']`: reset_index only produces a column called
# 'index' when the var index is unnamed, so the old lookup raised KeyError for
# any AnnData whose var index carries a name. For an unnamed index the result
# is identical.
var_name_series = pd.Series(adata.var_names)
df = (
    var_name_series
    .str.split('\\|', expand=True)
    .rename(columns={0: 'ensembl', 1: 'genesymbol'})
)

# Re-index var by the Ensembl part, keeping 'genesymbol' as a column.
# NOTE: this replaces adata.var wholesale, so any pre-existing var columns are dropped.
adata.var = df.set_index('ensembl')

# Same table under the alias/gene column names that are handed to map_gene_symbols.
map_df = df.rename(columns={'ensembl': 'alias', 'genesymbol': 'gene'})
map_df

In [None]:
# Apply the alias -> gene table (map_df) to the AnnData.
# NOTE(review): presumably this renames/collapses var entries to gene symbols;
# the exact behaviour lives in prep_utils.map_gene_symbols -- confirm there.
adata = map_gene_symbols(adata, map_df)

In [None]:
# Display the AnnData summary again to inspect the result of the gene-symbol mapping.
adata

### Filter samples, cell types, and genes

In [None]:
# Sample-level filtering with the thresholds from the configuration cell.
# The filtering rules themselves are implemented in prep_utils.filter_samples.
adata = filter_samples(
    adata,
    sample_key,
    condition_key,
    min_cells_per_sample,
    sample_zcounts_max,
    sample_zcounts_min,
)

In [None]:
# TODO: double-check the output of the cell above -- why are there more than 16?

In [None]:
# Cell-type-level filtering using min_cells / min_samples from the configuration cell.
adata = filter_celltypes(
    adata=adata,
    groupby=groupby,
    sample_key=sample_key,
    min_cells=min_cells,
    min_samples=min_samples,
)

In [None]:
# Remove genes expressed in few cells (min_cells=30 threshold).
# Normalization is NOT done here; it happens in the "Normalize" cell that follows.
sc.pp.filter_genes(adata, min_cells=30)

### Normalize

In [None]:
# Total-count normalize with a target sum of 1e4, then apply log1p.
# Both calls are used for their effect on `adata` (return values are discarded),
# so re-running this cell would transform the already-transformed data again.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

## Run LIANA

In [None]:
# Run LIANA's rank_aggregate separately for every sample (sample_key),
# grouping cells by `groupby`; permutations are switched off via n_perms=None.
li.mt.rank_aggregate.by_sample(
    adata,
    groupby=groupby,
    use_raw=False,
    sample_key=sample_key,
    verbose=True,
    n_perms=None,
)

In [None]:
# Display the name of the dataset being processed.
dataset

### Add Metadata & Write

In [None]:
# Record which obs columns act as sample / batch / condition keys so that the
# information is stored in adata.uns alongside the data.
for uns_name, uns_value in (
    ('sample_key', sample_key),
    ('batch_key', batch_key),
    ('condition_key', condition_key),
):
    adata.uns[uns_name] = uns_value

In [None]:
# Sanity check: all three metadata keys must be present in adata.uns.
required_uns_keys = {'sample_key', 'batch_key', 'condition_key'}
assert required_uns_keys.issubset(adata.uns_keys())

In [None]:
# Persist the processed AnnData to data/interim/<dataset>_processed.h5ad.
# The output directory is created first so the write does not fail on a
# checkout where data/interim does not exist yet.
interim_dir = os.path.join('data', 'interim')
os.makedirs(interim_dir, exist_ok=True)
adata.write_h5ad(os.path.join(interim_dir, f"{dataset}_processed.h5ad"))

## Classify

In [5]:
import scanpy as sc
from classify_utils import classifier_pipe



In [6]:
# Re-open the processed file written earlier, in backed read-only mode (backed='r').
processed_h5ad_path = os.path.join('data', 'interim', f"{dataset}_processed.h5ad")
adata = sc.read_h5ad(processed_h5ad_path, backed='r')

In [7]:
# Run the classification pipeline from classify_utils on the processed data.
# The log printed by this cell shows it building views for several score keys
# (lr_means, expr_prod, lr_logfc, ...) and running a tensor factorization for each.
# NOTE(review): presumably this is what populates adata.uns['auc'] and
# adata.uns['tensor_res'], which are read in the following cells -- confirm in classify_utils.
classifier_pipe(adata, dataset)

Creating views with: lr_means


100%|██████████| 289/289 [00:53<00:00,  5.45it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='Oligodendrocytes&Oligodendrocytes' group='group1' with N=40 samples and D=60 features...
Loaded view='Oligodendrocytes&Neu-mat' group='group1' with N=40 samples and D=91 features...
Loaded view='OPC&OPC' group='group1' with N=40 samples and D=183 features...
Loaded view='Neu-NRGN-II&Neu-NRGN-II' group='group1' with N=40 samples and D=75 features...
Loaded view='AST-

100%|██████████| 40/40 [05:41<00:00,  8.53s/it]


Running Tensor Factorization
Creating views with: expr_prod


100%|██████████| 289/289 [00:51<00:00,  5.65it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='Oligodendrocytes&Oligodendrocytes' group='group1' with N=40 samples and D=60 features...
Loaded view='Oligodendrocytes&Neu-mat' group='group1' with N=40 samples and D=91 features...
Loaded view='OPC&OPC' group='group1' with N=40 samples and D=183 features...
Loaded view='Neu-NRGN-II&Neu-NRGN-II' group='group1' with N=40 samples and D=75 features...
Loaded view='AST-

100%|██████████| 40/40 [05:33<00:00,  8.34s/it]


Running Tensor Factorization
Creating views with: lr_logfc


100%|██████████| 289/289 [00:51<00:00,  5.65it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='Oligodendrocytes&Oligodendrocytes' group='group1' with N=40 samples and D=60 features...
Loaded view='Oligodendrocytes&Neu-mat' group='group1' with N=40 samples and D=91 features...
Loaded view='OPC&OPC' group='group1' with N=40 samples and D=183 features...
Loaded view='Neu-NRGN-II&Neu-NRGN-II' group='group1' with N=40 samples and D=75 features...
Loaded view='AST-

100%|██████████| 40/40 [05:40<00:00,  8.52s/it]


Running Tensor Factorization
Creating views with: expr_prod


100%|██████████| 289/289 [00:50<00:00,  5.68it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='Oligodendrocytes&Oligodendrocytes' group='group1' with N=40 samples and D=60 features...
Loaded view='Oligodendrocytes&Neu-mat' group='group1' with N=40 samples and D=91 features...
Loaded view='OPC&OPC' group='group1' with N=40 samples and D=183 features...
Loaded view='Neu-NRGN-II&Neu-NRGN-II' group='group1' with N=40 samples and D=75 features...
Loaded view='AST-

100%|██████████| 40/40 [05:30<00:00,  8.27s/it]


Running Tensor Factorization
Creating views with: lrscore


100%|██████████| 289/289 [00:50<00:00,  5.67it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='Oligodendrocytes&Oligodendrocytes' group='group1' with N=40 samples and D=60 features...
Loaded view='Oligodendrocytes&Neu-mat' group='group1' with N=40 samples and D=91 features...
Loaded view='OPC&OPC' group='group1' with N=40 samples and D=183 features...
Loaded view='Neu-NRGN-II&Neu-NRGN-II' group='group1' with N=40 samples and D=75 features...
Loaded view='AST-

100%|██████████| 40/40 [06:01<00:00,  9.03s/it]


Device is either not available or the backend used with tensorly does not support this device.                   Try changing it with tensorly.set_backend("<backend_name>") before.
Running Tensor Factorization
Creating views with: lr_probs


100%|██████████| 289/289 [00:53<00:00,  5.38it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='Oligodendrocytes&Oligodendrocytes' group='group1' with N=40 samples and D=37 features...
Loaded view='Oligodendrocytes&Neu-mat' group='group1' with N=40 samples and D=61 features...
Loaded view='OPC&OPC' group='group1' with N=40 samples and D=133 features...
Loaded view='Neu-NRGN-II&Neu-NRGN-II' group='group1' with N=40 samples and D=59 features...
Loaded view='AST-

100%|██████████| 40/40 [06:19<00:00,  9.48s/it]


Device is either not available or the backend used with tensorly does not support this device.                   Try changing it with tensorly.set_backend("<backend_name>") before.
Running Tensor Factorization
Creating views with: magnitude_rank


100%|██████████| 289/289 [00:53<00:00,  5.38it/s]



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Loaded view='Oligodendrocytes&Oligodendrocytes' group='group1' with N=40 samples and D=43 features...
Loaded view='Oligodendrocytes&Neu-mat' group='group1' with N=40 samples and D=66 features...
Loaded view='OPC&OPC' group='group1' with N=40 samples and D=143 features...
Loaded view='Neu-NRGN-II&Neu-NRGN-II' group='group1' with N=40 samples and D=65 features...
Loaded view='AST-

100%|██████████| 40/40 [06:00<00:00,  9.00s/it]


Device is either not available or the backend used with tensorly does not support this device.                   Try changing it with tensorly.set_backend("<backend_name>") before.
Running Tensor Factorization


In [8]:
# Display the AUC table stored in adata.uns: one row per
# (reduction_name, score_key, fold) with the auc, tpr/fpr and train/test split columns.
adata.uns['auc']

Unnamed: 0,reduction_name,score_key,fold,auc,tpr,fpr,train_split,test_split,dataset
0,mofa,lr_means,0,0.729167,"[0.0, 0.0, 0.3333333333333333, 0.3333333333333...","[0.0, 0.125, 0.125, 0.25, 0.25, 0.375, 0.375, ...","[1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 13, 15, 18, 1...","[0, 9, 12, 14, 16, 17, 20, 28, 30, 32, 33, 36,...",velmeshev
1,tensor,lr_means,0,0.604167,"[0.0, 0.0, 0.16666666666666666, 0.166666666666...","[0.0, 0.125, 0.125, 0.25, 0.25, 0.375, 0.375, ...","[1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 13, 15, 18, 1...","[0, 9, 12, 14, 16, 17, 20, 28, 30, 32, 33, 36,...",velmeshev
2,mofa,lr_means,1,0.571429,"[0.0, 0.0, 0.16666666666666666, 0.166666666666...","[0.0, 0.14285714285714285, 0.14285714285714285...","[0, 4, 5, 7, 8, 9, 11, 12, 13, 14, 16, 17, 18,...","[1, 2, 3, 6, 10, 15, 22, 23, 24, 25, 27, 29, 35]",velmeshev
3,tensor,lr_means,1,0.404762,"[0.0, 0.0, 0.3333333333333333, 0.3333333333333...","[0.0, 0.14285714285714285, 0.14285714285714285...","[0, 4, 5, 7, 8, 9, 11, 12, 13, 14, 16, 17, 18,...","[1, 2, 3, 6, 10, 15, 22, 23, 24, 25, 27, 29, 35]",velmeshev
4,mofa,lr_means,2,0.833333,"[0.0, 0.16666666666666666, 0.16666666666666666...","[0.0, 0.0, 0.14285714285714285, 0.142857142857...","[0, 1, 2, 3, 6, 9, 10, 12, 14, 15, 16, 17, 20,...","[4, 5, 7, 8, 11, 13, 18, 19, 21, 26, 31, 34, 37]",velmeshev
5,tensor,lr_means,2,0.547619,"[0.0, 0.16666666666666666, 0.3333333333333333,...","[0.0, 0.0, 0.0, 0.5714285714285714, 0.57142857...","[0, 1, 2, 3, 6, 9, 10, 12, 14, 15, 16, 17, 20,...","[4, 5, 7, 8, 11, 13, 18, 19, 21, 26, 31, 34, 37]",velmeshev
6,mofa,expr_prod,0,0.791667,"[0.0, 0.0, 0.8333333333333334, 0.8333333333333...","[0.0, 0.125, 0.125, 0.625, 0.625, 1.0]","[1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 13, 15, 18, 1...","[0, 9, 12, 14, 16, 17, 20, 28, 30, 32, 33, 36,...",velmeshev
7,tensor,expr_prod,0,0.604167,"[0.0, 0.0, 0.16666666666666666, 0.166666666666...","[0.0, 0.125, 0.125, 0.25, 0.25, 0.375, 0.375, ...","[1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 13, 15, 18, 1...","[0, 9, 12, 14, 16, 17, 20, 28, 30, 32, 33, 36,...",velmeshev
8,mofa,expr_prod,1,0.642857,"[0.0, 0.16666666666666666, 0.3333333333333333,...","[0.0, 0.0, 0.0, 0.2857142857142857, 0.28571428...","[0, 4, 5, 7, 8, 9, 11, 12, 13, 14, 16, 17, 18,...","[1, 2, 3, 6, 10, 15, 22, 23, 24, 25, 27, 29, 35]",velmeshev
9,tensor,expr_prod,1,0.404762,"[0.0, 0.0, 0.3333333333333333, 0.3333333333333...","[0.0, 0.14285714285714285, 0.14285714285714285...","[0, 4, 5, 7, 8, 9, 11, 12, 13, 14, 16, 17, 18,...","[1, 2, 3, 6, 10, 15, 22, 23, 24, 25, 27, 29, 35]",velmeshev


In [12]:
# Display the tensor-factorization result for the 'lr_means' score:
# per-sample values for each factor plus the sample's Category label.
adata.uns['tensor_res'].X['lr_means']

Unnamed: 0,Factor 1,Factor 2,Factor 3,Factor 4,Factor 5,Factor 6,Factor 7,Factor 8,Factor 9,Factor 10,Category
1823_BA24,0.129502,0.117445,0.208744,0.157211,0.185577,0.150357,0.148398,0.188844,0.247091,0.182364,Control
4341_BA24,0.169745,0.158591,0.166037,0.163774,0.169454,0.112995,0.179367,0.175742,0.185422,0.151256,Control
4341_BA46,0.164383,0.176174,0.156494,0.174124,0.164544,0.13443,0.169145,0.167947,0.122532,0.153165,Control
4849_BA24,0.183103,0.12218,0.16909,0.162332,0.176253,0.122439,0.195129,0.161581,0.232126,0.188336,ASD
4899_BA24,0.187152,0.168754,0.161576,0.161985,0.161001,0.15286,0.236814,0.046698,0.182192,0.149058,ASD
5144_PFC,0.160618,0.138628,0.157067,0.155085,0.153296,0.175675,0.117374,0.174266,0.061729,0.164547,ASD
5163_BA24,0.145355,0.180153,0.160899,0.138382,0.119331,0.169956,0.152677,0.083697,0.106151,0.109853,Control
5242_BA24,0.16867,0.136474,0.170196,0.160478,0.172599,0.141132,0.193924,0.151829,0.196284,0.18406,Control
5278_BA24,0.184101,0.154878,0.163387,0.157254,0.172007,0.153569,0.184427,0.132456,0.152824,0.165841,ASD
5278_PFC,0.145026,0.155093,0.130717,0.162945,0.124507,0.145427,0.145443,0.168926,0.092139,0.162081,ASD
