# Process Carraro et al.

## Set up environment

In [1]:
import os
import numpy as np
import scanpy as sc
from scipy.sparse import csr_matrix

In [2]:
import liana as li

In [3]:
# load muon and mofax
import muon as mu
import mofax as mofa



In [4]:
from prep_utils import filter_samples, filter_celltypes

## Preprocessing (TODO: add batch effect correction here)

### Load data

In [5]:
# Read the Carraro et al. AnnData file from the local `data` directory
# and show its summary as the cell output.
h5ad_path = os.path.join('data', "carraro.h5ad")
adata = sc.read_h5ad(h5ad_path)
adata

AnnData object with n_obs × n_vars = 40709 × 31229
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'lab', 'type', 'mutation', 'percent.mt', 'percent.rp', 'major', 'minor', 'ident'
    uns: 'X_name'

In [6]:
# --- adata.obs column names used by the cells below ---
groupby       = 'major'        # grouping column passed to filter_celltypes and LIANA
sample_key    = 'orig.ident'   # sample identifier column
condition_key = 'type'         # condition column (CF / CO in the sample summary)
batch_key     = 'lab'          # batch column, carried over to mdata.obs

# --- cell-type filtering thresholds (passed to filter_celltypes) ---
min_cells   = 10   # min number of cells per cell type
min_samples = 5    # min number of samples that pass the threshold per cell type

# --- sample filtering thresholds (passed to filter_samples) ---
# NOTE(review): presumably z-score bounds on per-sample counts — confirm in prep_utils.
min_cells_per_sample = 700
sample_zcounts_min   = -2
sample_zcounts_max   = 3

### Filter samples, cell types, and genes

In [10]:
# Filter samples with prep_utils.filter_samples using the thresholds set in the parameter cell.
# NOTE(review): arguments are positional, so their order must match the helper's signature — confirm.
adata = filter_samples(
    adata,
    sample_key,
    condition_key,
    min_cells_per_sample,
    sample_zcounts_max,
    sample_zcounts_min,
)

      orig.ident
type            
CF             8
CO            14


In [11]:
# Filter cell types with prep_utils.filter_celltypes, using the
# min_cells / min_samples thresholds from the parameter cell.
adata = filter_celltypes(
    adata=adata,
    groupby=groupby,
    sample_key=sample_key,
    min_cells=min_cells,
    min_samples=min_samples,
)

In [12]:
# Remove genes expressed in fewer than `min_cells_per_gene` cells.
# This cell only filters genes; normalization is done in the "Normalize" section below.
# The threshold is named separately from the cell-type threshold `min_cells`
# defined in the parameter cell, so the two are not confused.
min_cells_per_gene = 30
sc.pp.filter_genes(adata, min_cells=min_cells_per_gene)
adata



AnnData object with n_obs × n_vars = 34274 × 20531
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'lab', 'type', 'mutation', 'percent.mt', 'percent.rp', 'major', 'minor', 'ident'
    var: 'n_cells'
    uns: 'X_name'

### Normalize

In [None]:
# Scale each cell to a total of 1e4 counts, then apply log1p.
# Neither call's return value is kept, so both are expected to update `adata` in place;
# re-running this cell would therefore normalize already-normalized data.
sc.pp.normalize_total(
    adata,
    target_sum=1e4,
)
sc.pp.log1p(adata)

In [None]:
# Run LIANA's rank aggregation separately for every sample in `sample_key`.
# The results are read back from adata.uns['liana_res'] in the next cell.
li.mt.rank_aggregate.by_sample(
    adata,
    sample_key=sample_key,
    groupby=groupby,
    use_raw=False,   # use adata.X rather than adata.raw
    n_perms=None,    # NOTE(review): presumably disables permutation-based scoring — confirm in liana docs
    verbose=True,
)

In [None]:
# Preview the first rows of the per-sample LIANA results instead of
# displaying the whole table (same convention as `factor_scores.head()` below).
adata.uns['liana_res'].head()

In [None]:
# Reshape the per-sample LIANA results into views for multi-view factorization.
# Filtering thresholds are collected first so they can be tuned in one place.
view_filters = dict(
    lr_prop=0.3,          # minimum required proportion of samples to keep an LR
    lrs_per_sample=10,    # minimum number of interactions to keep a sample in a specific view
    lrs_per_view=10,      # minimum number of interactions to keep a view
    samples_per_view=8,   # minimum number of samples to keep a view
    min_variance=0,       # minimum variance to keep an interaction
    lr_fill=0,            # fill missing LR values across samples with this
)

mdata = li.multi.lrs_to_views(
    adata,
    sample_key=sample_key,
    score_key='magnitude_rank',
    obs_keys=[condition_key, batch_key],  # add those to mdata.obs
    verbose=True,
    **view_filters,
)

In [None]:
# Display the object returned by lrs_to_views to inspect which views survived filtering.
mdata

In [None]:
# Fit a 5-factor MOFA model on the views in `mdata` and write the trained model to disk.
mu.tl.mofa(
    mdata,
    n_factors=5,
    use_obs='union',            # how to handle observations not shared by all views
    convergence_mode='medium',
    # NOTE(review): this file is the MOFA model output; the `.h5ad` suffix suggests an
    # AnnData file, which it presumably is not — confirm before loading it elsewhere.
    outfile='models/mofatalk.h5ad',
)


In [None]:
# obtain factor scores
factor_scores = li.multi.get_factor_scores(mdata, obsm_key='X_mofa')
factor_scores.head()

In [None]:
# Order the rows by their Factor_1 score, highest first.
factor_scores.sort_values(by='Factor_1', ascending=False)

**TODO:** The factor scores show a severe batch effect → try batch correction before the factorization?