In [1]:
import os
import pandas as pd
import numpy as np
import plotnine as p9

In [None]:
# Gather the benchmark outputs: every .csv under data/results is read into its own DataFrame.
results_dir = os.path.join('data', 'results')
file_paths = os.listdir(results_dir)
results = []
for fname in file_paths:
    # skip directory entries that are not .csv files
    if fname.endswith('.csv'):
        results.append(pd.read_csv(os.path.join(results_dir, fname)))

In [None]:
# Stack the per-file frames row-wise into one table.
# Each frame keeps its own index labels (no ignore_index), so labels repeat across files.
results = pd.concat(results)

In [None]:
# Broadcast the per-group mean AUROC and F1 back onto every row of the group,
# where a group is one (reduction, dataset, score) combination.
group_cols = ['reduction_name', 'dataset', 'score_key']
grouped = results.groupby(group_cols)
results['mean_auc'] = grouped['auroc'].transform('mean')
results['mean_f1'] = grouped['f1_score'].transform('mean')

In [None]:
# Display the rows ordered by reduction, dataset and score (the sorted frame is not assigned back).
results.sort_values(['reduction_name', 'dataset', 'score_key'])

In [None]:
# Raw score keys present in the results.
# NOTE: this is not the last expression of the cell, so its value is not displayed.
results['score_key'].unique()

# Display name for each raw score key.
remap_dict = {'lr_means':'CellPhoneDB',
              'expr_prod':'Product',
              'lr_logfc': 'logFC',
              'lrscore': 'SingleCellSignalR',
              'lr_probs': 'CellChat',
              'magnitude_rank':'Consensus'}
# `replace` (rather than `map`) leaves values that are not keys of `remap_dict` untouched
# instead of turning them into NaN. Re-running the cell is therefore harmless, and a score
# key missing from the dict is kept under its raw name rather than silently dropped.
results['score_key'] = results['score_key'].replace(remap_dict)

In [None]:
## AUROC boxplots per reduction_name, coloured by score_key, one panel per dataset
(
    p9.ggplot(results, p9.aes(x='reduction_name', y='auroc', color='score_key'))
    + p9.geom_boxplot()
    + p9.theme_bw()
    + p9.facet_grid(' ~ dataset')
)

In [None]:
# Name of the column used to rank the scoring functions in the cells below.
metric = 'mean_auc'

In [None]:
# Keep only the grouping keys and the group means, then collapse the repeated rows
# so that each remaining row is a distinct combination of these columns.
summary_cols = ['reduction_name', 'score_key', 'mean_auc', 'mean_f1', 'dataset']
results = results.loc[:, summary_cols].drop_duplicates()

In [None]:
# Rank the rows within each dataset/reduction pair by `metric`; rank 1 is the highest value.
# Tied values receive their average rank, which the integer cast then truncates.
metric_by_group = results.groupby(['dataset', 'reduction_name'])[metric]
results['rank'] = metric_by_group.rank(ascending=False, method='average').astype('int')

In [None]:
# Preview the summarised table with the new rank column.
results.head()

In [None]:
# Mean rank, AUROC and F1 of every score/reduction pair, averaged over the rows of that pair.
averaged_cols = ['rank', 'mean_auc', 'mean_f1']
score_avg = results.groupby(['score_key', 'reduction_name'])[averaged_cols].mean()

In [None]:
# Display the averaged table.
score_avg

In [None]:
# Tag the averaged rows with a placeholder dataset name so they can be told apart
# from the per-dataset rows once both are in the same frame.
score_avg['dataset'] = "Score Average"

In [None]:
## append the averaged rows (group keys restored as columns) below the per-dataset rows
score_avg_rows = score_avg.reset_index()
results = pd.concat([results, score_avg_rows])

In [None]:
# Keep the rank only on the 'Score Average' rows; every other row gets None.
is_average_row = results['dataset'] == 'Score Average'
results['rank'] = results['rank'].where(is_average_row, None)

In [None]:
# Round to two decimals; this column is also used as the tile label in the mosaic plot.
results['mean_auc'] = results['mean_auc'].round(2)

In [None]:
# Prepare the facet labels: rename the averaged rows, title-case the dataset names
# and fix the category order so that 'Average' comes last.
results['dataset'] = results['dataset'].replace({'Score Average': 'Average'})
# Title-case BEFORE building the Categorical: the `.str` accessor on a categorical Series
# returns a plain (non-categorical) Series, so calling it afterwards discards the ordering.
dataset_order = ['carraro', 'habermann', 'kuppe', 'velmeshev', 'reichart', 'Average']
results['dataset'] = pd.Categorical(
    results['dataset'].str.title(),
    categories=[name.title() for name in dataset_order],
)

In [None]:
# Swap the internal reduction keys for their display names; other values are left unchanged.
results['reduction_name'] = results['reduction_name'].replace({"mofa":"MOFA+", 'tensor':"Tensor-cell2cell"})

In [None]:
## mosaic plot: mean AUROC per score (y) and reduction (x), one panel per dataset
# the rounded mean AUROC is printed on top of each tile
tile_labels = p9.geom_text(p9.aes(label='mean_auc'), size=11, color='white', fontweight='bold')
mosaic_theme = p9.theme(
    figure_size=(8, 5),
    axis_text_x=p9.element_text(angle=90, size=11),
    strip_text_x=p9.element_text(size=11),
    strip_background=p9.element_rect(colour="black", fill="#fdfff4"),
    legend_text=p9.element_text(size=11),
    axis_text_y=p9.element_text(size=11),
)
(
    p9.ggplot(results, p9.aes(x='reduction_name', y='score_key', fill='mean_auc'))
    + p9.geom_tile()
    + p9.theme_bw(base_size=11)
    + p9.facet_grid(' ~ dataset')
    + p9.scale_fill_cmap(limits=(0.5, 1), cmap_name='viridis')
    + tile_labels
    + mosaic_theme
    + p9.labs(x='', y='', fill='AUROC\n(mean)\n')
)

In [None]:
# Average the numeric columns per reduction/score pair.
# `numeric_only=True` keeps the non-numeric `dataset` column out of the aggregation:
# pandas >= 2.0 raises a TypeError on it, while older versions silently dropped it.
dimred_results = results.groupby(['reduction_name', 'score_key']).mean(numeric_only=True).reset_index()

In [None]:
## mean F1 against mean AUROC: colour = score_key, marker shape = reduction_name
(
    p9.ggplot(dimred_results, p9.aes(x='mean_auc', y='mean_f1', color='score_key', shape='reduction_name'))
    + p9.geom_point(size=5)
    + p9.theme_bw()
    # both axes span the full 0-1 range
    + p9.scale_x_continuous(limits=(0, 1))
    + p9.scale_y_continuous(limits=(0, 1))
    + p9.theme(figure_size=(6, 6))
)

Reichart model: the results below do not look like something I want to include.

In [None]:
import numpy as np
import pandas as pd

import scanpy as sc

import plotnine as p9

import liana as li

# load muon and mofax
import muon as mu
import mofax as mofa

import decoupler as dc

In [None]:
# Open the Reichart .h5ad file in backed mode 'r' rather than loading it with the default settings.
adata = sc.read_h5ad(os.path.join('data', 'results', 'reichart_dimred.h5ad'), backed='r')

In [None]:
# `adata.obs` columns that are carried over into the model metadata below.
meta_features = ['Sample','Region_x', 'Primary.Genetic.Diagnosis', 'tissue', 'sex', 'assay', 'development_stage', 'disease']

In [None]:
# Load the fitted model stored for the Reichart dataset / expr_prod score.
model_path = os.path.join("data", "results", "models", "reichart", "expr_prod.hdf5")
model = mofa.mofa_model(model_path)
# Distinct combinations of the selected obs columns, renamed to the names used downstream
# ('sample' is the merge key, 'condition' is used by the plots).
metadata = (
    adata.obs[meta_features]
    .drop_duplicates()
    .rename(columns={'Sample':'sample', 'disease':'condition'})
)
# Default (inner) merge: only samples present on both sides are kept.
model.metadata = model.metadata.merge(metadata, on='sample')

In [None]:
# Non-null counts of the remaining columns for every condition / genetic-diagnosis pair.
metadata.groupby(["condition",'Primary.Genetic.Diagnosis']).count()

In [None]:
# Shape of the weights returned by the model (checked before they are attached to `mdata` below).
model.get_weights().shape

TODO: save `mdata`.

In [None]:
# Filtering thresholds passed to lrs_to_views
view_filters = {
    'lr_prop': 0.33,  # minimum required proportion of samples to keep an LR
    'lrs_per_sample': 5,  # minimum number of interactions to keep a sample in a specific view
    'lrs_per_view': 15,  # minimum number of interactions to keep a view
    'samples_per_view': 5,  # minimum number of samples to keep a view
    'min_variance': 0,  # minimum variance to keep an interaction
    'lr_fill': 0,  # fill missing LR values across samples with this
}
# Build the views from the expr_prod scores in `adata`, one observation per "Sample".
mdata = li.multi.lrs_to_views(
    adata,
    sample_key="Sample",
    score_key="expr_prod",
    inverse_fun=lambda x: -np.log2(x),
    verbose=True,
    **view_filters,
)


In [None]:
# Attach the model's weights and factors to `mdata` under the keys read by later cells.
# NOTE(review): assumes the model's features/samples are in the same order as `mdata` — confirm.
mdata.varm["LFs"] = model.get_weights()
mdata.obsm["X_mofa"] = model.get_factors()

In [None]:
# Replace the obs table of `mdata` with the merged model metadata, indexed by sample.
mdata.obs = model.metadata.set_index('sample')

Check Factors

In [None]:
# Table of factor scores read from mdata.obsm['X_mofa']; the plots below also use its
# 'condition' and 'tissue' columns.
factor_scores = li.multi.get_factor_scores(mdata, obsm_key='X_mofa')
factor_scores.head()

In [None]:
# Display the full factor-score table.
factor_scores

In [None]:
# Factor1 per condition: violin outline with the individual points jittered on top
(
    p9.ggplot(factor_scores, p9.aes(x='condition', colour='condition', y='Factor1'))
    + p9.geom_violin()
    + p9.geom_jitter(size=4, width=0.2)
    + p9.theme_bw()
    # two manual colours are supplied for the condition levels
    + p9.scale_colour_manual(values=['#1f77b4', '#c20019'])
)



UMAP

In [None]:
# Neighbourhood graph on the factor representation, followed by a UMAP with default settings.
# NOTE(review): the next cell calls sc.tl.umap again with explicit parameters, so this
# first embedding looks redundant — confirm.
sc.pp.neighbors(mdata, use_rep="X_mofa")
sc.tl.umap(mdata)

In [None]:
# Recompute the UMAP with explicit min_dist/spread and a fixed random_state.
sc.tl.umap(mdata, min_dist=.2, spread=1., random_state=10)

In [None]:
# Factor1 against Factor2, coloured by condition with the marker shape set by tissue
(
    p9.ggplot(factor_scores, p9.aes(x='Factor1', y='Factor2', colour='condition', shape='tissue'))
    + p9.geom_point(size=4)
    + p9.theme_bw()
)

In [None]:
# Plot the correlation between the model's weights.
mofa.plot_weights_correlation(model)

In [None]:
# Factors against the one-hot encoded `sex` column, plotted with pvalues=True
sex_dummies = pd.get_dummies(model.metadata['sex'])
mofa.plot_factors_covariates_correlation(model, covariates=sex_dummies, pvalues=True)

In [None]:
# Factors against the one-hot encoded `development_stage` column, plotted with pvalues=True
stage_dummies = pd.get_dummies(model.metadata['development_stage'])
mofa.plot_factors_covariates_correlation(model, covariates=stage_dummies, pvalues=True)

In [None]:
# Factors against the one-hot encoded `Primary.Genetic.Diagnosis` column, plotted with pvalues=True
diagnosis_dummies = pd.get_dummies(model.metadata['Primary.Genetic.Diagnosis'])
mofa.plot_factors_covariates_correlation(model, covariates=diagnosis_dummies, pvalues=True)

In [None]:
# R2 table from the model (the plots below use its View, Factor and R2 columns);
# the R2 column is divided by 100 before plotting
rsq = model.get_r2()
rsq["R2"] = rsq["R2"].div(100)

In [None]:
## tile plot of R2 with one tile per View (y) and Factor (x)
r2_heatmap = (
    p9.ggplot(rsq, p9.aes(x='Factor', y='View', fill='R2'))
    + p9.geom_tile()
    + p9.theme_bw()
    + p9.theme(figure_size=(10, 10))
    + p9.labs(x='Factor', y='View', fill='R2')
    + p9.theme(axis_text_x=p9.element_text(angle=90, hjust=1))
    # magma colour map with the limits fixed to 0-1
    + p9.scale_fill_cmap(limits=(0, 1), cmap_name='magma')
)
r2_heatmap

In [None]:
# Factor 1
# `.copy()` makes this an independent frame, so adding columns below does not write
# into a slice of `rsq` (which triggers SettingWithCopyWarning).
factor1_rsq = rsq[rsq['Factor']=='Factor1'].copy()
# Separate the view column into source and target at the first '&'.
# `n` is passed by keyword: positional use was deprecated and is rejected by pandas >= 2.0.
factor1_rsq[['source', 'target']] = factor1_rsq['View'].str.split('&', n=1, expand=True)

In [None]:
# Factor 1 R2 laid out as a source (y) by target (x) grid
(
    p9.ggplot(factor1_rsq.reset_index(), p9.aes(x='target', y='source'))
    + p9.geom_tile(p9.aes(fill='R2'))
    + p9.scale_fill_cmap(limits=(0, 0.5), cmap_name='magma')
    + p9.theme_bw()
    + p9.theme(figure_size=(5, 5))
    + p9.labs(x='Target groups', y='Source groups', fill='R2')
    + p9.theme(axis_text_x=p9.element_text(rotation=90, hjust=0.5))
)



In [None]:
# Extract the variable loadings from `mdata`, passing the separators used for views (':'),
# source/target pairs ('&') and variables ('^').
# NOTE(review): no factor is selected in this call; the dotplot below reads both the
# 'Factor1' and 'Factor2' columns of the result.
variable_loadings =  li.multi.get_variable_loadings(mdata,
                                                    view_separator=':',
                                                    pair_separator="&",
                                                    variable_separator="^")
variable_loadings.head()



In [None]:
# NOTE(review): this constant 'size' column is not referenced by the dotplot call below,
# which sizes the dots by 'Factor2' — confirm whether it is still needed.
variable_loadings['size'] = 4.5

# Dot plot of the loadings: colour and ordering by 'Factor1' (descending, top_n=20),
# dot size by 'Factor2', restricted to the three listed source labels.
my_plot = li.pl.dotplot(liana_res = variable_loadings,
                        size='Factor2',
                        colour='Factor1',
                        orderby='Factor1',
                        top_n=20,
                        source_labels=['myeloid cell', 'fibroblast of cardiac tissue', 'endothelial cell'],
                        orderby_ascending=False,
                        size_range=(0.1, 8),
                        figure_size=(14, 10)
                        )
# diverging colour scale: blue (low) to red (high) with a light-gray midpoint
my_plot + p9.scale_color_gradient2(low='#1f77b4', mid='lightgray', high='#c20019')

