In [65]:
import scanpy as sc
from scipy.stats import zscore
import pandas as pd
import numpy as np
import decoupler as dc
import pickle

Load the AnnData object and the ligand-receptor resource

In [21]:
# Load the dataset from the local file "test.h5ad" into `adata`
adata = sc.read_h5ad("test.h5ad")

In [22]:
# Read the consensus resource, keep the two gene-symbol columns and give them
# ligand/receptor names; `interaction` preserves the original "ligand|receptor" key
# (before complexes are split apart below).
resource = (
    pd.read_csv("consensus.csv", index_col=False)
    .loc[:, ['source_genesymbol', 'target_genesymbol']]
    .rename(columns={'source_genesymbol': 'ligand', 'target_genesymbol': 'receptor'})
    .assign(interaction=lambda df: df['ligand'] + '|' + df['receptor'])
)

# Decomplexify: entries joined by '_' are split into their parts and every
# part gets its own row, while `interaction` still identifies the parent pair.
resource = resource.set_index('interaction').apply(lambda column: column.str.split('_'))
resource = resource.explode(['receptor']).explode('ligand').reset_index()

In [23]:
# Sorted unique gene symbols on each side of the resource, and their union
ligands, receptors = (np.unique(resource[role]) for role in ("ligand", "receptor"))
entities = np.union1d(ligands, receptors)

In [24]:
# Categories of the categorical `label` column in adata.obs
labels = adata.obs['label'].cat.categories

Process adata

In [25]:
# Keep the matrix currently in adata.X under layers['counts'].
# Store a copy rather than a reference so that later in-place transformations
# of that matrix cannot silently change the saved layer.
# NOTE: run this cell before adata.X is replaced by the log-normalised layer.
adata.layers['counts'] = adata.X.copy()

In [102]:
# Log-normalised values are expected to be the default matrix in adata.X.
# Copy the layer so adata.X and layers['logcounts'] do not share one buffer
# (an in-place operation on X would otherwise also rewrite the layer).
adata.X = adata.layers['logcounts'].copy()
# Scaled version of adata.X, computed on a copy so adata.X itself is left untouched
adata.layers['scaled'] = sc.pp.scale(adata, copy=True).X

In [103]:
# Mean over every entry of adata.X, taken before the gene filter below
# so that all genes contribute (needed for SCA)
global_mean = adata.X.mean()

In [106]:
# Filter to only the genes that occur in the ligand-receptor resource.
# np.intersect1d returns the shared gene names sorted, so the variables end up
# in sorted order. Slicing yields a view; .copy() materialises it so later
# writes (e.g. results stored in .uns) do not trigger an implicit
# view-to-copy conversion with a warning.
adata = adata[:, np.intersect1d(entities, adata.var.index)].copy()

In [107]:
# Wilcoxon differential-expression test per label.
# use_raw=False pins the test to adata.X (log-normalised, gene-filtered);
# with the default, scanpy uses adata.raw whenever it is present, which would
# bypass both the normalisation and the gene filter.
sc.tl.rank_genes_groups(adata, groupby='label', method='wilcoxon', use_raw=False)

  self.data[key] = value
  next(self.gen)


In [108]:
# One differential-expression table per label: tagged with its label and
# sorted by gene name (the original row index is kept, not reset).
dedict = {
    group: (
        sc.get.rank_genes_groups_df(adata, group)
        .assign(label=group)
        .sort_values('names')
    )
    for group in labels
}

In [109]:
# Sanity check: every label's DE table must list the genes in exactly the same
# order as adata.var_names (checked for all labels, not just a hard-coded one).
all(list(adata.var_names) == list(de_table['names']) for de_table in dedict.values())

True

In [114]:
# Add per-label expression summaries to each DE table:
#   sums    - column-wise sum of adata.X over the cells of the label
#   zscores - column-wise mean of the 'scaled' layer over the same cells
for label in labels:
    label_cells = adata[adata.obs['label'] == label]
    genes = label_cells.var_names
    # np.asarray(...).ravel() gives a flat vector even when the matrix type
    # returns a 2-D (1, n_genes) result for axis reductions (e.g. sparse input).
    gene_sums = pd.Series(np.asarray(label_cells.X.sum(axis=0)).ravel(), index=genes)
    gene_zscores = pd.Series(
        np.asarray(label_cells.layers['scaled'].mean(axis=0)).ravel(), index=genes
    )
    # Align by gene name instead of by position, so the values stay correct
    # whatever the row order of the DE table is.
    dedict[label]['sums'] = dedict[label]['names'].map(gene_sums)
    dedict[label]['zscores'] = dedict[label]['names'].map(gene_zscores)
    

In [115]:
# Inspect the per-gene statistics table stored for label 'a'
dedict['a']

Unnamed: 0,names,scores,logfoldchanges,pvals,pvals_adj,label,sums,zscores
30,A1BG,2.122771,0.357488,0.033773,0.581712,a,106.947174,0.300579
540,A2M,0.000000,-0.025030,1.000000,1.000000,a,103.143761,-0.037149
817,ABCA1,-0.934019,-0.157689,0.350294,0.786390,a,101.881752,-0.143818
557,ACE,-0.084911,-0.019372,0.932332,0.965181,a,107.265755,-0.035601
136,ACKR1,1.443484,0.235205,0.148884,0.682272,a,110.385063,0.217979
...,...,...,...,...,...,...,...,...
928,WNT9A,-1.443484,-0.190739,0.148884,0.682272,a,107.000191,-0.219724
846,XCL1,-1.018930,-0.103294,0.308236,0.764089,a,99.015869,-0.211725
685,YBX1,-0.424554,-0.046266,0.671162,0.918480,a,106.093704,-0.065317
344,ZG16B,0.594376,0.087047,0.552261,0.878517,a,105.754723,0.136715


Join Means

In [None]:
# Every ordered (source, target) combination of labels, self-pairs included;
# the source label varies fastest, the target label slowest.
pairs = pd.DataFrame(
    [(source_label, target_label) for target_label in labels for source_label in labels],
    columns=["source", "target"],
)

In [None]:
def _prefix_stats(stats, prefix, label_column):
    """Copy ``stats`` with every column prefixed by ``prefix + '_'``.

    The prefixed ``names`` column becomes the merge key ``prefix`` and the
    prefixed ``label`` column is renamed to ``label_column``.
    """
    prefixed = stats.copy()
    prefixed.columns = [f'{prefix}_{column}' for column in prefixed.columns]
    return prefixed.rename(columns={f'{prefix}_names': prefix,
                                    f'{prefix}_label': label_column})


def join_means(source, target, stats_by_label=None, lr_resource=None):
    """Attach source-label ligand stats and target-label receptor stats to the resource.

    Parameters
    ----------
    source, target
        Keys into ``stats_by_label`` selecting the sender and receiver labels.
    stats_by_label : dict of DataFrame, optional
        Per-label statistics tables with at least ``names`` and ``label``
        columns. Defaults to the notebook-level ``dedict``.
    lr_resource : DataFrame, optional
        Table with ``ligand`` and ``receptor`` columns. Defaults to the
        notebook-level ``resource``.

    Returns
    -------
    DataFrame
        ``lr_resource`` inner-joined with the source table on ``ligand``
        (columns prefixed ``ligand_``, plus ``source``) and with the target
        table on ``receptor`` (columns prefixed ``receptor_``, plus ``target``).
        Rows whose ligand or receptor has no statistics are dropped.
    """
    if stats_by_label is None:
        stats_by_label = dedict
    if lr_resource is None:
        lr_resource = resource

    source_stats = _prefix_stats(stats_by_label[source], 'ligand', 'source')
    target_stats = _prefix_stats(stats_by_label[target], 'receptor', 'target')

    # Explicit keys: do not let pandas guess the join columns from name overlap.
    return (lr_resource
            .merge(source_stats, on='ligand')
            .merge(target_stats, on='receptor'))

In [None]:
# Stack the joined tables of all (source, target) pairs into one frame.
# ignore_index=True gives the result a fresh unique index; otherwise every
# per-pair table would contribute its own 0..n-1 labels and the index would
# contain duplicates.
lr_res = pd.concat(
    [join_means(source, target) for source, target in zip(pairs['source'], pairs['target'])],
    ignore_index=True,
)

Re-implement the logFC score

In [None]:
# Interaction logFC: row-wise mean of the ligand and receptor log fold changes
lr_res['logfc'] = (
    lr_res[['ligand_logfoldchanges', 'receptor_logfoldchanges']]
    .mean(axis=1)
)

In [None]:
# Show interactions ordered by the magnitude of logfc, largest first
lr_res.sort_values(by='logfc', key=lambda column: column.abs(), ascending=False)