In [1]:
import scanpy as sc
from scipy.stats import zscore
import pandas as pd
import numpy as np
import decoupler as dc
import pickle

Load anndata and resource

In [3]:
# Read the AnnData object from the .h5ad file one directory above the notebook.
adata = sc.read_h5ad(filename="../test.h5ad")

In [5]:
# Load the consensus resource, keep only the two gene-symbol columns under
# ligand/receptor names, and build a 'ligand|receptor' interaction label.
resource = (
    pd.read_csv("../consensus.csv", index_col=False)
    .loc[:, ['source_genesymbol', 'target_genesymbol']]
    .rename(columns={'source_genesymbol': 'ligand', 'target_genesymbol': 'receptor'})
    .assign(interaction=lambda df: df['ligand'] + '|' + df['receptor'])
)

# Decomplexify: split '_'-joined entries into lists, then expand them so each
# row holds a single ligand and a single receptor. 'interaction' is carried as
# the index so every expanded row keeps its original interaction label.
resource = (
    resource.set_index('interaction')
    .assign(
        ligand=lambda df: df['ligand'].str.split('_'),
        receptor=lambda df: df['receptor'].str.split('_'),
    )
    .explode(['receptor'])
    .explode('ligand')
    .reset_index()
)

In [6]:
# Sorted unique gene symbols on each side of the resource, and their union.
ligands, receptors = (np.unique(resource[side]) for side in ("ligand", "receptor"))
entities = np.union1d(ligands, receptors)

In [7]:
# Categories of the categorical 'label' column in adata.obs.
labels = adata.obs['label'].cat.categories

Process adata

In [8]:
# Keep the matrix currently in .X under the 'counts' layer. Store a copy so the
# layer owns its data and cannot be altered by later in-place operations on .X.
adata.layers['counts'] = adata.X.copy()

In [9]:
# Downstream steps expect log-normalised values in .X, so use the 'logcounts' layer there.
adata.X = adata.layers['logcounts']
# Scale a copy of the whole object and keep only its matrix as the 'scaled' layer;
# .X itself stays log-normalised.
adata_scaled = sc.pp.scale(adata, copy=True)
adata.layers['scaled'] = adata_scaled.X
del adata_scaled

In [10]:
# Mean over every entry of .X, taken before the gene filter is applied (needed for SCA).
global_mean = adata.X.mean()

In [11]:
# Keep only genes that appear in the resource (as ligand or receptor) and in adata.
# Slicing an AnnData returns a view; .copy() materialises it so that later steps
# that write into the object (e.g. rank_genes_groups) do not modify a view.
adata = adata[:, np.intersect1d(entities, adata.var.index)].copy()

In [12]:
# Differential expression per 'label' group using the Wilcoxon test;
# scanpy stores the result on adata for retrieval with sc.get.rank_genes_groups_df.
sc.tl.rank_genes_groups(adata, groupby='label', method='wilcoxon')

  self.data[key] = value
  next(self.gen)


In [13]:
# One DE table per label, tagged with its label and ordered by gene name.
dedict = {
    group: (
        sc.get.rank_genes_groups_df(adata, group=group)
        .assign(label=group)
        .sort_values('names')
    )
    for group in labels
}

Calculate per-group expression sums and mean scaled values (z-scores)

In [17]:
# Sanity check: every per-label DE table must list its genes in the same order
# as adata.var_names (checked for all labels, not just one).
all(list(adata.var_names) == list(dedict[label]['names']) for label in labels)

True

In [18]:
for label in labels:
    # Cells belonging to this group (a view of adata; it is only read from).
    temp = adata[adata.obs.label.isin([label])]
    # Per-gene statistics keyed by gene name. np.asarray(...).ravel() flattens the
    # 2-D result that sparse matrices can return from axis reductions.
    gene_sums = pd.Series(np.asarray(temp.X.sum(0)).ravel(), index=temp.var_names)
    gene_zscores = pd.Series(np.asarray(temp.layers['scaled'].mean(0)).ravel(), index=temp.var_names)
    # Attach by gene name rather than by position, so the result does not depend
    # on the DE table being ordered exactly like adata.var_names.
    # NOTE(review): assumes var_names are unique — true after the intersect1d filter.
    dedict[label]['sums'] = dedict[label]['names'].map(gene_sums)
    dedict[label]['zscores'] = dedict[label]['names'].map(gene_zscores)

Join Means

In [19]:
# Every ordered (source, target) combination of labels, self-pairs included.
# 'source' cycles fastest; 'target' changes once per full pass over the labels.
source_grid, target_grid = np.meshgrid(labels, labels)
pairs = pd.DataFrame(
    np.column_stack([source_grid.ravel(), target_grid.ravel()]),
    columns=["source", "target"],
)

In [20]:
def join_means(source, target):
    """Attach source-group ligand stats and target-group receptor stats to the resource.

    Reads the notebook-level ``dedict`` (per-label statistics tables) and
    ``resource`` (one row per ligand/receptor pair).

    Parameters
    ----------
    source
        Key into ``dedict`` for the group whose statistics describe the ligand.
    target
        Key into ``dedict`` for the group whose statistics describe the receptor.

    Returns
    -------
    pandas.DataFrame
        ``resource`` inner-joined with the source table on ``ligand`` and with the
        target table on ``receptor``. Statistic columns are prefixed with
        ``ligand_`` / ``receptor_``; the group labels appear as ``source`` / ``target``.
    """
    # Prefix every column, then restore the join key and the group-label column names.
    source_stats = (
        dedict[source]
        .add_prefix('ligand_')
        .rename(columns={'ligand_names': 'ligand', 'ligand_label': 'source'})
    )
    target_stats = (
        dedict[target]
        .add_prefix('receptor_')
        .rename(columns={'receptor_names': 'receptor', 'receptor_label': 'target'})
    )

    # Name the join keys explicitly instead of relying on whichever columns
    # happen to be shared between the frames.
    bound = (
        resource
        .merge(source_stats, on='ligand')
        .merge(target_stats, on='receptor')
    )

    return bound

In [21]:
# Build the joined table for every (source, target) pair and stack them row-wise.
lr_res = pd.concat(
    [join_means(pair.source, pair.target) for pair in pairs.itertuples(index=False)]
)

Recomplexify

Re-implement logFC (mean of the ligand and receptor log fold changes)

In [22]:
# Interaction-level logFC: row-wise mean of the ligand and receptor log fold changes.
lr_res['logfc'] = lr_res.loc[
    :, ['ligand_logfoldchanges', 'receptor_logfoldchanges']
].mean(axis=1)

In [23]:
# Show interactions ordered by the magnitude of logfc, largest first.
lr_res.sort_values('logfc', key=lambda col: col.abs(), ascending=False)

Unnamed: 0,interaction,ligand,receptor,ligand_scores,ligand_logfoldchanges,ligand_pvals,ligand_pvals_adj,source,ligand_sums,ligand_zscores,receptor_scores,receptor_logfoldchanges,receptor_pvals,receptor_pvals_adj,target,receptor_sums,receptor_zscores,logfc
341,GAL|GRM7,GAL,GRM7,2.207681,0.401056,0.027266,0.581712,a,102.429787,0.299327,2.547325,0.502969,0.010855,0.581712,a,113.844604,0.398513,0.452012
336,CXCL1|GRM7,CXCL1,GRM7,2.800275,0.392196,0.005106,0.585040,b,127.167938,0.312154,2.547325,0.502969,0.010855,0.581712,a,113.844604,0.398513,0.447582
353,CXCL16|GRM7,CXCL16,GRM7,0.988332,0.346034,0.322990,0.685068,b,125.462364,0.227654,2.547325,0.502969,0.010855,0.581712,a,113.844604,0.398513,0.424501
1351,CGA|VIPR1,CGA,VIPR1,-1.387586,-0.296749,0.165263,0.760820,c,66.520668,-0.265662,-2.122771,-0.533992,0.033773,0.581712,a,100.511772,-0.343919,-0.415370
1355,GHRH|VIPR1,GHRH,VIPR1,-1.850114,-0.286289,0.064297,0.760820,c,75.305580,-0.396659,-2.122771,-0.533992,0.033773,0.581712,a,100.511772,-0.343919,-0.410140
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
593,GDF5|ACVR2A_BMPR1B,GDF5,ACVR2A,0.277517,0.111716,0.781383,0.941618,c,81.831001,0.123509,-0.424554,-0.111612,0.671162,0.918480,a,100.255318,-0.134804,0.000052
937,CLEC2D|KLRB1,CLEC2D,KLRB1,-1.017563,-0.103031,0.308886,0.829508,c,75.864868,-0.089887,1.018930,0.103119,0.308236,0.764089,a,114.379578,0.131088,0.000044
1067,WNT8B|FZD9_LRP5,WNT8B,FZD9,-0.092506,-0.031867,0.926296,0.958933,c,80.634926,-0.061871,0.370023,0.031952,0.711365,0.941264,c,79.366142,0.039840,0.000042
1068,WNT8B|FZD9_LRP6,WNT8B,FZD9,-0.092506,-0.031867,0.926296,0.958933,c,80.634926,-0.061871,0.370023,0.031952,0.711365,0.941264,c,79.366142,0.039840,0.000042
