In [214]:
import scanpy as sc
from scipy.stats import gmean
import pandas as pd
import numpy as np
import decoupler as dc
import pickle


Load anndata and resource

In [3]:
# Load the test AnnData object; the path is relative to the notebook's working directory.
adata = sc.read_h5ad("../test.h5ad")

In [29]:
# Build the ligand-receptor resource in one pass:
#   1. read the consensus table and keep only the two gene-symbol columns,
#   2. rename them to ligand / receptor and derive the "ligand|receptor" interaction id,
#   3. decomplexify: split "A_B" complexes into subunits and expand to one row per
#      ligand-subunit x receptor-subunit combination, keeping the interaction id.
resource = (
    pd.read_csv("../consensus.csv", index_col=False)
    .loc[:, ['source_genesymbol', 'target_genesymbol']]
    .rename(columns={'source_genesymbol': 'ligand',
                     'target_genesymbol': 'receptor'})
    .assign(interaction=lambda df: df['ligand'] + '|' + df['receptor'])
    .set_index('interaction')
    .apply(lambda column: column.str.split('_'))
    .explode(['receptor'])
    .explode('ligand')
    .reset_index()
)

In [37]:
# Recover the complex-level names from the interaction id ("ligand_complex|receptor_complex").
resource[['ligand_complex', 'receptor_complex']] = (
    resource['interaction'].str.split('|', expand=True)
)

In [39]:
# Unique gene symbols on each side of the resource, and the union of both sides.
ligands, receptors = (np.unique(resource[side]) for side in ("ligand", "receptor"))
entities = np.union1d(ligands, receptors)

In [40]:
# Categories of the categorical 'label' column in adata.obs (the cell groups to compare).
labels = adata.obs['label'].cat.categories

Index(['a', 'b', 'c'], dtype='object')

Process adata

In [42]:
# Keep the matrix currently held in .X under the 'counts' layer before .X is replaced below.
# NOTE(review): presumably .X holds raw counts at this point -- confirm against the input file.
adata.layers['counts'] = adata.X

In [43]:
# lognorm should be the default (expected): use the 'logcounts' layer as the main matrix.
adata.X = adata.layers['logcounts']
# Store the output of sc.pp.scale as a separate layer; copy=True leaves adata.X itself unscaled.
adata.layers['scaled'] = sc.pp.scale(adata, copy=True).X

In [44]:
# Get global mean for SCA before filtering: a single scalar over every entry of adata.X
# (the 'logcounts' matrix assigned above), taken while all genes are still present.
global_mean = np.mean(adata.X)

In [45]:
# Filter to only include the relevant genes (those present in the resource).
# np.intersect1d returns the shared gene names sorted and de-duplicated.
# Take an explicit copy: subsetting alone yields a view of the original AnnData, and
# writing results into a view later on forces an implicit copy with a warning.
adata = adata[:, np.intersect1d(entities, adata.var.index)].copy()

In [46]:
# Rank genes for each 'label' group with the Wilcoxon method; the results are read back
# per label through sc.get.rank_genes_groups_df in the next step.
sc.tl.rank_genes_groups(adata, 'label', method='wilcoxon')

  self.data[key] = value
  next(self.gen)


In [47]:
# One differential-expression table per label, tagged with its label and sorted by gene name.
dedict = {
    label: (
        sc.get.rank_genes_groups_df(adata, label)
        .assign(label=label)
        .sort_values('names')
    )
    for label in labels
}

Calculate Mean, Sum and z-scores by group

In [48]:
# Sanity check: every per-label table must list genes in exactly the order of adata.var_names,
# because the per-group statistics are attached to these tables positionally.
# Checks all labels rather than only the first one.
all(list(adata.var_names) == list(dedict[label]['names']) for label in labels)

True

In [63]:
# Attach per-group summary statistics to each label's table.
# Values are assigned positionally, so the rows of dedict[label] must follow adata.var_names.
for label in labels:
    temp = adata[adata.obs.label.isin([label])]
    # np.asarray(...).ravel() flattens the (1, n_genes) np.matrix that sparse matrices
    # return from axis reductions; for dense 1-D results it is a no-op.
    dedict[label]['sums'] = np.asarray(temp.X.sum(0)).ravel()
    dedict[label]['means'] = np.asarray(temp.X.mean(0)).ravel()
    dedict[label]['zscores'] = np.asarray(temp.layers['scaled'].mean(0)).ravel()

Join Means

In [64]:
# All ordered (source, target) label combinations; source cycles fastest, target slowest.
pairs = pd.DataFrame({
    "source": np.tile(np.asarray(labels), np.size(labels)),
    "target": np.repeat(np.asarray(labels), np.size(labels)),
})

In [65]:
def join_means(source, target):
    """Pair the per-gene statistics of a source and a target group along the resource.

    Reads the module-level ``dedict`` (per-label statistics tables) and
    ``resource`` (decomplexified ligand-receptor table).

    Parameters
    ----------
    source
        Key of ``dedict`` whose table is attached to the ligands. Its columns
        are prefixed with ``ligand_``; ``names`` becomes ``ligand`` and
        ``label`` becomes ``source``.
    target
        Key of ``dedict`` whose table is attached to the receptors. Its columns
        are prefixed with ``receptor_``; ``names`` becomes ``receptor`` and
        ``label`` becomes ``target``.

    Returns
    -------
    pandas.DataFrame
        ``resource`` inner-joined with the source statistics on ``ligand`` and
        with the target statistics on ``receptor``.
    """
    source_stats = (
        dedict[source]
        .add_prefix('ligand_')
        .rename(columns={'ligand_names': 'ligand', 'ligand_label': 'source'})
    )
    target_stats = (
        dedict[target]
        .add_prefix('receptor_')
        .rename(columns={'receptor_names': 'receptor', 'receptor_label': 'target'})
    )

    # Join keys are spelled out so that an accidental extra shared column name
    # can never silently become part of the join condition.
    return (
        resource
        .merge(source_stats, on='ligand')
        .merge(target_stats, on='receptor')
    )

In [66]:
# Stack the joined statistics of every (source, target) pair into one long table.
lr_res = pd.concat(list(map(join_means, pairs['source'], pairs['target'])))

Recomplexify

In [229]:
# Grouping keys: one group per complex-level interaction between a source and a target label.
grps = ['source', 'target', 'ligand_complex', 'receptor_complex']

In [230]:
# Keep only the identifiers and the mean-expression columns needed to recomplexify.
temp = lr_res.loc[:, [
    'source', 'target',
    'ligand', 'receptor',
    'ligand_complex', 'receptor_complex',
    'ligand_means', 'receptor_means',
]]

In [235]:
# Restrict to one complex-complex interaction as a worked example.
# Masks are passed as plain boolean arrays so selection stays purely positional.
temp = (
    temp.loc[temp['ligand_complex'].eq("INHBA_INHBB").to_numpy()]
    .sort_values('receptor_complex')
)
temp = temp.loc[temp['receptor_complex'].eq("ACVR1B_ACVR2A").to_numpy()]

In [236]:
# NOTE: this rebinds `temp` from a DataFrame to a GroupBy object, so the cell cannot be
# re-run on its own; the underlying frame stays reachable as `temp.obj`.
temp = temp.groupby(grps)

In [237]:
# List the (source, target, ligand_complex, receptor_complex) keys of the groups.
temp.groups.keys()

dict_keys([('a', 'a', 'INHBA_INHBB', 'ACVR1B_ACVR2A'), ('a', 'b', 'INHBA_INHBB', 'ACVR1B_ACVR2A'), ('a', 'c', 'INHBA_INHBB', 'ACVR1B_ACVR2A'), ('b', 'a', 'INHBA_INHBB', 'ACVR1B_ACVR2A'), ('b', 'b', 'INHBA_INHBB', 'ACVR1B_ACVR2A'), ('b', 'c', 'INHBA_INHBB', 'ACVR1B_ACVR2A'), ('c', 'a', 'INHBA_INHBB', 'ACVR1B_ACVR2A'), ('c', 'b', 'INHBA_INHBB', 'ACVR1B_ACVR2A'), ('c', 'c', 'INHBA_INHBB', 'ACVR1B_ACVR2A')])

Custom mean0 function:


In [238]:
# Importing reduce for 
# rolling computations
from functools import reduce
  
# Custom aggregation function: zero-aware mean
def mean0(series):
    """Return the arithmetic mean of ``series``, or 0 if any element is 0.

    Parameters
    ----------
    series
        Sequence or Series of numeric values.

    Returns
    -------
    float
        ``0.0`` when the input is empty or contains at least one zero,
        otherwise the mean of all values.
    """
    values = np.asarray(series, dtype=float)
    # Test the elements themselves, not running partial sums, so that values which
    # cancel out (e.g. 1 and -1) are not mistaken for a zero element.
    if values.size == 0 or (values == 0).any():
        return 0.0
    return float(values.mean())

In [239]:
from numpy import mean

In [319]:
# Per group: geometric mean (scipy gmean) and minimum of the 'ligand_means' values.
ligand_temp = temp['ligand_means'].agg([gmean, 'min']).reset_index().copy()

In [320]:
# Prefix the aggregate columns with 'ligand_' so they are unambiguous once merged with other tables.
ligand_temp = ligand_temp.rename(columns={'gmean':'ligand_gmean', 'min':'ligand_min'})

In [325]:
# Inspect the per-group ligand aggregates.
ligand_temp

Unnamed: 0,source,target,ligand_complex,receptor_complex,ligand_gmean,ligand_min
0,a,a,INHBA_INHBB,ACVR1B_ACVR2A,3.159712,2.868171
1,a,b,INHBA_INHBB,ACVR1B_ACVR2A,3.159712,2.868171
2,a,c,INHBA_INHBB,ACVR1B_ACVR2A,3.159712,2.868171
3,b,a,INHBA_INHBB,ACVR1B_ACVR2A,3.223372,2.986059
4,b,b,INHBA_INHBB,ACVR1B_ACVR2A,3.223372,2.986059
5,b,c,INHBA_INHBB,ACVR1B_ACVR2A,3.223372,2.986059
6,c,a,INHBA_INHBB,ACVR1B_ACVR2A,3.25405,2.911114
7,c,b,INHBA_INHBB,ACVR1B_ACVR2A,3.25405,2.911114
8,c,c,INHBA_INHBB,ACVR1B_ACVR2A,3.25405,2.911114


In [328]:
# Per group: geometric mean and minimum of the 'receptor_means' values,
# with the aggregate columns prefixed by 'receptor_'.
receptor_temp = (
    temp['receptor_means']
    .agg([gmean, 'min'])
    .reset_index()
    .rename(columns={'gmean': 'receptor_gmean', 'min': 'receptor_min'})
)
receptor_temp

Unnamed: 0,source,target,ligand_complex,receptor_complex,receptor_gmean,receptor_min
0,a,a,INHBA_INHBB,ACVR1B_ACVR2A,3.353771,3.234042
1,a,b,INHBA_INHBB,ACVR1B_ACVR2A,3.451293,3.367968
2,a,c,INHBA_INHBB,ACVR1B_ACVR2A,3.389094,3.215342
3,b,a,INHBA_INHBB,ACVR1B_ACVR2A,3.353771,3.234042
4,b,b,INHBA_INHBB,ACVR1B_ACVR2A,3.451293,3.367968
5,b,c,INHBA_INHBB,ACVR1B_ACVR2A,3.389094,3.215342
6,c,a,INHBA_INHBB,ACVR1B_ACVR2A,3.353771,3.234042
7,c,b,INHBA_INHBB,ACVR1B_ACVR2A,3.451293,3.367968
8,c,c,INHBA_INHBB,ACVR1B_ACVR2A,3.389094,3.215342


In [330]:
# Recomplexify (ligand side): keep, per group, only the row whose ligand subunit
# expression equals the group minimum, and carry the ligand aggregates along.
temp3 = temp.obj.merge(
    ligand_temp,
    left_on=grps + ['ligand_means'],
    right_on=grps + ['ligand_min'],
)

In [335]:
# Recomplexify (receptor side): same minimum-subunit selection for the receptors.
temp3 = pd.merge(
    temp3,
    receptor_temp,
    left_on=grps + ['receptor_means'],
    right_on=grps + ['receptor_min'],
)

In [337]:
# After the min-based merges, ligand_means / receptor_means hold the expression of the
# lowest-expressed subunit of each complex, not an average over its subunits.
temp3.sort_values('receptor_means')

Unnamed: 0,source,target,ligand,receptor,ligand_complex,receptor_complex,ligand_means,receptor_means,ligand_gmean,ligand_min,receptor_gmean,receptor_min
1,b,c,INHBA,ACVR2A,INHBA_INHBB,ACVR1B_ACVR2A,2.986059,3.215342,3.223372,2.986059,3.389094,3.215342
4,a,c,INHBA,ACVR2A,INHBA_INHBB,ACVR1B_ACVR2A,2.868171,3.215342,3.159712,2.868171,3.389094,3.215342
8,c,c,INHBA,ACVR2A,INHBA_INHBB,ACVR1B_ACVR2A,2.911114,3.215342,3.25405,2.911114,3.389094,3.215342
0,a,a,INHBA,ACVR2A,INHBA_INHBB,ACVR1B_ACVR2A,2.868171,3.234042,3.159712,2.868171,3.353771,3.234042
2,c,a,INHBA,ACVR2A,INHBA_INHBB,ACVR1B_ACVR2A,2.911114,3.234042,3.25405,2.911114,3.353771,3.234042
7,b,a,INHBA,ACVR2A,INHBA_INHBB,ACVR1B_ACVR2A,2.986059,3.234042,3.223372,2.986059,3.353771,3.234042
3,a,b,INHBA,ACVR2A,INHBA_INHBB,ACVR1B_ACVR2A,2.868171,3.367968,3.159712,2.868171,3.451293,3.367968
5,b,b,INHBA,ACVR2A,INHBA_INHBB,ACVR1B_ACVR2A,2.986059,3.367968,3.223372,2.986059,3.451293,3.367968
6,c,b,INHBA,ACVR2A,INHBA_INHBB,ACVR1B_ACVR2A,2.911114,3.367968,3.25405,2.911114,3.451293,3.367968


Re-implement logFC

In [53]:
# Interaction logFC: row-wise mean of the ligand and receptor log fold changes.
logfc_columns = ['ligand_logfoldchanges', 'receptor_logfoldchanges']
lr_res['logfc'] = lr_res[logfc_columns].mean(axis=1)

In [54]:
# Rank interactions by absolute logFC, strongest first.
lr_res.sort_values(by='logfc', key=abs, ascending=False)

Unnamed: 0,interaction,ligand,receptor,ligand_complex,receptor_complex,ligand_scores,ligand_logfoldchanges,ligand_pvals,ligand_pvals_adj,source,ligand_sums,ligand_zscores,receptor_scores,receptor_logfoldchanges,receptor_pvals,receptor_pvals_adj,target,receptor_sums,receptor_zscores,logfc
341,GAL|GRM7,GAL,GRM7,GAL,GRM7,2.207681,0.401056,0.027266,0.581712,a,102.429787,0.299327,2.547325,0.502969,0.010855,0.581712,a,113.844604,0.398513,0.452012
336,CXCL1|GRM7,CXCL1,GRM7,CXCL1,GRM7,2.800275,0.392196,0.005106,0.585040,b,127.167938,0.312154,2.547325,0.502969,0.010855,0.581712,a,113.844604,0.398513,0.447582
353,CXCL16|GRM7,CXCL16,GRM7,CXCL16,GRM7,0.988332,0.346034,0.322990,0.685068,b,125.462364,0.227654,2.547325,0.502969,0.010855,0.581712,a,113.844604,0.398513,0.424501
1351,CGA|VIPR1,CGA,VIPR1,CGA,VIPR1,-1.387586,-0.296749,0.165263,0.760820,c,66.520668,-0.265662,-2.122771,-0.533992,0.033773,0.581712,a,100.511772,-0.343919,-0.415370
1355,GHRH|VIPR1,GHRH,VIPR1,GHRH,VIPR1,-1.850114,-0.286289,0.064297,0.760820,c,75.305580,-0.396659,-2.122771,-0.533992,0.033773,0.581712,a,100.511772,-0.343919,-0.410140
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
593,GDF5|ACVR2A_BMPR1B,GDF5,ACVR2A,GDF5,ACVR2A_BMPR1B,0.277517,0.111716,0.781383,0.941618,c,81.831001,0.123509,-0.424554,-0.111612,0.671162,0.918480,a,100.255318,-0.134804,0.000052
937,CLEC2D|KLRB1,CLEC2D,KLRB1,CLEC2D,KLRB1,-1.017563,-0.103031,0.308886,0.829508,c,75.864868,-0.089887,1.018930,0.103119,0.308236,0.764089,a,114.379578,0.131088,0.000044
1067,WNT8B|FZD9_LRP5,WNT8B,FZD9,WNT8B,FZD9_LRP5,-0.092506,-0.031867,0.926296,0.958933,c,80.634926,-0.061871,0.370023,0.031952,0.711365,0.941264,c,79.366142,0.039840,0.000042
1068,WNT8B|FZD9_LRP6,WNT8B,FZD9,WNT8B,FZD9_LRP6,-0.092506,-0.031867,0.926296,0.958933,c,80.634926,-0.061871,0.370023,0.031952,0.711365,0.941264,c,79.366142,0.039840,0.000042
