In [9]:
import scanpy as sc
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix

Load anndata and resource

In [17]:
adata = sc.datasets.pbmc3k()
adata.X

<2700x32738 sparse matrix of type '<class 'numpy.float32'>'
	with 2286884 stored elements in Compressed Sparse Row format>

In [18]:
adata = sc.read_h5ad("../test.h5ad")

In [44]:
adata.X = csr_matrix(adata.X.astype(np.int64))

In [45]:
adata.X

<90x10249 sparse matrix of type '<class 'numpy.int64'>'
	with 103311 stored elements in Compressed Sparse Row format>

In [46]:
np.random.permutation(adata.X)

IndexError: x must be an integer or at least 1-dimensional

In [21]:
resource = pd.read_csv("../consensus.csv", index_col=False)
resource = resource[['source_genesymbol', 'target_genesymbol']]
resource = resource.rename(columns={'source_genesymbol':'ligand',
                                    'target_genesymbol':'receptor'})
resource['interaction'] = resource['ligand'] + '|' + resource['receptor']

# Decomplexify
resource = (resource.set_index('interaction')
            .apply(lambda x: x.str.split('_'))
            .explode(['receptor'])
            .explode('ligand')
            .reset_index()
           )

In [22]:
resource[['ligand_complex','receptor_complex']] = resource['interaction'].str.split('|',expand=True)

In [None]:
ligands = np.unique(resource["ligand"])
receptors = np.unique(resource["receptor"])
entities = np.union1d(ligands, receptors)

In [None]:
labels = adata.obs.label.cat.categories

In [None]:
labels

Process adata

In [None]:
# adata.layers['counts'] = adata.X

In [None]:
# lognorm should be the default (expected)
adata.X = adata.layers['logcounts']
adata.layers['scaled'] = sc.pp.scale(adata, copy=True).X

In [None]:
# Get global mean for SCA before filtering
mat_mean = np.mean(adata.X)

In [None]:
# Filter to only include the relevant genes
adata = adata[:,np.intersect1d(entities, adata.var.index)]

In [None]:
sc.tl.rank_genes_groups(adata, 'label', method='wilcoxon')

In [None]:
dedict = {label:sc.get.rank_genes_groups_df(adata, label).assign(label=label).sort_values('names') for label in labels}

Calculate Mean, Sum and z-scores by group

In [None]:
# check if all is gucci
list(adata.var_names) == list(dedict['B']['names'])

In [None]:
for label in labels: 
    temp = adata[adata.obs.label.isin([label])].copy()
    # dedict[label]['sums'] = temp.X.sum(0)
    dedict[label]['means'] = temp.X.mean(0).A.flatten()
    dedict[label]['zscores'] = temp.layers['scaled'].mean(0)

Join Means

In [None]:
pairs = pd.DataFrame(np.array(np.meshgrid(labels, labels)).reshape(2, np.size(labels) * np.size(labels)).T).rename(columns={0: "source", 1: "target"})

In [None]:
def join_means(source, target):
    source_stats = dedict[source].copy()
    source_stats.columns = source_stats.columns.map(lambda x: 'ligand_' + str(x))
    source_stats = source_stats.rename(columns={'ligand_names':'ligand', 'ligand_label':'source'})
    
    target_stats = dedict[target].copy()
    target_stats.columns = target_stats.columns.map(lambda x: 'receptor_' + str(x))
    target_stats = target_stats.rename(columns={'receptor_names':'receptor', 'receptor_label':'target'})
    
    bound = resource.merge(source_stats).merge(target_stats)
    
    return bound

In [None]:
lr_res = pd.concat([join_means(source, target) for source, target in zip(pairs['source'], pairs['target'])])

In [None]:
lr_res['mat_mean'] = mat_mean

Recomplexify

In [None]:
# def recomplexify(lr_res, grps, complex_cols, complex_policy = 'min'):


# def _reduce_complexes():
    

In [None]:
complex_policy = 'min'

In [None]:
grps = ['source', 'target', 'ligand_complex', 'receptor_complex']

In [None]:
complex_cols = ['ligand_means' , 'receptor_means'] # specific for every method

In [None]:
add_cols = ['mat_mean']

In [None]:
# subset /w only means here - to be extended to all columns
lr_res = lr_res[ grps + complex_cols + add_cols ]

In [None]:
# temp = temp[[x=="INHBA_INHBB" for x in temp['ligand_complex']]].sort_values('receptor_complex')
# temp = temp[[x=="ACVR1B_ACVR2A" for x in temp['receptor_complex']]]

In [None]:
lr_res = lr_res.groupby(grps)

In [None]:
lr_res.obj

In [None]:
# Functions to be used to reduce the complexes
aggs = set([complex_policy, 'min']) # set to remove if both are min

In [None]:
complex_cols

In [None]:
cols_dict = {}

for col in complex_cols:
    cols_dict[col] = lr_res[col].agg(aggs).reset_index().copy().\
    rename(columns={agg:col.split('_')[0] + '_' + agg for agg in aggs})

In [None]:
lr_res = lr_res.obj.copy()

In [None]:
lr_res

In [None]:
for col in complex_cols:
    # left is lr_res /w the actual column name
    left_on = grps + [col]
    # right is the min subunit for that column
    join_key = col.split('_')[0] + '_min' # ligand_min or receptor_min
    right_on = grps + [join_key]
    
    # Here, I join the min value and keep only those rows that match
    lr_res = lr_res.merge(cols_dict[col], left_on=left_on, right_on=right_on).drop(join_key, 1)


In [None]:
lr_res

In [None]:
# More than those in LIANA - why? Duplicates?

Calculate means_sums for NATMI

In [None]:
def _sum_means(lr_res, what, on):
    return lr_res.join(lr_res.groupby(on)[what].sum(), on=on, rsuffix='_sums')

In [None]:
lr_res = _sum_means(lr_res, what='ligand_means', on=['ligand_complex', 'receptor_complex', 'target'])
lr_res = _sum_means(lr_res, what='receptor_means', on=['ligand_complex', 'receptor_complex', 'source'])

In [None]:
lr_res

NATMI fun

In [None]:
def _natmi_score(x):
    lig = (x.ligand_means / x.ligand_means_sums)
    rec = (x.receptor_means / x.receptor_means_sums)
    return  lig * rec

In [None]:
lr_res['edge_specificity'] = lr_res.apply(_natmi_score, axis=1)

In [None]:
lr_res.sort_values('edge_specificity', ascending=False)

SCA re-implement

In [None]:
def _sca_score(x):
    lr_sqrt = np.sqrt(x.ligand_means) * np.sqrt(x.receptor_means)
    return lr_sqrt / (lr_sqrt + x.mat_mean)

In [None]:
lr_res['lrscore'] = lr_res.apply(_sca_score, axis=1)

In [None]:
lr_res.sort_values('lrscore', ascending=False)

In [None]:
lr_res[(lr_res.ligand_complex=='TGFB1') & (lr_res.receptor_complex=='ACVR1_TGFBR1_TGFBR2')].sort_values('lrscore', ascending=False)

CellPhoneDB re-implement

In [None]:
temp = lr_res.iloc[:, 0:6].copy()

In [None]:
temp['lr_mean'] = lr_res[['ligand_means', 'receptor_means']].mean(1)

In [None]:
perms = {}
clusts = {}
ad = adata.copy()

rng = np.random.default_rng(seed=69)

In [None]:
for perm in range(100):
    for label in labels:
        temp.X = rng.permutation(ad.X)
        clusts[label] = temp.X.mean(0)
    perms[perm] = clusts.copy()

In [None]:
perms[perm]

In [None]:
from statsmodels.distributions.empirical_distribution import ECDF

In [None]:
ec = ECDF(perms[perm]['a'])

logFC re-implement

In [None]:
lr_res['logfc'] = lr_res[['ligand_logfoldchanges', 'receptor_logfoldchanges']].mean(1)

In [None]:
lr_res.sort_values(by='logfc', key=abs, ascending=False)

Re-implement Connectome

In [None]:
lr_res['edge_weight'] = lr_res[['ligand_zscores', 'receptor_zscores']].mean(1)

Custom min0 function:


In [None]:
# Importing reduce for 
# rolling computations
from functools import reduce
  
# define a Custom aggregation 
# function for finding total
def mean0(series):
      return reduce(lambda x, y:0  if 0 in (x, y) else (x + y), series)