# Specify the comparison of interest

In [1]:
#Specify which populations to use as source and target populations
#In this example major cell types were used as sourced populations and fibroblast subpopulations as target populations
#Choose between "major", "fib", "epi" (relevant due to naming of input files)
source='major'
target='fib'

#Specify the appendix to use in file names
name_appendix='MajorToFib'

#We performed this analysis individually for the following 9 combinations: 
#"MajorToFib", "MajorToEpi", "MajorToMajor"
#"FibToFib", "FibToEpi", "FibToMajor"
#"EpiToFib", "EpiToEpi", "EpiToMajor"

# Load libraries and custom scripts

In [2]:
#Set path
path='/mnt/c/Users/User/Jacob-Et-Al/Interaction-Analysis'

In [3]:
#Load the required packages
import sys
sys.path.append(path)
import os
from IPython.display import clear_output
import pandas as pd
import numpy as np
import time
import multiprocessing
from functools import partial
import statsmodels.stats.multitest as st

In [4]:
#Choose input and output folder
path_input = os.path.join(path, 'data_input')
path_output = os.path.join(path, 'data_output')

In [5]:
#Define function that can finds receptor-ligand pairs between specified cluster pairs and quantify them
def quantify_LigRec(gene_dict_Lig, gene_dict_Rec, pairs):
    output_df = pd.DataFrame(index = gene_dict_Lig.keys(), columns = gene_dict_Rec.keys())
    output_dict = {}
    output_quant_df = pd.DataFrame(index = gene_dict_Lig.keys(), columns = gene_dict_Rec.keys())
    output_quant_dict = {}
    
    for kLig in gene_dict_Lig.keys():
        #This list comprehension is necessary to include only genes that are also present in the RL pairs dataframe. Otherwise throws an error
        lig_exp = np.intersect1d(pairs.index,gene_dict_Lig[kLig])
        
        for kRec in gene_dict_Rec.keys():
            rec_exp = gene_dict_Rec[kRec]
            rl = []
            for l in lig_exp:
                for r in rec_exp:
                    if np.any(pairs.loc[l] == r):
                        rl.append((l,r))
            
            output_df.loc[kLig,kRec] = rl
            output_dict['%s - %s' % (kLig,kRec)] = rl
            output_quant_df.loc[kLig,kRec] = len(rl)
            output_quant_dict['%s - %s' % (kLig,kRec)] = len(rl)
            
    return output_df, output_dict, output_quant_df, output_quant_dict

In [6]:
#Define function that can simulate receptor-ligand pairs between specified cluster pairs and quantify them
def sim_lig_rec(dict_rec, dict_lig, pairs, repeats):
    if __name__=='__main__':
        output = {}
        pool = multiprocessing.Pool(processes=12)
        for Lig_ix, kLig in enumerate(dict_lig.keys()):
            nLig = len(dict_lig[kLig])
            start_source = time.time()
            for Rec_ix, kRec in enumerate(dict_rec.keys()):
                nRec = len(dict_rec[kRec])
                print('Ligand cluster: {}/{}, Receptor cluster: {}/{}'.format(Lig_ix+1, len(dict_lig.keys()), Rec_ix+1, len(dict_rec.keys())))
                output_tmp = list(pool.map(partial(mapFunction, pairs=pairs, nLig=nLig, nRec=nRec), range(repeats)))
                output['%s - %s' % (kLig,kRec)] = output_tmp
            print('time per source cluster ',time.time()-start_source)
        return output

In [7]:
#Define function that can calculate p-value by comparing observed and simulated receptor-ligand pairs
def sim_lig_reg_get_p(obs, sim):
    output_df = pd.DataFrame()
    for k in sim.keys():
        if k in obs.keys():
            output_df.loc[k.split(' - ')[0], k.split(' - ')[1]] = np.sum(np.array(sim[k])>=obs[k]) / len(sim[k])        
    return output_df

In [8]:
#Define function for running other functions with parallel computing
def mapFunction(repeatIndx, pairs, nLig, nRec):
    gLig = np.random.choice(np.unique(pairs.index), size=nLig, replace=False)
    gRec = np.random.choice(np.unique(pairs.values), size=nRec, replace=False)
    return len(np.intersect1d(pairs.loc[gLig].values, gRec))

# Import receptor-ligand database

In [9]:
#Import receptor-ligand database
#The curated receptor-ligand databases from (Ramilowski et al., 2015) and (Cabello-Aguilar et al., 2020) were combined to obtain an even more complete set of potential interactions
PairsLigRec_mm = pd.read_csv(os.path.join(path_input,'LRdb_combined.csv'), sep = ';', header = 0, index_col=0, low_memory = False, squeeze = True)
print('PairsLigRec_mm')
print(PairsLigRec_mm)

PairsLigRec_mm
             Ligand.ApprovedSymbol_mm Receptor.ApprovedSymbol_mm
A2m-Lrp1                          A2m                       Lrp1
Aanat-Mtnr1a                    Aanat                     Mtnr1a
Aanat-Mtnr1b                    Aanat                     Mtnr1b
Ace-Agtr2                         Ace                      Agtr2
Ace-Bdkrb2                        Ace                     Bdkrb2
...                               ...                        ...
Sbpl-Tlr4                        Sbpl                       Tlr4
Sbp-Tlr5                          Sbp                       Tlr5
Sbpl-Tlr5                        Sbpl                       Tlr5
Sbp-Tlr6                          Sbp                       Tlr6
Sbpl-Tlr6                        Sbpl                       Tlr6

[3424 rows x 2 columns]


In [10]:
#Extract all ligands or receptors, respectively
ligands=PairsLigRec_mm['Ligand.ApprovedSymbol_mm'].tolist()
receptors=PairsLigRec_mm['Receptor.ApprovedSymbol_mm'].tolist()

In [11]:
#Convert the PairsLigRec_mm dataframe to a series where ligands are in the index
Pairs_LigRec_mm_S = pd.Series(data = PairsLigRec_mm['Receptor.ApprovedSymbol_mm'].values, index = PairsLigRec_mm['Ligand.ApprovedSymbol_mm'])
print(Pairs_LigRec_mm_S)

Ligand.ApprovedSymbol_mm
A2m        Lrp1
Aanat    Mtnr1a
Aanat    Mtnr1b
Ace       Agtr2
Ace      Bdkrb2
          ...  
Sbpl       Tlr4
Sbp        Tlr5
Sbpl       Tlr5
Sbp        Tlr6
Sbpl       Tlr6
Length: 3424, dtype: object


# Import Data (source dataset)

In [12]:
#Import pre-computed differentially expressed genes
dge = pd.read_csv(os.path.join(path_input,'Cluster_marker_genes_'+source+'.csv'), sep = ';', header = 0, index_col=0, low_memory = False, squeeze = True)
print('dge-'+source)
print(dge)

dge-major
                  p_val    avg_logFC  pct.1  pct.2 p_val_adj  \
Krt14                 0  2,935752564  0,944  0,049         0   
Krt5                  0  2,658551194  0,986  0,035         0   
Perp                  0  2,582795445  0,992  0,035         0   
Sfn                   0  2,556360688  0,987  0,042         0   
Krt15                 0  2,489261449  0,871  0,027         0   
...                 ...          ...    ...    ...       ...   
Gnl31          5,43E-22  0,263368343  0,905   0,68  1,09E-17   
1700025G04Rik  9,88E-22  0,263833772  0,899  0,681  1,98E-17   
Dkc11          3,43E-20  0,250810614  0,863  0,632  6,87E-16   
Srm            2,86E-19  0,292000724  0,925  0,865  5,72E-15   
Cks2           5,69E-17  0,260401994  0,925  0,773  1,14E-12   

                         cluster           gene  Unnamed: 8  
Krt14                        EPI          Krt14         NaN  
Krt5                         EPI           Krt5         NaN  
Perp                         EPI   

In [13]:
#Identify all ligands among differentially expressed genes of source populations
dge_lig = dge[dge.gene.isin(ligands)]
print('dge_lig-'+source)
print(dge_lig)
print(pd.DataFrame(dge_lig)['cluster'].value_counts())

dge_lig-major
            p_val    avg_logFC  pct.1  pct.2 p_val_adj            cluster  \
Pdgfa           0  1,606844344  0,852  0,153         0                EPI   
Fabp5           0  1,553960334  0,998  0,899         0                EPI   
Anxa1           0    1,2092633  0,534  0,062         0                EPI   
Col18a1         0  1,115434246    0,9  0,147         0                EPI   
Wnt4            0    0,9701761  0,849  0,281         0                EPI   
...           ...          ...    ...    ...       ...                ...   
Vcl      4,92E-39  0,325304116   0,84  0,468  9,83E-35  VESSEL MuralCells   
Arpc52   2,23E-37  0,299580116  0,987  0,902  4,46E-33  VESSEL MuralCells   
Mapk1    2,40E-35  0,287142422  0,954  0,758  4,81E-31  VESSEL MuralCells   
Lamb11   2,30E-31  0,294826335  0,915  0,596  4,61E-27  VESSEL MuralCells   
Tln1     1,26E-25  0,255181998  0,964  0,804  2,52E-21  VESSEL MuralCells   

            gene  Unnamed: 8  
Pdgfa      Pdgfa         NaN  

In [14]:
#Identify all receptors among differentially expressed genes of source populations
dge_rec = dge[dge.gene.isin(receptors)]
print('dge_rec-'+source)
print(dge_rec)
print(pd.DataFrame(dge_rec)['cluster'].value_counts())

dge_rec-major
            p_val    avg_logFC  pct.1  pct.2 p_val_adj            cluster  \
Gjb2            0  1,980937986   0,91  0,026         0                EPI   
Bcam            0  1,665976136  0,892  0,118         0                EPI   
Ifitm3          0  1,565638402  0,848  0,439         0                EPI   
Kremen2         0  1,490523892  0,809  0,034         0                EPI   
Cd9             0  1,421273752  0,959  0,483         0                EPI   
...           ...          ...    ...    ...       ...                ...   
Msn2     3,98E-49  0,337571482  0,931  0,586  7,96E-45  VESSEL MuralCells   
F2r1     8,96E-44  0,339690481   0,82  0,441  1,79E-39  VESSEL MuralCells   
Itgb12   2,19E-39  0,324306268  0,993  0,912  4,38E-35  VESSEL MuralCells   
Axl      6,19E-34  0,289903526  0,781  0,421  1,24E-29  VESSEL MuralCells   
Ifngr2   5,31E-28  0,252876967  0,814  0,509  1,06E-23  VESSEL MuralCells   

            gene  Unnamed: 8  
Gjb2        Gjb2         NaN  

In [15]:
#Use list (dictionary) comprehension to make the necessary ligand and receptor dictionaries
lig_dge_dict = {clust: list(dge_lig[dge_lig['cluster']==clust]['gene'].values.tolist()) for clust in set(dge_lig['cluster'])}
rec_dge_dict = {clust: list(dge_rec[dge_rec['cluster']==clust]['gene'].values.tolist()) for clust in set(dge_rec['cluster'])}
print('lig_dge_dict '+source)
print(lig_dge_dict)
print('rec_dge_dict '+source)
print(rec_dge_dict)

lig_dge_dict major
{'MUSCLE Early': ['Pdgfa', 'Fabp5', 'Hsp90aa1', 'Pdgfc', 'Fgf9', 'Tnc', 'Pkm', 'Tgfb2', 'Hspa8', 'Pdap1', 'Edn3', 'Fyn', 'Hmgb1'], 'NC SchwannCells': ['Npy', 'Cd200', 'Fst', 'Serpine2', 'Timp3', 'Prss23', 'Col18a1', 'Pdgfa', 'L1cam', 'Sema3c', 'Adam23', 'Sorbs1', 'Sema3g', 'Gdnf', 'Sema3b', 'Reln', 'Dhh', 'Nlgn3', 'Pon2', 'Col4a2', 'Sema4c', 'Lama4', 'Hbegf', 'Pros1', 'Adam12', 'Vcan', 'Hspg2', 'Vim', 'Cthrc1', 'Agrn', 'Lamc1', 'Col4a1', 'Bsg', 'Dkk2', 'Col14a1', 'App', 'Lamb1', 'Ngf'], 'IMMU MastCells': ['Hdc', 'Tyrobp', 'Selplg', 'Ccl17', 'Il4', 'Ecm1', 'Cd55', 'Icam4', 'Il6', 'Cd48', 'Itgb2', 'Icam2', 'Il13', 'Tnf', 'Tgfb1', 'Mfng', 'St6gal1', 'Alox5ap', 'Nmb', 'Gpi1', 'Vasp', 'Hsp90aa1', 'B2m', 'Angpt1', 'Arpc5', 'Cd47', 'Mfge8', 'Actr2', 'Hspa8', 'Tln1', 'Hspa1a', 'Calm1', 'Gm11808', 'Cd34', 'Calm3', 'Calm2', 'Pkm'], 'IMMU Macrophages': ['Pf4', 'Apoe', 'C1qb', 'C1qa', 'Tyrobp', 'F13a1', 'Mrc1', 'Lyz2', 'B2m', 'Gas6', 'Ccl24', 'Ly86', 'Lpl', 'Ccl9', 'Alox5ap', 'C

In [16]:
#Show an example of how these dictionaries look like
for i in lig_dge_dict.items():
    print(i, end = '\n\n')

('MUSCLE Early', ['Pdgfa', 'Fabp5', 'Hsp90aa1', 'Pdgfc', 'Fgf9', 'Tnc', 'Pkm', 'Tgfb2', 'Hspa8', 'Pdap1', 'Edn3', 'Fyn', 'Hmgb1'])

('NC SchwannCells', ['Npy', 'Cd200', 'Fst', 'Serpine2', 'Timp3', 'Prss23', 'Col18a1', 'Pdgfa', 'L1cam', 'Sema3c', 'Adam23', 'Sorbs1', 'Sema3g', 'Gdnf', 'Sema3b', 'Reln', 'Dhh', 'Nlgn3', 'Pon2', 'Col4a2', 'Sema4c', 'Lama4', 'Hbegf', 'Pros1', 'Adam12', 'Vcan', 'Hspg2', 'Vim', 'Cthrc1', 'Agrn', 'Lamc1', 'Col4a1', 'Bsg', 'Dkk2', 'Col14a1', 'App', 'Lamb1', 'Ngf'])

('IMMU MastCells', ['Hdc', 'Tyrobp', 'Selplg', 'Ccl17', 'Il4', 'Ecm1', 'Cd55', 'Icam4', 'Il6', 'Cd48', 'Itgb2', 'Icam2', 'Il13', 'Tnf', 'Tgfb1', 'Mfng', 'St6gal1', 'Alox5ap', 'Nmb', 'Gpi1', 'Vasp', 'Hsp90aa1', 'B2m', 'Angpt1', 'Arpc5', 'Cd47', 'Mfge8', 'Actr2', 'Hspa8', 'Tln1', 'Hspa1a', 'Calm1', 'Gm11808', 'Cd34', 'Calm3', 'Calm2', 'Pkm'])

('IMMU Macrophages', ['Pf4', 'Apoe', 'C1qb', 'C1qa', 'Tyrobp', 'F13a1', 'Mrc1', 'Lyz2', 'B2m', 'Gas6', 'Ccl24', 'Ly86', 'Lpl', 'Ccl9', 'Alox5ap', 'Ccl12', 'Trf',

# Import Data (target dataset)

In [17]:
#Import pre-computed differentially expressed genes
dge2 = pd.read_csv(os.path.join(path_input,'Cluster_marker_genes_'+target+'.csv'), sep = ';', header = 0, index_col=0, low_memory = False, squeeze = True)
print('dge-'+target)
print(dge2)

dge-fib
            p_val    avg_logFC  pct.1  pct.2    p_val_adj     cluster    gene  \
Crabp11         0  1,334346774      1  0,908            0  FIB Upper3  Crabp1   
Zeb22           0  1,122377993  0,989  0,496            0  FIB Upper3    Zeb2   
Crip12          0  1,014704042  0,993  0,671            0  FIB Upper3   Crip1   
Calb11          0  0,985096178  0,575  0,105            0  FIB Upper3   Calb1   
Nrgn3           0  0,982753356  0,882   0,39            0  FIB Upper3    Nrgn   
...           ...          ...    ...    ...          ...         ...     ...   
Trim8    1,91E-06  0,276753018  0,493  0,311  0,038245666  FIB LateDC   Trim8   
Kifap3   2,11E-06  0,369271519  0,901  0,832  0,042283423  FIB LateDC  Kifap3   
Taok3    2,13E-06  0,311521159  0,507  0,321  0,042697184  FIB LateDC   Taok3   
Aplp24   2,33E-06  0,259177903  0,803  0,627  0,046578778  FIB LateDC   Aplp2   
Mkrn1    2,47E-06  0,308291238  0,479  0,295  0,049489926  FIB LateDC   Mkrn1   

         Unnamed: 8

In [18]:
#Identify all ligands among differentially expressed genes of target populations
dge_lig2 = dge2[dge2.gene.isin(ligands)]
print ('dge_lig-'+target)
print(dge_lig2)
print(pd.DataFrame(dge_lig2)['cluster'].value_counts())

dge_lig-fib
            p_val    avg_logFC  pct.1  pct.2    p_val_adj     cluster    gene  \
Fbln11          0   0,94755609  0,996  0,885            0  FIB Upper3   Fbln1   
Penk2           0  0,744732756  0,672  0,245            0  FIB Upper3    Penk   
Sema6d1         0  0,722426292  0,748  0,162            0  FIB Upper3  Sema6d   
Lum3            0  0,663843352  0,997  0,923            0  FIB Upper3     Lum   
Col6a32         0  0,658108239  0,962  0,568            0  FIB Upper3  Col6a3   
...           ...          ...    ...    ...          ...         ...     ...   
Podxl2   1,14E-11  0,457469868  0,803  0,581     2,28E-07  FIB LateDC  Podxl2   
Gnai2    3,94E-10   0,35265238  0,986  0,981     7,88E-06  FIB LateDC   Gnai2   
Ntf3     6,57E-08  0,523412549   0,38  0,178  0,001314822  FIB LateDC    Ntf3   
Rtn41    2,80E-07  0,268560345      1  0,974  0,005592672  FIB LateDC    Rtn4   
Sema3d4  4,84E-07  0,278078938  0,465  0,225  0,009671824  FIB LateDC  Sema3d   

         Unname

In [19]:
#Identify all receptors among differentially expressed genes of target populations
dge_rec2 = dge2[dge2.gene.isin(receptors)]
print ('dge_rec-'+target)
print(dge_rec2)
print(pd.DataFrame(dge_rec2)['cluster'].value_counts())

dge_rec-fib
            p_val    avg_logFC  pct.1  pct.2    p_val_adj     cluster  \
Nrp13           0  0,827260024  0,995  0,729            0  FIB Upper3   
Pth1r2          0  0,757167775  0,994  0,847            0  FIB Upper3   
Thy12           0  0,724891138  0,851  0,367            0  FIB Upper3   
Robo21          0  0,666356631  0,891  0,515            0  FIB Upper3   
Plxna4          0  0,573241554  0,645  0,145            0  FIB Upper3   
...           ...          ...    ...    ...          ...         ...   
Ror2     8,03E-09  0,417689559   0,62    0,4  0,000160644  FIB LateDC   
Adipor1  1,02E-07  0,304764728  0,817  0,655  0,002031646  FIB LateDC   
Ifngr2   3,93E-07  0,497567386  0,648  0,513  0,007855654  FIB LateDC   
Smad3    1,84E-06  0,317157794  0,408   0,22  0,036842181  FIB LateDC   
Aplp24   2,33E-06  0,259177903  0,803  0,627  0,046578778  FIB LateDC   

            gene  Unnamed: 8  
Nrp13       Nrp1         NaN  
Pth1r2     Pth1r         NaN  
Thy12       Thy1  

In [20]:
#Use list (dictionary) comprehension to make the necessary ligand and receptor dictionaries
lig_dge_dict2 = {clust: list(dge_lig2[dge_lig2['cluster']==clust]['gene'].values.tolist()) for clust in set(dge_lig2['cluster'])}
rec_dge_dict2 = {clust: list(dge_rec2[dge_rec2['cluster']==clust]['gene'].values.tolist()) for clust in set(dge_rec2['cluster'])}
print('lig_dge_dict '+target)
print(lig_dge_dict2)
print('rec_dge_dict '+target)
print(rec_dge_dict2)

lig_dge_dict fib
{'FIB Inter1': ['Sema3c', 'Efemp1', 'Apod', 'Prss23', 'Edil3', 'Nrg1', 'Cd55', 'Adm', 'Fgf12', 'B2m', 'Col1a1', 'Sema3a', 'Fbn1', 'Col3a1', 'Rarres2', 'Col1a2', 'Dlk1', 'Sema3d', 'Sfrp1', 'Nid2', 'Wnt2', 'Col5a1', 'Igf2', 'Tac1', 'Dcn', 'Cd34', 'Igfbp4', 'Spon2', 'Dkk2', 'Fgf10', 'Col5a2', 'Timp2', 'Fn1', 'Thbs1', 'Efnb2', 'Kitl', 'Ptn', 'Serping1', 'Gpc3', 'Nov', 'Mfap5', 'Lgals1', 'Cthrc1', 'Tnfsf12', 'Plat', 'Angptl1', 'Fbln2', 'Efemp2', 'Sptan1', 'Apoe', 'Cyr61', 'Nid1'], 'FIB Muscle2': ['Dlk1', 'Nppc', 'Angptl1', 'Igf2', 'Col14a1', 'Timp3', 'Igfbp4', 'Thbs2', 'Gnas', 'Col3a1', 'Col1a1', 'Col1a2', 'Rarres2', 'Ptn', 'Nid2', 'Col6a1', 'Igf1', 'Col6a3', 'Dcn', 'Kitl', 'Col5a1', 'Lamb1', 'Col6a2', 'Gdf10', 'Plat', 'Cxcl12', 'Gas6', 'Lama4', 'B2m', 'Fgf7', 'Vcan', 'Ntn1', 'Col5a2', 'Hspg2', 'Fn1', 'Sfrp1'], 'FIB Upper4': ['Penk', 'Fbln1', 'Serpinc1', 'Col6a3', 'Sema6d', 'Col7a1', 'Fgf13', 'Sema3d', 'Lum', 'Vcam1', 'Col6a1', 'Col6a2', 'Bgn', 'Dcn', 'Hspa8', 'Cd47', 'Efna

# Determine Receptor-Ligand-Pairs (observed)

In [21]:
#Determine the (number of) observed receptor-ligand pairs for each cluster pair
ligrec_dge_df, ligrec_dge_dict, quant_ligrec_dge_df, quant_ligrec_dge_dict = quantify_LigRec(lig_dge_dict, 
                                                                                             rec_dge_dict2, 
                                                                                             Pairs_LigRec_mm_S)
print('ligrec_dge_df')
print(ligrec_dge_df)

ligrec_dge_df
                                                            FIB Inter1  \
MUSCLE Early                                             [(Pkm, Cd44)]   
NC SchwannCells      [(Col14a1, Cd44), (Hbegf, Cd44), (Reln, Vldlr)...   
IMMU MastCells                             [(Il6, Il6st), (Pkm, Cd44)]   
IMMU Macrophages     [(Apoc1, Vldlr), (Apoe, Vldlr), (Ebi3, Il6st),...   
NC Melanocytes       [(Apoe, Vldlr), (Nppb, Npr3), (Pkm, Cd44), (Vi...   
VESSEL BECs          [(Efna1, Epha7), (Gnai2, Agtr2), (Hbegf, Cd44)...   
EPI                  [(Apoc1, Vldlr), (Efna1, Epha7), (Ntf5, Ntrk2)...   
FIB                  [(Bdnf, Ntrk2), (Col14a1, Cd44), (Col1a1, Cd44...   
MUSCLE Mid                                               [(Pkm, Cd44)]   
VESSEL LECs          [(Efna1, Epha7), (Gnai2, Agtr2), (Reln, Vldlr)...   
IMMU DendriticCells  [(Apoc1, Vldlr), (Ebi3, Il6st), (Gnai2, Agtr2)...   
VESSEL MuralCells                        [(Timp3, Agtr2), (Vim, Cd44)]   
MUSCLE Late             

In [22]:
#Save output
ligrec_dge_df.to_csv(os.path.join(path_output,'ligrec_dge_df_'+name_appendix+'.csv'))
print('ligrec_dge_dict')
print(ligrec_dge_dict)
print('quant_ligrec_dge_df')
print(quant_ligrec_dge_df)

ligrec_dge_dict
{'MUSCLE Early - FIB Inter1': [('Pkm', 'Cd44')], 'MUSCLE Early - FIB Muscle2': [], 'MUSCLE Early - FIB Upper4': [('Fgf9', 'Fgfr1'), ('Fyn', 'Thy1')], 'MUSCLE Early - FIB Upper3': [('Edn3', 'Ednra'), ('Fgf9', 'Fgfr1'), ('Fyn', 'Thy1'), ('Hmgb1', 'Sdc1'), ('Tnc', 'Nt5e'), ('Tnc', 'Sdc1')], 'MUSCLE Early - CHOND': [('Fgf9', 'Fgfr3'), ('Fgf9', 'Fgfr2'), ('Hsp90aa1', 'Fgfr3'), ('Tnc', 'Itga9')], 'MUSCLE Early - FIB Origin5': [], 'MUSCLE Early - FIB Upper2': [('Fyn', 'Thy1'), ('Tnc', 'Nt5e')], 'MUSCLE Early - FIB Deep1': [('Edn3', 'Ednra'), ('Tnc', 'Itga8')], 'MUSCLE Early - FIB Lower': [('Fyn', 'Thy1'), ('Pdap1', 'Pdgfrb'), ('Pdgfa', 'Pdgfrb'), ('Pdgfc', 'Pdgfrb'), ('Tnc', 'Nt5e')], 'MUSCLE Early - FIB Origin4': [], 'MUSCLE Early - FIB Deep2': [('Pkm', 'Cd44')], 'MUSCLE Early - FIB Deep3': [], 'MUSCLE Early - FIB Muscle1': [('Tgfb2', 'Tgfbr2'), ('Tnc', 'Nt5e')], 'MUSCLE Early - FIB Origin2': [('Pdgfa', 'Pdgfra'), ('Pdgfc', 'Pdgfra')], 'MUSCLE Early - FIB LateDC': [('Edn3', '

In [23]:
#Save output
quant_ligrec_dge_df.to_csv(os.path.join(path_output,'quant_ligrec_dge_df_'+name_appendix+'.csv'))
print('quant_ligrec_dge_dict')
print(quant_ligrec_dge_dict)

quant_ligrec_dge_dict
{'MUSCLE Early - FIB Inter1': 1, 'MUSCLE Early - FIB Muscle2': 0, 'MUSCLE Early - FIB Upper4': 2, 'MUSCLE Early - FIB Upper3': 6, 'MUSCLE Early - CHOND': 4, 'MUSCLE Early - FIB Origin5': 0, 'MUSCLE Early - FIB Upper2': 2, 'MUSCLE Early - FIB Deep1': 2, 'MUSCLE Early - FIB Lower': 5, 'MUSCLE Early - FIB Origin4': 0, 'MUSCLE Early - FIB Deep2': 1, 'MUSCLE Early - FIB Deep3': 0, 'MUSCLE Early - FIB Muscle1': 2, 'MUSCLE Early - FIB Origin2': 2, 'MUSCLE Early - FIB LateDC': 11, 'MUSCLE Early - FIB Inter2': 2, 'MUSCLE Early - FIB Origin3': 0, 'MUSCLE Early - FIB EarlyDC': 5, 'MUSCLE Early - FIB Origin6': 0, 'MUSCLE Early - FIB Origin1': 0, 'MUSCLE Early - FIB Inter3': 1, 'MUSCLE Early - FIB Upper1': 0, 'NC SchwannCells - FIB Inter1': 6, 'NC SchwannCells - FIB Muscle2': 8, 'NC SchwannCells - FIB Upper4': 3, 'NC SchwannCells - FIB Upper3': 9, 'NC SchwannCells - CHOND': 5, 'NC SchwannCells - FIB Origin5': 0, 'NC SchwannCells - FIB Upper2': 5, 'NC SchwannCells - FIB Deep1':

ave for visualization in R

In [24]:
#Save output
quant_ligrec_dge_dict_df = pd.DataFrame.from_dict(quant_ligrec_dge_dict,orient='index')
print('quant_ligrec_dge_dict_df')
print(quant_ligrec_dge_dict_df)
quant_ligrec_dge_dict_df.to_csv(os.path.join(path_output,'quant_ligrec_dge_dict_'+name_appendix+'.csv'))

quant_ligrec_dge_dict_df
                            0
MUSCLE Early - FIB Inter1   1
MUSCLE Early - FIB Muscle2  0
MUSCLE Early - FIB Upper4   2
MUSCLE Early - FIB Upper3   6
MUSCLE Early - CHOND        4
...                        ..
MUSCLE Late - FIB EarlyDC   3
MUSCLE Late - FIB Origin6   0
MUSCLE Late - FIB Origin1   1
MUSCLE Late - FIB Inter3    1
MUSCLE Late - FIB Upper1    1

[286 rows x 1 columns]


# Determine Receptor-Ligand-Pairs (simulated)

In [25]:
#To test for the enrichment of receptor-ligand pairs between two populations, 
#the observed number of receptor-ligand pairs was compared to the number of pairs obtained 
#from an equally sized randomly sampled pool of receptors and ligands. 
#For each cluster pair, this simulation was repeated 10000 times
sim_lig_rec_dge = sim_lig_rec(rec_dge_dict2, lig_dge_dict, Pairs_LigRec_mm_S, repeats = 10000)

Ligand cluster: 1/13, Receptor cluster: 1/22
Ligand cluster: 1/13, Receptor cluster: 2/22
Ligand cluster: 1/13, Receptor cluster: 3/22
Ligand cluster: 1/13, Receptor cluster: 4/22
Ligand cluster: 1/13, Receptor cluster: 5/22
Ligand cluster: 1/13, Receptor cluster: 6/22
Ligand cluster: 1/13, Receptor cluster: 7/22
Ligand cluster: 1/13, Receptor cluster: 8/22
Ligand cluster: 1/13, Receptor cluster: 9/22
Ligand cluster: 1/13, Receptor cluster: 10/22
Ligand cluster: 1/13, Receptor cluster: 11/22
Ligand cluster: 1/13, Receptor cluster: 12/22
Ligand cluster: 1/13, Receptor cluster: 13/22
Ligand cluster: 1/13, Receptor cluster: 14/22
Ligand cluster: 1/13, Receptor cluster: 15/22
Ligand cluster: 1/13, Receptor cluster: 16/22
Ligand cluster: 1/13, Receptor cluster: 17/22
Ligand cluster: 1/13, Receptor cluster: 18/22
Ligand cluster: 1/13, Receptor cluster: 19/22
Ligand cluster: 1/13, Receptor cluster: 20/22
Ligand cluster: 1/13, Receptor cluster: 21/22
Ligand cluster: 1/13, Receptor cluster: 22/

Ligand cluster: 8/13, Receptor cluster: 21/22
Ligand cluster: 8/13, Receptor cluster: 22/22
time per source cluster  198.0174422264099
Ligand cluster: 9/13, Receptor cluster: 1/22
Ligand cluster: 9/13, Receptor cluster: 2/22
Ligand cluster: 9/13, Receptor cluster: 3/22
Ligand cluster: 9/13, Receptor cluster: 4/22
Ligand cluster: 9/13, Receptor cluster: 5/22
Ligand cluster: 9/13, Receptor cluster: 6/22
Ligand cluster: 9/13, Receptor cluster: 7/22
Ligand cluster: 9/13, Receptor cluster: 8/22
Ligand cluster: 9/13, Receptor cluster: 9/22
Ligand cluster: 9/13, Receptor cluster: 10/22
Ligand cluster: 9/13, Receptor cluster: 11/22
Ligand cluster: 9/13, Receptor cluster: 12/22
Ligand cluster: 9/13, Receptor cluster: 13/22
Ligand cluster: 9/13, Receptor cluster: 14/22
Ligand cluster: 9/13, Receptor cluster: 15/22
Ligand cluster: 9/13, Receptor cluster: 16/22
Ligand cluster: 9/13, Receptor cluster: 17/22
Ligand cluster: 9/13, Receptor cluster: 18/22
Ligand cluster: 9/13, Receptor cluster: 19/22


# Compare observed and simulated R-L-pairs to determine p-value (uncorrected and corrected)

In [26]:
quant_sim_lig_rec_dge = sim_lig_reg_get_p(quant_ligrec_dge_dict, sim_lig_rec_dge)
print('quant_sim_lig_rec_dge')
print(quant_sim_lig_rec_dge)

quant_sim_lig_rec_dge
                     FIB Inter1  FIB Muscle2  FIB Upper4  FIB Upper3   CHOND  \
MUSCLE Early             0.6321       1.0000      0.1478      0.0026  0.0410   
NC SchwannCells          0.0276       0.0032      0.2211      0.0061  0.1933   
IMMU MastCells           0.7177       1.0000      0.4961      0.8615  0.9641   
IMMU Macrophages         0.1206       0.9199      0.9239      0.3159  0.0182   
NC Melanocytes           0.1074       0.8805      0.3592      0.4735  0.6824   
VESSEL BECs              0.0084       0.1507      0.1621      0.0009  0.3974   
EPI                      0.0198       0.0344      1.0000      0.0399  0.0021   
FIB                      0.0000       0.0019      0.0010      0.0000  0.0000   
MUSCLE Mid               0.6926       0.7400      0.1873      0.0864  0.7798   
VESSEL LECs              0.1154       0.0695      0.0877      0.0080  0.0141   
IMMU DendriticCells      0.1607       0.9754      0.5973      0.2252  0.5305   
VESSEL MuralCells 

In [27]:
#Save output
quant_sim_lig_rec_dge.to_csv(os.path.join(path_output,'quant_sim_lig_rec_dge_'+name_appendix+'.csv'))

In [28]:
#Correct for multiple testing (Benjamini-Hochberg-corrected p-values)
_,adj = st.fdrcorrection(quant_sim_lig_rec_dge.values.ravel(), alpha=0.05, method='indep', is_sorted=False)
adj=np.split(adj,len(quant_sim_lig_rec_dge.index))
adj=pd.DataFrame(adj,index=quant_sim_lig_rec_dge.index,columns=quant_sim_lig_rec_dge.columns)
print(adj)

                     FIB Inter1  FIB Muscle2  FIB Upper4  FIB Upper3  \
MUSCLE Early           0.890545     1.000000    0.344335    0.014580   
NC SchwannCells        0.099919     0.017268    0.436101    0.030607   
IMMU MastCells         0.956379     1.000000    0.750712    1.000000   
IMMU Macrophages       0.302558     1.000000    1.000000    0.541859   
NC Melanocytes         0.284411     1.000000    0.590409    0.732005   
VESSEL BECs            0.036833     0.344335    0.356620    0.006278   
EPI                    0.076524     0.115715    1.000000    0.127429   
FIB                    0.000000     0.011321    0.006810    0.000000   
MUSCLE Mid             0.936245     0.975300    0.391006    0.244072   
VESSEL LECs            0.292074     0.207052    0.244072    0.036317   
IMMU DendriticCells    0.356281     1.000000    0.854139    0.438144   
VESSEL MuralCells      0.929364     0.736871    0.171600    0.890806   
MUSCLE Late            1.000000     0.985729    0.424432    0.27

In [29]:
#Save output
adj.to_csv(os.path.join(path_output,'quant_sim_lig_rec_dge_FDRcorrected_'+name_appendix+'.csv'))