In [1]:
import pandas as pd
import numpy as np

In [2]:
# load nichenet ligands for signature
gene_sign="NFKB_regulon_combined" #"NFKB_regulon"#"NFKB_regulon" "AP1_regulon"
receiver="ipEMT"
nichenet_ligands = pd.read_csv(f"/data/BCI-CRC/nasrine/data/CRC/spatial/CRC_LM_VISIUM/CRC_LM_VISIUM_04_08_09_11/nichenet/concat_withWu2022/nichenet_microenvall_celltypes/{gene_sign}{receiver}_all_ligands.csv")
print("Nichnet all ligands:", nichenet_ligands.shape)

Nichnet all ligands: (328, 5)


In [3]:
nichenet_ligands.head(5)

Unnamed: 0,test_ligand,auroc,aupr,aupr_corrected,pearson
0,A2M,0.765001,0.049597,0.038425,0.118493
1,ADAM10,0.746327,0.051322,0.04015,0.123336
2,ADAM12,0.740618,0.045573,0.034401,0.109056
3,ADAM15,0.727156,0.038421,0.027249,0.096278
4,ADAM17,0.77156,0.047719,0.036547,0.064549


### Get downloaded version of cellphonedb and check ligands from nichenet are present in cellphonedb database
Downloaded versions will be stored in a user folder under `~/.cpdb/releases`

i have 4.0.0 (`/data/home/hfx941/.cpdb/releases/v4.0.0/data`)

In [4]:
cpdb_genes_all = pd.read_csv("/data/home/hfx941/.cpdb/releases/v4.0.0/data/gene_input_all.csv")

In [5]:
len(np.intersect1d(nichenet_ligands["test_ligand"], cpdb_genes_all["gene_name"]))

328

In [6]:
assert nichenet_ligands.shape[0] == len(np.intersect1d(nichenet_ligands["test_ligand"], cpdb_genes_all["gene_name"]))

### Look at significant ligands now

In [7]:
# load cellphonedb ligand-receptor pairs (significant means)
sign_means = pd.read_csv("/data/BCI-CRC/nasrine/data/CRC/spatial/CRC_LM_VISIUM/CRC_LM_VISIUM_04_08_09_11/cellphonedb3/concat_withWu2022/cpdb3_output/significant_means.txt",
                         sep="\t"
                        )
sign_means.shape

(2048, 1119)

In [8]:
sign_means.head(5)

Unnamed: 0,id_cp_interaction,interacting_pair,partner_a,partner_b,gene_a,gene_b,secreted,receptor_a,receptor_b,annotation_strategy,...,pDC|NLRP3 Mac,pDC|Neutrophil,pDC|Pericyte,pDC|Plasma,pDC|Treg,pDC|cDC1,pDC|cDC2,pDC|gdT,pDC|migDC,pDC|pDC
0,CPI-SC076898564,COL21A1_integrin_a10b1_complex,simple:Q96P44,complex:integrin_a10b1_complex,COL21A1,,True,False,False,curated,...,,,,,,,,,,
1,CPI-CS0E2BE2DEE,IL2_receptor_HA_IL2,complex:IL2_receptor_HA,simple:P60568,,IL2,True,True,False,curated,...,,,,,,,,,,
2,CPI-SS004A073E1,NTRK1_NTF3,simple:P04629,simple:P20783,NTRK1,NTF3,True,True,False,curated,...,,,,,,,,,,
3,CPI-SS06C0074BD,CCL21_CCR7,simple:O00585,simple:P32248,CCL21,CCR7,True,False,True,curated,...,,,,,,,,,,
4,CPI-SS045D94D48,C10orf99_SUSD2,simple:Q6UWK7,simple:Q9UGT4,C10orf99,SUSD2,True,False,True,curated,...,,,,,,,,,,


### get ligands from cellphonedb

In [9]:
cpdb_dict = {}
for i in ["a", "b"]:
    # try to get ligands from cellphonedb, not a receptor and gene name is not nan
    df = sign_means[(sign_means[f"receptor_{i}"]==False) & (~sign_means[f"gene_{i}"].isna())][[f"gene_{i}",f"receptor_{i}"]].copy()
    df.drop_duplicates(inplace=True)
    # rename columns
    df.rename(columns={f"gene_{i}": "ligand_cpdb", f"receptor_{i}": "receptor"}, inplace=True)
    
    cpdb_dict[i] = df    

# concat gene_a, gene_b into single df and remove duplicate rows
cpdb_ligands = pd.concat((cpdb_dict[i] for i in cpdb_dict.keys()), axis=0)
cpdb_ligands.drop_duplicates(inplace=True)

In [10]:
cpdb_ligands = pd.concat((cpdb_dict[i] for i in cpdb_dict.keys()), axis=0)
cpdb_ligands.drop_duplicates(inplace=True)

In [11]:
cpdb_ligands

Unnamed: 0,ligand_cpdb,receptor
0,COL21A1,False
3,CCL21,False
4,C10orf99,False
9,RELN,False
18,SHH,False
...,...,...
1939,CNTN1,False
1941,NOV,False
1948,NDP,False
1957,BMP5,False


### select all ligands of nichenet that arre in cellphonedb

In [12]:
import numpy as np
common_elements = np.intersect1d(nichenet_ligands["test_ligand"], cpdb_ligands["ligand_cpdb"])
common_elements

array(['ADM', 'AGRN', 'ALCAM', 'ANGPT1', 'ANGPT2', 'ANXA1', 'APOE', 'APP',
       'AREG', 'C3', 'CADM1', 'CCL5', 'CD1D', 'CD6', 'CD72', 'CD99',
       'CDH2', 'CDH6', 'CEACAM5', 'CEACAM6', 'COL10A1', 'COL11A1',
       'COL12A1', 'COL13A1', 'COL14A1', 'COL15A1', 'COL16A1', 'COL18A1',
       'COL1A1', 'COL1A2', 'COL24A1', 'COL27A1', 'COL3A1', 'COL4A1',
       'COL4A2', 'COL4A5', 'COL5A1', 'COL5A2', 'COL5A3', 'COL6A1',
       'COL6A2', 'COL6A3', 'COL7A1', 'COL8A1', 'COL9A2', 'COPA', 'CRTAM',
       'CXCL10', 'CXCL12', 'CXCL2', 'CXCL3', 'CXCL8', 'DSC2', 'EFNA5',
       'ENTPD1', 'EREG', 'FAM3C', 'FBN1', 'FGF2', 'FGF7', 'FN1', 'GDF15',
       'GPI', 'GRN', 'HGF', 'ICAM4', 'IFNG', 'IGFBP3', 'IL10', 'IL15',
       'IL16', 'IL1A', 'IL1B', 'IL6', 'IL7', 'JAG1', 'JAM3', 'LAMC1',
       'LGALS3', 'LGALS9', 'LIF', 'LRPAP1', 'LTA', 'LTB', 'MDK', 'MIF',
       'MMP2', 'NAMPT', 'NECTIN3', 'OSM', 'PDGFA', 'PDGFB', 'PDGFC',
       'PGF', 'PLAU', 'PROS1', 'PSAP', 'PTN', 'PTPRC', 'RSPO3', 'SELL',
       

In [13]:
print("there are {} common ligands between nichenet and cellphonedb".format(len(common_elements)))

there are 133 common ligands between nichenet and cellphonedb


### select only common ligands between nichenet and cellphonedb

In [14]:
common_df = nichenet_ligands[nichenet_ligands.test_ligand.isin(common_elements)].copy()

In [15]:
common_df.shape

(133, 5)

In [16]:
common_df.head(5)

Unnamed: 0,test_ligand,auroc,aupr,aupr_corrected,pearson
7,ADM,0.718563,0.025019,0.013847,0.004195
8,AGRN,0.749059,0.042037,0.030866,0.085123
9,ALCAM,0.742565,0.045491,0.034319,0.120341
12,ANGPT1,0.745483,0.043491,0.032319,0.061833
13,ANGPT2,0.717812,0.042983,0.031811,0.084994


In [19]:
### this depends on the senders we choose todo to change
if gene_sign == "NFKB_regulon":
    senders = ["SPP1 Mac", "Neutrophil", "IL1B Mac", "NLRP3 Mac", 
               "ECM CAF", "Myofibroblast", 
               "CD8 Tex"] 
    
if gene_sign == "NFKB_regulon_combined":
    senders = ["SPP1 Mac", "Neutrophil", "IL1B Mac", "NLRP3 Mac", 
               "ECM CAF", "Myofibroblast", 
               "CD8 Tex"] 

if gene_sign == "AP1_regulon":
    senders = ["SPP1 Mac", "Neutrophil", "IL1B Mac", "NLRP3 Mac", 
               "ECM CAF", "Myofibroblast", "Pericyte"]

In [20]:
columns2pick = [f"{s}|{receiver}" for s in senders]
columns2pick_rev = [f"{receiver}|{s}" for s in senders]

In [21]:
# select interactions that are significant in cellphonedb which contains ligands that are common between cellphonedb and nichenet
# here look at direction gene_a is ligand from sender cells and gene_b is receptor on the receiver cell
df_lr1 = sign_means.loc[sign_means.gene_a.isin(common_df["test_ligand"]), sign_means.columns.isin(['id_cp_interaction', 'interacting_pair', 'partner_a', 'partner_b',
       'gene_a', 'gene_b', 'secreted', 'receptor_a', 'receptor_b']+columns2pick)].dropna(subset=columns2pick, how="all")
df_lr1.shape

(134, 16)

In [22]:
df_lr1.head(5)

Unnamed: 0,id_cp_interaction,interacting_pair,partner_a,partner_b,gene_a,gene_b,secreted,receptor_a,receptor_b,CD8 Tex|ipEMT,ECM CAF|ipEMT,IL1B Mac|ipEMT,Myofibroblast|ipEMT,NLRP3 Mac|ipEMT,Neutrophil|ipEMT,SPP1 Mac|ipEMT
47,CPI-SS008FB916C,FGF2_FGFR4,simple:P09038,simple:P22455,FGF2,FGFR4,True,False,True,,,,0.302,,,
88,CPI-SS0EB5364D4,RSPO3_LGR6,simple:Q9BXY4,simple:Q9HBX8,RSPO3,LGR6,True,False,True,,,,0.281,,,
128,CPI-SS05A94DE17,FGF2_FGFR2,simple:P09038,simple:P21802,FGF2,FGFR2,True,False,True,,,,0.368,,,
174,CPI-SS034B442CE,TNF_CELSR2,simple:P01375,simple:Q9HCU4,TNF,CELSR2,True,False,True,,,1.836,,,,
225,CPI-SS0CB4F2FE4,PDGFB_ADGRV1,simple:P01127,simple:Q8WXG9,PDGFB,ADGRV1,True,False,True,,,1.109,,,,


### select interactions that are significant in cellphonedb which contains ligands that are common between cellphonedb and nichenet

In [23]:
# select interactions that are significant in cellphonedb which contains ligands that are common between cellphonedb and nichenet
# here look at direction gene_b is ligand from sender cells and gene_a is receptor on the receiver cell
df_lr2 = sign_means.loc[sign_means.gene_b.isin(common_df["test_ligand"]), sign_means.columns.isin(['id_cp_interaction', 'interacting_pair', 'partner_a', 'partner_b',
       'gene_a', 'gene_b', 'secreted', 'receptor_a', 'receptor_b']+columns2pick_rev)].dropna(subset=columns2pick_rev, how="all")
df_lr2.shape

(62, 16)

In [24]:
df_lr2.head(5)

Unnamed: 0,id_cp_interaction,interacting_pair,partner_a,partner_b,gene_a,gene_b,secreted,receptor_a,receptor_b,ipEMT|CD8 Tex,ipEMT|ECM CAF,ipEMT|IL1B Mac,ipEMT|Myofibroblast,ipEMT|NLRP3 Mac,ipEMT|Neutrophil,ipEMT|SPP1 Mac
31,CPI-SS0C283076A,WNT11_SFRP2,simple:O96014,simple:Q96HF1,WNT11,SFRP2,True,False,False,,,,0.687,,,
57,CPI-CS016BAF864,FZD5_LRP6_WNT5B,complex:FZD5_LRP6,simple:Q9H1J7,,WNT5B,True,False,False,,,,0.323,,,
67,CPI-CS026456D1C,FZD5_LRP5_WNT5B,complex:FZD5_LRP5,simple:Q9H1J7,,WNT5B,True,False,False,,,,0.323,,,
73,CPI-CS0DCC20D87,FZD3_LRP5_WNT5B,complex:FZD3_LRP5,simple:Q9H1J7,,WNT5B,True,False,False,,,,0.267,,,
74,CPI-CS090834433,FZD3_LRP6_WNT5B,complex:FZD3_LRP6,simple:Q9H1J7,,WNT5B,True,False,False,,,,0.267,,,


### concatenate gene_a and gene_b from these 2 dfs:

In [25]:
genes_common_sign_cpdb = pd.concat([df_lr1["gene_a"].drop_duplicates(), df_lr2["gene_b"].drop_duplicates()]).drop_duplicates()
genes_common_sign_cpdb.shape

(89,)

In [26]:
genes_common_sign_cpdb

47        FGF2
88       RSPO3
174        TNF
225      PDGFB
247       FGF7
        ...   
536     ENTPD1
661      VEGFB
720    NECTIN3
774      SIRPG
797       AREG
Length: 89, dtype: object

In [27]:
ligands_common_not_cpdb = list(set(common_df["test_ligand"]).difference(set(genes_common_sign_cpdb)))
print("There are {} common ligands in cellphonedb that are not significant means".format(len(ligands_common_not_cpdb)))

There are 44 common ligands in cellphonedb that are not significant means


### Not significant becasue not significant orr because interaction receptor-ligand is not present in cellphonedb database?

### To answer this
* Load receptors from nichenet to see that we are not removoing sttuff because receptor is missing
* check means file and find pair of ligands-recerptor that is in nichenet and cellpphonedb but is not significant in cellpphonedb

In [28]:
# load recepptor-ligand network
# select only interactions of ligands that are discarded by cellphonedb
# check if this receptor-ligand pair exists in "means.txt" file of cellphonedb
# if it does it means it wasn't called significant by cellphonedb as it is not present in signficiant means
# so we can discard it

In [29]:
means = pd.read_csv("/data/BCI-CRC/nasrine/data/CRC/spatial/CRC_LM_VISIUM/CRC_LM_VISIUM_04_08_09_11/cellphonedb3/concat_withWu2022/cpdb3_output/means.txt",
                         sep="\t")

lr_nichenet = pd.read_csv(f"/data/BCI-CRC/nasrine/data/CRC/spatial/CRC_LM_VISIUM/CRC_LM_VISIUM_04_08_09_11/nichenet/concat_withWu2022/nichenet_microenvall_celltypes/{gene_sign}{receiver}_nligands{nichenet_ligands.shape[0]}_LRnetwork.csv")

In [30]:
lr_nichenet["interacting_pair"] = lr_nichenet["from"] + "_" + lr_nichenet["to"]
lr_nichenet["interacting_pair_rev"] = lr_nichenet["to"] + "_" + lr_nichenet["from"]

In [31]:
lr_nichenet = lr_nichenet[lr_nichenet["from"].isin(ligands_common_not_cpdb)]

In [32]:
# check means file and find pair of ligands-recerptor that is in nichenet and cellpphonedb but is not significant in cellpphonedb
ligands2discard_df = means[(means.interacting_pair.isin(lr_nichenet["interacting_pair"])) | (means.interacting_pair.isin(lr_nichenet["interacting_pair_rev"]))]
ligands2discard_df

Unnamed: 0,id_cp_interaction,interacting_pair,partner_a,partner_b,gene_a,gene_b,secreted,receptor_a,receptor_b,annotation_strategy,...,pDC|NLRP3 Mac,pDC|Neutrophil,pDC|Pericyte,pDC|Plasma,pDC|Treg,pDC|cDC1,pDC|cDC2,pDC|gdT,pDC|migDC,pDC|pDC
1283,CPI-SS084BE3E4B,LTA_TNFRSF1A,simple:P01374,simple:P19438,LTA,TNFRSF1A,True,False,True,curated,...,0.657,1.07,0.52,0.044,0.121,0.278,0.459,0.365,0.202,0.144
1288,CPI-SS07D9A48A2,LTA_TNFRSF1B,simple:P01374,simple:P20333,LTA,TNFRSF1B,True,False,True,curated,...,1.731,3.635,0.054,0.074,2.033,0.183,0.693,0.63,0.72,0.727
1600,CPI-SS03156A825,LTA_LTBR,simple:P01374,simple:P36941,LTA,LTBR,True,False,True,I2D,...,0.307,0.22,0.191,0.037,0.042,0.163,0.235,0.045,0.097,0.13
1602,CPI-SS0110361CB,LTA_TNFRSF14,simple:P01374,simple:Q92956,LTA,TNFRSF14,True,False,True,curated,...,0.609,1.216,0.0,0.24,0.881,0.422,0.533,0.705,0.803,0.459
1933,CPI-SS06CBC985C,LTBR_LTB,simple:P36941,simple:Q06643,LTBR,LTB,False,True,False,curated,...,0.25,1.73,0.123,0.391,16.205,1.139,1.079,1.199,0.793,13.606


In [33]:
ligands2discard = list(ligands2discard_df["gene_a"].drop_duplicates()) + list(ligands2discard_df["gene_b"].drop_duplicates())
print("We have to remove {} genes (some could be receptors so they won't even be in it) from ligands common as they are not significant in cellphonedb and exist in cpdb".format(len(ligands2discard)))

We have to remove 7 genes (some could be receptors so they won't even be in it) from ligands common as they are not significant in cellphonedb and exist in cpdb


In [34]:
ligands2discard

['LTA', 'LTBR', 'TNFRSF1A', 'TNFRSF1B', 'LTBR', 'TNFRSF14', 'LTB']

In [35]:
common_df = common_df[~common_df["test_ligand"].isin(ligands2discard)].copy()

In [36]:
common_df.shape

(131, 5)

In [37]:
common_df

Unnamed: 0,test_ligand,auroc,aupr,aupr_corrected,pearson
7,ADM,0.718563,0.025019,0.013847,0.004195
8,AGRN,0.749059,0.042037,0.030866,0.085123
9,ALCAM,0.742565,0.045491,0.034319,0.120341
12,ANGPT1,0.745483,0.043491,0.032319,0.061833
13,ANGPT2,0.717812,0.042983,0.031811,0.084994
...,...,...,...,...,...
321,VEGFB,0.739103,0.041378,0.030206,0.089802
323,WNT2,0.732386,0.030814,0.019642,0.066308
324,WNT5A,0.757978,0.038743,0.027571,0.079660
325,WNT5B,0.738694,0.031089,0.019917,0.049433


In [38]:
common_df[common_df.test_ligand=="LTB"]

Unnamed: 0,test_ligand,auroc,aupr,aupr_corrected,pearson


In [39]:
common_df.head(60)

Unnamed: 0,test_ligand,auroc,aupr,aupr_corrected,pearson
7,ADM,0.718563,0.025019,0.013847,0.004195
8,AGRN,0.749059,0.042037,0.030866,0.085123
9,ALCAM,0.742565,0.045491,0.034319,0.120341
12,ANGPT1,0.745483,0.043491,0.032319,0.061833
13,ANGPT2,0.717812,0.042983,0.031811,0.084994
17,ANXA1,0.757398,0.060114,0.048942,0.140545
20,APOE,0.751504,0.043288,0.032116,0.083003
21,APP,0.728876,0.029219,0.018047,0.026411
22,AREG,0.73344,0.02788,0.016708,0.021398
30,C3,0.751812,0.049061,0.037889,0.116013


In [40]:
common_df.shape

(131, 5)

---

### write to file

In [41]:
gene_sign

'NFKB_regulon_combined'

In [42]:
from pathlib import Path
DIR2SAVE = Path("/data/BCI-CRC/nasrine/data/CRC/spatial/CRC_LM_VISIUM/CRC_LM_VISIUM_04_08_09_11/nichenet/concat_withWu2022/nichenet_microenvall_celltypes/intersect_cellphonedb/")
DIR2SAVE.mkdir(parents=True, exist_ok=True)

with open(DIR2SAVE.joinpath(f"{gene_sign}{receiver}_common_ligands.txt"), "w") as file:
    for line in common_df["test_ligand"]:
        file.write(f"{line}\n")

#### this file can be used to sort the ligands from nichenet to use for the downstream target gene stuff