In [1]:
library(tidyverse)
library(dplyr)
library(VennDiagram)
library(ggplot2)
library(utils)

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.2 ──
[32m✔[39m [34mggplot2[39m 3.4.0      [32m✔[39m [34mpurrr  [39m 0.3.5 
[32m✔[39m [34mtibble [39m 3.1.8      [32m✔[39m [34mdplyr  [39m 1.0.10
[32m✔[39m [34mtidyr  [39m 1.2.1      [32m✔[39m [34mstringr[39m 1.5.0 
[32m✔[39m [34mreadr  [39m 2.1.3      [32m✔[39m [34mforcats[39m 0.5.2 
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
Loading required package: grid

Loading required package: futile.logger



# Comparing with the previous version of the database

In [2]:
# Load the previous version of the database from its CSV export.
previous_db <- read.csv(file = "L_R_Database_Maria.csv")

In [3]:
# Load the new version of the database from its CSV export.
complete <- read.csv(file = "LR_database.csv")

We first compare the gene spaces of the two databases.

In [4]:
# Gene space of a database: every distinct symbol that appears in either the
# Ligand or the Receptor column.
new_gene_space <- unique(c(complete$Ligand, complete$Receptor))
previous_gene_space <- unique(c(previous_db$Ligand, previous_db$Receptor))

# Write a 5x5 inch, 300 dpi TIFF with a two-set Venn diagram of the gene spaces.
tiff("1st_gene_space.tiff", units = "in", width = 5, height = 5, res = 300)
Venn_plot <- draw.pairwise.venn(
  area1 = length(new_gene_space),
  area2 = length(previous_gene_space),
  cross.area = length(intersect(new_gene_space, previous_gene_space)),
  category = c("New Gene Space", "Previous Gene Space"),
  lty = rep("blank", 2),
  fill = c("light blue", "red"),
  alpha = rep(0.4, 2),
  cat.pos = c(0, 0),
  cat.dist = rep(0.025, 2),
  cex = 1.5,
  cat.cex = 1.5,
  verbose = FALSE
)
grid.draw(Venn_plot)
dev.off()


In [5]:
# Symbols found in the previous gene space but not in the new one.
setdiff(x = previous_gene_space, y = new_gene_space)

The difference is due to the use of different gene symbols. Some of the gene symbols used in the previous database may not be recognized as official or approved gene symbols.

See the example below.

In [6]:
# The previous version of the database uses the gene symbol LPHN2,
# which is not an approved gene symbol.
previous_db %>%
  filter(Receptor == "LPHN2")

Pair.Name,Ligand,Ligand.Name,Receptor,Receptor.Name,Classification
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
SHANK1_LPHN2,SHANK1,SH3 and multiple ankyrin repeat domains 1,LPHN2,latrophilin 2,other


In [7]:
# ADGRL2 is the approved gene symbol for LPHN2. The current version of the
# database lists this gene under its approved symbol.
complete %>%
  filter(Receptor == "ADGRL2")

X,Pair.Name,Ligand,Ligand.Name,Receptor,Receptor.Name,complex_pair,partner_a,partner_b,source,⋯,consensus_stimulation,consensus_inhibition,sources,references,curation_effort,n_references,n_resources,annotation_strategy,db,dup
<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<int>,<int>,<chr>,<chr>,<int>,<int>,<int>,<chr>,<chr>,<chr>
5735,SHANK1_ADGRL2,SHANK1,SH3 and multiple ankyrin repeat domains 1,ADGRL2,adhesion G protein-coupled receptor L2,,Q9Y566,O95490,Q9Y566,⋯,0,0,CellTalkDB;DOMINO;Fantom5_LRdb;HPRD;HPRD_LRdb;HPRD_talklr;LRdb;Ramilowski2015;iTALK;talklr,DOMINO:10964907;HPRD:10964907,2,1,7,LR,LR,ADGRL2_SHANK1
5737,SHANK2_ADGRL2,SHANK2,SH3 and multiple ankyrin repeat domains 2,ADGRL2,adhesion G protein-coupled receptor L2,,Q9UPX8,O95490,Q9UPX8,⋯,0,0,Cellinker;HPRD,Cellinker:10964907;HPRD:10964907,2,1,2,LR,LR,ADGRL2_SHANK2


In [8]:
# Convert the non-approved gene symbols to their approved counterparts

In [9]:
# One-column lookup table of the symbols that occur only in the previous gene
# space. The column is named at construction time (instead of inheriting a
# deparsed-expression name) and kept as character regardless of R version.
difference <- data.frame(
  previous_name = setdiff(previous_gene_space, new_gene_space),
  stringsAsFactors = FALSE
)

In [10]:
# Label the single column of the lookup table.
names(difference) <- "previous_name"

In [23]:
# Display the lookup table. The execution counter of this cell (In [23]) is
# higher than that of the cell assigning `difference$new`, so the printed
# output already contains the `new` column.
difference

previous_name,new
<chr>,<chr>
AREGB,AREG
C1orf200,PIK3CD-AS1
C4B,C4B_2
C5orf55,EXOC3-AS1
CCL3L3,CCL3L1
CGB,CGB8
CTGF,CCN2
CYR61,CCN1
DEFB103B,DEFB103A
DEFB4A,DEFB4B


In [11]:
# Replacement symbols, listed in the same order as the rows of `difference`
# (one entry per row; they are attached to it as the `new` column).
approved <- c(
  "AREG", "PIK3CD-AS1", "C4B_2", "EXOC3-AS1", "CCL3L1",
  "CGB8", "CCN2", "CCN1", "DEFB103A", "DEFB4B",
  "VEGFD", "IFNA1", "CXCL8", "ANOS1", "LRP1B",
  "MELTF", "AFDN", "PLXNA1", "YARS1", "CCL12_Mouse",
  "BEX3", "HJV", "PLPP6", "ACKR1", "ERG28",
  "ADGRE5", "ADGRE2", "ADGRL4", "PGAP6", "NECTIN1",
  "NECTIN2", "NECTIN3", "NECTIN4", "CMKLR2", "ADGRB2",
  "ADGRL1", "ADGRL2", "ADGRG1", "None"
)

In [12]:
# Attach the replacement symbols to the lookup table. The pairing is purely
# positional, so fail loudly if the hand-written vector does not have exactly
# one entry per row (data-frame assignment would otherwise silently recycle a
# vector whose length divides the row count).
stopifnot(length(approved) == nrow(difference))
difference$new <- approved

In [13]:
# Previous gene space with every outdated symbol swapped for its replacement.
# Each symbol is looked up explicitly in `difference` via match(), so the
# result does not depend on `difference` being in the same order as
# `previous_gene_space` (which a positional replace() silently requires).
replaced_gene_space <- local({
  lookup_idx <- match(previous_gene_space, difference$previous_name)
  has_new_symbol <- !is.na(lookup_idx)
  updated <- previous_gene_space
  updated[has_new_symbol] <- difference$new[lookup_idx[has_new_symbol]]
  updated
})

In [14]:
# Let's plot the gene space again

In [15]:
# Second Venn diagram: new gene space versus the previous gene space after the
# symbol replacement. The replaced vector is de-duplicated before counting.
tiff("2nd_gene_space.tiff", units = "in", width = 5, height = 5, res = 300)
Venn_plot <- draw.pairwise.venn(
  area1 = length(new_gene_space),
  area2 = length(unique(replaced_gene_space)),
  cross.area = length(intersect(new_gene_space, replaced_gene_space)),
  category = c("New Gene Space", "Previous Gene Space with Approved Names"),
  lty = rep("blank", 2),
  fill = c("light blue", "red"),
  alpha = rep(0.4, 2),
  cat.pos = c(0, 0),
  cat.dist = rep(0.025, 2),
  cex = 1.5,
  cat.cex = 1.5,
  verbose = FALSE
)
grid.draw(Venn_plot)
dev.off()

In [16]:
# Symbols still missing from the new gene space after the replacement.
setdiff(x = replaced_gene_space, y = new_gene_space)

In [17]:
# Comparing Pairs

In [18]:
# Fix the Ligand/Receptor columns of the previous database by replacing the
# outdated gene symbols with the replacements stored in `difference`.

# Map each element of `symbols` through the lookup table: values found in
# `lookup$previous_name` are replaced by the matching `lookup$new`; all other
# values are returned unchanged. Works for an empty lookup table as well,
# unlike a `1:nrow(lookup)` loop, which would iterate over c(1, 0).
rename_symbols <- function(symbols, lookup) {
  lookup_idx <- match(symbols, lookup$previous_name)
  found <- !is.na(lookup_idx)
  symbols[found] <- lookup$new[lookup_idx[found]]
  symbols
}

previous_db$Ligand <- rename_symbols(previous_db$Ligand, difference)
previous_db$Receptor <- rename_symbols(previous_db$Receptor, difference)

In [19]:
"AREG" %in% previous_db$Ligand & !"AREGB" %in% previous_db$Ligand

In [20]:
# Rebuild the pair identifiers as "<Ligand>_<Receptor>" so they reflect the
# updated gene symbols in the previous database.
previous_db$Pair.Name <- paste0(previous_db$Ligand, "_", previous_db$Receptor)

In [21]:
# Finally, plot the overlap between the pairs of the two databases.
# Both circle sizes count *distinct* pair names: the cross area comes from
# intersect(), which de-duplicates, so a raw length() on either side would
# overstate that circle whenever a pair name occurs more than once.
tiff("3_pairs.tiff", units = "in", width = 5, height = 5, res = 300)
Venn_plot <- draw.pairwise.venn(
  area1 = length(unique(previous_db$Pair.Name)),
  area2 = length(unique(complete$Pair.Name)),
  cross.area = length(intersect(previous_db$Pair.Name, complete$Pair.Name)),
  category = c("Previous DB Pair", "New DB Pair"),
  lty = rep("blank", 2),
  fill = c("light blue", "red"),
  alpha = rep(0.4, 2),
  cat.pos = c(0, 0),
  cat.dist = rep(0.025, 2),
  cex = 1.5,
  cat.cex = 1.5,
  verbose = FALSE
)
grid.draw(Venn_plot)
dev.off()

In [22]:
# Pairs found in the previous database but not in the new one.
setdiff(x = previous_db$Pair.Name, y = complete$Pair.Name)