In [23]:
library(OmnipathR)
# library(nichenetr)
library(tidyverse)
library(dplyr)
library(VennDiagram)
library(ggplot2)
library(utils)

In [None]:
# Load the interactions returned by import_ligrecextra_interactions() and keep
# only the first row for each (source, target) gene-symbol combination.
lr <- import_ligrecextra_interactions() %>%
  dplyr::distinct(source_genesymbol, target_genesymbol, .keep_all = TRUE)

In [131]:
# Load the interactions returned by curated_ligand_receptor_interactions() and
# keep only the first row for each (source, target) gene-symbol combination.
curated <- curated_ligand_receptor_interactions() %>%
  dplyr::distinct(source_genesymbol, target_genesymbol, .keep_all = TRUE)

In [None]:
# Build a "<source>_<target>" key for every interaction in lr
lr <- lr %>%
  dplyr::mutate(pair = paste(source_genesymbol, target_genesymbol, sep = "_"))

In [132]:
# Build a "<source>_<target>" key for every interaction in curated
curated <- curated %>%
  dplyr::mutate(pair = paste(source_genesymbol, target_genesymbol, sep = "_"))

In [25]:
# Read the reference pair table from the working directory.
# NOTE(review): later cells read omni$Pair.Name and omni$complex_pair, so the
# CSV is assumed to provide both columns; confirm.
omni <- utils::read.csv(file = "L_R_OmniPathFull.csv")

In [None]:
# Number of pair keys present in both lr and curated
lr$pair %>%
  intersect(curated$pair) %>%
  length()

In [None]:
# Number of curated pair keys that are absent from lr
curated$pair %>%
  setdiff(lr$pair) %>%
  length()

In [None]:
# List the curated pair keys that are absent from lr
curated$pair %>%
  setdiff(lr$pair)

In [None]:
# Show the curated row(s) whose pair key is exactly "EPOR_EPO"
curated %>%
  dplyr::filter(pair == "EPOR_EPO")

In [None]:
# Show every curated row in which EPOR is the source gene symbol
curated %>%
  dplyr::filter(source_genesymbol == "EPOR")

In [None]:
# Number of pair keys shared by omni (Pair.Name column) and curated
omni$Pair.Name %>%
  intersect(curated$pair) %>%
  length()

In [None]:
# Number of curated pair keys that do not occur in omni$Pair.Name
curated$pair %>%
  setdiff(omni$Pair.Name) %>%
  length()

*************************

In [26]:
# Load the annotation table returned by import_omnipath_intercell()
anno_raw <- import_omnipath_intercell()

# Ligand/receptor subset of the annotations; taken from the full table, i.e.
# before the de-duplication step below
anno_lig <- dplyr::filter(anno_raw, category %in% c("receptor", "ligand"))

# Keep only the first row for each (parent, database, uniprot) combination
anno_raw <- dplyr::distinct(
  anno_raw,
  parent, database, uniprot,
  .keep_all = TRUE
)

In [27]:
# Keep the curated interactions in which the source or the target identifier
# contains "COMPLEX", then strip the first "COMPLEX:" marker from both
# identifier columns.
complex <- curated %>%
  dplyr::filter(grepl("COMPLEX", source) | grepl("COMPLEX", target)) %>%
  dplyr::mutate(
    source = sub("COMPLEX:", "", source),
    target = sub("COMPLEX:", "", target)
  )

In [28]:
# Complexes are separated into their individual member genes by splitting the
# gene-symbol strings on "_".
# NOTE(review): the names look swapped relative to the columns they are built
# from (components_target comes from source_genesymbol and components_source
# from target_genesymbol). Only their union is used in this cell, so the
# combined vector is unaffected; confirm the intent before reusing either one.
components_target <- complex$source_genesymbol %>%
  strsplit("_") %>%
  unlist() %>%
  unique()
components_source <- complex$target_genesymbol %>%
  strsplit("_") %>%
  unlist() %>%
  unique()
components_both <- unique(c(components_target, components_source))

In [29]:
# Produce all the possible pairwise pairs
#
# For every row of `complex`, split the source and target gene symbols on "_"
# into member genes and emit every unordered pair of members (within as well
# as across the two partners). Each pair is tagged with the originating
# "<source_genesymbol>_<target_genesymbol>" string in `complex_pair`.

# seq_len() yields an empty sequence for a zero-row input, whereas 1:nrow()
# would iterate over c(1, 0)
results <- lapply(seq_len(nrow(complex)), function(i) {
  # `[[` extraction returns a plain vector for data frames and tibbles alike
  source_symbol <- as.character(complex[["source_genesymbol"]][i])
  target_symbol <- as.character(complex[["target_genesymbol"]][i])
  members <- c(
    unlist(strsplit(source_symbol, "_")),
    unlist(strsplit(target_symbol, "_"))
  )
  # combn() returns one combination per column; transpose to one pair per row
  member_pairs <- t(combn(members, 2))
  data.frame(
    source = member_pairs[, 1],
    target = member_pairs[, 2],
    complex_pair = paste(source_symbol, target_symbol, sep = "_"),
    stringsAsFactors = FALSE
  )
})

# Bind the results into a single data frame; fall back to an empty frame with
# the expected columns when there is nothing to bind
if (length(results) > 0) {
  result_df2 <- do.call(rbind, results)
} else {
  result_df2 <- data.frame(
    source = character(0),
    target = character(0),
    complex_pair = character(0),
    stringsAsFactors = FALSE
  )
}

# Mirror every pair (swap source and target) so both orientations are present
df1 <- data.frame(
  source = result_df2$target,
  target = result_df2$source,
  complex_pair = result_df2$complex_pair,
  stringsAsFactors = FALSE
)
result_df <- rbind(result_df2, df1)

# Keep only the first row for each ordered (source, target) combination.
# NOTE(review): the comment previously attached to this step said "Drop the
# self links", but the step only removes duplicated pairs; rows with
# source == target are not filtered out. Confirm whether they should be.
result_df <- result_df %>%
  dplyr::filter(!duplicated(result_df[, c("source", "target")]))

# View the resulting data frame
str(result_df)

'data.frame':	2185 obs. of  3 variables:
 $ source      : chr  "IL17A" "IL17A" "IL17RA" "ITGAL" ...
 $ target      : chr  "IL17RA" "IL17RC" "IL17RC" "ITGB2" ...
 $ complex_pair: chr  "IL17A_IL17RA_IL17RC" "IL17A_IL17RA_IL17RC" "IL17A_IL17RA_IL17RC" "ITGAL_ITGB2_ICAM1" ...


In [30]:
# Pre-allocate the annotation table with one row per entry of components_both:
# empty strings for genesymbol and parent, 0 for score.
df <- data.frame(
  genesymbol = rep("", length(components_both)),
  score = rep(0, length(components_both)),
  parent = rep("", length(components_both)),
  stringsAsFactors = FALSE
)

In [31]:
# Check if the components are categorized as ligands or receptors.
#
# For each entry of components_both, tabulate the `parent` values of its rows
# in anno_lig and store the most frequent one in df, with its row count as the
# score. Components without any row in anno_lig get the string "NA" as parent
# and a score of 0.

# seq_along() yields an empty sequence for an empty vector, whereas 1:length()
# would iterate over c(1, 0) and write a bogus row
for (x in seq_along(components_both)) {
    genename <- components_both[x]
    parent_score <- sort(
        table(filter(anno_lig, genesymbol == components_both[x])$parent),
        decreasing = TRUE,
        na.last = TRUE
    )[1]
    parent_category <- names(parent_score)

    # No names on the top entry means no annotation row matched this gene
    if (is.null(parent_category)) {
      parent_category <- "NA"
      parent_score <- 0
    }

    df[x, "genesymbol"] <- genename
    df[x, "score"] <- parent_score
    df[x, "parent"] <- parent_category
}

table(df$parent)


  ligand       NA receptor 
     192       32      154 

In [32]:
# If a component is not classified as a ligand or receptor, fall back to the
# other annotation categories found in the full annotation table (for example
# extracellular matrix, secreted, or transmembrane).

df_na <- filter(df, parent == "NA")$genesymbol

# seq_along() yields an empty sequence when nothing is unclassified; 1:length()
# would iterate over c(1, 0) and overwrite the parent/score columns with NA
for (x in seq_along(df_na)) {
    parent_score <- sort(
        table(filter(anno_raw, genesymbol == df_na[x])$parent),
        decreasing = TRUE,
        na.last = TRUE
    )[1]
    parent_category <- names(parent_score)

    # No annotation at all for this gene: keep the "NA" placeholder and the
    # score of 0 instead of failing on a zero-length replacement
    if (is.null(parent_category)) {
        next
    }

    # %in% never returns NA, so only the rows of the current gene are updated
    is_current <- df$genesymbol %in% df_na[x]
    df$parent[is_current] <- parent_category
    df$score[is_current] <- as.numeric(parent_score)
}

table(df$parent)


          ecm        ligand      receptor      secreted transmembrane 
           22           192           154             7             3 

In [33]:
# Treat components annotated as "ecm" or "secreted" as ligands
df$parent[df$parent %in% c("ecm", "secreted")] <- "ligand"

In [34]:
# Load the interaction table returned by
# import_post_translational_interactions()
pt <- OmnipathR::import_post_translational_interactions()

In [35]:
# "Separate the annotated components of complexes based on their type."
# Rows of df whose parent is "ligand" and "receptor", respectively
ligands <- df %>%
  dplyr::filter(parent == "ligand")
receptors <- df %>%
  dplyr::filter(parent == "receptor")

In [36]:
# Restrict pt to interactions that go from a gene listed in `ligands` to a
# gene listed in `receptors`, then drop fully identical rows.
pt <- dplyr::distinct(
  dplyr::filter(
    pt,
    source_genesymbol %in% ligands$genesymbol,
    target_genesymbol %in% receptors$genesymbol
  )
)

In [37]:
# Keep only the first row for each (source, target) gene-symbol combination
pt <- pt %>%
  dplyr::distinct(source_genesymbol, target_genesymbol, .keep_all = TRUE)

In [38]:
# Add a "<source>_<target>" key column to both tables so they can be matched
pt <- pt %>%
  dplyr::mutate(pair = paste(source_genesymbol, target_genesymbol, sep = "_"))
result_df <- result_df %>%
  dplyr::mutate(pair = paste(source, target, sep = "_"))

In [39]:
# result_df holds every pairwise combination of complex members; keep only
# the combinations whose pair key also occurs in pt.
pt_edges <- dplyr::filter(result_df, pair %in% pt$pair)

str(pt_edges)

'data.frame':	662 obs. of  4 variables:
 $ source      : chr  "IL17A" "IL17A" "IFNW1" "IFNW1" ...
 $ target      : chr  "IL17RA" "IL17RC" "IFNAR1" "IFNAR2" ...
 $ complex_pair: chr  "IL17A_IL17RA_IL17RC" "IL17A_IL17RA_IL17RC" "IFNW1_IFNAR1_IFNAR2" "IFNW1_IFNAR1_IFNAR2" ...
 $ pair        : chr  "IL17A_IL17RA" "IL17A_IL17RC" "IFNW1_IFNAR1" "IFNW1_IFNAR2" ...


In [40]:
# Curated interactions in which neither identifier contains "COMPLEX"
single_components <- curated %>%
  dplyr::filter(!(grepl("COMPLEX", target) | grepl("COMPLEX", source)))

In [41]:
# Reduce to the two gene-symbol columns, rename them to source/target and add
# an all-NA complex_pair column so the layout matches pt_edges.
single_components <- single_components %>%
  dplyr::select(
    dplyr::all_of(c("source_genesymbol", "target_genesymbol"))
  ) %>%
  dplyr::rename(
    dplyr::all_of(
      c(source = "source_genesymbol", target = "target_genesymbol")
    )
  ) %>%
  dplyr::mutate(complex_pair = NA)

In [109]:
# Build the "<source>_<target>" key for the single-component interactions
single_components <- single_components %>%
  dplyr::mutate(pair = paste(source, target, sep = "_"))

In [110]:
# Stack the single-component interactions on top of the complex-derived pairs
# that were found in pt (single_components rows come first).
complete <- single_components %>%
  rbind(pt_edges)

In [111]:
# Keep the first occurrence of every pair key; later repeats are dropped.
is_repeat <- duplicated(complete[, "pair"], fromLast = FALSE)
complete <- complete[!is_repeat, ]

In [112]:
# Build the "<source>_<target>" gene-symbol key for every row of complex
complex <- complex %>%
  dplyr::mutate(pair = paste(source_genesymbol, target_genesymbol, sep = "_"))

In [113]:
# Number of complex pair keys that never appear as complex_pair in pt_edges
complex$pair %>%
  setdiff(pt_edges$complex_pair) %>%
  length()

In [114]:
# Same count as the previous cell: complex pair keys missing from
# pt_edges$complex_pair
complex$pair %>%
  setdiff(pt_edges$complex_pair) %>%
  length()

In [122]:
# Number of pair keys shared by complete and omni$Pair.Name
complete$pair %>%
  intersect(omni$Pair.Name) %>%
  length()

In [125]:
# Number of complex_pair values shared by complete and omni.
# NOTE(review): assumes the CSV behind omni has a complex_pair column; if it
# does not, omni$complex_pair is NULL and this count is 0. Confirm.
complete$complex_pair %>%
  intersect(omni$complex_pair) %>%
  length()

In [126]:
# Pair keys present in both complete and omni$Pair.Name
a <- complete$pair %>%
  intersect(omni$Pair.Name)

In [127]:
# complex_pair values present in both complete and omni
b <- complete$complex_pair %>%
  intersect(omni$complex_pair)

In [None]:
# Number of pair keys shared by complete and omni$Pair.Name
complete$pair %>%
  intersect(omni$Pair.Name) %>%
  length()

In [133]:
# Number of curated pair keys that also occur in omni$Pair.Name
curated$pair %>%
  intersect(omni$Pair.Name) %>%
  length()

In [134]:
# Number of curated pair keys that also occur in omni$complex_pair
curated$pair %>%
  intersect(omni$complex_pair) %>%
  length()