In [1]:
library(igraph)
library(dplyr)
library(tibble)


Attaching package: ‘igraph’


The following objects are masked from ‘package:stats’:

    decompose, spectrum


The following object is masked from ‘package:base’:

    union



Attaching package: ‘dplyr’


The following objects are masked from ‘package:igraph’:

    as_data_frame, groups, union


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



Attaching package: ‘tibble’


The following object is masked from ‘package:igraph’:

    as_data_frame




In [2]:
.libPaths()

# 1. CF network

## 1.1 Loading the CF network

In [3]:
CF_PPI_network.pruned.interactions <- 
  read.table(file = "../CFnetwork/data/kegg_diff_pathways_network/diff_kegg_pathways_with_CFTR_interactors_PPI_direct_pruned_interactions_df.txt",
             sep = "\t",
             header = T,
             check.names = F)

CF_PPI_network.pruned.nodes <- 
  read.table(file = "../CFnetwork/data/kegg_diff_pathways_network/diff_kegg_pathways_with_CFTR_interactors_PPI_direct_pruned_nodes_df.txt",
             sep = "\t",
             header = T,
             check.names = F)

## 1.2 As an igraph object

In [4]:
source("../CFnetwork/scripts/pathways_to_network/network_utils.R")

In [5]:
CF_PPI_network.pruned <- new("PPI_network",
                             interactions=CF_PPI_network.pruned.interactions,
                             nodes=CF_PPI_network.pruned.nodes)
CF_PPI_network.pruned.with_CFTR <- CF_PPI_network.pruned

In [6]:
dim(CF_PPI_network.pruned.with_CFTR@interactions)

In [7]:
dim(CF_PPI_network.pruned@nodes)

## 1.3 Remove CFTR from the network and its indirect interactors

In [8]:
# CFTR_indirect_interactors <- CF_PPI_network.pruned.nodes[which(is.na(CF_PPI_network.pruned.nodes$sum)),
#                                                          "Symbol"]

# CF_PPI_network.pruned@nodes <- CF_PPI_network.pruned@nodes[which(!CF_PPI_network.pruned@nodes$Symbol %in% c("CFTR", CFTR_indirect_interactors)),]
# CF_PPI_network.pruned@interactions <- CF_PPI_network.pruned@interactions[which(!CF_PPI_network.pruned@interactions$genesymbol_source %in% c("CFTR", CFTR_indirect_interactors) &
#                                                                                  !CF_PPI_network.pruned@interactions$genesymbol_target %in% c("CFTR", CFTR_indirect_interactors)),]

In [9]:
CF_PPI_network.pruned@nodes <- CF_PPI_network.pruned@nodes[which(CF_PPI_network.pruned@nodes$Symbol!="CFTR"),]
CF_PPI_network.pruned@interactions <- CF_PPI_network.pruned@interactions[which(CF_PPI_network.pruned@interactions$genesymbol_source!="CFTR" &
                                                                                 CF_PPI_network.pruned@interactions$genesymbol_target!="CFTR"),]

In [10]:
dim(CF_PPI_network.pruned@nodes)

In [11]:
dim(CF_PPI_network.pruned@interactions)

## 1.4 Binding interactions into two directed interactions in both directions

In [12]:
# Pb of binding interactions
## Non binding
CF_PPI_network.pruned.interactions.non_binding <- 
  CF_PPI_network.pruned@interactions[which(!(CF_PPI_network.pruned@interactions$effect %in% c("binding/association"))),]
CF_PPI_network.pruned.interactions.non_binding <- 
  CF_PPI_network.pruned.interactions.non_binding[,c("genesymbol_source",
                                                           "genesymbol_target")]
colnames(CF_PPI_network.pruned.interactions.non_binding) <- c("from", 
                                                                     "to")

# ## Dissociation
CF_PPI_network.pruned.interactions.binding <- 
  CF_PPI_network.pruned@interactions[which(CF_PPI_network.pruned@interactions$effect %in% c("binding/association")),]
# Both directions for binding interactions
## one direction
CF_PPI_network.pruned.interactions.binding.one_direction <- 
  CF_PPI_network.pruned.interactions.binding[,c("genesymbol_source",
                                                       "genesymbol_target")]
colnames(CF_PPI_network.pruned.interactions.binding.one_direction) <- c("from", 
                                                                               "to")
## other direction
CF_PPI_network.pruned.interactions.binding.other_direction <- 
  CF_PPI_network.pruned.interactions.binding[,c("genesymbol_target", 
                                                       "genesymbol_source")]
colnames(CF_PPI_network.pruned.interactions.binding.other_direction) <- c("from", 
                                                                                 "to")
## both directions
CF_PPI_network.pruned.interactions.binding.both_directions <- 
  rbind(CF_PPI_network.pruned.interactions.binding.one_direction,
        CF_PPI_network.pruned.interactions.binding.other_direction)
CF_PPI_network.pruned.for_igraph <- rbind(CF_PPI_network.pruned.interactions.non_binding,
                                       CF_PPI_network.pruned.interactions.binding.both_directions)

In [13]:
CF_PPI_network.pruned.igraph <- graph_from_data_frame(CF_PPI_network.pruned.for_igraph, 
                                                   directed=TRUE)

## 1.5 Interesting nodes

### 1.5.1 Sink nodes

In [14]:
sink_nodes.final.pathways.df <- read.table(file = "../CFnetwork/data/sink_nodes/CFnetwork_sink_nodes_to_pathways.txt",
                                          sep = "\t",
                                          header = T,
                                          na.strings = "",
                                          check.names = FALSE)
sink_nodes.final.pathways.df <- sink_nodes.final.pathways.df[order(sink_nodes.final.pathways.df$Endpoint_cat),]
sink_nodes.final.pathways.df$Symbol <- as.character(sink_nodes.final.pathways.df$Symbol)
sink_nodes <- as.character(sink_nodes.final.pathways.df$Symbol)

### 1.5.2 Source nodes

In [15]:
source_nodes <- c("TRADD",
                  "PRKACA",
                  "SYK",
                  "CSNK2A1",
                  "SRC",
                  "PLCB1",
                  "PLCB3",
                  "EZR")

### 1.5.3 CFTR interactors

In [16]:
CFTR_interactors = CF_PPI_network.pruned.nodes[which(CF_PPI_network.pruned.nodes$CFTR_interactor),"Symbol"]

In [17]:
CFTR_interactors %in% CF_PPI_network.pruned@nodes$Symbol

# 2. Differentially methylated CpG sites between CF and NCF dataframe

In [18]:
library(tidyverse)

── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mreadr    [39m 2.1.5
[32m✔[39m [34mggplot2  [39m 3.5.1     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mlubridate[39m 1.9.3     [32m✔[39m [34mtidyr    [39m 1.3.1
[32m✔[39m [34mpurrr    [39m 1.0.2     
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mlubridate[39m::[32m%--%()[39m       masks [34migraph[39m::%--%()
[31m✖[39m [34mtibble[39m::[32mas_data_frame()[39m masks [34mdplyr[39m::as_data_frame(), [34migraph[39m::as_data_frame()
[31m✖[39m [34mpurrr[39m::[32mcompose()[39m        masks [34migraph[39m::compose()
[31m✖[39m [34mtidyr[39m::[32mcrossing()[39m       masks [34migraph[39m::crossing()
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m         masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m       

In [19]:
dm_CF_NCF_sup_table3_1267CpG <- 
  read.table(file = "../CFMethylomeSysbio/dm_CpG_CF_NCF_Magalhaes_2018_sup_table_3.tsv",
             skip = 1,
             quote = "\"",
             sep = "\t",
             header = T,
             check.names = F,
            nrows = 1267,
            na.strings = "-")

dm_CF_NCF_sup_table3_1267CpG <- 
  dm_CF_NCF_sup_table3_1267CpG[order(dm_CF_NCF_sup_table3_1267CpG$`p-value`),]
dm_CF_NCF_sup_table3_1267CpG$CpG_pvalue_rank_ <- 1:dim(dm_CF_NCF_sup_table3_1267CpG)[1]

In [20]:
colnames(dm_CF_NCF_sup_table3_1267CpG)[4:5] <- c("Median_CF_patients","Median_Controls")

In [21]:
dm_CF_NCF_sup_table3_1267CpG <- dm_CF_NCF_sup_table3_1267CpG %>%
    mutate(Median_CF_patients = as.numeric(gsub(pattern = ",", ".", Median_CF_patients, fixed = TRUE))) %>%
    mutate(Median_Controls = as.numeric(gsub(pattern = ",", ".", Median_Controls, fixed = TRUE)))

In [22]:
sapply(dm_CF_NCF_sup_table3_1267CpG, class)

In [23]:
dm_CF_NCF_sup_table3_1267CpG$abs_change <- abs(dm_CF_NCF_sup_table3_1267CpG$Median_CF_patients - dm_CF_NCF_sup_table3_1267CpG$Median_Controls)

In [61]:
mean(dm_CF_NCF_sup_table3_1267CpG$abs_change)

In [24]:
dm_CF_NCF_sup_table3_1267CpG <- 
  dm_CF_NCF_sup_table3_1267CpG[order(dm_CF_NCF_sup_table3_1267CpG$abs_change, decreasing = T),]
dm_CF_NCF_sup_table3_1267CpG$CpG_beta_value_rank_ <- 1:dim(dm_CF_NCF_sup_table3_1267CpG)[1]

In [25]:
dm_CF_NCF_sup_table3_1267CpG <- 
  dm_CF_NCF_sup_table3_1267CpG[order(dm_CF_NCF_sup_table3_1267CpG$`p-value`),]

## 2.1 Keeping only CpG sites in the body of the genes

In [26]:
dm_CF_NCF_sup_table3_1267CpG_onlyGenes <- 
  dm_CF_NCF_sup_table3_1267CpG[which(dm_CF_NCF_sup_table3_1267CpG$`Genomic location`=="Body"),]

dm_CF_NCF_sup_table3_1267CpG_onlyGenes$Gene_rank <- 1:dim(dm_CF_NCF_sup_table3_1267CpG_onlyGenes)[1]

## 2.2 Searching for dm genes in the CF network

In [27]:
dm_CF_NCF_sup_table3_1267CpG_Genes.list <- unique(dm_CF_NCF_sup_table3_1267CpG_onlyGenes$Gene)

dm_CF_NCF_Genes_in_CF_network.list <- 
  dm_CF_NCF_sup_table3_1267CpG_Genes.list[which(dm_CF_NCF_sup_table3_1267CpG_Genes.list %in% CF_PPI_network.pruned.nodes$Symbol)]

dm_CF_NCF_Genes_in_CF_network.dm_CF_NCF_df <-
  dm_CF_NCF_sup_table3_1267CpG[which(dm_CF_NCF_sup_table3_1267CpG$Gene %in% dm_CF_NCF_Genes_in_CF_network.list),]

In [28]:
dm_CF_NCF_Genes_in_CF_network.dm_CF_NCF_df

Unnamed: 0_level_0,Probe ID,Gene,p-value,Median_CF_patients,Median_Controls,Genomic location,Island status,*Enhancer,CpG_pvalue_rank_,abs_change,CpG_beta_value_rank_
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<lgl>,<int>,<dbl>,<int>
647,cg13433729,EZR,1.2e-06,0.61,0.49,Body,Open sea,True,2,0.12,702
104,cg01899581,DIAPH3,9e-05,0.64,0.47,Body,N_Shelf,False,53,0.17,184
552,cg11321921,NFATC1,0.00055,0.78,0.91,Body,Island,False,240,0.13,562
306,cg05687091,NFATC1,0.00061,0.54,0.64,Body,Island,False,257,0.1,1196
1086,cg23752651,TNFRSF1A,0.00095,0.58,0.7,Body,Open sea,False,374,0.12,752
1007,cg22240998,ITPR1,0.0011,0.6,0.5,Body,Open sea,True,420,0.1,1205
891,cg18984715,CBLB,0.0012,0.79,0.67,Body,Open sea,True,441,0.12,762
234,cg04221461,AKT3,0.0022,0.46,0.56,Body,Open sea,False,613,0.1,1158
968,cg21070009,MAP2K1,0.0041,0.48,0.35,Body,Open sea,True,852,0.13,626
400,cg07671055,PLCG2,0.0056,0.48,0.59,Body,Open sea,False,952,0.11,1082


## 2.3 Network analysis

In [29]:
CF_PPI_network.pruned.dists <- distances(CF_PPI_network.pruned.igraph, mode = "out")

### 2.3.1 Distance to sink nodes

In [30]:
### DM gene to sink nodes
CF_PPI_network.pruned.dm_to_sink_nodes <- CF_PPI_network.pruned.dists[dm_CF_NCF_Genes_in_CF_network.list, sink_nodes]


In [31]:
CF_PPI_network.pruned.dm_to_sink_nodes

Unnamed: 0,CASP1,CASP3,CASP7,CYBA,CYBB,DNM1L,GABARAP,ACTN4,ARPC5,CFL1,⋯,IRF5,IRF7,IRF9,JUN,NFATC1,NFKB1,NFKB2,RELA,RELB,STAT1
EZR,inf,inf,inf,inf,inf,inf,inf,7.0,inf,7.0,⋯,inf,inf,inf,5.0,inf,4.0,5.0,3.0,4.0,inf
DIAPH3,inf,inf,inf,inf,inf,inf,inf,inf,3.0,inf,⋯,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf
NFATC1,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,⋯,inf,inf,inf,inf,0.0,inf,inf,inf,inf,inf
TNFRSF1A,4.0,3.0,4.0,5.0,6.0,4.0,9.0,6.0,7.0,7.0,⋯,4.0,4.0,5.0,5.0,5.0,4.0,4.0,5.0,5.0,4.0
ITPR1,3.0,4.0,4.0,10.0,11.0,7.0,6.0,10.0,12.0,10.0,⋯,5.0,5.0,8.0,6.0,3.0,4.0,7.0,5.0,6.0,7.0
CBLB,6.0,5.0,6.0,5.0,6.0,6.0,9.0,6.0,7.0,6.0,⋯,5.0,5.0,5.0,3.0,4.0,4.0,5.0,5.0,6.0,4.0
AKT3,7.0,7.0,7.0,9.0,10.0,8.0,9.0,6.0,11.0,6.0,⋯,6.0,2.0,6.0,4.0,2.0,3.0,2.0,4.0,3.0,5.0
MAP2K1,inf,inf,inf,inf,inf,inf,inf,4.0,inf,4.0,⋯,inf,inf,inf,2.0,inf,8.0,9.0,7.0,8.0,inf
PLCG2,4.0,5.0,5.0,6.0,7.0,6.0,7.0,5.0,8.0,5.0,⋯,6.0,5.0,6.0,3.0,2.0,4.0,5.0,5.0,6.0,5.0
BAIAP2,inf,inf,inf,inf,inf,inf,inf,inf,2.0,inf,⋯,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf


In [32]:
sink_nodes_downstream_to_dm <- data.frame(apply(X = CF_PPI_network.pruned.dm_to_sink_nodes,
      MARGIN = 1,
      FUN=function(x){
        return(sum(!is.infinite(x)))
      }))
colnames(sink_nodes_downstream_to_dm) <- "dist_to_CF_network_sink_nodes"
sink_nodes_downstream_to_dm['Gene'] <- rownames(sink_nodes_downstream_to_dm)
rownames(sink_nodes_downstream_to_dm) <- NULL

In [33]:
dm_CF_NCF_Genes_in_CF_network.dm_CF_NCF_df <- merge(dm_CF_NCF_Genes_in_CF_network.dm_CF_NCF_df,
                                                    sink_nodes_downstream_to_dm,
                                                    by = "Gene")

In [34]:
dm_CF_NCF_Genes_in_CF_network.dm_CF_NCF_df

Gene,Probe ID,p-value,Median_CF_patients,Median_Controls,Genomic location,Island status,*Enhancer,CpG_pvalue_rank_,abs_change,CpG_beta_value_rank_,dist_to_CF_network_sink_nodes
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<lgl>,<int>,<dbl>,<int>,<int>
AKT3,cg04221461,0.0022,0.46,0.56,Body,Open sea,False,613,0.1,1158,35
BAIAP2,cg02026049,0.0079,0.65,0.76,Body,N_Shelf,False,1123,0.11,1108,2
CBLB,cg18984715,0.0012,0.79,0.67,Body,Open sea,True,441,0.12,762,35
DIAPH3,cg01899581,9e-05,0.64,0.47,Body,N_Shelf,False,53,0.17,184,2
EZR,cg13433729,1.2e-06,0.61,0.49,Body,Open sea,True,2,0.12,702,15
ITPR1,cg22240998,0.0011,0.6,0.5,Body,Open sea,True,420,0.1,1205,35
MAP2K1,cg21070009,0.0041,0.48,0.35,Body,Open sea,True,852,0.13,626,15
NFATC1,cg11321921,0.00055,0.78,0.91,Body,Island,False,240,0.13,562,1
NFATC1,cg05687091,0.00061,0.54,0.64,Body,Island,False,257,0.1,1196,1
PLCG2,cg07671055,0.0056,0.48,0.59,Body,Open sea,False,952,0.11,1082,35


### 2.3.2 Distance to CFTR interactors

In [35]:
### CFTR interactors to diff methylated
CF_PPI_network.pruned.dm_from_CFTR_interactors <- data.frame(t(CF_PPI_network.pruned.dists[CFTR_interactors, dm_CF_NCF_Genes_in_CF_network.list]))
colnames(CF_PPI_network.pruned.dm_from_CFTR_interactors) <- paste("from_",
                                                              colnames(CF_PPI_network.pruned.dm_from_CFTR_interactors),
                                                              sep = "")

In [36]:
CF_PPI_network.pruned.dm_from_CFTR_interactors

Unnamed: 0_level_0,from_ADORA2B,from_CAPN1,from_CAPNS1,from_CAV1,from_CAV2,from_CSNK2A1,from_PRKACA,from_PRKCE,from_SLC9A3R1,from_SLC9A3R2,from_SYK,from_TRADD
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
EZR,1.0,1.0,1.0,inf,inf,inf,6.0,1.0,inf,inf,6.0,7.0
DIAPH3,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,9.0,8.0
NFATC1,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,3.0,5.0
TNFRSF1A,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf
ITPR1,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,2.0,4.0
CBLB,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf
AKT3,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,5.0,3.0
MAP2K1,4.0,4.0,4.0,inf,inf,inf,2.0,4.0,inf,inf,4.0,3.0
PLCG2,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,1.0,3.0
BAIAP2,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,6.0,5.0


In [37]:
CF_PPI_network.pruned.dm_from_CFTR_interactors['Gene'] <- rownames(CF_PPI_network.pruned.dm_from_CFTR_interactors)
rownames(CF_PPI_network.pruned.dm_from_CFTR_interactors) <- NULL
CF_PPI_network.pruned.dm_from_CFTR_interactors

from_ADORA2B,from_CAPN1,from_CAPNS1,from_CAV1,from_CAV2,from_CSNK2A1,from_PRKACA,from_PRKCE,from_SLC9A3R1,from_SLC9A3R2,from_SYK,from_TRADD,Gene
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
1.0,1.0,1.0,inf,inf,inf,6.0,1.0,inf,inf,6.0,7.0,EZR
inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,9.0,8.0,DIAPH3
inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,3.0,5.0,NFATC1
inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,TNFRSF1A
inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,2.0,4.0,ITPR1
inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,CBLB
inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,5.0,3.0,AKT3
4.0,4.0,4.0,inf,inf,inf,2.0,4.0,inf,inf,4.0,3.0,MAP2K1
inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,1.0,3.0,PLCG2
inf,inf,inf,inf,inf,inf,inf,inf,inf,inf,6.0,5.0,BAIAP2


In [38]:
dm_CF_NCF_Genes_in_CF_network.dm_CF_NCF_df <- merge(dm_CF_NCF_Genes_in_CF_network.dm_CF_NCF_df,
                                                    CF_PPI_network.pruned.dm_from_CFTR_interactors,
                                                    by = "Gene")
dm_CF_NCF_Genes_in_CF_network.dm_CF_NCF_df

Gene,Probe ID,p-value,Median_CF_patients,Median_Controls,Genomic location,Island status,*Enhancer,CpG_pvalue_rank_,abs_change,⋯,from_CAPNS1,from_CAV1,from_CAV2,from_CSNK2A1,from_PRKACA,from_PRKCE,from_SLC9A3R1,from_SLC9A3R2,from_SYK,from_TRADD
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<lgl>,<int>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
AKT3,cg04221461,0.0022,0.46,0.56,Body,Open sea,False,613,0.1,⋯,inf,inf,inf,inf,inf,inf,inf,inf,5.0,3.0
BAIAP2,cg02026049,0.0079,0.65,0.76,Body,N_Shelf,False,1123,0.11,⋯,inf,inf,inf,inf,inf,inf,inf,inf,6.0,5.0
CBLB,cg18984715,0.0012,0.79,0.67,Body,Open sea,True,441,0.12,⋯,inf,inf,inf,inf,inf,inf,inf,inf,inf,inf
DIAPH3,cg01899581,9e-05,0.64,0.47,Body,N_Shelf,False,53,0.17,⋯,inf,inf,inf,inf,inf,inf,inf,inf,9.0,8.0
EZR,cg13433729,1.2e-06,0.61,0.49,Body,Open sea,True,2,0.12,⋯,1.0,inf,inf,inf,6.0,1.0,inf,inf,6.0,7.0
ITPR1,cg22240998,0.0011,0.6,0.5,Body,Open sea,True,420,0.1,⋯,inf,inf,inf,inf,inf,inf,inf,inf,2.0,4.0
MAP2K1,cg21070009,0.0041,0.48,0.35,Body,Open sea,True,852,0.13,⋯,4.0,inf,inf,inf,2.0,4.0,inf,inf,4.0,3.0
NFATC1,cg11321921,0.00055,0.78,0.91,Body,Island,False,240,0.13,⋯,inf,inf,inf,inf,inf,inf,inf,inf,3.0,5.0
NFATC1,cg05687091,0.00061,0.54,0.64,Body,Island,False,257,0.1,⋯,inf,inf,inf,inf,inf,inf,inf,inf,3.0,5.0
PLCG2,cg07671055,0.0056,0.48,0.59,Body,Open sea,False,952,0.11,⋯,inf,inf,inf,inf,inf,inf,inf,inf,1.0,3.0


In [39]:
colnames(CF_PPI_network.pruned.dm_from_CFTR_interactors)

In [40]:
dm_CF_NCF_Genes_in_CF_network.dm_CF_NCF_df$min_distance_to_CFTR_interactor <- apply(dm_CF_NCF_Genes_in_CF_network.dm_CF_NCF_df[,colnames(CF_PPI_network.pruned.dm_from_CFTR_interactors)],
      1,
       FUN = function(x) {return(min(x, finite=T))})

In [41]:
dm_CF_NCF_Genes_in_CF_network.dm_CF_NCF_df

Gene,Probe ID,p-value,Median_CF_patients,Median_Controls,Genomic location,Island status,*Enhancer,CpG_pvalue_rank_,abs_change,⋯,from_CAV1,from_CAV2,from_CSNK2A1,from_PRKACA,from_PRKCE,from_SLC9A3R1,from_SLC9A3R2,from_SYK,from_TRADD,min_distance_to_CFTR_interactor
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<lgl>,<int>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
AKT3,cg04221461,0.0022,0.46,0.56,Body,Open sea,False,613,0.1,⋯,inf,inf,inf,inf,inf,inf,inf,5.0,3.0,3
BAIAP2,cg02026049,0.0079,0.65,0.76,Body,N_Shelf,False,1123,0.11,⋯,inf,inf,inf,inf,inf,inf,inf,6.0,5.0,5
CBLB,cg18984715,0.0012,0.79,0.67,Body,Open sea,True,441,0.12,⋯,inf,inf,inf,inf,inf,inf,inf,inf,inf,1
DIAPH3,cg01899581,9e-05,0.64,0.47,Body,N_Shelf,False,53,0.17,⋯,inf,inf,inf,inf,inf,inf,inf,9.0,8.0,8
EZR,cg13433729,1.2e-06,0.61,0.49,Body,Open sea,True,2,0.12,⋯,inf,inf,inf,6.0,1.0,inf,inf,6.0,7.0,1
ITPR1,cg22240998,0.0011,0.6,0.5,Body,Open sea,True,420,0.1,⋯,inf,inf,inf,inf,inf,inf,inf,2.0,4.0,2
MAP2K1,cg21070009,0.0041,0.48,0.35,Body,Open sea,True,852,0.13,⋯,inf,inf,inf,2.0,4.0,inf,inf,4.0,3.0,2
NFATC1,cg11321921,0.00055,0.78,0.91,Body,Island,False,240,0.13,⋯,inf,inf,inf,inf,inf,inf,inf,3.0,5.0,3
NFATC1,cg05687091,0.00061,0.54,0.64,Body,Island,False,257,0.1,⋯,inf,inf,inf,inf,inf,inf,inf,3.0,5.0,3
PLCG2,cg07671055,0.0056,0.48,0.59,Body,Open sea,False,952,0.11,⋯,inf,inf,inf,inf,inf,inf,inf,1.0,3.0,1


### 2.3.2 Distance to source nodes

In [44]:
source_nodes_not_CFTR_interactors <- source_nodes[!source_nodes %in% CFTR_interactors]

In [45]:
source_nodes_not_CFTR_interactors

In [46]:
### Source nodes to diff methylated
CF_PPI_network.pruned.dm_from_source_nodes <- data.frame(t(CF_PPI_network.pruned.dists[source_nodes_not_CFTR_interactors, dm_CF_NCF_Genes_in_CF_network.list]))
colnames(CF_PPI_network.pruned.dm_from_source_nodes) <- paste("from_",
                                                              colnames(CF_PPI_network.pruned.dm_from_source_nodes),
                                                              sep = "")

In [47]:
rownames(CF_PPI_network.pruned.dm_from_source_nodes)

In [48]:
CF_PPI_network.pruned.dm_from_source_nodes['Gene'] <- rownames(CF_PPI_network.pruned.dm_from_source_nodes)
rownames(CF_PPI_network.pruned.dm_from_source_nodes) <- NULL

In [49]:
dm_CF_NCF_Genes_in_CF_network.dm_CF_NCF_df <- merge(dm_CF_NCF_Genes_in_CF_network.dm_CF_NCF_df,
                                                    CF_PPI_network.pruned.dm_from_source_nodes,
                                                    by = "Gene")
dm_CF_NCF_Genes_in_CF_network.dm_CF_NCF_df

Gene,Probe ID,p-value,Median_CF_patients,Median_Controls,Genomic location,Island status,*Enhancer,CpG_pvalue_rank_,abs_change,⋯,from_PRKCE,from_SLC9A3R1,from_SLC9A3R2,from_SYK,from_TRADD,min_distance_to_CFTR_interactor,from_SRC,from_PLCB1,from_PLCB3,from_EZR
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<lgl>,<int>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
AKT3,cg04221461,0.0022,0.46,0.56,Body,Open sea,False,613,0.1,⋯,inf,inf,inf,5.0,3.0,3,2.0,7.0,7.0,inf
BAIAP2,cg02026049,0.0079,0.65,0.76,Body,N_Shelf,False,1123,0.11,⋯,inf,inf,inf,6.0,5.0,5,4.0,9.0,9.0,inf
CBLB,cg18984715,0.0012,0.79,0.67,Body,Open sea,True,441,0.12,⋯,inf,inf,inf,inf,inf,1,inf,inf,inf,inf
DIAPH3,cg01899581,9e-05,0.64,0.47,Body,N_Shelf,False,53,0.17,⋯,inf,inf,inf,9.0,8.0,8,7.0,12.0,12.0,inf
EZR,cg13433729,1.2e-06,0.61,0.49,Body,Open sea,True,2,0.12,⋯,1.0,inf,inf,6.0,7.0,1,5.0,5.0,5.0,0.0
ITPR1,cg22240998,0.0011,0.6,0.5,Body,Open sea,True,420,0.1,⋯,inf,inf,inf,2.0,4.0,2,10.0,1.0,1.0,inf
MAP2K1,cg21070009,0.0041,0.48,0.35,Body,Open sea,True,852,0.13,⋯,4.0,inf,inf,4.0,3.0,2,2.0,5.0,5.0,3.0
NFATC1,cg11321921,0.00055,0.78,0.91,Body,Island,False,240,0.13,⋯,inf,inf,inf,3.0,5.0,3,4.0,4.0,4.0,inf
NFATC1,cg05687091,0.00061,0.54,0.64,Body,Island,False,257,0.1,⋯,inf,inf,inf,3.0,5.0,3,4.0,4.0,4.0,inf
PLCG2,cg07671055,0.0056,0.48,0.59,Body,Open sea,False,952,0.11,⋯,inf,inf,inf,1.0,3.0,1,9.0,6.0,6.0,inf


In [50]:
dm_CF_NCF_Genes_in_CF_network.dm_CF_NCF_df$min_distance_to_source_nodes_2 <- apply(dm_CF_NCF_Genes_in_CF_network.dm_CF_NCF_df[,c(colnames(CF_PPI_network.pruned.dm_from_source_nodes),
                                                                                                                               "min_distance_to_CFTR_interactor")],
      1,
       FUN = function(x) {return(min(x, finite=T))})

In [51]:
dm_CF_NCF_Genes_in_CF_network.dm_CF_NCF_df

Gene,Probe ID,p-value,Median_CF_patients,Median_Controls,Genomic location,Island status,*Enhancer,CpG_pvalue_rank_,abs_change,⋯,from_SLC9A3R1,from_SLC9A3R2,from_SYK,from_TRADD,min_distance_to_CFTR_interactor,from_SRC,from_PLCB1,from_PLCB3,from_EZR,min_distance_to_source_nodes_2
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<lgl>,<int>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
AKT3,cg04221461,0.0022,0.46,0.56,Body,Open sea,False,613,0.1,⋯,inf,inf,5.0,3.0,3,2.0,7.0,7.0,inf,2
BAIAP2,cg02026049,0.0079,0.65,0.76,Body,N_Shelf,False,1123,0.11,⋯,inf,inf,6.0,5.0,5,4.0,9.0,9.0,inf,4
CBLB,cg18984715,0.0012,0.79,0.67,Body,Open sea,True,441,0.12,⋯,inf,inf,inf,inf,1,inf,inf,inf,inf,1
DIAPH3,cg01899581,9e-05,0.64,0.47,Body,N_Shelf,False,53,0.17,⋯,inf,inf,9.0,8.0,8,7.0,12.0,12.0,inf,7
EZR,cg13433729,1.2e-06,0.61,0.49,Body,Open sea,True,2,0.12,⋯,inf,inf,6.0,7.0,1,5.0,5.0,5.0,0.0,0
ITPR1,cg22240998,0.0011,0.6,0.5,Body,Open sea,True,420,0.1,⋯,inf,inf,2.0,4.0,2,10.0,1.0,1.0,inf,1
MAP2K1,cg21070009,0.0041,0.48,0.35,Body,Open sea,True,852,0.13,⋯,inf,inf,4.0,3.0,2,2.0,5.0,5.0,3.0,2
NFATC1,cg11321921,0.00055,0.78,0.91,Body,Island,False,240,0.13,⋯,inf,inf,3.0,5.0,3,4.0,4.0,4.0,inf,3
NFATC1,cg05687091,0.00061,0.54,0.64,Body,Island,False,257,0.1,⋯,inf,inf,3.0,5.0,3,4.0,4.0,4.0,inf,3
PLCG2,cg07671055,0.0056,0.48,0.59,Body,Open sea,False,952,0.11,⋯,inf,inf,1.0,3.0,1,9.0,6.0,6.0,inf,1


### 2.3.4 Betweenness centrality

In [52]:
CF_PPI_network.bc.df <- data.frame(betweenness(CF_PPI_network.pruned.igraph))
CF_PPI_network.bc.df$Symbol <- rownames(CF_PPI_network.bc.df)
rownames(CF_PPI_network.bc.df) <- NULL
colnames(CF_PPI_network.bc.df) <- c("BC.score", "Symbol")

In [53]:
CF_PPI_network.bc.df

BC.score,Symbol
<dbl>,<chr>
93.0000000,PLCB3
383.0000000,TLR4
8.0000000,WASL
744.0440476,MAP2K7
0.0000000,TNFSF11
663.0399625,IKBKB
0.0000000,PPP1R12A
1620.7172382,CHUK
0.0000000,TLR3
1042.2583333,CFLAR


In [54]:
CF_PPI_network.bc.df.dm_CF_NCF_df <- CF_PPI_network.bc.df[which(CF_PPI_network.bc.df$Symbol %in%  dm_CF_NCF_Genes_in_CF_network.list),]

In [55]:
CF_PPI_network.bc.df.dm_CF_NCF_df

Unnamed: 0_level_0,BC.score,Symbol
Unnamed: 0_level_1,<dbl>,<chr>
69,3295.1806,PLCG2
73,419.6667,TNFRSF1A
120,2361.4579,MAP2K1
137,0.0,CBLB
149,602.8583,ITPR1
214,15.0,DIAPH3
225,661.0,BAIAP2
227,2784.6197,AKT3
238,747.3944,EZR
322,0.0,NFATC1


In [56]:
dm_CF_NCF_Genes_in_CF_network.dm_CF_NCF_df <- merge(dm_CF_NCF_Genes_in_CF_network.dm_CF_NCF_df,
                                                    CF_PPI_network.bc.df.dm_CF_NCF_df,
                                                    by.x = "Gene",
                                                    by.y = "Symbol",
                                                   all.x = T)
dm_CF_NCF_Genes_in_CF_network.dm_CF_NCF_df

Gene,Probe ID,p-value,Median_CF_patients,Median_Controls,Genomic location,Island status,*Enhancer,CpG_pvalue_rank_,abs_change,⋯,from_SLC9A3R2,from_SYK,from_TRADD,min_distance_to_CFTR_interactor,from_SRC,from_PLCB1,from_PLCB3,from_EZR,min_distance_to_source_nodes_2,BC.score
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<lgl>,<int>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>
AKT3,cg04221461,0.0022,0.46,0.56,Body,Open sea,False,613,0.1,⋯,inf,5.0,3.0,3,2.0,7.0,7.0,inf,2,2784.6197
BAIAP2,cg02026049,0.0079,0.65,0.76,Body,N_Shelf,False,1123,0.11,⋯,inf,6.0,5.0,5,4.0,9.0,9.0,inf,4,661.0
CBLB,cg18984715,0.0012,0.79,0.67,Body,Open sea,True,441,0.12,⋯,inf,inf,inf,1,inf,inf,inf,inf,1,0.0
DIAPH3,cg01899581,9e-05,0.64,0.47,Body,N_Shelf,False,53,0.17,⋯,inf,9.0,8.0,8,7.0,12.0,12.0,inf,7,15.0
EZR,cg13433729,1.2e-06,0.61,0.49,Body,Open sea,True,2,0.12,⋯,inf,6.0,7.0,1,5.0,5.0,5.0,0.0,0,747.3944
ITPR1,cg22240998,0.0011,0.6,0.5,Body,Open sea,True,420,0.1,⋯,inf,2.0,4.0,2,10.0,1.0,1.0,inf,1,602.8583
MAP2K1,cg21070009,0.0041,0.48,0.35,Body,Open sea,True,852,0.13,⋯,inf,4.0,3.0,2,2.0,5.0,5.0,3.0,2,2361.4579
NFATC1,cg11321921,0.00055,0.78,0.91,Body,Island,False,240,0.13,⋯,inf,3.0,5.0,3,4.0,4.0,4.0,inf,3,0.0
NFATC1,cg05687091,0.00061,0.54,0.64,Body,Island,False,257,0.1,⋯,inf,3.0,5.0,3,4.0,4.0,4.0,inf,3,0.0
PLCG2,cg07671055,0.0056,0.48,0.59,Body,Open sea,False,952,0.11,⋯,inf,1.0,3.0,1,9.0,6.0,6.0,inf,1,3295.1806


### 2.3.5 Pathways

In [57]:
dm_CF_NCF_Genes_in_CF_network.CF_network_df <- CF_PPI_network.pruned.nodes[which(CF_PPI_network.pruned.nodes$Symbol %in% dm_CF_NCF_Genes_in_CF_network.list),]

dm_CF_NCF_Genes_in_CF_network.dm_CF_NCF_df <- merge(dm_CF_NCF_Genes_in_CF_network.dm_CF_NCF_df,
                                                    CF_PPI_network.pruned.nodes[,1:18],
                                                    by.x = "Gene",
                                                    by.y = "Symbol",
                                                   all.x = T)
dm_CF_NCF_Genes_in_CF_network.dm_CF_NCF_df

Gene,Probe ID,p-value,Median_CF_patients,Median_Controls,Genomic location,Island status,*Enhancer,CpG_pvalue_rank_,abs_change,⋯,Osteoclast differentiation,Regulation of actin cytoskeleton,RIG-I-like receptor signaling pathway,T cell receptor signaling pathway,Th17 cell differentiation,TNF signaling pathway,Toll-like receptor signaling pathway,Viral protein interaction with cytokine and cytokine receptor,sum,pathway
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<lgl>,<int>,<dbl>,⋯,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<chr>
AKT3,cg04221461,0.0022,0.46,0.56,Body,Open sea,False,613,0.1,⋯,1,0,0,1,0,1,1,0,6,
BAIAP2,cg02026049,0.0079,0.65,0.76,Body,N_Shelf,False,1123,0.11,⋯,0,1,0,0,0,0,0,0,1,Regulation of actin cytoskeleton
CBLB,cg18984715,0.0012,0.79,0.67,Body,Open sea,True,441,0.12,⋯,0,0,0,1,0,0,0,0,2,
DIAPH3,cg01899581,9e-05,0.64,0.47,Body,N_Shelf,False,53,0.17,⋯,0,1,0,0,0,0,0,0,1,Regulation of actin cytoskeleton
EZR,cg13433729,1.2e-06,0.61,0.49,Body,Open sea,True,2,0.12,⋯,0,1,0,0,0,0,0,0,1,Regulation of actin cytoskeleton
ITPR1,cg22240998,0.0011,0.6,0.5,Body,Open sea,True,420,0.1,⋯,0,0,0,0,0,0,0,0,3,
MAP2K1,cg21070009,0.0041,0.48,0.35,Body,Open sea,True,852,0.13,⋯,1,1,0,1,0,1,1,0,6,
NFATC1,cg11321921,0.00055,0.78,0.91,Body,Island,False,240,0.13,⋯,1,0,0,1,1,0,0,0,4,
NFATC1,cg05687091,0.00061,0.54,0.64,Body,Island,False,257,0.1,⋯,1,0,0,1,1,0,0,0,4,
PLCG2,cg07671055,0.0056,0.48,0.59,Body,Open sea,False,952,0.11,⋯,1,0,0,0,0,0,0,0,2,


In [58]:
write.table(dm_CF_NCF_Genes_in_CF_network.dm_CF_NCF_df,
            file = "../CFMethylomeSysbio/dm_CpG_CF_NCF_in_CF_network_analysis_20241009.tsv",
            sep = "\t",
            row.names = F,
            quote = FALSE)

## Adding a tag in the CF network

In [60]:
CF_PPI_network.pruned.nodes$diff_methylated <- CF_PPI_network.pruned.nodes$Symbol %in% dm_CF_NCF_Genes_in_CF_network.list



FALSE  TRUE 
  329    10 

In [62]:
colnames(CF_PPI_network.pruned.nodes)

# 3. Differentially methylated CpG sites between severe and mild dataframe

In [None]:
dm_severe_mild_sup_table4_189CpG <- 
  read.table(file = "../CFMethylomeSysbio/dm_CpG_CF_severe_mild_Magalhaes_2018_sup_table_4.tsv",
             skip = 1,
             quote = "\"",
             sep = "\t",
             header = T,
             check.names = F)

dm_severe_mild_sup_table4_189CpG <- 
  dm_severe_mild_sup_table4_189CpG[order(dm_severe_mild_sup_table4_189CpG$`p-value`),]
dm_severe_mild_sup_table4_189CpG$CpG_rank <- 1:dim(dm_severe_mild_sup_table4_189CpG)[1]

## 3.1 Keeping only CpG sites in the body of the genes

In [None]:
dm_severe_mild_sup_table4_189CpG_onlyGenes <- 
  dm_severe_mild_sup_table4_189CpG[which(dm_severe_mild_sup_table4_189CpG$`Genomic location`=="Body"),]

dm_severe_mild_sup_table4_189CpG_onlyGenes$Gene_rank <- 1:dim(dm_severe_mild_sup_table4_189CpG_onlyGenes)[1]

# 3.2 Searching for dm genes in the CF network

In [None]:
dm_severe_mild_sup_table4_189CpG_Genes.list <- unique(dm_severe_mild_sup_table4_189CpG_onlyGenes$Gene)

dm_CF_NCF_Genes_in_CF_network.list <- 
  dm_severe_mild_sup_table4_189CpG_Genes.list[which(dm_severe_mild_sup_table4_189CpG_Genes.list %in% CF_PPI_network.pruned.nodes$Symbol)]

dm_CF_NCF_Genes_in_CF_network.CF_network_df <- 
  CF_PPI_network.lcc.node_type.nodes[which(CF_PPI_network.pruned.nodes$Symbol %in% dm_CF_NCF_Genes_in_CF_network.list),]

dm_CF_NCF_Genes_in_CF_network.dm_CF_NCF_df <-
  dm_severe_mild_sup_table4_189CpG[which(dm_severe_mild_sup_table4_189CpG$Gene %in% dm_CF_NCF_Genes_in_CF_network.list),]

In [None]:
dm_CF_NCF_Genes_in_CF_network.list