# individPath

## prepare data

In [3]:
library(Biobase)
library(usethis)
library(GSVAdata)
library(hu6800.db)

#### Expression data with filtered entrezID
##### get set_entrezID with matrix data
#' Collapse a probe-level expression matrix to Entrez-gene level
#'
#' Reads a CSV whose first column holds probe IDs (used as row names), maps
#' each probe to an Entrez gene ID via the annotation table of the requested
#' package, and summarises all probes that share an Entrez ID by taking the
#' per-sample median. Probes without a mapping are dropped by the inner merge.
#'
#' @param matrix_file Path to the CSV expression file (header row, probe IDs
#'   in the first column).
#' @param bioc_package Name of the annotation package to use. Only
#'   "hu6800.db" is handled; any other value raises an error.
#' @return A matrix with one row per Entrez gene ID (as row names) and one
#'   column per sample column of the input file.
convert_probe_to_entrezID <- function(matrix_file, bioc_package) {
  # Fail fast with a clear message. Previously an unsupported package only
  # surfaced later as "object 'probe_ENTREZID' not found".
  if (!identical(bioc_package, "hu6800.db")) {
    stop(
      "Unsupported bioc_package '", paste(bioc_package, collapse = ", "),
      "': only \"hu6800.db\" is currently handled.",
      call. = FALSE
    )
  }
  probe_to_entrez <- toTable(hu6800ENTREZID)

  expr <- as.data.frame(read.csv(matrix_file, header = TRUE, row.names = 1))
  expr$probe_id <- rownames(expr)

  # Inner join: keeps only probes that have an Entrez mapping.
  merged <- merge(probe_to_entrez, expr, by = "probe_id")

  # Select sample columns by name and keep the data-frame shape, so a file
  # with a single sample column is not collapsed to a bare vector.
  sample_cols <- setdiff(names(merged), c("probe_id", "gene_id"))
  gene_level <- aggregate(
    merged[, sample_cols, drop = FALSE],
    by = list(merged$gene_id),
    FUN = median
  )

  # aggregate() returns the grouping key as the first column ("Group.1");
  # move it into the row names and keep only the sample columns.
  rownames(gene_level) <- gene_level$Group.1
  as.matrix(gene_level[, -1, drop = FALSE])
}
## Build the Entrez-ID-indexed expression matrix for the GSE1122 series
GSE1122_setentrez <- convert_probe_to_entrezID(
  matrix_file = "F:/lab_data/0. data/datasets/csv data/GSE1122_series_matrix.csv",
  bioc_package = "hu6800.db"
)

#### file data as example

In [5]:
# All relative paths below (pathway file, result folder) resolve against
# this directory.
setwd("F:/lab_related_R/individPath")
#tumorFile = "GSE1122_disease.txt"
#refFile = "GSE1122_control.txt"
PathwayFile <- "file.txt"

# The analysis cell below reads gene IDs from the FIRST column of CaseData
# (`names(patient) <- CaseData[,1]`) and treats the remaining columns as
# samples (`colnames(CaseData)[-1]`, `CaseData[,i+1]`). GSE1122_setentrez
# keeps the Entrez IDs only as row names, so prepend them as a numeric
# column. Without this, the first case sample was dropped and its expression
# values were used as gene names.
# NOTE(review): SRGgenePair() receives the same matrices; confirm that it
# also expects gene IDs in column 1.
entrez_ids <- as.numeric(rownames(GSE1122_setentrez))
# Columns 1-5 are used as controls, columns 6-10 as cases.
ControlData <- cbind(GeneID = entrez_ids, GSE1122_setentrez[, 1:5])
CaseData <- cbind(GeneID = entrez_ids, GSE1122_setentrez[, 6:10])

### Load R functions

> source("D:/individPath_1.0/individPath/R/individPath.r") <br>
> source("D:/individPath_1.0/individPath/R/individPathCal.r")  <br>
> source("D:/individPath_1.0/individPath/R/SRGgenePair.r") <br>

In [17]:
# Sample labels: every CaseData column except the first, which the loop
# below uses as the gene-ID column.
SampleInfo <- colnames(CaseData)[-1]
NumSample <- length(SampleInfo)
# fill = TRUE pads rows that have fewer fields than the widest row.
PathData <- read.table(PathwayFile, header = FALSE, sep = "\t", fill = TRUE)
# The first column of the pathway file is used as the pathway identifier.
PathName <- as.matrix(PathData[, 1])
OutPath <- paste0("individPath_Result_", Sys.Date())
# Re-running on the same day reuses the existing folder instead of warning.
if (!dir.exists(OutPath)) {
  dir.create(OutPath)
}

###---------Identifying Stable gene pair -------###
print("Identifying stable and reversal intra-pathway gene pairs")
cutoff <- 0.99
GP.result <- SRGgenePair(ControlData, CaseData, PathData, cutoff)
StableGP <- GP.result$BG.GenePairs
NumStable <- nrow(StableGP)
ReversalGP <- GP.result$ReversalStat
PathGP <- GP.result$PathGP

###---------Individualized altered pathway -------###
# One column is appended per sample: Result collects the values returned by
# individPathCal(), AD.result their Benjamini-Hochberg adjusted counterparts.
Result <- NULL
AD.result <- NULL
# seq_len() skips the loop when there are no samples (1:0 would run twice).
for (i in seq_len(NumSample)) {
  print(paste0("individPath_processing ", i, "/", NumSample, " : ", SampleInfo[i]))
  # Column i + 1 because column 1 holds the gene IDs.
  patient <- CaseData[, i + 1]
  names(patient) <- CaseData[, 1]

  Result.tmp <- individPathCal(patient, StableGP, ReversalGP, NumStable, PathGP)
  Result <- cbind(Result, Result.tmp)
  AD.p <- as.matrix(p.adjust(Result.tmp, "BH"))
  AD.result <- cbind(AD.result, AD.p)
}
Last.Result <- cbind(PathName, Result)
Last.ADResult <- cbind(PathName, AD.result)

# Label the columns: pathway ID first, then one column per sample.
sample_cols <- 1 + seq_len(NumSample)
colnames(Last.Result)[sample_cols] <- SampleInfo
colnames(Last.Result)[1] <- "PathwayID"
colnames(Last.ADResult)[sample_cols] <- SampleInfo
colnames(Last.ADResult)[1] <- "PathwayID"

[1] "Identifying stable and reversal intra-pathway gene pairs"
[1] "Processing 1/186 : KEGG_GLYCOLYSIS_GLUCONEOGENESIS"
[1] "Processing 2/186 : KEGG_CITRATE_CYCLE_TCA_CYCLE"
[1] "Processing 3/186 : KEGG_PENTOSE_PHOSPHATE_PATHWAY"
[1] "Processing 4/186 : KEGG_PENTOSE_AND_GLUCURONATE_INTERCONVERSIONS"
[1] "Processing 5/186 : KEGG_FRUCTOSE_AND_MANNOSE_METABOLISM"
[1] "Processing 6/186 : KEGG_GALACTOSE_METABOLISM"
[1] "Processing 7/186 : KEGG_ASCORBATE_AND_ALDARATE_METABOLISM"
[1] "Processing 8/186 : KEGG_FATTY_ACID_METABOLISM"
[1] "Processing 9/186 : KEGG_STEROID_BIOSYNTHESIS"
[1] "Processing 10/186 : KEGG_PRIMARY_BILE_ACID_BIOSYNTHESIS"
[1] "Processing 11/186 : KEGG_STEROID_HORMONE_BIOSYNTHESIS"
[1] "Processing 12/186 : KEGG_OXIDATIVE_PHOSPHORYLATION"
[1] "Processing 13/186 : KEGG_PURINE_METABOLISM"
[1] "Processing 14/186 : KEGG_PYRIMIDINE_METABOLISM"
[1] "Processing 15/186 : KEGG_ALANINE_ASPARTATE_AND_GLUTAMATE_METABOLISM"
[1] "Processing 16/186 : KEGG_GLYCINE_SERINE_AND_THREONINE_METAB

In [20]:
### Export results
# Last.Result holds the unadjusted values from individPathCal();
# Last.ADResult holds the same values after p.adjust(..., "BH").
# Each table is written to the file whose name matches its content
# (the two file names were swapped before).
# NOTE(review): col.names = FALSE drops the "PathwayID"/sample header that
# was set on both tables; confirm the header-less format is intended.
write.table(Last.Result,
            file = file.path(OutPath, "individPath_pvalue_result.txt"),
            row.names = FALSE, col.names = FALSE, sep = "\t", quote = FALSE)
write.table(Last.ADResult,
            file = file.path(OutPath, "individPath_BH_result.txt"),
            row.names = FALSE, col.names = FALSE, sep = "\t", quote = FALSE)

In [21]:
# Print the R version, platform, locale and attached package versions used
# for this run.
sessionInfo()

R version 3.5.3 (2019-03-11)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 17763)

Matrix products: default

locale:
[1] LC_COLLATE=English_United States.1252 
[2] LC_CTYPE=English_United States.1252   
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C                          
[5] LC_TIME=English_United States.1252    

attached base packages:
[1] stats4    parallel  stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
 [1] individPath_1.0      hu6800.db_3.2.3      GSVAdata_1.18.0     
 [4] hgu95a.db_3.2.3      org.Hs.eg.db_3.6.0   GSEABase_1.42.0     
 [7] graph_1.58.2         annotate_1.58.0      XML_3.98-1.19       
[10] AnnotationDbi_1.42.1 IRanges_2.14.12      S4Vectors_0.18.3    
[13] usethis_1.5.0        Biobase_2.40.0       BiocGenerics_0.26.0 

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.1      pillar_1.3.1    compiler_3.5.3  base64enc_0.1-3
 [5] bitops_1.0-6    tools_3.5.3 