In [None]:
# from redmine (end of ticket): https://imbredmine.medizin.tu-dresden.de/redmine/issues/187

In [None]:
# For each of the 1..n networks, this notebook:
#  - creates the basic regNet folder structure,
#  - trains a network on the corresponding training data (expression and methylation files),
#  - combines the partial results of the single training jobs into one whole network.

In [38]:
# Root directory of the publication scripts. Later cells build their paths from it
# with paste0(), so the trailing slash is required.
myPath <- "/data/bcu_projects/MelBrainSys_PostdocProject_Gruetzmann/publications/2022-my-MelBrainSys-paper/scripts-etc-for-publication/"

In [2]:
library(parallel)

In [3]:
# Path of an R package library below the project root (presumably the library of
# the project's conda environment -- confirm).
# NOTE(review): localRlibs is not referenced by any other visible cell; library()
# below is called without lib.loc. Confirm whether it was meant to be passed on.
localRlibs <- paste0(myPath, "conda/lib/R/library/")
library(regNet)

Loading required package: glmnet

Loading required package: Matrix

Loaded glmnet 4.1-6

Loading required package: lars

Loaded lars 1.3


Loading required package: covTest

Loading required package: glmpath

Loading required package: survival

Loading required package: MASS



In [28]:
# we train 25 networks in total
# here you set the range of which networks to train in the current run
startRun = 1
endRun = 2
# startRun:endRun is used in later cells to build file and project names.
# R silently counts downwards when the bounds are swapped (e.g. 5:3 -> 5, 4, 3),
# so reject invalid ranges right here instead of training unexpected runs.
stopifnot(length(startRun) == 1, length(endRun) == 1,
          startRun == round(startRun), endRun == round(endRun),
          startRun >= 1, startRun <= endRun)

In [13]:
# Sub-directory (relative to the regNet directory) that holds the training data files
dataSubPath = "Data/"
# regNet working directory below the project root
regNetPath = paste0(myPath, "regNet/")
# Passed as the `output` argument to the regNet calls in later cells (presumably
# toggles their progress output). Spelled TRUE because T is an ordinary variable
# that can be reassigned, whereas TRUE is a reserved constant.
output = TRUE
# Relative paths used in the following cells (e.g. paste0(dataSubPath, ...)) are
# resolved against this directory
setwd(regNetPath)
# Absolute path of the data directory, handed to the regNet data loader
loadPath = paste0( regNetPath, dataSubPath )

### Check that the expression and methylation training data set files exist
If they do not exist, run scripts/create-training-test-data.r.ipynb first.

In [14]:
# Names of the gene expression training files, one per selected run
geneExprFiles = paste0("TrainSet_ExpressionData_regNet_Run_",startRun:endRun,".txt")
geneExprFiles
# Paths are relative to the current working directory (the regNet directory)
geneExprPaths = paste0(dataSubPath,geneExprFiles)
geneExprMissing = geneExprPaths[!file.exists(geneExprPaths)]
if(length(geneExprMissing) == 0) {
    cat("good: all gene expression training set files exist\n")
} else {
    # stop() instead of q("no"): q() ends the whole R session and exits with
    # status 0, so a calling script could not detect the failure. stop() raises an
    # error (non-zero exit status under Rscript) and keeps an interactive session alive.
    stop("bad: not all gene expression training set files exist, please check:\n ",
         paste0(geneExprMissing,collapse = ", "), call. = FALSE)
}

good: all gene expression training set files exist


In [15]:
# Names of the gene methylation training files, one per selected run
geneMethFiles = paste0("TrainSet_MethylationData_regNet_Run_",startRun:endRun,".txt")
geneMethFiles
# Paths are relative to the current working directory (the regNet directory)
geneMethPaths = paste0(dataSubPath,geneMethFiles)
geneMethMissing = geneMethPaths[!file.exists(geneMethPaths)]
if(length(geneMethMissing) == 0) {
    cat("good: all gene methylation training set files exist\n")
} else {
    # stop() instead of q("no"): q() ends the whole R session and exits with
    # status 0, so a calling script could not detect the failure. stop() raises an
    # error (non-zero exit status under Rscript) and keeps an interactive session alive.
    stop("bad: not all gene methylation training set files exist, please check:\n ",
         paste0(geneMethMissing,collapse = ", "), call. = FALSE)
}

good: all gene methylation training set files exist


In [16]:
# Name of the network; handed to the regNet training and combine calls
networkName <- "TcgaMelanomaExprMeth"
# Number of regNet jobs into which the training of one network is split
totalNumberOfJobs <- 20
# Number of those jobs that mclapply runs at the same time
nbParallelJobs <- 10
# With totalNumberOfJobs == nbParallelJobs all regNet jobs are calculated simultaneously.
# On a machine with fewer CPUs or less RAM, lower both totalNumberOfJobs and nbParallelJobs.

In [26]:
# Peek at the first four entries of `loc` of the most recently loaded data set.
# `data` is only assigned inside the training loop of a later cell; on a fresh
# kernel the name resolves to the function utils::data and `data$loc` would fail,
# so the peek is skipped until a data set has actually been loaded.
if (exists("data") && !is.function(data)) data$loc[1:4]

### Run training
This can take several hours and requires multiple CPUs and several GB of RAM.

In [27]:
# start this with nohup and RScript, not in jupyter
#
# Trains one network per selected run: creates the regNet project folder, loads
# the expression/methylation training data and runs the regNet jobs in parallel.

starttTotal = Sys.time()
runIds = startRun:endRun

for( idx in seq_along(geneExprFiles)) {
    projectName = paste0("TrainNetwork-",runIds[idx])
    cat("project",projectName,"\n")
    cat("  ",geneExprFiles[idx],"\n")
    cat("  ",geneMethFiles[idx],"\n")

    startt = Sys.time()
    projectPath = createBasicFolderStructure( projectName = projectName, path = regNetPath, output = output )

    # the methylation file is passed in the copy number slot of the regNet loader
    data = loadGeneExpressionAndCopyNumberDataSet(
        geneExpressionFile = geneExprFiles[idx],
        geneCopyNumberFile = geneMethFiles[idx],
        path = loadPath )

    # start parallel calculation of this training set:
    # totalNumberOfJobs jobs in total, at most nbParallelJobs of them at the same time
    tmp = mclapply(seq_len(totalNumberOfJobs),mc.cores = nbParallelJobs, function(j)
        learnNetwork_ParallelComputation( data = data, networkName = networkName, cores = totalNumberOfJobs,
            job = j, path = projectPath, nfolds = 10, cvReplicates = 10, output = output ))

    # mclapply does not abort when a worker fails: the error is stored as a
    # "try-error" object in the result list and only a warning is given.
    # Fail loudly so an incomplete network is not combined later on.
    failedJobs = which(vapply(tmp, inherits, logical(1), what = "try-error"))
    if(length(failedJobs) > 0) {
        stop("training of ", projectName, " failed for job(s) ",
             paste(failedJobs, collapse = ", "), "; first error: ",
             as.character(tmp[[failedJobs[1]]]), call. = FALSE)
    }

    # optional snapshot of the workspace after each network:
    #save.image(file = paste0("TrainNetwork-",runIds[idx],"-image.RData"))
    endt = Sys.time()
    timeDiff = difftime(endt,startt,units = "min")
    # format() keeps the unit, e.g. "12.3 mins needed"
    message(format(timeDiff)," needed")
}

endtTotal = Sys.time()
cat("all network trainings done. Needed.\n")
endtTotal - starttTotal

# to see the progress, check the subdirectories "TrainNetwork-*" that appear in the regNet directory

### Combine the single network parts into a whole network for each TrainNetwork

In [40]:
# From here on myPath points to the regNet directory (the combine cell below
# builds the TrainNetwork-* paths from it).
# Append "regNet/" only once: without the guard, re-running this cell would
# produce ".../regNet/regNet/" and setwd() would fail.
if (!endsWith(myPath, "regNet/")) {
    myPath = paste0(myPath,"regNet/")
}
setwd(myPath)

In [42]:
# For every selected run, merge the partial results written by the training jobs
# into one whole network.
# combineSingleJobs reports that it saves TcgaMelanomaExprMeth_NetworkCreator.Rout and
# TcgaMelanomaExprMeth_NetworkCreator_CVStatistics.txt
#  in the TrainNetwork-*/NetworkModel/WholeNetwork/  subdirectories

startt = Sys.time()
for(runId in startRun:endRun) {
    nwSubdir = paste0("TrainNetwork-", runId)
    path = paste0(myPath, nwSubdir)
    message(path,"\n")
    combineSingleJobs(
        networkName = networkName,
        cores = totalNumberOfJobs,
        path = path,
        output = output
    )
}
endt = Sys.time()
# elapsed time; about 1.5 min for 2 networks
endt - startt

/data/bcu_projects/MelBrainSys_PostdocProject_Gruetzmann/publications/2022-my-MelBrainSys-paper/scripts-etc-for-publication/regNet/TrainNetwork-1




[1] "Combine single jobs"
[1] "1 of 20"
[1] "2 of 20"
[1] "3 of 20"
[1] "4 of 20"
[1] "5 of 20"
[1] "6 of 20"
[1] "7 of 20"
[1] "8 of 20"
[1] "9 of 20"
[1] "10 of 20"
[1] "11 of 20"
[1] "12 of 20"
[1] "13 of 20"
[1] "14 of 20"
[1] "15 of 20"
[1] "16 of 20"
[1] "17 of 20"
[1] "18 of 20"
[1] "19 of 20"
[1] "20 of 20"
[1] "Save network:"
[1] "/data/bcu_projects/MelBrainSys_PostdocProject_Gruetzmann/publications/2022-my-MelBrainSys-paper/scripts-etc-for-publication/regNet/TrainNetwork-1/NetworkModel/WholeNetwork/TcgaMelanomaExprMeth_NetworkCreator.Rout"
[1] "Save network statistics:"
[1] "/data/bcu_projects/MelBrainSys_PostdocProject_Gruetzmann/publications/2022-my-MelBrainSys-paper/scripts-etc-for-publication/regNet/TrainNetwork-1/NetworkModel/WholeNetwork/TcgaMelanomaExprMeth_NetworkCreator_CVStatistics.txt"


/data/bcu_projects/MelBrainSys_PostdocProject_Gruetzmann/publications/2022-my-MelBrainSys-paper/scripts-etc-for-publication/regNet/TrainNetwork-2




[1] "Combine single jobs"
[1] "1 of 20"
[1] "2 of 20"
[1] "3 of 20"
[1] "4 of 20"
[1] "5 of 20"
[1] "6 of 20"
[1] "7 of 20"
[1] "8 of 20"
[1] "9 of 20"
[1] "10 of 20"
[1] "11 of 20"
[1] "12 of 20"
[1] "13 of 20"
[1] "14 of 20"
[1] "15 of 20"
[1] "16 of 20"
[1] "17 of 20"
[1] "18 of 20"
[1] "19 of 20"
[1] "20 of 20"
[1] "Save network:"
[1] "/data/bcu_projects/MelBrainSys_PostdocProject_Gruetzmann/publications/2022-my-MelBrainSys-paper/scripts-etc-for-publication/regNet/TrainNetwork-2/NetworkModel/WholeNetwork/TcgaMelanomaExprMeth_NetworkCreator.Rout"
[1] "Save network statistics:"
[1] "/data/bcu_projects/MelBrainSys_PostdocProject_Gruetzmann/publications/2022-my-MelBrainSys-paper/scripts-etc-for-publication/regNet/TrainNetwork-2/NetworkModel/WholeNetwork/TcgaMelanomaExprMeth_NetworkCreator_CVStatistics.txt"


Time difference of 1.470975 mins