In [None]:
# Calculates, for each metastases pair,
#   - average impact log-ratios (brain / extra-cranial) on pathways
#   - median impact log-ratios of all altered genes on all genes
#   - average impact log-ratios on each gene
#
# The source genes are the altered genes of all met. pairs.
# The results serve as the basis for other scripts and are saved for later use:
#   metPairs-impactRatios-onPathways.rds
#   metPairs-impactRatios-onAllTargetGenes.rds (contains logMedianImpRatios)

In [6]:
library(parallel)

In [47]:
# project root and the regNet sub-directory that holds the trained networks and data
basePath <- "/data/bcu_projects/MelBrainSys_PostdocProject_Gruetzmann/publications/2022-my-MelBrainSys-paper/scripts-etc-for-publication/"
myPath <- paste0(basePath, "regNet/")

In [48]:
# all relative file names used below (readRDS / saveRDS) are resolved against basePath
setwd(basePath)

In [49]:
# path of the project-local R library
# NOTE(review): localRlibs is not used anywhere in the visible code; library(regNet)
#   below does not receive it as lib.loc - confirm whether that was intended
localRlibs = paste0(basePath,"conda/lib/R/library/")
library(regNet)

In [50]:
# cutoffs that are handed unchanged to the regNet impact functions
localGeneCutoff <- 30
pValCutoff <- 0.001
colSumsThreshold <- 0.001
# networks are read from <myPath>TrainNetwork-<1..numOfNWs>/
numOfNWs <- 2
networkName <- "TcgaMelanomaExprMeth"
dataSetName <- "MelBrainSys"
# number of cores used by mclapply
nbCPUs <- 10

### pathway definitions

In [76]:
# pathway definitions, grouped into categories; the PPAR entry of the
# signalingPWs category is printed as an example
pwCategories = readRDS(file = "pathway-categories.rds")
pwCategories$signalingPWs$PPAR

In [54]:
# remove the category level: one flat list with one element per pathway
genePerPathwayList <- unlist(unname(pwCategories), recursive = FALSE)

In [133]:
# preview: first entries of the first three pathways
lapply(head(genePerPathwayList,3), head)

### altered genes

In [161]:
# altered genes per met. pair; regGenesPerPat is indexed by the met. pair name below
tmp = readRDS(file = "altered-genes-per-patient.rds")
regGenesPerPat = tmp$regGenesPerPat

### samples and sample pairs

In [None]:
# load the sample annotation; only the met. pair definition (sampleMapping) is taken from it here
# sampleMapping is indexed by met. pair name; below, element 1 of a pair is treated
# as the brain sample and element 2 as the non-brain sample
tmp = readRDS(file = "annotation/samplePairs-annotation-colors-clusters.rds")
sampleMapping = tmp$sampleMapping

# impacts on pathways

### calculate mean log impact ratios

In [58]:
# Collects, for one sample, the average impact of every source gene on a set of
# target genes, separately for each trained network.
#
# Arguments:
#   samplePair        name of the met. pair (not used in the computation, kept for callers)
#   sample            sample name, handed to regNet as `patient`
#   sourceGenes       character vector of source genes; defines the rows of the result
#   targetGenes       target genes whose received impact is averaged by regNet
#   pValCutoff, localGeneCutoff, colSumsThreshold
#                     handed unchanged to regNet
#   numOfNWs          number of networks; network k is read from <myPath>TrainNetwork-<k>/
#
# Returns a numeric matrix with length(sourceGenes) rows (row names = sourceGenes)
# and numOfNWs columns; entries stay NA for source genes regNet did not report.
#
# Reads the globals myPath, dataSetName and networkName.
getRealImpacts <- function(
    samplePair, sample, sourceGenes, targetGenes, pValCutoff, localGeneCutoff, colSumsThreshold, numOfNWs) {
    nwSubdirPrefix <- 'TrainNetwork-'
    # preallocate one column per network instead of growing the matrix with cbind
    impacts <- matrix(data = NA_real_, nrow = length(sourceGenes), ncol = numOfNWs,
                      dimnames = list(sourceGenes, NULL))
    # seq_len: no iteration at all for numOfNWs = 0 (1:0 would run for 1 and 0)
    for (networkNum in seq_len(numOfNWs)) {
        path <- paste0(myPath, nwSubdirPrefix, networkNum, "/")
        # NOTE(review): every call uses the same outputFile "tmp"; if regNet writes to it,
        #   parallel callers may collide - confirm against the regNet documentation
        impactsTmp <- getAverageImpacts_PatientSpecificAbsoluteImpacts(
            patient = sample, sourceGenes = sourceGenes, targetGenes = targetGenes,
            dataSetName = dataSetName, networkName = networkName,
            pValCutoff = pValCutoff, localGeneCutoff = localGeneCutoff,
            colSumsThreshold = colSumsThreshold, path = path, outputFile = "tmp")
        # index rows by gene NAME; as.character() guards against a factor column,
        # which would otherwise be used as integer codes and hit the wrong rows
        impacts[as.character(impactsTmp$SourceGene), networkNum] <-
            impactsTmp$AverageImpactOnTargetGenes
    }
    impacts
}

In [67]:
# For every met. pair and each of its two samples: impacts of the pair's altered
# genes on each pathway (pathways are processed in parallel).
# Result: allSamplImp[[pair]][[sample]][[pathway]] = matrix returned by getRealImpacts
nbTotalIt <- 2 * length(sampleMapping)
currIt <- 0
allSamplImp <- list()
startt <- Sys.time()
for (samplePair in names(sampleMapping)) {
    message(samplePair)
    pairSourceGenes <- regGenesPerPat[[samplePair]]
    samplePairList <- list()
    for (sampleIdx in 1:2) {
        currIt <- currIt + 1
        sample <- sampleMapping[[samplePair]][sampleIdx]
        message("  sampleIdx ", sampleIdx, " ", sample)
        samplePairList[[sampleIdx]] <- mclapply(
            X = genePerPathwayList, mc.cores = nbCPUs,
            FUN = function(pwGenes) {
                getRealImpacts(
                    samplePair = samplePair, sample = sample,
                    sourceGenes = pairSourceGenes, targetGenes = pwGenes,
                    pValCutoff = pValCutoff, localGeneCutoff = localGeneCutoff,
                    colSumsThreshold = colSumsThreshold, numOfNWs = numOfNWs)
            })
        names(samplePairList)[sampleIdx] <- sample

        # extrapolate the remaining run time from the time spent so far
        endt <- Sys.time()
        totalTime <- difftime(endt, startt, units = "min") * nbTotalIt / currIt
        restTime <- round(totalTime * (nbTotalIt - currIt) / nbTotalIt)
        message("    ", currIt, " of ", nbTotalIt, ", ", restTime, " min still needed")
    }
    allSamplImp[[length(allSamplImp) + 1]] <- samplePairList
}
names(allSamplImp) <- names(sampleMapping)
endt <- Sys.time()
message(difftime(endt, startt, units = "min"), " min in total needed")
# author's measurement: 15 min in total for 2 networks 10 metastases pairs with 10 CPUs

P03_BLun

  sampleIdx 1 P03_Br

    1 of 22, 13 min still needed

  sampleIdx 2 P03_Lu

    2 of 22, 13 min still needed

P04_BSki_1

  sampleIdx 1 P04_Br

    3 of 22, 12 min still needed

  sampleIdx 2 P04_Sk_GA

    4 of 22, 11 min still needed

P08_BSof_1

  sampleIdx 1 P08_Br

    5 of 22, 11 min still needed

  sampleIdx 2 P08_St_GA

    6 of 22, 10 min still needed

P08_BSof_2

  sampleIdx 1 P08_Br

    7 of 22, 9 min still needed

  sampleIdx 2 P08_St_BA

    8 of 22, 9 min still needed

P08_BSof_3

  sampleIdx 1 P08_Br

    9 of 22, 8 min still needed

  sampleIdx 2 P08_St_YA

    10 of 22, 8 min still needed

P16_BLun

  sampleIdx 1 P16_Br

    11 of 22, 7 min still needed

  sampleIdx 2 P16_Lu

    12 of 22, 6 min still needed

P18_BLun_1

  sampleIdx 1 P18_Br

    13 of 22, 6 min still needed

  sampleIdx 2 P18_Lu_GA

    14 of 22, 5 min still needed

P18_BLun_2

  sampleIdx 1 P18_Br

    15 of 22, 4 min still needed

  sampleIdx 2 P18_Lu_YA

    16 of 22, 4 min still neede

In [69]:
# structure of allSamplImp: sample pair -> sample mate -> pathway ->
#   matrix of genes x networks: mean impact of gene on pathway in sample mate
# shown as examples: pathway no. 2, and class / dimension / first rows of pathway no. 37
head(allSamplImp[[1]][[1]][[2]])
class(allSamplImp[[1]][[1]][[37]])
dim(allSamplImp[[1]][[1]][[37]])
head(allSamplImp[[1]][[1]][[37]])

0,1,2
AIM2,1.507818e-09,1.265727e-05
ZNF677,,
CDCA7,2.73121e-07,4.232613e-06
ELOVL6,1.44349e-08,0.0
HIST1H1B,,
TTBK1,,


0,1,2
AIM2,2.032828e-10,2.314754e-07
ZNF677,,
CDCA7,7.579938e-09,0.001168118
ELOVL6,9.89243e-10,0.0
HIST1H1B,,
TTBK1,,


In [71]:
# Transform the impacts into one long data.frame for later use.
# One row per met. pair x pathway x source gene with
#   median_brain_impact / median_nonBrain_impact : median over the networks
#   log2_median_impact_ratio                     : log2(brain / non-brain) of the medians
#   log2_mean_impact_ratio                       : log2(brain / non-brain) of the means
# Sample 1 of a pair is treated as the brain sample, sample 2 as the non-brain sample.
#
# Warnings are muted only for this computation; suppressWarnings() restores the
# previous state even if the loop stops with an error.
# The per-pathway pieces are collected in a list and bound once at the end
# instead of growing the data.frame with rbind in every iteration.
metPairImpacts <- suppressWarnings({
    impactPieces <- list()
    for (samplePair in names(sampleMapping)) {
        message(samplePair)
        sample1 <- sampleMapping[[samplePair]][[1]]
        sample2 <- sampleMapping[[samplePair]][[2]]
        for (pw in names(genePerPathwayList)) {
            realImp1 <- allSamplImp[[samplePair]][[sample1]][[pw]]
            realImp2 <- allSamplImp[[samplePair]][[sample2]][[pw]]
            dataImp <- data.frame(
                median_brain_impact = apply(realImp1, 1, median, na.rm = TRUE),
                median_nonBrain_impact = apply(realImp2, 1, median, na.rm = TRUE),
                stringsAsFactors = FALSE)
            dataImp$log2_median_impact_ratio <-
                log2(dataImp$median_brain_impact / dataImp$median_nonBrain_impact)
            dataImp$log2_mean_impact_ratio <-
                log2(apply(realImp1, 1, mean, na.rm = TRUE) /
                     apply(realImp2, 1, mean, na.rm = TRUE))
            impactPieces[[length(impactPieces) + 1]] <- data.frame(
                samplePair = samplePair, gene = rownames(realImp1), pathway = pw,
                dataImp, stringsAsFactors = FALSE)
        }
    }
    # NULL if there was nothing to bind, as before
    do.call(rbind, impactPieces)
})

P03_BLun

P04_BSki_1

P08_BSof_1

P08_BSof_2

P08_BSof_3

P16_BLun

P18_BLun_1

P18_BLun_2

P39_BLun

P42_BLym_1

P42_BLym_2



In [72]:
# size and first rows of the long table (one row per met. pair x pathway x gene)
dim(metPairImpacts)
head(metPairImpacts)

Unnamed: 0_level_0,samplePair,gene,pathway,median_brain_impact,median_nonBrain_impact,log2_median_impact_ratio,log2_mean_impact_ratio
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
AIM2,P03_BLun,AIM2,PPAR,5.75574e-08,1.896864e-08,1.601385,1.601385
ZNF677,P03_BLun,ZNF677,PPAR,,,,
CDCA7,P03_BLun,CDCA7,PPAR,1.104414e-07,4.341709e-08,1.346946,1.346946
ELOVL6,P03_BLun,ELOVL6,PPAR,1.518836e-07,3.99042e-08,1.928353,1.928353
HIST1H1B,P03_BLun,HIST1H1B,PPAR,,,,
TTBK1,P03_BLun,TTBK1,PPAR,,,,


In [75]:
# store the pathway impact ratios for other scripts
saveRDS(object = metPairImpacts, file = "metPairs-impactRatios-onPathways.rds")

# impacts on single genes

In [87]:
# load the expression and methylation data of the data set via regNet
# NOTE(review): the methylation file is passed as geneCopyNumberFile - presumably
#   methylation takes the place of copy number in this network; confirm
myFullDataSet = loadGeneExpressionAndCopyNumberDataSet(
    geneExpressionFile = "MelBrainSys-expression.csv", 
    geneCopyNumberFile = "MelBrainSys-methylation.csv", path = paste0(myPath,"Data/") )

In [88]:
# all genes of the loaded data set; used as target genes below
allGenesNWs <- myFullDataSet$genes
numGenes <- length(allGenesNWs)
numGenes
head(allGenesNWs)

In [115]:
# Computes, in parallel, the impacts of the source genes on all genes of the
# data set (allGenesNWs) for each parameter set.
#
# Arguments:
#   paramSets  list of named character vectors c(sample = ..., nw = ...); one
#              regNet call is made per element, reading network <myPath>TrainNetwork-<nw>/
#   DGEsDMPs   source genes; also used as row names of every result matrix
#   nbCPUs     number of cores for mclapply
#
# The former self-referential defaults (paramSets = paramSets, ...) were removed:
# they raised an error whenever they were actually used, so every working call
# already passes all three arguments.
#
# Returns a list with one matrix per parameter set (source genes x remaining
# columns after dropping the first column of the regNet result).
#
# Reads the globals myPath, allGenesNWs, dataSetName, networkName, pValCutoff,
# localGeneCutoff, colSumsThreshold and logFile; appends one line per finished
# parameter set to logFile.
calc1sampleImpacts <- function(paramSets, DGEsDMPs, nbCPUs) {
    allImpacts1 <- mclapply(X = paramSets, mc.cores = nbCPUs, FUN = function(params) {
        sample <- params["sample"]
        nw <- params["nw"]
        path <- paste0(myPath, "TrainNetwork-", nw, "/")
        impacts <- getImpacts_PatientSpecificAbsoluteImpacts(
            patient = sample, sourceGenes = DGEsDMPs, targetGenes = allGenesNWs,
            dataSetName = dataSetName, networkName = networkName,
            pValCutoff = pValCutoff, localGeneCutoff = localGeneCutoff,
            colSumsThreshold = colSumsThreshold, path = path,
            outputFile = paste0("tmp-", nw, "-", sample), output = FALSE)
        # progress line; cat(append = TRUE) opens and closes the file itself,
        # so no connection can be left open
        cat(paste0("netw ", nw, ", sample ", sample), "\n",
            sep = "", file = logFile, append = TRUE)
        # assumes regNet returns one row per source gene, in the order of DGEsDMPs - TODO confirm
        rownames(impacts) <- DGEsDMPs
        # drop = FALSE keeps a matrix with row names even if only one column remains
        as.matrix(impacts[, -1, drop = FALSE])
    })
    allImpacts1
}

In [117]:
# For each sample pair, each of its two samples and each of the numOfNWs networks:
# impacts of the pair's altered genes (DGEsDMPs) on all genes of the data set.
# Result: allImpacts[[pair]][[sample]] = list with one impact matrix per network
logFile <- "log-getImpacts.txt"
if (file.exists(logFile)) {
    file.remove(logFile)
}
startt <- Sys.time()
allImpacts <- NULL
for (samplePair in names(sampleMapping)) {
    message(samplePair)
    samples <- sampleMapping[[samplePair]]
    # only altered genes that are in our data; the same set is used for both samples
    DGEsDMPs <- intersect(regGenesPerPat[[samplePair]], allGenesNWs)
    impactsOfPair <- list()
    for (sampleIdx in 1:2) {
        message("  ", samples[sampleIdx])
        # one parameter set (sample, network number) per network
        paramSets <- lapply(samples[sampleIdx], function(sample) {
            lapply(seq_len(numOfNWs), function(nw) c(sample = sample, nw = nw))
        })
        paramSets <- unlist(paramSets, recursive = FALSE)
        message("    ", length(DGEsDMPs), " DGEsDMPs")
        impactsOfPair[[sampleIdx]] <- calc1sampleImpacts(
            paramSets = paramSets, DGEsDMPs = DGEsDMPs, nbCPUs = nbCPUs)
    }
    names(impactsOfPair) <- samples
    allImpacts[[length(allImpacts) + 1]] <- impactsOfPair
}
endt <- Sys.time()
round(difftime(endt, startt, units = "min"))
names(allImpacts) <- names(sampleMapping)
# ca 5 min for 2 networks and 10 met. pairs

P03_BLun

  P03_Br

    23 DGEsDMPs

  P03_Lu

    23 DGEsDMPs

P04_BSki_1

  P04_Br

    48 DGEsDMPs

  P04_Sk_GA

    48 DGEsDMPs

P08_BSof_1

  P08_Br

    43 DGEsDMPs

  P08_St_GA

    43 DGEsDMPs

P08_BSof_2

  P08_Br

    40 DGEsDMPs

  P08_St_BA

    40 DGEsDMPs

P08_BSof_3

  P08_Br

    69 DGEsDMPs

  P08_St_YA

    69 DGEsDMPs

P16_BLun

  P16_Br

    11 DGEsDMPs

  P16_Lu

    11 DGEsDMPs

P18_BLun_1

  P18_Br

    21 DGEsDMPs

  P18_Lu_GA

    21 DGEsDMPs

P18_BLun_2

  P18_Br

    30 DGEsDMPs

  P18_Lu_YA

    30 DGEsDMPs

P39_BLun

  P39_Br

    88 DGEsDMPs

  P39_Lu

    88 DGEsDMPs

P42_BLym_1

  P42_Br_GA

    279 DGEsDMPs

  P42_Ly_GA

    279 DGEsDMPs

P42_BLym_2

  P42_Br_GA

    49 DGEsDMPs

  P42_Ly_YA

    49 DGEsDMPs



Time difference of 4 mins

In [119]:
# Calc. the median impact of every source gene on every target gene over all
# networks for each sample, and from these the log2 median impact ratio
# (first sample / second sample) for each sample pair.
# Result: logMedianImpRatios[[pair]] = matrix source genes x target genes
nbCPUs <- 8
if (file.exists(logFile)) {
    file.remove(logFile)
}
startt <- Sys.time()
logMedianImpRatios <- NULL

for (sp in names(allImpacts)) {
    message(sp)
    medianImpacts <- NULL
    for (sampleName in names(allImpacts[[sp]])) {
        message("   ", sampleName)
        impacts <- allImpacts[[sp]][[sampleName]]
        message("   ", length(impacts))
        numSourcGenes <- nrow(impacts[[1]])
        # networks x source genes x target genes; sized by the number of networks
        # actually present in the data rather than by the global numOfNWs
        arr <- array(data = NA_real_, dim = c(length(impacts), numSourcGenes, numGenes))
        for (j in seq_along(impacts)) {
            arr[j, , ] <- impacts[[j]]
        }
        # parallel over target genes; for each one the median over the networks
        # is taken per source gene (vapply guarantees a numeric vector)
        medianSampleImpacts <- mclapply(seq_len(numGenes), mc.cores = nbCPUs, function(targetIdx) {
            vapply(seq_len(numSourcGenes), function(sourceIdx) {
                median(x = arr[, sourceIdx, targetIdx], na.rm = TRUE)
            }, numeric(1))
        })
        medianSampleImpacts <- do.call(cbind, medianSampleImpacts)
        rownames(medianSampleImpacts) <- rownames(impacts[[1]])
        colnames(medianSampleImpacts) <- allGenesNWs
        medianImpacts[[length(medianImpacts) + 1]] <- medianSampleImpacts
        # progress line; cat(append = TRUE) opens and closes the file itself
        cat(paste0("sample ", sampleName), "\n", sep = "", file = logFile, append = TRUE)
    }
    names(medianImpacts) <- names(allImpacts[[sp]])
    # calc. log median impact ratios
    logMedianImpRatios[[length(logMedianImpRatios) + 1]] <-
        log2(medianImpacts[[1]] / medianImpacts[[2]])
}
# 1-3 min
names(logMedianImpRatios) <- names(allImpacts)
endt <- Sys.time()
round(difftime(endt, startt, units = "min"))

P03_BLun

   P03_Br

   2

   P03_Lu

   2

P04_BSki_1

   P04_Br

   2

   P04_Sk_GA

   2

P08_BSof_1

   P08_Br

   2

   P08_St_GA

   2

P08_BSof_2

   P08_Br

   2

   P08_St_BA

   2

P08_BSof_3

   P08_Br

   2

   P08_St_YA

   2

P16_BLun

   P16_Br

   2

   P16_Lu

   2

P18_BLun_1

   P18_Br

   2

   P18_Lu_GA

   2

P18_BLun_2

   P18_Br

   2

   P18_Lu_YA

   2

P39_BLun

   P39_Br

   2

   P39_Lu

   2

P42_BLym_1

   P42_Br_GA

   2

   P42_Ly_GA

   2

P42_BLym_2

   P42_Br_GA

   2

   P42_Ly_YA

   2



Time difference of 1 mins

In [127]:
logMedianImpRatios$P03_BLun[1:4,1:3]
# logMedianImpRatios holds, for each metastases pair, a matrix (source genes x target genes)
# with the log2 ratios of the median impacts on these target genes; shown: corner of the first pair

Unnamed: 0,NOC2L,KLHL17,HES4
AIM2,1.3741435,-0.7504903,1.9597567
CDCA7,1.0236865,2.3013262,2.3037679
ELOVL6,1.6849782,1.1693858,-0.6388257
IMPA1,0.6407736,1.3500918,1.6205505


In [155]:
# average the log median impact ratios over all source genes:
# one value per target gene and met. pair (NA entries are ignored)
meanLogMedianImpRatios <- lapply(logMedianImpRatios, function(ratioMat) {
    apply(ratioMat, 2, function(ratiosOfTarget) mean(ratiosOfTarget, na.rm = TRUE))
})
names(meanLogMedianImpRatios)[1:3]
head(meanLogMedianImpRatios[[1]], 4)

In [156]:
# combine the per-pair vectors into one matrix: target genes in rows, met. pairs in columns
meanLogMedianImpRatiosMat <- sapply(meanLogMedianImpRatios, identity)
head(meanLogMedianImpRatiosMat)
dim(meanLogMedianImpRatiosMat)

Unnamed: 0,P03_BLun,P04_BSki_1,P08_BSof_1,P08_BSof_2,P08_BSof_3,P16_BLun,P18_BLun_1,P18_BLun_2,P39_BLun,P42_BLym_1,P42_BLym_2
NOC2L,-0.5131848,0.2623647,-0.8237658,-0.04970504,-0.3760514,0.197820589,-0.29186426,-0.6056519,-1.0009443,0.427153,-0.31009925
KLHL17,-0.375472,0.5195951,-0.8366981,-0.26829236,-0.3536398,-0.008449568,-0.90867937,-0.5893162,-1.1208155,0.3428841,-0.29978671
HES4,-1.0220673,0.3757407,-0.7503114,-0.014979,0.2059771,-1.191426658,-0.16027045,-0.816423,-1.9369733,-1.0808609,-0.5177954
ISG15,-0.4778416,0.2964649,-0.855836,0.30105722,0.0270101,0.398693258,0.0584264,-0.3556355,-0.8483902,0.3199234,-0.3710177
AGRN,-0.8571891,0.5567957,-0.9730449,0.11305796,-0.3439669,-0.111755277,-0.09810649,-0.4588938,-1.0630752,0.6730077,-0.09384669
C1orf159,-0.4562156,0.3341612,-0.8622403,-0.06471666,-0.6813369,0.540506357,-0.21481722,-0.4516024,-1.1938917,0.41877,0.25238287


In [158]:
# store the per-pair log ratio matrices and the matrix of their means over
# source genes together in one file for other scripts
saveRDS(object = list(logMedianImpRatios = logMedianImpRatios,
                      meanLogMedianImpRatiosMat = meanLogMedianImpRatiosMat), 
        file="metPairs-impactRatios-onAllTargetGenes.rds")