In [None]:
#Read Expression Matrix

In [None]:
# Read a comma-separated expression table and relabel its sample columns.
#
# Args:
#   path: path to a CSV file. The row identifiers are taken from the column
#         named `X` (the name read.csv() gives to an unnamed first column).
#   lbl:  prefix for the new sample names; columns become lbl1, lbl2, ...
#
# Returns:
#   A data.frame whose row names come from `X`, with the first column removed
#   and the remaining columns renamed to paste0(lbl, index).
readexprs = function(path, lbl){

    df = read.csv(path, sep = ',')

    # `[[` matches the column name exactly; `$` would accept a partial match.
    rownames(df) = df[["X"]]

    # drop = FALSE keeps a data.frame even when a single sample column is left,
    # otherwise ncol() below would be NULL.
    df = df[, -1, drop = FALSE]

    # seq_len() yields an empty sequence for zero columns (1:0 would not).
    colnames(df) = paste0(lbl, seq_len(ncol(df)))

    return(df)
}

In [27]:
#Column Data

In [9]:
# Build the sample annotation (column data) table for a case/control design.
#
# Args:
#   case:    table whose columns are the case samples.
#   control: table whose columns are the control samples.
#
# Returns:
#   A data.frame with one row per sample (case samples first), a `Sample`
#   column holding the column names and a `Group` column set to 'C' for case
#   and 'N' for control samples. Row names equal the sample names.
prep = function(case, control){

    # Only the column names are needed, so the two tables are never combined.
    coldata_case = data.frame(Sample = colnames(case),
                              Group = rep('C', ncol(case)))
    coldata_control = data.frame(Sample = colnames(control),
                                 Group = rep('N', ncol(control)))

    coldata = rbind(coldata_case, coldata_control)
    rownames(coldata) = coldata$Sample

    return(coldata)
}

In [16]:
# Read a comma-separated expression table and relabel its sample columns.
#
# Args:
#   path: path to a CSV file. The row identifiers are taken from the column
#         named `X` (the name read.csv() gives to an unnamed first column).
#   lbl:  prefix for the new sample names; columns become lbl1, lbl2, ...
#
# Returns:
#   A data.frame whose row names come from `X`, with the first column removed
#   and the remaining columns renamed to paste0(lbl, index).
readexprs = function(path, lbl){

    df = read.csv(path, sep = ',')

    # `[[` matches the column name exactly; `$` would accept a partial match.
    rownames(df) = df[["X"]]

    # drop = FALSE keeps a data.frame even when a single sample column is left,
    # otherwise ncol() below would be NULL.
    df = df[, -1, drop = FALSE]

    # seq_len() yields an empty sequence for zero columns (1:0 would not).
    colnames(df) = paste0(lbl, seq_len(ncol(df)))

    return(df)
}

In [21]:
#Differential Gene Expression

In [22]:
# Box-plot the pairwise distances between the columns of two tables.
#
# Args:
#   R: table for the first group; its columns are the items being compared.
#   S: table for the second group, with the same rows as R.
#
# Returns:
#   The value of boxplot() (returned invisibly by boxplot itself).
#
# Side effects:
#   Attaches the packages listed below, sets the repr.plot.* options and draws
#   on the active graphics device.
edgeR = function(R, S){

    # The computation below uses base R only. The attaches are kept because
    # other functions in this file (e.g. the clusterPr* helpers) call
    # clusterProfiler / org.Hs.eg.db without loading them.
    library(edgeR)
    library(clusterProfiler)
    library(dplyr)
    library(org.Hs.eg.db)
    library(enrichplot)
    library(mixOmics)
    library(RColorBrewer)
    library(HTSFilter)

    # Transpose so that dist() measures column-to-column distances.
    sampleDists = as.matrix(dist(t(cbind(R, S))))

    options(repr.plot.width=6,repr.plot.height=6,repr.plot.res=200)
    boxplot(sampleDists, col="gray", las=3)
}

In [27]:
#DESeq2

In [117]:
# Run a DESeq2 comparison of group "R" against group "S" and return the
# variance-stabilised counts of the genes that are higher in "R".
#
# Args:
#   sensitive:  count table of the "S" samples (rows = genes).
#   resistance: count table of the "R" samples, with the same rows.
#   coldata:    sample annotation with a `Group` column that has the levels
#               "R" and "S". It is paired with cbind(resistance, sensitive),
#               so its rows are presumably expected in that column order.
#               NOTE(review): prep() in this file labels groups 'C'/'N', which
#               would not satisfy the relevel()/contrast below - confirm.
#
# Returns:
#   varistran::vst() of the rows of `resistance` for genes with
#   log2FoldChange > 1 and padj < 0.05 whose summed count in `resistance`
#   is at least 10.
#
# Side effects:
#   Attaches the packages below, registers a MulticoreParam back-end, sets
#   repr.plot.* options and draws the dispersion, mean-SD, PCA and MA plots.
de = function(sensitive, resistance, coldata){

    library(DESeq2)
    library(dplyr)
    library(devtools)
    library(BiocParallel)
    library(data.table)

    register(MulticoreParam())

    dataset = cbind(resistance, sensitive)

    dds = DESeqDataSetFromMatrix(countData = round(dataset),
                                 colData = coldata,
                                 design = ~ Group)

    dds$Group = relevel(dds$Group, ref = "R")

    # Discard genes with fewer than 10 reads in total before fitting.
    keep = rowSums(counts(dds)) >= 10
    dds = dds[keep,]

    dds_Deseq = DESeq(dds, parallel=TRUE, fitType = "local")
    options(repr.plot.width=5,repr.plot.height=5,repr.plot.res=200)
    plotDispEsts(dds_Deseq)

    dds_var = DESeq2::vst(dds, blind = TRUE, fitType = "local")
    options(repr.plot.width=5,repr.plot.height=5,repr.plot.res=250)
    # meanSdPlot lives in vsn, which is not attached in this function.
    vsn::meanSdPlot(assay(dds_var))
    # plotPCA returns a ggplot object; inside a function it has to be printed
    # explicitly to be drawn.
    print(DESeq2::plotPCA(dds_var, intgroup=c("Group"), returnData=FALSE))

    res = results(dds_Deseq, contrast=c("Group", "R", "S"), alpha=0.05, parallel=TRUE)

    options(repr.plot.width=5,repr.plot.height=6,repr.plot.res=200)
    # Qualified so that an attached limma (which also defines plotMA) cannot
    # take over the call.
    DESeq2::plotMA(res, ylim=c(-10,10), cex.lab = 1.5)
    resOrdered = res[order(res$padj), ]

    # which() drops genes whose fold change or adjusted p-value is NA.
    is_up = which(resOrdered$log2FoldChange > 1 & resOrdered$padj < 0.05)
    up = resOrdered[is_up, ]

    # Row names are left untouched so they still match `resistance`.
    up_genes = resistance[rownames(up), , drop = FALSE]
    keep_up = rowSums(up_genes) >= 10
    up_genes = up_genes[keep_up, , drop = FALSE]

    up_genes = varistran::vst(up_genes)

    return(up_genes)

}

In [None]:
# Fit a limma model contrasting group R against group S and return the
# `resistance` rows of the significantly up-regulated genes.
#
# Args:
#   sensitive:  expression table of the S samples (rows = genes).
#   resistance: expression table of the R samples, with the same rows.
#
# Returns:
#   The rows of `resistance` for genes with adjusted p-value <= 0.05 (BH) and
#   logFC > 1 in the R - S contrast, ordered by p-value. A zero-row subset of
#   `resistance` is returned when no gene passes.
#
# Side effects:
#   Attaches the packages below and draws the sigma-vs-A, Venn, volcano and
#   MA plots.
limma = function(sensitive, resistance){

    library(limma)
    library(data.table)
    library(dplyr)
    library(EnhancedVolcano)

    # One design column per group: resistance samples first, then sensitive.
    design = model.matrix(~ 0 + factor(c(rep(1, ncol(resistance)), rep(2, ncol(sensitive)))))
    colnames(design) = c('R', 'S')

    dataset = cbind(resistance, sensitive)
    dataset = normalizeBetweenArrays(dataset, method="quantile")

    fit = lmFit(dataset, design, method='ls')

    contr = makeContrasts(R - S, levels = colnames(coef(fit)))
    tmp = contrasts.fit(fit, contr)

    fit2 = eBayes(tmp, robust=TRUE)

    plotSA(fit2, main="Gene-level")

    results = decideTests(fit2, method='hierarchical', adjust.method='BH', p.value=0.05)
    # Argument names are spelled out instead of relying on partial matching.
    sig = topTable(fit2, number=Inf, adjust.method='BH', coef=1, sort.by='P', p.value=0.05)

    vennDiagram(results, include=c("up","down"), counts.col=c("red", "blue"))
    volcanoplot(fit2, coef=1, names = fit2$genes$Gene.symbol)

    limma::plotMA(fit2, coef=1, main="R vs S")
    abline(h=0, col="red", lwd=2)

    # topTable() gives a data.frame without columns when nothing passes the
    # p-value cut-off, so there is no logFC column to filter on.
    if (nrow(sig) == 0) {
        return(resistance[0, , drop = FALSE])
    }

    up = sig[which(sig$logFC > 1), , drop = FALSE]

    return(resistance[rownames(up), , drop = FALSE])

}

In [70]:
#Soft-Thresholding

In [71]:
# Plot the WGCNA soft-threshold diagnostics for a range of candidate powers.
#
# Args:
#   dataset: expression table; it is transposed before being handed to
#            pickSoftThreshold().
#
# Returns:
#   Invisibly, the list returned by pickSoftThreshold().
#
# Side effects:
#   Attaches WGCNA, enables its threading, sets options(stringsAsFactors) and
#   draws two panels side by side (the previous par() layout is restored on
#   exit).
power = function(dataset){

    suppressMessages(library(WGCNA))

    enableWGCNAThreads(nThreads=8)
    allowWGCNAThreads()
    options(stringsAsFactors = FALSE)

    dataset = t(dataset)

    powers = c(c(1:10), seq(from = 12, to=20, by=2))
    sft = pickSoftThreshold(dataset, powerVector = powers, verbose = 5)

    fit = sft$fitIndices
    # Column 2 times minus the sign of column 3, as plotted on the left panel.
    signed_fit = -sign(fit[, 3]) * fit[, 2]

    # Restore the caller's panel layout once both plots are drawn.
    old_par = par(mfrow = c(1,2))
    on.exit(par(old_par), add = TRUE)

    plot(fit[, 1], signed_fit,
         xlab='Soft Threshold (power)', cex.lab=1.2, cex.axis=1.2, cex.main=1.2, cex.sub=1.2,
         ylab='Scale Free Topology Model Fit,signed R^2',type='n',
         main = paste('Scale independence'))

    text(fit[, 1], signed_fit,
         labels=powers,cex=1, col='red')
    abline(h=0.85,col='red')

    plot(fit[, 1], fit[, 5], cex.lab=1.2, cex.axis=1.2, cex.main=1.2, cex.sub=1.2,
         xlab='Soft Threshold (power)',ylab='Mean Connectivity', type='n',
         main = paste('Mean connectivity'))

    text(fit[, 1], fit[, 5],
         labels=powers, cex=1, col='red')

    invisible(sft)

}

In [78]:
#Co-Expression Analysis

In [79]:
# Cluster genes on a TOM dissimilarity and return module colours.
#
# Args:
#   dataset: expression table; it is transposed before the correlation is
#            computed.
#   power:   exponent applied to the absolute biweight midcorrelation.
#
# Returns:
#   Gene_Modules: colour labels from cutreeDynamic(method = 'tree').
#
# Side effects:
#   Attaches WGCNA, enables its threading, sets options and draws the
#   dendrogram with two colour rows: the returned tree-cut modules and the
#   second dynamic cut (deepSplit = 4) for comparison.
TOM_Matrices = function(dataset, power){

  library(WGCNA)

  enableWGCNAThreads(nThreads=12)
  allowWGCNAThreads()
  options(stringsAsFactors = FALSE)

  dataset = t(dataset)
  adj = abs(bicor(dataset, use = 'pairwise.complete.obs'))^power
  dissTOM = TOMdist(adj, TOMType = "signed")

  hierTOMa = hclust(as.dist(dissTOM), method='average')

  Gene_Modules = labels2colors(cutreeDynamic(hierTOMa, method='tree', cutHeight=0.99))
  Gene_Clusters = labels2colors(cutreeDynamic(hierTOMa, distM= dissTOM , cutHeight = 0.99,
                                               deepSplit=4, pamRespectsDendro = FALSE))

  options(repr.plot.width=5,repr.plot.height=5,repr.plot.res=200)
  # Show the returned assignment next to the alternative cut, so the figure
  # matches what callers receive.
  plotDendroAndColors(hierTOMa,
                      colors = data.frame(Gene_Modules, Gene_Clusters),
                      dendroLabels = FALSE,
                      cex.axis = 1.2)

  return(Gene_Modules)

}

In [86]:
# Compute the signed eigengene-based connectivity (kME) of every gene.
#
# Args:
#   dataset: expression table; it is transposed before being passed to WGCNA.
#   colorh1: module label for each gene, as used by moduleEigengenes().
#   power:   kept for compatibility with existing calls; the returned table
#            does not depend on it.
#
# Returns:
#   The table produced by signedKME() for the module eigengenes, with an empty
#   column-name prefix.
#
# Side effects:
#   Attaches WGCNA, enables its threading and sets options(stringsAsFactors).
eigenetic_network = function(dataset, colorh1, power){

  # The WGCNA functions below are otherwise only available if another cell
  # has attached the package first.
  library(WGCNA)

  enableWGCNAThreads(nThreads=32)
  allowWGCNAThreads()
  options(stringsAsFactors = FALSE)

  expr = t(dataset)

  datME = moduleEigengenes(expr, colorh1)$eigengenes
  datKME = signedKME(expr, datME, outputColumnName='')

  return(datKME)

}

In [93]:
#Functional Enrichment of Co-Expressed Clusters

In [94]:
# Collect, for every module label in `colorh1`, the genes whose absolute kME
# for that module exceeds 0.7, and map them from SYMBOL to ENTREZID.
#
# Args:
#   dataset: expression table; it is transposed below and the column names of
#            the transposed table are used as gene identifiers.
#   colorh1: module labels; the distinct values drive the first loop.
#   datKME:  table with one column per module label (columns are selected by
#            label).
#   name:    only referenced by the commented-out write.csv() call.
#
# Returns:
#   dat_new: data.frame of Entrez IDs with one column per module that had at
#   least one non-NA gene; columns are finally renamed C1, C2, ...
#
# NOTE(review): cbind.fill is not defined or loaded here (presumably
# rowr::cbind.fill) and the code keeps every other column of its result -
# confirm the layout it returns.
enrichment = function(dataset, colorh1, datKME, name){
    
    library(clusterProfiler)
    library(dplyr)
    library(org.Hs.eg.db)
    library(enrichplot)
    
    # Distinct module labels, as character strings.
    intModules = table(colorh1)
    intModules = as.data.frame(intModules)
    intModules =intModules$colorh1
    intModules = as.character(intModules)
    
    # Accumulators: symbols per module, Entrez IDs per module, mapped symbols.
    dat = data.frame()
    dat_new = data.frame()
    survival = data.frame()
    
    colrs = c()
    newclors = c()
    
    dataset = t(dataset)
    
    for (color in intModules){
        
        color =  color
        # Logical mask of genes with |kME| > 0.7 for this module.
        FilterGenes = abs(subset(datKME, select=c(color))) > 0.7
        # NOTE(review): data.frame() checks names by default, which can alter
        # identifiers that are not syntactic names (e.g. containing '-').
        genes = dimnames(data.frame(dataset))[[2]][FilterGenes]
        
        dat = cbind.fill(dat, genes, fill = NA)
        # NOTE(review): append(color, colrs) puts the new label in front, so
        # colrs ends up in reverse loop order - verify it lines up with the
        # columns of `dat` before the names are assigned below.
        colrs = append(color, colrs)
    
    }
    
    # Keep the odd-numbered columns, then label them with the module names.
    dat = dat[,seq(1,ncol(dat),2)]
    colnames(dat) = colrs
    dat = as.data.frame(dat)
    
    # Drop the column labelled "grey".
    dat = dat[,!names(dat) %in% c("grey")]
    colrs = colnames(dat)
    
    for (j in 1:ncol(dat)){
        
        gene = dat[, j]
        
        # Skip modules whose column holds only NA.
        if (all(is.na(gene)) == FALSE){
            
            eg = bitr(gene, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
            genes = eg$ENTREZID
            dat_new = cbind.fill(dat_new, genes, fill = NA)
            
            symbols = eg$SYMBOL
            survival = cbind.fill(survival, symbols, fill = NA)
            newclors = append(newclors, colrs[j])
            
            
        }
    }
    
    # Same odd-column selection as above, applied to both result tables.
    dat_new = dat_new[,seq(1, ncol(dat_new), 2)]
    survival = survival[,seq(1, ncol(survival), 2)]
    
    colnames(dat_new) = newclors
    colnames(survival) = newclors
    
    dat_new = as.data.frame(dat_new)
    survival = as.data.frame(survival)
    
    # The module labels are replaced by generic names C1..Cn.
    names = paste0('C', 1:ncol(dat_new))
    names_survival = paste0('C', 1:ncol(survival))
    
    colnames(dat_new) = names
    colnames(survival) = names_survival
    
    #write.csv(survival, paste('/Users/lebohangmashatola/downloads/', name, '.csv', sep=''))
    
    # `survival` (the mapped symbols) is built but not returned.
    return(dat_new)


}

In [67]:
# Compare GO Biological Process enrichment across gene clusters.
#
# Args:
#   dat_new: data.frame with one column of gene IDs per cluster; a column
#            named "grey" is ignored.
#   w, h:    not used by this function.
#
# Returns:
#   The compareCluster() result for enrichGO with ont = "BP".
#
# Relies on clusterProfiler and org.Hs.eg.db being attached by the caller.
clusterPrBP = function(dat_new, w, h){

    # drop = FALSE keeps a data.frame when only one cluster column remains.
    dat_new = dat_new[, !names(dat_new) %in% c("grey"), drop = FALSE]
    cBp = compareCluster(geneCluster = dat_new, fun = "enrichGO", pvalueCutoff = 0.05, OrgDb = org.Hs.eg.db,
                         ont = "BP", pAdjustMethod = "BH")

    return(cBp)

}

In [73]:
# Compare GO Molecular Function enrichment across gene clusters.
#
# Args:
#   dat_new: data.frame with one column of gene IDs per cluster; a column
#            named "grey" is ignored.
#   w, h:    not used by this function.
#
# Returns:
#   The compareCluster() result for enrichGO with ont = "MF".
#
# Relies on clusterProfiler and org.Hs.eg.db being attached by the caller.
clusterPrMf = function(dat_new, w, h){

    # drop = FALSE keeps a data.frame when only one cluster column remains.
    dat_new = dat_new[, !names(dat_new) %in% c("grey"), drop = FALSE]
    cMf = compareCluster(geneCluster = dat_new, fun = "enrichGO",
                         pvalueCutoff = 0.05, OrgDb = org.Hs.eg.db,
                         ont = "MF", pAdjustMethod = "BH")

    return(cMf)

}

In [79]:
# Compare KEGG pathway enrichment across gene clusters.
#
# Args:
#   dat_new: data.frame with one column of gene IDs per cluster; a column
#            named "grey" is ignored.
#   w, h:    not used by this function.
#
# Returns:
#   The compareCluster() result for enrichKEGG.
#
# Relies on clusterProfiler being attached by the caller.
clusterKEGG = function(dat_new, w, h){

    # drop = FALSE keeps a data.frame when only one cluster column remains.
    dat_new = dat_new[, !names(dat_new) %in% c("grey"), drop = FALSE]
    # `fun` is given by name, like every other compareCluster() call in this
    # file.
    ck = compareCluster(geneCluster = dat_new, fun = "enrichKEGG",
                        pvalueCutoff = 0.05)

    return(ck)

}

In [2]:
# Dot plots of five enrichment result objects (presumably the GO Biological
# Process results, judging by the *bp/*Bp names - they are created outside
# this section). Each options() call sets the notebook plot width, height and
# resolution right before the plot it applies to; showCategory is passed
# through to dotplot() for each object.
options(repr.plot.width=8,repr.plot.height=5,repr.plot.res=250)
enrichplot::dotplot(CRCbp, showCategory=1)

options(repr.plot.width=5,repr.plot.height=6,repr.plot.res=250)
enrichplot::dotplot(Stagesbp, showCategory=5)

options(repr.plot.width=6,repr.plot.height=9,repr.plot.res=250)
enrichplot::dotplot(BRCABp, showCategory=3)

options(repr.plot.width=7,repr.plot.height=11,repr.plot.res=250)
enrichplot::dotplot(LUADBp, showCategory=3)

options(repr.plot.width=7,repr.plot.height=11,repr.plot.res=250)
enrichplot::dotplot(PRADBp, showCategory=3)

In [3]:
# Dot plots of five enrichment result objects (presumably the KEGG results,
# judging by the *KEGG names - they are created outside this section). Each
# options() call sets the notebook plot width, height and resolution right
# before the plot it applies to.
options(repr.plot.width=8,repr.plot.height=7,repr.plot.res=250)
enrichplot::dotplot(CRCKEGG, showCategory=3)

options(repr.plot.width=5,repr.plot.height=4,repr.plot.res=250)
enrichplot::dotplot(StagesKEGG, showCategory=3)

options(repr.plot.width=6,repr.plot.height=5,repr.plot.res=250)
enrichplot::dotplot(BRCAKEGG, showCategory=2)

options(repr.plot.width=6,repr.plot.height=6,repr.plot.res=250)
enrichplot::dotplot(LUADKEGG, showCategory=3)

options(repr.plot.width=6,repr.plot.height=5,repr.plot.res=250)
enrichplot::dotplot(PRADKEGG, showCategory=10)

In [4]:
# Dot plots of five enrichment result objects (presumably the GO Molecular
# Function results, judging by the *Mf names - they are created outside this
# section). Each options() call sets the notebook plot width, height and
# resolution right before the plot it applies to.
options(repr.plot.width=8,repr.plot.height=7,repr.plot.res=250)
enrichplot::dotplot(CRCMf,showCategory=2)

options(repr.plot.width=5,repr.plot.height=5,repr.plot.res=250)
enrichplot::dotplot(StagesMf,showCategory=3)

options(repr.plot.width=5,repr.plot.height=5,repr.plot.res=250)
enrichplot::dotplot(BRCAMf, showCategory=2)

options(repr.plot.width=6,repr.plot.height=7,repr.plot.res=250)
enrichplot::dotplot(LUADMf, showCategory=2)

options(repr.plot.width=6,repr.plot.height=7,repr.plot.res=250)
enrichplot::dotplot(PRADMf, showCategory=2)