In [None]:
# Generate BED and WIG files for the differentially expressed regions,
# using the per-nucleotide coverage reported by `samtools depth`.

In [1]:
# All paths used below ("annotation/", "data/", "Figures-and-Tables/") are relative
# to the directory one level above the notebook. Only move up when the project
# directories are not visible from the current working directory, so that
# re-running this cell does not climb one level further each time.
if (!all(dir.exists(c("annotation", "data")))) {
    setwd("..")
}

In [2]:
# --- output locations -------------------------------------------------------
# Directory for all generated files (trailing slash required: it is prepended with paste0).
outDir <- "Figures-and-Tables/"
# BED file with the differentially expressed tiles, produced for UCSC.
diffExprTilesFile <- "differentially-expressed-tiles.bed"
# File name prefix of the per-region wig files.
wigOutFileBase <- "signif-region"

# --- inputs -----------------------------------------------------------------
# Sample annotation table.
sampleAnnotFile <- "annotation/annotation-63.csv"
# Differential expression results.
diffExprValuesFile <- paste0(outDir, "DGE-results.RData")
# Regions around the differential expression results.
regionsFile <- "data/signif-regions-extended-for-covg-plot.bed"
# Read coverage for each nucleotide and sample, from samtools.
ntWiseCoverageFile <- "data/sam-depth-25MpercIDS-uniqMap-wFlags-all-signif-regions-extended.txt"

In [3]:
# Sample annotation: tab-separated despite the .csv extension.
# read.delim() is read.csv() with sep = "\t" (and dec = ".") as defaults.
sampleAnnot <- read.delim(sampleAnnotFile, stringsAsFactors = FALSE)
head(sampleAnnot)

Unnamed: 0_level_0,ID,gender,age,group
Unnamed: 0_level_1,<chr>,<chr>,<int>,<chr>
1,SXR0002,f,80,ccRCC
2,SXR0004,m,50,ccRCC
3,SXR0006,m,68,ccRCC
4,SXR0010,m,65,urolithiasis
5,SXR0014,m,57,ccRCC
6,SXR0016,m,59,ccRCC


In [4]:
# Two-class label per sample, named by sample ID:
# "ccRCC" -> "tumor", every other annotated group -> "normal".
group <- setNames(ifelse(sampleAnnot$group == "ccRCC", "tumor", "normal"),
                  sampleAnnot$ID)
group

# Sample IDs belonging to each class.
tumorSamples  <- names(group[group == "tumor"])
normalSamples <- names(group[group == "normal"])

cat(length(tumorSamples), " tumorSamples")
tumorSamples
cat(length(normalSamples), " normalSamples")
normalSamples

47  tumorSamples

16  normalSamples

### Differentially expressed tiles

In [5]:
# Load the differential expression results. The .RData file must provide the
# data frame `deseqRes_signif` used below. load() silently injects whatever the
# file contains, so verify the object really came from this file: otherwise a
# stale `deseqRes_signif` left over in the kernel could be used unnoticed.
loadedObjects = load(diffExprValuesFile)
if (!("deseqRes_signif" %in% loadedObjects)) {
    stop("'", diffExprValuesFile, "' does not contain 'deseqRes_signif' (found: ",
         paste(loadedObjects, collapse = ", "), ")")
}
head(deseqRes_signif)

Unnamed: 0_level_0,chr,start,end,gene,EnsgID,geneType,baseMean,log2FoldChange,lfcSE,stat,⋯,SXR0049,SXR0060,SXR0070,SXR0071,SXR0084,SXR0098,SXR0117,SXR0118,SXR0121,SXR0122
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>
chr1_28905201_28905300,chr1,28905201,28905300,SNORD99,ENSG00000197989;ENSG00000221539,antisense;snoRNA,3.886967,-3.450529,0.6894305,-5.004897,⋯,0,7,23,11,2,0,23,1,7,5
chr1_153643701_153643800,chr1,153643701,153643800,TRNA_Met,ENSG00000224870;ENSG00000242485,protein_coding;protein_coding,20.566221,-1.632384,0.3768256,-4.331934,⋯,25,58,33,76,34,8,42,0,24,34
chr1_153643801_153643900,chr1,153643801,153643900,,ENSG00000224870;ENSG00000242485,protein_coding;protein_coding,11.730551,-2.187097,0.553558,-3.950981,⋯,9,49,20,44,24,3,30,0,10,17
chr3_186504601_186504700,chr3,186504601,186504700,SNORA81,ENSG00000156976;ENSG00000221420,protein_coding;snoRNA,5.329213,-1.799555,0.4319252,-4.166357,⋯,2,18,7,6,15,1,29,0,7,5
chr6_86387301_86387400,chr6,86387301,86387400,SNORD50B,ENSG00000203875,processed_transcript,11.751297,-2.178942,0.584209,-3.72973,⋯,4,12,23,13,1,3,32,1,21,3
chr11_62609001_62609100,chr11,62609001,62609100,RNU2-2P,ENSG00000133316;ENSG00000222328,protein_coding;snRNA,84.285631,-2.188605,0.5840148,-3.747516,⋯,11,59,19,8,29,10,1557,1,1148,21


In [6]:
# BED track for upload into the UCSC browser: marks the differentially expressed
# tiles, to be shown next to the nucleotide-wise coverage tracks produced below.
#
# The tiles in deseqRes_signif are 1-based and inclusive (e.g. 28905201-28905300
# is a 100 nt tile), whereas BED is 0-based and half-open. Hence
# chromStart/thickStart = start - 1, while the end coordinate is unchanged.
# start/end are stored as doubles, so they are formatted explicitly: otherwise a
# round value such as 100000000 would be written as "1e+08".
nTiles   = nrow(deseqRes_signif)
bedStart = format(deseqRes_signif$start - 1, scientific = FALSE, trim = TRUE)
bedEnd   = format(deseqRes_signif$end, scientific = FALSE, trim = TRUE)

# BED9 columns: chrom, chromStart, chromEnd, name, score, strand,
#               thickStart, thickEnd, itemRgb
bedTable = data.frame(chrom      = deseqRes_signif$chr,
                      chromStart = bedStart,
                      chromEnd   = bedEnd,
                      name       = rep("region", nTiles),
                      score      = rep("1000", nTiles),
                      strand     = rep("+", nTiles),
                      thickStart = bedStart,
                      thickEnd   = bedEnd,
                      itemRgb    = rep("50,255,50", nTiles),
                      stringsAsFactors = FALSE)

f = file(description = paste0(outDir,diffExprTilesFile), open = "w")
# make sure the connection is closed even if writing fails
tryCatch({
    writeLines(con = f,text = "track name=\"significant regions\" description=\"differentially expressed regions\" color=50,255,50")
    write.table(bedTable,
                quote = FALSE, row.names = FALSE, col.names = FALSE, sep = "\t",
                file = f)
}, finally = close(f))

### load coverage of regions to display

In [7]:
# Regions to display: header-less, tab-separated BED-like file with four columns.
regionColumns <- c("chr", "start", "end", "descr")
regions <- read.delim(regionsFile, header = FALSE, stringsAsFactors = FALSE,
                      col.names = regionColumns)

# Sort by chromosome name (as text), then by start and end coordinate.
regions <- regions[with(regions, order(chr, start, end)), ]
regions

Unnamed: 0_level_0,chr,start,end,descr
Unnamed: 0_level_1,<chr>,<int>,<int>,<chr>
6,chr1,28904520,28912021,SNORD99
7,chr1,153643650,153643999,tRNA-Met
2,chr11,62599082,62618679,RNU2-2P
3,chr11,62619136,62628032,SNORD22/26
4,chr11,65264662,65282872,mascRNA
1,chr17,62223674,62223919,SNORA50C
8,chr3,186501100,186507750,SNORA81
5,chr6,86386590,86389928,SNORD50B


In [8]:
# Nucleotide-wise coverage table: one row per position, one column per sample
# (the first two columns hold chromosome and position).
bedCovgData  = read.csv(file=ntWiseCoverageFile, header=TRUE, sep="\t", stringsAsFactors=FALSE)
# Strip the BAM file suffix so that the column names become the plain sample IDs.
# fixed = TRUE: the suffix is a literal string, its dots must not act as regex wildcards.
colnames(bedCovgData) = gsub(".Aligned.sortedByCoord.out.25M10percIDS.uniqMap.bam","",
                             colnames(bedCovgData), fixed = TRUE)
colnames(bedCovgData)[1:2] = c("chr","pos")
head(bedCovgData,3)

Unnamed: 0_level_0,chr,pos,SXR0002,SXR0004,SXR0006,SXR0008,SXR0010,SXR0012,SXR0014,SXR0016,⋯,SXR0112,SXR0113,SXR0114,SXR0115,SXR0116,SXR0117,SXR0118,SXR0120,SXR0121,SXR0122
Unnamed: 0_level_1,<chr>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,⋯,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>
1,chr1,28904521,0,0,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0
2,chr1,28904522,0,0,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0
3,chr1,28904523,0,0,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0


In [9]:
# Mean coverage per position, separately over the normal and the tumor samples.
bedCovgData[["mean_coverage_normals"]] <- rowMeans(bedCovgData[, normalSamples])
bedCovgData[["mean_coverage_tumors"]]  <- rowMeans(bedCovgData[, tumorSamples])

# Move every column whose name contains "mean" to the front of the table.
isMeanCol <- grepl("mean", colnames(bedCovgData))
bedCovgData <- cbind(bedCovgData[, isMeanCol], bedCovgData[, !isMeanCol])
head(bedCovgData, n = 3)

Unnamed: 0_level_0,mean_coverage_normals,mean_coverage_tumors,chr,pos,SXR0002,SXR0004,SXR0006,SXR0008,SXR0010,SXR0012,⋯,SXR0112,SXR0113,SXR0114,SXR0115,SXR0116,SXR0117,SXR0118,SXR0120,SXR0121,SXR0122
Unnamed: 0_level_1,<dbl>,<dbl>,<chr>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,⋯,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>
1,0,0.0212766,chr1,28904521,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0
2,0,0.0212766,chr1,28904522,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0
3,0,0.0212766,chr1,28904523,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0


## export to wig
One file per region, each holding two tracks (mean coverage of the urolithiasis and of the ccRCC samples):

In [10]:
# Write one wiggle track (track header, variableStep declaration and one
# "position value" line per nucleotide) to an already opened connection.
#   con        open, writable connection
#   chr        chromosome name for the variableStep declaration
#   pos, covg  positions and the coverage values to write (same length)
#   trackLabel used for both the name and the description of the track
#   viewRange  "min:max" string for viewLimits
#   color      "r,g,b" string
writeWigTrack = function(con, chr, pos, covg, trackLabel, viewRange, color) {
    wigHeadStr = paste0('track type=wiggle_0 name="',trackLabel,
                        '" description="',trackLabel,
                        '" visibility=dense autoScale=off ',
                        'viewLimits=',viewRange,' color=',color,
                        ' priority=9\nvariableStep chrom=',chr)
    writeLines(text = wigHeadStr, con = con)
    # vectorised instead of a row-wise apply(); explicit formatting so that neither
    # positions nor values can end up in scientific notation (e.g. "1e+08")
    writeLines(text = paste(format(pos, scientific = FALSE, trim = TRUE),
                            format(covg, scientific = FALSE, trim = TRUE)),
               con = con)
}

regionsCopyPasteUCSC = ""

# seq_len(): no iteration at all if the regions table is empty
for (regIdx in seq_len(nrow(regions))) {

    chr = regions[regIdx,"chr"]; start = regions[regIdx,"start"]; end = regions[regIdx,"end"]
    descr = regions[regIdx,"descr"]

    cat(chr,start,end,descr,"\n")

    # collect "chr:start-end  descr" lines for copy & paste into the UCSC browser
    regionsCopyPasteUCSC =
        paste0(regionsCopyPasteUCSC,
               chr,":",
               start,"-",end,"  ",
               descr,"\n")

    wh = which(bedCovgData$chr == chr & bedCovgData$pos >= start & bedCovgData$pos <= end)
    if (length(wh) == 0) {
        # without coverage rows the view limits would be Inf:-Inf and the file useless
        warning("no coverage data for region ", chr, ":", start, "-", end,
                " (", descr, ") - no wig file written")
        next
    }
    bedCovgDataSub = bedCovgData[wh, ]

    # file-name safe version of the description (";", blanks and "/" become "_")
    descrSimple = gsub("[; \\/]+","_",descr)

    # common y-axis limits for both tracks, taken from the unrounded means
    viewRange = range(c(bedCovgDataSub$mean_coverage_normals, bedCovgDataSub$mean_coverage_tumors))
    viewRange[2] = viewRange[2] + 1
    viewRange = paste0(format(round(viewRange), scientific = FALSE, trim = TRUE), collapse=":")

    # the tracks contain the rounded mean coverage, one value per position:
    # sort by position and keep only the first row of any duplicated position
    bedCovgDataSub$mean_coverage_normals = round(bedCovgDataSub$mean_coverage_normals)
    bedCovgDataSub$mean_coverage_tumors = round(bedCovgDataSub$mean_coverage_tumors)
    bedCovgDataSub = bedCovgDataSub[ order(bedCovgDataSub$pos),]
    bedCovgDataSub = bedCovgDataSub[ !duplicated(bedCovgDataSub$pos),]

    wigFile = paste0(outDir,wigOutFileBase,"-",descrSimple,".wig")
    f = file(description = wigFile, open = "w")
    # close the connection even if one of the writes fails
    tryCatch({
        writeLines(con=f, text= paste0("browser position ",chr,":",start,"-",end))

        # normal:
        writeWigTrack(con = f, chr = chr, pos = bedCovgDataSub$pos,
                      covg = bedCovgDataSub$mean_coverage_normals,
                      trackLabel = paste0(descr," - urolithiasis"),
                      viewRange = viewRange, color = "50,50,255")

        # tumor:
        writeWigTrack(con = f, chr = chr, pos = bedCovgDataSub$pos,
                      covg = bedCovgDataSub$mean_coverage_tumors,
                      trackLabel = paste0(descr," - ccRCC"),
                      viewRange = viewRange, color = "255,50,50")
    }, finally = close(f))

    cat("  -> ",paste0(wigFile,"\n"))

}

cat("\n\nregions to copy & paste into UCSC browser:\n\n",regionsCopyPasteUCSC, sep="")

chr1 28904520 28912021 SNORD99 
  ->  Figures-and-Tables/signif-region-SNORD99.wig
chr1 153643650 153643999 tRNA-Met 
  ->  Figures-and-Tables/signif-region-tRNA-Met.wig
chr11 62599082 62618679 RNU2-2P 
  ->  Figures-and-Tables/signif-region-RNU2-2P.wig
chr11 62619136 62628032 SNORD22/26 
  ->  Figures-and-Tables/signif-region-SNORD22_26.wig
chr11 65264662 65282872 mascRNA 
  ->  Figures-and-Tables/signif-region-mascRNA.wig
chr17 62223674 62223919 SNORA50C 
  ->  Figures-and-Tables/signif-region-SNORA50C.wig
chr3 186501100 186507750 SNORA81 
  ->  Figures-and-Tables/signif-region-SNORA81.wig
chr6 86386590 86389928 SNORD50B 
  ->  Figures-and-Tables/signif-region-SNORD50B.wig


regions to copy & paste into UCSC browser:

chr1:28904520-28912021  SNORD99
chr1:153643650-153643999  tRNA-Met
chr11:62599082-62618679  RNU2-2P
chr11:62619136-62628032  SNORD22/26
chr11:65264662-65282872  mascRNA
chr17:62223674-62223919  SNORA50C
chr3:186501100-186507750  SNORA81
chr6:86386590-86389928  SNORD50B
