<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Goal" data-toc-modified-id="Goal-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Goal</a></span></li><li><span><a href="#Var" data-toc-modified-id="Var-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Var</a></span></li><li><span><a href="#Init" data-toc-modified-id="Init-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Init</a></span></li><li><span><a href="#Run1" data-toc-modified-id="Run1-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Run1</a></span></li><li><span><a href="#sessionInfo" data-toc-modified-id="sessionInfo-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>sessionInfo</a></span></li></ul></div>

# Goal

* Visualize per-position ResMiCo scores along each contig, together with the reads mapped to that contig

# Var

In [4]:
# Both input locations live under the same evaluation root; build them from it.
# The trailing slashes are kept so the resulting strings match what downstream
# file.path() calls have always received.
eval_root <- '/ebio/abt3_projects/databases_no-backup/DeepMAsED/version2/real_data_eval/UHGG/LLMGQC_r100'
# per-sample mapping output (contigs + BAM)
base_dir <- paste0(eval_root, '/LLMGA/rmc-sm/map/')
# per-position score files (WIG)
scores_dir <- paste0(eval_root, '/resmico_scores/')


# Init

In [5]:
library(dplyr)
library(tidyr)
library(ggplot2)
library(data.table)
library(tidytable)
library(LeyLabRMisc)
Init()

In [6]:
library(Gviz)
library(Rsamtools)

In [3]:
# Gviz setting. NOTE(review): presumably required so that arbitrary contig names
# (not UCSC-style 'chrN') are accepted as chromosome identifiers -- confirm in Gviz docs.
options(ucscChromosomeNames=FALSE)

# Run1

In [7]:
# Each list maps sample ID -> WIG file name, where the file name is always
# '<score set prefix>_<sample ID>.wig'. The lists are generated from the sample
# IDs so the key and the file name cannot drift apart.
score_files_9feat <- local({
    ids <- c('ERS436684', 'ERS537292', 'ERS971961',
             'ERS1069689', 'SRS476211', 'SRS820603')
    setNames(as.list(paste0('9_feat_contr_', ids, '.wig')), ids)
})
score_files_contr <- local({
    ids <- c('ERS537292', 'ERS971961', 'ERS1069689',
             'SRS476114', 'SRS476211', 'SRS820603')
    setNames(as.list(paste0('all_feat_contr_', ids, '.wig')), ids)
})
score_files_low <- local({
    ids <- c('ERS235630', 'ERS396506', 'ERS1015611',
             'SRS1858592', 'SRS1876707')
    setNames(as.list(paste0('all_feat_low_', ids, '.wig')), ids)
})
score_files_medium <- local({
    ids <- c('ERS235591', 'ERS396506', 'ERS848736',
             'ERS1015876', 'SRS476114')
    setNames(as.list(paste0('all_feat_medium_', ids, '.wig')), ids)
})

In [31]:
#' Draw the Gviz tracks for one contig, optionally into a PNG file
#'
#' @param x Single-contig record providing `contig` (name) and `length_bp`
#'   (plotted range is 1..length_bp).
#' @param tracks List of Gviz tracks handed to `plotTracks()`. Three tracks are
#'   expected, because `sizes` below has three entries.
#' @param out_dir If not NULL, the plot is written to `<out_dir>/<contig>.png`
#'   (the directory is created when missing); if NULL, the plot is drawn on the
#'   current graphics device.
#' @return NULL, invisibly; called for its plotting side effect.
plot_region = function(x, tracks, out_dir=NULL){
    if(! is.null(out_dir)){
        if(! dir.exists(out_dir)){
            make_dir(out_dir)
        }
        out_file = file.path(out_dir, paste0(x[['contig']], '.png'))
        png(file=out_file, width=8, height=3.5, units='in', res=300)
        # Close exactly the device opened above, even when plotTracks() fails;
        # otherwise the PNG device leaks and later plots land in this file.
        png_dev = dev.cur()
        on.exit(dev.off(png_dev), add=TRUE)
    }
    plotTracks(tracks, 
               sizes = c(1,3,2),
               from = 1, to = x[['length_bp']], 
               chromosome = x[['contig']], 
               col.mates = 'purple',
               main = x[['contig']],
               cex.main = 0.8,
               cex = 0.8)
    # Do not hand the (potentially large) track objects back to the caller
    invisible(NULL)
}

#' Plot mapped reads and per-position scores for every scored contig of a sample
#'
#' Reads the sample's WIG score file, collects the contig names from its
#' `variableStep chrom=<contig>` header lines, looks up those contigs' lengths
#' in the assembly FASTA, and calls `plot_region()` once per contig with a
#' sequence track, an alignments track and a score track.
#'
#' @param sample Sample ID; must be one of `names(score_files)`.
#' @param score_files Named list mapping sample ID -> WIG file name inside
#'   `scores_dir`.
#' @param base_dir Root directory containing one sub-directory per sample with
#'   the contigs FASTA and the mapped-reads BAM.
#' @param scores_dir Directory containing the WIG score files.
#' @param out_dir Forwarded to `plot_region()`; NULL plots to the current device.
#' @return List with one element per plotted contig (the `plot_region()` results).
plot_contig = function(sample, score_files, base_dir, scores_dir, out_dir=NULL){
    # input
    if(! sample %in% names(score_files)){
        stop('No score file listed for sample: ', sample, call.=FALSE)
    }
    # NOTE(review): the four literal 'NA' directory levels are part of the
    # on-disk layout as written here; presumably unset pipeline parameters -- verify.
    D = file.path(base_dir, sample, glue::glue('{s}_contigs', s=sample), 
                  'NA', 'NA', 'NA', 'NA')
    contigs_file = file.path(D, 'contigs.fasta')
    bam_file = file.path(D, 'mapped.bam')
    wig_file = file.path(scores_dir, score_files[[sample]])
    # fail early with a readable message instead of deep inside a reader
    input_files = c(contigs_file, bam_file, wig_file)
    missing_files = input_files[! file.exists(input_files)]
    if(length(missing_files) > 0){
        stop('Missing input file(s) for sample ', sample, ': ',
             paste(missing_files, collapse=', '), call.=FALSE)
    }
    # getting target contigs: names come from the 'variableStep chrom=...' lines
    target_contigs = Fread(wig_file, sep=' ', header=FALSE) %>%
        filter.(V1 == 'variableStep') %>%
        mutate.(V2 = gsub('^chrom=', '', V2)) %>%
        .$V2
    # getting contig lengths
    contigs = FaFile(contigs_file) %>% seqinfo %>% as.data.frame
    contigs$contig = rownames(contigs)
    contigs = contigs %>% as.data.table %>%
        rename.('length_bp' = seqlengths) %>%
        select.(contig, length_bp) %>%
        filter.(contig %in% target_contigs)
    # tracks (order must match the `sizes` vector used in plot_region)
    tracks = list(
        SequenceTrack(contigs_file),
        AlignmentsTrack(bam_file, isPaired = TRUE),
        DataTrack(wig_file, type='l', name='ResMiCo')
    )
    # plotting: one call (and one figure) per contig
    contigs %>%
        group_split.(contig) %>%
        lapply(plot_region, tracks=tracks, out_dir=out_dir)
}

In [32]:
# Plot all scored contigs for each sample of the '9_feat_contr' score set;
# figures are written to <scores_dir>/9_feat_contr
p.dims(8, 3)
out_dir <- file.path(scores_dir, '9_feat_contr')
ret <- lapply(
    names(score_files_9feat),
    function(sample_id){
        plot_contig(sample_id, score_files=score_files_9feat, base_dir=base_dir,
                    scores_dir=scores_dir, out_dir=out_dir)
    }
)

In [33]:
# Plot all scored contigs for each sample of the 'all_feat_contr' score set;
# figures are written to <scores_dir>/all_feat_contr
p.dims(8, 3)
out_dir <- file.path(scores_dir, 'all_feat_contr')
ret <- lapply(
    names(score_files_contr),
    function(sample_id){
        plot_contig(sample_id, score_files=score_files_contr, base_dir=base_dir,
                    scores_dir=scores_dir, out_dir=out_dir)
    }
)

In [34]:
# Plot all scored contigs for each sample of the 'all_feat_low' score set;
# figures are written to <scores_dir>/all_feat_low
p.dims(8, 3)
out_dir <- file.path(scores_dir, 'all_feat_low')
ret <- lapply(
    names(score_files_low),
    function(sample_id){
        plot_contig(sample_id, score_files=score_files_low, base_dir=base_dir,
                    scores_dir=scores_dir, out_dir=out_dir)
    }
)

In [36]:
# Plot all scored contigs for each sample of the 'all_feat_medium' score set;
# figures are written to <scores_dir>/all_feat_medium
p.dims(8, 3)
out_dir <- file.path(scores_dir, 'all_feat_medium')
ret <- lapply(
    names(score_files_medium),
    function(sample_id){
        plot_contig(sample_id, score_files=score_files_medium, base_dir=base_dir,
                    scores_dir=scores_dir, out_dir=out_dir)
    }
)

Created directory: /ebio/abt3_projects/databases_no-backup/DeepMAsED/version2/real_data_eval/UHGG/LLMGQC_r100/resmico_scores//all_feat_medium 


# sessionInfo

In [16]:
# Record the R version and attached package versions used for this run
sessionInfo()

R version 4.1.2 (2021-11-01)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 18.04.6 LTS

Matrix products: default
BLAS/LAPACK: /tmp/global2/nyoungblut/code/DeepMAsED/conda_envs/dm-genome/lib/libopenblasp-r0.3.18.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] grid      stats4    stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
 [1] Rsamtools_2.10.0     Biostrings_2.62.0    XVector_0.34.0      
 [4] Gviz_1.38.0          GenomicRanges_1.46.0 GenomeInfoDb_1.30.0 
 [7] IRanges_2.28.0       S4Vectors_0.32.0     BiocGenerics_0.40.0 
[10] clustermq_0.8.95.3   LeyLabRMisc_0.2.1    tidyt