# GTM Project

In [2]:
#libraries
library(tidyverse)
library(vcfR) 

── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.6
[32m✔[39m [34mforcats  [39m 1.0.1     [32m✔[39m [34mstringr  [39m 1.6.0
[32m✔[39m [34mggplot2  [39m 4.0.1     [32m✔[39m [34mtibble   [39m 3.3.0
[32m✔[39m [34mlubridate[39m 1.9.4     [32m✔[39m [34mtidyr    [39m 1.3.1
[32m✔[39m [34mpurrr    [39m 1.2.0     
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors

   *****       ***   vcfR   ***       *****
   This is vcfR 1.15.0 
     browseVignettes('vcfR') # Documentation
     citation('vcfR') # Citation
   *****       *****  

In [4]:
#load mutect2 vcf files

vcf.dir <- "/compute_space/GT_project/submission/vcf/annot"

# pre/during/post cetuximab treatment
pre_treat <- read.vcfR(file.path(vcf.dir, "SRR13974004.dup.mutec2.chr17.filtered.annot.vcf.gz"))
during_treat <-read.vcfR(file.path(vcf.dir, "SRR13974005.dup.mutec2.chr17.filtered.annot.vcf.gz"))
post_treat <- read.vcfR(file.path(vcf.dir, "SRR13974006.dup.mutec2.chr17.filtered.annot.vcf.gz"))

Scanning file to determine attributes.
File attributes:
  meta lines: 3439
  header_line: 3440
  variant count: 5400
  column count: 10
Meta line 3439 read in.
All meta lines processed.
gt matrix initialized.
Character matrix gt created.
  Character matrix gt rows: 5400
  Character matrix gt cols: 10
  skip: 0
  nrows: 5400
  row_num: 0
Processed variant: 5400
All variants processed
Scanning file to determine attributes.
File attributes:
  meta lines: 3439
  header_line: 3440
  variant count: 1311
  column count: 10
Meta line 3439 read in.
All meta lines processed.
gt matrix initialized.
Character matrix gt created.
  Character matrix gt rows: 1311
  Character matrix gt cols: 10
  skip: 0
  nrows: 1311
  row_num: 0
Processed variant: 1311
All variants processed
Scanning file to determine attributes.
File attributes:
  meta lines: 3439
  header_line: 3440
  variant count: 8313
  column count: 10
Meta line 3439 read in.
All meta lines processed.
gt matrix initialized.
Character matrix gt

In [5]:
#Define helpers

#' getDataframe()
#' generate a comprehensive dataframe of variants
#' @param vcfR_object: funcotator annoated vcf file loaded with vcfR package
#' @return (data.frame) tidy dataframe
getDataframe <- function(vcfR_object){
    
    info <- vcfR_object@fix %>% data.frame() %>% 
        
        # Split INFO into multiple columns
        separate_rows(INFO, sep=";") %>% 
        separate(INFO, into=c("key","value"), sep="=", fill="right") %>% 
        pivot_wider(names_from = key, values_from = value)%>%
        mutate(across(all_of(c('DP','ECNT','ECNTH','GERMQ','MBQ','MFRL',
                         'MMQ','MPOS','POPAF','TLOD','STR','STRQ','PON')
                      ), ~ as.numeric(.))) %>%
    
        # Split FUNCOTATION into multiple columns
        mutate(FUNCOTATION = gsub("^\\[|\\]$", "", FUNCOTATION),
          
          )
    
    return(info)
    }


info <- getDataframe(pre_treat)

head(info)

[1m[22m[36mℹ[39m In argument: `across(...)`.
[33m![39m NAs introduced by coercion


CHROM,POS,ID,REF,ALT,QUAL,FILTER,AS_FilterStatus,AS_SB_TABLE,DP,⋯,MFRL,MMQ,MPOS,POPAF,TLOD,RPA,RU,STR,STRQ,PON
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<dbl>,<dbl>,<dbl>
chr17,166107,,T,C,,PASS,SITE,"0,0|0,0",2,⋯,,,56,7.3,4.64,,,,,
chr17,442968,,C,T,,PASS,SITE,"0,0|0,0",2,⋯,,,14,7.3,7.18,,,,,
chr17,443122,,G,A,,PASS,SITE,"0,0|0,0",1,⋯,,,67,7.3,3.68,,,,,
chr17,444296,,C,T,,PASS,SITE,"0,0|0,0",1,⋯,,,48,7.3,3.48,,,,,
chr17,514655,,T,G,,PASS,SITE,"0,0|0,0",2,⋯,,,39,7.3,4.6,,,,,
chr17,535345,,T,C,,PASS,SITE,"0,0|0,0",2,⋯,,,55,7.3,4.54,,,,,
