# Compare CGmaps between control and deletion strains. 
Comparision datasets are published in supplenetary files. 

In [2]:
library(tidyverse, lib='/usr/local/lib/R/site-library')

── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.0     [32m✔[39m [34mreadr    [39m 2.1.4
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.0
[32m✔[39m [34mggplot2  [39m 3.4.1     [32m✔[39m [34mtibble   [39m 3.1.8
[32m✔[39m [34mlubridate[39m 1.9.2     [32m✔[39m [34mtidyr    [39m 1.3.0
[32m✔[39m [34mpurrr    [39m 1.0.1     
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors


In [3]:
cols = c('chromosome', 'reference_nucleotide', 'position', 'trinuc_context','dinuc_context', 
         "methylation_frequency", "count_mC", "coverage")

TS559exoS <- read.delim('../processed_cgmaps/bs7v1/TS559exoS_coverage', sep='\t') %>%
    dplyr::rename(
          methylation_frequency_TS559_rep1 = methylation_frequency_TS559_March2016,
          mC_count_TS559_rep1 = mC_count_TS559_March2016,
          coverage_TS559_rep1 = coverage_TS559_March2016,
        
          methylation_frequency_TS559_rep2 = methylation_frequency_TS559_Dec2016,
          mC_count_TS559_rep2 = mC_count_TS559_Dec2016,
          coverage_TS559_rep2 = coverage_TS559_Dec2016,
           
          methylation_frequency_TS559_rep3 = methylation_frequency_TS559_Oct2020,
          mC_count_TS559_rep3 = mC_count_TS559_Oct2020,
          coverage_TS559_rep3 = coverage_TS559_Oct2020
          )

annotation <- data.frame(read.delim('/home/kristin/jupyter_notebooks/RNA_mt_proj/bs7/annotation_ready', 
                                    sep = '\t') %>%
    dplyr::rename(position=TS559_position) )

## HELPERS 


In [4]:
# HELPERS 

load_cgmap <- function(cgmap,strain, replicate, growth_phase, metabolic_condition='S'){
    cols = c('chromosome', 'base', 'position', 'trinuc_context','dinuc_context', 
         "methylation_frequency", "count_mC", "coverage")
    outdata <- read.delim(cgmap, sep='\t', header=FALSE,col.names=cols) %>%
        mutate(strain=strain, growth_phase=growth_phase, 
               metabolic_condition=metabolic_condition, replicate=replicate)

}


#clean up raw CGmaps, add some and rename some columns
processCGmap <- function(cgmap){
    strain = unique(cgmap$strain)
    rep = unique(cgmap$replicate)
    
    final_data <- cgmap %>% 
        dplyr::select(-trinuc_context, -dinuc_context) %>%
        dplyr::rename(!!paste("coverage","_",strain,"_",rep, sep="") := coverage) %>%
        dplyr::rename(!!paste("mC_count","_",strain,"_",rep, sep="") := count_mC) %>%
        dplyr::rename(!!paste("methylation_frequency","_",strain,"_",rep, sep="") := methylation_frequency) %>%
        dplyr::select(-replicate, -strain)
    return(final_data)
}



#merge data for replicate deletion strains
merge_del_reps <- function(x,y, strain){
    cov_1 <- paste("coverage_",strain, "_rep1", sep='')
    cov_2 <- paste("coverage_",strain, "_rep2", sep='')

    d <- x %>%
        merge(y=y, 
              by=c("chromosome","position", 'reference_nucleotide', 'growth_phase','metabolic_condition'), 
              all=TRUE) %>%
    
            mutate(!!as.name(cov_1) := replace_na(!!as.name(cov_1),0)) %>%
            mutate(!!as.name(cov_2) := replace_na(!!as.name(cov_2),0))
    
    return(d)
}

#merge data for triplicate deletion strains
merge_del_3reps <- function(x,y,z,strain){
    cov_1 <- paste("coverage_",strain, "_rep1", sep='')
    cov_2 <- paste("coverage_",strain, "_rep2", sep='')
    cov_3 <- paste("coverage_",strain, "_rep3", sep='')

    d <- x %>%
        merge(y=y, 
              by=c("chromosome","position", 'reference_nucleotide', 'growth_phase','metabolic_condition'), 
              all=TRUE) %>%
        merge(y=z, 
              by=c("chromosome","position", 'reference_nucleotide', 'growth_phase','metabolic_condition'), 
              all=TRUE) %>%
            mutate(!!as.name(cov_1) := replace_na(!!as.name(cov_1),0)) %>%
            mutate(!!as.name(cov_2) := replace_na(!!as.name(cov_2),0)) %>%
            mutate(!!as.name(cov_3) := replace_na(!!as.name(cov_3),0))

    return(d)
}


# determine if high confidence, reproducible sites with 3 reps
#f  frequency
#m  m5C coverage
#p  99 percentile m5C coverage
#c  total coverage
detect_hiconf_3reps <- function(f1,m1,p1,c1,f2,m2,p2,c2,f3,m3,p3,c3, min_cov=47,
                                min_freq=.10, min_freq2 = 0.05) {
    ls <- c()
    
    if( is.na(c1) ) {c1<-0}
    if( is.na(c2) ) {c2<-0}
    if( is.na(c3) ) {c3<-0}
    if(c1 < min_cov & c2 < min_cov & c3 < min_cov) {return(NA)}
    
    else{
    
    if(c1<min_cov){ls <- c(ls,NA)}
    else if(  (c1>=min_cov & m1>=p1 & f1>=min_freq) | (m1>50 & f1>=min_freq2) ) {ls <- c(ls,TRUE)}
    else {ls <- c(ls,FALSE)}
    
    if(c2<min_cov){ls <- c(ls,NA)}
    else if( (c2>=min_cov & m2>=p2 & f2>=min_freq) | (m2>50 & f2>=min_freq2) ) {ls <- c(ls,TRUE)}
    else {ls <- c(ls,FALSE)}
    
    if(c3<min_cov){ls <- c(ls,NA)}
    else if( (c3>=min_cov & m3>=p3 & f3>=min_freq)| (m3>50 & f3>=min_freq2) )  {ls <- c(ls,TRUE)}
    else {ls <- c(ls,FALSE)}
    
    ans_t <- length(which(ls==TRUE))
    ans_na <- length(which(ls==NA))
    ans_f <- length(which(ls==FALSE))

    if(ans_na >= 3){return(NA)}
    if(ans_na == 2 & ans_t == 1){return(NA)}
    else if(ans_t >= 2){return(TRUE)}
    else { return(FALSE)}
        }
}



# determine if high confidence, reproducible sites with 2 reps
#f  frequency
#m  m5C coverage
#p  99 percentile m5C coverage
#c  total coverage
detect_hiconf_2reps <- function(f1,m1,p1,c1,f2,m2,p2,c2, min_cov=47,
                                min_freq=.10, min_freq2 = 0.05) {
    ls <- c()
    
    if( is.na(c2) ) {c1<-0}
    if( is.na(c2) ) {c2<-0}
    
    if(c1 < min_cov & c2 < min_cov) {return(NA)}
    else{
            
    if(c1<min_cov){ls <- c(ls,NA)}
    else if(  (c1>=min_cov & m1>=p1 & f1>=min_freq) | (m1>50 & f1>=min_freq2) ) {ls <- c(ls,TRUE)}
    else {ls <- c(ls,FALSE)}
    
    if(c2<min_cov){ls <- c(ls,NA)}
    else if( (c2>=min_cov & m2>=p2 & f2>=min_freq) | (m2>50 & f2>=min_freq2) ) {ls <- c(ls,TRUE)}
    else {ls <- c(ls,FALSE)}

    ans_t <- length(which(ls==TRUE))
    ans_na <- length(which(ls==NA))
    ans_f <- length(which(ls==FALSE))

    if(ans_f == 1 & ans_t == 1 ){return(TRUE)}
    if(ans_f >= 1  ){return(FALSE)}
    else if(ans_t >= 2){return(TRUE)}
    else if(ans_t == 1 & ans_na ==1){return(TRUE)}
    else if(ans_f == 1 & ans_na ==1){return(FALSE)}

    else { return(NA)}
        
        }
}



# determine if high confidence in all three 3 reps
super_hiconf_3reps <- function(f1,m1,p1,c1,f2,m2,p2,c2,f3,m3,p3,c3, min_cov=47,
                                min_freq=.10, min_freq2 = 0.05) {
    ls <- c()
    
    if( is.na(c1) ) {c1<-0}
    if( is.na(c2) ) {c2<-0}
    if( is.na(c3) ) {c3<-0}

    
    if(  (c1>=min_cov & m1>=p1 & f1>=min_freq) | (c1>1000 & f1>=min_freq2)  ) {ls <- c(ls,TRUE)}
    else {ls <- c(ls,FALSE)}
    
    
    if( (c2>=min_cov & m2>=p2 & f2>=min_freq) | (c2>=1000 & f2>=min_freq2) ) {ls <- c(ls,TRUE)}
    else {ls <- c(ls,FALSE)}
    
    if( (c3>=min_cov & m3>=p3 & f3>=min_freq) | (c3>=1000 & f3>=min_freq2) )  {ls <- c(ls,TRUE)}
    else {ls <- c(ls,FALSE)}
    
    ans <- length(which(ls==TRUE))
    
    if(ans >= 3){return(TRUE)}
    else { return(FALSE)}
}



#determine if the sample m5C frequency are within a 90% confidence interval
rm_overlap <- function(sp, ssd, cp,csd){
    ssd <- ssd*1.645
    csd <- csd*1.645

    r1 <- c(sp-ssd, sp+ssd)
    r2 <- c(cp-csd, cp+csd)
    ans <- r1 %overlaps% r2
    return(ans)
    
}


# get p value of each m5C site using binomial distribution
#glm binomial

get_pvalue <- function(s1_mC,s1_cov, s2_mC,s2_cov,
                       c1_mC,c1_cov,c2_mC,c2_cov,c3_mC,c3_cov){
    
    
    if(s1_cov == 0 & s2_cov == 0){return(NA)} 
    if(c1_cov == 0 & c2_cov == 0){return(NA)} 

    else{

    sample_success = s1_mC + s2_mC  + 1
    sample_failure = (s1_cov+s2_cov)-sample_success  +1
    
    control_success = c1_mC + c2_mC + c3_mC  +1
    control_failure = (c1_cov + c2_cov + c3_cov) - control_success  +1
    
    if(is.na(sample_failure) |  is.na(control_failure) |
      is.na(sample_failure) | is.na(sample_failure)
      
      ){return(NA)}
    
    treatment <- c('sample','control')
    success <- c(sample_success, control_success)
    failure <- c(sample_failure, control_failure)
        
    da <- data.frame(treatment, success, failure)
        da$success <- as.integer(da$success)
        da$failure <- as.integer(da$failure)
        da$treatment <- as.factor(da$treatment)

        table <- cbind(da$success,da$failure)

    fit <- glm(table ~ treatment, family = binomial, data = da)
    p_val <- summary(fit)$coefficients[8]
    return(p_val) 
    }
}

get_pvalue_deprecated <- function(s1_mC,s1_cov, s2_mC,s2_cov,
                       c1_mC,c1_cov,c2_mC,c2_cov,c3_mC,c3_cov){
    
    meth_freq_r1 <- (s1_mC+1)/(s1_cov+1)
    meth_freq_r2 <- (s2_mC+1)/(s2_cov+1)
    cnt_meth_freq_r1 <- (c1_mC+1)/(c1_cov+1)
    cnt_meth_freq_r2 <- (c2_mC+1)/(c2_cov+1)
    cnt_meth_freq_r3 <- (c3_mC+1)/(c3_cov+1)
    
    
    strain <- c('trt','trt','cnt','cnt','cnt')
    freq <- c(meth_freq_r1, meth_freq_r2,
              cnt_meth_freq_r1, cnt_meth_freq_r2, cnt_meth_freq_r3)
    
    
    da <- data.frame(cbind(strain, freq))
    
    da$strain <- as.factor(da$strain)
    da$freq <- as.double(da$freq)

    pval <- round(summary(aov(freq ~ strain, data=da))[[1]][1,5], 4)    
    return(pval)

}

    
get_pvalue_3reps <- function(s1_mC,s1_cov, s2_mC,s2_cov,s3_mC,s3_cov,
                       c1_mC,c1_cov,c2_mC,c2_cov,c3_mC,c3_cov){
    
    
    if(s1_cov == 0 & s2_cov==0 & s3_cov == 0){return(NA)} 
    if(c1_cov == 0 & c2_cov==0 & c3_cov == 0){return(NA)} 

    sample_success = s1_mC + s2_mC + s3_mC +1
    sample_failure = (s1_cov + s2_cov + s3_cov)-sample_success +1
    
    control_success = c1_mC + c2_mC + c3_mC +1
    control_failure = (c1_cov + c2_cov + c3_cov) - control_success +1
        
    if(is.na(sample_failure) |  is.na(control_failure) |
      is.na(sample_failure) | is.na(sample_failure) ){return(NA)}
    
    treatment <- c('sample','control')
    success <- c(sample_success, control_success)
    failure <- c(sample_failure,control_failure)
        
    da <- data.frame(treatment, success, failure)
        da$success <- as.integer(da$success)
        da$failure <- as.integer(da$failure)
        da$treatment <- as.factor(da$treatment)

        table <- cbind(da$success,da$failure)

    fit <- glm(table ~ treatment, family = binomial, data = da)
    p_val <- summary(fit)$coefficients[8]
    return(p_val)
}
    

get_quantiles <- function(strain_cgmap, strain, reps = 2, min_cov=47){
    
    m5C_count_r1 <- paste("mC_count_", strain, "_rep1", sep="")
    cov_r1 <- paste("coverage_", strain, "_rep1", sep="")

    m5C_count_r2 <- paste("mC_count_", strain, "_rep2", sep="")
    cov_r2 <- paste("coverage_", strain, "_rep2", sep="")
    
    #get quantile values for m5C count
    q_strain_rep1 <- quantile(
        (strain_cgmap %>%
        filter(!!as.name(cov_r1)>=min_cov, !!as.name(m5C_count_r1)>=0) )[[m5C_count_r1]], .99)[[1]]
    
    q_strain_rep2 <- quantile(
        (strain_cgmap %>%
        filter(!!as.name(cov_r2)>=min_cov, !!as.name(m5C_count_r2)>=0) )[[m5C_count_r2]], .99)[[1]]
    
    q <- c(q_strain_rep1, q_strain_rep2)
    
    if(reps==3){
        m5C_count_r3 <- paste("mC_count_", strain, "_rep3", sep="")
        cov_r3 <- paste("coverage_", strain, "_rep3", sep="")
        
        q_strain_rep3 <- quantile(
        (strain_cgmap %>%
        filter(!!as.name(cov_r3)>=min_cov, !!as.name(m5C_count_r3)>=0) )[[m5C_count_r3]], .99)[[1]]
        
        q = c(q, q_strain_rep3)
    }
    
    return(q)
}
    

get_difference <- function(hiconf_TS559, hiconf_strain, strain_poolf, control_poolf, CI90_overlap){
    
    if(is.na(hiconf_TS559) | is.na(hiconf_strain)){return(NA)}
    else if(is.na(CI90_overlap)){return(NA)}
    
    else if(hiconf_TS559 == FALSE & hiconf_strain == FALSE){return("NOT MODIFIED")}
    else if(CI90_overlap == TRUE){return("OUTSIDE OF DIFFERENTIATION LIMITS")}

    else if(hiconf_TS559 == TRUE & hiconf_strain == FALSE & strain_poolf <= 0.02){return("ABS LOSS")} 
    else if(hiconf_TS559 == TRUE & hiconf_strain == FALSE & strain_poolf > 0.02){return("REL LOSS")} 

    else if(hiconf_TS559 == FALSE & hiconf_strain == TRUE & control_poolf <= 0.02){return("ABS GAIN")}
    else if(hiconf_TS559 == FALSE & hiconf_strain == TRUE & control_poolf > 0.02){return("REL GAIN")}

    else if(hiconf_TS559 == TRUE & hiconf_strain == TRUE & strain_poolf > control_poolf){return("REL GAIN")} 
    else if(hiconf_TS559 == TRUE & hiconf_strain == TRUE & strain_poolf < control_poolf){return("REL LOSS")} 
    
    else{return("error")}
    }

        
CompareCGmap <- function(strain_cgmap, TS559_cgmap, strain, annotation, minimum_freq=.10, min_cov=47){
    
    #making some column names
    meth_freq_r1 <- paste("methylation_frequency_", strain, "_rep1", sep="")
    m5C_count_r1 <- paste("mC_count_", strain, "_rep1", sep="")
    cov_r1 <- paste("coverage_", strain, "_rep1", sep="")

    meth_freq_r2 <- paste("methylation_frequency_", strain, "_rep2", sep="")
    m5C_count_r2 <- paste("mC_count_", strain, "_rep2", sep="")
    cov_r2 <- paste("coverage_", strain, "_rep2", sep="")
    
    pooled_methylation_frequency_strain <- paste("pooled_methylation_frequency_", strain, sep='')

    #get quantile values for m5C count
    q_TS559_rep1 <- quantile((TS559_cgmap%>%
        filter(coverage_TS559_rep1>=min_cov, mC_count_TS559_rep1>=0))$mC_count_TS559_rep1, .99)[[1]]
    
    q_TS559_rep2 <- quantile((TS559_cgmap%>%
        filter(coverage_TS559_rep2>=min_cov, mC_count_TS559_rep2>=0))$mC_count_TS559_rep2, .99)[[1]]
    
    q_TS559_rep3 <- quantile((TS559_cgmap%>%
        filter(coverage_TS559_rep3>=min_cov, mC_count_TS559_rep3>=0))$mC_count_TS559_rep3, .99)[[1]]

    q_strain_rep1 <- quantile(
        (strain_cgmap %>%
        filter(!!as.name(cov_r1)>=min_cov, !!as.name(m5C_count_r1)>=0) )[[m5C_count_r1]], .99)[[1]]
    
    q_strain_rep2 <- quantile(
        (strain_cgmap %>%
        filter(!!as.name(cov_r2)>=min_cov, !!as.name(m5C_count_r2)>=0) )[[m5C_count_r2]], .99)[[1]]
    
    
    #build dataframe
    data <- strain_cgmap %>%
        merge(y = TS559_cgmap, all = TRUE, 
             by= c("chromosome","position","reference_nucleotide","growth_phase","metabolic_condition")) %>%
        dplyr::select(-chromosome,-standard_devition) %>%
    
        # house keeping
        mutate(!!as.name(cov_r1) := replace_na(!!as.name(cov_r1),0)) %>%
        mutate(!!as.name(cov_r2) := replace_na(!!as.name(cov_r2),0)) %>%
        
        mutate(coverage_TS559_rep1 = replace_na(coverage_TS559_rep1,0)) %>%
        mutate(coverage_TS559_rep2 = replace_na(coverage_TS559_rep2,0)) %>%
        mutate(coverage_TS559_rep3 = replace_na(coverage_TS559_rep3,0)) %>%
    rowwise() %>%
        mutate(hiconf_TS559 = detect_hiconf_3reps(
            f1=methylation_frequency_TS559_rep1,
            m1=mC_count_TS559_rep1,
            p1=q_TS559_rep1,
            c1=coverage_TS559_rep1,
            f2=methylation_frequency_TS559_rep2,
            m2=mC_count_TS559_rep2,
            p2=q_TS559_rep2,
            c2=coverage_TS559_rep2,
            f3=methylation_frequency_TS559_rep3,
            m3=mC_count_TS559_rep3,
            p3=q_TS559_rep3,
            c3=coverage_TS559_rep3,
            min_cov=min_cov,
            min_freq = minimum_freq) 
              ) %>%
        
        rowwise() %>%
        mutate(hiconf_strain = detect_hiconf_2reps(
                f1=!!as.name(meth_freq_r1),
                m1=!!as.name(m5C_count_r1),
                p1=q_strain_rep1,
                c1=!!as.name(cov_r1),
                f2=!!as.name(meth_freq_r2),
                m2=!!as.name(m5C_count_r2),
                p2=q_strain_rep2,
                c2=!!as.name(cov_r2), 
                min_cov= min_cov,
                min_freq = minimum_freq)
                 
                ) %>%
    
    # keep sites where m5C is high confidence in either strain
    filter(hiconf_TS559 == TRUE | hiconf_strain == TRUE) %>%
    filter(!is.na(hiconf_TS559) & !is.na(hiconf_strain)) %>%

    # calculate the mean m5C frequency at each site by pooling replicates (round to 2 decimals)
    # calculate standard deviation of each site as well
    mutate(!!as.name(pooled_methylation_frequency_strain) := 
             ( !!as.name(m5C_count_r1) +  !!as.name(m5C_count_r2) ) / ( !!as.name(cov_r1) + !!as.name(cov_r2) ) ) %>%
        
    mutate(!!as.name(pooled_methylation_frequency_strain) := round(!!as.name(pooled_methylation_frequency_strain),2))%>%
    mutate(strain_sd = sd(c(!!as.name(meth_freq_r1),!!as.name(meth_freq_r2))))%>%
    
    mutate(pooled_methylation_frequency_TS559 =
               (mC_count_TS559_rep1 + mC_count_TS559_rep2 + mC_count_TS559_rep3) /
               (coverage_TS559_rep1+coverage_TS559_rep2+coverage_TS559_rep3)    ) %>%
    mutate(pooled_methylation_frequency_TS559=round(pooled_methylation_frequency_TS559,2)) %>%
    mutate(TS559_sd = sd(c(methylation_frequency_TS559_rep1,methylation_frequency_TS559_rep2,
                              methylation_frequency_TS559_rep3))) %>%

    # calculate fold change of methylaton frequency at each site between parent and deletion strain
    mutate(FoldChange = (!!as.name(pooled_methylation_frequency_strain)+.01) / 
                               (pooled_methylation_frequency_TS559+.01),
           log2FC = log2( (!!as.name(pooled_methylation_frequency_strain)+.01) / 
                               (pooled_methylation_frequency_TS559+.01) ) ) %>%
    
    mutate(CI90_overlap = 
                   rm_overlap(!!as.name(pooled_methylation_frequency_strain), strain_sd,
                    pooled_methylation_frequency_TS559, TS559_sd )) %>%
    
    mutate(difference = get_difference(hiconf_TS559, hiconf_strain,   
                                            !!as.name(pooled_methylation_frequency_strain), 
                                            pooled_methylation_frequency_TS559,
                                            CI90_overlap
                                      ) ) %>%
    filter(difference == "ABS LOSS" | difference == "REL LOSS" | difference == "ABS GAIN" | 
           difference == "REL GAIN" | difference == "error") %>%
    filter(log2FC >= 1 | log2FC <= -1) %>%

    # add p vlaue
    rowwise() %>%
    mutate(pvalue = get_pvalue(
            s1_mC = !!as.name(m5C_count_r1),
            s1_cov = !!as.name(cov_r1), 
            s2_mC = !!as.name(m5C_count_r2),
            s2_cov = !!as.name(cov_r2),
            c1_mC = mC_count_TS559_rep1,
            c1_cov = coverage_TS559_rep1,
            c2_mC = mC_count_TS559_rep2,
            c2_cov = coverage_TS559_rep2,
            c3_mC = mC_count_TS559_rep3,
            c3_cov = coverage_TS559_rep3) ) %>%
    
    filter(!is.na(pvalue), pvalue <= 0.01)
    
        
    # adjust p values
    # adj_pvalue <- p.adjust(data$pvalue, method = 'fdr')
    #data <- cbind(data, adj_pvalue)
    
    data <- data %>% merge(y=annotation,by='position', all.x=TRUE) %>%
            dplyr::rename(TS559_position = position) %>%
           #filter(CI90_overlap==TRUE) %>%
           dplyr::select(-reproducible, -highly_reproducible,-reference_nucleotide) %>%
           arrange(log2FC) %>%
           arrange(difference) %>%
           dplyr::select(TS559_position, KOD1_position, strand, everything())
    
  
    return(data)
}
    

enumerate_hiconf_2reps <- function(strain_cgmap, strain, min_cov=47){
    
    meth_freq_r1 <- paste("methylation_frequency_", strain, "_rep1", sep="")
    m5C_count_r1 <- paste("mC_count_", strain, "_rep1", sep="")
    cov_r1 <- paste("coverage_", strain, "_rep1", sep="")

    meth_freq_r2 <- paste("methylation_frequency_", strain, "_rep2", sep="")
    m5C_count_r2 <- paste("mC_count_", strain, "_rep2", sep="")
    cov_r2 <- paste("coverage_", strain, "_rep2", sep="")
    
    q_strain_rep1 <- quantile(
        (strain_cgmap %>%
        filter(!!as.name(cov_r1)>=min_cov, !!as.name(m5C_count_r1)>=0) )[[m5C_count_r1]], .99)[[1]]
    
    q_strain_rep2 <- quantile(
        (strain_cgmap %>%
        filter(!!as.name(cov_r2)>=min_cov, !!as.name(m5C_count_r2)>=0) )[[m5C_count_r2]], .99)[[1]]
    
    
    data <- strain_cgmap %>%
        dplyr::select(-chromosome, -reference_nucleotide,-growth_phase) %>%
    
        mutate(!!as.name(cov_r1) := replace_na(!!as.name(cov_r1),0)) %>%
        mutate(!!as.name(cov_r2) := replace_na(!!as.name(cov_r2),0)) %>%  
        
        rowwise() %>%
        mutate(hiconf_strain = detect_hiconf_2reps(
                f1=!!as.name(meth_freq_r1),
                m1=!!as.name(m5C_count_r1),
                p1=q_strain_rep1,
                c1=!!as.name(cov_r1),
                f2=!!as.name(meth_freq_r2),
                m2=!!as.name(m5C_count_r2),
                p2=q_strain_rep2,
                c2=!!as.name(cov_r2), 
                min_cov=min_cov)
                 
                ) %>%
        dplyr::rename(TS559_position = position) %>%
        
        mutate( 'm5C_cov/total_cov_rep1' := 
            paste(!!as.name(paste('mC_count_',strain,'_rep1',sep='')), 
                  '/', 
                  !!as.name(paste('coverage_',strain,'_rep1',sep='')) 
                 ) 
              ) %>%
        
        mutate( 'm5C_cov/total_cov_rep2' := 
            paste(!!as.name(paste('mC_count_',strain,'_rep2',sep='')), 
                  '/', 
                  !!as.name(paste('coverage_',strain,'_rep2',sep='')) 
                 ) 
              ) %>%
        dplyr::select(TS559_position, 
                  !!as.name(meth_freq_r1), 
                  'm5C_cov/total_cov_rep1',
                  !!as.name(meth_freq_r2), 
                  'm5C_cov/total_cov_rep2',
                     hiconf_strain) %>%

    dplyr::rename(!!paste(strain,'_hiconf',sep=''):= hiconf_strain )  %>%
    dplyr::rename(!!paste('m5C_cov/total_cov_',strain,'_rep1',sep=''):= 'm5C_cov/total_cov_rep1' )  %>%
    dplyr::rename(!!paste('m5C_cov/total_cov_',strain,'_rep2',sep=''):= 'm5C_cov/total_cov_rep2' ) %>%
    dplyr::rename(!!paste('freq_',strain,'_rep1',sep=''):= !!as.name(paste('methylation_frequency_',strain,'_rep1',sep='') ) )%>%
    dplyr::rename(!!paste('freq_',strain,'_rep2',sep=''):= !!as.name(paste('methylation_frequency_',strain,'_rep2',sep='') ) )  
    
    return(data)
}

### analysis for 3 replicates (deletion of TK2241)

In [5]:
CompareCGmap_3reps <- function(strain_cgmap, TS559_cgmap, strain, annotation, minimum_freq=.10, min_cov=47){
    
    #making some column names
    meth_freq_r1 <- paste("methylation_frequency_", strain, "_rep1", sep="")
    m5C_count_r1 <- paste("mC_count_", strain, "_rep1", sep="")
    cov_r1 <- paste("coverage_", strain, "_rep1", sep="")

    meth_freq_r2 <- paste("methylation_frequency_", strain, "_rep2", sep="")
    m5C_count_r2 <- paste("mC_count_", strain, "_rep2", sep="")
    cov_r2 <- paste("coverage_", strain, "_rep2", sep="")
    
    
    meth_freq_r3 <- paste("methylation_frequency_", strain, "_rep3", sep="")
    m5C_count_r3 <- paste("mC_count_", strain, "_rep3", sep="")
    cov_r3 <- paste("coverage_", strain, "_rep3", sep="")
    
    
    pooled_methylation_frequency_strain <- paste("pooled_methylation_frequency_", strain, sep='')

    #get quantile values for m5C count
    q_TS559_rep1 <- quantile((TS559_cgmap%>%
        filter(coverage_TS559_rep1>=min_cov, mC_count_TS559_rep1>=0))$mC_count_TS559_rep1, .99)[[1]]
    
    q_TS559_rep2 <- quantile((TS559_cgmap%>%
        filter(coverage_TS559_rep2>=min_cov, mC_count_TS559_rep2>=0))$mC_count_TS559_rep2, .99)[[1]]
    
    q_TS559_rep3 <- quantile((TS559_cgmap%>%
        filter(coverage_TS559_rep3>=min_cov, mC_count_TS559_rep3>=0))$mC_count_TS559_rep3, .99)[[1]]

    q_strain_rep1 <- quantile(
        (strain_cgmap %>%
        filter(!!as.name(cov_r1)>=min_cov, !!as.name(m5C_count_r1)>=0) )[[m5C_count_r1]], .99)[[1]]
    
    q_strain_rep2 <- quantile(
        (strain_cgmap %>%
        filter(!!as.name(cov_r2)>=min_cov, !!as.name(m5C_count_r2)>=0) )[[m5C_count_r2]], .99)[[1]]
   
    q_strain_rep3 <- quantile(
        (strain_cgmap %>%
        filter(!!as.name(cov_r3)>=min_cov, !!as.name(m5C_count_r3)>=0) )[[m5C_count_r3]], .99)[[1]] 
    
    #build dataframe
    data <- strain_cgmap %>%
        merge(y = TS559_cgmap, all = TRUE, 
             by= c("chromosome","position","reference_nucleotide","growth_phase","metabolic_condition")) %>%
        dplyr::select(-chromosome,-standard_devition) %>%
    
        # If coverage is NA, it is also 0
        mutate(!!as.name(cov_r1) := replace_na(!!as.name(cov_r1),0)) %>%
        mutate(!!as.name(cov_r2) := replace_na(!!as.name(cov_r2),0)) %>%
        mutate(!!as.name(cov_r3) := replace_na(!!as.name(cov_r3),0)) %>%

        mutate(coverage_TS559_rep1 = replace_na(coverage_TS559_rep1,0)) %>%
        mutate(coverage_TS559_rep2 = replace_na(coverage_TS559_rep2,0)) %>%
        mutate(coverage_TS559_rep3 = replace_na(coverage_TS559_rep3,0)) %>%  
                                                      
        # identify high confidence and reproducible m5C sites
        rowwise() %>%
        mutate(hiconf_TS559 = detect_hiconf_3reps(
            f1=methylation_frequency_TS559_rep1,
            m1=mC_count_TS559_rep1,
            p1=q_TS559_rep1,
            c1=coverage_TS559_rep1,
            f2=methylation_frequency_TS559_rep2,
            m2=mC_count_TS559_rep2,
            p2=q_TS559_rep2,
            c2=coverage_TS559_rep2,
            f3=methylation_frequency_TS559_rep3,
            m3=mC_count_TS559_rep3,
            p3=q_TS559_rep3,
            c3=coverage_TS559_rep3,
            min_cov=min_cov,
            min_freq = minimum_freq) 
              ) %>%
        
        rowwise() %>%
        mutate(hiconf_strain = detect_hiconf_3reps(
                f1=!!as.name(meth_freq_r1),
                m1=!!as.name(m5C_count_r1),
                p1=q_strain_rep1,
                c1=!!as.name(cov_r1),
                f2=!!as.name(meth_freq_r2),
                m2=!!as.name(m5C_count_r2),
                p2=q_strain_rep2,
                c2=!!as.name(cov_r2), 
                f3=!!as.name(meth_freq_r3),
                m3=!!as.name(m5C_count_r3),
                p3=q_strain_rep3,
                c3=!!as.name(cov_r3), 
                min_cov= min_cov,
                min_freq = minimum_freq)
                 
                ) %>%
        # we are only interested in keeping sites were either the parent or deletion
        # strain have a high-confidence m5C site
        filter(hiconf_TS559 == TRUE | hiconf_strain == TRUE) %>%
        filter(!is.na(hiconf_TS559) & !is.na(hiconf_strain)) %>%
                                                    
                                                          
        # calculate the mean m5C frequency at each site by pooling replicates (round to 2 decimals)
        # calculate standard deviation of each site as well
        mutate(
                !!as.name(pooled_methylation_frequency_strain) := 
                ( !!as.name(m5C_count_r1) + !!as.name(m5C_count_r2) + !!as.name(m5C_count_r3) )/ 
                ( !!as.name(cov_r1) + !!as.name(cov_r2) + !!as.name(cov_r3) ) 
              ) %>%
    
        
        mutate(!!as.name(pooled_methylation_frequency_strain) := 
               round(!!as.name(pooled_methylation_frequency_strain),2)
              ) %>%
        mutate(strain_sd = sd(c(!!as.name(meth_freq_r1),!!as.name(meth_freq_r2),!!as.name(meth_freq_r3)))
              )%>%
    
        mutate(pooled_methylation_frequency_TS559 =
               (mC_count_TS559_rep1 + mC_count_TS559_rep2 + mC_count_TS559_rep3) /
               (coverage_TS559_rep1+coverage_TS559_rep2+coverage_TS559_rep3)    ) %>%
        mutate(pooled_methylation_frequency_TS559=round(pooled_methylation_frequency_TS559,2)) %>%
        mutate(TS559_sd = sd(c(methylation_frequency_TS559_rep1,methylation_frequency_TS559_rep2,
                              methylation_frequency_TS559_rep3))) %>%
    

        # calculate fold change of methylaton frequency at each site between parent and deletion strain
        mutate(FoldChange = (!!as.name(pooled_methylation_frequency_strain)+.01) / 
                               (pooled_methylation_frequency_TS559+.01),
               log2FC = log2( (!!as.name(pooled_methylation_frequency_strain)+.01) / 
                               (pooled_methylation_frequency_TS559+.01) ) ) %>%
        
        mutate(CI90_overlap = 
                   rm_overlap(!!as.name(pooled_methylation_frequency_strain), strain_sd,
                    pooled_methylation_frequency_TS559, TS559_sd )) %>%
    
        mutate(difference = get_difference(hiconf_TS559, hiconf_strain,   
                                            !!as.name(pooled_methylation_frequency_strain), 
                                            pooled_methylation_frequency_TS559,
                                            CI90_overlap
                                      ) ) %>%
        filter(difference == "ABS LOSS" | difference == "REL LOSS" | difference == "ABS GAIN" | 
           difference == "REL GAIN" | difference == "error") %>%
        filter(log2FC >= 1 | log2FC <= -1) %>%

    
        # add p vlaue
        rowwise() %>%
        mutate(pvalue = get_pvalue_3reps(
            s1_mC = !!as.name(m5C_count_r1),
            s1_cov = !!as.name(cov_r1), 
            s2_mC = !!as.name(m5C_count_r2),
            s2_cov = !!as.name(cov_r2),
            s3_mC = !!as.name(m5C_count_r3),
            s3_cov = !!as.name(cov_r3),
            c1_mC = mC_count_TS559_rep1,
            c1_cov = coverage_TS559_rep1,
            c2_mC = mC_count_TS559_rep2,
            c2_cov = coverage_TS559_rep2,
            c3_mC = mC_count_TS559_rep3,
            c3_cov = coverage_TS559_rep3) ) %>%
        
    
        filter(!is.na(pvalue), pvalue <= 0.0)
            
        # adjust p values
        # adj_pvalue <- p.adjust(data$pvalue, method = 'fdr')
        # data <- cbind(data, adj_pvalue)
    
        data <- data %>% merge(y=annotation,by='position', all.x=TRUE) %>%
            dplyr::rename(TS559_position = position) %>%
           dplyr::select(-reproducible, -highly_reproducible, -reference_nucleotide) %>%
           arrange(log2FC) %>%
            arrange(difference) %>%
            dplyr::select(TS559_position, KOD1_position, strand, everything())
    
    return(data)
}

enumerate_hiconf_3reps <- function(strain_cgmap, strain, min_cov=47){
    
    meth_freq_r1 <- paste("methylation_frequency_", strain, "_rep1", sep="")
    m5C_count_r1 <- paste("mC_count_", strain, "_rep1", sep="")
    cov_r1 <- paste("coverage_", strain, "_rep1", sep="")

    meth_freq_r2 <- paste("methylation_frequency_", strain, "_rep2", sep="")
    m5C_count_r2 <- paste("mC_count_", strain, "_rep2", sep="")
    cov_r2 <- paste("coverage_", strain, "_rep2", sep="")
    
    q_strain_rep1 <- quantile(
        (strain_cgmap %>%
        filter(!!as.name(cov_r1)>=min_cov, !!as.name(m5C_count_r1)>=0) )[[m5C_count_r1]], .99)[[1]]
    
    q_strain_rep2 <- quantile(
        (strain_cgmap %>%
        filter(!!as.name(cov_r2)>=min_cov, !!as.name(m5C_count_r2)>=0) )[[m5C_count_r2]], .99)[[1]]
    
    
    data <- strain_cgmap %>%
        dplyr::select(-chromosome, -reference_nucleotide,-growth_phase) %>%
    
        mutate(!!as.name(cov_r1) := replace_na(!!as.name(cov_r1),0)) %>%
        mutate(!!as.name(cov_r2) := replace_na(!!as.name(cov_r2),0)) %>%   
        
        rowwise() %>%
        mutate(hiconf_strain = detect_hiconf_2reps(
                f1=!!as.name(meth_freq_r1),
                m1=!!as.name(m5C_count_r1),
                p1=q_strain_rep1,
                c1=!!as.name(cov_r1),
                f2=!!as.name(meth_freq_r2),
                m2=!!as.name(m5C_count_r2),
                p2=q_strain_rep2,
                c2=!!as.name(cov_r2), 
                min_cov=min_cov)
                 
                ) %>%
        dplyr::rename(TS559_position = position) %>%
        
        mutate( 'm5C_cov/total_cov_rep1' := 
            paste(!!as.name(paste('mC_count_',strain,'_rep1',sep='')), 
                  '/', 
                  !!as.name(paste('coverage_',strain,'_rep1',sep='')) 
                 ) 
              ) %>%
        
        mutate( 'm5C_cov/total_cov_rep2' := 
            paste(!!as.name(paste('mC_count_',strain,'_rep2',sep='')), 
                  '/', 
                  !!as.name(paste('coverage_',strain,'_rep2',sep='')) 
                 ) 
              ) %>%
        dplyr::select(TS559_position, 
                  !!as.name(meth_freq_r1), 
                  'm5C_cov/total_cov_rep1',
                  !!as.name(meth_freq_r2), 
                  'm5C_cov/total_cov_rep2',
                     hiconf_strain) %>%

    dplyr::rename(!!paste(strain,'_hiconf',sep=''):= hiconf_strain )  %>%
    dplyr::rename(!!paste('m5C_cov/total_cov_',strain,'_rep1',sep=''):= 'm5C_cov/total_cov_rep1' )  %>%
    dplyr::rename(!!paste('m5C_cov/total_cov_',strain,'_rep2',sep=''):= 'm5C_cov/total_cov_rep2' ) %>%
    dplyr::rename(!!paste('freq_',strain,'_rep1',sep=''):= !!as.name(paste('methylation_frequency_',strain,'_rep1',sep='') ) )%>%
    dplyr::rename(!!paste('freq_',strain,'_rep2',sep=''):= !!as.name(paste('methylation_frequency_',strain,'_rep2',sep='') ) )  
    
    return(data)
}

### CGmap comparisons


In [8]:
###strain TK0224

defaultW <- getOption("warn")
options(warn = -1)


#load CGmaps into session
TK0224exoS_CGmap_rep1 <-read.delim("../cgmaps/TK0224_exoS_totalRNA_rep1.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK0224", growth_phase='exoponential', metabolic_condition="S", replicate="rep1")

TK0224exoS_CGmap_rep2 <-read.delim("../cgmaps/TK0224_exoS_totalRNA_rep2.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK0224", growth_phase='exoponential', metabolic_condition="S", replicate="rep2")

# process raw CGmaps
TK0224exoS_rep1_df <- processCGmap(TK0224exoS_CGmap_rep1)
TK0224exoS_rep2_df <- processCGmap(TK0224exoS_CGmap_rep2)

#merge reps
TK0224exoS <- merge_del_reps(x=TK0224exoS_rep1_df, y=TK0224exoS_rep2_df, strain='TK0224')

# get quantiles
"quantiles:"
get_quantiles(strain_cgmap = TK0224exoS, strain="TK0224", min_cov = 47)

#compare methylation frequencies & annotate
TK0224exoS_anal <- CompareCGmap(strain_cgmap = TK0224exoS, 
                                TS559_cgmap = TS559exoS, 
                                strain="TK0224", 
                                annotation = annotation,
                                min_cov = 47)

write.table(TK0224exoS_anal, file = "../processed_cgmaps/TK0224exoS_annotated", sep = "\t", row.names = F)

#enumerate gains & losses
"losses & gains:"
nrow( TK0224exoS_anal %>% filter(log2FC < 0) )
nrow( TK0224exoS_anal %>% filter(log2FC > 0) )

"absolute losses & gains:"
nrow( TK0224exoS_anal %>% filter(difference == "ABS LOSS") )
nrow( TK0224exoS_anal %>% filter(difference == "ABS GAIN") )



head(TK0224exoS_anal)

# TK0224exoS_cat <- enumerate_hiconf_2reps(strain_cgmap = TK0224exoS, strain="TK0224",min_cov = 47)

Sys.sleep(10)

ERROR: [1m[33mError[39m in `mutate()`:[22m
[1m[22m[36mℹ[39m In argument: `CI90_overlap = rm_overlap(...)`.
[36mℹ[39m In row 1.
[1mCaused by error in `r1 %overlaps% r2`:[22m
[33m![39m could not find function "%overlaps%"


In [7]:
###strain TK0234

defaultW <- getOption("warn")
options(warn = -1)


#load CGmaps into session
TK0234exoS_CGmap_rep1 <-read.delim("../cgmaps/TK0234_exoS_totalRNA_rep1.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK0234", growth_phase='exoponential', metabolic_condition="S", replicate="rep1")

TK0234exoS_CGmap_rep2 <-read.delim("../cgmaps/TK0234_exoS_totalRNA_rep2.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK0234", growth_phase='exoponential', metabolic_condition="S", replicate="rep2")

# process raw CGmaps
TK0234exoS_rep1_df <- processCGmap(TK0234exoS_CGmap_rep1)
TK0234exoS_rep2_df <- processCGmap(TK0234exoS_CGmap_rep2)

#merge reps
TK0234exoS <- merge_del_reps(x=TK0234exoS_rep1_df, y=TK0234exoS_rep2_df, strain='TK0234')

# get quantiles
"quantiles:"
get_quantiles(strain_cgmap = TK0234exoS, strain="TK0234", min_cov = 47)

#compare methylation frequencies & annotate
TK0234exoS_anal <- CompareCGmap(strain_cgmap = TK0234exoS, 
                                TS559_cgmap = TS559exoS, 
                                strain="TK0234", 
                                annotation = annotation,
                                min_cov = 47)

write.table(TK0234exoS_anal, file = "../processed_cgmaps/TK0234exoS_annotated", sep = "\t", row.names = F)

#enumerate gains & losses
"losses & gains:"
nrow( TK0234exoS_anal %>% filter(log2FC < 0) )
nrow( TK0234exoS_anal %>% filter(log2FC > 0) )

"absolute losses & gains:"
nrow( TK0234exoS_anal %>% filter(difference == "ABS LOSS") )
nrow( TK0234exoS_anal %>% filter(difference == "ABS GAIN") )

head(TK0234exoS_anal)

# TK0234exoS_cat <- enumerate_hiconf_2reps(strain_cgmap = TK0234exoS, strain="TK0234",min_cov = 47)

Sys.sleep(10)

Unnamed: 0_level_0,TS559_position,KOD1_position,strand,growth_phase,metabolic_condition,methylation_frequency_TK0234_rep1,mC_count_TK0234_rep1,coverage_TK0234_rep1,methylation_frequency_TK0234_rep2,mC_count_TK0234_rep2,⋯,amino_acid_sequence,amino_acid_ID,local_41bp_predicted_fold,m5C_position_fold,MFE,associated_TSS_id,TSS_direction,TSS_description,total_annotations,alternate_annotations
Unnamed: 0_level_1,<int>,<chr>,<chr>,<chr>,<chr>,<dbl>,<int>,<int>,<dbl>,<int>,⋯,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<int>,<chr>
1,1407584,1408575,+,exoponential,S,0.04,20,535,0.05,67,⋯,CAG,Q,||||||........|||||....||.....|||||...|||,single_stranded,-85.61,.,.,.,1,
2,48659,48659,-,exoponential,S,0.04,4,107,0.07,65,⋯,GCC,A,......................................|||,single_stranded,-68.21,.,.,.,1,
3,226308,227299,-,exoponential,S,0.06,5,85,0.08,58,⋯,ACC,T,|||||||.....|||..||....||||||............,single_stranded,-44.59,.,.,.,1,
4,1865353,1866344,+,exoponential,S,0.06,13,227,0.07,233,⋯,GCC,A,||||||..........||||||||..||||...........,base_paired,-31.14,.,.,.,1,
5,624690,625681,+,exoponential,S,0.2,17,84,0.19,57,⋯,CCG,P,....|||||...|||||.||||..|||....|||..||||.,base_paired,-62.61,.,.,.,1,
6,2035075,2036066,+,exoponential,S,0.11,13,115,0.14,88,⋯,CUU,L,|||||||..||||||.......||||..|||||.|||||||,single_stranded,-36.23,.,.,.,1,


In [6]:
###strain TK0360

defaultW <- getOption("warn")
options(warn = -1)


#load CGmaps into session
TK0360exoS_CGmap_rep1 <-read.delim("../cgmaps/TK0360_exoS_totalRNA_rep1.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK0360", growth_phase='exoponential', metabolic_condition="S", replicate="rep1")

TK0360exoS_CGmap_rep2 <-read.delim("../cgmaps/TK0360_exoS_totalRNA_rep2.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK0360", growth_phase='exoponential', metabolic_condition="S", replicate="rep2")

# process raw CGmaps
TK0360exoS_rep1_df <- processCGmap(TK0360exoS_CGmap_rep1)
TK0360exoS_rep2_df <- processCGmap(TK0360exoS_CGmap_rep2)

#merge reps
TK0360exoS <- merge_del_reps(x=TK0360exoS_rep1_df, y=TK0360exoS_rep2_df, strain='TK0360')

# get quantiles
"quantiles:"
get_quantiles(strain_cgmap = TK0360exoS, strain="TK0360", min_cov = 47)

#compare methylation frequencies & annotate
TK0360exoS_anal <- CompareCGmap(strain_cgmap = TK0360exoS, 
                                TS559_cgmap = TS559exoS, 
                                strain="TK0360", 
                                annotation = annotation,
                                min_cov = 47)

write.table(TK0360exoS_anal, file = "../processed_cgmaps/TK0360exoS_annotated", sep = "\t", row.names = F)

#enumerate gains & losses
"losses & gains:"
nrow( TK0360exoS_anal %>% filter(log2FC < 0) )
nrow( TK0360exoS_anal %>% filter(log2FC > 0) )
"absolute losses & gains:"
nrow( TK0360exoS_anal %>% filter(difference == "ABS LOSS") )
nrow( TK0360exoS_anal %>% filter(difference == "ABS GAIN") )

head(TK0360exoS_anal)

# TK0360exoS_cat <- enumerate_hiconf_2reps(strain_cgmap = TK0360exoS, strain="TK0360",min_cov = 47)

Sys.sleep(10)

Unnamed: 0_level_0,TS559_position,KOD1_position,strand,growth_phase,metabolic_condition,methylation_frequency_TK0360_rep1,mC_count_TK0360_rep1,coverage_TK0360_rep1,methylation_frequency_TK0360_rep2,mC_count_TK0360_rep2,⋯,amino_acid_sequence,amino_acid_ID,local_41bp_predicted_fold,m5C_position_fold,MFE,associated_TSS_id,TSS_direction,TSS_description,total_annotations,alternate_annotations
Unnamed: 0_level_1,<int>,<chr>,<chr>,<chr>,<chr>,<dbl>,<int>,<int>,<dbl>,<int>,⋯,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<int>,<chr>
1,1408656,1409647,+,exoponential,S,0.06,139,2445,0.05,134,⋯,CUG,L,........||||.||||||||||..||....||||.....|,base_paired,-39.93,.,.,.,1,
2,119783,119783,-,exoponential,S,0.12,71,613,0.07,51,⋯,CUU,L,||..||....|||||||||..||..|||......||||.||,single_stranded,-49.49,.,.,.,1,
3,1238962,1239953,-,exoponential,S,0.15,20,134,0.14,17,⋯,.,.,.,,.,.,.,.,0,
4,1023490,1024481,-,exoponential,S,0.0,0,172,0.0,0,⋯,CAG,Q,||................||||.|||............|||,base_paired,-53.21,.,.,.,1,
5,1570046,1571037,+,exoponential,S,0.0,0,51,0.0,0,⋯,.,.,.,,.,.,.,.,1,
6,1944808,1945799,+,exoponential,S,0.01,4,552,0.0,3,⋯,.,.,.,,.,.,.,.,1,


In [8]:
###strain TK0704

defaultW <- getOption("warn")
options(warn = -1)


#load CGmaps into session
TK0704exoS_CGmap_rep1 <-read.delim("../cgmaps/TK0704_exoS_totalRNA_rep1.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK0704", growth_phase='exoponential', metabolic_condition="S", replicate="rep1")

TK0704exoS_CGmap_rep2 <-read.delim("../cgmaps/TK0704_exoS_totalRNA_rep2.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK0704", growth_phase='exoponential', metabolic_condition="S", replicate="rep2")

# process raw CGmaps
TK0704exoS_rep1_df <- processCGmap(TK0704exoS_CGmap_rep1)
TK0704exoS_rep2_df <- processCGmap(TK0704exoS_CGmap_rep2)

#merge reps
TK0704exoS <- merge_del_reps(x=TK0704exoS_rep1_df, y=TK0704exoS_rep2_df, strain='TK0704')

# get quantiles
"quantiles:"
get_quantiles(strain_cgmap = TK0704exoS, strain="TK0704", min_cov = 47)

#compare methylation frequencies & annotate
TK0704exoS_anal <- CompareCGmap(strain_cgmap = TK0704exoS, 
                                TS559_cgmap = TS559exoS, 
                                strain="TK0704", 
                                annotation = annotation,
                                min_cov = 47)

write.table(TK0704exoS_anal, file = "../processed_cgmaps/TK0704exoS_annotated", sep = "\t", row.names = F)

#enumerate gains & losses
"losses & gains:"
nrow( TK0704exoS_anal %>% filter(log2FC < 0) )
nrow( TK0704exoS_anal %>% filter(log2FC > 0) )

"absolute losses & gains:"
nrow( TK0704exoS_anal %>% filter(difference == "ABS LOSS") )
nrow( TK0704exoS_anal %>% filter(difference == "ABS GAIN") )

head(TK0704exoS_anal)

# TK0704exoS_cat <- enumerate_hiconf_2reps(strain_cgmap = TK0704exoS, strain="TK0704",min_cov = 47)

Sys.sleep(10)


ERROR: [1m[33mError[39m in `mutate()`:[22m
[1m[22m[36mℹ[39m In argument: `pvalue = get_pvalue(...)`.
[1mCaused by error in `if (s1_cov == 0 & s2_cov == 0) ...`:[22m
[33m![39m argument is of length zero


In [None]:
###strain TK0729

defaultW <- getOption("warn")
options(warn = -1)


#load CGmaps into session
TK0729exoS_CGmap_rep1 <-read.delim("../cgmaps/TK0729_exoS_totalRNA_rep1.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK0729", growth_phase='exoponential', metabolic_condition="S", replicate="rep1")

TK0729exoS_CGmap_rep2 <-read.delim("../cgmaps/TK0729_exoS_totalRNA_rep2.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK0729", growth_phase='exoponential', metabolic_condition="S", replicate="rep2")

# process raw CGmaps
TK0729exoS_rep1_df <- processCGmap(TK0729exoS_CGmap_rep1)
TK0729exoS_rep2_df <- processCGmap(TK0729exoS_CGmap_rep2)

#merge reps
TK0729exoS <- merge_del_reps(x=TK0729exoS_rep1_df, y=TK0729exoS_rep2_df, strain='TK0729')

# get quantiles
"quantiles:"
get_quantiles(strain_cgmap = TK0729exoS, strain="TK0729", min_cov = 47)

#compare methylation frequencies & annotate
TK0729exoS_anal <- CompareCGmap(strain_cgmap = TK0729exoS, 
                                TS559_cgmap = TS559exoS, 
                                strain="TK0729", 
                                annotation = annotation,
                                min_cov = 47)

write.table(TK0729exoS_anal, file = "../processed_cgmaps/TK0729exoS_annotated", sep = "\t", row.names = F)

#enumerate gains & losses
"losses & gains:"
nrow( TK0729exoS_anal %>% filter(log2FC < 0) )
nrow( TK0729exoS_anal %>% filter(log2FC > 0) )

"absolute losses & gains:"
nrow( TK0729exoS_anal %>% filter(difference == "ABS LOSS") )
nrow( TK0729exoS_anal %>% filter(difference == "ABS GAIN") )

head(TK0729exoS_anal)

# TK0729exoS_cat <- enumerate_hiconf_2reps(strain_cgmap = TK0729exoS, strain="TK0729",min_cov = 47)

Sys.sleep(10)

In [7]:
###strain TK0872

defaultW <- getOption("warn")
options(warn = -1)


#load CGmaps into session
TK0872exoS_CGmap_rep1 <-read.delim("../cgmaps/TK0872_exoS_totalRNA_rep1.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK0872", growth_phase='exoponential', metabolic_condition="S", replicate="rep1")

TK0872exoS_CGmap_rep2 <-read.delim("../cgmaps/TK0872_exoS_totalRNA_rep2.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK0872", growth_phase='exoponential', metabolic_condition="S", replicate="rep2")

# process raw CGmaps
TK0872exoS_rep1_df <- processCGmap(TK0872exoS_CGmap_rep1)
TK0872exoS_rep2_df <- processCGmap(TK0872exoS_CGmap_rep2)

#merge reps
TK0872exoS <- merge_del_reps(x=TK0872exoS_rep1_df, y=TK0872exoS_rep2_df, strain='TK0872')

# get quantiles
"quantiles:"
get_quantiles(strain_cgmap = TK0872exoS, strain="TK0872", min_cov = 47)

#compare methylation frequencies & annotate
TK0872exoS_anal <- CompareCGmap(strain_cgmap = TK0872exoS, 
                                TS559_cgmap = TS559exoS, 
                                strain="TK0872", 
                                annotation = annotation,
                                min_cov = 47)

write.table(TK0872exoS_anal, file = "../processed_cgmaps/TK0872exoS_annotated", sep = "\t", row.names = F)

#enumerate gains & losses
"losses & gains:"
nrow( TK0872exoS_anal %>% filter(log2FC < 0) )
nrow( TK0872exoS_anal %>% filter(log2FC > 0) )

"absolute losses & gains:"
nrow( TK0872exoS_anal %>% filter(difference == "ABS LOSS") )
nrow( TK0872exoS_anal %>% filter(difference == "ABS GAIN") )

head(TK0872exoS_anal)

# TK0872exoS_cat <- enumerate_hiconf_2reps(strain_cgmap = TK0872exoS, strain="TK0872",min_cov = 47)

Sys.sleep(10)

Unnamed: 0_level_0,TS559_position,KOD1_position,strand,growth_phase,metabolic_condition,methylation_frequency_TK0872_rep1,mC_count_TK0872_rep1,coverage_TK0872_rep1,methylation_frequency_TK0872_rep2,mC_count_TK0872_rep2,⋯,amino_acid_sequence,amino_acid_ID,local_41bp_predicted_fold,m5C_position_fold,MFE,associated_TSS_id,TSS_direction,TSS_description,total_annotations,alternate_annotations
Unnamed: 0_level_1,<int>,<chr>,<chr>,<chr>,<chr>,<dbl>,<int>,<int>,<dbl>,<int>,⋯,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<int>,<chr>
1,797194,798185,-,exoponential,S,0.02,5,219,0.06,56,⋯,CUU,L,...||||||....................|||....|||..,single_stranded,-32.93,.,.,.,1,
2,2026195,2027186,+,exoponential,S,0.0,0,462,0.04,11,⋯,.,.,|..||||||||........|||||||||||||.||||||||,base_paired,-440.69,.,.,.,1,
3,908004,908995,-,exoponential,S,0.0,0,417,0.01,3,⋯,CUC,L,||||...............|||||.......||||||.|||,base_paired,-63.12,.,.,.,1,
4,109789,109789,-,exoponential,S,0.0,2,1007,0.0,5,⋯,CUC,L,.....|||||||......||||||||||||||..||||||.,base_paired,-134.88,.,.,.,1,
5,1570046,1571037,+,exoponential,S,0.0,0,106,0.0,0,⋯,.,.,.,,.,.,.,.,1,
6,1069082,1070073,-,exoponential,S,0.0,0,374,0.01,1,⋯,CAC,H,...|||||...........||||||.....||||||.||||,base_paired,-112.98,.,.,.,1,


In [None]:
###strain TK1273

defaultW <- getOption("warn")
options(warn = -1)


#load CGmaps into session
TK1273exoS_CGmap_rep1 <-read.delim("../cgmaps/TK1273_exoS_totalRNA_rep1.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK1273", growth_phase='exoponential', metabolic_condition="S", replicate="rep1")

TK1273exoS_CGmap_rep2 <-read.delim("../cgmaps/TK1273_exoS_totalRNA_rep2.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK1273", growth_phase='exoponential', metabolic_condition="S", replicate="rep2")

# process raw CGmaps
TK1273exoS_rep1_df <- processCGmap(TK1273exoS_CGmap_rep1)
TK1273exoS_rep2_df <- processCGmap(TK1273exoS_CGmap_rep2)

#merge reps
TK1273exoS <- merge_del_reps(x=TK1273exoS_rep1_df, y=TK1273exoS_rep2_df, strain='TK1273')

# get quantiles
"quantiles:"
get_quantiles(strain_cgmap = TK1273exoS, strain="TK1273", min_cov = 47)

#compare methylation frequencies & annotate
TK1273exoS_anal <- CompareCGmap(strain_cgmap = TK1273exoS, 
                                TS559_cgmap = TS559exoS, 
                                strain="TK1273", 
                                annotation = annotation,
                                min_cov = 47)

write.table(TK1273exoS_anal, file = "../processed_cgmaps/TK1273exoS_annotated", sep = "\t", row.names = F)

#enumerate gains & losses
"losses & gains:"
nrow( TK1273exoS_anal %>% filter(log2FC < 0) )
nrow( TK1273exoS_anal %>% filter(log2FC > 0) )
"absolute losses & gains:"
nrow( TK1273exoS_anal %>% filter(difference == "ABS LOSS") )
nrow( TK1273exoS_anal %>% filter(difference == "ABS GAIN") )

head(TK1273exoS_anal)

# TK1273exoS_cat <- enumerate_hiconf_2reps(strain_cgmap = TK1273exoS, strain="TK1273",min_cov = 47)

Sys.sleep(10)


In [None]:
###strain TK1784

defaultW <- getOption("warn")
options(warn = -1)


#load CGmaps into session
TK1784exoS_CGmap_rep1 <-read.delim("../cgmaps/TK1784_exoS_totalRNA_rep1.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK1784", growth_phase='exoponential', metabolic_condition="S", replicate="rep1")

TK1784exoS_CGmap_rep2 <-read.delim("../cgmaps/TK1784_exoS_totalRNA_rep2.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK1784", growth_phase='exoponential', metabolic_condition="S", replicate="rep2")

# process raw CGmaps
TK1784exoS_rep1_df <- processCGmap(TK1784exoS_CGmap_rep1)
TK1784exoS_rep2_df <- processCGmap(TK1784exoS_CGmap_rep2)

#merge reps
TK1784exoS <- merge_del_reps(x=TK1784exoS_rep1_df, y=TK1784exoS_rep2_df, strain='TK1784')

# get quantiles
"quantiles:"
get_quantiles(strain_cgmap = TK1784exoS, strain="TK1784", min_cov = 47)

#compare methylation frequencies & annotate
TK1784exoS_anal <- CompareCGmap(strain_cgmap = TK1784exoS, 
                                TS559_cgmap = TS559exoS, 
                                strain="TK1784", 
                                annotation = annotation,
                                min_cov = 47)

write.table(TK1784exoS_anal, file = "../processed_cgmaps/TK1784exoS_annotated", sep = "\t", row.names = F)

#enumerate gains & losses
"losses & gains:"
nrow( TK1784exoS_anal %>% filter(log2FC < 0) )
nrow( TK1784exoS_anal %>% filter(log2FC > 0) )

"absolute losses & gains:"
nrow( TK1784exoS_anal %>% filter(difference == "ABS LOSS") )
nrow( TK1784exoS_anal %>% filter(difference == "ABS GAIN") )

head(TK1784exoS_anal)

# TK1784exoS_cat <- enumerate_hiconf_2reps(strain_cgmap = TK1784exoS, strain="TK1784",min_cov = 47)

Sys.sleep(10)

In [None]:
###strain TK1917

defaultW <- getOption("warn")
options(warn = -1)


#load CGmaps into session
TK1917exoS_CGmap_rep1 <-read.delim("../cgmaps/TK1917_exoS_totalRNA_rep1.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK1917", growth_phase='exoponential', metabolic_condition="S", replicate="rep1")

TK1917exoS_CGmap_rep2 <-read.delim("../cgmaps/TK1917_exoS_totalRNA_rep2.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK1917", growth_phase='exoponential', metabolic_condition="S", replicate="rep2")

# process raw CGmaps
TK1917exoS_rep1_df <- processCGmap(TK1917exoS_CGmap_rep1)
TK1917exoS_rep2_df <- processCGmap(TK1917exoS_CGmap_rep2)

#merge reps
TK1917exoS <- merge_del_reps(x=TK1917exoS_rep1_df, y=TK1917exoS_rep2_df, strain='TK1917')

# get quantiles
"quantiles:"
get_quantiles(strain_cgmap = TK1917exoS, strain="TK1917", min_cov = 47)

#compare methylation frequencies & annotate
TK1917exoS_anal <- CompareCGmap(strain_cgmap = TK1917exoS, 
                                TS559_cgmap = TS559exoS, 
                                strain="TK1917", 
                                annotation = annotation,
                                min_cov = 47)

write.table(TK1917exoS_anal, file = "../processed_cgmaps/TK1917exoS_annotated", sep = "\t", row.names = F)

#enumerate gains & losses
"losses & gains:"
nrow( TK1917exoS_anal %>% filter(log2FC < 0) )
nrow( TK1917exoS_anal %>% filter(log2FC > 0) )

"absolute losses & gains:"
nrow( TK1917exoS_anal %>% filter(difference == "ABS LOSS") )
nrow( TK1917exoS_anal %>% filter(difference == "ABS GAIN") )

head(TK1917exoS_anal)

# TK1917exoS_cat <- enumerate_hiconf_2reps(strain_cgmap = TK1917exoS, strain="TK1917",min_cov = 47)

Sys.sleep(10)

In [8]:
###strain TK1935

defaultW <- getOption("warn")
options(warn = -1)


#load CGmaps into session
TK1935exoS_CGmap_rep1 <-read.delim("../cgmaps/TK1935_exoS_totalRNA_rep1.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK1935", growth_phase='exoponential', metabolic_condition="S", replicate="rep1")

TK1935exoS_CGmap_rep2 <-read.delim("../cgmaps/TK1935_exoS_totalRNA_rep2.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK1935", growth_phase='exoponential', metabolic_condition="S", replicate="rep2")

# process raw CGmaps
TK1935exoS_rep1_df <- processCGmap(TK1935exoS_CGmap_rep1)
TK1935exoS_rep2_df <- processCGmap(TK1935exoS_CGmap_rep2)

#merge reps
TK1935exoS <- merge_del_reps(x=TK1935exoS_rep1_df, y=TK1935exoS_rep2_df, strain='TK1935')

# get quantiles
"quantiles:"
get_quantiles(strain_cgmap = TK1935exoS, strain="TK1935", min_cov = 47)

#compare methylation frequencies & annotate
TK1935exoS_anal <- CompareCGmap(strain_cgmap = TK1935exoS, 
                                TS559_cgmap = TS559exoS, 
                                strain="TK1935", 
                                annotation = annotation,
                                min_cov = 47)

write.table(TK1935exoS_anal, file = "../processed_cgmaps/TK1935exoS_annotated", sep = "\t", row.names = F)

#enumerate gains & losses
"losses & gains:"
nrow( TK1935exoS_anal %>% filter(log2FC < 0) )
nrow( TK1935exoS_anal %>% filter(log2FC > 0) )

"absolute losses & gains:"
nrow( TK1935exoS_anal %>% filter(difference == "ABS LOSS") )
nrow( TK1935exoS_anal %>% filter(difference == "ABS GAIN") )

head(TK1935exoS_anal)

# TK1935exoS_cat <- enumerate_hiconf_2reps(strain_cgmap = TK1935exoS, strain="TK1935",min_cov = 47)

Sys.sleep(10)

Unnamed: 0_level_0,TS559_position,KOD1_position,strand,growth_phase,metabolic_condition,methylation_frequency_TK1935_rep1,mC_count_TK1935_rep1,coverage_TK1935_rep1,methylation_frequency_TK1935_rep2,mC_count_TK1935_rep2,⋯,amino_acid_sequence,amino_acid_ID,local_41bp_predicted_fold,m5C_position_fold,MFE,associated_TSS_id,TSS_direction,TSS_description,total_annotations,alternate_annotations
Unnamed: 0_level_1,<int>,<chr>,<chr>,<chr>,<chr>,<dbl>,<int>,<int>,<dbl>,<int>,⋯,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<int>,<chr>
1,2022329,2023320,+,exoponential,S,0,2,5145,0,1,⋯,.,.,||.........|||||...||||||....|||.....|||.,base_paired,-224.51,.,.,.,1,
2,2025660,2026651,+,exoponential,S,0,1,3022,0,0,⋯,.,.,........||||||.....||||||................,base_paired,-440.69,.,.,.,1,
3,1010753,1011744,-,exoponential,S,0,0,46,0,0,⋯,CUG,L,.....|....|.........|..||||......|...|||.,base_paired,-62.39,.,.,.,1,
4,2026031,2027022,+,exoponential,S,0,1,7425,0,3,⋯,.,.,...||||...|||||....||||||||...|||||||....,base_paired,-440.69,.,.,.,1,
5,162074,162541,-,exoponential,S,0,0,462,0,1,⋯,GCU,A,|||....||||||..............|||..|....||||,single_stranded,-107.55,.,.,.,1,
6,1323164,1324155,-,exoponential,S,0,0,512,0,0,⋯,CUG,L,|||..|||||..|.....|||..||||||....|..||...,base_paired,-62.76,.,.,.,1,


In [9]:
###strain TK2122

defaultW <- getOption("warn")
options(warn = -1)


#load CGmaps into session
TK2122exoS_CGmap_rep1 <-read.delim("../cgmaps/TK2122_exoS_totalRNA_rep1.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK2122", growth_phase='exoponential', metabolic_condition="S", replicate="rep1")

TK2122exoS_CGmap_rep2 <-read.delim("../cgmaps/TK2122_exoS_totalRNA_rep2.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK2122", growth_phase='exoponential', metabolic_condition="S", replicate="rep2")

# process raw CGmaps
TK2122exoS_rep1_df <- processCGmap(TK2122exoS_CGmap_rep1)
TK2122exoS_rep2_df <- processCGmap(TK2122exoS_CGmap_rep2)

#merge reps
TK2122exoS <- merge_del_reps(x=TK2122exoS_rep1_df, y=TK2122exoS_rep2_df, strain='TK2122')

# get quantiles
"quantiles:"
get_quantiles(strain_cgmap = TK2122exoS, strain="TK2122", min_cov = 47)

#compare methylation frequencies & annotate
TK2122exoS_anal <- CompareCGmap(strain_cgmap = TK2122exoS, 
                                TS559_cgmap = TS559exoS, 
                                strain="TK2122", 
                                annotation = annotation,
                                min_cov = 47)

write.table(TK2122exoS_anal, file = "../processed_cgmaps/TK2122exoS_annotated", sep = "\t", row.names = F)

#enumerate gains & losses
"losses & gains:"
nrow( TK2122exoS_anal %>% filter(log2FC < 0) )
nrow( TK2122exoS_anal %>% filter(log2FC > 0) )

"absolute losses & gains:"
nrow( TK2122exoS_anal %>% filter(difference == "ABS LOSS") )
nrow( TK2122exoS_anal %>% filter(difference == "ABS GAIN") )

head(TK2122exoS_anal)

# TK2122exoS_cat <- enumerate_hiconf_2reps(strain_cgmap = TK2122exoS, strain="TK2122",min_cov = 47)

Sys.sleep(10)

Unnamed: 0_level_0,TS559_position,KOD1_position,strand,growth_phase,metabolic_condition,methylation_frequency_TK2122_rep1,mC_count_TK2122_rep1,coverage_TK2122_rep1,methylation_frequency_TK2122_rep2,mC_count_TK2122_rep2,⋯,amino_acid_sequence,amino_acid_ID,local_41bp_predicted_fold,m5C_position_fold,MFE,associated_TSS_id,TSS_direction,TSS_description,total_annotations,alternate_annotations
Unnamed: 0_level_1,<int>,<chr>,<chr>,<chr>,<chr>,<dbl>,<int>,<int>,<dbl>,<int>,⋯,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<int>,<chr>
1,2023350,2024341,+,exoponential,S,0.0,0,694,0,0,⋯,.,.,|||||||....|||||||||........||||...||||||,single_stranded,-224.51,.,.,.,1,
2,1101098,1102089,-,exoponential,S,0.0,0,422,0,0,⋯,CCC,P,......||||||||.....||||...........||||...,base_paired,-14.43,.,.,.,1,
3,898452,899443,-,exoponential,S,0.0,0,145,0,0,⋯,GCG,A,.|..|..|||||.....||...|||||||||||||.|||||,single_stranded,-47.75,.,.,.,1,
4,150040,150507,-,exoponential,S,0.01,2,341,0,1,⋯,GCU,A,.|||||||.|||||..|||..|||||......|||||....,single_stranded,-82.72,.,.,.,1,
5,1027413,1028404,+,exoponential,S,0.0,0,1155,0,0,⋯,CCG,P,|||..|..|||||..................|||||..|||,single_stranded,-117.74,.,.,.,1,
6,1985559,1986550,+,exoponential,S,0.01,1,200,0,0,⋯,GCG,A,.||||.........||||............||||..|||||,single_stranded,-92.31,.,.,.,1,


In [None]:
###strain TK2241

defaultW <- getOption("warn")
options(warn = -1)


#load CGmaps into session
TK2241exoS_CGmap_rep1 <-read.delim("../cgmaps/TK2241_exoS_totalRNA_rep1.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK2241", growth_phase='exoponential', metabolic_condition="S", replicate="rep1")

TK2241exoS_CGmap_rep2 <-read.delim("../cgmaps/TK2241_exoS_totalRNA_rep2.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK2241", growth_phase='exoponential', metabolic_condition="S", replicate="rep2")

TK2241exoS_CGmap_rep3 <-read.delim("../cgmaps/TK2241_exoS_totalRNA_rep3.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK2241", growth_phase='exoponential', metabolic_condition="S", replicate="rep3")



# process raw CGmaps
TK2241exoS_rep1_df <- processCGmap(TK2241exoS_CGmap_rep1)
TK2241exoS_rep2_df <- processCGmap(TK2241exoS_CGmap_rep2)
TK2241exoS_rep3_df <- processCGmap(TK2241exoS_CGmap_rep3)


#merge reps
TK2241exoS <- merge_del_3reps(x=TK2241exoS_rep1_df, y=TK2241exoS_rep2_df,z=TK2241exoS_rep3_df, strain='TK2241')

# get quantiles
"quantiles:"
get_quantiles(strain_cgmap = TK2241exoS, strain="TK2241", reps=3, min_cov = 47)

#compare methylation frequencies & annotate
TK2241exoS_anal <- CompareCGmap_3reps(strain_cgmap = TK2241exoS, 
                                TS559_cgmap = TS559exoS, 
                                strain="TK2241", 
                                annotation = annotation,
                                min_cov = 47)

write.table(TK2241exoS_anal, file = "../processed_cgmaps/TK2241exoS_annotated", sep = "\t", row.names = F)

#enumerate gains & losses
"losses & gains:"
nrow( TK2241exoS_anal %>% filter(log2FC < 0) )
nrow( TK2241exoS_anal %>% filter(log2FC > 0) )

"absolute losses & gains:"
nrow( TK2241exoS_anal %>% filter(difference == "ABS LOSS") )
nrow( TK2241exoS_anal %>% filter(difference == "ABS GAIN") )

# TK2241exoS_cat <- enumerate_hiconf_2reps(strain_cgmap = TK2241exoS, strain="TK2241",min_cov = 47)

Sys.sleep(10)

In [10]:
###strain TK2304

defaultW <- getOption("warn")
options(warn = -1)


#load CGmaps into session
TK2304exoS_CGmap_rep1 <-read.delim("../cgmaps/TK2304_exoS_totalRNA_rep1.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK2304", growth_phase='exoponential', metabolic_condition="S", replicate="rep1")

TK2304exoS_CGmap_rep2 <-read.delim("../cgmaps/TK2304_exoS_totalRNA_rep2.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK2304", growth_phase='exoponential', metabolic_condition="S", replicate="rep2")

# process raw CGmaps
TK2304exoS_rep1_df <- processCGmap(TK2304exoS_CGmap_rep1)
TK2304exoS_rep2_df <- processCGmap(TK2304exoS_CGmap_rep2)

#merge reps
TK2304exoS <- merge_del_reps(x=TK2304exoS_rep1_df, y=TK2304exoS_rep2_df, strain='TK2304')

# get quantiles
"quantiles:"
get_quantiles(strain_cgmap = TK2304exoS, strain="TK2304", min_cov = 47)

#compare methylation frequencies & annotate
TK2304exoS_anal <- CompareCGmap(strain_cgmap = TK2304exoS, 
                                TS559_cgmap = TS559exoS, 
                                strain="TK2304", 
                                annotation = annotation,
                                min_cov = 47)

write.table(TK2304exoS_anal, file = "../processed_cgmaps/TK2304exoS_annotated", sep = "\t", row.names = F)

#enumerate gains & losses
"losses & gains:"
nrow( TK2304exoS_anal %>% filter(log2FC < 0) )
nrow( TK2304exoS_anal %>% filter(log2FC > 0) )

"absolute losses & gains:"
nrow( TK2304exoS_anal %>% filter(difference == "ABS LOSS") )
nrow( TK2304exoS_anal %>% filter(difference == "ABS GAIN") )

head(TK2304exoS_anal)

# TK2304exoS_cat <- enumerate_hiconf_2reps(strain_cgmap = TK2304exoS, strain="TK2304",min_cov = 47)

Sys.sleep(10)


Unnamed: 0_level_0,TS559_position,KOD1_position,strand,growth_phase,metabolic_condition,methylation_frequency_TK2304_rep1,mC_count_TK2304_rep1,coverage_TK2304_rep1,methylation_frequency_TK2304_rep2,mC_count_TK2304_rep2,⋯,amino_acid_sequence,amino_acid_ID,local_41bp_predicted_fold,m5C_position_fold,MFE,associated_TSS_id,TSS_direction,TSS_description,total_annotations,alternate_annotations
Unnamed: 0_level_1,<int>,<chr>,<chr>,<chr>,<chr>,<dbl>,<int>,<int>,<dbl>,<int>,⋯,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<int>,<chr>
1,2025555,2026546,+,exoponential,S,0.0,2,3599,0,2,⋯,.,.,...||||||||....||||||...|||||||.......|||,base_paired,-440.69,.,.,.,1,
2,2025624,2026615,+,exoponential,S,0.0,2,1444,0,0,⋯,.,.,..||||||.................................,single_stranded,-440.69,.,.,.,1,
3,2025625,2026616,+,exoponential,S,0.0,1,1447,0,1,⋯,.,.,.||||||..................................,single_stranded,-440.69,.,.,.,1,
4,2025645,2026636,+,exoponential,S,0.0,2,2012,0,0,⋯,.,.,.......................||||||.....||||||.,single_stranded,-440.69,.,.,.,1,
5,2023357,2024348,+,exoponential,S,0.01,5,391,0,5,⋯,.,.,....|||||||||........||||...||||||-------,single_stranded,-224.51,.,.,.,1,
6,1728374,1729365,+,exoponential,S,0.0,0,167,0,1,⋯,CUU,L,...........||||||..|||.....|||||.........,base_paired,-20.17,.,.,.,1,


In [None]:
###strain TK0234_0224

defaultW <- getOption("warn")
options(warn = -1)


#load CGmaps into session
TK0234_0224exoS_CGmap_rep1 <-read.delim("../cgmaps/TK0234_0224_exoS_totalRNA_rep1.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK0234_0224", growth_phase='exoponential', metabolic_condition="S", replicate="rep1")

TK0234_0224exoS_CGmap_rep2 <-read.delim("../cgmaps/TK0234_0224_exoS_totalRNA_rep2.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK0234_0224", growth_phase='exoponential', metabolic_condition="S", replicate="rep2")

# process raw CGmaps
TK0234_0224exoS_rep1_df <- processCGmap(TK0234_0224exoS_CGmap_rep1)
TK0234_0224exoS_rep2_df <- processCGmap(TK0234_0224exoS_CGmap_rep2)

#merge reps
TK0234_0224exoS <- merge_del_reps(x=TK0234_0224exoS_rep1_df, y=TK0234_0224exoS_rep2_df, strain='TK0234_0224')

# get quantiles
"quantiles:"
get_quantiles(strain_cgmap = TK0234_0224exoS, strain="TK0234_0224", min_cov = 47)

#compare methylation frequencies & annotate
TK0234_0224exoS_anal <- CompareCGmap(strain_cgmap = TK0234_0224exoS, 
                                TS559_cgmap = TS559exoS, 
                                strain="TK0234_0224", 
                                annotation = annotation,
                                min_cov = 47)

write.table(TK0234_0224exoS_anal, file = "../processed_cgmaps/TK0234_0224exoS_annotated", sep = "\t", row.names = F)

#enumerate gains & losses
"losses & gains:"
nrow( TK0234_0224exoS_anal %>% filter(log2FC < 0) )
nrow( TK0234_0224exoS_anal %>% filter(log2FC > 0) )

"absolute losses & gains:"
nrow( TK0234_0224exoS_anal %>% filter(difference == "ABS LOSS") )
nrow( TK0234_0224exoS_anal %>% filter(difference == "ABS GAIN") )

head(TK0234_0224exoS_anal)

# TK0234_0224exoS_cat <- enumerate_hiconf_2reps(strain_cgmap = TK0234_0224exoS, strain="TK0234_0224",min_cov = 47)

Sys.sleep(10)

In [None]:
###strain TK0234_0729

defaultW <- getOption("warn")
options(warn = -1)


#load CGmaps into session
TK0234_0729exoS_CGmap_rep1 <-read.delim("../cgmaps/TK0234_0729_exoS_totalRNA_rep1.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK0234_0729", growth_phase='exoponential', metabolic_condition="S", replicate="rep1")

TK0234_0729exoS_CGmap_rep2 <-read.delim("../cgmaps/TK0234_0729_exoS_totalRNA_rep2.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK0234_0729", growth_phase='exoponential', metabolic_condition="S", replicate="rep2")

# process raw CGmaps
TK0234_0729exoS_rep1_df <- processCGmap(TK0234_0729exoS_CGmap_rep1)
TK0234_0729exoS_rep2_df <- processCGmap(TK0234_0729exoS_CGmap_rep2)

#merge reps
TK0234_0729exoS <- merge_del_reps(x=TK0234_0729exoS_rep1_df, y=TK0234_0729exoS_rep2_df, strain='TK0234_0729')

# get quantiles
"quantiles:"
get_quantiles(strain_cgmap = TK0234_0729exoS, strain="TK0234_0729", min_cov = 47)

#compare methy8lation frequencies & annotate
TK0234_0729exoS_anal <- CompareCGmap(strain_cgmap = TK0234_0729exoS, 
                                TS559_cgmap = TS559exoS, 
                                strain="TK0234_0729", 
                                annotation = annotation,
                                min_cov = 47)

write.table(TK0234_0729exoS_anal, file = "../processed_cgmaps/TK0234_0729exoS_annotated", sep = "\t", row.names = F)

#enumerate gains & losses
"losses & gains:"
nrow( TK0234_0729exoS_anal %>% filter(log2FC < 0) )
nrow( TK0234_0729exoS_anal %>% filter(log2FC > 0) )

"absolute losses & gains:"
nrow( TK0234_0729exoS_anal %>% filter(difference == "ABS LOSS") )
nrow( TK0234_0729exoS_anal %>% filter(difference == "ABS GAIN") )

head(TK0234_0729exoS_anal)

# TK0234_0729exoS_cat <- enumerate_hiconf_2reps(strain_cgmap = TK0234_0729exoS, strain="TK0234_0729",min_cov = 47)

Sys.sleep(20)

In [11]:
###strain TK2304_1935

defaultW <- getOption("warn")
options(warn = -1)


#load CGmaps into session
TK2304_1935exoS_CGmap_rep1 <-read.delim("../cgmaps/TK2304_1935_exoS_totalRNA_rep1.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK2304_1935", growth_phase='exoponential', metabolic_condition="S", replicate="rep1")

TK2304_1935exoS_CGmap_rep2 <-read.delim("../cgmaps/TK2304_1935_exoS_totalRNA_rep2.CGmap", sep='\t', header=FALSE,col.names=cols) %>%
    mutate(strain="TK2304_1935", growth_phase='exoponential', metabolic_condition="S", replicate="rep2")

# process raw CGmaps
TK2304_1935exoS_rep1_df <- processCGmap(TK2304_1935exoS_CGmap_rep1)
TK2304_1935exoS_rep2_df <- processCGmap(TK2304_1935exoS_CGmap_rep2)

#merge reps
TK2304_1935exoS <- merge_del_reps(x=TK2304_1935exoS_rep1_df, y=TK2304_1935exoS_rep2_df, strain='TK2304_1935')

# get quantiles
"quantiles:"
get_quantiles(strain_cgmap = TK2304_1935exoS, strain="TK2304_1935", min_cov = 47)

#compare methylation frequencies & annotate
TK2304_1935exoS_anal <- CompareCGmap(strain_cgmap = TK2304_1935exoS, 
                                TS559_cgmap = TS559exoS, 
                                strain="TK2304_1935", 
                                annotation = annotation,
                                min_cov = 47)

write.table(TK2304_1935exoS_anal, file = "../processed_cgmaps/TK2304_1935exoS_annotated", sep = "\t", row.names = F)

#enumerate gains & losses
"losses & gains:"
nrow( TK2304_1935exoS_anal %>% filter(difference == 'LOSS') )
nrow( TK2304_1935exoS_anal %>% filter(difference == 'GAIN') )

"absolute losses & gains:"
nrow( TK2304_1935exoS_anal %>% filter(difference == "ABS LOSS") )
nrow( TK2304_1935exoS_anal %>% filter(difference == "ABS GAIN") )

head(TK2304_1935exoS_anal)

# TK2304_1935exoS_cat <- enumerate_hiconf_2reps(strain_cgmap = TK2304_1935exoS, strain="TK2304_1935",min_cov = 47)

Sys.sleep(20)


Unnamed: 0_level_0,TS559_position,KOD1_position,strand,growth_phase,metabolic_condition,methylation_frequency_TK2304_1935_rep1,mC_count_TK2304_1935_rep1,coverage_TK2304_1935_rep1,methylation_frequency_TK2304_1935_rep2,mC_count_TK2304_1935_rep2,⋯,amino_acid_sequence,amino_acid_ID,local_41bp_predicted_fold,m5C_position_fold,MFE,associated_TSS_id,TSS_direction,TSS_description,total_annotations,alternate_annotations
Unnamed: 0_level_1,<int>,<chr>,<chr>,<chr>,<chr>,<dbl>,<int>,<int>,<dbl>,<int>,⋯,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<int>,<chr>
1,779427,780418,+,exoponential,S,0.05,68,1364,0.05,88,⋯,.,.,.,,.,.,.,.,1,
2,578337,579328,+,exoponential,S,0.05,88,1868,0.05,110,⋯,.,.,.,,.,TTS0579315_+,+,primary,1,
3,2026306,2027297,+,exoponential,S,0.07,65,995,0.05,70,⋯,.,.,....|.......||||....|||...|||||||||...|||,base_paired,-440.69,.,.,.,1,
4,931389,932380,-,exoponential,S,0.12,62,524,0.11,66,⋯,GCC,A,----.............................|||||||.,single_stranded,-2.42,TTS0932396_-,-,primary,1,
5,2025555,2026546,+,exoponential,S,0.0,3,3943,0.0,1,⋯,.,.,...||||||||....||||||...|||||||.......|||,base_paired,-440.69,.,.,.,1,
6,2022329,2023320,+,exoponential,S,0.0,7,7058,0.0,4,⋯,.,.,||.........|||||...||||||....|||.....|||.,base_paired,-224.51,.,.,.,1,


# CAT TABLE

In [26]:
# TS559 --  control/parent strain

TS559exoS_CGmap_rep1 <-load_cgmap("../cgmaps/TS559_exoS_totalRNA_rep1.CGmap",
    strain="TS559", growth_phase='exoponential', metabolic_condition="S", replicate="rep1") %>%
    dplyr::rename(reference_nucleotide=base)

TS559exoS_CGmap_rep2 <-load_cgmap("../cgmaps/TS559_exoS_totalRNA_rep2.CGmap", 
    strain="TS559", growth_phase='exoponential', metabolic_condition="S", replicate="rep2")%>%
    dplyr::rename(reference_nucleotide=base)

TS559exoS_CGmap_rep3 <-load_cgmap("../cgmaps/TS559_exoS_totalRNA_rep3.CGmap",
    strain="TS559", growth_phase='exoponential', metabolic_condition="S", replicate="rep3")%>%
    dplyr::rename(reference_nucleotide=base)
   
    
TS559exoS_rep1_df <- processCGmap(TS559exoS_CGmap_rep1)
TS559exoS_rep2_df <- processCGmap(TS559exoS_CGmap_rep2)
TS559exoS_rep3_df <- processCGmap(TS559exoS_CGmap_rep3)


m5C_count_e1 = 6
m5C_count_e2 = 18
m5C_count_e3 = 7
m5C_count_s1 = 5
m5C_count_s2 = 17
m5C_count_s3 = 5

TS559exoS_all <- merge(x=TS559exoS_rep1_df, y= TS559exoS_rep2_df, 
                   by=c("chromosome","position", 'reference_nucleotide', 'growth_phase','metabolic_condition'), 
                   all=TRUE) %>%
            merge(y=TS559exoS_rep3_df, 
                   by=c("chromosome","position", 'reference_nucleotide', 'growth_phase','metabolic_condition'), 
                   all=TRUE) %>%
            mutate(coverage_TS559_rep1 = replace_na(coverage_TS559_rep1,0)) %>%
            mutate(coverage_TS559_rep2 = replace_na(coverage_TS559_rep2,0)) %>%
            mutate(coverage_TS559_rep3 = replace_na(coverage_TS559_rep3,0)) %>%
            rowwise()%>%
            mutate(reproducible = detect_hiconf_3reps(
                f1=methylation_frequency_TS559_rep1,
                m1=mC_count_TS559_rep1,
                p1=m5C_count_e1,
                c1=coverage_TS559_rep1,
                f2=methylation_frequency_TS559_rep2,
                m2=mC_count_TS559_rep2,
                p2=m5C_count_e2,
                c2=coverage_TS559_rep2,
                f3=methylation_frequency_TS559_rep3,
                m3=mC_count_TS559_rep3,
                p3=m5C_count_e3,
                c3=coverage_TS559_rep3, 
                min_cov=47))

head(TS559exoS_all)

TS559exoS_cat <-TS559exoS_all %>%
    dplyr::select(-chromosome, -metabolic_condition) %>%
    dplyr::rename(TS559_position = position, TS559_hiconf = reproducible) %>%
    dplyr::rename(freq_TS559_rep1 = methylation_frequency_TS559_rep1, freq_TS559_rep2 = methylation_frequency_TS559_rep2, 
                  freq_TS559_rep3=methylation_frequency_TS559_rep3) %>%
    mutate('m5C_cov/total_cov_TS559_rep1' = paste(mC_count_TS559_rep1, '/', coverage_TS559_rep1) ) %>%
    mutate('m5C_cov/total_cov_TS559_rep2' = paste(mC_count_TS559_rep2, '/', coverage_TS559_rep2) ) %>%
    mutate('m5C_cov/total_cov_TS559_rep3' = paste(mC_count_TS559_rep3, '/', coverage_TS559_rep3) ) %>%
    dplyr::select(-mC_count_TS559_rep1,-coverage_TS559_rep1,-mC_count_TS559_rep2, 
                  -coverage_TS559_rep2,-mC_count_TS559_rep3, -coverage_TS559_rep3) %>%
    dplyr::select("TS559_position","freq_TS559_rep1", 
          "m5C_cov/total_cov_TS559_rep1","freq_TS559_rep2","m5C_cov/total_cov_TS559_rep2",
           "freq_TS559_rep3","m5C_cov/total_cov_TS559_rep3", 'TS559_hiconf'
          )        

#Sys.sleep(10)


chromosome,position,reference_nucleotide,growth_phase,metabolic_condition,methylation_frequency_TS559_rep1,mC_count_TS559_rep1,coverage_TS559_rep1,methylation_frequency_TS559_rep2,mC_count_TS559_rep2,coverage_TS559_rep2,methylation_frequency_TS559_rep3,mC_count_TS559_rep3,coverage_TS559_rep3,reproducible
<chr>,<int>,<chr>,<chr>,<chr>,<dbl>,<int>,<int>,<dbl>,<int>,<int>,<dbl>,<int>,<int>,<lgl>
TS559_Genomic_Sequence.seq,3,G,exoponential,S,,,0,,,0,0,0,1,
TS559_Genomic_Sequence.seq,6,C,exoponential,S,0.0,0.0,8,0.0,0.0,58,0,0,102,False
TS559_Genomic_Sequence.seq,7,C,exoponential,S,0.0,0.0,8,0.0,0.0,58,0,0,108,False
TS559_Genomic_Sequence.seq,9,C,exoponential,S,0.0,0.0,8,0.0,0.0,59,0,0,114,False
TS559_Genomic_Sequence.seq,10,G,exoponential,S,,,0,,,0,0,0,1,
TS559_Genomic_Sequence.seq,12,C,exoponential,S,0.0,0.0,8,0.0,0.0,60,0,0,116,False


In [27]:
cat_exoS_data <-
    merge(x=TS559exoS_cat, y = TK0224exoS_cat, by='TS559_position', all=TRUE) %>%
    merge(y = TK0234exoS_cat, by='TS559_position', all=TRUE) %>%
    merge(y = TK0360exoS_cat, by='TS559_position', all=TRUE) %>%
    merge(y = TK0704exoS_cat, by='TS559_position', all=TRUE) %>%
    merge(y = TK0729exoS_cat, by='TS559_position', all=TRUE) %>%
    merge(y = TK0872exoS_cat, by='TS559_position', all=TRUE) %>%
    merge(y = TK1273exoS_cat, by='TS559_position', all=TRUE) %>%
    merge(y = TK1917exoS_cat, by='TS559_position', all=TRUE) %>%
    merge(y = TK1784exoS_cat, by='TS559_position', all=TRUE) %>%
    merge(y = TK1935exoS_cat, by='TS559_position', all=TRUE) %>%
    merge(y = TK2122exoS_cat, by='TS559_position', all=TRUE) %>%
    merge(y = TK2241exoS_cat, by='TS559_position', all=TRUE) %>%
    merge(y = TK2304exoS_cat, by='TS559_position', all=TRUE) %>%
    merge(y = TK0234_0224exoS_cat, by='TS559_position', all=TRUE) %>%
    merge(y = TK0234_0729exoS_cat, by='TS559_position', all=TRUE) %>%
    merge(y = TK2304_1935exoS_cat, by='TS559_position', all=TRUE) %>%
    #merge(y = TK2045exoS_cat, by='TS559_position', all=TRUE) %>%
    #merge(y = TK2045NdelexoS_cat, by='TS559_position', all=TRUE) %>%

    filter(TS559_hiconf==TRUE |
    TK0224_hiconf==TRUE | 
    TK0234_hiconf==TRUE |
    TK0360_hiconf==TRUE | 
    TK0704_hiconf==TRUE | 
    TK0729_hiconf==TRUE | 
    TK0872_hiconf==TRUE | 
    TK1273_hiconf==TRUE | 
    TK1917_hiconf==TRUE |
    TK1784_hiconf==TRUE | 
    TK1935_hiconf==TRUE | 
    TK2122_hiconf==TRUE | 
    TK2241_hiconf==TRUE | 
    TK2304_hiconf==TRUE |
    TK0234_0224_hiconf==TRUE |
    TK0234_0729_hiconf==TRUE |
    TK2304_1935_hiconf==TRUE  
    )

#write.table(cat_exoS_data, file = "../processed_cgmaps/master_exoS_data", sep = "\t", row.names = F)
nrow(cat_exoS_data)
head(cat_exoS_data)

Unnamed: 0_level_0,TS559_position,freq_TS559_rep1,m5C_cov/total_cov_TS559_rep1,freq_TS559_rep2,m5C_cov/total_cov_TS559_rep2,freq_TS559_rep3,m5C_cov/total_cov_TS559_rep3,TS559_hiconf,freq_TK0224_rep1,m5C_cov/total_cov_TK0224_rep1,⋯,freq_TK0234_0729_rep1,m5C_cov/total_cov_TK0234_0729_rep1,freq_TK0234_0729_rep2,m5C_cov/total_cov_TK0234_0729_rep2,TK0234_0729_hiconf,freq_TK2304_1935_rep1,m5C_cov/total_cov_TK2304_1935_rep1,freq_TK2304_1935_rep2,m5C_cov/total_cov_TK2304_1935_rep2,TK2304_1935_hiconf
Unnamed: 0_level_1,<int>,<dbl>,<chr>,<dbl>,<chr>,<dbl>,<chr>,<lgl>,<dbl>,<chr>,⋯,<dbl>,<chr>,<dbl>,<chr>,<lgl>,<dbl>,<chr>,<dbl>,<chr>,<lgl>
1,857,0.11,7 / 61,0.14,36 / 256,0.07,34 / 507,True,0.0,0 / 38,⋯,0.05,29 / 571,0.03,23 / 752,False,0.02,5 / 233,0.01,3 / 255,False
2,13012,0.0,0 / 39,0.01,3 / 319,0.01,11 / 1363,False,0.0,0 / 45,⋯,0.02,29 / 1376,0.02,36 / 1624,False,0.02,16 / 798,0.01,10 / 847,False
3,18383,0.06,4 / 72,0.07,32 / 445,0.03,13 / 511,False,0.08,6 / 77,⋯,0.01,6 / 669,0.01,7 / 840,False,0.0,2 / 451,0.0,2 / 555,False
4,19557,0.21,7 / 33,0.13,19 / 143,0.11,44 / 413,True,0.2,7 / 35,⋯,0.06,30 / 482,0.09,55 / 612,True,0.03,8 / 275,0.03,8 / 276,False
5,22822,0.07,14 / 192,0.09,55 / 635,0.05,62 / 1307,True,0.09,16 / 182,⋯,0.03,40 / 1523,0.02,41 / 1720,False,0.01,12 / 1359,0.0,8 / 1742,False
6,23579,0.05,3 / 65,0.14,25 / 177,0.0,0 / 277,False,0.04,2 / 46,⋯,0.0,0 / 200,0.0,0 / 243,False,0.0,0 / 169,0.0,0 / 238,False
