In [2]:
library(gdata)
library(RColorBrewer)

In [3]:
# Load the per-data-set metadata sheet (read.xls comes from gdata).
meta_data <- read.xls('metadata_updated_rates.xlsx', stringsAsFactors = FALSE)
# Drop the N. meningitis rows. A logical mask replaces -grep(): when nothing
# matches, -grep() yields an empty index and would discard every row.
# fixed = TRUE makes the '.' a literal dot rather than a regex wildcard.
meta_data <- meta_data[!grepl('N. meningitis', meta_data$Name, fixed = TRUE), ]
#meta_data <- meta_data[, -ncol(meta_data)]
# short_name: file_name with the substrings listed in the inner pattern removed
# and any trailing underscores trimmed afterwards.
meta_data$short_name <- gsub('_+$', '', gsub('ucld|_rc|_rs|_sc|_ss|_bs|constant|strict', '', meta_data$file_name))
head(meta_data[, c('beast_genomic_rate', 'long_names')])
# Sort rows by long_names; later cells iterate over the rows in this order.
meta_data <- meta_data[order(meta_data$long_names),]
head(meta_data)
colnames(meta_data)


Unnamed: 0,beast_genomic_rate,long_names
1,1.562162e-08,Mycobacterium leprae
2,1.572643e-08,Yersinia pestis
3,2.260972e-08,Yersinia pestis (second pandemic)
4,5.675213e-08,Mycobacterium tuberculosis Lineage 4
6,7.604037e-08,Salmonella Paratyphi A (two clades)
7,8.016475e-08,Salmonella Paratyphi A (clade 1)


Unnamed: 0,X,n_recomb,RDP_methods,code,species,Name,CG.,titv,ac,ag,ellip.h,A,C,G,T,ref,reg_rsquared,reg_squared,reg_conf_int,long_names,short_name
29,28,1,R,A. baum.,Acinetobacter baumannii,A. baumannii GC1,39.0,0.7004062,0.396,1.069,⋯,0.2706,0.226,0.2259,0.2769,,,0.73,"[1.11607935392489e-06,1.6210860022724e-06]",Acinetobacter baumannii GC1,A_baumanii_2
32,31,1,R,A. baum.,Acinetobacter baumannii,A. baumannii GC2,39.0,0.7525907,0.481,1.324,⋯,0.225,0.265,0.214,0.2964,,,0.71,"[2.10569584307426e-06,2.89310672127826e-06]",Acinetobacter baumannii GC2,A_baumanii_1
9,8,0,,B. pert.,Bordetella pertussis,B. pertussis,67.7,1.4154,0.132765,1.056287,⋯,0.08703574,0.4599019,0.3762812,0.07678113,"Bart, M. J., Harris, S. R., Advani, A., Arakawa, Y., Bottero, D., Bouchez, V., ... & Mooi, F. R. (2014). Global population structure and evolution of Bordetella pertussis and their relationship with vaccination. MBio, 5(2), e01074-14.",,0.7,"[2.24250043312779e-07,2.77216941572175e-07]",Bordetella pertussis,b_pertussis
31,30,4,RGBMST,E. faec.,Enterococcus faecium,E. faecium (clade1),38.0,0.8705608,0.351707,0.963501,⋯,0.2384732,0.2716731,0.2760906,0.2137631,"Howden, B. P., Holt, K. E., Lam, M. M., Seemann, T., Ballard, S., Coombs, G. W., ... & Stinear, T. P. (2013). Genomic insights to control the emergence of vancomycin-resistant enterococci. MBio, 4(4), e00412-13.",,0.243,[-1.03e-06 5.68e-06],Enterococcus faecium (clade1),e_faecium_clade1_RDP
35,34,8,RGBMST,E. faec.,Enterococcus faecium,E. faecium (clade2),38.0,0.945975,0.326433,1.112886,⋯,0.2355818,0.2656311,0.2940762,0.2047109,"Howden, B. P., Holt, K. E., Lam, M. M., Seemann, T., Ballard, S., Coombs, G. W., ... & Stinear, T. P. (2013). Genomic insights to control the emergence of vancomycin-resistant enterococci. MBio, 4(4), e00412-13.",,0.0003489,[ 5.8365e-06 -6.2175e-06],Enterococcus faecium (clade2),e_faecium_clade2__skyline
34,33,8,RGBMST,E. faec.,Enterococcus faecium,E. faecium (two clades),38.0,0.9179449,0.3342837,1.011097,⋯,0.2366279,0.267817,0.2875693,0.2079858,"Howden, B. P., Holt, K. E., Lam, M. M., Seemann, T., Ballard, S., Coombs, G. W., ... & Stinear, T. P. (2013). Genomic insights to control the emergence of vancomycin-resistant enterococci. MBio, 4(4), e00412-13.",,0.0192,[ 2.5793e-06 -1.3799e-06],Enterococcus faecium (two clades),e_faecium_clades12


In [4]:
#meta_data <- meta_data[order(meta_data$Name), ]

In [5]:
# Apply an opacity to one or more colours.
#
# colors: any colour specification accepted by col2rgb().
# alpha:  opacity on a 0-1 scale; it replaces whatever alpha the input
#         colours already carried.
# Returns the character vector of colour strings produced by rgb(), one per
# input colour.
addalpha <- function(colors, alpha=1.0) {
  # Spell out TRUE: `T` is an ordinary variable and can be reassigned, in
  # which case col2rgb() would return only three rows and row 4 would not exist.
  r <- col2rgb(colors, alpha = TRUE)
  # Row 4 is the alpha channel; overwrite it on the same 0-255 scale as the
  # colour channels.
  r[4, ] <- alpha * 255
  # rgb() expects channels in [0, 1] by default.
  r <- r / 255.0
  rgb(r[1, ], r[2, ], r[3, ], r[4, ])
}


# Draw one date-vs-distance regression panel per row of meta_data into
# regression_plots.pdf (9 x 4 panels per page), and store each fit's R-squared
# and slope in meta_data$reg_squared / meta_data$regression_rate.
pdf('regression_plots.pdf', useDingbats = FALSE, width = 15, height = 20)
# The device is closed in `finally`, so an error part-way through (for example
# a missing regression file) does not leave the PDF device open.
invisible(tryCatch({
    par(mfrow = c(9, 4))
    par(mar = c(3, 3, 3, 3))

    # Alternative background: 10-step red-yellow-green gradient. It used to be
    # computed here and then immediately overwritten by the three-tier scheme.
    #cols <- addalpha(colorRampPalette(c('red', 'yellow', 'green'),
    #                                  space = 'rgb')(10)[as.numeric(cut(meta_data$temp_stucture_degree, 10))], 0.3)

    # Three tiers of temp_stucture_degree: >= 0.9, [0.5, 0.9), < 0.5.
    strong <- which(meta_data$temp_stucture_degree >= 0.9)
    moderate <- which(meta_data$temp_stucture_degree >= 0.5 & meta_data$temp_stucture_degree < 0.9)
    low <- which(meta_data$temp_stucture_degree < 0.5)

    # Start from NA (rect() draws no fill) so rows with a missing degree do not
    # end up with the invalid colour name "FALSE".
    cols <- rep(NA_character_, nrow(meta_data))
    cols[strong] <- rgb(0, 0, 1, 0.3)
    cols[moderate] <- rgb(1, 1, 0, 0.3)
    cols[low] <- rgb(1, 0, 0, 0.3)

    # seq_len() gives an empty loop for an empty table, unlike 1:nrow().
    for (i in seq_len(nrow(meta_data))) {
        print(meta_data[i, 'reg_name'])
        f <- read.table(paste0('all_regressions/', meta_data[i, 'reg_name']),
                        header = TRUE, stringsAsFactors = FALSE)
        # Rescale distances by this data set's conversion factor.
        f$distance <- f$distance * meta_data$conversion_factor[i]

        f_reg <- lm(distance ~ date, data = f)
        # Summarise once and address the slope row by name. If the slope could
        # not be estimated, summary() drops its row; report NA in that case
        # rather than picking up an intercept statistic by position.
        f_summary <- summary(f_reg)
        coefs <- f_summary$coefficients
        has_slope <- nrow(coefs) >= 2
        slope <- if (has_slope) coefs[2, 'Estimate'] else NA_real_
        slope_p <- if (has_slope) coefs[2, 'Pr(>|t|)'] else NA_real_
        r_squared <- round(f_summary$r.squared, 2)

        plot(f$date, f$distance, ylab = '', xlab = '', pch = 20, col = rgb(0, 0, 0, 0.5),
             main = meta_data[i, 'long_names'],
             ylim = c(min(f$distance) * 0.95, max(f$distance) * 1.1),
             xlim = c(min(f$date) - 5, max(f$date)), cex = 2)
        abline(f_reg, lwd = 3, col = rgb(1, 0, 0, 0.7), lty = 2)
        # Tint the whole plot region with the tier colour (semi-transparent,
        # drawn over the points and the fitted line).
        usr <- par("usr")
        rect(usr[1], usr[3], usr[2], usr[4], col = cols[i])

        # Both labels sit a quarter of the way along the date range.
        label_x <- (max(f$date) - min(f$date)) / 4 + min(f$date)
        dist_span <- max(f$distance) - min(f$distance)

#To report P value
        text(x = label_x,
             y = dist_span / 1.5 + min(f$distance), labels = format(slope_p, trim = TRUE))

#To report slope (rate)
#    text(x = (max(f$date) - min(f$date)) / 4 + min(f$date),
#         y = (max(f$distance) - min(f$distance)) / 1.5 + min(f$distance), labels  = format(f_reg$coefficients[2], T))

        text(x = label_x,
             y = dist_span / 1.1 + min(f$distance), labels = r_squared)

        meta_data[i, 'reg_squared'] <- r_squared
        meta_data[i, 'regression_rate'] <- slope
    }
}, finally = dev.off()))

[1] "A_baumanii_2_reg.txt"
[1] "A_baumanii_1_reg.txt"
[1] "b_pertussis_reg.txt"
[1] "e_faecium_clade1_reg.txt"
[1] "e_faecium_clade2_reg.txt"
[1] "e_faecium_clades12_reg.txt"
[1] "klebsiella_clade1_reg.txt"
[1] "klebsiella_clade2_reg.txt"
[1] "klebsiella_snps_no_outliers_reg.txt"
[1] "m_leprae_reg.txt"
[1] "TB_animal_L6_rooted_reg.txt"
[1] "TB_beijing_reg.txt"
[1] "m_tub_argentina_reg.txt"
[1] "p_aeruginosa_no_outliers_reg.txt"
[1] "agona_pruned_reg.txt"
[1] "s_kentucky_reg.txt"
[1] "paratyphi_a_clade1_reg.txt"
[1] "paratyphi_a_clade2_reg.txt"
[1] "paratyphi_a_clades12_reg.txt"
[1] "typhi_no_outliers_reg.txt"
[1] "Sd1_reg.txt"
[1] "shigella_sonnei_reg.txt"
[1] "staph_CC398_reg.txt"
[1] "st22_pruned_reg.txt"
[1] "st239_complete_reg.txt"
[1] "st239_ingroup_reg.txt"
[1] "st_usa300_reg.txt"
[1] "st93_reg.txt"
[1] "s_pneumon_concat_dates_pruned_reg.txt"
[1] "strep_pyogenes_clade1_reg.txt"
[1] "strep_pyogenes_clade2_reg.txt"
[1] "strep_pyogenes_clades12_reg.txt"
[1] "v_cholerae_reg.txt"
[1] 

In [17]:
# Save the metadata table as comma-separated text.
# qmethod = 'double' writes embedded double quotes as "" (the CSV convention)
# instead of write.table's default backslash escape.
write.table(meta_data, file = 'metadata_regression_plots.csv', sep = ',',
            row.names = FALSE, qmethod = 'double')

In [18]:
# Row indices of meta_data with temp_stucture_degree >= 0.9.
strong


In [19]:
# Regression slopes stored per row by the plotting loop.
meta_data$regression_rate

In [31]:
# Row indices of meta_data with temp_stucture_degree < 0.5.
low

In [17]:
# f_reg is whatever fit the plotting loop assigned last. Element 2 of the
# coefficient matrix (column-major) is the slope estimate when both the
# intercept and the slope were estimated.
summary(f_reg)$coeff[2]


In [22]:
# Number of rows whose stored (rounded) R-squared exceeds 0.7; the result is NA
# if any reg_squared value is NA.
sum(meta_data$reg_squared > .7)