In [10]:
library(gdata)
library(ggplot2)

In [14]:
# Load the metadata spreadsheet, keeping text columns as character vectors.
meta_data <- read.xls('metadata_updated_rates.xlsx', stringsAsFactors = FALSE)

# Drop the rows whose Name contains "N. meningitis". A logical mask is used
# instead of negative grep() indices: when nothing matches, -integer(0) would
# select zero rows and silently empty the whole table. fixed = TRUE makes the
# "." a literal dot rather than a regex wildcard.
meta_data <- meta_data[!grepl('N. meningitis', meta_data$Name, fixed = TRUE), ]

In [15]:
# Quick look at the table: list every column name, then show the species,
# genomic rate and Name columns for all rows.
print(names(meta_data))
print(meta_data[c('species', 'beast_genomic_rate', 'Name')])


 [1] "X"                                 "n_recomb"                         
 [3] "RDP_methods"                       "code"                             
 [5] "species"                           "Name"                             
 [7] "CG."                               "titv"                             
 [9] "ac"                                "ag"                               
[11] "at"                                "cg"                               
[13] "gt"                                "ct"                               
[15] "model_chosen"                      "lifestile_coded"                  
[17] "lifestyle_k"                       "lifestyile"                       
[19] "gram_coded"                        "gram"                             
[21] "file_name"                         "reg_name"                         
[23] "temp_stucture_degree"              "CR1"                              
[25] "CR1_."                             "CR2"                              

In [16]:
# Look at the raw HPD strings, then try out the clean-up that removes the
# brackets and splits each entry into its whitespace-separated number strings.
hpd <- meta_data$beast_genomic_rate_hpd
print(hpd)
strsplit(
    gsub('[[] +|[]]', '', hpd),
    ' +'
)

 [1] "[  4.71965295e-09   3.52348113e-08]"         
 [2] "[  1.03074860e-08   2.27366835e-08]"         
 [3] "[  1.24106512e-08   3.98059127e-08]"         
 [4] "[  3.80107270e-08   8.02369335e-08]"         
 [5] "[  5.75837232e-08   9.73789945e-08]"         
 [6] "[  5.22962456e-08   1.15316936e-07]"         
 [7] "[  4.80782150e-08   1.70122636e-07]"         
 [8] "[  1.52695120e-07   1.93785596e-07]"         
 [9] "[  1.23526930e-07   2.59991487e-07]"         
[10] "[  3.46236453e-08   3.97784756e-07]"         
[11] "[  1.44579205e-07   3.13418618e-07]"         
[12] "[  2.52850527e-07   4.38880003e-07]"         
[13] "[  1.72645867e-07   8.09162895e-07]"         
[14] "[  4.27747707e-07   6.59008160e-07]"         
[15] "[  2.78458820e-07   9.64180772e-07]"         
[16] "[  1.01201460e-07   3.77046261e-07]"         
[17] "[  6.66723944e-08   5.55687289e-07]"         
[18] "[  8.99905284e-07   1.12391538e-06]"         
[19] "[  7.11045848e-07   9.40917696e-07]"         
[20] "[  9.5

In [17]:
# Derive numeric lower/upper bounds (min_rate, max_rate) from the bracketed
# text in beast_genomic_rate_hpd, e.g. "[  4.71965295e-09   3.52348113e-08]".

# Remove both brackets no matter how much (or how little) whitespace follows
# the "[", then trim the ends so splitting never produces an empty token.
hpd_text <- gsub('\\[|\\]', '', as.character(meta_data$beast_genomic_rate_hpd))
hpd_text <- gsub('^[[:space:]]+|[[:space:]]+$', '', hpd_text)

# One numeric vector per row; missing entries stay NA.
hpd_values <- lapply(strsplit(hpd_text, '[[:space:]]+'), as.numeric)

# An empty entry yields NA instead of the Inf / -Inf (plus a warning) that
# min() / max() return for zero-length input.
pick_bound <- function(values, bound) {
    if (length(values) == 0L) NA_real_ else bound(values)
}

meta_data$min_rate <- vapply(hpd_values, pick_bound, numeric(1), bound = min)
meta_data$max_rate <- vapply(hpd_values, pick_bound, numeric(1), bound = max)

# Show the first rows to check the parsed bounds against the point estimate.
head(meta_data[c('beast_genomic_rate', 'min_rate', 'max_rate')])

Unnamed: 0,beast_genomic_rate,min_rate,max_rate
1,1.562162e-08,4.719653e-09,3.523481e-08
2,1.572643e-08,1.030749e-08,2.273668e-08
3,2.260972e-08,1.241065e-08,3.980591e-08
4,5.675213e-08,3.801073e-08,8.023693e-08
6,7.604037e-08,5.758372e-08,9.737899e-08
7,8.016475e-08,5.229625e-08,1.153169e-07


In [18]:
# Plotting table: the rate estimate and its two bounds on the log10 scale.
plot_dat <- log10(meta_data[c('beast_genomic_rate', 'min_rate', 'max_rate')])
# Untransformed alternative:
#plot_dat <- meta_data[c('beast_genomic_rate', 'min_rate', 'max_rate')]

# Colour per row from temp_stucture_degree:
#   degree <  0.5        -> 'red'
#   0.5 <= degree < 0.9  -> 'orange'
#   degree >= 0.9        -> 'blue'
# right = FALSE makes every interval closed on the left, matching the >=
# comparisons; include.lowest = TRUE keeps an infinite degree in the top bin.
# Rows with a missing degree get NA.
plot_dat$temp_struct <- as.character(cut(
    meta_data$temp_stucture_degree,
    breaks = c(-Inf, 0.5, 0.9, Inf),
    labels = c('red', 'orange', 'blue'),
    right = FALSE,
    include.lowest = TRUE
))

# Axis label for each row, plus a species label made unique by appending the
# row position. seq_len() (rather than 1:nrow) stays correct for an empty table.
plot_dat$name <- meta_data$long_names
plot_dat$spec <- paste(meta_data$species, seq_len(nrow(meta_data)))

In [19]:
# Base scatter plot: one point per name showing the (log10) genomic rate,
# coloured with the values in plot_dat$temp_struct, y axis fixed to -9..-4.
plot_basic <- ggplot(data = plot_dat,
                     mapping = aes(x = name, y = beast_genomic_rate)) +
    geom_point(colour = plot_dat$temp_struct) +
    ylim(-9, -4)

In [20]:
# Final figure: the base plot with a black-and-white theme, vertical x-axis
# labels, and an error bar from min_rate to max_rate for every point, using
# the same per-row colours as the points.
rates_plot <- plot_basic +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    geom_errorbar(aes(ymin = min_rate, ymax = max_rate),
                  width = .3, colour = plot_dat$temp_struct)

# Write the figure to a 12 x 6 inch PDF. The plot is print()ed explicitly
# because ggplot objects are only drawn automatically at the interactive top
# level; without print() the file would be empty when this code is source()d
# or run inside a function. The device is closed even if rendering fails.
pdf('plot_all_rates.pdf', useDingbats = FALSE, width = 12, height = 6)
invisible(tryCatch(print(rates_plot), finally = dev.off()))

In [15]:
# Number of distinct species in the metadata, followed by the plotting table.
length(unique(meta_data[['species']]))
plot_dat

Unnamed: 0,beast_genomic_rate,min_rate,max_rate,temp_struct,name,spec
1,1.562162e-08,4.719653e-09,3.523481e-08,red,Mycobacterium leprae,Mycobacterium leprae 1
2,1.572643e-08,1.030749e-08,2.273668e-08,orange,Yersinia pestis,Yersinia pestis 2
3,2.260972e-08,1.241065e-08,3.980591e-08,red,Yersinia pestis (second pandemic),Yersinia pestis 3
4,5.675213e-08,3.801073e-08,8.023693e-08,darkgreen,Mycobacterium tuberculosis Lineage 4,Mycobacterium tuberculosis 4
5,6.049665e-08,9.131985e-09,1.627462e-07,red,Neisseria meningitis,Neisseria meningitidis 5
6,7.604037e-08,5.758372e-08,9.737899e-08,darkgreen,Salmonella Paratyphi A (two clades),Salmonella enterica 6
7,8.016475e-08,5.229625e-08,1.153169e-07,darkgreen,Salmonella Paratyphi A (clade 1),Salmonella enterica 7
8,9.473972e-08,4.807822e-08,1.701226e-07,darkgreen,Salmonella Paratyphi A (clade 2),Salmonella enterica 8
9,1.72569e-07,1.526951e-07,1.937856e-07,darkgreen,Bordetella pertussis,Bordetella pertussis 9
10,1.781592e-07,1.235269e-07,2.599915e-07,darkgreen,Salmonella Typhi H58,Salmonella enterica 10


In [94]:
# Show each row's species next to its genomic rate estimate.
meta_data[, c('species', 'beast_genomic_rate')]

Unnamed: 0,species,beast_genomic_rate
1,Bordetella pertussis,1.736357e-07
2,Enterococcus faecium,2.959674e-06
3,Enterococcus faecium,1.882352e-06
4,Enterococcus faecium,4.622071e-06
5,Klebsiella pneumoniae,6.89294e-07
6,Klebsiella pneumoniae,7.803694e-07
7,Klebsiella pneumoniae,9.032348e-07
8,Mycobacterium leprae,1.626246e-08
9,Mycobacterium tuberculosis,5.739274e-08
10,Mycobacterium tuberculosis,1.926015e-07


In [9]:
# Re-assign the point colours from temp_stucture_degree, lowest band first:
# below 0.5 -> 'red', 0.5 up to (not including) 0.9 -> 'orange', 0.9 and
# above -> 'green'. The bands do not overlap, so the order is irrelevant;
# rows whose degree is NA keep whatever colour they already had.
plot_dat$temp_struct[with(meta_data, temp_stucture_degree < 0.5)] <- 'red'
plot_dat$temp_struct[
    with(meta_data, temp_stucture_degree >= 0.5 & temp_stucture_degree < 0.9)
] <- 'orange'
plot_dat$temp_struct[with(meta_data, temp_stucture_degree >= 0.9)] <- 'green'
plot_dat

Unnamed: 0,beast_genomic_rate,min_rate,max_rate,temp_struct,name,spec
1,-7.806274,-8.32609,-7.453028,red,Mycobacterium leprae,Mycobacterium leprae 1
2,-7.80337,-7.986847,-7.643273,orange,Yersinia pestis,Yersinia pestis 2
3,-7.645705,-7.906205,-7.400052,red,Yersinia pestis (second pandemic),Yersinia pestis 3
4,-7.246018,-7.420094,-7.095626,green,Mycobacterium tuberculosis Lineage 4,Mycobacterium tuberculosis 4
5,-7.218269,-8.039435,-6.788489,red,Neisseria meningitis,Neisseria meningitidis 5
6,-7.118956,-7.2397,-7.011535,green,Salmonella Paratyphi A (two clades),Salmonella enterica 6
7,-7.096017,-7.281529,-6.938107,green,Salmonella Paratyphi A (clade 1),Salmonella enterica 7
8,-7.023468,-7.318052,-6.769238,green,Salmonella Paratyphi A (clade 2),Salmonella enterica 8
9,-6.763037,-6.816175,-6.712679,green,Bordetella pertussis,Bordetella pertussis 9
10,-6.749192,-6.908238,-6.585041,green,Salmonella Typhi H58,Salmonella enterica 10
