In [None]:
library(tidyr)
library(dplyr, warn.conflicts=F, quietly=T)
library(ggplot2)

In [None]:
# Load the mash run results. The file is tab-delimited (read.delim's default
# separator) and has no header row, so the column names are supplied here.
# NOTE(review): column meanings are not documented in this file; the plots
# further down label `var` as mean pairwise variation (pi) and `rho` as
# Spearman's rho.
results <- read.delim(
  "2016-12-08_mash-run.tab",
  header = FALSE,  # spelled out: `F` is an ordinary, reassignable variable
  col.names = c("seed", "metric", "kwipsize", "cov", "var", "rho")
)
# Quick sanity check of column types and value ranges.
str(results)
summary(results)

## All data

In [None]:
# Keep only the columns the exploratory plots need. `var.f` is a new factor
# copy of `var` (appended as the last column); `seed` is converted to a
# factor in place.
dat <- results %>%
  select(rho, metric, var, seed) %>%
  mutate(var.f = as.factor(var), seed = as.factor(seed))
str(dat)

In [None]:
# Box plots of rho at each level of var.f, filled by metric.
ggplot(dat, aes(x = var.f, y = rho)) +
  geom_boxplot(aes(fill = metric))

In [None]:
# rho against var on a log10 x axis; colour shows the seed and line type
# shows the metric.
ggplot(dat) +
  geom_line(aes(x = var, y = rho, colour = seed, linetype = metric)) +
  scale_x_log10()

## $\pi$ vs performance

In [None]:
# Summarise rho for every (var, metric) combination.
#   rho_av: median of rho, ignoring NAs
#   rho_sd: half the interquartile range of rho, ignoring NAs. Despite the
#           name this is not a standard deviation.
# No select() is needed first: summarise() keeps only the grouping columns
# and the newly computed summaries.
summ <- results %>%
  group_by(var, metric) %>%
  summarise(
    rho_av = median(rho, na.rm = TRUE),
    rho_sd = IQR(rho, na.rm = TRUE) / 2
  ) %>%
  # summarise() peels off only the last grouping level, so the result would
  # still be grouped by `var`; drop that so later verbs see the whole table.
  ungroup()

str(summ)

In [None]:
# Summary figure: rho_av per metric against var on a log10 x axis, with a
# translucent ribbon spanning rho_av - rho_sd to rho_av + rho_sd.
p <- ggplot(
  summ,
  aes(
    x = var, y = rho_av,
    ymin = rho_av - rho_sd, ymax = rho_av + rho_sd,
    group = metric
  )
) +
  geom_line(aes(linetype = metric)) +
  geom_ribbon(aes(fill = metric), alpha = 0.2) +
  scale_x_log10() +
  # Plotmath expressions so the axis titles render the Greek letters.
  labs(
    x = expression(paste("Mean pairwise variation (", pi, ")")),
    y = expression(paste("Spearman's ", rho))
  ) +
  theme_bw()

print(p)