In [None]:
library(plyr,  warn.conflicts=F)
library(dplyr, warn.conflicts=F)
library(tidyr, warn.conflicts=F)
library(ggplot2)
library(caTools)

In [None]:
# Sample metadata table (tab-delimited). Later cells read its `Run` and
# `origin` columns.
metadata = read.delim(file="chlamy_meta.tab")

In [None]:
# Load a tab-delimited square distance matrix and reorder it to follow the
# sample metadata.
#
# Args:
#   filename: path to a distance matrix file whose first line is a header and
#             whose first column holds the row (sample) names.
#
# Returns:
#   A matrix whose i-th row and i-th column correspond to metadata$Run[i].
#   The result is always a matrix, even when only one run is selected.
#
# Raises:
#   An error naming every run of the global `metadata` that has no row in the
#   matrix. Without this check such runs become all-NA rows/columns.
read.distmat =  function (filename) {
    dm = as.matrix(read.delim(filename, header=TRUE, row.names=1))
    idxs = match(metadata$Run, row.names(dm))
    if (anyNA(idxs)) {
        stop("Runs in metadata but not in ", filename, ": ",
             paste(metadata$Run[is.na(idxs)], collapse=", "))
    }
    # Columns are selected by position, i.e. they are assumed to be in the
    # same order as the rows. drop=FALSE keeps a 1x1 result as a matrix.
    return(dm[idxs, idxs, drop=FALSE])
}

In [None]:
# Put the metadata rows in the same order as the samples of the distance
# matrix. The sample names are read directly from the full-coverage matrix
# file, because `dm` is a local variable of read.distmat() and does not exist
# at this level (the cell would fail on a fresh kernel).
# NOTE(review): assumes every sample in the matrix has a metadata row;
# unmatched samples would produce NA rows here -- confirm.
dist.runs = row.names(read.delim("kwip/full_wip.dist", header=TRUE, row.names=1))
metadata = metadata[match(dist.runs, metadata$Run),]

In [None]:
# The "full" distance matrix, with rows/columns ordered like metadata$Run.
full = read.distmat(filename="kwip/full_wip.dist")

In [None]:
# Classical multidimensional scaling of the full distance matrix in two
# dimensions (k=2 is the cmdscale default, spelled out here).
# NOTE: the result is stored as `c`, the same name as base::c(). Calls such as
# c(1, 2) still work, but the name is easy to misread.
c = cmdscale(d=as.dist(m=full), k=2)

In [None]:
# Negated first MDS coordinate of every sample, in ascending order, i.e.
# samples listed from the largest original coordinate to the smallest.
sort(-c[, 1], decreasing=FALSE)

In [None]:
# MDS coordinates of sample SRR1734600.
c["SRR1734600", , drop=TRUE]

In [None]:
# Coverage labels used to build the distance matrix file names
# ("kwip/<label>_wip.dist"): every numeric level gets an "x" suffix, and the
# last entry is the literal label "full".
coverages = c(
    paste0(c("0.01", "0.1", "0.5", "1", "2", "4", "8", "12", "15", "25",
             "50", "75", "100", "150", "200"),
           "x"),
    "full"
)

# Draw one PCoA (classical MDS) scatter plot per coverage level into a single
# multi-page PDF, and collect the corresponding distance matrices in
# `matricies` (one element per entry of `coverages`, in the same order).
matricies = list()

# Loop-invariant settings, defined once instead of on every iteration.
cols = c("light blue", "blue", "dark green", "red" )
# Sample whose coordinates decide the sign of each axis.
ref.sample = "SRR1734600"

pdf("all-pcoas.pdf")
# `finally` closes the PDF device even if an iteration fails (missing file,
# missing sample, plotting error); otherwise the device would stay open and
# capture later plots. invisible() keeps the cell output empty.
invisible(tryCatch({
    for (coverage in coverages) {
        fname = paste0("kwip/", coverage, "_wip.dist")
        mat = read.distmat(fname)
        matricies[[length(matricies) + 1]] = mat
        mds = cmdscale(mat, k=2, eig=TRUE, x.ret=TRUE)
        eigs = mds$eig
        # Share of the eigenvalue sum per axis, in whole percent (axis labels).
        pct.contrib = round(eigs / sum(eigs) * 100)

        # Invert axes to match the paper (Flowers et al.) figure.
        # The sample here is one of the two red ones in the top right corner.
        for (axis in 1:2) {
            if (mds$points[ref.sample, axis] < 0) {
                mds$points[, axis] = -mds$points[, axis]
            }
        }

        pts.df = as.data.frame(mds$points)
        # read.distmat() orders the matrix like metadata$Run, so the origins
        # line up row by row with the MDS points.
        pts.df$Group = metadata$origin

        p = ggplot(pts.df, aes(x=V1, y=V2, colour=Group)) +
            geom_point(size=2) +
            scale_color_manual(values = cols) +
            xlab(paste0("PC 1 (", pct.contrib[1], "%)")) +
            ylab(paste0("PC 2 (", pct.contrib[2], "%)")) +
            ggtitle(paste(coverage, "fold subset")) +
            theme_classic() +
            theme(panel.border=element_rect(colour = "black", fill=NA),
                  legend.position="bottom")
        # Inside a loop a ggplot object is only drawn when printed explicitly.
        print(p)
    }
}, finally = dev.off()))