In [None]:
library(plyr,  warn.conflicts=F)
library(dplyr, warn.conflicts=F)
library(tidyr, warn.conflicts=F)
library(ggplot2)
library(caTools)

In [None]:
# Sample metadata table (tab-delimited); later code reads its Run and origin columns.
metadata <- read.delim(file = "chlamy_meta.tab")

In [None]:
# Read a square, tab-delimited distance matrix and reorder it by run.
#
# - filename: Path to the matrix file. The first line is a header and the first
#             column holds the row names.
# - runs:     Run identifiers giving the desired row/column order. Defaults to
#             the Run column of the global `metadata` table (evaluated lazily,
#             at call time).
#
# Returns a numeric matrix whose rows and columns follow the order of `runs`.
# Columns are selected by row position, so the file is assumed to list its
# columns in the same order as its rows.
# Stops with an error if any requested run is absent from the file, instead of
# silently returning rows/columns full of NA.
read.distmat <- function(filename, runs = metadata$Run) {
    dm <- as.matrix(read.delim(filename, header = TRUE, row.names = 1))
    idxs <- match(runs, row.names(dm))
    if (anyNA(idxs)) {
        stop("runs missing from ", filename, ": ",
             paste(runs[is.na(idxs)], collapse = ", "),
             call. = FALSE)
    }
    # drop = FALSE keeps a matrix even when only one run is requested.
    dm[idxs, idxs, drop = FALSE]
}

In [None]:
# Distance matrix for the full (unsubsampled) data set.
full <- read.distmat("kwip/full_wip.dist")
# Put the metadata rows in the same order as the samples of that matrix.
run_order <- match(rownames(full), metadata$Run)
metadata <- metadata[run_order, ]

In [None]:
# Classical multidimensional scaling of the full distance matrix.
# NOTE(review): the result is stored under the name `c`, which shadows base::c
# as a variable; calls such as c(1, 2) still resolve to the function.
c <- cmdscale(d = as.dist(full))

In [None]:
# Multiple plot function
#
# Draws several plots on a single page using a grid layout.
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - file:   Unused; kept so that existing calls which pass it keep working.
# - cols:   Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
# - xlab:   Optional. When not NULL, grid.xaxis() is called after all plots
#           have been drawn. NOTE(review): the original tested `xlab != NULL`
#           on an undefined variable, which always failed; the intended use of
#           this axis should be confirmed.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
# With no plots at all the function draws nothing and returns NULL invisibly.
#
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL, xlab=NULL) {
  library(grid)

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)

  numPlots <- length(plots)

  # Nothing to draw: avoid building a zero-row layout and indexing plots[[1]].
  if (numPlots == 0) {
    return(invisible(NULL))
  }

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    nrows <- ceiling(numPlots / cols)
    layout <- matrix(seq_len(cols * nrows), ncol = cols, nrow = nrows)
  }

  if (numPlots == 1) {
    print(plots[[1]])

  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    # Make each plot, in the correct location
    for (i in seq_len(numPlots)) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
    # is.null() is required here: comparing against NULL with != never yields
    # a usable condition.
    if (!is.null(xlab)) {
      grid.xaxis()
    }
  }
}

In [None]:
# Coverage levels for which a distance matrix kwip/<coverage>_wip.dist is read.
coverages <- c("0.01x", "0.1x", "0.5x", "1x", "2x", "4x", "8x", "12x", "15x", "25x",
               "50x", "75x", "100x", "150x", "200x", "full")

# Coverage levels whose plots are additionally kept (stripped of axes and
# legend) in `plots` for the summary grid.
plot_covs <- c("0.1x", "1x", "2x",
               "4x", "8x", "15x",
               "50x", "150x", "full")

# Manual colour palette for the Group aesthetic (metadata$origin).
# NOTE(review): presumably one colour per origin level -- confirm there are
# exactly four.
cols <- c("light blue", "blue", "dark green", "red")

plots <- list()
pdf("all-pcoas.pdf")
# Run the whole loop under tryCatch so the PDF device is closed even when one
# coverage level fails (e.g. a missing file); otherwise the open device would
# keep capturing later plots.
tryCatch({
    for (coverage in coverages) {
        fname <- paste0("kwip/", coverage, "_wip.dist")
        mat <- read.distmat(fname)
        mds <- cmdscale(mat, k = 2, eig = TRUE)
        eigs <- mds$eig
        # Percentage of the eigenvalue sum carried by each axis, rounded.
        pct.contrib <- round(eigs / sum(eigs) * 100)

        # Invert axes to match the paper (Flowers et al.) figure.
        # The sample here is one of the two red ones in the top right corner.
        for (axis in 1:2) {
            if (mds$points["SRR1734600", axis] < 0) {
                mds$points[, axis] <- -mds$points[, axis]
            }
        }

        # mds$points has no column names, so the data frame columns are V1, V2.
        pts.df <- as.data.frame(mds$points)
        pts.df$Group <- metadata$origin

        # Layers shared by the full-page plot and the compact grid panel.
        base_plot <- ggplot(pts.df, aes(x = V1, y = V2, colour = Group)) +
            geom_point(size = 2) +
            scale_color_manual(values = cols)

        # Full-page version with axis titles and legend, written to the PDF.
        p <- base_plot +
            xlab(paste0("PC 1 (", pct.contrib[1], "%)")) +
            ylab(paste0("PC 2 (", pct.contrib[2], "%)")) +
            ggtitle(paste(coverage, "fold subset")) +
            theme_classic() +
            theme(panel.border = element_rect(colour = "black", fill = NA),
                  legend.position = "bottom")
        print(p)

        if (coverage %in% plot_covs) {
            # Compact version without legend, axis titles, ticks or labels.
            p <- base_plot +
                ggtitle(coverage) +
                theme_classic() +
                theme(panel.border = element_rect(colour = "black", fill = NA),
                      legend.position = "none",
                      axis.title.x = element_blank(),
                      axis.title.y = element_blank(),
                      axis.ticks = element_blank(),
                      axis.text.x = element_blank(),
                      axis.text.y = element_blank())
            plots[[length(plots) + 1]] <- p
        }
    }
}, finally = dev.off())

In [None]:
# Write the compact PCoA panels collected in `plots` to one PDF page, arranged
# in a 3 x 3 grid filled row by row.
pdf("subset-pcoa-matrix.pdf")
layout <- matrix(1:9, ncol = 3, byrow = TRUE)
# Close the PDF device even if drawing fails, so a failed run does not leave
# the device open. `cols` is ignored by multiplot because `layout` is given.
tryCatch(
    multiplot(plotlist = plots, cols = 3, layout = layout),
    finally = dev.off()
)