## Preliminaries

In [1]:
library(ape)
library(ggtree)
library(phangorn)
library(Quartet)
library(adephylo)
library(ggplot2)
library(reshape2)
library(tidyverse)

Registered S3 method overwritten by 'ggtree':
  method      from 
  identify.gg ggfun

ggtree v3.2.1  For help: https://yulab-smu.top/treedata-book/

If you use ggtree in published research, please cite the most appropriate paper(s):

1. Guangchuang Yu. Using ggtree to visualize data on tree-like structures. Current Protocols in Bioinformatics. 2020, 69:e96. doi:10.1002/cpbi.96
2. Guangchuang Yu, Tommy Tsan-Yuk Lam, Huachen Zhu, Yi Guan. Two methods for mapping and visualizing associated data on phylogeny using ggtree. Molecular Biology and Evolution. 2018, 35(12):3041-3043. doi:10.1093/molbev/msy194
3. Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam. ggtree: an R package for visualization and annotation of phylogenetic trees with their covariates and other associated data. Methods in Ecology and Evolution. 2017, 8(1):28-36. doi:10.1111/2041-210X.12628




Attaching package: ‘ggtree’


The following object is masked from ‘package:ape’:

    rotate


Loading requir

#### Function for displaying face-to-face plot

In [2]:
# Draw two trees face to face, joining every tip to its namesake.
#
# tree1, tree2 - trees to compare; both are ladderized before plotting.
#
# The association table pairs each tip label of `tree2` with itself, so the
# connecting lines link identically named tips in the two trees.
plot_cophylo <- function(tree1, tree2) {
    left_tree <- ladderize(tree1)
    right_tree <- ladderize(tree2)
    tip_names <- tree2$tip.label
    # Two-column matrix: column 1 and column 2 both hold the tip labels.
    tip_links <- matrix(c(tip_names, tip_names), ncol = 2)
    cophyloplot(left_tree, right_tree,
                assoc = tip_links,
                length.line = 4,
                space = 28,
                gap = 3)
}

#### Function to display RF and Quartet metrics

In [3]:
# Print the Robinson-Foulds and quartet scores of `tree` against `ref_tree`.
#
# tree     - tree being evaluated.
# ref_tree - tree it is compared with.
#
# Three lines are printed: the raw Robinson-Foulds value, that value divided
# by the 'N' column of the split status, and the quartet score.
display_metrics <- function(tree, ref_tree) {
    split_status <- SplitStatus(tree, ref_tree)
    quartet_status <- QuartetStatus(tree, ref_tree)

    # Distances: smaller values mean the trees agree more.
    raw_rf <- RawSymmetricDifference(split_status)
    normalized_rf <- raw_rf / split_status[, "N"]
    print(paste("Robinson-Foulds: ", raw_rf))
    print(paste("Robinson-Foulds (normalized)", normalized_rf))

    # Similarity: larger values mean the trees agree more.
    quartet_score <- SymmetricDifference(quartet_status)
    print(paste("Similarity by Quartets: ", quartet_score))
}

## Plotting and comparison of trees

#### Loading reference tree

In [4]:
# Reference tree used as the comparison target. get_quartet_metrics() and
# get_rf_metrics() read `ref_tree` from the global environment, so this cell
# must run before either of them is called.
ref_tree <- read.tree("dummy_tree.nwk")

#### Combinations of simulator parameters

In [5]:
# Configuration names: every upper/lower-case combination of the four letters
# G, X, I and E (G varies slowest, E fastest), followed by a fixed "z".
confs_lower_z <- local({
    flags <- expand.grid(e = c("E", "e"), i = c("I", "i"),
                         x = c("X", "x"), g = c("G", "g"),
                         stringsAsFactors = FALSE)
    paste0(flags$g, flags$x, flags$i, flags$e, "z")
})

# Same sixteen combinations, followed by a fixed "Z".
confs_upper_z <- local({
    flags <- expand.grid(e = c("E", "e"), i = c("I", "i"),
                         x = c("X", "x"), g = c("G", "g"),
                         stringsAsFactors = FALSE)
    paste0(flags$g, flags$x, flags$i, flags$e, "Z")
})

#### Functions to load metrics in dataframe

In [6]:
# Score every simulated tree against the reference tree using quartets.
#
# For each configuration in `confs` and each iteration 1..niters the file
# "<dist>/<dist>_tree_<conf>_iter<i>.nwk" is read and scored with
# SymmetricDifference(QuartetStatus(tree, ref_tree)). `ref_tree` is not an
# argument: it is looked up in the enclosing (global) environment.
#
# confs  - character vector of configuration names; used as column names.
# dist   - name used both as the directory and as the file-name prefix.
# niters - number of iterations (tree files) per configuration.
#
# Returns a data frame with `niters` rows and one column per configuration.
get_quartet_metrics <- function (confs, dist, niters) {
    # Size the frame from `niters`; a fixed row count made every other value
    # of `niters` fail on column assignment.
    df <- setNames(data.frame(matrix(ncol = length(confs), nrow = niters)), confs)
    for (conf in confs) {
        # seq_len() gives an empty sequence for niters == 0 (1:0 would not).
        # vapply() requires exactly one score per file.
        df[[conf]] <- vapply(seq_len(niters), function(i) {
            tree <- read.tree(paste0(dist, "/", dist, "_tree_", conf, "_iter", i, ".nwk"))
            SymmetricDifference(QuartetStatus(tree, ref_tree))
        }, numeric(1))
    }
    df
}

# Score every simulated tree against the reference tree using splits.
#
# For each configuration in `confs` and each iteration 1..niters the file
# "<dist>/<dist>_tree_<conf>_iter<i>.nwk" is read and scored as
# RawSymmetricDifference(status) / status[, "N"], where status is
# SplitStatus(tree, ref_tree). `ref_tree` is not an argument: it is looked up
# in the enclosing (global) environment.
#
# confs  - character vector of configuration names; used as column names.
# dist   - name used both as the directory and as the file-name prefix.
# niters - number of iterations (tree files) per configuration.
#
# Returns a data frame with `niters` rows and one column per configuration.
get_rf_metrics <- function (confs, dist, niters) {
    # Size the frame from `niters`; a fixed row count made every other value
    # of `niters` fail on column assignment.
    df <- setNames(data.frame(matrix(ncol = length(confs), nrow = niters)), confs)
    for (conf in confs) {
        # seq_len() gives an empty sequence for niters == 0 (1:0 would not).
        # vapply() requires exactly one score per file.
        df[[conf]] <- vapply(seq_len(niters), function(i) {
            tree <- read.tree(paste0(dist, "/", dist, "_tree_", conf, "_iter", i, ".nwk"))
            split_status <- SplitStatus(tree, ref_tree)
            RawSymmetricDifference(split_status) / split_status[, "N"]
        }, numeric(1))
    }
    df
}

#### Functions to plot metrics and combinations of simulator parameters as a grid

In [7]:
# Box plots of the quartet score, one panel per configuration.
#
# df - data frame with one column per configuration name (e.g. "GXIEz") and
#      one row per iteration, as returned by get_quartet_metrics().
#
# Column names are cut after characters 2 and 4: the first two letters select
# the facet row (`gx`), the next two the facet column (`ie`), and the rest is
# discarded. Returns the ggplot object.
plot_quartet <- function (df) {
  df %>%
    pivot_longer(everything()) %>%
    rename(quartet = value) %>%
    # Two cut positions give three pieces; NA drops the third explicitly so
    # `into` has the documented length of length(sep) + 1.
    separate(name, into = c("gx", "ie", NA), sep = c(2, 4)) %>%
    ggplot(aes(x = "", y = quartet)) +
    geom_boxplot() +
    facet_grid(rows = vars(gx), cols = vars(ie), scales = "free") +
    theme(axis.ticks.x = element_blank(), axis.title.x = element_blank())
}

# Box plots of the normalized Robinson-Foulds value, one panel per
# configuration.
#
# df - data frame with one column per configuration name (e.g. "GXIEz") and
#      one row per iteration, as returned by get_rf_metrics().
#
# Column names are cut after characters 2 and 4: the first two letters select
# the facet row (`gx`), the next two the facet column (`ie`), and the rest is
# discarded. Returns the ggplot object.
plot_rf <- function (df) {
  df %>%
    pivot_longer(everything()) %>%
    rename(RF = value) %>%
    # Two cut positions give three pieces; NA drops the third explicitly so
    # `into` has the documented length of length(sep) + 1.
    separate(name, into = c("gx", "ie", NA), sep = c(2, 4)) %>%
    ggplot(aes(x = "", y = RF)) +
    geom_boxplot() +
    facet_grid(rows = vars(gx), cols = vars(ie), scales = "free") +
    theme(axis.ticks.x = element_blank(), axis.title.x = element_blank())
}

#### Rank vs. Reference

In [8]:
# Build the plot before opening the device, so a failure while reading the
# trees cannot leave a png device open.
rank_quartet_lower_plot <- get_quartet_metrics(confs_lower_z, "rank", 10) %>%
    plot_quartet()
png("ref-vs-rank_quartet_lower-z.png")
# Explicit print(): ggplot objects are only drawn when printed, and
# auto-printing does not happen under source() or inside functions.
print(rank_quartet_lower_plot)
dev.off()

In [9]:
# Build the plot before opening the device, so a failure while reading the
# trees cannot leave a png device open.
rank_quartet_upper_plot <- get_quartet_metrics(confs_upper_z, "rank", 10) %>%
    plot_quartet()
png("ref-vs-rank_quartet-upper-z.png")
# Explicit print(): ggplot objects are only drawn when printed, and
# auto-printing does not happen under source() or inside functions.
print(rank_quartet_upper_plot)
dev.off()

In [10]:
# Build the plot before opening the device, so a failure while reading the
# trees cannot leave a png device open.
rank_rf_lower_plot <- get_rf_metrics(confs_lower_z, "rank", 10) %>%
    plot_rf()
png("ref-vs-rank_rf-lower-z.png")
# Explicit print(): ggplot objects are only drawn when printed, and
# auto-printing does not happen under source() or inside functions.
print(rank_rf_lower_plot)
dev.off()

In [11]:
# Build the plot before opening the device, so a failure while reading the
# trees cannot leave a png device open.
rank_rf_upper_plot <- get_rf_metrics(confs_upper_z, "rank", 10) %>%
    plot_rf()
png("ref-vs-rank_rf-upper-z.png")
# Explicit print(): ggplot objects are only drawn when printed, and
# auto-printing does not happen under source() or inside functions.
print(rank_rf_upper_plot)
dev.off()

#### Rank-Indel vs. Reference

#### DCJ vs. Reference

In [12]:
# Build the plot before opening the device, so a failure while reading the
# trees cannot leave a png device open.
dcj_quartet_lower_plot <- get_quartet_metrics(confs_lower_z, "dcj", 10) %>%
    plot_quartet()
png("ref-vs-dcj_quartet_lower-z.png")
# Explicit print(): ggplot objects are only drawn when printed, and
# auto-printing does not happen under source() or inside functions.
print(dcj_quartet_lower_plot)
dev.off()

In [13]:
# Build the plot before opening the device, so a failure while reading the
# trees cannot leave a png device open.
dcj_quartet_upper_plot <- get_quartet_metrics(confs_upper_z, "dcj", 10) %>%
    plot_quartet()
png("ref-vs-dcj_quartet_upper-z.png")
# Explicit print(): ggplot objects are only drawn when printed, and
# auto-printing does not happen under source() or inside functions.
print(dcj_quartet_upper_plot)
dev.off()

In [14]:
# Build the plot before opening the device, so a failure while reading the
# trees cannot leave a png device open.
dcj_rf_lower_plot <- get_rf_metrics(confs_lower_z, "dcj", 10) %>%
    plot_rf()
png("ref-vs-dcj_rf_lower-z.png")
# Explicit print(): ggplot objects are only drawn when printed, and
# auto-printing does not happen under source() or inside functions.
print(dcj_rf_lower_plot)
dev.off()

In [15]:
# Build the plot before opening the device, so a failure while reading the
# trees cannot leave a png device open.
dcj_rf_upper_plot <- get_rf_metrics(confs_upper_z, "dcj", 10) %>%
    plot_rf()
png("ref-vs-dcj_rf_upper-z.png")
# Explicit print(): ggplot objects are only drawn when printed, and
# auto-printing does not happen under source() or inside functions.
print(dcj_rf_upper_plot)
dev.off()