### Response to Reviews
# Phylogenetic comparative methods

### using [BayesTraits](http://www.evolution.rdg.ac.uk/BayesTraits.html) 3.0.1

#### Motif set 1: (Sp-D, Sp-L, motif Z)
#### Motif set 2: (motif X, motif Y, Maze)
#### All fish (2,837 genera for which trait data are available)


In [None]:
# Print R's built-in `version` object so the session's R build is recorded
# in the notebook output.
version

In [None]:
library(tidyverse)
library(ape)
library(phytools)
library(fishtree)

# Report the installed version of each package used by this notebook.
packageVersion("tidyverse")
packageVersion("ape")
packageVersion("phytools")
packageVersion("fishtree")


In [None]:
# Retrieve the fishtree chronogram; the result is not assigned, so at top
# level it is simply auto-printed as a summary of the tree.
fishtree_phylogeny(type = "chronogram")

### Prepare trees and trait data

In [None]:
make_statedata_trees_AllFish <- function() {
    # Build the tree and trait input files for the all-fish analysis.
    #
    # Reads  ../annot/fish-patterns-db-gen.csv (one row per genus, a `genus`
    #        column plus one column per pattern motif).
    # Writes ./trees/AllFish_500.trees: 500 genus-level trees, each keeping
    #        one randomly drawn species per genus from the fishtree chronogram.
    # Writes ./statedata/AllFish_state_<ptn1>_<ptn2>_<ptn3>.txt: one
    #        tab-separated genus/state table per motif triple.
    #
    # Takes no arguments and is called for its file side effects. The
    # ./trees and ./statedata directories are not created here and must exist.

    df_ptn_gen <- read.csv("../annot/fish-patterns-db-gen.csv")

    tr <- fishtree_phylogeny(type = "chronogram")
    tr_sps <- tr$tip.label
    # genus = the part of each tip label before the first underscore
    df_tr_gensps <- data.frame(
        genus = str_split(tr_sps, "_", simplify = TRUE)[, 1],
        species = tr_sps
    )

    # keep genera for which trait data are available
    df_tr_ptn_gensps <- df_tr_gensps %>% filter(genus %in% df_ptn_gen$genus)

    n_trees <- 500
    set.seed(42)

    # preallocate instead of growing the list with append()
    tr2s <- vector("list", n_trees)
    for (i in seq_len(n_trees)) {
        # One random representative species per genus. sample_n() and the
        # loop order are kept as they were so the draws made under the fixed
        # seed are not disturbed.
        df_sample <- df_tr_ptn_gensps %>%
            group_by(genus) %>%
            sample_n(1) %>%
            ungroup()

        tr2 <- keep.tip(tr, as.character(df_sample$species))

        # use genera names as tip.labels
        tr2$tip.label <- str_split(tr2$tip.label, "_", simplify = TRUE)[, 1]

        tr2s[[i]] <- force.ultrametric(tr2)
    }

    trees_file <- paste0("./trees/", "AllFish", "_500.trees")
    write.nexus(tr2s, file = trees_file, translate = TRUE)

    # Read the trees back so the trait table is aligned to the tip order of
    # the file that is actually written to disk.
    tr2s <- read.nexus(file = trees_file)

    # align and keep trait data for genera included in the tree
    odr <- match(tr2s[[1]]$tip.label, df_ptn_gen$genus)
    odr <- odr[!is.na(odr)]
    df_ptn_gen2 <- df_ptn_gen[odr, ]

    # Write the 8-state coding of three 0/1 motif columns to a state file.
    #   ptn1, ptn2, ptn3: column names in the trait table.
    # State = presence pattern (ptn1, ptn2, ptn3):
    #   1 = 000, 2 = 100, 3 = 010, 4 = 001, 5 = 110, 6 = 101, 7 = 011, 8 = 111
    # Rows matching none of the cases (e.g. a missing value) get NA, which is
    # written as "-".
    make_multistate_AllFish <- function(ptn1, ptn2, ptn3) {
        df_ms <- df_ptn_gen2[c("genus", ptn1, ptn2, ptn3)]

        # `[[` extracts plain vectors, so every case_when() condition is a
        # logical vector; `[` would give one-column data frames whose
        # comparisons are logical matrices.
        p1 <- df_ms[[ptn1]]
        p2 <- df_ms[[ptn2]]
        p3 <- df_ms[[ptn3]]

        df_ms$state <- case_when(
            p1 == 0 & p2 == 0 & p3 == 0 ~ 1,
            p1 == 1 & p2 == 0 & p3 == 0 ~ 2,
            p1 == 0 & p2 == 1 & p3 == 0 ~ 3,
            p1 == 0 & p2 == 0 & p3 == 1 ~ 4,
            p1 == 1 & p2 == 1 & p3 == 0 ~ 5,
            p1 == 1 & p2 == 0 & p3 == 1 ~ 6,
            p1 == 0 & p2 == 1 & p3 == 1 ~ 7,
            p1 == 1 & p2 == 1 & p3 == 1 ~ 8
        )

        state_file <- paste0("./statedata/", "AllFish", "_state_",
                             ptn1, "_", ptn2, "_", ptn3, ".txt")
        write.table(df_ms[c("genus", "state")],
            file = state_file,
            sep = "\t", na = "-", row.names = FALSE, col.names = FALSE, quote = FALSE)
    }

    # Motif set 1
    make_multistate_AllFish("Sp_D", "Sp_L", "Maze")
    make_multistate_AllFish("Sp_D", "Sp_L", "Sddl")
    make_multistate_AllFish("Sp_D", "Sp_L", "Eyes")
    make_multistate_AllFish("Sp_D", "Sp_L", "Area")
    make_multistate_AllFish("Sp_D", "Sp_L", "St_H")
    make_multistate_AllFish("Sp_D", "Sp_L", "St_V")
    make_multistate_AllFish("Sp_D", "Sp_L", "St_D")
    make_multistate_AllFish("Sp_D", "Sp_L", "Bltc")
    make_multistate_AllFish("Sp_D", "Sp_L", "Mono")

    # Motif set 2
    make_multistate_AllFish("St_H", "St_V", "Maze")
    make_multistate_AllFish("St_H", "Sp_D", "Maze")
    make_multistate_AllFish("St_H", "Sp_L", "Maze")
    make_multistate_AllFish("St_V", "Sp_D", "Maze")
    make_multistate_AllFish("St_V", "Sp_L", "Maze")

}

In [None]:
make_BayesTraits_commands_AllFish <- function() {
    # Write the BayesTraits command files and cluster job scripts for every
    # motif triple and replicate.
    #
    # For each (ptn1, ptn2, ptn3, replicate) this writes five command files
    # under ./commands/ (model labels IND, indZ, indX, indY, DEP) and one job
    # script under ./jobs/ that picks the model from $SGE_TASK_ID.
    # Returns a list of the job script file names (without the ./jobs/ prefix),
    # in the order they were generated. The ./commands and ./jobs directories
    # are not created here.

    # Run settings; kept as strings because they are only pasted into text.
    n_sample <- "5000"
    n_iter <- "6000000"
    n_burnin <- "1000000"
    hp_lo <- "0"
    hp_hi <- "10"
    n_cores <- "4"
    stones_n <- "100"
    stones_iter <- "1000"
    run_tag <- paste0("rjMCMC_SCT_HPexp_", hp_lo, "_", hp_hi)

    # Rates fixed to zero in every model.
    zero_rates <- c(
        "res q15 q16 q17 q18  0",
        "res q23 q24 q27 q28  0",
        "res q32 q34 q36 q38  0",
        "res q42 q43 q45 q48  0",
        "res q51 q54 q56 q57  0",
        "res q61 q63 q65 q67  0",
        "res q71 q72 q75 q76  0",
        "res q81 q82 q83 q84  0"
    )

    # Additional rate restrictions per model label, listed in the order the
    # command files are written. DEP adds nothing beyond the zero rates.
    model_rates <- list(
        IND = c(
            "res q35 q46 q78  q12",
            "res q25 q47 q68  q13",
            "res q26 q37 q58  q14",
            "res q53 q64 q87  q21",
            "res q52 q74 q86  q31",
            "res q62 q73 q85  q41"
        ),
        indZ = c(
            "res q26 q37 q58  q14",
            "res q62 q73 q85  q41",
            "res q78 q35",
            "res q87 q53",
            "res q46 q12",
            "res q64 q21",
            "res q47 q13",
            "res q74 q31",
            "res q68 q25",
            "res q86 q52"
        ),
        indX = c(
            "res q35 q78 q46  q12",
            "res q53 q87 q64  q21",
            "res q26 q14",
            "res q62 q41",
            "res q58 q37",
            "res q85 q73",
            "res q25 q13",
            "res q52 q31",
            "res q68 q47",
            "res q86 q74"
        ),
        indY = c(
            "res q25 q47 q68  q13",
            "res q52 q74 q86  q31",
            "res q35 q12",
            "res q53 q21",
            "res q78 q46",
            "res q87 q64",
            "res q37 q14",
            "res q73 q41",
            "res q58 q26",
            "res q85 q62"
        ),
        DEP = character(0)
    )

    # Model run by each branch of the job script's `case` (task ids 1 to 5).
    task_order <- c("indZ", "IND", "DEP", "indX", "indY")

    # Write all files for one motif triple and replicate; returns the job
    # script's file name.
    make_commands_AllFish <- function(ptn1, ptn2, ptn3, rep_no) {
        fbase <- paste0("AllFish_mini", "_state_", ptn1, "_", ptn2, "_", ptn3)
        trees_file <- paste0("./trees/", "AllFish", "_500.trees")
        state_file <- paste0("./statedata/", "AllFish", "_state_",
                             ptn1, "_", ptn2, "_", ptn3, ".txt")
        run_suffix <- paste0(run_tag, "_rep_", rep_no)

        command_path <- function(model) {
            paste0("./commands/command_core", n_cores, "_", fbase, "_",
                   model, "_", run_suffix, ".txt")
        }

        for (model in names(model_rates)) {
            extra <- model_rates[[model]]
            command_lines <- c(
                # NOTE(review): "1" and "2" are presumably the BayesTraits
                # model / analysis menu choices -- confirm against the manual.
                "1",
                "2",
                paste0("LogFile ", "./logs/", fbase, "_", model, "_", run_suffix),
                paste("RJHP exp", hp_lo, hp_hi),
                paste("Cores", n_cores),
                "",
                paste("Sample", n_sample),
                paste("Iterations", n_iter),
                paste("BurnIn", n_burnin),
                "ScaleTrees 0.1",
                "",
                zero_rates,
                "",
                # model-specific block plus its trailing blank line, if any
                if (length(extra) > 0) c(extra, ""),
                paste("Stones", stones_n, stones_iter),
                "",
                "Run"
            )
            write(command_lines, file = command_path(model))
        }

        # One four-line `case` branch per task id.
        case_lines <- unlist(lapply(seq_along(task_order), function(k) {
            model <- task_order[k]
            c(
                paste0("    ", k, ") "),
                paste0("    job_slackpost.sh '", fbase, " [", model, "] start'"),
                paste0("    ./BayesTraitsV3-Threaded ", trees_file, " ",
                       state_file, " < ", command_path(model), ";;"),
                ""
            )
        }))

        job_file <- paste0(fbase, "_core", n_cores, "_", run_suffix, ".job")
        job_contents <- c(
            "#!/bin/bash",
            "#$ -S /bin/bash",
            "#$ -cwd",
            paste("#$ -pe smp", n_cores),
            "",
            "export LD_LIBRARY_PATH=$HOME/lcl/lib64:$HOME/lcl/lib:$LD_LIBRARY_PATH",
            "export PATH=$HOME/bin:$HOME/lcl/bin:$PATH",
            "n=$SGE_TASK_ID",
            "",
            "cd ..",
            "pwd",
            "",
            "date",
            ". job_start_slackpost.sh",
            "",
            "case ${n} in",
            case_lines,
            "esac",
            "",
            ". job_end_slackpost.sh"
        )
        write(job_contents, file = paste0("./jobs/", job_file))

        job_file
    }

    # Motif triples in generation order: set 1 (Sp_D, Sp_L, Z), then
    # set 2 (X, Y, Maze).
    set1_third <- c("Maze", "Sddl", "Eyes", "St_H", "St_V",
                    "St_D", "Bltc", "Mono", "Area")
    set2_pairs <- list(
        c("St_H", "St_V"),
        c("St_H", "Sp_D"),
        c("St_H", "Sp_L"),
        c("St_V", "Sp_D"),
        c("St_V", "Sp_L")
    )
    triples <- c(
        lapply(set1_third, function(z) c("Sp_D", "Sp_L", z)),
        lapply(set2_pairs, function(xy) c(xy, "Maze"))
    )

    # Three replicates per triple, flattened into one list of file names.
    job_files <- unlist(
        lapply(triples, function(tri) {
            lapply(c(1, 2, 3), function(r) {
                make_commands_AllFish(tri[1], tri[2], tri[3], r)
            })
        }),
        recursive = FALSE
    )

    return(job_files)
}

In [None]:
# 1. Write the sampled trees and the multistate trait files.
make_statedata_trees_AllFish()

# 2. Write the BayesTraits command files and job scripts, then save the
#    names of the generated job scripts, one per line.
job_files <- make_BayesTraits_commands_AllFish()
write_lines(
    job_files,
    paste0("./jobs/", "AllFish_mini", "_jobs_cores4_rep123.txt")
)