In [1]:
# Import libraries and set colors for plotting
import sys
import ete3
import os
import random
from os import walk
sys.path.insert(0,'../scripts/')
from classDeclarationsAndFunctions import RootedTree
from fileIO import ReadRootedTree
from MarkovModels import GenerateQForStationaryDistribution, Generate_rate_vectors
from math import floor, ceil
from config import projectPath
import subprocess as sub
import matplotlib.pyplot as plt
import numpy as np
from math import log
import pandas as pd
 
# Project sub-directories, each formed by appending a folder name to projectPath.
data_path, figures_path, results_path = (
    projectPath + folder for folder in ("data/", "figures/", "results/")
)

# Sequence lengths considered in this notebook.
seqlen_list = [1000, 2000, 5000, 10000]

# Methods under comparison, in the order used below.
# Alternative ordering kept for reference:
# method_list = ['fasttree','iqtree','mstbackbone','raxmlng']
method_list = ['mstbackbone', 'fasttree', 'raxmlng', 'iqtree']

# Plot color per method; every value is a one-element list holding a color name.
# 'mst' has a color even though it is not part of method_list.
method_colors_map = {
    'mstbackbone': ['darkblue'],
    'fasttree': ['black'],
    'raxmlng': ['fuchsia'],
    'iqtree': ['darkorange'],
    'mst': ['cyan'],
}


def Check_results_file_names(method,exp_id):
    """Report whether `method` left usable output files for experiment `exp_id`.

    The log file and the tree file expected for the method must both exist
    and be non-empty. For IQ-TREE the log is additionally scanned for the
    crash marker line; a hit is reported on stdout and counts as a failure.

    Parameters
    ----------
    method : str
        One of "raxmlng", "fasttree", "iqtree", or any name starting with
        "mstbackbone".
    exp_id : str
        Experiment identifier used to build the expected file names.

    Returns
    -------
    bool
        True when both files are present, non-empty and (for IQ-TREE) the
        log holds no crash marker; False otherwise.

    Raises
    ------
    ValueError
        If `method` is not one of the recognised methods.
    """
    # Most outputs live in the per-experiment directory under data_path;
    # RAxML-NG is the exception and writes next to the batch scripts.
    grove_exp_dir = data_path + 'grove_exp/' + exp_id + '/'
    if method == "raxmlng":
        raxml_prefix = projectPath + 'batch_scripts/RAxMLNG_' + exp_id
        log_file_name = raxml_prefix + '.raxml.log'
        tree_file_name = raxml_prefix + '.raxml.bestTree'
    elif method.startswith("mstbackbone"):
        log_file_name = grove_exp_dir + exp_id + '.mstbackbone_log'
        tree_file_name = grove_exp_dir + exp_id + '.newick'
    elif method == "fasttree":
        log_file_name = grove_exp_dir + 'sequences_' + exp_id + '.fas.fasttree_errlog'
        tree_file_name = grove_exp_dir + 'sequences_' + exp_id + '.fas.fasttree_newick'
    elif method == "iqtree":
        log_file_name = grove_exp_dir + 'sequences_' + exp_id + '.fas_iqtree2.0.log'
        tree_file_name = grove_exp_dir + 'sequences_' + exp_id + '.fas_iqtree2.0.treefile'
    else:
        # Previously an unknown method fell through and crashed with an
        # UnboundLocalError on log_file_name; fail with a clear message.
        raise ValueError("Unknown method: " + repr(method))

    # Both files have to be there before their sizes are looked at.
    if not (os.path.exists(log_file_name) and os.path.exists(tree_file_name)):
        return False
    if os.path.getsize(log_file_name) == 0 or os.path.getsize(tree_file_name) == 0:
        return False

    if method == "iqtree":
        # The context manager closes the log on the early return as well.
        with open(log_file_name, "r") as log_file:
            for line in log_file:
                if line.startswith("ERROR: *** IQ-TREE CRASHES WITH SIGNAL ABORTED"):
                    print("IQ-TREE crashes for ", exp_id)
                    return False
    return True

# Starts out empty; nothing in this cell fills it.
tree_id_list = list()

# Table read from a file under data_path; the loop further down iterates its
# "tree_id" column. skipinitialspace drops blanks that follow each delimiter.
tree_id_num_taxa = pd.read_csv(
    data_path + "selected_grove_tree_ids_scalability",
    skipinitialspace=True,
)


In [6]:
# Create one output directory per (gc_diff, tree_id) pair under
# grove_sequences_path, printing the fraction of trees handled so far.
gc_diff_list = [0, 0.1, 0.2, 0.4]
grove_sequences_path = data_path + "simulated_data/grove_sequences/"
frac_trees_processed = 0.0
# NOTE(review): hard-coded count, used only for the progress fraction;
# presumably the number of rows in tree_id_num_taxa - confirm.
tot_num_trees = 115.0

for tree_id in tree_id_num_taxa["tree_id"]:
    for gc_diff in gc_diff_list:
        # The directory name does not depend on sequence length, so it is
        # created once per (gc_diff, tree_id) instead of once per seq_len.
        tree_file_dir = grove_sequences_path + "gc_diff_" + str(gc_diff) + "/" + str(tree_id)
        # os.makedirs avoids passing a hand-built string to a shell, creates
        # missing parent directories, and is a no-op on re-runs.
        os.makedirs(tree_file_dir, exist_ok=True)
    frac_trees_processed += 1.0/tot_num_trees
    print(frac_trees_processed)


0.008695652173913044
0.017391304347826087
0.026086956521739132
0.034782608695652174
0.043478260869565216
0.05217391304347826
0.0608695652173913
0.06956521739130435
0.0782608695652174
0.08695652173913045
0.0956521739130435
0.10434782608695654
0.11304347826086959
0.12173913043478264
0.13043478260869568
0.13913043478260873
0.14782608695652177
0.15652173913043482
0.16521739130434787
0.17391304347826092
0.18260869565217397
0.19130434782608702
0.20000000000000007
0.20869565217391312
0.21739130434782616
0.2260869565217392
0.23478260869565226
0.2434782608695653
0.25217391304347836
0.2608695652173914
0.26956521739130446
0.2782608695652175
0.28695652173913055
0.2956521739130436
0.30434782608695665
0.3130434782608697
0.32173913043478275
0.3304347826086958
0.33913043478260885
0.3478260869565219
0.35652173913043494
0.365217391304348
0.37391304347826104
0.3826086956521741
0.39130434782608714
0.4000000000000002
0.40869565217391324
0.4173913043478263
0.42608695652173934
0.4347826086956524
0.4434782608