In [1]:
import os
import shutil
import subprocess
from copy import deepcopy
from subprocess import Popen, PIPE

import pandas as pd
from ete3 import Tree

from utils.revbayes import revbayes_template, treealign, read_nexus_sum_into_ete3



In [2]:
# --- Configuration: edit these three values for your own environment ---

# Directory of the training run to evaluate (metric logs and exported traits
# files are read from here).
run = '/mnt/cluster/Training_Results/rove/ROVE_RESNET50_FROZEN_NORMALIZE_multisim4_2022-9-8-9-46-4'

# Working directory for the RevBayes inputs and outputs.
# Keep this path short and free of unusual characters: RevBayes has been
# observed to refuse to run on some paths (certain characters, or possibly
# excessive length).
out_dir = '/home/rob/revbayes_run/consistency_test/'

# Ground-truth phylogeny; change to the path of your own phylogeny file.
# To inspect it, run `print(gt)` or `gt.show()` in a separate cell.
gt = Tree('phylogeny.nh')

In [None]:
# Evaluate a single training run (`run`):
#   1. collect the best logged nmi / recall values for the Train, Val and Test sets;
#   2. run RevBayes `n_checks` times on the run's Test traits file, each time in
#      its own sub-directory of `out_dir` (an existing OUTSUMFILE is reused);
#   3. score every resulting tree against the ground-truth phylogeny `gt`.
# The result is one row per run in `out`, and the DataFrame `df` indexed by run path.
out = []
print_output = True    # stream RevBayes stdout into the notebook while it runs

sets = ['Train', 'Val', 'Test']
n_checks = 10          # number of repeated RevBayes runs on the same traits file
min_log_rows = 50      # metric logs shorter than this are treated as missing
                       # NOTE(review): presumably filters out unfinished runs - confirm
missing = float('nan')

# Column layout of every row appended to `out`; built up front so that the
# names can never drift out of sync with the number of values in a row.
metric_cols = [f'{_set}_{metric}' for _set in sets for metric in ('nmi', 'recall@1')]
align_cols = [col
              for check_no in range(n_checks)
              for col in (f'Test_Align_{check_no}', f'Test_Max_{check_no}')]
columns = ['name'] + metric_cols + align_cols

test_nmi_fp = os.path.join(run, 'CSV_Logs', 'Data_Test_discriminative_nmi.csv')
traits_fp = os.path.join(run, 'traits_discriminative_Test.nex')

# Only evaluate runs that produced both a Test metric log and a Test traits file.
if os.path.exists(test_nmi_fp) and os.path.exists(traits_fp):
    os.makedirs(out_dir, exist_ok=True)

    sub_out = [run]

    # --- logged metrics -------------------------------------------------
    for _set in sets:
        nmi_fp = os.path.join(run, 'CSV_Logs', f'Data_{_set}_discriminative_nmi.csv')
        recall_fp = os.path.join(run, 'CSV_Logs', f'Data_{_set}_discriminative_e_recall.csv')

        # Always contribute exactly two values per set. Missing or too-short
        # logs become NaN so the remaining values stay under the right columns.
        set_metrics = [missing, missing]
        if os.path.exists(nmi_fp) and os.path.exists(recall_fp):
            nmi = pd.read_csv(nmi_fp)
            recall = pd.read_csv(recall_fp)
            if len(nmi) >= min_log_rows:
                # Best value of the first column, scaled by 100.
                set_metrics = [nmi.max().iloc[0] * 100, recall.max().iloc[0] * 100]
        sub_out += set_metrics

    # --- repeated tree inference and alignment ---------------------------
    for check_no in range(n_checks):
        out_tree_dir = os.path.join(out_dir, str(check_no))
        os.makedirs(out_tree_dir, exist_ok=True)

        out_tree_fp = os.path.join(out_tree_dir, 'OUTSUMFILE')
        if not os.path.exists(out_tree_fp):
            # Copy the traits file to somewhere RevBayes can run on it.
            new_traits_fp = os.path.join(out_tree_dir, 'traits_discriminative_Test.nex')
            shutil.copy(traits_fp, new_traits_fp)

            # Write the rev script, pointing at this check's directory.
            # 'Test' is passed explicitly rather than relying on whatever value
            # the metrics loop variable happened to be left with.
            bayes_str = revbayes_template.format(_dir=out_tree_dir, _set='Test')
            bayes_fp = os.path.join(out_dir, 'run_traits.rev')
            with open(bayes_fp, 'w') as f:
                f.write(bayes_str)
            assert os.path.exists(bayes_fp), bayes_fp
            print(bayes_fp)

            print('About to start')
            # Run the rev script inside the RevBayes Singularity image.
            cmd = ['singularity', 'run', '--app', 'rb', 'RevBayes_Singularity_1.1.1.simg', bayes_fp]
            if print_output:
                with Popen(cmd, stdout=PIPE, bufsize=1, universal_newlines=True) as p:
                    for line in p.stdout:
                        print(line, end='')
                # Leaving the `with` block waits for the process, so the exit
                # status is available here.
                returncode = p.returncode
            else:
                returncode = subprocess.run(cmd, capture_output=True).returncode

            # Fail here with a clear error instead of later, when the missing
            # or incomplete summary tree cannot be parsed.
            if returncode != 0:
                raise subprocess.CalledProcessError(returncode, cmd)

        # Read the summary tree written for this check.
        tree = read_nexus_sum_into_ete3(out_tree_fp)

        # Restrict a copy of the ground truth to the leaves present in the
        # inferred tree (`gt` itself must stay intact for the next check).
        test_gt = deepcopy(gt)
        test_gt.prune([x.name for x in tree.get_leaves()])

        # Alignment score of the inferred tree against the pruned ground truth.
        align, align_max = treealign(tree, test_gt)

        sub_out += [align, align_max]
    out += [sub_out]

# One row per evaluated run; rows with any missing value are dropped.
df = pd.DataFrame(out, columns=columns).set_index('name').dropna()

/home/rob/revbayes_run/consistency_test/run_traits.rev
About to start

RevBayes version (1.1.1)
Build from tags/1.1.1 (rapture-588-gae00cc) on Thu Feb 11 22:41:40 UTC 2021

Visit the website www.RevBayes.com for more information about RevBayes.

RevBayes is free software released under the GPL license, version 3. Type 'license()' for details.

To quit RevBayes type 'quit()' or 'q()'.


> source("/home/rob/revbayes_run/consistency_test/run_traits.rev")
   Processing file "/home/rob/revbayes_run/consistency_test/run_traits.rev"
   Successfully read one character matrix from file '/home/rob/revbayes_run/consistency_test/0/traits_discriminative_Test.nex'

   Running burn-in phase of Monte Carlo sampler for 50000 iterations.
   This simulation runs 1 independent replicate.
   The simulator uses 5 different moves in a random move schedule with 22 moves per iteration

Progress:
0---------------25---------------50---------------75--------------100
**********************************************

150000      |        4761.77   |        4835.81   |       -74.0391   |     0.06088476   |   00:00:50   |   00:01:56   |
200000      |        4760.79   |        4834.84   |       -74.0553   |     0.05972943   |   00:01:07   |   00:01:40   |
250000      |        4752.58   |        4826.58   |       -74.0035   |     0.05996948   |   00:01:24   |   00:01:24   |
300000      |        4762.35   |        4836.26   |       -73.9132   |     0.06349453   |   00:01:41   |   00:01:07   |
350000      |        4759.32   |        4833.41   |       -74.0855   |     0.05996374   |   00:01:58   |   00:00:50   |
400000      |        4763.11   |        4837.04   |       -73.9293   |     0.06338852   |   00:02:15   |   00:00:33   |
450000      |        4762.78   |        4836.65   |       -73.8748   |      0.0644157   |   00:02:31   |   00:00:16   |
500000      |        4761.37   |        4835.45   |       -74.0735   |     0.05982206   |   00:02:48   |   00:00:00   |
   Processing file "/home/rob/revbayes_r