### Is QMC working: Episode II.

Reading in Deren's functions...

In [2]:
## imports
import toytree
import itertools
import ipyrad as ip
import subprocess as sps

class QMC_test(object):
    """
    Round-trip test for QMC: build a random tree, decompose it into
    quartets, hand the quartets to the QMC binary, and compare the tree
    that QMC infers against the starting tree.

    Attributes:
        toytree:  the random tree returned by toytree.rtree(ntips).
        tree:     the same underlying tree object as toytree.tree; its tip
                  names are overwritten with integer indexes in __init__.
        namedict: maps each integer tip index back to the original tip name.
    """
    def __init__(self, ntips):
        """Generate a random tree with `ntips` tips and index its tips."""
        
        ## load in tree
        self.toytree = toytree.rtree(ntips)
        ## NOTE: this is a reference, not a copy, so the renaming below
        ## also changes the tip names seen through self.toytree.tree
        self.tree = self.toytree.tree
        self.namedict = {}

        ## convert tip names to indexes
        leaf = 0
        for node in self.tree.traverse():
            if node.is_leaf():
                ## store old name
                self.namedict[leaf] = node.name
                ## set new name
                node.name = leaf
                ## advance counter
                leaf += 1    

        
    ## methods 
    def get_all_combs(self, set1, set2, as_list=False):
        """
        Yield every quartet made of 2 items from set1 and 2 from set2.

        Each quartet is a 4-item list: the sorted pair drawn from set1
        followed by the sorted pair drawn from set2. Returns a generator
        unless as_list=True, in which case a list is returned.
        """
        qiter = (sorted(i) + sorted(j) for (i, j) in itertools.product(
                    itertools.combinations(set1, 2), 
                    itertools.combinations(set2, 2),
                ))
        
        ## option to return as list 
        if as_list:
            return list(qiter)
        ## but returning as generator is more efficient
        else:
            return qiter

        
    def decompose_to_quartets(self):
        """
        Return the set of quartets (4-tuples of tip names) induced by the
        internal edges of self.tree.

        NOTE(review): every quartet is fully sorted before it is stored,
        which discards which two tips sat on each side of the edge. The
        'a,b|c,d' line written later therefore always pairs the two
        smallest names. Behavior is kept as-is so the recorded notebook
        outputs stay reproducible.
        """
                
        ## store quartet sets
        qrts = set()
        
        ## get all tips in tree as a set
        n_all = set(self.tree.get_leaf_names())
    
        ## traverse tree
        for node in self.tree.traverse():
            ## skip root or tip nodes
            if not (node.is_root() or node.is_leaf()):
                
                ## get all tips below this node
                below = set(node.get_leaf_names())
                above = n_all - below

                ## get all combinations of 2 above and 2 below this node
                for qrt in self.get_all_combs(above, below):
                   
                    ## add to quartet set (sorting drops the split, see NOTE)
                    qrts.add(tuple(sorted(qrt)))
        
        ## store qrts  
        return qrts
    
    
    def write_qrts_to_qmc_format(self, qrts):
        """
        Write quartets to ./quartets.txt, one 'a,b|c,d' line per quartet
        (newline separated, no trailing newline). Overwrites the file.
        """
        dump = ["{},{}|{},{}".format(*i) for i in qrts]
        with open("quartets.txt", "w") as outfile:
            outfile.write("\n".join(dump))
            
            
    def run_qmc_on_quartets_file(self):
        """
        Run the QMC binary on ./quartets.txt and return the tree it writes
        to ./tre.tmp as a toytree object with original tip names restored.
        """
        
        ## run qmc on quartets file
        ## NOTE(review): the exit status is not checked, so if qmc fails a
        ## tre.tmp left over from an earlier run would be read -- confirm
        cmd = [ip.bins.qmc, "qrtt=./quartets.txt", "otre=./tre.tmp"]
        proc = sps.Popen(cmd)
        proc.communicate()
        
        ## read in tree file and return as toytree object
        tre = toytree.tree("./tre.tmp")
                
        ## put names back onto qmc tree tips
        for node in tre.tree.traverse():
            if node.is_leaf():
                node.name = self.namedict[int(node.name)]
        
        return tre
        
            
    def run(self):
        """Decompose the tree, write the quartets file, and return the QMC tree."""
        ## get quartets
        qrts = self.decompose_to_quartets()
        
        ## dump to qmc to write quartets file
        self.write_qrts_to_qmc_format(qrts)
        
        ## infer supertree
        tre = self.run_qmc_on_quartets_file()

        return tre
    
    
    def run_reps_on_same_quartets_file(self, nreps):
        """
        Run QMC `nreps` times on one quartets file and print how many of
        the inferred trees differ from self.tree by Robinson-Foulds.

        Uses `self` throughout; the earlier version reached for the
        notebook-global `qmc`, which only worked when an instance of that
        exact name existed and silently tested that instance instead.
        """

        ## draw the true tree
        self.toytree.draw(width=300, height=350)

        ## reuse the same set of quartets in each rep
        qrts = self.decompose_to_quartets()

        ## write quartets to file
        self.write_qrts_to_qmc_format(qrts)

        ## store number of inferred trees with a difference
        diff = 0

        ## iterate over reps
        for rep in range(nreps):
            tre = self.run_qmc_on_quartets_file()

            ## measure difference using robinson-foulds
            ## NOTE(review): self.tree carries integer tip names while tre
            ## has the original names restored -- confirm the comparison
            ## is meaningful when the two trees share no tip labels
            rb = self.tree.robinson_foulds(tre.tree, unrooted_trees=True)

            ## if any difference store it
            if rb[0]:
                diff += 1

        ## single-argument print() behaves the same on python 2 and 3
        print("{} trees different / {} total runs".format(diff, nreps))

In [3]:
## build a test case around a random 20-tip tree
qmc = QMC_test(20)

### Draw the random tree:

In [7]:
## draw the randomly generated starting tree
qmc.toytree.draw(width=300,height=350)

(<toyplot.canvas.Canvas at 0x118e97110>,
 <toyplot.coordinates.Cartesian at 0x118e170d0>)

### Run the qmc function and plot, finding a totally different output tree:

In [10]:
## decompose the tree to quartets, run QMC on them, and draw the result
tre = qmc.run()
tre.draw(height=350, width=300);

### These trees aren't the same, but the function to test for similarity doesn't realize that:

In [9]:
## does this tree match the input tree?
## NOTE(review): QMC_test.__init__ renames the tips of qmc.tree to integer
## indexes, while run_qmc_on_quartets_file puts the original names back on
## `tre`, so the two trees may share no tip labels -- confirm that a
## Robinson-Foulds distance of 0 is meaningful here before trusting it
matches = tre.tree.robinson_foulds(qmc.tree, unrooted_trees=True)[0] == 0
print "inferred tree matches input tree = {}".format(matches)

inferred tree matches input tree = True


### I think this must be a branch length problem -- the quartets might not be inferred correctly?

### Below, I changed some of the methods of `QMC_test`. In particular, I replaced the quartet decomposition with a new distance-based method, because I think the old method was ignoring branch lengths. I also removed the tip-name conversion, simply because that made it quicker to bring in my own code for this ad-hoc test.

In [50]:
## imports
import toytree
import itertools
import ipyrad as ip
import subprocess as sps
import numpy as np

class QMC_test(object):
    """
    Round-trip test for QMC using a distance-based quartet decomposition.

    A random tree is generated, every 4-tip subset is resolved into a
    quartet from the pairwise tip distances on that tree, the quartets are
    handed to the QMC binary, and the tree QMC infers is returned.

    Attributes:
        toytree:  the random tree returned by toytree.rtree(ntips).
        tree:     the underlying tree object (toytree.tree); tip names are
                  left unchanged in this version.
        namedict: unused here (kept as an empty dict).
    """
    def __init__(self, ntips):
        """Generate a random tree with `ntips` tips."""
        
        ## load in tree
        self.toytree = toytree.rtree(ntips)
        self.tree = self.toytree.tree
        self.namedict = {}

        ## tip names are NOT converted to indexes in this version

        
    ## methods 
    def get_all_combs(self, set1, set2, as_list=False):
        """
        Yield every quartet made of 2 items from set1 and 2 from set2.

        Each quartet is a 4-item list: the sorted pair drawn from set1
        followed by the sorted pair drawn from set2. Returns a generator
        unless as_list=True, in which case a list is returned.
        """
        qiter = (sorted(i) + sorted(j) for (i, j) in itertools.product(
                    itertools.combinations(set1, 2), 
                    itertools.combinations(set2, 2),
                ))
        
        ## option to return as list 
        if as_list:
            return list(qiter)
        ## but returning as generator is more efficient
        else:
            return qiter

        
    def decompose_to_quartets(self):
        """
        Resolve every 4-tip subset of the tree into a quartet.

        Tips are addressed as "t-<i>" for i in 0..ntips-1, so the tree must
        use that naming. Returns an integer array of shape (nquartets, 4);
        a row [a, b, c, d] means the split a,b | c,d.

        The split is chosen with the four-point condition: of the three
        possible pairings, the one with the smallest summed within-pair
        distance is the one induced by the tree. Simply grouping the two
        closest tips (the previous approach) picks the wrong pairing
        whenever two tips on opposite sides of the internal edge sit on
        short branches, which happens on non-ultrametric trees.

        Within a row the pair with the smaller distance is listed first, so
        rows are unchanged wherever the previous approach was already right.
        """
        ntips = len(self.tree.get_leaves())
        names = ["t-" + str(idx) for idx in range(ntips)]

        ## look each pairwise distance up once instead of 6x per quartet
        dist = {}
        for i, j in itertools.combinations(range(ntips), 2):
            dist[(i, j)] = self.tree.get_distance(names[i], names[j])

        quartets = []
        for a, b, c, d in itertools.combinations(range(ntips), 4):
            ## the three ways to split four tips into two pairs
            pairings = (
                ((a, b), (c, d)),
                ((a, c), (b, d)),
                ((a, d), (b, c)),
            )
            sums = [dist[p] + dist[q] for (p, q) in pairings]
            first, second = pairings[int(np.argmin(sums))]

            ## list the tighter pair first
            if dist[second] < dist[first]:
                first, second = second, first
            quartets.append(first + second)

        ## reshape keeps the (0, 4) shape when there are fewer than 4 tips
        return np.array(quartets, dtype=int).reshape(-1, 4)
    
    
    def write_qrts_to_qmc_format(self, qrts):
        """
        Write quartets to ./quartets.txt, one 'a,b|c,d' line per quartet
        (newline separated, no trailing newline). Overwrites the file.
        """
        dump = ["{},{}|{},{}".format(*i) for i in qrts]
        with open("quartets.txt", "w") as outfile:
            outfile.write("\n".join(dump))
            
            
    def run_qmc_on_quartets_file(self):
        """
        Run the QMC binary on ./quartets.txt and return the tree it writes
        to ./tre.tmp as a toytree object. Tip names are returned exactly as
        read from that file (no mapping back through namedict).
        """
        
        ## run qmc on quartets file
        ## NOTE(review): the exit status is not checked, so if qmc fails a
        ## tre.tmp left over from an earlier run would be read -- confirm
        cmd = [ip.bins.qmc, "qrtt=./quartets.txt", "otre=./tre.tmp"]
        proc = sps.Popen(cmd)
        proc.communicate()
        
        ## read in tree file and return as toytree object
        tre = toytree.tree("./tre.tmp")
        
        return tre
        
            
    def run(self):
        """Decompose the tree, write the quartets file, and return the QMC tree."""
        ## get quartets
        qrts = self.decompose_to_quartets()
        
        ## dump to qmc to write quartets file
        self.write_qrts_to_qmc_format(qrts)
        
        ## infer supertree
        tre = self.run_qmc_on_quartets_file()

        return tre
    
    
    def run_reps_on_same_quartets_file(self, nreps):
        """
        Run QMC `nreps` times on one quartets file and print how many of
        the inferred trees differ from self.tree by Robinson-Foulds.

        Uses `self` throughout; the earlier version reached for the
        notebook-global `qmc`, which only worked when an instance of that
        exact name existed and silently tested that instance instead.
        """

        ## draw the true tree
        self.toytree.draw(width=300, height=350)

        ## reuse the same set of quartets in each rep
        qrts = self.decompose_to_quartets()

        ## write quartets to file
        self.write_qrts_to_qmc_format(qrts)

        ## store number of inferred trees with a difference
        diff = 0

        ## iterate over reps
        for rep in range(nreps):
            tre = self.run_qmc_on_quartets_file()

            ## measure difference using robinson-foulds
            ## NOTE(review): self.tree keeps its "t-<i>" names while the
            ## quartets file holds bare indexes -- confirm both trees
            ## carry the same tip labels before trusting this distance
            rb = self.tree.robinson_foulds(tre.tree, unrooted_trees=True)

            ## if any difference store it
            if rb[0]:
                diff += 1

        ## single-argument print() behaves the same on python 2 and 3
        print("{} trees different / {} total runs".format(diff, nreps))

## Now re-run the test, seeing a real resemblance between trees, but also small, frustrating differences between them.

In [51]:
## new random 20-tip test case, built with the redefined class
qmc = QMC_test(20)

In [52]:
## draw the randomly generated starting tree
qmc.toytree.draw(width=300,height=350)

(<toyplot.canvas.Canvas at 0x12084e0d0>,
 <toyplot.coordinates.Cartesian at 0x120729650>)

In [55]:
## distance-based quartets -> QMC -> draw the inferred tree
tre = qmc.run()
tre.draw(height=350,width=300)

(<toyplot.canvas.Canvas at 0x118f60710>,
 <toyplot.coordinates.Cartesian at 0x120854810>)

## New Function from Deren

In [105]:
## imports
import toytree
import itertools
import ipyrad as ip
import subprocess as sps
import copy
class QMC_test(object):
    """
    Round-trip test for QMC: build a random tree, decompose it into
    quartets, hand the quartets to the QMC binary, and compare the tree
    that QMC infers against the starting tree.

    Attributes:
        toytree:  the random tree returned by toytree.rtree(ntips); its
                  tip names are left untouched.
        tree:     a deep copy of toytree.tree whose tip names are replaced
                  with integer indexes in __init__.
        namedict: maps each integer tip index back to the original tip name.
    """
    def __init__(self, ntips):
        """Generate a random tree with `ntips` tips and index a copy of it."""
        
        ## load in tree
        self.toytree = toytree.rtree(ntips)
        ## work on a copy so the renaming below leaves self.toytree intact
        self.tree = copy.deepcopy(self.toytree.tree)
        self.namedict = {}

        ## convert tip names to indexes
        leaf = 0
        for node in self.tree.traverse():
            if node.is_leaf():
                ## store old name
                self.namedict[leaf] = node.name
                ## set new name
                node.name = leaf
                ## advance counter
                leaf += 1    

        
    ## methods 
    def get_all_combs(self, set1, set2, as_list=False):
        """
        Yield every quartet made of 2 items from set1 and 2 from set2.

        Each quartet is a 4-tuple: the pair drawn from set1 followed by
        the pair drawn from set2, in the order itertools.combinations
        produces them (no sorting). Returns a generator unless
        as_list=True, in which case a list is returned.
        """
        qiter = (tuple(i+j) for (i, j) in itertools.product(
            itertools.combinations(set1, 2), 
            itertools.combinations(set2, 2),
        ))
        
        ## option to return as list 
        if as_list:
            return list(qiter)
        ## but returning as generator is more efficient
        else:
            return qiter

        
    def decompose_to_quartets(self):
        """
        Return the set of quartets induced by the internal edges of
        self.tree.

        Each stored quartet is a 4-tuple whose first two names come from
        one side of an edge and last two from the other side. A fully
        sorted copy is used only to detect a 4-tip set that was already
        seen, so the split itself is preserved in the returned tuples.
        """
                
        ## set to compared ordered sets, and set to store quartet
        qrts = set()
        stored = set()
        
        ## get all tips in tree as a set
        n_all = set(self.tree.get_leaf_names())
    
        ## traverse tree
        for node in self.tree.traverse():
            ## skip root or tip nodes
            if not (node.is_root() or node.is_leaf()):
                
                ## get all tips below this node
                below = set(node.get_leaf_names())
                above = n_all - below

                ## get all combinations of 2 above and 2 below this node
                for qrt in self.get_all_combs(above, below):
                   
                    ## the sorted form is the de-duplication key only
                    sqrt = tuple(sorted(qrt))
                    if sqrt not in qrts:
                        stored.add(qrt)
                        qrts.add(sqrt)
                        
        ## store qrts  
        return stored
    
    
    def write_qrts_to_qmc_format(self, qrts):
        """
        Write quartets to ./quartets.txt, one 'a,b|c,d' line per quartet
        (newline separated, no trailing newline). Overwrites the file.
        """
        dump = ["{},{}|{},{}".format(*i) for i in qrts]
        with open("quartets.txt", "w") as outfile:
            outfile.write("\n".join(dump))
            
            
    def run_qmc_on_quartets_file(self):
        """
        Run the QMC binary on ./quartets.txt and return the tree it writes
        to ./tre.tmp as a toytree object with original tip names restored.
        """
        
        ## run qmc on quartets file
        ## NOTE(review): the exit status is not checked, so if qmc fails a
        ## tre.tmp left over from an earlier run would be read -- confirm
        cmd = [ip.bins.qmc, "qrtt=./quartets.txt", "otre=./tre.tmp"]
        proc = sps.Popen(cmd)
        proc.communicate()
        
        ## read in tree file and return as toytree object
        tre = toytree.tree("./tre.tmp")
                
        ## put names back onto qmc tree tips
        for node in tre.tree.traverse():
            if node.is_leaf():
                node.name = self.namedict[int(node.name)]
        
        return tre
        
            
    def run(self):
        """Decompose the tree, write the quartets file, and return the QMC tree."""
        ## get quartets
        qrts = self.decompose_to_quartets()
        
        ## dump to qmc to write quartets file
        self.write_qrts_to_qmc_format(qrts)
        
        ## infer supertree
        tre = self.run_qmc_on_quartets_file()

        return tre
    
    
    def run_reps_on_same_quartets_file(self, nreps):
        """
        Run QMC `nreps` times on one quartets file and print how many of
        the inferred trees differ from the original-named starting tree
        (self.toytree.tree) by Robinson-Foulds.

        Uses `self` throughout; the earlier version reached for the
        notebook-global `qmc`, which fails (or silently tests another
        object) when the instance is bound to any other name.
        """

        ## draw the true tree
        self.toytree.draw(width=300, height=350)

        ## reuse the same set of quartets in each rep
        qrts = self.decompose_to_quartets()

        ## write quartets to file
        self.write_qrts_to_qmc_format(qrts)

        ## store number of inferred trees with a difference
        diff = 0

        ## iterate over reps
        for rep in range(nreps):
            tre = self.run_qmc_on_quartets_file()

            ## measure difference using robinson-foulds; compare against
            ## the un-renamed tree since `tre` has its names restored
            rb = self.toytree.tree.robinson_foulds(tre.tree, unrooted_trees=True)

            ## if any difference store it
            if rb[0]:
                diff += 1

        ## single-argument print() behaves the same on python 2 and 3
        print("{} trees different / {} total runs".format(diff, nreps))

In [106]:
## new random 20-tip test case, built with the redefined class
current_test = QMC_test(20)

In [107]:
## draw the starting tree (original tip names)
current_test.toytree.draw(width = 300, height = 350)

(<toyplot.canvas.Canvas at 0x1101daf50>,
 <toyplot.coordinates.Cartesian at 0x1101da190>)

In [81]:
## list every node visited by traverse() on the index-renamed tree
list(current_test.tree.traverse())

[Tree node '0' (0x12085ae9),
 Tree node '1' (0x1208d635),
 Tree node '4' (0x1208d641),
 Tree node '2' (0x1208d645),
 Tree node '3' (0x1208d655),
 Tree node '5' (0x1208d669),
 Tree node '11' (0x1208d66d),
 Tree node '0' (0x1208d64d),
 Tree node '1' (0x1208d651),
 Tree node '2' (0x1208d661),
 Tree node '3' (0x1208d665),
 Tree node '6' (0x1208d671),
 Tree node '7' (0x1208d675),
 Tree node '12' (0x1208d695),
 Tree node '14' (0x1208d6a1),
 Tree node '4' (0x1208d679),
 Tree node '5' (0x1208d67d),
 Tree node '8' (0x1208d685),
 Tree node '9' (0x1208d689),
 Tree node '6' (0x1208d6ad),
 Tree node '13' (0x1208d6b1),
 Tree node '7' (0x1208d6c1),
 Tree node '15' (0x1208d6c5),
 Tree node '8' (0x1208d68d),
 Tree node '9' (0x1208d691),
 Tree node '10' (0x1208d699),
 Tree node '10' (0x1208d69d),
 Tree node '11' (0x1208d6b9),
 Tree node '12' (0x1208d6bd),
 Tree node '13' (0x1208d6cd),
 Tree node '16' (0x1208d6d1),
 Tree node '14' (0x1208d6a5),
 Tree node '15' (0x1208d6a9),
 Tree node '16' (0x1208d6d9),


In [108]:
## decompose to quartets, run QMC, and keep the inferred tree
results = current_test.run()

In [109]:
## draw the tree inferred by QMC
results.draw(height = 350,width=300)

(<toyplot.canvas.Canvas at 0x1101cb250>,
 <toyplot.coordinates.Cartesian at 0x1101cb290>)

In [110]:
## save the QMC-inferred tree to file
results.tree.write(outfile="testingtree2.phy")

In [111]:
## save the index-renamed copy of the starting tree to file
current_test.tree.write(outfile="testingtree1.phy")

In [113]:
## tip names on the working copy (integer indexes assigned in __init__)
current_test.tree.get_leaf_names()

[0, 2, 3, 4, 10, 11, 5, 12, 13, 6, 7, 8, 14, 15, 1, 9, 16, 17, 18, 19]

In [114]:
## tip labels on the untouched starting tree, for comparison with the above
current_test.toytree.get_tip_labels()

['t-0',
 't-1',
 't-2',
 't-3',
 't-4',
 't-5',
 't-6',
 't-7',
 't-8',
 't-9',
 't-10',
 't-11',
 't-12',
 't-13',
 't-14',
 't-15',
 't-16',
 't-17',
 't-18',
 't-19']

In [115]:
## NOTE(review): looks like a leftover scratch cell unrelated to the analysis -- TODO confirm and remove
2+4

6