### Evaluate MIP 2 solution ::
1. GAPS between extant and the top ancestor node.
2. Counts of I1, I2 and I3 mutations.
3. Number of Unidirectional / Bidirectional edges in the solution.


Input files needed:
1. GRASP Tree
2. GRASP Ancestors (includes extants)
3. MIP Ancestors
4. pogs JSON

Created by: Sanjana Tule

Date: 14 Sep 2022

In [1]:
import re
import json
from pysam import FastaFile,FastxFile
from ete3 import Tree
from operator import add



## DEFINE FUNCTIONS

### COUNT GAPS

In [13]:
''' EVALUATION 1'''
''' THIS BLOCK COUNTS THE GAP BETWEEN EXTANTS AND MIP 2 ANCESTOR NODE '''

def count_gaps(seq_str, output_method):
    """Return the number of gap positions in a sequence string.

    Args:
        seq_str: The sequence to inspect.
        output_method: 'grasp' counts '-' characters only; 'mip2' counts
            both '0' and '-' characters, because '0' is treated as a gap
            in that encoding.

    Returns:
        The gap count as an int.

    Raises:
        ValueError: If output_method is neither 'grasp' nor 'mip2'.
    """
    if output_method == 'grasp':
        return seq_str.count('-')
    if output_method == 'mip2':
        # A '0' is a gap here; any literal '-' is still counted as well.
        return seq_str.count('-') + seq_str.count('0')
    raise ValueError(
        "output_method must be 'grasp' or 'mip2', got %r" % (output_method,)
    )

def counts_ancestor_gaps(mip2_fasta, top_node='NODE_0'):
    """Count the gaps in the top ancestor sequence of a MIP2 FASTA file.

    Args:
        mip2_fasta: Path to the FASTA file holding the MIP2 sequences.
        top_node: Name of the record to inspect; defaults to 'NODE_0'.

    Returns:
        The gap count of that record, as computed by
        count_gaps(sequence, 'mip2').
    """
    # The context manager closes the FASTA handle even if the fetch fails.
    with FastaFile(mip2_fasta) as sequences_info:
        top_sequence = sequences_info.fetch(top_node)

    return count_gaps(top_sequence, 'mip2')

In [12]:
# Point at the MIP ancestor FASTA for the CYP2U_165 dataset and show the gap
# count of its top ancestor node (the bare call displays its return value).
mip2_fasta = "/Users/sanjanatule/Documents/uq/Projects/MIPIndel/data/CYP2U_165/mip_ancestor_indel.fasta"
counts_ancestor_gaps(mip2_fasta)

106

### MUTATION COUNTS

In [3]:
def count_mutation(p,n,c1,c2):
    """Classify presence/absence changes at one branch point, per position.

    Every letter in the four sequences is first collapsed to '1'; all other
    characters are compared as they are. For each index of the current node
    n, the parent p and the children c1 and c2 are compared against it:

        i1a - n differs from p, both children equal n
        i1b - n equals p, the children differ from each other
        i2a - n differs from p, the children differ from each other
        i2b - n equals p, the children agree but differ from n
        i3  - n differs from p, the children agree but differ from n

    Returns:
        [tn, i1a, i1b, i2a, i2b, i3], where tn is 1 if any of the counters
        is non-zero and 0 otherwise.

    Raises:
        IndexError: If p, c1 or c2 is shorter than n.
    """
    # Collapse every residue letter to '1' so only presence/absence is compared.
    p, n, c1, c2 = (re.sub('[a-zA-Z]', '1', seq) for seq in (p, n, c1, c2))

    i1a = i1b = i2a = i2b = i3 = 0

    for idx, node_state in enumerate(n):
        matches_parent = node_state == p[idx]
        kid_a, kid_b = c1[idx], c2[idx]

        if kid_a != kid_b:
            # The children disagree with each other.
            if matches_parent:
                i1b += 1
            else:
                i2a += 1
        elif kid_a == node_state:
            # Both children carry the node's state; only a change from the
            # parent counts as a mutation.
            if not matches_parent:
                i1a += 1
        else:
            # Both children agree with each other but not with the node.
            if matches_parent:
                i2b += 1
            else:
                i3 += 1

    tn = 1 if (i1a or i1b or i2a or i2b or i3) else 0
    return [tn, i1a, i1b, i2a, i2b, i3]

# Sanity checks for count_mutation: one group per mutation category. Each case
# is a (parent, node, child 1, child 2) pattern for a single position.
mutation_demo_cases = (
    ("NO MUTATIONS", "No mutations", (
        ('1', '1', '1', '1'),
    )),
    ## ONE FLIP
    ("ONE FLIP", "I1A", (
        ('1', '0', '0', '0'),
        ('0', '1', '1', '1'),
    )),
    ## HALF KIDS CHANGE
    ('HALF KIDS CHANGE', "I1B", (
        ('0', '0', '1', '0'),
        ('0', '0', '0', '1'),
        ('1', '1', '1', '0'),
        ('1', '1', '0', '1'),
    )),
    ## CHAOS
    ('CHAOS', "I2A", (
        ('1', '0', '1', '0'),
        ('1', '0', '0', '1'),
        ('0', '1', '1', '0'),
        ('0', '1', '0', '1'),
    )),
    ### KIDS CHANGE
    ("KIDS CHANGE", "I2B", (
        ('1', '1', '0', '0'),
        ('0', '0', '1', '1'),
    )),
    ### total flip ###
    ("TOTAL FLIP", "I3", (
        ('1', '0', '1', '1'),
        ('0', '1', '0', '0'),
    )),
)

for demo_heading, demo_label, demo_patterns in mutation_demo_cases:
    print(demo_heading)
    for demo_pattern in demo_patterns:
        print(demo_label, count_mutation(*demo_pattern))

NO MUTATIONS
No mutations [0, 0, 0, 0, 0, 0]
ONE FLIP
I1A [1, 1, 0, 0, 0, 0]
I1A [1, 1, 0, 0, 0, 0]
HALF KIDS CHANGE
I1B [1, 0, 1, 0, 0, 0]
I1B [1, 0, 1, 0, 0, 0]
I1B [1, 0, 1, 0, 0, 0]
I1B [1, 0, 1, 0, 0, 0]
CHAOS
I2A [1, 0, 0, 1, 0, 0]
I2A [1, 0, 0, 1, 0, 0]
I2A [1, 0, 0, 1, 0, 0]
I2A [1, 0, 0, 1, 0, 0]
KIDS CHANGE
I2B [1, 0, 0, 0, 1, 0]
I2B [1, 0, 0, 0, 1, 0]
TOTAL FLIP
I3 [1, 0, 0, 0, 0, 1]
I3 [1, 0, 0, 0, 0, 1]


In [14]:
''' EVALUATION 2  - THIS BLOCK COUNTS DIFFERENT KINDS OF MUTATIONS AT A BRANCH POINT '''

def count_different_mutations(ancestor_fasta, nwk_file_path=None, method=None):
    """Sum the count_mutation categories over every internal node of a tree.

    For each non-leaf node, the node's sequence, its parent's sequence and
    the sequences of its first two children are passed to count_mutation,
    and the resulting lists are added element-wise.

    Args:
        ancestor_fasta: Path to a FASTA file with one record per tree node
            name (internal nodes and leaves).
        nwk_file_path: Path to the Newick tree file. If omitted, the
            notebook-level variable of the same name is used.
        method: 'mip2' strips the first and last character of every sequence
            before comparison (per the original comment, the extra start/end
            positions of MIP2 sequences); any other value leaves sequences
            untouched. If omitted, the notebook-level variable `method` is
            used.

    Returns:
        [nodes, i1a, i1b, i2a, i2b, i3], where `nodes` is the number of
        internal nodes with at least one mutation and the remaining entries
        are the per-category totals.

    Raises:
        NameError: If nwk_file_path or method is neither passed nor defined
            at notebook level.
        IndexError: If an internal node has fewer than two children.
    """
    # Backward compatibility: older cells call this with the FASTA path only
    # and rely on notebook-level variables for the other settings.
    notebook_vars = globals()
    if nwk_file_path is None:
        if 'nwk_file_path' not in notebook_vars:
            raise NameError(
                "nwk_file_path was not passed and is not defined at notebook level"
            )
        nwk_file_path = notebook_vars['nwk_file_path']
    if method is None:
        if 'method' not in notebook_vars:
            raise NameError(
                "method was not passed and is not defined at notebook level"
            )
        method = notebook_vars['method']

    total_mut = [0, 0, 0, 0, 0, 0]  # nodes,i1a,i1b,i2a,i2b,i3

    # Load the tree; add the terminating ';' only when the file lacks it.
    with open(nwk_file_path, "r") as tree_file:
        newick = tree_file.read().strip()
    if not newick.endswith(";"):
        newick += ";"
    tree = Tree(newick, format=1)

    trim_ends = method == 'mip2'

    with FastaFile(ancestor_fasta) as sequences_info:
        for n in tree.traverse():
            # Leaves have no children, so they are not branch points.
            if n.is_leaf():
                continue

            current_node_sequence = sequences_info.fetch(n.name)
            # Only the first two children are compared (binary tree assumed).
            child_seq_1 = sequences_info.fetch(n.children[0].name)
            child_seq_2 = sequences_info.fetch(n.children[1].name)

            if n.is_root():
                # The root has no parent; compare it against itself so that
                # no parent-to-node change is counted.
                parent_node_sequence = current_node_sequence
            else:
                parent_node_sequence = sequences_info.fetch(n.up.name)

            # remove starting and ending for mip2
            if trim_ends:
                current_node_sequence = current_node_sequence[1:-1]
                child_seq_1 = child_seq_1[1:-1]
                child_seq_2 = child_seq_2[1:-1]
                parent_node_sequence = parent_node_sequence[1:-1]

            mut_ret = count_mutation(
                parent_node_sequence,
                current_node_sequence,
                child_seq_1,
                child_seq_2,
            )
            total_mut = [running + new for running, new in zip(total_mut, mut_ret)]

    return total_mut

In [None]:
# Mutation counts for the MIP ancestor FASTA (closing parenthesis restored).
print(count_different_mutations(mip2_fasta))

## RUN EVALUATION

In [6]:
## CYP2U - 165
# nwk_file_path       = '/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/CYP2U_165/grasp_ancestors.nwk'
# grasp_ancestor_fasta = "/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/CYP2U_165/grasp_ancestors.fa"
# pogs_file           = '/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/CYP2U_165/pogs.json'
# mip2_fasta = "/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/CYP2U_165/mip2_ancestors.fa"
# tree_name = 'cyp2u_165'

# # ## CYP2U - 359
# nwk_file_path       = '/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/CYP2U_359/grasp_ancestors.nwk'
# grasp_ancestor_fasta = "/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/CYP2U_359/grasp_ancestors.fa"
# pogs_file           = '/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/CYP2U_359/pogs.json'
# mip2_fasta = "/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/CYP2U_359/\
#                                                 mip2_ancestors.fa"
# tree_name = 'cyp2u_359'


# ## CYP2U - 595
# nwk_file_path       = '/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/CYP2U_595/grasp_ancestors.nwk'
# grasp_ancestor_fasta = "/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/CYP2U_595/grasp_ancestors.fa"
# pogs_file           = '/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/CYP2U_595/pogs.json'
# mip2_fasta = "/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/CYP2U_595/\
# mip2_ancestors.fa"
# tree_name = 'cyp2u_595'

# ## DHAD - 585
# nwk_file_path       = '/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/DHAD_585/grasp_ancestors.nwk'
# grasp_ancestor_fasta = "/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/DHAD_585/grasp_ancestors.fa"
# pogs_file           = '/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/DHAD_585/pogs.json'
# mip2_fasta = "/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/DHAD_585/\
# mip2_ancestors.fa"
# tree_name = 'dhad_585'

# ## KARI - 1176
# nwk_file_path       = '/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/KARI_1176/grasp_ancestors.nwk'
# grasp_ancestor_fasta = "/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/KARI_1176/grasp_ancestors.fa"
# pogs_file           = '/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/KARI_1176/pogs.json'
# mip2_fasta = "/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/KARI_1176/\
# mip2_ancestors.fa"
# tree_name = 'kari_1176' 


# ## GO - 399
# nwk_file_path       = '/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/GDH-GOx_399/grasp_ancestors.nwk'
# grasp_ancestor_fasta = "/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/GDH-GOx_399/grasp_ancestors.fa"
# pogs_file           = '/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/GDH-GOx_399/pogs.json'
# mip2_fasta = "/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/GDH-GOx_399/mip2_ancestors.fa"
# tree_name = 'go_399' 


## DHAD - 1612
# nwk_file_path       = '/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/DHAD_1612/grasp_ancestors.nwk'
# grasp_ancestor_fasta = "/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/DHAD_1612/grasp_ancestors.fa"
# pogs_file           = '/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/DHAD_1612/pogs.json'
# mip2_fasta = "/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/DHAD_1612/\
# mip2_ancestors.fa"
# tree_name = 'dhad_1612' 

## sample tree 1
# nwk_file_path = '/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/st1/grasp_ancestors.nwk'
# grasp_ancestor_fasta = "/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/st1/grasp_ancestors.fa"
# pogs_file           = '/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/st1/pogs.json'
# mip2_fasta = "/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/st1/mip2_ancestors.fa"
# tree_name = 'sample_tree'

## sample tree 5 -- the dataset currently selected for the evaluation cells
_st5_dir = '/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/st5'
nwk_file_path = _st5_dir + '/grasp_ancestors.nwk'        # GRASP tree (Newick)
grasp_ancestor_fasta = _st5_dir + "/grasp_ancestors.fa"  # GRASP ancestors
pogs_file = _st5_dir + '/pogs.json'                      # pogs JSON
mip2_fasta = _st5_dir + "/mip2_ancestors.fa"             # MIP ancestors
tree_name = 'sample_tree5'


## sample tree 3
# nwk_file_path = '/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/st3/grasp_ancestors.nwk'
# grasp_ancestor_fasta = "/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/st3/grasp_ancestors.fa"
# pogs_file           = '/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/st3/pogs.json'
# mip2_fasta = "/Users/sanjanatule/Documents/uq/Projects/PreferredPath/data/st3/mip2_ancestors.fa"
# tree_name = 'sample_tree3'

In [8]:
###### Legend for the result list [tn, i1a, i1b, i2a, i2b, i3]
###### tn - total nodes with mutations
#i1a - 1 FLIP: current node differs from parent, both children equal the current node
#i1b - 1 FLIP HALF KID CHANGE: current node equals parent, the two children differ from each other
#i2a - 2 FLIPS CHAOS: current node differs from parent and the two children differ from each other
#i2b - 2 FLIPS ALL KIDS CHANGE: current node equals parent, both children agree but differ from the current node
#i3  - 3 FLIPS COMPLETE FLIP: current node differs from parent, both children agree but differ from the current node


print("****** GRASP METHOD *********")  #tn,i1a,i1b,i2a,i2b,i3
print(count_different_mutations(grasp_ancestor_fasta,nwk_file_path,'grasp'))

# print("****** MIP2 *********")
# print(count_different_mutations(mip2_fasta,nwk_file_path,'mip2'))



****** GRASP METHOD *********
[32, 19, 73, 0, 0, 0]


In [9]:
# Same evaluation on the MIP2 ancestors; 'mip2' is passed as the method argument.
print("****** MIP2 *********")
print(count_different_mutations(mip2_fasta,nwk_file_path,'mip2'))

****** MIP2 *********
[32, 19, 73, 0, 0, 0]
