In [1]:
from ete3 import Tree
from pysam import FastaFile,FastxFile
import sys
import getopt
from operator import add
from pyvis.network import Network
import matplotlib.pyplot as plt

In [2]:
def count_mutations(p,n,c1,c2):
    """Flag whether a node has a column that disagrees with all of its neighbours.

    A column ``i`` qualifies when the node's character ``n[i]`` differs from
    the parent's ``p[i]`` and from both children's ``c1[i]`` and ``c2[i]``.

    Args:
        p: sequence of the parent node.
        n: sequence of the node being examined; its length sets the columns scanned.
        c1: sequence of the first child.
        c2: sequence of the second child.

    Returns:
        1 if at least one qualifying column exists, otherwise 0. The result is
        an int flag (not a bool, and not the number of columns) so that a
        caller can add it to a running total of flagged nodes.

    Raises:
        IndexError: if ``p``, ``c1`` or ``c2`` is shorter than ``n`` and the
            scan reaches a column they do not have.
    """
    # Only existence matters, so any() lets the scan stop at the first hit.
    found = any(
        n[i] != p[i] and n[i] != c1[i] and n[i] != c2[i]
        for i in range(len(n))
    )
    return 1 if found else 0

def find_3_mutation_away_pos(n,c1,c2,p):
    """Print the columns where a node differs from its parent and both children.

    Args:
        n: sequence of the node being examined; its length sets the columns scanned.
        c1: sequence of the first child.
        c2: sequence of the second child.
        p: sequence of the parent node.

    Returns:
        None. The 0-based column indices are only written to stdout, as a
        single line of the form ``position [i, j, ...]``.
    """
    differing = [
        idx
        for idx in range(len(n))
        if n[idx] != p[idx] and n[idx] != c1[idx] and n[idx] != c2[idx]
    ]
    print(f"position {differing}")
    
def count_indel_events(tree,indelfastafile):
    """Count internal nodes whose indel pattern disagrees with all of their neighbours.

    Every non-leaf node of ``tree`` is compared, column by column, against its
    parent and its first two children using ``count_mutations``. A node is
    flagged when at least one column differs from all three. For each flagged
    node the node name and the offending columns are printed.

    Args:
        tree: tree object providing ``traverse()``; nodes must expose ``name``,
            ``children``, ``up``, ``is_leaf()`` and ``is_root()``.
        indelfastafile: path of an indexed FASTA file holding one sequence per
            node name (leaves and ancestors).

    Returns:
        A 3-tuple ``(flagged, percent, details)``:
        ``flagged`` is the number of flagged internal nodes, ``percent`` is
        ``flagged`` as a percentage of all internal nodes rounded to two
        decimals, and ``details`` holds one
        ``[node, first child, second child, parent]`` list of names per
        flagged node. A tree without internal nodes yields ``(0, 0.0, [])``.

    Raises:
        IndexError: if an internal node has fewer than two children.
    """
    total_ancestors = 0
    total_i3_mut = 0
    extra_info = []

    indel_pattern = FastaFile(indelfastafile)
    try:
        for n in tree.traverse():
            if n.is_leaf():
                continue
            total_ancestors += 1

            first_child, second_child = n.children[0], n.children[1]
            current_node_sequence = indel_pattern.fetch(n.name)
            child_seq_1 = indel_pattern.fetch(first_child.name)
            child_seq_2 = indel_pattern.fetch(second_child.name)

            if n.is_root():
                # The root has no parent, so it is compared against itself; the
                # "differs from parent" test then never holds and the root is
                # never flagged.
                parent_name = None
                parent_node_sequence = current_node_sequence
            else:
                parent_name = n.up.name
                parent_node_sequence = indel_pattern.fetch(parent_name)

            i3_mutations = count_mutations(parent_node_sequence,current_node_sequence,child_seq_1,child_seq_2)
            if i3_mutations > 0:
                print(f"node name {n.name}")
                extra_info.append([n.name,first_child.name,second_child.name,parent_name])
                find_3_mutation_away_pos(current_node_sequence,child_seq_1,child_seq_2,parent_node_sequence)
            total_i3_mut += i3_mutations
    finally:
        # Release the FASTA handle even if a lookup fails part-way through.
        indel_pattern.close()

    if total_ancestors == 0:
        # No internal nodes: avoid dividing by zero.
        return total_i3_mut, 0.0, extra_info

    return total_i3_mut , round((total_i3_mut / total_ancestors)* 100,2),extra_info


def network_plot(highlight_node,highlight_node_label,other_nodes,other_nodes_color,other_nodes_label,all_edges):
    """Build a directed pyvis network with one node drawn in red.

    Args:
        highlight_node: id of the node to emphasise; it is added in red.
        highlight_node_label: label shown for the highlighted node.
        other_nodes: ids of the remaining nodes.
        other_nodes_color: one colour per entry of ``other_nodes``.
        other_nodes_label: one label per entry of ``other_nodes``.
        all_edges: edge tuples passed unchanged to ``Network.add_edges``.

    Returns:
        The configured ``Network`` object; nothing is rendered or written here.
    """
    canvas_options = dict(
        notebook=True,
        directed=True,
        bgcolor="white",
        font_color="black",
        cdn_resources='in_line',
        height="1000px",   # height of chart
        width="100%",      # fill the entire width
    )
    graph = Network(**canvas_options)

    # Nodes must exist before the edges that reference them are added.
    graph.add_nodes(other_nodes, color=other_nodes_color, label=other_nodes_label)
    graph.add_node(highlight_node, color="red", label=highlight_node_label)
    graph.add_edges(all_edges)

    layout_options = {
        "node_distance": 100,
        "central_gravity": 0.2,
        "spring_length": 200,
        "spring_strength": 0.05,
        "damping": 0.01,
    }
    graph.repulsion(**layout_options)

    return graph

def get_mini_tree(highlight_node,tree,max_nodes=4):
    """Collect a small neighbourhood of edges around one internal node.

    Two passes are made over ``tree``. Each pass marks nodes with a temporary
    ``mtree`` feature, which is left on the nodes afterwards.

    * Downward (default traversal order): starting at ``highlight_node``,
      every internal non-root node whose parent is already marked is marked
      too, and edges to its first two children are recorded as
      ``(node, child, step)`` 3-tuples.
    * Upward (postorder): starting at ``highlight_node``, every internal
      non-root node with a marked first or second child is marked, and the
      edge from its parent is recorded as a ``(parent, node)`` 2-tuple.

    Args:
        highlight_node: name of the node the neighbourhood is centred on. It
            must name an internal, non-root node.
        tree: tree object providing ``traverse()``; nodes must expose ``name``,
            ``children``, ``up``, ``is_leaf()``, ``is_root()`` and
            ``add_feature()``.
        max_nodes: maximum number of nodes marked per pass. Values below 1
            behave like 1, because the limit is checked after a node is marked.

    Returns:
        ``(other_nodes, all_edges)``: the names of all edge endpoints except
        ``highlight_node`` in first-seen order, and the list of edge tuples.

    Raises:
        ValueError: if ``highlight_node`` is not an internal, non-root node of
            ``tree`` (no edges can be collected for it).
    """
    def _reset_marks():
        # Clear the marker on every node before a pass starts.
        for node in tree.traverse():
            node.add_feature('mtree',0)

    all_edges = []

    ## traverse down
    _reset_marks()
    step = 1
    for node in tree.traverse():
        if node.is_leaf() or node.is_root():
            continue
        if node.name == highlight_node or node.up.mtree > 0:
            node.add_feature('mtree',step)
            all_edges.append((node.name,node.children[0].name,step))
            all_edges.append((node.name,node.children[1].name,step))
            step += 1
            if step > max_nodes:
                break

    ## traverse up
    _reset_marks()
    step = 1
    for node in tree.traverse('postorder'):
        if node.is_leaf() or node.is_root():
            continue
        if node.name == highlight_node or node.children[0].mtree > 0 or node.children[1].mtree > 0:
            node.add_feature('mtree',step)
            all_edges.append((node.up.name,node.name))
            step += 1
            if step > max_nodes:
                break

    # Unique endpoint names; dict keys keep first-seen order so the result is
    # reproducible between runs (a set would not be).
    endpoints = dict.fromkeys(name for edge in all_edges for name in edge[:2])
    if highlight_node not in endpoints:
        raise ValueError(
            f"highlight node {highlight_node!r} is not an internal, non-root node of the tree"
        )
    other_nodes = [name for name in endpoints if name != highlight_node]

    return other_nodes,all_edges

### GENERAL

In [3]:
# Input location: <folder_location>/<tree_name>/ holds the newick tree and the
# indel FASTA files read by the cells below.
folder_location         = '/Users/sanjanatule/Documents/uq/Projects/Indels/indelmip/data/'
tree_name               = 'MBL_624'
nwk_file_path           = folder_location + tree_name + '/MBL_624_ancestors.nwk'

# load the tree; the context manager closes the file handle as soon as the
# text has been read
with open(nwk_file_path,"r") as tree_file:
    # a ';' terminator is appended to the file contents before parsing
    my_tree = tree_file.read() + ";"
# format=1 is passed to ete3; the later cells rely on internal node names
# (e.g. 'N477') being available on the parsed tree
tree = Tree(my_tree, format=1)

### MIP

In [4]:
# MIP
indel_fasta_solution_file          = folder_location + tree_name + '/mip_ancestor_indel.fasta'  # indels from MIP
indel_pattern        = FastaFile(indel_fasta_solution_file)
count_indel_events(tree,indel_fasta_solution_file)

node name N477
position [1497]


(1,
 0.16,
 [['N477',
   'tr|A0A0G4J057|A0A0G4J057_PLABS',
   'tr|M2W3R7|M2W3R7_GALSU',
   'N433']])

In [5]:
# Node to emphasise and the half-open column window [start_pos, end_pos) shown in the labels
highlight_node = 'N477'
start_pos, end_pos = 1494, 1498

# Edges and node names of the neighbourhood around the highlighted node
other_nodes, all_edges = get_mini_tree(highlight_node, tree)

# Each label is "<node name>    <indel pattern inside the window>"
other_nodes_label = [
    node_name + '    ' + indel_pattern[node_name][start_pos:end_pos]
    for node_name in other_nodes
]
other_nodes_color = ['lightgrey'] * len(other_nodes)
highlight_node_label = (
    highlight_node + '    ' + indel_pattern[highlight_node][start_pos:end_pos]
)

# Build the network, then write and display it
net = network_plot(
    highlight_node,
    highlight_node_label,
    other_nodes,
    other_nodes_color,
    other_nodes_label,
    all_edges,
)

net.show('MIP_MBL_624.html')

MIP_MBL_624.html


### PSP

In [6]:
# PSP
indel_fasta_solution_file     = folder_location + tree_name + '/psp_grasp_all_indel.fasta' # indels from grasp
indel_pattern        = FastaFile(indel_fasta_solution_file)
count_indel_events(tree,indel_fasta_solution_file)

node name N389
position [347, 348, 349, 350, 2755]
node name N93
position [3789]
node name N94
position [3789]
node name N376
position [2245, 2246]
node name N320
position [5001]
node name N336
position [2841, 2842]
node name N368
position [2841, 2842]
node name N99
position [2729]
node name N204
position [2880]
node name N195
position [2729, 2730]
node name N243
position [3677]
node name N287
position [2247, 2248]
node name N119
position [1168]
node name N212
position [3697]
node name N149
position [1168]


(15,
 2.41,
 [['N389', 'N390', 'N392', 'N91'],
  ['N93', 'N94', 'N373', 'N92'],
  ['N94', 'N95', 'N319', 'N93'],
  ['N376', 'N377', 'N386', 'N375'],
  ['N320', 'N321', 'N336', 'N319'],
  ['N336', 'N337', 'N368', 'N320'],
  ['N368', 'N369', 'N371', 'N336'],
  ['N99', 'N100', 'tr|A0A1U7LSY1|A0A1U7LSY1_NEOID', 'N98'],
  ['N204', 'N205', 'N246', 'N203'],
  ['N195', 'N196', 'tr|A0A0L0HHK0|A0A0L0HHK0_SPIPD', 'N181'],
  ['N243', 'tr|A0A067M497|A0A067M497_9AGAM', 'N244', 'N206'],
  ['N287', 'N288', 'tr|F2U181|F2U181_SALR5', 'N286'],
  ['N119', 'N120', 'N149', 'N118'],
  ['N212', 'N213', 'tr|A0A0C3S5J6|A0A0C3S5J6_PHLGI', 'N211'],
  ['N149', 'N150', 'tr|W3X335|W3X335_PESFW', 'N119']])

In [7]:
# Node to emphasise and the half-open column window [start_pos, end_pos) shown in the labels
highlight_node = 'N389'
start_pos, end_pos = 346, 351

# Edges and node names of the neighbourhood around the highlighted node
other_nodes, all_edges = get_mini_tree(highlight_node, tree)

# Each label is "<node name>    <indel pattern inside the window>"
other_nodes_label = [
    node_name + '    ' + indel_pattern[node_name][start_pos:end_pos]
    for node_name in other_nodes
]
other_nodes_color = ['lightgrey'] * len(other_nodes)
highlight_node_label = (
    highlight_node + '    ' + indel_pattern[highlight_node][start_pos:end_pos]
)

# Build the network, then write and display it.
# The window covers the 4 flagged positions - 347, 348, 349, 350 in N389.
net = network_plot(
    highlight_node,
    highlight_node_label,
    other_nodes,
    other_nodes_color,
    other_nodes_label,
    all_edges,
)
net.show('PSP_MBL_624.html')

PSP_MBL_624.html


### SICP

In [10]:
# SICP
indel_fasta_solution_file     = folder_location + tree_name + '/sicp_grasp_all_indel.fasta' # indels from grasp
indel_pattern        = FastaFile(indel_fasta_solution_file)
count_indel_events(tree,indel_fasta_solution_file)

node name N91
position [3610]
node name N92
position [2818, 2819, 2820, 2821, 2822, 2825]
node name N588
position [3602, 3603, 3604, 3605, 3606, 3607, 3608, 3609, 3610, 3611, 3612, 3613, 3614, 3615, 3616, 3617, 3618, 3619, 3620, 3621, 3622, 3623, 3624, 3625, 3627, 3629, 3631, 3632, 3633, 3634, 3635, 3637, 3638, 3639, 3640, 3645, 3646, 3647, 3648, 3649, 3650, 3651, 3652, 3653, 3654, 3655, 3656, 3657, 3666, 3667, 3668, 3669, 3670, 3671, 3672, 3673, 3674, 3675, 3676, 3677, 3678, 3679, 3680, 3681, 3682, 3683, 3684, 3685, 3730, 3731, 3732, 3733, 3734, 3735, 3736, 3737, 3738, 3739, 3740, 3741, 3768, 3770, 3771, 3772, 3789, 3791]
node name N84
position [34, 35, 369, 389, 754, 755, 2011, 2012, 2017, 2018, 2056, 2146, 2147, 2939, 2972, 2973, 2993]
node name N373
position [1191, 1192]
node name N312
position [1973]
node name N395
position [3865, 3866, 3867]
node name N407
position [3399, 3400, 3401, 3402, 3403, 3404, 3408, 3409, 3410, 3411, 3412, 3413, 3414, 3415, 3416, 3417, 3418, 3422, 3423, 3

(17,
 2.73,
 [['N91', 'N92', 'N389', 'N81'],
  ['N92', 'N93', 'N374', 'N91'],
  ['N588', 'tr|A0A1F1SHB6|A0A1F1SHB6_9NEIS', 'N589', 'N587'],
  ['N84', 'N85', 'N87', 'N83'],
  ['N373',
   'tr|A0A1X7V715|A0A1X7V715_AMPQE',
   'tr|A0A6P8Z0R5|A0A6P8Z0R5_THRPL',
   'N93'],
  ['N312', 'N313', 'N316', 'N95'],
  ['N395', 'tr|A0A0B2UJL3|A0A0B2UJL3_9MICR', 'tr|Q8SVD6|Q8SVD6_ENCCU', 'N394'],
  ['N407', 'N408', 'N421', 'N406'],
  ['N202', 'N203', 'N256', 'N97'],
  ['N251', 'N252', 'N255', 'N250'],
  ['N467', 'N468', 'tr|A0A2N1NYS1|A0A2N1NYS1_9GLOM', 'N438'],
  ['N177', 'N178', 'N179', 'N174'],
  ['N249',
   'tr|A0A316VAN0|A0A316VAN0_9BASI',
   'tr|A0A316Z1F6|A0A316Z1F6_9BASI',
   'N246'],
  ['N106', 'N107', 'tr|F2QUU4|F2QUU4_KOMPC', 'N105'],
  ['N110', 'tr|A0A0J9XI61|A0A0J9XI61_GEOCN', 'N111', 'N109'],
  ['N143', 'N144', 'tr|A0A6P8ARP7|A0A6P8ARP7_MAGGR', 'N121'],
  ['N217', 'N218', 'tr|J4G319|J4G319_9APHY', 'N216']])

In [11]:
# Node to emphasise and the half-open column window [start_pos, end_pos) shown in the labels
highlight_node = 'N588'
start_pos,end_pos = 3602,3626


# create mini tree
other_nodes,all_edges = get_mini_tree(highlight_node,tree)

# labels and colors: each label is "<node name>    <indel pattern inside the window>"
other_nodes_label = [ i + '    ' + indel_pattern[i][start_pos:end_pos] for i in other_nodes]
other_nodes_color = ['lightgrey' for i in other_nodes]
highlight_node_label = highlight_node + '    ' + indel_pattern[highlight_node][start_pos:end_pos]

# plot
net = network_plot(highlight_node,highlight_node_label,other_nodes,other_nodes_color,other_nodes_label,all_edges)

# Written to its own file: this is the SICP plot, so it must not reuse the
# MIP file name and overwrite the MIP plot.
net.show('SICP_MBL_624.html')

MIP_MBL_624.html
