# TME 3

## 1. Introduction


In [1]:
import math
from dataclasses import dataclass

import numpy as np
from ete3 import Tree


In [2]:
# First candidate topology, given in Newick format. The literal is split by
# clade for readability; adjacent string literals are concatenated by Python.
tree1 = Tree(
  '(('
  '((Electrode,Magnezone),Porygon-Z),'
  '('
  '(((Aggron,Bastiodon),Forretress),Ferrothorn),'
  '((((Regirock,Regice),Registeel),Metagross),Klinklang),'
  'Genesect'
  ')'
  '),Probopass);'
)
print(tree1)



            /-Electrode
         /-|
      /-|   \-Magnezone
     |  |
     |   \-Porygon-Z
     |
     |            /-Aggron
     |         /-|
     |      /-|   \-Bastiodon
   /-|     |  |
  |  |   /-|   \-Forretress
  |  |  |  |
  |  |  |   \-Ferrothorn
  |  |  |
  |  |  |            /-Regirock
  |  |  |         /-|
  |  |  |      /-|   \-Regice
--|   \-|     |  |
  |     |   /-|   \-Registeel
  |     |  |  |
  |     |--|   \-Metagross
  |     |  |
  |     |   \-Klinklang
  |     |
  |      \-Genesect
  |
   \-Probopass


In [3]:
# Second candidate topology, given in Newick format. The literal is split by
# clade for readability; adjacent string literals are concatenated by Python.
tree2 = Tree(
  '(('
  '(((Regirock,Regice),Registeel),((Metagross,Klinklang),Genesect)),'
  '(((Aggron,Bastiodon),(Forretress,Ferrothorn)),Probopass)'
  '),'
  '(Porygon-Z,(Magnezone,Electrode))'
  ');'
)
print(tree2)



               /-Regirock
            /-|
         /-|   \-Regice
        |  |
        |   \-Registeel
      /-|
     |  |      /-Metagross
     |  |   /-|
     |   \-|   \-Klinklang
     |     |
     |      \-Genesect
   /-|
  |  |         /-Aggron
  |  |      /-|
  |  |     |   \-Bastiodon
  |  |   /-|
  |  |  |  |   /-Forretress
--|   \-|   \-|
  |     |      \-Ferrothorn
  |     |
  |      \-Probopass
  |
  |   /-Porygon-Z
   \-|
     |   /-Magnezone
      \-|
         \-Electrode


In [4]:
# Substitution cost between two nucleotides. Rows and columns are indexed in
# the order of `nucleotides` (defined at the end of this cell).
mutation_matrix = np.array((
  (0, 3, 4, 9),
  (3, 0, 2, 4),
  (4, 2, 0, 4),
  (9, 4, 4, 0),
))

# Observed nucleotide for every leaf of the trees, keyed by leaf name.
pokemons = dict([
  ('Probopass', 'A'),
  ('Aggron', 'T'),
  ('Bastiodon', 'T'),
  ('Regirock', 'G'),
  ('Registeel', 'G'),
  ('Regice', 'G'),
  ('Klinklang', 'G'),
  ('Metagross', 'C'),
  ('Genesect', 'A'),
  ('Porygon-Z', 'C'),
  ('Magnezone', 'C'),
  ('Forretress', 'T'),
  ('Electrode', 'A'),
  ('Ferrothorn', 'G'),
])

# Alphabet; the position of a letter is its index in `mutation_matrix`.
nucleotides = list('ACGT')


## 2. Sankoff algorithm

In [5]:
@dataclass
class NodeMetadata:
  """Per-node working data built and read by `sankoff`."""
  # Cost vector with one entry per nucleotide (same order as `nucleotides`):
  # the cost of the subtree rooted at this node if the node takes that
  # nucleotide. Leaves hold 0 for their observed nucleotide and inf elsewhere.
  distribution: np.ndarray
  # For internal nodes, an array indexed as [child index, nucleotide of this
  # node] giving the index of the child's nucleotide that achieves the minimum
  # cost. Leaves store an empty array.
  origin: np.ndarray
  # Index into `nucleotides` selected for this node during the top-down pass;
  # stays at -1 until that pass assigns it.
  value: int = -1

def _strip_annotation(name: str) -> str:
  """Return `name` without the nucleotide tag added by a previous `sankoff` run.

  `sankoff` relabels nodes as '<name> [X]' (or just '[X]' when the node had no
  name), where X is one of `nucleotides`. Any other name is returned unchanged.
  """
  for nucleotide in nucleotides:
    tag = f'[{nucleotide}]'
    if name == tag:
      return str()
    if name.endswith(f' {tag}'):
      return name[:-len(tag) - 1]
  return name


def sankoff(tree: Tree):
  """Run the Sankoff small-parsimony algorithm on `tree`.

  Leaf nucleotides are read from the module-level `pokemons` mapping and
  substitution costs from `mutation_matrix` (indexed in `nucleotides` order).

  Side effect: every node of `tree` is renamed in place to '<name> [X]' (or
  '[X]' for unnamed nodes), where X is the nucleotide assigned to it. The
  function can be called again on an already annotated tree: the previous tag
  is stripped before the leaf lookup and before relabelling, so the result and
  the labels are the same as on the first call.

  Returns the minimal total substitution cost of the tree (a numpy float).
  Raises KeyError if a leaf name is not a key of `pokemons`.
  """
  metadata = dict[Tree, NodeMetadata]()
  # Node name without any tag from a previous run, used for the leaf lookup
  # and for rebuilding the label.
  base_names = dict[Tree, str]()

  # Bottom-up pass: compute, for every node, the cost of its subtree for each
  # possible nucleotide at that node.
  for node in tree.traverse('postorder'): # type: ignore
    if node.children:
      base_names[node] = _strip_annotation(node.name)

      # axis 0: child
      # axis 1: possible nucleotide for child node
      d = np.array([metadata[child].distribution for child in node.children])

      # axis 2: possible nucleotide for current node
      m = d[..., np.newaxis] + mutation_matrix[np.newaxis, ...]

      metadata[node] = NodeMetadata(
        # best child nucleotide per (child, current nucleotide), summed over children
        m.min(axis=1).sum(axis=0),
        # which child nucleotide achieved that minimum
        m.argmin(axis=1)
      )
    else:
      # An exact match in `pokemons` wins, so a leaf whose real name happens
      # to look like an annotation is still looked up as-is.
      leaf_name = node.name if node.name in pokemons else _strip_annotation(node.name)
      base_names[node] = leaf_name

      metadata[node] = NodeMetadata(
        np.array([0 if pokemons[leaf_name] == nucleotide else math.inf for nucleotide in nucleotides]),
        np.array([])
      )

  # Top-down pass: pick the cheapest nucleotide at the root, then follow the
  # recorded argmins down to the leaves.
  for node in tree.traverse('preorder'): # type: ignore
    if node.up:
      child_index = node.up.children.index(node)
      nucleotide = metadata[node.up].origin[child_index, metadata[node.up].value]
    else:
      nucleotide = metadata[node].distribution.argmin()

    metadata[node].value = int(nucleotide) # int() is only for the type checker
    base_name = base_names[node]
    node.name = (f'{base_name} ' if base_name else str()) + f'[{nucleotides[nucleotide]}]'

  return metadata[tree].distribution[metadata[tree].value]


In [6]:
# sankoff() returns the parsimony cost and relabels the nodes of tree1 in
# place with their assigned nucleotide, so the tree is printed after the call.
print(sankoff(tree1))
print(tree1.get_ascii(show_internal=True))


18.0

            /-Electrode [A]
         /[C]
      /[C]  \-Magnezone [C]
     |  |
     |   \-Porygon-Z [C]
     |
     |            /-Aggron [T]
     |         /[T]
     |      /[T]  \-Bastiodon [T]
   /[C]    |  |
  |  |   /[G]  \-Forretress [T]
  |  |  |  |
  |  |  |   \-Ferrothorn [G]
  |  |  |
  |  |  |            /-Regirock [G]
  |  |  |         /[G]
  |  |  |      /[G]  \-Regice [G]
-[A]  \[G]    |  |
  |     |   /[G]  \-Registeel [G]
  |     |  |  |
  |     |-[G]  \-Metagross [C]
  |     |  |
  |     |   \-Klinklang [G]
  |     |
  |      \-Genesect [A]
  |
   \-Probopass [A]


In [7]:
# sankoff() returns the parsimony cost and relabels the nodes of tree2 in
# place with their assigned nucleotide, so the tree is printed after the call.
print(sankoff(tree2))
print(tree2.get_ascii(show_internal=True))


21.0

               /-Regirock [G]
            /[G]
         /[G]  \-Regice [G]
        |  |
        |   \-Registeel [G]
      /[C]
     |  |      /-Metagross [C]
     |  |   /[C]
     |   \[C]  \-Klinklang [G]
     |     |
     |      \-Genesect [A]
   /[C]
  |  |         /-Aggron [T]
  |  |      /[T]
  |  |     |   \-Bastiodon [T]
  |  |   /[T]
  |  |  |  |   /-Forretress [T]
-[C]  \[C]  \[T]
  |     |      \-Ferrothorn [G]
  |     |
  |      \-Probopass [A]
  |
  |   /-Porygon-Z [C]
   \[C]
     |   /-Magnezone [C]
      \[C]
         \-Electrode [A]
