In [3]:
import networkx
import sys
from ete3 import Tree


def read_phylogenetic_tree(path: str) -> list[str]:
    """Read a file of semicolon-terminated tree strings.

    Args:
        path: Location of the text file to read.

    Returns:
        One string per ``;``-terminated entry, with newline characters
        removed and the terminating ``;`` restored. Whatever follows the
        last ``;`` in the file (typically just a trailing newline) is
        discarded.
    """
    with open(path, "r") as handle:
        chunks = handle.read().split(";")

    trees = []
    # The final chunk is the remainder after the last ';', not a tree.
    for chunk in chunks[:-1]:
        trees.append(chunk.replace("\n", "") + ";")
    return trees

# Load every tree string from the Newick hand-in file and preview the first ten.
newick_strings = read_phylogenetic_tree("handins/handin1/newick.tre")
newick_strings[:10]

['(Atrichantha_elsiae,(Atrichantha_gemmifera,Calotesta_alba,(Metalasia_oligocephala,(Metalasia_bodkinii,(Metalasia_humilis,(Metalasia_quinqueflora,Metalasia_lichtensteinii,((Metalasia_tenuis,Metalasia_tenuifolia),(Metalasia_phillipsii,(((Metalasia_serrata,Metalasia_cymbifolia),Metalasia_seriphiifolia,Metalasia_erubescens),((((Metalasia_serrulata,Metalasia_rogersii),Metalasia_albescens),Metalasia_juniperoides),(Metalasia_rhoderoides,Metalasia_confusa,Metalasia_riparia),(Metalasia_compacta,((((Metalasia_agathosmoides,Metalasia_fastigiata),Metalasia_adunca),Metalasia_capitata),((Metalasia_brevifolia,Metalasia_densa),(Metalasia_muraltiifolia,(Metalasia_pulchella,(Metalasia_cephalotes,(Metalasia_divergens,Metalasia_inversa)))))))))))))))Metalasia));',
 '(Poaceae,(((Apiaceae,Asteraceae),(((Brassicaceae,Fabaceae),Solanaceae),Caprifoliaceae)),(Chenopodiaceae,Polygonaceae)));',
 '(Poaceae,((((Apiaceae,Asteraceae),Caprifoliaceae),((Brassicaceae,Fabaceae),Solanaceae)),(Chenopodiaceae,Polygonaceae

In [4]:
class AdjacencyList:
    """Directed, unweighted graph stored as an adjacency list.

    Every vertex label maps to the list of vertices that its outgoing edges
    point to. Self-loops are rejected by ``add_edge``; duplicate (parallel)
    edges are not filtered out.
    """

    def __init__(self) -> None:
        # vertex label -> labels of the vertices reached by its outgoing edges
        self.adjList: dict[str, list[str]] = {}

    def add_vertex(self, vertex: str) -> None:
        """Register ``vertex`` with no outgoing edges; no-op if it already exists."""
        # setdefault leaves the neighbour list of an existing vertex untouched
        self.adjList.setdefault(vertex, [])

    def add_edge(self, start_vertex: str, end_vertex: str) -> None:
        """Append the directed edge ``start_vertex -> end_vertex``.

        Raises:
            ValueError: If either endpoint has not been added with
                ``add_vertex``, or if both endpoints are the same vertex.
                ``ValueError`` is a subclass of ``Exception``, so callers
                catching ``Exception`` keep working.
        """
        if start_vertex not in self.adjList or end_vertex not in self.adjList:
            raise ValueError("Some provided vertex does not exists in the graph")
        if start_vertex == end_vertex:
            raise ValueError("A vertex can't have an edge to itself")
        self.adjList[start_vertex].append(end_vertex)

    def __str__(self) -> str:
        """Return one ``vertex: [neighbors]`` line per vertex, in insertion order."""
        return "".join(
            f"{vertex}: {neighbors}\n" for vertex, neighbors in self.adjList.items()
        )

# Newick Strings

## 1. Write a Python script to extract the phylogenetic trees into a graph representation

In [5]:
def newick_to_adjacency_list(newick: str) -> AdjacencyList:
    """Convert one Newick string into a directed adjacency list.

    Edges point from each node to its children. Nodes without a label are
    given their position in the traversal (as a string) as a label.

    NOTE: vertices are keyed by label, so a generated number that coincides
    with a node actually named with that number, or two nodes sharing the
    same label, end up as a single vertex.

    Args:
        newick: A single semicolon-terminated Newick tree string.

    Returns:
        The adjacency list holding every node and every parent -> child edge.
    """
    parsed_tree = Tree(newick, format=8)
    graph = AdjacencyList()

    # First pass: label the unlabelled nodes and register every vertex.
    # This must finish before any edge is added, because an edge refers to
    # the (possibly generated) name of the child node.
    for position, node in enumerate(parsed_tree.traverse()):
        if node.name == "":
            node.name = str(position)
        graph.add_vertex(node.name)

    # Second pass: one edge from each node to each of its children
    # (leaf nodes have no children and contribute no edges).
    for node in parsed_tree.traverse():
        for child_node in node.get_children():
            graph.add_edge(start_vertex=node.name, end_vertex=child_node.name)

    return graph


# One adjacency list per tree, in the same order as newick_strings.
graph_representations1 = [
    newick_to_adjacency_list(newick) for newick in newick_strings
]

In [11]:
# Inspect the adjacency list built for the second tree (index 1).
print(graph_representations1[1])

0: ['Poaceae', '2']
Poaceae: []
2: ['3', '4']
3: ['5', '6']
4: ['Chenopodiaceae', 'Polygonaceae']
5: ['Apiaceae', 'Asteraceae']
6: ['11', 'Caprifoliaceae']
Chenopodiaceae: []
Polygonaceae: []
Apiaceae: []
Asteraceae: []
11: ['13', 'Solanaceae']
Caprifoliaceae: []
13: ['Brassicaceae', 'Fabaceae']
Solanaceae: []
Brassicaceae: []
Fabaceae: []



## 2. How many phylogenetic trees are there?

In [7]:
# Each entry of newick_strings is one semicolon-terminated tree string,
# so the length of the list is the number of trees.
print(f"There are in total: {len(newick_strings)} phylogenetic trees")

There are in total: 100 phylogenetic trees


## 3. Are these phylogenetic trees rooted or unrooted?

In [8]:
# Criterion used here: a tree whose root has more than two children is
# counted as unrooted; every other tree is counted as rooted.
# The parsed ete3 Tree object is itself the root node, so its children can be
# inspected directly instead of starting a traversal and breaking after the
# first node.
unrooted_indices = [
    index
    for index, newick in enumerate(newick_strings)
    if len(Tree(newick, format=8).children) > 2
]
n_unrooted_trees = len(unrooted_indices)

print(f"There are {n_unrooted_trees} unrooted trees\nand {len(newick_strings) - n_unrooted_trees} rooted trees")

There are 7 unrooted trees
and 93 rooted trees


There are 93 rooted trees and 7 unrooted trees (at indices 27, 31, 32, 36, 56, 58 and 95 of the `newick_strings` list).

# Enewick strings

In [9]:
# Load every network string from the eNewick hand-in file and preview the first ten.
enewick_strings = read_phylogenetic_tree("handins/handin1/enewick.tre")
enewick_strings[:10]

['(#1,((((2,3),1))#1,4));',
 '((#1,4),((1)#1,(2,3)));',
 '((#1,4),(((3)#1,2),1));',
 '((#1,4),(((2,3))#1,1));',
 '((#1,4),(((2)#1,3),1));',
 '((#1,((1)#1,4)),(2,3));',
 '(((#1,4),1),((3)#1,2));',
 '(#1,((((2,3))#1,4),1));',
 '(((#1,4),1),((2)#1,3));',
 '(#1,((((1)#1,4),3),2));']

## 4. 

## 5. How many phylogenetic networks are there?

In [10]:
# The eNewick file holds phylogenetic networks rather than trees, so the
# message names them accordingly (re-run the cell to refresh its output).
print(f"There are in total: {len(enewick_strings)} phylogenetic networks")

There are in total: 100 phylogenetic trees
