In [694]:
import pandas as pd
import numpy as np 
from itertools import permutations
import math
import random

In [695]:
#generate a tree to obtain a additive distance matrix 

class Vertex:
    def __init__(self, node):
        self.id = node
        self.adjacent = {}

    def __str__(self):
        return str([x.id for x in self.adjacent])

    def add_neighbor(self, neighbor, weight=0):
        self.adjacent[neighbor] = weight

    def get_connections(self):
        return self.adjacent.keys()  

    def get_id(self):
        return self.id

    def get_weight(self, neighbor):
        return self.adjacent[neighbor]

class Tree:
    def __init__(self):
        self.vert_dict = {}
        self.num_vertices = 0

    def __iter__(self):
        return iter(self.vert_dict.values())

    def add_vertex(self, node):
        self.num_vertices = self.num_vertices + 1
        new_vertex = Vertex(node)
        self.vert_dict[node] = new_vertex
        return new_vertex

    def get_vertex(self, n):
        if n in self.vert_dict:
            return self.vert_dict[n]
        else:
            return None

    def add_edge(self, frm, to, cost = 0):
        if frm not in self.vert_dict:
            self.add_vertex(frm)
        if to not in self.vert_dict:
            self.add_vertex(to)

        self.vert_dict[frm].add_neighbor(self.vert_dict[to], cost)
        self.vert_dict[to].add_neighbor(self.vert_dict[frm], cost)

    def get_vertices(self):
        return self.vert_dict.keys()

In [696]:
output = (np.array([random.randrange(1, 10) for _ in range(0, 5)]))

#generate n+1 random edge weights where n = # of nodes + a weight for a connecting branch

t = Tree()

t.add_vertex('a')
t.add_vertex('b')
t.add_vertex('c')
t.add_vertex('d')

t.add_edge('a', 'b', output[0] + output[1])   #dist between a to b
t.add_edge('a', 'c', output[0] + output[4] + output[2]) #distance between a, connecting limb, c
t.add_edge('a', 'd', output[0] + output[4] + output[3]) 
t.add_edge('b', 'c', output[1] + output[4] + output[2])
t.add_edge('b', 'd', output[1] + output[4] + output[3])
t.add_edge('c', 'd', output[2] + output[3])

In [697]:
# get dict of dicts for each node, neighbors, distance to neighbors 
def get_matrix(tree):
    D = {}
    for v in tree:
        vid = v.get_id()
        D[vid] = {vid : 0} # assign distance from itself to 0
        for w in v.get_connections():
            wid = w.get_id()
            D[vid][wid] = int(v.get_weight(w))
    return D
        
get_matrix(t)

{'a': {'a': 0, 'b': 8, 'c': 23, 'd': 20},
 'b': {'a': 8, 'b': 0, 'c': 17, 'd': 14},
 'c': {'a': 23, 'b': 17, 'c': 0, 'd': 13},
 'd': {'a': 20, 'b': 14, 'c': 13, 'd': 0}}

In [698]:
mat= pd.DataFrame.from_dict(get_matrix(t)) #convert to 2d matrix
mat

Unnamed: 0,a,b,c,d
a,0,8,23,20
b,8,0,17,14
c,23,17,0,13
d,20,14,13,0


In [709]:
#check if the matrix is additive using four point condition
def is_additive(matrix, labels):
    i = labels[0]
    j = labels[1] 
    k = labels[2] 
    l = labels[3] 
    result = ''
    if ((matrix[i][j] + matrix[k][l] <= matrix[i][k] + matrix[j][l]) and 
    (matrix[i][k] + matrix[j][l] == matrix[i][l] + matrix[j][k])):
        result = "Additive"
    else:
        result = "Not additive"
    return result 

labels = ['a','b','c','d']
is_additive(mat, labels)

'Additive'

## Neigbor join

In [701]:
import copy
clusters = []
for cluster in list(dm):
    clusters.append(cluster)
cluster2idx = { name : idx for idx, name in enumerate(clusters) }
idx2cluster = copy.deepcopy(clusters)
m = copy.deepcopy(dm).to_dict()
Z = np.empty((0,4), float)
result = ""
print(clusters)
#membership = [set([cluster2idx[name]]) for name in clusters]

['A', 'B', 'C', 'D']


In [702]:
M, N = dm.shape
new_cluster_idx = M+1
while len(clusters) > 1:
    print(result)
    print(m)
    idx1 = None
    idx2 = None
    ui = 0
    uj = 0
    Sd = 100000
    for i in clusters:
        for j in clusters:
            if i != j:
                ui = sum(m[i][k] for k in clusters)
                uj = sum(m[j][k] for k in clusters)
                curr = (M - 2) * m[i][j] - ui - uj
                if Sd > curr:
                    Sd = curr
                    idx1 = cluster2idx[i]
                    idx2 = cluster2idx[j]
    
    Dij = m[idx2cluster[idx1]][idx2cluster[idx2]]
    new_cluster = "(" + idx2cluster[idx1] +"." + idx2cluster[idx2] + ")"
    result = new_cluster
    m[new_cluster] = {}
    new_cluster_idx += 1
    #membership.append(membership[cluster2idx[idx1]] | membership[cluster2idx[idx2]])
    #Z = np.append(Z, np.array([[idx1, idx2, Sd]]))
    
    m[idx2cluster[idx1]][new_cluster] = (m[idx2cluster[idx1]][idx2cluster[idx2]] + (1/(M-2)) * (ui - uj))/2
    m[idx2cluster[idx2]][new_cluster] = (m[idx2cluster[idx1]][idx2cluster[idx2]] + (1/(M-2)) * (uj - ui))/2
    
    #if idx2cluster[idx1] in m: del m[idx2cluster[idx1]]
    #if idx2cluster[idx2] in m: del m[idx2cluster[idx2]]
    
    clusters.remove(idx2cluster[idx1])
    clusters.remove(idx2cluster[idx2])
    
    m[new_cluster][new_cluster] = 0
    for c in clusters:
        m[c][new_cluster] = (m[c][idx2cluster[idx1]] + m[c][idx2cluster[idx2]] - Dij)/2
        m[new_cluster][c] = m[c][new_cluster]
        for k in list(m[c].keys()):
            if k == idx2cluster[idx1] or k == idx2cluster[idx2]:
                del m[c][k]
        
    del cluster2idx[idx2cluster[idx1]]
    del cluster2idx[idx2cluster[idx2]]
    if idx2cluster[idx1] in m: del m[idx2cluster[idx1]]
    if idx2cluster[idx2] in m: del m[idx2cluster[idx2]]
    
    clusters.append(new_cluster)
    cluster2idx[new_cluster] = len(idx2cluster)
    idx2cluster.append(new_cluster)
        
    #M -= 1
    #N -= 1


{'A': {'A': 0, 'B': 72, 'C': 73, 'D': 74}, 'B': {'A': 72, 'B': 0, 'C': 75, 'D': 76}, 'C': {'A': 73, 'B': 75, 'C': 0, 'D': 77}, 'D': {'A': 74, 'B': 76, 'C': 77, 'D': 0}}
(A.B)
{'C': {'C': 0, 'D': 77, '(A.B)': 38.0}, 'D': {'C': 77, 'D': 0, '(A.B)': 39.0}, '(A.B)': {'(A.B)': 0, 'C': 38.0, 'D': 39.0}}
(C.(A.B))
{'D': {'D': 0, '(C.(A.B))': 39.0}, '(C.(A.B))': {'(C.(A.B))': 0, 'D': 39.0}}


In [703]:
print(result)

(D.(C.(A.B)))


In [704]:
##additive phlyogeny construction using degenerate triples

In [705]:
def additive_phylogeny(D,T):
    if D.shape == (2,2):
        T = {dm.index[0] : (dm.index[1], dm[dm.index[0]][dm.index[1]])}
        return T
    i,j,k,flag = degenerate_check(D)
    if



SyntaxError: invalid syntax (<ipython-input-705-b0db53ccc965>, line 6)