# Project 1 Phylogenetics

In [41]:
import matplotlib.pyplot as plt
import numpy as np
import math

In [42]:
"""
Constructs a transition Matrix with a specified alpha level a
Args:
     a: alpha level for the Jukes Cantor Matrix
Returns:
     Transition Matrix corresponding to the Jukes-Cantor Algorithm
"""

def transition_matrix(a):
    """Build the 4x4 Jukes-Cantor transition matrix for alpha level a.

    Args:
        a: alpha level; every diagonal entry is 1 - a and every
           off-diagonal entry is a / 3.

    Returns:
        A 4x4 numpy array with the entries described above.
    """
    # Divide by a float: on Python 2 an integer a (e.g. 1) would otherwise be
    # floor-divided, silently zeroing every off-diagonal entry.
    b = a / 3.0
    M = np.array([[1 - a, b, b, b],
                  [b, 1 - a, b, b],
                  [b, b, 1 - a, b],
                  [b, b, b, 1 - a]])
    return M

def rand_vector(n):
    """Return n uniform random samples rescaled so that they sum to 1.

    Args:
        n: number of entries to draw with np.random.rand

    Returns:
        A length-n numpy array: the samples divided by their total.
    """
    samples = np.random.rand(n)
    total = sum(samples)
    return samples / total

"""
Finds the equilibrium point of a transition Matrix
Args:
    M: the transition Matrix for the Jukes Cantor Algorithm 
Returns:
    The equilibrium point of a transition Matrix
"""

def find_eq(M):
    """Find the equilibrium vector of a transition matrix.

    The equilibrium is the eigenvector of M for the eigenvalue 1, rescaled
    so that its entries sum to 1.

    Args:
        M: square transition matrix (numpy array)

    Returns:
        1-D numpy array: the eigenvector for the eigenvalue closest to 1,
        divided by the sum of its entries.

    Raises:
        ValueError: if no eigenvalue of M lies within .0001 of 1.
    """
    eigenvalues, eigenvectors = np.linalg.eig(M)
    # Use the eigenvalue *closest* to 1 rather than the first one inside the
    # tolerance: for a very small alpha the other eigenvalues also fall
    # inside the tolerance, and their eigenvectors are not the equilibrium.
    best = int(np.argmin(np.abs(eigenvalues - 1)))
    if not abs(eigenvalues[best] - 1) < .0001:
        # Previously this case fell off the end and returned None.
        raise ValueError('matrix has no eigenvalue within .0001 of 1')
    # eig returns complex arrays when any eigenvalue is complex; keep only
    # the real part of the selected eigenvector.
    vector = np.real(eigenvectors[:, best])
    return vector / vector.sum()

"""
Counts the number of time steps until the Jukes-Cantor model
reaches a steady state.
Args:
          M: the transition Matrix for the Jukes Cantor Algorithm 
        p_t: the initial probability vector specified by 4.4.3
    epsilon: the acceptable error bound on the equilibrium value

Returns:
    Number of iterations required for the model to converge 
    to within epsilon of the equilibrium value
"""
def counter(epsilon, p_t, M, max_iter=None):
    """Count the time steps until p_t is within epsilon of the equilibrium.

    The check is made before the first step, so a p_t that already lies
    within epsilon of the equilibrium gives 0.

    Args:
        epsilon: acceptable error bound; every component must differ from
                 the equilibrium value by strictly less than this
        p_t: the initial probability vector (numpy array)
        M: the transition matrix; each step replaces p_t with M.dot(p_t)
        max_iter: optional cap on the number of steps; None (the default)
                  means no cap

    Returns:
        Number of iterations required for the model to converge
        to within epsilon of the equilibrium value.

    Raises:
        RuntimeError: if a step leaves p_t unchanged while it is still
            outside epsilon (the loop could never finish), or if max_iter
            steps were taken without converging.
    """
    p_eq = find_eq(M)
    count = 0
    while not np.all(np.abs(p_eq - p_t) < epsilon):
        if max_iter is not None and count >= max_iter:
            raise RuntimeError(
                'no convergence within %d iterations' % max_iter)
        p_next = M.dot(p_t)
        # A fixed point outside epsilon would otherwise loop forever
        # (for example when M is the identity matrix).
        if np.array_equal(p_next, p_t):
            raise RuntimeError(
                'iteration stalled after %d steps without converging' % count)
        p_t = p_next
        count += 1
    return count

"""
Performs simulations for all of the specific examples in problem 443
Args:
          p: probability vector
Returns:
    Nothing; the numbers of iterations required for the model to converge
    to within epsilon = .05 and epsilon = .01 of the equilibrium are printed
"""
def problem_443(p):
    """Print the convergence step counts for p with alpha level .3.

    Both counts (epsilon = 0.05 and epsilon = 0.01) are computed before
    anything is printed.

    Args:
        p: probability vector to start from

    Returns:
        None; the report is written with print.
    """
    jc_matrix = transition_matrix(.3)
    steps_loose = counter(0.05, p, jc_matrix)
    steps_tight = counter(0.01, p, jc_matrix)
    report = (
        '------------------------------------------------------------',
        'number of iterations to get within epsilon = .05 is:',
        steps_loose,
        '-------------------------------------------------------------',
        'number of iterations to get with epsilon = .01 is:',
        steps_tight,
        '-------------------------------------------------------------',
    )
    for entry in report:
        print(entry)


"""
Performs simulations for a set of random probability vectors

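Args:
       a: alpha level for the Jukes-Cantor matrix
    iter: number of random probability vectors to simulate
Returns:
    A list [small, large] of two arrays holding, for each random vector,
    the number of iterations required for the model to converge to within
    epsilon = .01 (small) and epsilon = .05 (large) of the equilibrium value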
"""
def simulate_convergence(a, iter):
    """Measure convergence step counts for random starting vectors.

    Args:
        a: alpha level passed to transition_matrix
        iter: number of random 4-entry probability vectors to try (the name
              shadows the built-in but is kept so keyword callers still work)

    Returns:
        A list [small, large] of two numpy arrays of length iter. Entry x
        holds the step count for the x-th random vector with epsilon = 0.01
        (small) and epsilon = 0.05 (large).
    """
    small = np.zeros(iter)
    large = np.zeros(iter)
    M = transition_matrix(a)

    # range instead of xrange so the loop also runs on Python 3.
    for x in range(iter):
        p = rand_vector(4)
        large[x] = counter(0.05, p, M)
        small[x] = counter(0.01, p, M)
    return [small, large]

# Problem 3a)

 How many time steps does it take to reach equilibrium for $p_0 = [.2,.3,.4,.1]$?

In [43]:
# Problem 3a: starting distribution given in the exercise; problem_443
# prints the step counts for both tolerances.
p_0 = np.array([.2,.3,.4,.1])
problem_443(p_0)

------------------------------------------------------------
number of iterations to get within epsilon = .05 is:
3
-------------------------------------------------------------
number of iterations to get with epsilon = .01 is:
6
-------------------------------------------------------------


# Problem 3b)

Making other choices for $p_0$

In [44]:
# Problem 3b, first alternative: the last entry has zero probability.
p_1 = np.array([.5,.25,.25,0])
problem_443(p_1)

------------------------------------------------------------
number of iterations to get within epsilon = .05 is:
4
-------------------------------------------------------------
number of iterations to get with epsilon = .01 is:
7
-------------------------------------------------------------


In [48]:
# Problem 3b, second alternative starting distribution.
p_2 = np.array([.45,.25,.15,.15])
problem_443(p_2)

------------------------------------------------------------
number of iterations to get within epsilon = .05 is:
3
-------------------------------------------------------------
number of iterations to get with epsilon = .01 is:
6
-------------------------------------------------------------


In [49]:
# Problem 3b, third alternative: the first entry has zero probability.
p_3 = np.array([0,.75,.125,.125])
problem_443(p_3)

------------------------------------------------------------
number of iterations to get within epsilon = .05 is:
5
-------------------------------------------------------------
number of iterations to get with epsilon = .01 is:
8
-------------------------------------------------------------


# Problem 3c) 

Using $p_0 = [.25, .25, .25, .25]$, what do you observe? Why?

In [47]:
# Problem 3c: uniform starting distribution. Note that this rebinds the
# p_0 used for Problem 3a.
p_0 = np.array([.25,.25,.25,.25])
problem_443(p_0)

------------------------------------------------------------
number of iterations to get within epsilon = .05 is:
0
-------------------------------------------------------------
number of iterations to get with epsilon = .01 is:
0
-------------------------------------------------------------


 ###### Explanation:

# Problem 3d)

Using $p_0 = [0,1,0,0]$, what do you observe? What is the biological meaning of this $p_0$?

In [51]:
# Problem 3d: all of the probability on the second entry. Rebinds p_0 again.
p_0 = np.array([0,1,0,0])
problem_443(p_0)

------------------------------------------------------------
number of iterations to get within epsilon = .05 is:
6
-------------------------------------------------------------
number of iterations to get with epsilon = .01 is:
9
-------------------------------------------------------------


 ###### Explanation:

# Problem 4a)