In [None]:
import numpy as np
import pandas as pd
import scipy as sp
%matplotlib inline
%config InlineBackend.figure_formats = ['svg']

import logging
logging.getLogger("tensorflow").setLevel(logging.ERROR)

import tensorflow.compat.v2 as tf
tf.enable_v2_behavior()
import tensorflow_probability as tfp
tfd = tfp.distributions

In [None]:
import optimizers
import sbn

def log_like_with(branch_id: int, branch_length: float, grad=False):
    """Evaluate the likelihood with a single branch length overridden.

    Temporarily writes ``branch_length`` into the notebook-level
    ``branch_lengths`` array at index ``branch_id``, queries the notebook-level
    ``inst``, and puts the previous value back before returning.

    Args:
        branch_id: Index into ``branch_lengths`` of the entry to override.
        branch_length: Value used for that entry during the evaluation.
        grad: If true, return the ``branch_id`` entry of the second item of
            ``inst.branch_gradients()[0]`` instead of the log likelihood.

    Returns:
        The first entry of ``inst.log_likelihoods()``, or, with ``grad``, the
        selected gradient entry.
    """
    saved_branch_length = branch_lengths[branch_id]
    branch_lengths[branch_id] = branch_length
    try:
        if grad:
            _, log_grad = inst.branch_gradients()[0]
            return np.array(log_grad)[branch_id]
        return np.array(inst.log_likelihoods())[0]
    finally:
        # Restore even when the evaluation raises, so a failed call cannot
        # leave the shared branch lengths in a modified state.
        branch_lengths[branch_id] = saved_branch_length

def phylo_log_like(x_arr):
    """Return the log likelihood for each candidate length of branch 2.

    Every element of ``x_arr`` is passed to ``log_like_with`` as the length of
    branch index 2; the results are collected into a NumPy array.
    """
    log_likes = []
    for candidate in x_arr:
        log_likes.append(log_like_with(2, candidate))
    return np.array(log_likes)

def grad_phylo_log_like(x_arr):
    """Return the gradient entry for branch 2 at each candidate length.

    Every element of ``x_arr`` is passed to ``log_like_with`` with
    ``grad=True`` as the length of branch index 2; the results are collected
    into a NumPy array.
    """
    gradients = []
    for candidate in x_arr:
        gradients.append(log_like_with(2, candidate, grad=True))
    return np.array(gradients)

# Sweep the length of branch 2 over [0, 0.3] and plot the resulting curve.
# NOTE(review): `inst` and `branch_lengths`, which `log_like_with` reads, are
# first assigned in a later cell of this notebook, so on a fresh kernel this
# cell only works after that cell has been run.
x_vals = np.linspace(0, 0.3, num=100)
df = pd.DataFrame({"x": x_vals, "y": phylo_log_like(x_vals)})
df.plot.line(x="x", y="y")

In [None]:
# Build the sbn instance that the likelihood helpers below query.
inst = sbn.instance("charlie")
# A single tree given as a parent-id vector, with three tip names.
# NOTE(review): presumably [3, 3, 3] means all three tips hang off node 3 --
# confirm against the sbn documentation.
inst.tree_collection = sbn.TreeCollection(
    [sbn.Tree.of_parent_id_vector([3, 3, 3])],
    ["mars", "saturn", "jupiter"])
inst.read_fasta_file('../data/hello.fasta')
inst.make_beagle_instances(2)
# copy=False asks NumPy not to copy; the code below relies on writes to this
# array reaching the tree's own branch lengths.
# NOTE(review): assumes sbn exposes branch_lengths through a buffer that NumPy
# can wrap without copying -- confirm.
branch_lengths_extended = np.array(inst.tree_collection.trees[0].branch_lengths,
                          copy=False)
branch_lengths_extended[:] = np.array([0.1, 0.1, 0.3, 0.])
# Here we are getting a slice that excludes the last (fake) element.
# Thus we can just deal with the actual branch lengths.
# Basic slicing yields a view, so writes to `branch_lengths` also land in
# `branch_lengths_extended`.
branch_lengths = branch_lengths_extended[:len(branch_lengths_extended)-1]

def log_like_with(in_branch_lengths, grad=False):
    """Evaluate the likelihood with all branch lengths overridden.

    Temporarily writes ``in_branch_lengths`` into the notebook-level
    ``branch_lengths`` array, queries the notebook-level ``inst``, and puts the
    previous values back before returning.

    Args:
        in_branch_lengths: Values assigned to ``branch_lengths[:]`` for the
            duration of the call (anything NumPy can broadcast into it).
        grad: If true, return the second item of ``inst.branch_gradients()[0]``
            as an array instead of the log likelihood.

    Returns:
        The first entry of ``inst.log_likelihoods()``, or, with ``grad``, the
        gradient array.
    """
    saved_branch_lengths = branch_lengths.copy()
    branch_lengths[:] = in_branch_lengths
    try:
        if grad:
            _, log_grad = inst.branch_gradients()[0]
            # np.array copies, so the result is unaffected by the restore.
            return np.array(log_grad)
        return np.array(inst.log_likelihoods())[0]
    finally:
        # Restore on every path: previously only the non-gradient branch put
        # the saved lengths back, and an exception skipped the restore.
        branch_lengths[:] = saved_branch_lengths

def phylo_log_like(x_arr):
    """Return the log likelihood for each set of branch lengths in ``x_arr``.

    Each element of ``x_arr`` is handed to ``log_like_with`` unchanged.
    """
    # TODO: a vectorized NumPy mapping could replace this element-wise pass.
    return np.array(list(map(log_like_with, x_arr)))

def grad_phylo_log_like(x_arr):
    """Return the likelihood gradient for each set of branch lengths in ``x_arr``.

    Each element of ``x_arr`` is handed to ``log_like_with`` with
    ``grad=True``; the per-element results are stacked into one array.
    """
    gradients = []
    for lengths in x_arr:
        gradients.append(log_like_with(lengths, grad=True))
    return np.array(gradients)

In [None]:
# Hold the first two branch lengths at 0.1 and sweep the third over [0, 0.3],
# then plot the log likelihood against the swept length.
x_arr = np.array(
    [[0.1, 0.1, third] for third in np.linspace(0, 0.3, num=100)]
)
df = pd.DataFrame({"x": x_arr[:, 2], "y": phylo_log_like(x_arr)})
df.plot.line(x="x", y="y")

In [None]:
def gamma_factory(params):
    """Build Gamma distributions from a two-column parameter matrix.

    Column 0 of ``params`` is used as the concentration and column 1 as the
    rate.
    """
    concentration = params[:, 0]
    rate = params[:, 1]
    return tfp.distributions.Gamma(concentration=concentration, rate=rate)

def lognormal_factory(params):
    """Build LogNormal distributions from a two-column parameter matrix.

    Column 0 of ``params`` is used as ``loc`` and column 1 as ``scale``.
    """
    loc = params[:, 0]
    scale = params[:, 1]
    return tfp.distributions.LogNormal(loc=loc, scale=scale)

def gamma_factory(params):
    """Build a Gamma distribution (or a batch of them) from ``params``.

    The last axis of ``params`` holds ``[concentration, rate]``. A 1-D vector
    therefore yields a single distribution, and a 2-D matrix with one row per
    distribution yields a batch.

    This definition is the one in effect after the cell runs (it follows an
    earlier ``gamma_factory`` in the same cell), so it has to accept the
    parameter matrix that ``TFContinuousParameterModel`` passes in; indexing
    rows with ``params[0]`` / ``params[1]`` would mix up the two layouts.
    """
    return tfp.distributions.Gamma(
        concentration=params[..., 0], rate=params[..., 1])

def lognormal_factory(params):
    """Build a LogNormal distribution (or a batch of them) from ``params``.

    The last axis of ``params`` holds ``[loc, scale]``. A 1-D vector therefore
    yields a single distribution, and a 2-D matrix with one row per
    distribution yields a batch.

    This definition is the one in effect after the cell runs (it follows an
    earlier ``lognormal_factory`` in the same cell), so it has to accept the
    parameter matrix that ``TFContinuousParameterModel`` passes in; indexing
    rows with ``params[0]`` / ``params[1]`` would mix up the two layouts.
    """
    return tfp.distributions.LogNormal(
        loc=params[..., 0], scale=params[..., 1])

In [None]:
# Reference Gamma(concentration=2, rate=5) distribution used by the two
# helpers defined below in this cell.
alpha, beta = 2., 5.
gamma = tfd.Gamma(rate=beta, concentration=alpha)

def grad_log_like(x):
    """Differentiate the notebook-level ``gamma`` log density at ``x``.

    ``x`` is converted to a float32 tensor, watched on a gradient tape, and the
    gradient of ``gamma.log_prob`` with respect to it is returned as a NumPy
    value.
    """
    with tf.GradientTape() as tape:
        point = tf.constant(x, dtype=tf.float32)
        tape.watch(point)
        log_density = gamma.log_prob(point)
    return tape.gradient(log_density, point).numpy()

def log_like(x):
    """Return the notebook-level ``gamma`` log density evaluated at ``x``."""
    log_density = gamma.log_prob(x)
    return log_density
    
# Smoke-test both helpers at the same two points. Only the value of the last
# expression (the log_like call) is shown as the cell output; the gradient
# computed on the first line is evaluated but not displayed.
grad_log_like(np.array([0.3, 0.2]))
log_like(np.array([0.3, 0.2]))

In [None]:
class TFContinuousParameterModel:
    """Variational approximation with one row of parameters per edge.

    ``q_factory`` maps a parameter array to a TensorFlow Probability
    distribution. The model draws reparameterized samples from it, records the
    gradients needed for an ELBO gradient estimate, and takes plain
    gradient-ascent steps on its parameter matrix.

    Attributes are documented where they are assigned in ``__init__``.
    """

    def __init__(self, q_factory, initial_params, params_count, particle_count, step_size=0.01):
        """Set up the parameter matrix by repeating ``initial_params``.

        Args:
            q_factory: Callable taking the parameter matrix (NumPy array or
                tensor) and returning a distribution.
            initial_params: 1-D NumPy array of starting parameter values.
            params_count: Number of rows in the parameter matrix.
            particle_count: Number of samples drawn by ``sample``.
            step_size: Multiplier applied to the gradient in ``gradient_step``.
        """
        assert initial_params.ndim == 1
        self.q_factory = q_factory
        # Every row starts as a copy of initial_params. The dtype is forced to
        # float so the in-place update in gradient_step also works when the
        # caller passes an integer array.
        self.param_matrix = np.full(
            (params_count, len(initial_params)), initial_params, dtype=float)
        self.particle_count = particle_count
        self.step_size = step_size
        # The current stored sample.
        self.x = None
        # The gradient of x with respect to the parameters of q.
        self.grad_x = None
        # The stochastic gradient of log sum q for x.
        self.grad_log_sum_q = None

    @staticmethod
    def _evaluation_grid(max_x):
        """Return 100 evenly spaced points on [max_x / 100, max_x]."""
        return np.linspace(max_x / 100, max_x, 100)

    def sample(self):
        """Draw ``particle_count`` samples from q and cache their gradients.

        Stores the sample in ``self.x``, its derivative with respect to the
        parameters in ``self.grad_x``, and the gradient of the summed log
        density in ``self.grad_log_sum_q``.

        Returns:
            The sample as a NumPy array.
        """
        with tf.GradientTape(persistent=True) as g:
            tf_params = tf.constant(self.param_matrix, dtype=tf.float32)
            g.watch(tf_params)
            q_distribution = self.q_factory(tf_params)
            tf_x = q_distribution.sample(self.particle_count)
            # log_prob is used directly: log(prob(x)) yields -inf (and NaN
            # gradients) as soon as the density underflows to zero.
            q_term = tf.math.reduce_sum(q_distribution.log_prob(tf_x))
        self.x = tf_x.numpy()
        # The Jacobian is laid out as particles x edges x edges x params.
        # Summing over axis 2 leaves particles x edges x params.
        self.grad_x = np.sum(g.jacobian(tf_x, tf_params).numpy(), axis=2)
        self.grad_log_sum_q = g.gradient(q_term, tf_params).numpy()
        del g  # Should happen anyway but being explicit to remember.
        return self.x

    def clear_sample(self):
        """Forget the cached sample and its gradients."""
        self.x = None
        self.grad_x = None
        self.grad_log_sum_q = None

    def elbo_gradient_using_current_sample(self, grad_log_p_x):
        """Estimate the ELBO gradient from the cached sample.

        Args:
            grad_log_p_x: Gradient of the target log density at each particle.
                With the 3-D ``grad_x`` produced by ``sample`` this must be
                particles x edges.

        Returns:
            An array shaped like ``self.grad_log_sum_q``, averaged over the
            particles.
        """
        assert self.grad_x is not None
        grad_log_p_x = np.asarray(grad_log_p_x)
        # Chain rule for the first term.
        if self.grad_x.ndim == 3:
            # For every edge, pair each particle's target gradient with that
            # particle's derivative of x and sum over particles. A plain
            # matmul would instead broadcast to particles x particles x params,
            # which cannot be combined with the edges x params q-gradient.
            chain_term = np.einsum("pe,pek->ek", grad_log_p_x, self.grad_x)
        else:
            # Single-variable layout: grad_x is particles x params.
            chain_term = np.matmul(grad_log_p_x, self.grad_x)
        unnormalized_result = chain_term - self.grad_log_sum_q
        return unnormalized_result / self.particle_count

    def elbo(self, target_log_like, max_x = 0.5):
        """Approximate the ELBO on a fixed grid of 100 points up to ``max_x``.

        NOTE(review): both ``target_log_like`` and q's ``log_prob`` are
        evaluated on a 1-D grid, so this presumably only makes sense when they
        broadcast against it -- confirm for the multi-row parameter matrix.
        """
        q_distribution = self.q_factory(self.param_matrix)
        x_vals = self._evaluation_grid(max_x)
        p_log_likes = target_log_like(x_vals)
        q_log_probs = q_distribution.log_prob(x_vals)
        return np.sum(sp.special.softmax(q_log_probs) * (p_log_likes - q_log_probs))

    def gradient_step(self, grad_log_p_x, history = None):
        """Take one gradient-ascent step and drop the cached sample.

        Args:
            grad_log_p_x: Passed to ``elbo_gradient_using_current_sample``.
            history: Optional list; a copy of the updated parameter matrix is
                appended to it.
        """
        grad = self.elbo_gradient_using_current_sample(grad_log_p_x)
        self.param_matrix += self.step_size * grad
        self.clear_sample()
        if history is not None:
            # Store a copy so later in-place updates do not rewrite history.
            history.append(self.param_matrix.copy())

    def plot(self, target_log_like, max_x = 0.5):
        """Plot the softmax-normalized target and fit on the evaluation grid.

        NOTE(review): like ``elbo``, this evaluates on a 1-D grid and presumably
        assumes both curves broadcast against it -- confirm.
        """
        x_vals = self._evaluation_grid(max_x)
        q_distribution = self.q_factory(self.param_matrix)
        df = pd.DataFrame({
            "x": x_vals,
            "target": sp.special.softmax(target_log_like(x_vals)),
            "fit": sp.special.softmax(q_distribution.log_prob(x_vals).numpy())})
        df.plot(x="x", y=["target", "fit"], kind="line",
                title=q_distribution.name+" "+str(self.param_matrix))

    
# Alternative Gamma-based configuration, kept for reference:
#m = TFContinuousParameterModel(gamma_factory, np.array([2., 12.]), 3, 100, step_size=0.05)
# LogNormal model with three parameter rows and five particles per sample.
m = TFContinuousParameterModel(
    q_factory=lognormal_factory,
    initial_params=np.array([-2., 0.5]),
    params_count=3,
    particle_count=5,
)
print(vars(m))

In [None]:
# Draw one batch of particles; this also fills m.x, m.grad_x and
# m.grad_log_sum_q for the inspection cells that follow.
m.sample()

In [None]:
# Evaluate the target log likelihood at each particle of the current sample.
phylo_log_like(m.x)

In [None]:
# Inspect the chain-rule product outside the class. `self` and `grad_log_p_x`
# only exist inside elbo_gradient_using_current_sample, so the notebook-level
# equivalents are used here: the model `m` and the target gradient evaluated
# at the current sample (the same value the optimization loop passes in).
np.matmul(grad_phylo_log_like(m.x), m.grad_x)

In [None]:
# Current variational parameters, one row per params_count entry.
m.param_matrix

In [None]:
# Shape of the sample stored by the last m.sample() call.
m.x.shape

In [None]:
# Shape of the stored derivative of the sample with respect to the parameters
# (the Jacobian after summing over axis 2 in sample()).
m.grad_x.shape

In [None]:
# Full contents of the stored derivative of the sample.
m.grad_x

In [None]:
# Inspect the chain-rule product outside the class. `self` and `grad_log_p_x`
# only exist inside elbo_gradient_using_current_sample, so the notebook-level
# equivalents are used here: the model `m` and the target gradient evaluated
# at the current sample (the same value the optimization loop passes in).
np.matmul(grad_phylo_log_like(m.x), m.grad_x)

In [None]:
# Split the stored derivative along its first axis, one entry per particle.
list(m.grad_x)

In [None]:
# Target versus fit before any gradient steps have been taken.
m.plot(phylo_log_like)

In [None]:
# Run 100 gradient-ascent steps, recording the parameters and an ELBO value
# after each one.
history = []
for _ in range(100):
    # Fresh particles, then one step using the target gradient at them.
    m.sample()
    m.gradient_step(grad_phylo_log_like(m.x), history)
    # gradient_step appended the updated parameter matrix; np.append without
    # an axis flattens it and adds the ELBO value as the final element.
    history[-1] = np.append(history[-1], m.elbo(phylo_log_like))

In [None]:
# Target versus fit after the optimization loop.
m.plot(phylo_log_like)

In [None]:
# Recorded trajectory: one flattened parameter array with the ELBO value
# appended per optimization step.
history