# Python Imports

In [None]:
import inspect
from functools import partial
from math import sqrt

import numpy as np
from matplotlib import pyplot as plt
import matplotlib.gridspec as gridspec
from scipy import stats

import ternary

In [None]:
from matplotlib import rc

# Notebook-wide matplotlib defaults: 16x8 figures, Helvetica as the
# sans-serif face, and LaTeX rendering for all text.
# (Same effect as plt.rcParams['figure.figsize'] = [16, 8].)
rc('figure', figsize=(16, 8))
rc('font', **{'family': 'sans-serif', 'sans-serif': ['Helvetica']})
# For Palatino and other serif fonts use instead:
# rc('font', **{'family': 'serif', 'serif': ['Palatino']})
rc('text', usetex=True)

In [None]:
import pyed
from pyed import dynamics, geometries, incentives, information, normalize
from pyed.incentives import (
    linear_fitness, fermi_incentive, logit_incentive, replicator_incentive_power, 
    rock_paper_scissors, uniform_mutation_matrix)
from pyed.dynamics import compute_trajectory, replicator_trajectory

# Plotting and Figures

In [None]:
def plot_trajectories(data, params, divergence=None, *, savepath=None, dpi=200):
    """Draw trajectories on a ternary plot next to their divergence curves.

    The left panel plots every trajectory with ``ternary``; the right panel
    plots, per trajectory, ``divergence(e, x)`` for each state ``x`` against
    the step index, where ``e = normalize([1, 1, 1])``.

    Args:
        data: Indexable collection of trajectories; ``data[i]`` is the
            sequence of states belonging to ``params[i]``.
        params: Sequence of ``(label, color)`` pairs. The label is used as
            the legend entry (callers pass momentum values or strings).
        divergence: Callable ``divergence(e, x)``. Falls back to
            ``information.kl_divergence`` when falsy.
        savepath: Optional file name. When given, the figure is written with
            ``plt.savefig`` *before* ``plt.show()`` is called. Saving after
            this function returns may produce an empty image on backends
            that discard the figure once it has been shown.
        dpi: Resolution forwarded to ``plt.savefig`` when ``savepath`` is set.
    """
    if not divergence:
        divergence = information.kl_divergence

    gs = gridspec.GridSpec(1, 2)

    # Left panel: trajectories on the simplex.
    ax = plt.subplot(gs[0, 0])
    _, tax = ternary.figure(ax=ax)
    for i, (momentum, color) in enumerate(params):
        tax.plot(data[i], linewidth=2, color=color, label=momentum)
    tax.boundary()
    tax.ax.set_title("Trajectories")
    tax.legend()

    # Right panel: divergence from the reference point at every step.
    ax = plt.subplot(gs[0, 1])
    # The reference point does not depend on the trajectory; build it once.
    e = normalize(np.array([1, 1, 1]))
    for i, (momentum, color) in enumerate(params):
        v = [divergence(e, x) for x in data[i]]
        ax.plot(range(len(v)), v, color=color, label=momentum)
    ax.set_title("Divergences")
    ax.legend()

    if savepath is not None:
        plt.savefig(savepath, dpi=dpi)
    plt.show()

    
def plot_convergence(data, params, divergence=None):
    """Plot two convergence diagnostics side by side for each trajectory.

    With ``v[k] = divergence(e, data[i][k])`` and ``e = normalize([1, 1, 1])``:

    * left panel: ``log(v[k] / v[0])`` against ``k``;
    * right panel: ``v[k] / v[k-1]`` against ``k`` for ``k >= 1``.

    Both panels are drawn into one 1x2 grid and shown with a single
    ``plt.show()``. Calling ``show`` between the panels would finish the
    figure early and push the second panel onto a new figure.

    Args:
        data: Indexable collection of trajectories; ``data[i]`` belongs to
            ``params[i]``.
        params: Sequence of ``(label, color)`` pairs used for legend/colour.
        divergence: Callable ``divergence(e, x)``. Falls back to
            ``information.kl_divergence`` when falsy.
    """
    if not divergence:
        divergence = information.kl_divergence

    gs = gridspec.GridSpec(1, 2)
    e = normalize(np.array([1, 1, 1]))

    # Divergence series per trajectory, computed once and used by both panels.
    series = [[divergence(e, x) for x in data[i]] for i, _ in enumerate(params)]

    # Left panel: log of the divergence relative to its initial value.
    ax = plt.subplot(gs[0, 0])
    for v, (momentum, color) in zip(series, params):
        ratios = [np.log(x / v[0]) for x in v]
        ax.plot(range(len(v)), ratios, color=color, label=momentum)
    ax.set_title("Log Divergence ratio")
    ax.legend()

    # Right panel: ratio of each divergence to the one before it.
    ax = plt.subplot(gs[0, 1])
    for v, (momentum, color) in zip(series, params):
        steps = range(1, len(v))
        ratios = [v[k] / v[k - 1] for k in steps]
        ax.plot(steps, ratios, color=color, label=momentum)
    # NOTE(review): the title says "log" but the plotted quantity is the raw
    # ratio v[k] / v[k-1]; confirm which one is intended.
    ax.set_title("log Divergence ratio subsequent")
    ax.legend()
    plt.show()
    
    
def time_to(data, params, divergence=None, domain=None):
    """Plot how many steps each trajectory needs to get below a threshold.

    For every threshold ``x`` in ``domain`` and every trajectory, the "time
    to" value is the index of the first step whose divergence from
    ``e = normalize([1, 1, 1])`` is strictly below ``x``, or the trajectory
    length if that never happens.

    * left panel: time-to values against the threshold;
    * right panel: the same values divided element-wise by those of the
      first trajectory (``data[0]``).

    Args:
        data: Indexable collection of trajectories; ``data[i]`` belongs to
            ``params[i]`` and ``data[0]`` is the reference for the right
            panel.
        params: Sequence of ``(label, color)`` pairs used for legend/colour.
        divergence: Callable ``divergence(e, x)``. Falls back to
            ``information.kl_divergence`` when falsy.
        domain: Sequence of thresholds. ``None`` or an empty sequence selects
            ``np.arange(0.4, 0.05, -0.01)``.
    """
    if not divergence:
        divergence = information.kl_divergence

    # `domain` defaults to None, so it must be checked before taking len().
    if domain is None or len(domain) == 0:
        domain = list(np.arange(0.4, 0.05, -0.01))

    def process_data(ds):
        """Return, per threshold, the first index with ds[index] < threshold."""
        return np.array([
            next((i for i, d in enumerate(ds) if d < x), len(ds))
            for x in domain
        ])

    gs = gridspec.GridSpec(1, 2)
    e = normalize(np.array([1, 1, 1]))

    # Time-to curves per trajectory, computed once and used by both panels.
    curves = [
        process_data([divergence(e, x) for x in data[i]])
        for i, _ in enumerate(params)
    ]

    # Left panel: absolute number of steps.
    ax = plt.subplot(gs[0, 0])
    for ys, (momentum, color) in zip(curves, params):
        ax.plot(domain, ys, color=color, label=momentum)
    ax.set_title("Time to")
    ax.legend()

    # Right panel: steps relative to the first trajectory.
    ax = plt.subplot(gs[0, 1])
    ys0 = process_data([divergence(e, x) for x in data[0]])
    for ys, (momentum, color) in zip(curves, params):
        # NOTE(review): ys0 contains 0 wherever data[0] starts below the
        # threshold, which makes this ratio non-finite at that point.
        ax.plot(domain, ys / ys0, color=color, label=momentum)
    ax.set_title("Time to relative")
    ax.legend()
    plt.show()

def approximate_q(data, params, divergence=None, domain=None):
    """Plot an estimate of the order of convergence for each trajectory.

    For a divergence series ``d`` the estimate at index ``i >= 3`` is

        log(|d[i] - d[i-1]| / |d[i-1] - d[i-2]|)
        ----------------------------------------
        log(|d[i-1] - d[i-2]| / |d[i-2] - d[i-3]|)

    where ``d[k] = divergence(e, data[j][k])`` and ``e = normalize([1, 1, 1])``.

    Args:
        data: Indexable collection of trajectories; ``data[i]`` belongs to
            ``params[i]``.
        params: Sequence of ``(label, color)`` pairs used for legend/colour.
        divergence: Callable ``divergence(e, x)``. Falls back to
            ``information.kl_divergence`` when falsy.
        domain: Accepted but not used by this function.
    """
    divergence = divergence or information.kl_divergence

    def order_estimates(series):
        # gaps[k] is |series[k+1] - series[k]|, the size of step k+1.
        gaps = [np.abs(later - earlier)
                for earlier, later in zip(series, series[1:])]
        estimates = []
        # Slide over three consecutive gaps (four consecutive values).
        for g_old, g_mid, g_new in zip(gaps, gaps[1:], gaps[2:]):
            numerator = np.log(g_new / g_mid)
            denominator = np.log(g_mid / g_old)
            estimates.append(numerator / denominator)
        return estimates

    grid = gridspec.GridSpec(1, 1)

    # Single panel holding one curve per trajectory.
    ax = plt.subplot(grid[0, 0])
    e = normalize(np.array([1, 1, 1]))

    for idx, (momentum, color) in enumerate(params):
        divergences = [divergence(e, state) for state in data[idx]]
        orders = order_estimates(divergences)
        ax.plot(range(len(orders)), orders, color=color, label=momentum)
    ax.set_title("Approximate Order")
    ax.legend()
    plt.show()
        

# Figures Actually in the Paper

## Diagram for momentum

In [None]:
# Coefficient 1 / (1 - beta) as a function of the momentum beta.
betas = np.linspace(-1, 3, num=51)
# The grid can land on beta = 1, where the coefficient is infinite; silence
# the resulting divide-by-zero warning instead of letting it clutter the cell.
with np.errstate(divide='ignore'):
    ys = 1. / (1 - betas)


fig, ax = plt.subplots(figsize=(20, 10))

plt.plot(betas, ys, linewidth=3)

# Guide lines. axvline's ymin/ymax and axhline's xmin/xmax are fractions of
# the axes (0 to 1), not data coordinates, so the defaults already span the
# whole plot and no explicit extent is passed.
plt.axvline(x=0, dashes=[3, 3], color='grey')
plt.axvline(x=1, dashes=[2, 2], color='black')
plt.axvline(x=2, dashes=[2, 2], color='black')

plt.axhline(y=0, color='black')
plt.axhline(y=1, dashes=[2, 2], color='black')
plt.axhline(y=-1, dashes=[2, 2], color='black')

# beta = 0 gives coefficient 1: the momentum-free case.
plt.plot([0], [1], marker='o', markersize=10, color="red")
plt.ylim(-5, 5)
plt.xlim(-1, 3)

ax.annotate('Velocity increasing',
            xy=(-0.5, 3), xycoords='data',
            xytext=(-100, 60), textcoords='offset points',
            size=30)

ax.annotate('Velocity decreasing and\nreversed',
            xy=(1.8, -3), xycoords='data',
            xytext=(-30, -30), textcoords='offset points',
            size=30)


ax.annotate('Momentum free',
            xy=(-0.05, 1.5), xycoords='data',
            xytext=(-100, 60), textcoords='offset points',
            size=20, color='r',
            arrowprops=dict(arrowstyle="->"))

plt.xlabel(r"Momentum $\beta$", size=40)
plt.ylabel(r"Coefficient $\frac{1}{1-\beta}$", size=40)
plt.xticks(range(-1, 4, 1), range(-1, 4, 1), size=20)
plt.yticks(range(-4, 5, 2), range(-4, 5, 2), size=20)

plt.savefig("momentum_beta.png", dpi=400)

# Phase Portraits and KL-divergence

In [None]:
# Phase portraits and divergence curves for several momentum values, all
# started from the same point and run with step size h=0.01.
initial_state = normalize(np.array([1, 1, 3]))

# Dynamics parameters
m = rock_paper_scissors(a=2, b=-1)
print(m)
fitness = linear_fitness(m)
incentive = replicator_incentive_power(fitness, 1)
# ep=0. -- presumably switches mutation off; confirm against pyed.
mu = uniform_mutation_matrix(3, ep=0.)

# Various momenta
# Each entry is (momentum value, line colour); the momentum doubles as the
# legend label in plot_trajectories.
params = [(-0.5, 'b'), (0, 'black'), (0.5, 'g'), (0.9, 'r')]
data = []

# One trajectory per momentum value, appended in the same order as `params`
# so that data[i] lines up with params[i].
for momentum, _ in params:
    t = compute_trajectory(
        initial_state,
        incentive,
        iterations=1000,
        mu=mu,
        momentum=momentum,
        h=0.01)
    data.append(t)

plot_trajectories(data, params)
# NOTE(review): plot_trajectories ends with plt.show(); on backends that
# discard the figure once shown, this savefig may write an empty image.
# NOTE(review): no file extension is given here, unlike the other savefig
# calls in this notebook -- confirm the resulting file name/format.
plt.savefig("divergence_example", dpi=200)

In [None]:
# Polyak vs. Nesterov momentum (both with momentum 0.7) compared with a
# momentum-free baseline, all from the same starting point.
initial_state = normalize(np.array([1, 1, 4]))

# Dynamics parameters
m = rock_paper_scissors(a=-1, b=1)
fitness = linear_fitness(m)
incentive = replicator_incentive_power(fitness, 1)
# incentive = fermi_incentive(fitness, beta=1)
mu = uniform_mutation_matrix(3, ep=0.)

momentum = .7
# Labels/colours in the same order as the runs below: nesterov=False first
# (Polyak), then nesterov=True (Nesterov).
params = [('Polyak', 'red'), ('Nesterov', 'blue')]
data = []

for nesterov in [False, True]:
    t = compute_trajectory(
        initial_state,
        incentive,
        iterations=3000,
        mu=mu,
        momentum=momentum,
        nesterov=nesterov,
    )
    data.append(t)

# Momentum-free baseline. The flag is spelled out rather than read from the
# loop variable left over above (whose final value is True), so this run no
# longer depends on the loop's iteration order.
# NOTE(review): with momentum=0 the flag presumably has no effect -- confirm.
t = compute_trajectory(
    initial_state,
    incentive,
    iterations=3000,
    mu=mu,
    momentum=0,
    nesterov=True,
)
data.append(t)
params.append(("Momentum free", "black"))

plot_trajectories(data, params)
# NOTE(review): plot_trajectories ends with plt.show(); on backends that
# discard the figure once shown, this savefig may write an empty image.
plt.savefig("convergence_divergence.png", dpi=200)

In [None]:
# Trajectories for increasing momentum values with compute_trajectory's
# default momentum variant (no `nesterov` argument is passed here).
initial_state = normalize(np.array([1, 1, 4]))

# Dynamics parameters
m = rock_paper_scissors(a=2, b=1)
fitness = linear_fitness(m)
incentive = replicator_incentive_power(fitness, 1)
# ep=0. -- presumably switches mutation off; confirm against pyed.
mu = uniform_mutation_matrix(3, ep=0.)

# Various momenta
# Each entry is (momentum value, line colour); the momentum doubles as the
# legend label in plot_trajectories.
params = [(0, 'black'), (.5, 'b'), (0.9, 'r'), (0.95, 'y'), (0.96, 'g')]
data = []

# One trajectory per momentum value, in the same order as `params`.
for momentum, _ in params:
    t = compute_trajectory(
        initial_state,
        incentive,
        iterations=1000,
        mu=mu,
        momentum=momentum)
    data.append(t)

plot_trajectories(data, params)
# NOTE(review): plot_trajectories ends with plt.show(); on backends that
# discard the figure once shown, this savefig may write an empty image.
plt.savefig("polyak_examples.png", dpi=200)

In [None]:
# Same setup and momentum values as the "polyak_examples.png" cell, but every
# trajectory is computed with nesterov=True.
initial_state = normalize(np.array([1, 1, 4]))

# Dynamics parameters
m = rock_paper_scissors(a=2, b=1)
fitness = linear_fitness(m)
incentive = replicator_incentive_power(fitness, 1)
# ep=0. -- presumably switches mutation off; confirm against pyed.
mu = uniform_mutation_matrix(3, ep=0.)

# Various momenta
# Each entry is (momentum value, line colour); the momentum doubles as the
# legend label in plot_trajectories.
params = [(0, 'black'), (.5, 'b'), (0.9, 'r'), (0.95, 'y'), (0.96, 'g')]
data = []

# One trajectory per momentum value, in the same order as `params`.
for momentum, _ in params:
    t = compute_trajectory(
        initial_state,
        incentive,
        iterations=1000,
        mu=mu,
        momentum=momentum,
        nesterov=True
        )
    data.append(t)

plot_trajectories(data, params)
# NOTE(review): plot_trajectories ends with plt.show(); on backends that
# discard the figure once shown, this savefig may write an empty image.
plt.savefig("nesterov_examples.png", dpi=200)

# Convergence

In [None]:
%%time
# Length of the momentum-free replicator trajectory for a range of step sizes.
initial_state = normalize(np.array([1, 1, 4]))

# Dynamics parameters
m = rock_paper_scissors(a=1, b=1)
fitness = linear_fitness(m)

alphas = np.linspace(.001, .1, num=20)
iterations = [
    len(replicator_trajectory(initial_state, fitness, h=step, momentum=0.0))
    for step in alphas
]

In [None]:
%%time
def make_num_iter_fn(initial_state, fitness):
    """Build a function mapping (step size, momentum) to a trajectory length.

    The returned ``num_iter(alpha, beta)`` runs ``replicator_trajectory`` from
    ``initial_state`` under ``fitness`` with ``h=alpha``, ``momentum=beta``
    and ``iterations=20000``, and returns ``len()`` of the result.
    """
    def num_iter(alpha, beta):
        trajectory = replicator_trajectory(
            initial_state,
            fitness,
            h=alpha,
            momentum=beta,
            iterations=20000,
        )
        return len(trajectory)

    return num_iter

# Sweep an n x n grid of (step size, momentum) pairs and record the
# trajectory length returned by `f` at every grid point.
n = 50
f = make_num_iter_fn(initial_state, fitness)
alphas = np.linspace(.0001, .2, num=n)
betas = np.linspace(0, .8, num=n)

# X varies along columns (step size), Y along rows (momentum).
X, Y = np.meshgrid(alphas, betas)
Z = np.zeros(X.shape)
for i in range(n):
    # Progress marker, one line per grid row.
    print("==> {}".format(i))
    for j in range(n):
        Z[i, j] = f(X[i, j], Y[i, j])

In [None]:
# Filled contour plot of log(trajectory length) over the
# (step size, momentum) grid held in X, Y, Z.
fig, ax = plt.subplots(figsize=(12, 8))
plt.contourf(X, Y, np.log(Z), cmap='viridis')
cb = plt.colorbar()
cb.set_label("Log(iterations)", fontsize=18)
# The x-axis label was missing its closing parenthesis.
plt.xlabel("Step size (learning rate)", fontsize=18)
plt.ylabel("Momentum", fontsize=18)
plt.title("Log(number of iterations) to convergence", fontsize=20)