# Minimize experiments

## Minimize the Rosenbrock function

$$ f(x) = \sum_{d=1}^{D-1} \left[ 100 (x_{d+1} - x_d^a)^2 + (1-x_d)^2 \right] $$

In [1]:
from minimize import line_search, kalman_filter_smoother
from helpers import plot_linesearch
import matplotlib.pyplot as plt

import os
from datetime import datetime

In [2]:
import autograd.numpy as np
from autograd import grad


def convert_to_1d_objective(f, x, d):
    """
    Restrict the multivariate objective f to the line through x along d.

    The returned callable evaluates the univariate function

        g(a) = f(x + a * d)

    together with its derivative with respect to a. f must return a
    (value, gradient) pair; the returned callable gives back an array of
    the form [[g(a), g'(a)]].
    """

    def restricted(a):
        # Evaluate the full objective at the point reached with step size a
        value, gradient = f(x + a * d)

        # Chain rule: dg/da is the gradient projected onto the direction d
        slope = np.dot(gradient, d)

        return np.array([[value, slope]])

    return restricted
    

def rosenbrock(x, a):
    """
    Evaluate the generalised Rosenbrock function and its gradient at x.

    The objective is the sum over consecutive entries of x of

        100 * (x[d+1] - x[d] ** a) ** 2 + (1 - x[d]) ** 2.

    Returns the pair (value, gradient), where the gradient with respect
    to x is obtained with autograd.
    """

    def objective(z, power):
        head, tail = z[:-1], z[1:]
        return np.sum(100 * (tail - head ** power) ** 2 + (1 - head) ** 2)

    # Differentiate with respect to the first argument (the point) only
    gradient = grad(objective, argnum=0)

    value_at_x = objective(x, a)
    gradient_at_x = gradient(x, a)

    return value_at_x, gradient_at_x

In [3]:
# Sanity check of the objective and of its 1D restriction at x = (1, 1)
x = np.ones(shape=(2,))

print(rosenbrock(x, 2))

standard_rosenbrock = lambda x : rosenbrock(x, a=2)

linesearch_objective = convert_to_1d_objective(standard_rosenbrock, x, d=x)

# The 1D objective takes a scalar step size: a unit step along d = x
# evaluates the objective at x + 1 * x = 2 * x
print(linesearch_objective(1.))

(0.0, array([0., 0.]))
[[ 401. 1202.]]


In [4]:
def conjugate_gradients(objective, x0, c1, c2, wp_thresh, save_path, num_iters=100):
    """
    Minimise an objective with nonlinear conjugate gradients, choosing
    every step size with `line_search`.

    Parameters
    ----------
    objective : callable mapping a point x to a (value, gradient) pair.
    x0 : array, the starting point.
    c1, c2, wp_thresh : forwarded unchanged to `line_search`.
    save_path : directory name, relative to the current working directory,
        under which the arrays returned by every line search are saved
        (one zero-padded sub-directory per step).
    num_iters : number of conjugate-gradient steps to take (default 100).

    Returns
    -------
    Array stacking the starting point and the iterate after each step.

    Raises
    ------
    FileExistsError if the output directory already exists (from os.mkdir).
    Anything raised by `line_search` or `objective` propagates unchanged.
    """

    t0 = 0.
    t_guess = np.array([1.])

    f, df = objective(x0)

    # The first search direction is steepest descent
    d = - df
    diff = None

    x = x0
    xhist = [x]
    fhist = [f]

    names = ['t', 'y', 'wp_probs', 'mf', 'Vf', 'ms', 'Vs', 'iVC', 'post_probs']
    run_dir = f'{os.getcwd()}/{save_path}'
    os.mkdir(run_dir)

    print(save_path)

    for i in range(num_iters):

        linesearch_objective = convert_to_1d_objective(objective, x, d)

        # Objective value and directional derivative at the start of the search
        y0 = np.array([[f, np.dot(df, d)]])

        # From the second step on, the initial step-size guess is the previous
        # change in objective divided by the current directional derivative
        if diff is not None:
            t_guess = np.abs(diff / y0[0, 1])

        results = line_search(objective=linesearch_objective,
                              c1=c1,
                              c2=c2,
                              wp_thresh=wp_thresh,
                              t0=t0,
                              y0=y0,
                              t_guess=t_guess)
        t, y, wp_probs, mf, Vf, ms, Vs, iVC, post_probs = results

        # Save every array of this line search so it can be inspected later
        step_dir = f'{run_dir}/{str(i).zfill(5)}'
        os.mkdir(step_dir)

        for name, array in zip(names, results):
            np.save(f'{step_dir}/{name}.npy', array)

        # Step to the location with the largest wp_probs entry. t is indexed
        # with an offset of one relative to wp_probs -- presumably because
        # t[0] is the starting point t0; confirm against line_search.
        idx_best = np.argmax(wp_probs)
        alpha = float(t[idx_best+1])

        print(f'step {i:3d} objective {fhist[-1]} alpha {alpha}')

        # Update x
        x = x + alpha * d
        f_, df_ = objective(x)

        # Polak-Ribiere update of the search direction. Periodic restarts
        # along steepest descent (every x.shape[0] steps) are not used.
        beta = np.dot(df_, (df_ - df)) / np.dot(df, df)
        d = - df_ + beta * d

        # Ensure searching in direction of decreasing objective
        if np.dot(d, df_) > 0.:
            d = - d

        # Change in objective over this step, used for the next t_guess
        diff = np.array([f_ - f])

        f = f_
        df = df_

        xhist.append(x)
        fhist.append(f)

    return np.array(xhist)

In [5]:
# Line-search settings handed to conjugate_gradients
c1, c2 = 0.05, 0.8
wp_thresh = 0.95

# Start from the origin in ndim dimensions
ndim = 100
x0 = np.zeros(shape=(ndim,))


def standard_rosenbrock(x):
    """Rosenbrock value and gradient with the exponent fixed to a=2."""
    return rosenbrock(x, a=2)


# Results are written to a directory named after the current timestamp
save_path = datetime.now().strftime('%Y-%m-%d--%H-%M-%S')

xhist = conjugate_gradients(standard_rosenbrock,
                            x0,
                            c1=c1,
                            c2=c2,
                            wp_thresh=wp_thresh,
                            save_path=save_path)

2020-12-02--19-06-52
step   0 objective 99.0 alpha 0.004851524209221672
step   1 objective 97.99296132800922 alpha 0.09188848720323861
step   2 objective 97.72290125644739 alpha 0.006011499887506359
step   3 objective 97.54322247813117 alpha 0.001691563557122163
step   4 objective 97.38590182079717 alpha 0.005149459963468006
step   5 objective 97.32059776201716 alpha 0.02120597218093409
step   6 objective 97.22100721179552 alpha 0.006470135564200208
step   7 objective 97.13425747984296 alpha 0.012665155245344808
step   8 objective 97.05636966867516 alpha 0.0062528056774712685
step   9 objective 96.989136102568 alpha 0.012193378649245933
step  10 objective 96.88438631606347 alpha 0.009139380371861463
step  11 objective 96.72688796097566 alpha 0.009648018366131217
step  12 objective 96.49663426427577 alpha 0.0036785216193061677
step  13 objective 96.3090773607526 alpha 0.00231090553069803
step  14 objective 96.1616792995802 alpha 0.0035182232479864482
step  15 objective 96.05427607631555



step  18 objective 95.94854405300799 alpha 0.00505575229258421
step  19 objective 95.908422398911 alpha 0.012020854644311385
step  20 objective 95.84456079193043 alpha 0.005352502399121924
step  21 objective 95.80074806501356 alpha 0.0050970678909323655
step  22 objective 95.67424243937664 alpha 0.0063564495520334365
step  23 objective 95.58967826523751 alpha 0.005990226847278747
step  24 objective 95.45277970896407 alpha 0.002268277523104202
step  25 objective 95.3408650813739 alpha 0.011609956145980697
step  26 objective 95.2838098487668 alpha 0.00160981415694876
step  27 objective 95.19253055817714 alpha 0.0007830607869591891
step  28 objective 95.11698016850832 alpha 0.0035160104486410305
step  29 objective 95.09314719061078 alpha 0.0065969751241413185
step  30 objective 95.05539370138149 alpha 0.005005045779754876
step  31 objective 95.02478032422185 alpha 0.0021007433040889562
step  32 objective 94.99951333464963 alpha 0.007792643223981416
step  33 objective 94.93173687789884 alp

Exception: WP standard deviation was <= 1e-6

In [None]:
# Scratch evaluation of exp(-20); as a bare expression its value is shown
# as the cell output
np.exp(-20)

In [None]:
# plt.figure(figsize=(8, 8))

# eps = 2.

# x1, x2 = np.meshgrid(np.linspace(1. - eps, 1 + eps, 40), np.linspace(1. - eps, 1. + eps, 40))
# x = np.stack([x1, x2], axis=-1)
# x = np.reshape(x, (-1, 2))

# f = np.array([standard_rosenbrock(x_)[0] for x_ in x])
# f = np.reshape(f, (x1.shape[0], x1.shape[0]))

# plt.contourf(x1, x2, np.log(f) + 1e-6, cmap='coolwarm', alpha=0.5)
# plt.plot(xhist[:, 0], xhist[:, 1], color='black')
# plt.gca().set_aspect('equal')
# plt.xlim([1. - eps, 1. + eps])
# plt.ylim([1. - eps, 1. + eps])
# plt.show()

In [None]:
# import scipy as scp

# ndim = 10
# x0 = np.zeros(shape=(ndim,))

# standard_rosenbrock_f = lambda x : rosenbrock(x, a=10)[0]
# standard_rosenbrock_df = lambda x : rosenbrock(x, a=10)[1]

# scp.optimize.minimize(fun=standard_rosenbrock_f,
#                       x0=x0,
#                       method='CG',
#                       jac=standard_rosenbrock_df,
#                       options={'maxiter' : 1000})

# Plotting searches



In [None]:
# Index of the saved optimisation step to inspect
i = 0

# Load every array that was saved for that step
names = ['t', 'y', 'wp_probs', 'mf', 'Vf', 'ms', 'Vs', 'iVC', 'post_probs']
step_dir = f'{os.getcwd()}/{save_path}/{str(i).zfill(5)}'
loaded = [np.load(f'{step_dir}/{name}.npy') for name in names]
t, y, wp_probs, mf, Vf, ms, Vs, iVC, post_probs = loaded

# Plot only entry j along the first axis of the saved arrays, giving it
# a weight of one in place of its stored post_probs value
j = 0
selected = slice(j, j + 1)

plot_linesearch(c1=c1,
                c2=c2,
                t_data=t,
                mf=mf[selected],
                Vf=Vf[selected],
                ms=ms[selected],
                Vs=Vs[selected],
                iVC=iVC[selected],
                post_probs=np.ones_like(post_probs[selected]),
                wp_probs=wp_probs,
                x=None,
                y=y)

In [None]:
# Re-fit the Kalman filter/smoother to the data saved for one step, after
# rescaling the t-axis so that its largest value is 1, and plot the result
i = 6

names = ['t', 'y', 'wp_probs', 'mf', 'Vf', 'ms', 'Vs', 'iVC', 'post_probs']
loaded = [np.load(f'{os.getcwd()}/{save_path}/{str(i).zfill(5)}/{name}.npy') for name in names]
t, y, wp_probs, mf, Vf, ms, Vs, iVC, post_probs = loaded

log_nsr = -10.

# Rescaling factor for the t-axis
a = 1 / np.max(t)

# NOTE(review): `scale` is used in the plot options below but was never
# defined in this notebook; it is assumed to be the rescaling factor `a`
# -- confirm.
scale = a

# No adjustment is currently applied to the log noise-to-signal ratio
log_nsr_adjusted = log_nsr

t_ = t * a
y_ = y[:].copy()
# The second column of y is divided by the same factor that multiplies t
# (presumably it holds derivatives with respect to t -- confirm)
y_[:, 1] = y_[:, 1] / a

mf, Vf, ms, Vs, iVC, theta2, nlml = kalman_filter_smoother(t_,
                                                           y_,
                                                           log_nsr=log_nsr_adjusted,
                                                           verbose=True)

# The smoother returns results for a single fit, so a leading axis is added
# to each array before plotting
plot_linesearch(c1=c1,
                c2=c2,
                t_data=t_,
                mf=mf[None, :],
                Vf=Vf[None, :],
                ms=ms[None, :],
                Vs=Vs[None, :],
                iVC=iVC[None, :],
                post_probs=np.array([1.]),
                wp_probs=wp_probs,
                x=None,
                y=y_,
                options={'kfs_info'  : f'log-NSR: {log_nsr:.2f}, scale: {scale:.0f}',
                         'save_path' : f'figures/{log_nsr:.2f}_{scale:.0f}.pdf'})

print(f'evidence {-nlml}')
# diff [0.00152318 1.07406326]
# quad [107.11845063]
# diff [-7.05007756e-05  5.39869149e-01]
# quad [1.65557898e+08]
# diff [0.00216854 1.52950542]
# quad [2.49449633e+08]

# Notes

- Fix scaling.
- Add second scaling for numerical stability.