# Adaptive Learning
In which we adapt the learning rate $\eta$ during the learning process.

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib widget
#%matplotlib inline

In [None]:
import numpy as np
import matplotlib.pyplot as plt

___

## Setup

Fetch our tools:

In [None]:
from nn import Network, Layer, IdentityLayer, AffineLayer, MapLayer
from nnbench import NNBench
from nnvis import NNVis

Build our `xor` net:

In [None]:
# Assemble the xor net as four stacked layers: affine, tanh, affine, tanh.
net = Network()
# NOTE(review): the AffineLayer arguments look like (inputs, outputs), i.e. 2 -> 2 here — confirm in nn.
net.extend(AffineLayer(2,2))
# tanh paired with 1 - tanh(d)**2, which is the derivative of tanh.
# NOTE(review): presumably MapLayer takes (function, derivative) — confirm in nn.
net.extend(MapLayer(np.tanh, lambda d: 1.0 - np.tanh(d)**2))
net.extend(AffineLayer(2,1))
net.extend(MapLayer(np.tanh, lambda d: 1.0 - np.tanh(d)**2))

Make a test bench and a visualizer:

In [None]:
# The bench wraps the net; the visualizer is built on top of the bench.
bench = NNBench(net)
vis = NNVis(bench)

Prepare fixed training data for the learning process _[improve]_:

In [None]:
# XOR truth table on the corners of the [-1, 1] square, as (input pair, target).
dat = [
    (np.array([-1, -1]), np.array([-1])),
    (np.array([-1, 1]), np.array([1])),
    (np.array([1, 1]), np.array([-1])),
    (np.array([1, -1]), np.array([1])),
]

# The same rescaling is applied to inputs and targets: value * amp / 2 + dc.
dc = 0
amp = 1
temp = [(x * amp / 2 + dc, y * amp / 2 + dc) for x, y in dat]

# A single fixed batch: a 1-tuple holding (stacked inputs, stacked targets).
bench.training_data = (
    (
        np.array([x for x, _ in temp]),
        np.array([y for _, y in temp]),
    ),
)
# Have the bench serve that fixed batch through its fixed-data generator.
bench.training_data_gen = bench.training_data_gen_fixed

Set the state to an ordinary example starting point, for consistent notebook behavior below. We also make it the checkpoint in the bench.

In [None]:
# Load a fixed 9-element state vector so that every run starts from the same point.
net.set_state_from_vector(np.array([-0.88681521, -1.28596788,  0.3248974 , -2.33838503,  0.34761944,
       -0.94541789,  1.99448043,  0.38704839, -3.8844268 ]))
# Make this state the bench checkpoint.
# NOTE(review): presumably this is what the rollback_net() calls in later cells return to — confirm in nnbench.
bench.checkpoint_net()

---

## Experiment

In [None]:
bench.rollback_net()
n = 10
t = bench.analyze_learning_track(bench.learn_track(n))
# Show the attribute names of the analysis object, leaving out any that start with '__'.
[name for name in dir(t) if not name.startswith('__')]

In [None]:
# Fixed-learning-rate run for comparison: eta stays at 0.3 throughout.
bench.rollback_net()
bench.net.eta = 0.3
# NOTE(review): 500 is presumably the number of lessons in the track — confirm in nnbench.
traja = bench.analyze_learning_track(bench.learn_track(500))
vis.plot_trajectory(traja)

In [None]:
def plot_adaptive_learning(losses, label=None):
    r"""Plot a loss history on a log-scaled y axis in a new figure.

    Args:
        losses: Sequence of loss values in the order they were produced.
        label: Legend text for the curve.  When omitted, the curve is
            labelled ``adaptive $\eta$``, since the learning rate is not a
            single fixed value for these runs.

    Returns:
        None.  The figure is created through ``plt.subplots()`` and left to
        the active matplotlib backend to display.
    """
    if label is None:
        # Raw string: "\e" is not a valid escape in a normal string literal,
        # and the previous placeholder text ("FIXME") leaked into the legend.
        label = r"adaptive $\eta$"

    fig, ax = plt.subplots()
    ax.plot(losses, label=label)
    ax.set_xlabel('learnings')
    ax.set_ylabel('loss')
    ax.set_title("Losses")
    ax.set_yscale('log')
    ax.legend()

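Let's try a function of (`analyze_learning_track`'s output object, $\eta_t$, $N$) that returns
($\eta_{t+1}$, $n$, stop_flag), where $N$ is the cumulative number of lessons so far and $n$ is the number of lessons to run in the next burst.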

In [None]:
def adapt_learning_t1(traja, eta, n_cum, *, n_next=10, max_lessons=3000, loss_floor=1e-25):
    """Baseline policy: leave the learning rate unchanged.

    Args:
        traja: Analysis object for the most recent burst; only its
            ``losses`` attribute is read.
        eta: Current learning rate.
        n_cum: Cumulative number of lessons run so far.
        n_next: Number of lessons to request for the next burst.
        max_lessons: Stop once ``n_cum`` exceeds this count.
        loss_floor: Stop once the mean of ``traja.losses`` drops below this.

    Returns:
        Tuple ``(eta, n_next, stop)`` where ``eta`` is returned as given and
        ``stop`` is a plain ``bool``.
    """
    # The lesson budget is checked first, so the mean is only computed when needed.
    stop = bool(n_cum > max_lessons or np.mean(traja.losses) < loss_floor)
    return eta, n_next, stop

def adapt_learning_t2(traja, eta, n_cum, *, gain=0.04, n_next=3, max_lessons=3000, loss_floor=1e-25):
    """Scale the learning rate in proportion to the mean of ``traja.traj_cos``.

    The new rate is ``eta + gain * mean(traja.traj_cos) * eta``, so for a
    positive ``eta`` a positive mean raises the rate and a negative mean
    lowers it.

    Args:
        traja: Analysis object for the most recent burst; its ``traj_cos``
            and ``losses`` attributes are read.
        eta: Current learning rate.
        n_cum: Cumulative number of lessons run so far.
        gain: Relative step size applied to the mean of ``traj_cos``.
        n_next: Number of lessons to request for the next burst.
        max_lessons: Stop once ``n_cum`` exceeds this count.
        loss_floor: Stop once the mean of ``traja.losses`` drops below this.

    Returns:
        Tuple ``(new_eta, n_next, stop)`` with ``stop`` as a plain ``bool``.
    """
    # Build a new value rather than updating ``eta`` in place.
    new_eta = eta + gain * np.mean(traja.traj_cos) * eta
    stop = bool(n_cum > max_lessons or np.mean(traja.losses) < loss_floor)
    return new_eta, n_next, stop

def adapt_learning_t3(traja, eta, n_cum, *, gain=0.1, target_cos=0.7, n_next=3,
                      max_lessons=3000, loss_floor=1e-25):
    """Steer the learning rate so the mean of ``traja.traj_cos`` approaches a target.

    The new rate is ``eta + gain * (mean(traja.traj_cos) - target_cos) * eta``.
    For a positive ``eta`` the rate rises while the mean is above
    ``target_cos`` and falls while it is below.

    Args:
        traja: Analysis object for the most recent burst; its ``traj_cos``
            and ``losses`` attributes are read.
        eta: Current learning rate.
        n_cum: Cumulative number of lessons run so far.
        gain: Relative step size applied to the deviation from the target.
        target_cos: Value of the mean ``traj_cos`` at which ``eta`` is left unchanged.
        n_next: Number of lessons to request for the next burst.
        max_lessons: Stop once ``n_cum`` exceeds this count.
        loss_floor: Stop once the mean of ``traja.losses`` drops below this.

    Returns:
        Tuple ``(new_eta, n_next, stop)`` with ``stop`` as a plain ``bool``.
    """
    # Build a new value rather than updating ``eta`` in place.
    new_eta = eta + gain * (np.mean(traja.traj_cos) - target_cos) * eta
    stop = bool(n_cum > max_lessons or np.mean(traja.losses) < loss_floor)
    return new_eta, n_next, stop

def adapt_learning_t4(traja, eta, n_cum, *, gain=0.1, target_cos=0.9, n_next=3,
                      max_lessons=3000, loss_floor=1e-25):
    """Steer the learning rate toward a mean ``traja.traj_cos`` of 0.9 by default.

    The new rate is ``eta + gain * (mean(traja.traj_cos) - target_cos) * eta``.
    For a positive ``eta`` the rate rises while the mean is above
    ``target_cos`` and falls while it is below.

    Args:
        traja: Analysis object for the most recent burst; its ``traj_cos``
            and ``losses`` attributes are read.
        eta: Current learning rate.
        n_cum: Cumulative number of lessons run so far.
        gain: Relative step size applied to the deviation from the target.
        target_cos: Value of the mean ``traj_cos`` at which ``eta`` is left unchanged.
        n_next: Number of lessons to request for the next burst.
        max_lessons: Stop once ``n_cum`` exceeds this count.
        loss_floor: Stop once the mean of ``traja.losses`` drops below this.

    Returns:
        Tuple ``(new_eta, n_next, stop)`` with ``stop`` as a plain ``bool``.
    """
    # Build a new value rather than updating ``eta`` in place.
    new_eta = eta + gain * (np.mean(traja.traj_cos) - target_cos) * eta
    stop = bool(n_cum > max_lessons or np.mean(traja.losses) < loss_floor)
    return new_eta, n_next, stop

In [None]:
# Choose which adaptation policy drives this run.
adapt_learning = adapt_learning_t3

# Roll the net back and start from a learning rate of 0.1.
bench.rollback_net()
bench.net.eta = 0.1
n = 3  # lessons in the first burst; later bursts use the n returned by the policy
n_cum = 0  # running total of lessons requested so far
losses = []  # one traja.losses entry per burst, concatenated after the loop
for i in range(1000):  # hard cap on the number of bursts
    traja = bench.analyze_learning_track(bench.learn_track(n))
    #vis.plot_trajectory(traja)
    losses.append(traja.losses)
    n_cum += n
    # The policy returns the next eta, the next burst length, and a stop flag.
    eta, n, stop = adapt_learning(traja, bench.net.eta, n_cum)
    if stop:
        # On stop, the returned eta is not applied and not printed.
        break
    bench.net.eta = eta
    print(f"{eta:.3f}", end=" ")
print(f"\n{n_cum} total lessons")
# Join the per-burst loss entries into one array for plotting.
losses = np.concatenate(losses)
plot_adaptive_learning(losses)