# Testing manualgrad vs autograd

In [1]:
# Inject CSS so the notebook's ".container" element uses the full page width.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

import numpy as np
from tqdm import tqdm

# Bokeh plotting helpers and colour palettes used by the plots in this notebook.
from bokeh.plotting import figure, show, output_file, save
from bokeh.layouts import gridplot
from bokeh.io import output_notebook
from bokeh.palettes import Viridis, Category10, Category20
from bokeh.io import export_svg
# Configure Bokeh so that show(...) displays figures inside the notebook.
output_notebook()

In [2]:
from metamod.control import NonLinearNetEq, NonLinearNetControl, BatchNetworkControl
from metamod.tasks import AffineCorrelatedGaussian, MultiDimGaussian, SemanticTask
from metamod.trainers import two_layer_training
from metamod.networks import NonLinearNet, FullGradNet
from metamod.utils import plot_lines, plot_weight_ev, check_dir, save_var, get_date_time

In [3]:
# Run bookkeeping: a label, an output directory, and a dict that later cells
# fill with results.
run_name = "composition_of_tasks"
results_path = "../results"
results_dict = dict()

# Training length (n_steps) and number of control-optimisation iterations
# (iter_control); the controller's inner/outer loop sizes mirror these two.
n_steps, iter_control = 5000, 200
inner_loop_iters, outer_loop_iters = n_steps, iter_control

# Keyword arguments forwarded verbatim to SemanticTask.
# NOTE(review): "h_levels" is presumably the number of hierarchy levels of the
# semantic task — confirm against metamod.tasks.
dataset_params = {
    "batch_size": 64,
    "h_levels": 3,
}
dataset = SemanticTask(**dataset_params)

In [4]:
# Hyperparameters for the baseline network. Input/output sizes are read from
# the dataset; W1_0 / W2_0 are left as None here and overridden with saved
# weights when the manual-gradient model is configured later.
model_params = {
    "learning_rate": 5e-3,
    "hidden_dim": 8,
    "intrinsic_noise": 0.0,
    "reg_coef": 0.0,
    "input_dim": dataset.input_dim,
    "output_dim": dataset.output_dim,
    "W1_0": None,
    "W2_0": None,
}
model = NonLinearNet(**model_params)

In [5]:
# Keyword arguments later unpacked into BatchNetworkControl.
# The loop sizes come from the run configuration; the remaining values are
# fixed constants for this experiment.
# NOTE(review): key spellings (e.g. "reward_convertion") must match the
# controller's parameter names — do not "fix" them here without changing the API.
control_params = {
    "inner_loop_iters": inner_loop_iters,
    "outer_loop_iters": outer_loop_iters,
    "control_lower_bound": -1.0,
    "control_upper_bound": 1.0,
    "gamma": 0.99,
    "cost_coef": 0.3,
    "reward_convertion": 1.0,
    "init_g": None,
    "control_lr": 0.005,
}

In [6]:
# Seed the dataset with 0; the manual-gradient run later in the notebook
# re-seeds with the same value before sampling its batches.
dataset.set_random_seed(0)
save_weights_every = 20

# Baseline run: train the model for n_steps, keeping a weight snapshot every
# `save_weights_every` steps.
iters, loss, weights_iter, weights = two_layer_training(
    model=model,
    dataset=dataset,
    n_steps=n_steps,
    save_weights_every=save_weights_every,
)

# Store the baseline outputs under the "*_sim" result keys.
results_dict.update({
    "iters": iters,
    "Loss_t_sim": loss,
    "weights_sim": weights,
    "weights_iters_sim": weights_iter,
})

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:04<00:00, 1008.48it/s]


In [7]:
# Plot the baseline training loss on its own (one curve, so every argument is
# a 1-tuple).
losses = (loss,)
legends = ("Autograd Non-linear",)
colors = (Category10[10][0],)
alphas = (1.0,)

s = plot_lines(iters, losses, legends, alphas, colors)
show(s)

In [8]:
# Inspect the shape of the first saved weight array. The recorded output is
# (250, 8, 4): 250 equals n_steps / save_weights_every (5000 / 20) and 8 equals
# hidden_dim; the trailing 4 is presumably dataset.input_dim — TODO confirm.
weights[0].shape

(250, 8, 4)

In [9]:
# Same hyperparameters as the baseline model, but with explicit initial
# weights taken from index 0 of each saved weight array.
# NOTE(review): index 0 is presumably the pre-training snapshot — confirm
# against two_layer_training.
full_grad_model_params = {
    **model_params,
    "W1_0": weights[0][0, :, :],
    "W2_0": weights[1][0, :, :],
}

In [10]:
# Build the manual-gradient network from the baseline hyperparameters plus the
# explicit W1_0 / W2_0 initial weights set in full_grad_model_params.
full_model = FullGradNet(**full_grad_model_params)

In [11]:
# Re-seed with the same value (0) used for the baseline run before sampling
# batches for the manual-gradient model.
dataset.set_random_seed(0)

# Train the manual-gradient model for n_steps, with no control signal
# (g1_tilda / g2_tilda are None), recording the loss of every step.
grad_loss = []
for step in tqdm(range(n_steps)):
    x, y = dataset.sample_batch()
    step_loss = full_model.train_step(x, y, g1_tilda=None, g2_tilda=None)
    # Detach from the graph and move to host memory before converting to NumPy.
    grad_loss.append(step_loss.detach().cpu().numpy())
grad_loss = np.array(grad_loss)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:04<00:00, 1132.18it/s]


In [12]:
# Overlay the baseline (autograd) loss and the manual-gradient loss; the two
# curves should coincide if both implementations agree.
losses = (loss, grad_loss)
legends = ("Autograd Non-linear", "Manual grad Non-linear")
colors = (Category10[10][0], Category10[10][1])
alphas = (1.0, 1)

s = plot_lines(iters, losses, legends, alphas, colors)
show(s)

In [13]:
# Put the manual-gradient model back on the same index-0 weights it was
# initialised with, undoing the training run above, then wrap it in a
# controller configured from control_params.
full_model.reset_weights(
    weights[0][0, :, :],
    weights[1][0, :, :],
)
net_controller = BatchNetworkControl(
    network=full_model,
    task=dataset,
    **control_params,
)

In [14]:
# Accumulator for the reward returned by each control-optimisation step.
cumulated_reward = []
# Recorded in the params dict only now, after the controller has already been
# constructed from **control_params, so this key is not passed to
# BatchNetworkControl.
control_params["iters_control"] = iter_control

In [15]:
# Run iter_control optimisation steps on the control signal, collecting the
# reward returned by each call to outer_loop. The controller's own progress
# bar is disabled (show_tqdm=False) because this loop already shows one.
for outer_step in tqdm(range(iter_control)):
    cumulated_reward.append(
        net_controller.outer_loop(get_numpy=True, show_tqdm=False)
    )

# NOTE: from here on cumulated_reward is an ndarray, not a list, so this cell
# cannot be re-run without first re-running the cell that resets it to [].
cumulated_reward = np.array(cumulated_reward).astype(float)
results_dict["cumulated_reward_opt"] = cumulated_reward

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [46:38<00:00, 13.99s/it]


In [16]:
# Plot the reward collected at each control-optimisation step.
opt = plot_lines(
    np.arange(iter_control),
    (cumulated_reward,),
    x_axis_label="gradient steps on control",
    y_axis_label="Cumulated reward",
)
show(opt)

In [17]:
# Fetch the loss curve from the controller as NumPy (get_numpy=True) once the
# control optimisation has run. It is plotted against `iters` in the next cell,
# so it presumably has one entry per training step — TODO confirm.
opt_loss = net_controller.get_loss_function(get_numpy=True)

In [18]:
# Final comparison: baseline (autograd) loss, manual-gradient loss, and the
# loss obtained from the controller after optimising the control signal.
# The third legend label previously read "Optmized training" (typo).
losses = (loss, grad_loss, opt_loss)
legends = ("Autograd Non-linear", "Manual grad Non-linear", "Optimized training")
colors = (Category10[10][0], Category10[10][1], Category10[10][2])
alphas = (1.0, 1, 1)

s = plot_lines(iters, losses, legends, alphas, colors)
show(s)