In [1]:
from IPython.display import display, HTML
# Inject a CSS rule so the notebook container uses the full page width.
display(HTML("<style>.container { width:100% !important; }</style>"))

import os

import numpy as np
from tqdm import tqdm

from bokeh.plotting import figure, show, output_file, save
from bokeh.layouts import gridplot
from bokeh.io import output_notebook
from bokeh.palettes import Viridis, Category10, Category20
from bokeh.io import export_svg
# Configure Bokeh so that show(...) renders figures inline in the notebook.
output_notebook()

In [2]:
from metamod.control import SingleLayerEq, SingleLayerControl
from metamod.tasks import AffineCorrelatedGaussian, TwoGaussians
from metamod.trainers import single_layer_training
from metamod.networks import SingleLayerNet
from metamod.utils import plot_lines, plot_weight_ev, check_dir, save_var, get_date_time

In [3]:
# Alternative run configuration, kept for reference (disabled):
# run_name = "testing_square_loss_single_neuron"
# results_path = "../results"

# Label of this run and root directory for outputs; both are combined with a
# timestamp into the saving path near the end of the notebook.
run_name = "single_neuron_cost0"
results_path = "../results"

In [4]:
# Collects every curve/array produced below; written to "results.pkl" at the end.
results_dict = {}

In [5]:
# Disabled alternative task, kept for reference:
# dataset_params = {"mu_vec": (3.0, 1.0), "batch_size": 256,
#                   "dependence_parameter": 0.8, "sigma_vec": (1.0, 1.0)}
# dataset = AffineCorrelatedGaussian(**dataset_params)

# Active task: TwoGaussians built from the parameters below. The dict is kept
# as its own variable because it is saved with the run parameters later on.
dataset_params = {
    "mu": 2.0,
    "batch_size": 128,
    "std": 1.0,
}
dataset = TwoGaussians(**dataset_params)

In [6]:
# Hyper-parameters of the single-layer network. Input/output sizes are taken
# from the dataset, and the weights start from a 1x1 zero matrix.
model_params = {
    "learning_rate": 1e-3,
    "reg_coef": 0.1,
    "intrinsic_noise": 0.0,
    "input_dim": dataset.input_dim,
    "output_dim": dataset.output_dim,
    "W_0": np.zeros(shape=(1, 1)),
}

model = SingleLayerNet(**model_params)

In [7]:
# Number of training steps, and the interval (in steps) between stored
# weight snapshots.
n_steps, save_weights_every = 600, 20

# Baseline simulation (no control signal is passed here).
iters, loss, weights_iter, weights = single_layer_training(
    model=model,
    dataset=dataset,
    n_steps=n_steps,
    save_weights_every=save_weights_every,
)

# Record the simulated trajectory under the "*_sim" keys.
results_dict.update({
    "iters": iters,
    "Loss_t_sim": loss,
    "weights_sim": weights,
    "weights_iters_sim": weights_iter,
})

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 600/600 [00:00<00:00, 655.87it/s]


In [8]:
# Inspect the shape of the stored weight snapshots.
weights.shape

(30, 1, 1)

In [9]:
# The first stored snapshot is used as the initial condition of the
# equation/control objects built below.
init_weights = init_W = weights[0, ...]

# Statistics provided by the dataset; expected_y / expected_x are unpacked but
# only referenced in commented-out entries of the equation parameters.
(input_corr, output_corr, input_output_corr,
 expected_y, expected_x) = dataset.get_correlation_matrix()

# One time point per recorded iteration, scaled by the learning rate.
time_span = model_params["learning_rate"] * np.arange(len(iters))
results_dict["time_span"] = time_span

In [10]:
# Inspect the shape of the initial weight matrix handed to the solvers.
init_W.shape

(1, 1)

In [11]:
# Keyword arguments for SingleLayerEq (and, extended, for SingleLayerControl).
# Regularisation, noise and learning rate are copied from model_params so the
# equation uses the same values as the simulated network.
equation_params = {"in_cov": input_corr,
                   "out_cov": output_corr,
                   "in_out_cov": input_output_corr,
                   # "expected_y": expected_y,
                   # "expected_x": expected_x,
                   "init_weights": init_weights,
                   "n_steps": n_steps,
                   "reg_coef": model_params["reg_coef"],
                   "intrinsic_noise": model_params["intrinsic_noise"],
                   "learning_rate": model_params["learning_rate"],
                   "time_constant": 1.0}

In [12]:
# Equation-based counterpart of the simulation (no control signal).
solver = SingleLayerEq(**equation_params)

In [13]:
# Controller configuration: all equation parameters plus control-specific
# settings. NOTE(review): the key "reward_convertion" is spelled this way on
# purpose here because it is read back under the same name later; presumably
# it matches the SingleLayerControl keyword - confirm before renaming.
control_params = {**equation_params,
                  "control_lower_bound": -30,
                  "control_upper_bound": 30,
                  "gamma": 0.99,
                  "cost_coef": 0.0,
                  "reward_convertion": 1.0,
                  "init_g": None,
                  "control_lr": 10.0,
                  "square_control_loss": True}

In [14]:
# Controller object whose control signal (control.g_tilda) is optimised below.
control = SingleLayerControl(**control_params)

In [15]:
# Compare the shape of the simulated weight snapshots with the shape of the
# control signal held by the controller.
sim_weights = weights
print(sim_weights.shape, control.g_tilda.shape)

(30, 1, 1) torch.Size([600, 1, 1])


In [16]:
# Evaluate the equation-based solver on the simulation's time grid, then
# compute the loss along that weight trajectory.
W_t = solver.get_weights(time_span, get_numpy=True)
Loss_t = solver.get_loss_function(W_t, get_numpy=True)

results_dict.update({"W_t_eq": W_t, "Loss_t_eq": Loss_t})

In [17]:
# Weight and loss trajectories under the controller's initial
# (not yet optimised) control signal.
W_t_control = control.get_weights(time_span, get_numpy=True)
Loss_t_control = control.get_loss_function(W_t_control, get_numpy=True)

results_dict["W_t_control_init"] = W_t_control
results_dict["Loss_t_control_init"] = Loss_t_control
# Store a detached copy rather than a reference to control.g_tilda: the same
# tensor is optimised later in the notebook (presumably in place by
# control.train_step - confirm), which would otherwise silently replace this
# "initial" signal with the optimised one in the saved results.
results_dict["control_signal_init"] = control.g_tilda.detach().clone()

In [18]:
# Loss curves before the control signal is optimised.
# Each entry: (curve, legend label, line alpha, colour).
curves = (
    (loss, "Real Non-linear", 0.3, Category10[10][0]),
    (Loss_t, "Approximation", 1, Category10[10][0]),
    (Loss_t_control, "Init Control", 1, Category10[10][1]),
)
losses, legends, alphas, colors = zip(*curves)

s = plot_lines(iters, losses, legends, alphas, colors)
show(s)

In [19]:
# Collapse every weight matrix to a row vector: one row per stored time point.
flat_W_t = np.reshape(weights, (len(weights), -1))
flat_eq_W_t = np.reshape(W_t, (len(W_t), -1))


In [20]:
# Simulated weights (sampled at weights_iter) against the equation-based
# weights (sampled at every entry of iters).
weight_plot = plot_weight_ev(
    flat_W_t,
    flat_eq_W_t,
    iters=weights_iter,
    iters_set2=iters,
    title="W",
)
show(weight_plot)

## Optimizing control signal

In [21]:
# Number of gradient steps taken on the control signal.
iter_control = 5000
# Recorded for the saved parameters only: `control` was already constructed
# from control_params above, so this entry is not passed to the controller.
control_params["iters_control"] = iter_control
# Per-step values returned by control.train_step are collected here.
cumulated_reward = []

In [22]:
# Optimise the control signal for `iter_control` steps and keep the value
# returned by each step.
# The values are gathered in a fresh list owned by this cell: the original
# code appended to `cumulated_reward` and then rebound that name to an
# ndarray, so re-running the cell failed on `.append`.
reward_history = []
for _ in tqdm(range(iter_control)):
    reward_history.append(control.train_step(get_numpy=True))
cumulated_reward = np.array(reward_history).astype(float)
results_dict["cumulated_reward_opt"] = cumulated_reward
# NOTE(review): this places the live controller object inside control_params,
# which is later saved as part of params_dict - confirm that is intended.
control_params["control"] = control

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5000/5000 [16:54<00:00,  4.93it/s]


In [23]:
# Value returned by each control optimisation step, against the step index.
opt = plot_lines(
    np.arange(iter_control),
    (cumulated_reward,),
    x_axis_label="gradient steps on control",
    y_axis_label="Cumulated reward",
)
show(opt)

In [24]:
# Weight and loss trajectories under the control signal after optimisation.
W_t_opt = control.get_weights(time_span, get_numpy=True)
Loss_t_opt = control.get_loss_function(W_t_opt, get_numpy=True)

results_dict.update({"W_t_control_opt": W_t_opt, "Loss_t_control_opt": Loss_t_opt})

In [25]:
# Loss curves after optimising the control signal.
# Each entry: (curve, legend label, line alpha, colour).
curves = (
    (loss, "Real Non-linear", 0.3, Category10[10][0]),
    (Loss_t, "Approximation", 1, Category10[10][0]),
    (Loss_t_opt, "Approximated Optimized Control", 1, Category10[10][1]),
)
losses, legends, alphas, colors = zip(*curves)

s = plot_lines(iters, losses, legends, alphas, colors)
show(s)

In [26]:
# Initial weights the controller was configured with; reused to reset the
# network before re-simulating with the control signal.
W_0 = control_params["init_weights"]

# Control signal detached from the autograd graph; both names refer to the
# same tensor.
control_signal = g_tilda = control.g_tilda.detach()
results_dict["control_signal"] = control_signal

In [27]:
# Same hyper-parameters as the first model, restarted from W_0.
reset_model_params = {**model_params, "W_0": W_0}

reset_model = SingleLayerNet(**reset_model_params)

In [28]:
# Re-run the simulation on the reset model, this time passing the control
# signal. Note that `iters` is rebound by this call.
iters, loss_OPT, weights_iter_OPT, weights_OPT = single_layer_training(
    model=reset_model,
    dataset=dataset,
    n_steps=n_steps,
    save_weights_every=save_weights_every,
    control_signal=control_signal,
)

results_dict.update({
    "Loss_t_sim_OPT": loss_OPT,
    "weights_sim_OPT": weights_OPT,
    "weights_iters_sim_OPT": weights_iter_OPT,
    "iters_OPT": iters,
})

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 600/600 [00:00<00:00, 1602.24it/s]


In [29]:
# All four loss curves: simulation and equation, without and with control.
# Each entry: (curve, legend label, line alpha, colour).
curves = (
    (loss, "Real Non-linear", 0.3, Category10[10][0]),
    (Loss_t, "Approximation", 1, Category10[10][0]),
    (Loss_t_opt, "Approximated Optimized Control", 1, Category10[10][1]),
    (loss_OPT, "Optimized Non-linear", 0.3, Category10[10][1]),
)
losses, legends, alphas, colors = zip(*curves)

s = plot_lines(iters, losses, legends, alphas, colors)
show(s)

In [30]:
# Reward rate = -(conversion factor) * loss; the controlled case additionally
# subtracts the controller's control cost.
reward_scale = control_params["reward_convertion"]
baseline_reward_rate = -reward_scale * Loss_t
control_reward_rate = -reward_scale * Loss_t_opt - control.control_cost(get_numpy=True)

In [31]:
# Store both reward-rate curves, then plot them together.
results_dict.update({
    "baseline_iRR": baseline_reward_rate,
    "control_iRR": control_reward_rate,
})

# Each entry: (curve, legend label, line alpha, colour).
curves = (
    (baseline_reward_rate, "Baseline iRR", 1, Category10[10][0]),
    (control_reward_rate, "Control iRR", 1, Category10[10][1]),
)
losses, legends, alphas, colors = zip(*curves)

s = plot_lines(iters, losses, legends, alphas, colors)
show(s)

In [32]:
# Difference between the controlled and the baseline reward rate.
# Single entry: (curve, legend label, line alpha, colour).
curves = (
    (control_reward_rate - baseline_reward_rate, "net diff iRR", 1, Category10[10][0]),
)
losses, legends, alphas, colors = zip(*curves)

s = plot_lines(iters, losses, legends, alphas, colors)
show(s)

In [33]:
# List everything that will be written to results.pkl.
results_dict.keys()

dict_keys(['iters', 'Loss_t_sim', 'weights_sim', 'weights_iters_sim', 'time_span', 'W_t_eq', 'Loss_t_eq', 'W_t_control_init', 'Loss_t_control_init', 'control_signal_init', 'cumulated_reward_opt', 'W_t_control_opt', 'Loss_t_control_opt', 'control_signal', 'Loss_t_sim_OPT', 'weights_sim_OPT', 'weights_iters_sim_OPT', 'iters_OPT', 'baseline_iRR', 'control_iRR'])

In [34]:
# Bundle every parameter dict used in this run so it can be saved next to the
# results. NOTE(review): control_params also holds the "control" entry added
# after the optimisation loop, i.e. the controller object itself.
params_dict = {"dataset_params": dataset_params,
               "model_params": model_params,
               "equation_params": equation_params,
               "control_params": control_params,
               "reset_params": reset_model_params}

In [35]:
# Timestamped output directory: <results_path>/<run_name>_<date-time>.
# `os` is imported together with the other modules in the first cell instead
# of in the middle of the notebook.
time_str = get_date_time()
saving_path = os.path.join(results_path, f"{run_name}_{time_str}")

In [36]:
# Write the results and the parameters into the run's output directory.
for payload, filename in ((results_dict, "results.pkl"), (params_dict, "params.pkl")):
    save_var(payload, filename, results_path=saving_path)

In [37]:
# Disabled duplicate: params.pkl is already written by the save_var call in the previous cell.
# save_var(params_dict, "params.pkl", results_path=saving_path)