In [None]:
import os

if not os.path.isdir("cld_optimization_experiments"):
    !git clone https://github.com/oopir/cld_optimization_experiments

%cd cld_optimization_experiments

In [None]:
import random
import numpy as np
import torch

from src.data import load_1d_regression_data
from src.training import get_1d_regression_curves_for_betas
from src.plots import plot_1d_regression_curves

# Drop any cached CUDA allocations before starting, then pick the device
# string that is handed to the data loader and the training helpers below.
torch.cuda.empty_cache()
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
# Dataset bundle; it is indexed by "d_in" below and passed on to training/plotting.
data = load_1d_regression_data(device=device)

# Beta values to compare and the seeds to run for each of them.
betas_to_plot = [1e7, 1e4, 1e2]
seeds_for_curves = [*range(1)]

# Training length and reporting cadence.
epochs = 10**7
# NOTE(review): track_every is not forwarded through `common` in the cells
# visible here — confirm whether it is still needed.
track_every = max(1, epochs // 100)  # track at most 100 times
print_every = max(1, epochs // 10)

# Keyword arguments shared by every (beta, seed) run.
common = {
    "data": data,
    "eta": 1e-7,
    "epochs": epochs,
    # Input dimension divided by the squared tanh gain.
    "lam_fc1": data["d_in"] / (torch.nn.init.calculate_gain("tanh") ** 2),
    "regularization_scale": 1.0,
    "device": device,
    "print_every": print_every,
}

# Evaluation grid: 400 evenly spaced points on [-1.5, 1.5] as a column tensor.
x_plot = torch.linspace(start=-1.5, end=1.5, steps=400, device=device).unsqueeze(-1)

In [None]:
# Run the experiment for every beta in `betas_to_plot` and every seed in
# `seeds_for_curves`, evaluating on `x_plot`; the result is handed to the
# plotting helper in the next cell.
curves_by_beta = get_1d_regression_curves_for_betas(
    x_plot=x_plot, betas=betas_to_plot, seeds=seeds_for_curves, common=common
)

In [None]:
# Plot the curves computed above, passing the dataset and the evaluation grid
# alongside them.
plot_1d_regression_curves(data, x_plot, curves_by_beta)

In [None]:
# def train_and_return_model(beta, seed, data, eta, epochs, lam_fc1, lam_fc2, hidden_width, regularization_scale, device, print_every):
#     torch.manual_seed(seed)
#     torch.cuda.manual_seed_all(seed)
#     np.random.seed(seed)
#     random.seed(seed)

#     X_train = data["X_train"]
#     y_train = data["y_train"].unsqueeze(1)  # (N, 1)
#     d_in = data["d_in"]
#     d_out = data["d_out"]

#     model = TwoLayerNet(d_in=d_in, hidden=hidden_width, d_out=d_out, with_bias=True).to(device)
#     params, lam_tensors = make_lambda_like_params(model, lam_fc1, lam_fc2)

#     for epoch in range(epochs):
#         model.train()
#         for p in params:
#             if p.grad is not None:
#                 p.grad.zero_()
#         outputs = model(X_train)
#         loss = loss_fn(outputs, y_train)
#         loss.backward()
#         langevin_step(params,lam_tensors,beta=beta,eta=eta,regularization_scale=regularization_scale)
#         if epoch % print_every == 0:
#             print(f"    epoch = {epoch:5} | loss = {loss:.2}")

#     return model

In [None]:
# def get_curves_for_betas(betas, seeds):
#     curves = {}
#     m_values = [int(min(1e05, beta * np.log(beta))) for beta in betas]
#     for beta, m in zip(betas, m_values):
#         print(f"beta={beta:.0e}, m={m:.2e}")
#         fs = []
#         for seed in seeds:
#             print(f"  seed={seed}")
#             model = train_and_return_model(beta=beta, seed=seed, lam_fc2=m, hidden_width=m, **common)
#             with torch.no_grad():
#                 f = model(x_plot).cpu().numpy().ravel()
#             fs.append(f)
#         curves[beta] = np.stack(fs, axis=0)  # (n_seeds, n_grid)
#     return curves

# curves_by_beta = get_curves_for_betas(betas_to_plot, seeds_for_curves)


In [None]:
# X_train_np = data["X_train"].cpu().numpy().ravel()
# y_train_np = data["y_train"].cpu().numpy().ravel()
# y_target_np = np.interp(x_plot_np, X_train_np, y_train_np)

# plt.figure(figsize=(6, 4))

# plt.plot(x_plot_np, y_target_np, "k--", label="target")
# plt.scatter(X_train_np, y_train_np, c="k", s=20, zorder=3)

# for beta, fs in curves_by_beta.items():
#     mean = fs.mean(axis=0)
#     std = fs.std(axis=0)
#     label = f"β={beta:.1e}"
#     plt.plot(x_plot_np, mean, label=label)
#     plt.fill_between(x_plot_np, mean - std, mean + std, alpha=0.2)

# plt.xlim(-1.5, 1.5)
# plt.legend()
# plt.tight_layout()
# plt.show()