In [1]:
import os, sys
from tqdm import trange, tqdm
from IPython.utils import io
import itertools

import math
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy
from numba import njit

import torch
from torch import nn
from torch.utils.data import TensorDataset, ConcatDataset

source = "../source"
sys.path.append(source)

from data import fun_data, grid_data
from preprocessing import Direct, Encoding, OneHot
from compilation import Compiler, Tracker, ScalarTracker, ActivationTracker
from activations import get_activations
from data_analysis.automata import to_automaton_history
from data_analysis.visualization.animation import SliderAnimation
from data_analysis.visualization.activations import (
    ActivationsAnimation,
    FunctionAnimation,
    PointAnimation,
)
from data_analysis.visualization.automata import AutomatonAnimation
from data_analysis.visualization.epochs import EpochAnimation
import data_analysis.visualization.publication as publication
import simulate

import models as models
from models import MLP, CNN, ResNet

import cProfile
import pstats


is_cuda = torch.cuda.is_available()
if is_cuda:
    device = torch.device("cuda")
    print("GPU available")
else:
    device = torch.device("cpu")
    print("GPU not available")

device = torch.device("cpu")

GPU available


In [2]:
settings = pd.read_csv("model_settings/2 points.txt", sep=" ", header=0)

In [3]:
def make_plots(setting):
    ## Load settings
    (
        model_type,
        nonlinearity,
        gain,
        lr,
        P,
        L,
        n_epochs,
        hidden_layer,
        dx2,
        dy2,
        in_dim,
        out_dim,
    ) = settings.loc[setting].to_numpy()
    model_type = getattr(models, model_type)
    if nonlinearity == "discontinuous":
        nonlinearity = simulate.Discontinuous.apply
    elif nonlinearity == "none":
        nonlinearity = None
    else:
        nonlinearity = getattr(torch.nn.functional, nonlinearity)

    factor = 2
    n_epochs = factor * n_epochs
    lr = lr / factor

    ## Generate data
    input_dim, output_dim = in_dim, out_dim

    inputs = np.array([[-1] * input_dim, [-1 + np.sqrt(dx2)] * input_dim]) / np.sqrt(
        input_dim
    )
    outputs = np.array(
        [[0.6] * output_dim, [0.6 + np.sqrt(dy2)] * output_dim]
    ) / np.sqrt(output_dim)
    names = ["A", "B"]
    data = TensorDataset(
        torch.from_numpy(inputs.astype(np.float32)).to(device),
        torch.from_numpy(outputs.astype(np.float32)).to(device),
    )

    encoding = Encoding(dict(zip(names, inputs)))

    train_datasets = [data]
    val_dataset = [data]

    tracked_datasets = val_dataset + train_datasets

    ## Instantiate model
    model = model_type(
        encoding=encoding,
        input_size=inputs.shape[1],
        output_size=outputs.shape[1],
        hidden_dim=P,
        n_hid_layers=L,
        device=device,
        init_std=gain,
        non_linearity=nonlinearity,
    )

    ## Setup compiler
    criterion = lambda x, y: 0.5 * nn.functional.mse_loss(x, y)
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    compiler = Compiler(model, criterion, optimizer)
    compiler.trackers = {
        "loss": ScalarTracker(lambda: compiler.validation(tracked_datasets)),
        "hidden": ActivationTracker(
            model,
            lambda inputs: model(inputs)[1][hidden_layer],
            datasets=tracked_datasets,
        ),
        "output": ActivationTracker(
            model, lambda inputs: model(inputs)[0], datasets=tracked_datasets
        ),
    }

    ## Training run
    compiler.training_run(
        train_datasets, tracked_datasets, n_epochs=n_epochs, batch_size=100
    )

    data_hid = compiler.trackers["hidden"].get_trace()
    data_output = compiler.trackers["output"].get_trace()
    loss = compiler.trackers["loss"].get_trace().copy()
    train_loss = loss.groupby("Epoch").mean()
    train_loss = train_loss.to_numpy().ravel()

    h_A = [
        np.array(data.loc[epoch, 0, "A"])
        for epoch, data in data_hid.query("Dataset == 0").groupby("Epoch")
    ]
    h_B = [
        np.array(data.loc[epoch, 0, "B"])
        for epoch, data in data_hid.query("Dataset == 0").groupby("Epoch")
    ]
    y_A = [
        np.array(data.loc[epoch, 0, "A"])
        for epoch, data in data_output.query("Dataset == 0").groupby("Epoch")
    ]
    y_B = [
        np.array(data.loc[epoch, 0, "B"])
        for epoch, data in data_output.query("Dataset == 0").groupby("Epoch")
    ]

    epochs = np.arange(0, len(h_A))

    y_true_A, y_true_B = outputs[0], outputs[1]
    dy2 = np.sum((y_true_B - y_true_A) ** 2)
    h2 = np.array([np.sum((h_A[epoch] - h_B[epoch]) ** 2) for epoch in epochs])
    y2 = np.array([np.sum((y_A[epoch] - y_B[epoch]) ** 2) for epoch in epochs])
    w = np.array(
        [
            y2[epoch] - np.dot(y_true_A - y_true_B, y_A[epoch] - y_B[epoch])
            for epoch in epochs
        ]
    )
    y0_mean = np.sum((0.5 * ((y_A[0] + y_B[0]) - (y_true_B + y_true_A))) ** 2)

    h0, y0, w0, dy = h2[0], y2[0], w[0], dy2

    ## Fit theory
    traj_path = "plots/2_points/trajectories/"

    eta_h_opt, eta_y_opt,_ = simulate.optimize_eta(h2, y2, w, dx2, dy2)
    eta_h, eta_y = eta_h_opt, eta_y_opt

    t_max = len(epochs)

    sol = scipy.integrate.solve_ivp(
        simulate.der,
        [0, t_max],
        [h0, y0, w0],
        args=(eta_h, eta_y, dx2, dy2),
        dense_output=True,
    )

    t = np.linspace(0, t_max, len(epochs))
    z = sol.sol(t)
    t = epochs

    eta_y_mean_opt = simulate.optimize_eta_y_mean(z, train_loss, dy2, y0_mean)

    dh = h2[1:] - h2[:-1]
    loss_theory = simulate.loss(t, z, eta_y_mean_opt, dy2, y0_mean)

    ## Save plots
    publication.set_color_mixed()
    figsize = (1.875, 1.5)

    fig, ax = plt.subplots(figsize=figsize)
    plt.plot(t, z[0], linestyle="--")
    plt.plot(t, z[1], linestyle="--")
    plt.plot(t, z[2], linestyle="--")
    plt.gca().set_prop_cycle(None)
    ax.plot(t, h2, label="$||dh||^2$")
    ax.plot(t, y2, label="$||dy||^2$")
    ax.plot(t, w, label="$w$")
    plt.xlabel("Epochs")
    with io.capture_output() as captured:
        publication.plt_show(save_path=traj_path + "ode/" + setting + ".png")

    fig, ax = plt.subplots(figsize=figsize)
    plt.gca().set_prop_cycle(None)
    ax.plot(0, 0)
    ax.plot(0, 0)
    ax.plot(t, loss_theory, label=r"loss (theory)", linestyle="--")
    ax.plot(0, 0)
    ax.plot(t, train_loss, label="loss", zorder=1)
    plt.ylim(0, 0.5)
    plt.xlabel("Epochs")
    with io.capture_output() as captured:
        publication.plt_show(save_path=traj_path + "loss/" + setting + ".png")

    return

In [4]:
etas_h, etas_y = {}, {}
for setting in settings.index:
    print(f"\t\t\t\t\t-----{setting.upper()}-----")
    make_plots(setting)

					-----DEFAULT-----


Training: 100%|██████████| 6000/6000 [02:23<00:00, 41.92steps/s, train_loss=0.00000, val_loss=0.00000]


Loss: 0.04280497212246946


  model_loss = np.sum((pred - train_loss) ** 2)
  df = fun(x) - f0
  model_loss = np.sum((pred - train_loss) ** 2)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Loss: 0.04325391491574131
					-----SHALLOW-----


Training: 100%|██████████| 6000/6000 [00:50<00:00, 117.81steps/s, train_loss=0.00000, val_loss=0.00000]


Loss: 0.030687682103477033
Loss: 0.030811325440106972


  model_loss = np.sum((pred - train_loss) ** 2)
  df = fun(x) - f0
  model_loss = np.sum((pred - train_loss) ** 2)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


					-----NARROW-----


Training: 100%|██████████| 6000/6000 [00:55<00:00, 109.02steps/s, train_loss=0.00000, val_loss=0.00000]


Loss: 0.1902468389363724
Loss: 0.14346442517224997


  model_loss = np.sum((pred - train_loss) ** 2)
  df = fun(x) - f0
  model_loss = np.sum((pred - train_loss) ** 2)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


					-----LINEAR-----


Training: 100%|██████████| 6000/6000 [02:11<00:00, 45.59steps/s, train_loss=0.00000, val_loss=0.00000]


Loss: 0.20544720863987612
Loss: 0.160946831190728
					-----ELU-----


Training: 100%|██████████| 6000/6000 [02:18<00:00, 43.33steps/s, train_loss=0.00000, val_loss=0.00000]


Loss: 0.20262605035997272
Loss: 0.07220027775883706
					-----TANH-----


Training: 100%|██████████| 6000/6000 [02:19<00:00, 43.09steps/s, train_loss=0.00000, val_loss=0.00000]


Loss: 0.2115097660687202
Loss: 0.18919670512349235
					-----RELU-----


Training: 100%|██████████| 6000/6000 [02:20<00:00, 42.68steps/s, train_loss=0.00000, val_loss=0.00000]


Loss: 0.007393614027906311
Loss: 0.013950624316947052


  model_loss = np.sum((pred - train_loss) ** 2)
  df = fun(x) - f0
  model_loss = np.sum((pred - train_loss) ** 2)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


					-----SWISH-----


Training: 100%|██████████| 6000/6000 [02:19<00:00, 43.15steps/s, train_loss=0.00000, val_loss=0.00000]


Loss: 0.10994310868513552
Loss: 0.1321828504149345


  model_loss = np.sum((pred - train_loss) ** 2)
  df = fun(x) - f0
  model_loss = np.sum((pred - train_loss) ** 2)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


					-----SKIP-----


Training: 100%|██████████| 6000/6000 [02:28<00:00, 40.51steps/s, train_loss=0.00000, val_loss=0.00000]


Loss: 0.2236322492209698
Loss: 0.13204380340997002


  model_loss = np.sum((pred - train_loss) ** 2)
  df = fun(x) - f0
  model_loss = np.sum((pred - train_loss) ** 2)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


					-----CNN-----


Training: 100%|██████████| 6000/6000 [04:05<00:00, 24.40steps/s, train_loss=0.00000, val_loss=0.00000]


Loss: 0.2308950147429071
Loss: 0.21190901518408303


  model_loss = np.sum((pred - train_loss) ** 2)
  df = fun(x) - f0
  model_loss = np.sum((pred - train_loss) ** 2)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


					-----DROPOUT-----


Training: 100%|██████████| 6000/6000 [01:39<00:00, 60.35steps/s, train_loss=0.00099, val_loss=0.00012]


Loss: 0.3477046879764514
Loss: 0.1786688167779026


  model_loss = np.sum((pred - train_loss) ** 2)
  df = fun(x) - f0
  model_loss = np.sum((pred - train_loss) ** 2)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


					-----GAIN_0.9-----


Training: 100%|██████████| 6000/6000 [02:20<00:00, 42.84steps/s, train_loss=0.00000, val_loss=0.00000]


Loss: 0.3548988307527157
Loss: 0.12322419093455246
					-----GAIN_1.1-----


Training: 100%|██████████| 6000/6000 [02:19<00:00, 42.88steps/s, train_loss=0.00000, val_loss=0.00000]


Loss: 0.0185717389559153
Loss: 0.026072866889560484
					-----GAIN_1.2-----


Training: 100%|██████████| 6000/6000 [02:16<00:00, 43.84steps/s, train_loss=0.00000, val_loss=0.00000]


Loss: 0.04757312054260861
Loss: 0.05789194225393046


  model_loss = np.sum((pred - train_loss) ** 2)
  df = fun(x) - f0
  model_loss = np.sum((pred - train_loss) ** 2)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


					-----GAIN_1.3-----


Training: 100%|██████████| 6000/6000 [02:18<00:00, 43.46steps/s, train_loss=0.00000, val_loss=0.00000]


Loss: 0.23970898902829557
Loss: 0.01222398066023362


  model_loss = np.sum((pred - train_loss) ** 2)
  df = fun(x) - f0
  model_loss = np.sum((pred - train_loss) ** 2)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


					-----GAIN_1.4-----


Training: 100%|██████████| 6000/6000 [02:17<00:00, 43.76steps/s, train_loss=0.00000, val_loss=0.00000]


Loss: 1.0067268467850783
Loss: 0.027280790657089857
					-----GAIN_1.6-----


Training: 100%|██████████| 6000/6000 [02:17<00:00, 43.71steps/s, train_loss=0.09627, val_loss=0.09627]


Loss: 9.505330387885959
Loss: 190.4259612657725
