In [3]:
import os, sys
from tqdm import trange

import math
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy

import torch
from torch import nn
from torch.utils.data import TensorDataset

source = "/home/loek/projects/rnn/source"
sys.path.append(source)

from data import fun_data, grid_data
from preprocessing import Direct, Encoding, OneHot
from compilation import Compiler, Tracker, ScalarTracker, ActivationTracker
from activations import get_activations
from data_analysis.automata import to_automaton_history
from data_analysis.visualization.animation import SliderAnimation
from data_analysis.visualization.activations import (
    ActivationsAnimation,
    FunctionAnimation,
)
from data_analysis.visualization.automata import AutomatonAnimation
from data_analysis.visualization.epochs import EpochAnimation

from models import MLP

import cProfile
import pstats


# Probe for CUDA only so the cell output records whether a GPU is present.
is_cuda = torch.cuda.is_available()
print("GPU available" if is_cuda else "GPU not available")

# The notebook is pinned to the CPU regardless of the probe above, so a
# "GPU available" message does NOT mean the GPU is used.
device = torch.device("cpu")

GPU available


In [4]:
## Load data
data_path = "/home/loek/projects/rnn/DNN/data/"

# Class sizes: NA items named A1..A{NA} and NB items named B1..B{NB}.
NA, NB = 100, 5
classA = [f"A{n}" for n in range(1, NA + 1)]
classB = [f"B{n}" for n in range(1, NB + 1)]

# Label tables: one column per item; rows "A" and "B" hold the class indicator
# (1.0 in the row of the item's own class, 0.0 in the other).
A = pd.DataFrame(np.outer([1.0, 0.0], np.ones(NA)), index=["A", "B"], columns=classA)
B = pd.DataFrame(np.outer([0.0, 1.0], np.ones(NB)), index=["A", "B"], columns=classB)
classes = A.join(B)

# Item names drive the encoding; the targets are the per-item label columns.
names = classes.columns.to_numpy()
encoding = OneHot(names.tolist())

inputs = encoding(names)
outputs = np.array([classes[column].to_list() for column in classes.columns])


# Setup training data
def _as_float_tensor(array):
    """Convert a numpy array to a float32 tensor placed on `device`."""
    return torch.from_numpy(array.astype(np.float32)).to(device)


inputs = _as_float_tensor(inputs)
outputs = _as_float_tensor(outputs)

# The same dataset is used for both training and validation.
dataset = TensorDataset(inputs, outputs)
train_datasets = [dataset]
val_dataset = [dataset]

# Validation entries come first, so index 0 of the tracked list is the validation set.
tracked_datasets = val_dataset + train_datasets

In [5]:
# Network hyperparameters, consumed by the MLP constructor call further down.
gain = 0.8  # passed to MLP as init_std
P = 100  # passed to MLP as hidden_dim
L = 50  # passed to MLP as n_hid_layers; also the number of points in the `m` grid

In [6]:
## Instantiate model
# Build the project's MLP. Input and output widths are read from the second
# dimension of the training tensors; width, depth and init_std come from the
# P, L and gain constants defined above.
model = MLP(
    encoding=encoding,
    input_size=inputs.shape[1],
    output_size=outputs.shape[1],
    hidden_dim=P,
    n_hid_layers=L,
    device=device,
    init_std=gain,
)

In [7]:
## Setup compiler

# Define hyperparameters
n_epochs = 1000
lr = 0.00005


def _hidden_at(fraction):
    """Return a callable giving the hidden activations at a relative depth.

    The layer index is int(len(model) * fraction), evaluated each time the
    callable runs. Presumably model(inputs) returns (output, hidden activations
    per layer) -- confirm against MLP.
    """
    return lambda inputs: model(inputs)[1][int(len(model) * fraction)]


# Define Loss, Optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
compiler = Compiler(model, criterion, optimizer)

# Trackers: the validation loss plus activations at the middle, at one tenth and
# at nine tenths of the model depth, and the model output.
compiler.trackers = {
    "loss": ScalarTracker(lambda: compiler.validation(tracked_datasets)),
    "hidden": ActivationTracker(model, _hidden_at(1 / 2)),
    "hidden_early": ActivationTracker(model, _hidden_at(1 / 10)),
    "hidden_late": ActivationTracker(model, _hidden_at(9 / 10)),
    "output": ActivationTracker(model, lambda inputs: model(inputs)[0]),
}

In [8]:
## Training run
# Train on `train_datasets` for `n_epochs` epochs. `tracked_datasets` is passed
# as the second positional argument (presumably the datasets the trackers are
# evaluated on -- confirm in Compiler.training_run).
# NOTE(review): batch_size=100 is hard-coded, while the data cell defines
# NA + NB = 105 item names -- confirm the intended batch size.
compiler.training_run(
    train_datasets, tracked_datasets, n_epochs=n_epochs, batch_size=100
)

Training:   9%|▉         | 93/1000 [00:12<02:02,  7.38steps/s, train_loss=0.20202, val_loss=0.04577]


In [9]:
## Get all activations
# One activation table per layer index. The per-layer tables live under their
# own name so `activations` is only ever the combined DataFrame.
layer_activations = []

for layer in range(len(model) - 1):
    act = get_activations(val_dataset, lambda inputs: model(inputs)[1][layer], encoding)
    layer_activations.append(act)

# Stack the tables with the layer index as a new outermost index level. The
# level is named "Epoch" even though it holds layer indices; dist2 groups on
# that name (presumably "PCA_per_epoch" does as well -- confirm).
index_names = layer_activations[0].index.names
activations = pd.concat(layer_activations, keys=list(range(len(layer_activations))))
activations.index = activations.index.set_names(["Epoch"] + index_names)

# sorted(): a set has no guaranteed iteration order, so sort explicitly to give
# the slider its layer indices in ascending order.
layers = sorted(set(activations.index.get_level_values("Epoch")))

# NOTE(review): the saved output of this cell shows
# "AttributeError: 'list' object has no attribute 'items'" without a traceback;
# confirm which argument SliderAnimation / ActivationsAnimation expects as a dict.
animation = SliderAnimation(
    [ActivationsAnimation(activations, transform="PCA_per_epoch", plot_labels=True)],
    parameters=layers,
    parameter_name="Layer",
    fig_size=4,
)

  label.loc[index] = index


AttributeError: 'list' object has no attribute 'items'

<Figure size 800x400 with 0 Axes>

In [10]:
## Visualize representation dynamics
# Pull the recorded traces and keep (a copy of) the rows matching `query`.
query = "Epoch >= 0"
data_hid_early = compiler.trackers["hidden_early"].get_trace().query(query).copy()
data_hid_late = compiler.trackers["hidden_late"].get_trace().query(query).copy()
data_output = compiler.trackers["output"].get_trace().query(query).copy()

# Dataset 0 is read as the validation loss; all other datasets are averaged per
# epoch as the training loss (tracked_datasets lists the validation set first).
loss = compiler.trackers["loss"].get_trace()
val_loss = loss.query("Dataset==0")[0].to_numpy()
train_loss = loss.query("Dataset>0").groupby("Epoch").mean()

# weight_change = compiler.trackers["weight change"].get_trace().to_numpy().reshape(-1)

# sorted(): a set has no guaranteed iteration order, so sort explicitly to give
# the slider its epochs in ascending order.
epochs = sorted(set(data_output.index.get_level_values("Epoch")))

# One PCA panel per tracked trace, restricted to the non-validation datasets.
activation_panels = [
    ActivationsAnimation(trace.query("Dataset != 0"), transform="PCA", plot_labels=True)
    for trace in (data_hid_early, data_hid_late, data_output)
]

# NOTE(review): the saved output of this cell shows
# "AttributeError: 'list' object has no attribute 'items'" without a traceback;
# confirm which argument SliderAnimation / the animations expect as a dict.
animation = SliderAnimation(
    [
        *activation_panels,
        EpochAnimation(
            graphs={
                "Training loss": train_loss,
                "Validation loss": val_loss,
            },
            unitless_graphs={
                # "weight change": weight_change,
            },
            # x_bounds=(0, 800),
            # y_bounds=(0, 1),
        ),
    ],
    parameters=epochs,
    parameter_name="Epoch",
    fig_size=4,
)

  label.loc[index] = index
  label.loc[index] = index
  label.loc[index] = index


AttributeError: 'list' object has no attribute 'items'

<Figure size 800x800 with 0 Axes>

In [11]:
## Plot reduction
# Rows of the early-hidden and output traces for dataset 0 at epoch 0.
first_epoch_rows = "Epoch == 0 and Dataset == 0"
x = data_hid_early.query(first_epoch_rows).to_numpy()
y = data_output.query(first_epoch_rows).to_numpy()

# D2 is the mean over rows of (||output row|| / ||early-hidden row||) squared.
n_rows = x.shape[0]
D2 = 0
for early_row, output_row in zip(x, y):
    norm_ratio = np.linalg.norm(output_row) / np.linalg.norm(early_row)
    D2 += (norm_ratio**2) / n_rows
print(f"D0^2: {D2}")

# L evenly spaced points on [0, 1] and the curve exp(m * log(D2)) over them.
m = np.linspace(0, 1, L)

base = np.exp(m * np.log(D2))

D0^2: 2.0525343429121885e-25


In [12]:
def _one_row_per_input(frame: pd.DataFrame) -> bool:
    """Return True when grouping `frame` by "Input" yields exactly one row per group."""
    sizes = frame.groupby("Input").size()
    # The length check also rejects frames where groupby dropped rows (NaN keys).
    return bool(sizes.eq(1).all()) and len(sizes) == len(frame)


def dist2(act1: pd.DataFrame, act2: pd.DataFrame):
    """Squared normalised sum of pairwise activation distances, per epoch.

    Both frames are grouped by "Epoch" and the groups are paired positionally.
    Within each pair of groups, the Euclidean (Frobenius) norm of the difference
    is summed over every combination of one "Input" group from `act1` and one
    from `act2`, then divided by ``len(rows1) * len(rows2) - 1`` and squared.

    Args:
        act1: Activations carrying "Epoch" and "Input" group keys.
        act2: Activations with the same layout as `act1`.

    Returns:
        1-D numpy array with one squared distance per paired epoch group.

    When every "Input" group holds a single row (the common case), all pairs
    are evaluated in one broadcast numpy operation instead of nested pandas
    group-bys; this allocates an array of shape (rows1, rows2, columns).
    Otherwise the pair-by-pair loop is used.
    """
    distances = []
    for (_, data1_epoch), (_, data2_epoch) in zip(
        act1.groupby("Epoch"), act2.groupby("Epoch")
    ):
        # NOTE(review): this is (n1 * n2) - 1, not n1 * (n2 - 1) or n1 * n2.
        # Kept as in the original; confirm the intended normalisation.
        n_pairs = len(data1_epoch) * len(data2_epoch) - 1

        if _one_row_per_input(data1_epoch) and _one_row_per_input(data2_epoch):
            rows1 = data1_epoch.to_numpy()
            rows2 = data2_epoch.to_numpy()
            # Broadcast to all (row1, row2) differences; norm over the feature axis.
            pair_norms = np.linalg.norm(rows1[:, None, :] - rows2[None, :, :], axis=-1)
            distance = pair_norms.sum(dtype=np.float64) / n_pairs
        else:
            # Several rows per input: keep the original group-by-group computation.
            distance = 0
            for _, act_1 in data1_epoch.groupby("Input"):
                for _, act_2 in data2_epoch.groupby("Input"):
                    distance += (
                        np.linalg.norm(act_1.to_numpy() - act_2.to_numpy()) / n_pairs
                    )
        distances.append(distance)

    distances2 = np.array([dist**2 for dist in distances])

    return distances2


## Compute distances
# Restrict the layer-indexed activations to each class through the third index level.
actA, actB = (activations.loc[:, :, members] for members in (classA, classB))

# Within-class (A with A, B with B) and between-class (A with B) distances.
dist2A = dist2(act1=actA, act2=actA)
dist2B = dist2(act1=actB, act2=actB)
dist2AB = dist2(act1=actA, act2=actB)

KeyboardInterrupt: 

In [None]:
# Each distance curve divided by `base`. Labels name the pair of classes passed
# to dist2 so the three curves can be told apart.
plt.plot(dist2A / base, label="A-A")
plt.plot(dist2B / base, label="B-B")
plt.plot(dist2AB / base, label="A-B")
# The dist2 arrays have one entry per layer-index group of `activations`.
plt.xlabel("Layer")
plt.ylabel("dist2 / base")
plt.legend()
plt.show()

In [None]:
# Within-class distance curves relative to the between-class curve.
plt.plot(dist2A / dist2AB, label="A")
plt.plot(dist2B / dist2AB, label="B")
# plt.plot(np.exp(-(NA) * D2 ** (1 - m)), label="A (theory)")
# plt.plot(np.exp(-(NB) * D2 ** (1 - m)), label="B (theory)")

# `m` ends at exactly 1, so the last entry of m / (1 - m) is a division by zero
# (inf). Compute it once with only that warning silenced; the values plotted are
# the same as evaluating the expression inline.
with np.errstate(divide="ignore"):
    odds = m / (1 - m)

# The two theory curves get distinct labels so the legend is unambiguous.
plt.plot(
    np.exp(-odds * ((D2 ** (1 - m)))),
    label="theory: exp(-m/(1-m) * D2^(1-m))",
)
plt.plot(
    np.exp(odds * (1 - (D2 ** (-m)))),
    label="theory: exp(m/(1-m) * (1 - D2^(-m)))",
)
plt.xlabel("Layer")
plt.ylabel("dist2 / dist2AB")
plt.legend(loc="upper right")
plt.show()

In [None]:
# File stem for the exported animation; only used by the disabled export below.
name = "Partial Out-of-dataset generalization"
# NOTE(review): if re-enabled, int(len(train_loss) / 1000) evaluates to 0
# whenever train_loss has fewer than 1000 rows -- confirm to_gif accepts step_size=0.
# animation.to_gif("plots/" + name, step_size=int(len(train_loss) / 1000))