In [1]:
import random

import sys
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import trange

import torch
from torch import nn

source = "../source"
sys.path.append(source)


from data import seq_data
from preprocessing import OneHot
from compilation import Compiler, ScalarTracker, ActivationTracker
from data_analysis.automata import to_automaton_history, reduce_automaton, to_automaton
from visualization.animation import SliderAnimation
from visualization.activations import ActivationsAnimation
from visualization.automata import AutomatonAnimation, display_automata
from visualization.epochs import EpochAnimation

from model import Model
import publication

# Select the compute device: use the GPU when CUDA is available, else the CPU.
is_cuda = torch.cuda.is_available()
if is_cuda:
    device = torch.device("cuda")
    print("GPU is available")
else:
    device = torch.device("cpu")
    print("GPU not available, CPU used")

# Seed every random number generator imported by this notebook so that a
# Restart & Run All is repeatable. NumPy was previously left unseeded.
random.seed(0)
np.random.seed(0)
torch.manual_seed(0)

GPU not available, CPU used


<torch._C.Generator at 0x7f44528a9330>

In [2]:
# Settings shared by every trial in the sweep; `trial` reads them as globals.
nonlinearity = "tanh"  # passed to Model(nonlinearity=...)
lr = 0.001  # Adam learning rate
P = 100  # passed to Model(hidden_dim=...)
L = 1  # passed to Model(n_layers=...)
n_epochs = 10000  # passed to Compiler.training_run(n_epochs=...)

In [3]:
def trial(gain, seq_len):
    """Train one model on the parity ("XOR") task and summarise the result.

    Reads the notebook-level settings ``nonlinearity``, ``lr``, ``P``, ``L``,
    ``n_epochs`` and ``device``.

    Args:
        gain: Value forwarded unchanged to ``Model(gain=...)``.
        seq_len: Longest training sequence length. One training dataset is
            built for every length from 1 to ``seq_len`` inclusive.

    Returns:
        A tuple ``(train_loss, val_loss, n_states)`` where

        * ``train_loss`` is the tracked loss of the last epoch, averaged over
          all tracked datasets with ``Dataset > 0`` (the analysis and training
          datasets), as a scalar;
        * ``val_loss`` is the last tracked loss for ``Dataset == 0``
          (presumably the validation set, which is the first tracked dataset
          -- confirm against ``Compiler.validation``), as a scalar;
        * ``n_states`` is the number of states of the automaton extracted
          from the trained model on the training datasets.
    """
    ## Generate data

    # Define problem and data encoding
    symbols = [0, 1]
    encoding = OneHot(symbols)
    problem = lambda seq: np.sum(seq) % 2  # XOR problem

    # Define sequence lengths for training and validation datasets
    train_seq_lengths = list(range(1, seq_len + 1))
    analysis_seq_lengths = train_seq_lengths
    val_seq_length = 50
    val_datapoints = 100

    # Generate datasets
    training_datasets = [
        seq_data(device, problem, encoding, seq_len=length)
        for length in train_seq_lengths
    ]
    validation_datasets = [
        seq_data(
            device,
            problem,
            encoding,
            n_datapoints=val_datapoints,
            seq_len=val_seq_length,
        )
    ]
    analysis_data = [
        seq_data(device, problem, encoding, seq_len=length)
        for length in analysis_seq_lengths
    ]
    # Order matters: the loss queries below rely on the validation dataset
    # being first in this list.
    tracked_datasets = validation_datasets + analysis_data + training_datasets

    ## Instantiate model
    model = Model(
        encoding=encoding,
        input_size=2,
        output_size=2,
        hidden_dim=P,
        n_layers=L,
        device=device,
        nonlinearity=nonlinearity,
        gain=gain,
    )

    ## Setup compiler
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(
        model.parameters(), lr=lr, amsgrad=True, weight_decay=0.01
    )
    compiler = Compiler(model, criterion, optimizer)
    compiler.trackers = {
        "loss": ScalarTracker(lambda: compiler.validation(tracked_datasets)),
    }

    ## Training run
    compiler.training_run(
        training_datasets,
        n_epochs=n_epochs,
        batch_size=128,
        progress_bar=False,
    )

    ## Collect data
    loss = compiler.trackers["loss"].get_trace()
    val_loss = loss.query("Dataset==0")[0].to_numpy()[-1]
    # Select column 0 before converting, exactly as for val_loss. Without the
    # selection, `.to_numpy()[-1]` returns the last *row* as a 1-D array
    # instead of a scalar loss value.
    train_loss = (
        loss.query("Dataset>0").groupby("Epoch").mean()[0].to_numpy()[-1]
    )
    initial_hidden = model.init_hidden(batch_size=1)[-1]
    hidden_function = lambda inputs: model(inputs)[1][-1]
    output_function = lambda inputs: model(inputs)[0]
    automaton = to_automaton(
        hidden_function,
        output_function,
        initial_hidden,
        training_datasets,
        encoding,
        merge_distance_frac=0.1,
    )
    n_states = len(automaton.states)

    return train_loss, val_loss, n_states

In [None]:
# Grid resolution of the sweep: number of gains and of sequence lengths.
N_gain = 10
N_len = 10

gains = np.linspace(0.1, 2.0, N_gain)
seq_lens = np.linspace(1, 10, N_len, dtype=int)

# One flat list per recorded quantity. Entries are appended gain-major
# (outer loop over gains, inner loop over sequence lengths).
gain_data = []
seq_len_data = []
train_loss_data = []
val_loss_data = []
n_states_data = []

for gain in gains:
    for seq_len in seq_lens:
        # Train one model for this (gain, seq_len) pair and record the outcome.
        train_loss, val_loss, n_states = trial(gain, seq_len)

        gain_data.append(gain)
        seq_len_data.append(seq_len)
        train_loss_data.append(train_loss)
        val_loss_data.append(val_loss)
        n_states_data.append(n_states)

Training: 100%|██████████| 10000/10000 [02:20<00:00, 71.15steps/s, train_loss=0.00010, val_loss=0.65862]
Computing automata: 100%|██████████| 1/1 [00:00<00:00, 531.06it/s]
Training: 100%|██████████| 10000/10000 [02:54<00:00, 57.15steps/s, train_loss=0.00228, val_loss=0.40487]
Computing automata: 100%|██████████| 1/1 [00:00<00:00, 334.13it/s]
Training:  13%|█▎        | 1298/10000 [00:27<04:25, 32.82steps/s, train_loss=0.00804, val_loss=0.54798]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

Training: 100%|██████████| 10000/10000 [03:35<00:00, 46.43steps/s, train_loss=0.00430, val_loss=0.38639]
Computing automata: 100%|██████████| 1/1 [00:00<00:00, 262.52it/s]
Training: 100%|██████████| 10000/10000 [04:26<00:00, 37.59steps

In [None]:
# Persist every recorded quantity of the sweep as its own .npy file in the
# current working directory (np.save appends the ".npy" extension).
results_by_name = {
    "gain": gain_data,
    "seq_len": seq_len_data,
    "train_loss": train_loss_data,
    "val_loss": val_loss_data,
    "n_states": n_states_data,
}
for file_stem, values in results_by_name.items():
    np.save(file_stem, values)

In [None]:
## Plot results
train_loss_grid = np.array(train_loss_data).reshape(N_gain, N_len)
fig = plt.figure(figsize=(4, 4))
publication.set_color_gradient(2)
plt.imshow(train_loss_grid)
publication.im_show(colorbar=False, x_labels=np.around(gains, 1), y_labels=seq_lens,save_path="train_loss_grid")

val_loss_grid = np.array(val_loss_data).reshape(N_gain, N_len)
fig = plt.figure(figsize=(4, 4))
publication.set_color_gradient(2)
plt.imshow(train_loss_grid)
publication.im_show(colorbar=False, x_labels=np.around(gains, 1), y_labels=seq_lens,save_path="val_loss_grid")

n_states_grid = np.array(n_states_data).reshape(N_gain, N_len)
fig = plt.figure(figsize=(4, 4))
publication.set_color_gradient(2)
plt.imshow(train_loss_grid)
publication.im_show(colorbar=False, x_labels=np.around(gains, 1), y_labels=seq_lens,save_path="n_states_grid")