In [1]:
# reload magic
%load_ext autoreload
%autoreload 2

In [20]:
# Import deterministic.py using local file path
import sys
import numpy as np
import matplotlib.pyplot as plt

sys.path.append('../sequence_generators')
import make_datasets
sys.path.append('../entropy')
import entropy

In [40]:
# 1. Import the datasets we will be using across models
n_train = 200
n_val = 50
n_bits = 32
# p_bitflip = 0.15
seed = 334
k = 4
noisy_transition_matrix = {0: 0.15, 1: 0.96, 2: 0.94, 3: 0.10, 4: 0.08}

p_x_conditional = entropy.one_prob_to_conditional(noisy_transition_matrix, k) 

In [42]:
# Step 2: Compute the entropy rate for this dataset
# TODO: how do we determine m?
p_S = np.ones(shape=(2,)*k) / (2 ** k)
m = 1
H_m_ahead_4lookback = entropy.conditional_H_of_xnplusm_given_kbits_klookback(n_bits, m, k, p_S, p_x_conditional)
print(H_m_ahead_4lookback)


0.381112052756314


In [87]:
# Step 3: Train a recurrent neural network on this dataset
import torch
import torch.nn as nn
from torch.nn import functional as F

class BinaryRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(BinaryRNN, self).__init__()
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.rnn(x)
        out = self.fc(out)
        return out


In [78]:
def get_data(split):
    X, _ = make_datasets.k_lookback_weight_dataset(noisy_transition_matrix, k, n_train+n_val, n_bits, 0, seed)
    if split == 'train':
        train_data = X[:n_train]
        train_data = torch.tensor(train_data).float()
        # train_data = train_data.unsqueeze(0)
        train_data = train_data.unsqueeze(-1) # feature dimension is 1 for bits
        return train_data
    elif split == 'val':
        val_data = X[n_train:]
        val_data = torch.tensor(val_data).float()
        # val_data = val_data.unsqueeze(0)
        val_data = val_data.unsqueeze(-1) # feature dimension is 1 for bits
        return val_data

In [92]:
def estimate_loss(verbose=False):
    # Average the loss over many batches. Hardcoded cross_entropy loss
    # Needs to be in same namespace as model and get_batch
    out = {}
    model.eval()
    for split in ['train', 'val']:
        
        data = get_data(split)
        X = data[:,:-1,:]
        Y = data[:,-1,:]
        output = model(X)
        logits = torch.sigmoid(output[:, -1, :])
        loss = F.binary_cross_entropy_with_logits(output[:, -1, :], Y)
        if verbose:
            preds = torch.argmax(logits.reshape(batch_size, N_BITS + 1, tgt_vocab_size), dim=2)
            for i in range(len(X)):
                print(f"input: {X[i]}")
                print(f"target: {tgt_out[i]}")
                print(f"predicted: {preds[i]}")
                print()
        loss = loss.item()
        out[split] = loss
    model.train()
    return out

In [93]:
def train(model, data, epochs, criterion, optimizer, eval_inter=50):
    model.train()
    # training involves next-bit prediction.
    for epoch in range(epochs):
        for i, seq in enumerate(data):
            seq = seq.unsqueeze(-1)  # Adding feature dimension
            # print(seq.shape)
            inputs = seq[:, :-1]  # All but the last bit
            targets = seq[:, 1:]  # All but the first bit

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
        if epoch % eval_inter == 0:
            losses = estimate_loss()
            print(f'Epoch [{epoch+1}/{epochs}]', losses)


In [94]:
hidden_size = 32
num_layers = 1
lr = 0.001
n_epochs = 2000


model = BinaryRNN(1, hidden_size, 1, num_layers)
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
train(model, train_data, n_epochs, criterion, optimizer, eval_inter=50)

Epoch [1/2000] {'train': 0.7018217444419861, 'val': 0.6965770125389099}
Epoch [51/2000] {'train': 0.5417907238006592, 'val': 0.554243266582489}
Epoch [101/2000] {'train': 0.3833094835281372, 'val': 0.3069271743297577}
Epoch [151/2000] {'train': 0.3639773428440094, 'val': 0.2917017638683319}
Epoch [201/2000] {'train': 0.35240811109542847, 'val': 0.2819930911064148}
Epoch [251/2000] {'train': 0.3421016037464142, 'val': 0.27489736676216125}
Epoch [301/2000] {'train': 0.32974743843078613, 'val': 0.2673196494579315}
Epoch [351/2000] {'train': 0.31663206219673157, 'val': 0.2570190727710724}
Epoch [401/2000] {'train': 0.307634562253952, 'val': 0.2505490779876709}
Epoch [451/2000] {'train': 0.2992391586303711, 'val': 0.24654994904994965}
Epoch [501/2000] {'train': 0.29195377230644226, 'val': 0.24315164983272552}
Epoch [551/2000] {'train': 0.2884753942489624, 'val': 0.2395324856042862}
Epoch [601/2000] {'train': 0.28489232063293457, 'val': 0.22818109393119812}
Epoch [651/2000] {'train': 0.28469

In [None]:
val_data = torch.tensor(val_data).float()


In [None]:
def predict(model, start_sequence, num_future_bits):
    model.eval()
    inputs = torch.tensor(start_sequence, dtype=torch.float32).unsqueeze(0).unsqueeze(-1)
    predictions = []

    with torch.no_grad():
        for _ in range(num_future_bits):
            output = model(inputs)
            next_bit = torch.round(torch.sigmoid(output[:, -1, :])).item()
            predictions.append(next_bit)
            next_input = torch.tensor([[next_bit]], dtype=torch.float32)
            inputs = torch.cat((inputs, next_input.unsqueeze(-1)), dim=1)
    
    return predictions
