In [1]:
import os

# Root directory of the project checkout. Set the HIDDEN_SINGLES_PROJ_PATH
# environment variable to point the notebook at another location; when it is
# unset (or empty) the original author-local path is used.
# Later cells build paths by plain string concatenation (e.g.
# proj_path + 'python/'), so os.path.join(..., '') is used to guarantee the
# value always ends with a path separator.
proj_path = os.path.join(
    os.environ.get('HIDDEN_SINGLES_PROJ_PATH') or '/home/ajhnam/projects/hidden_singles_public/',
    '')

In [2]:
import sys
sys.path.append(proj_path + 'python/')

import random
import numpy as np
import itertools
import pandas as pd
import copy

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader as DataLoader
from tqdm.auto import tqdm

from hiddensingles.misc import torch_utils as tu
from hiddensingles.misc import utils, TensorDict, TensorDictDataset, RRN

In [3]:
# Device on which the dataset tensors and the model are placed: this value is
# passed as `device=` to torch.tensor(...) and to `.to(...)` in later cells.
# NOTE(review): a bare integer is presumably interpreted by PyTorch as a CUDA
# device ordinal, which would require at least three visible GPUs — confirm
# on the machine running this notebook.
device = 2

In [11]:
def get_results(model, dataset, batch_size, num_steps=16, optimizer=None):
    """
    Run one full pass of `model` over `dataset` and summarise loss and accuracy.

    If `optimizer` is given, the pass is a training epoch: batches are shuffled,
    the model is put in training mode, and the optimizer is stepped after every
    batch. Otherwise the pass is evaluation only: batches keep their order, the
    model is put in eval mode, and the forward pass runs under torch.no_grad().
    The model's previous train/eval mode is restored before returning.

    Args:
        model: module called as `model(inputs, num_steps=num_steps)`. Must expose
            `max_digit` and the nn.Module `training` / `train()` interface.
        dataset: object accepted by `TensorDictDataset`; each batch must provide
            `inputs` and `targets` entries.
        batch_size: number of examples per batch.
        num_steps: number of steps requested from the model. The loss is computed
            against the targets repeated along a step dimension.
        optimizer: optimizer to step after each batch, or None to evaluate only.

    Returns:
        TensorDict with scalar entries:
            loss: mean of the per-batch losses (not weighted by batch size)
            accuracy: fraction of grid entries predicted correctly at the last step
            solved: fraction of examples whose entries are all correct at the last step
    """
    train = optimizer is not None

    dataloader = DataLoader(TensorDictDataset(dataset), batch_size=batch_size, shuffle=train)

    losses = []
    correct = []

    # Make the module's mode match the kind of pass being run, so that
    # mode-dependent layers (if the model has any) do not behave as in training
    # while metrics are being measured. The caller's mode is restored afterwards.
    was_training = model.training
    model.train(train)
    try:
        for dset in dataloader:
            dset = TensorDict(**dset)

            if train:
                optimizer.zero_grad()
                outputs = model(dset.inputs, num_steps=num_steps)
            else:
                # No graph is needed when we are not going to call backward()
                with torch.no_grad():
                    outputs = model(dset.inputs, num_steps=num_steps)

            # (batch, step, row, col, digit-logits)
            outputs = outputs.view(-1, num_steps, model.max_digit, model.max_digit, model.max_digit)
            # Repeat the targets for every step so each step's output is scored
            # (presumably what tu.expand_along_dim does — verify in torch_utils)
            targets = tu.expand_along_dim(dset.targets, 1, num_steps)
            loss = tu.cross_entropy(outputs, targets)

            if train:
                loss.backward()
                optimizer.step()

            # record; only the final step counts towards accuracy
            losses.append(loss.item())
            correct.append((outputs.argmax(-1) == targets)[:, -1])
    finally:
        model.train(was_training)

    correct = torch.cat(correct)
    loss = torch.tensor(losses).mean()
    accuracy = correct.float().mean().cpu()
    # An example is solved only when every entry of its grid is correct
    solved = correct.all(-1).all(-1).float().mean().cpu()

    results = TensorDict(loss=loss,
                         accuracy=accuracy,
                         solved=solved)
    return results

In [5]:
# Load data

def load_split(split):
    """
    Read one split of the RRN data and convert it to tensors on `device`.

    Args:
        split: file stem under data/rrn/, e.g. 'train', 'valid' or 'test'.

    Returns:
        (df, dset): the raw DataFrame with string columns 'input' and 'target',
        and a TensorDict with `inputs` and `targets` tensors viewed as (-1, 9, 9).
        Targets are shifted down by one relative to the file contents.
    """
    # dtype=str keeps the digit strings as text regardless of pandas' type
    # inference; list(s) below needs one character per grid entry.
    # assumes every row holds two 81-character digit strings — TODO confirm
    df = pd.read_csv(proj_path + f'data/rrn/{split}.csv', names=['input', 'target'], dtype=str)

    def to_grid(column):
        # One row of single characters per example, parsed as integers
        digits = np.array([list(s) for s in column], dtype=int)
        return torch.tensor(digits, device=device).view(-1, 9, 9)

    dset = TensorDict(inputs=to_grid(df.input), targets=to_grid(df.target) - 1)
    return df, dset


train_df, train_dset = load_split('train')
valid_df, valid_dset = load_split('valid')
test_df, test_dset = load_split('test')

In [29]:
# Hyperparameters passed to the RRN constructor
rrn_config = dict(
    digit_embed_size=10,
    num_mlp_layers=0,
    hidden_vector_size=96,
    message_size=96,
    encode_coordinates=False,
)

# Build the network and move it to the configured device
model = RRN(**rrn_config).to(device)

# Adam over all model parameters, with weight decay
optimizer = optim.Adam(
    model.parameters(),
    lr=2e-4,
    weight_decay=1e-4,
)

In [30]:
# Run configuration
num_steps, batch_size = 16, 50
num_epochs = 0     # with 0 the loop body below never runs; only the baseline is recorded
print_epochs = 1   # report metrics every `print_epochs` epochs

# Keyword arguments shared by every get_results call in this cell
pass_kwargs = dict(batch_size=batch_size, num_steps=num_steps)

# Baseline metrics, measured before any optimizer step
tr_result = get_results(model, train_dset, **pass_kwargs)
te_result = get_results(model, test_dset, **pass_kwargs)
tr_results, te_results = [tr_result], [te_result]

for epoch in tqdm(range(num_epochs)):
    # Passing the optimizer makes this pass a training epoch
    tr_result = get_results(model, train_dset, optimizer=optimizer, **pass_kwargs)
    tr_results.append(tr_result)
    # Evaluation-only pass over the test split
    te_result = get_results(model, test_dset, **pass_kwargs)
    te_results.append(te_result)

    if epoch % print_epochs != 0:
        continue
    utils.kv_print(epoch=epoch, loss=tr_result.loss,
                   tr_acc=tr_result.accuracy, tr_sol=tr_result.solved,
                   te_acc=te_result.accuracy, te_sol=te_result.solved)

# Combine the per-epoch results into one TensorDict per split, stacked along dim 0
tr_results = TensorDict.stack(tr_results, 0)
te_results = TensorDict.stack(te_results, 0)

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




In [33]:
# Convert the stacked per-epoch results of each split into a labelled DataFrame.
# NOTE(review): {0: 'epoch'} presumably names dimension 0 as the 'epoch' column —
# confirm against TensorDict.to_dataframe.
frames = []
for label, stacked in (('train', tr_results), ('test', te_results)):
    frame = stacked.to_dataframe({0: 'epoch'})
    frame['dataset'] = label
    frames.append(frame)
tr_df, te_df = frames

# Train rows first, then test rows, written as a tab-separated file without the index
df = pd.concat(frames)
df.to_csv(proj_path + "data/rrn/sudoku_3x3_results.tsv", sep='\t', index=False)