In [1]:
import torch
from torch import nn
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split

In [2]:
from xynn.attntcn import AttnTCNClassifier
from xynn.embedding import LinearEmbedding

In [3]:
# Show the library's ASCII sketch of the AttnTCN architecture (rendered in the output below).
AttnTCNClassifier.diagram()


X_num ─ Num. embedding ┐ ┌─ Attn ─ TCN ─ MLP ─┐
                       ├─┤                    w+ ── output
X_cat ─ Cat. embedding ┘ └──────── MLP ───────┘

splits are copies and joins are concatenations;
'w+' is weighted element-wise addition;
"Attn" indicates 1 or more of AutoInt's AttentionInteractionLayer



In [4]:
# Fixed seed, handed to the model constructor as `seed=SEED`.
SEED = 34589

# Load MNIST data

In [5]:
def data_generator(root):
    """Load MNIST and return both splits as in-memory tensors.

    The dataset is downloaded into ``root`` when it is not already there
    (``download=True``). Every sample of both splits is materialised eagerly,
    so the whole dataset ends up in memory.

    Parameters
    ----------
    root : str
        Directory passed to ``datasets.MNIST`` as its ``root``.

    Returns
    -------
    tuple
        ``((X_num_train, X_cat_train, y_train), (X_num_valid, X_cat_valid, y_valid))``.
        The "valid" triple is built from the ``train=False`` split.
        ``X_num_*`` is the concatenation (along dim 0) of each image flattened
        from its second dimension onward -- presumably ``(n_samples, 784)``,
        TODO confirm. ``X_cat_*`` is always ``None`` (no categorical features).
        ``y_*`` is a tensor built from the labels.
    """
    # One pipeline serves both splits: convert to tensor, then normalise.
    # NOTE(review): 0.1307 / 0.3081 are presumably the MNIST pixel mean / std -- confirm.
    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])
    train_set, test_set = [
        datasets.MNIST(root=root, train=is_train, download=True, transform=preprocess)
        for is_train in (True, False)
    ]

    def to_tensors(dataset):
        # zip(*dataset) walks the whole dataset and separates the
        # (image, label) pairs into one tuple of images and one of labels.
        images, labels = zip(*dataset)
        features = torch.cat([image.flatten(start_dim=1) for image in images], dim=0)
        return features, torch.tensor(labels)

    X_num_train, y_train = to_tensors(train_set)
    X_num_valid, y_valid = to_tensors(test_set)

    # No categorical inputs exist for this dataset, hence the None placeholders.
    train_split = (X_num_train, None, y_train)
    valid_split = (X_num_valid, None, y_valid)
    return train_split, valid_split

In [6]:
# Fetch MNIST under ../data/ (downloaded if missing). Note that `valid` is
# built from MNIST's `train=False` split, not from a slice of the training data.
train, valid = data_generator(root="../data/")

In [7]:
# Unpack the (X_num, X_cat, y) triples; both X_cat values are None because
# data_generator supplies no categorical features.
X_num_train, X_cat_train, y_train = train
X_num_valid, X_cat_valid, y_valid = valid

# Model

In [8]:
def accuracy(y_pred, y_true):
    """Return the percentage of rows whose highest-scoring class equals the label.

    Parameters
    ----------
    y_pred : torch.Tensor
        Per-class scores; the predicted class is the arg-max along dim 1.
    y_true : torch.Tensor
        Class labels compared element-wise against the predicted classes.

    Returns
    -------
    torch.Tensor
        Scalar tensor: 100 * (number of matches) / (number of rows).
    """
    predicted = y_pred.argmax(dim=1)
    num_correct = (predicted == y_true).int().sum()
    num_rows = predicted.shape[0]
    return 100 * num_correct / num_rows

In [9]:
# Build the AttnTCN classifier that is trained on the flattened MNIST pixels below.
model = AttnTCNClassifier(
    embedding_num=LinearEmbedding(1),  # embedding for the numeric inputs; presumably 1 is the embedding size -- TODO confirm against xynn
    attn_embedding_size=5,
    attn_num_layers=2,  # presumably the count of attention layers in the diagram's "Attn" stage -- confirm
    tcn_output="non-temporal",
    tcn_hidden_sizes=[10] * 2,  # i.e. [10, 10]
    tcn_kernel_size=3,
    mlp_hidden_sizes=(512, 256, 128, 64),
    seed=SEED,
    # device="cuda",  # left disabled; presumably selects GPU execution when enabled -- verify
)

In [10]:
# Train for five epochs; the log below reports validation loss and accuracy per epoch.
# NOTE(review): warm_start=True presumably resumes from the model's current
# weights, so re-running this cell would not reproduce the log below -- confirm.
model.fit(
    X_num=X_num_train,
    X_cat=X_cat_train,  # None: data_generator supplies no categorical features
    y=y_train,
    optimizer=torch.optim.Adam,
    opt_kwargs={"lr": 1e-2},
    scheduler=torch.optim.lr_scheduler.StepLR,
    # gamma = 0.1 ** (1/8) ~= 0.75. step_size equals num_epochs, and the logged
    # learning rate stays at 0.0100 for every epoch of this run.
    sch_kwargs={"step_size": 5, "gamma": 0.1 ** 0.125},
    val_sets=[[X_num_valid, X_cat_valid, y_valid]],
    extra_metrics=[("accuracy", accuracy)],
    num_epochs=5,
    batch_size=2048,
    # NOTE(review): patience (10) exceeds num_epochs (5), so early stopping
    # presumably cannot trigger in this run -- confirm against xynn.
    early_stopping_patience=10,
    early_stopping_metric="accuracy",  # same name as the entry in extra_metrics
    early_stopping_mode="max",
    warm_start=True,
    verbose=True,
)

epoch  lrn rate  non-mlp  train loss   val loss   accuracy
──────────────────────────────────────────────────────────
    0    0.0100     0.43      0.1818     0.3980      87.12         
    1    0.0100     0.42     0.09618     0.1440      95.74          
    2    0.0100     0.41     0.06713     0.1030      96.84          
    3    0.0100     0.40     0.06557    0.09986      96.94          
    4    0.0100     0.40     0.04659     0.1146      96.78          
