In [1]:
import torch
import os
# Set before any tokenizer is created in this notebook.
# NOTE(review): presumably this silences the Hugging Face tokenizers fork
# warning once the DataLoader worker processes start — confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
from boardGPT.datasets import GameDataset, collate_fn, infinite_loader
from boardGPT.models import GameGPT
from transformers import AutoTokenizer

## Load model and tokenizer

In [23]:
# Single source of truth for the Hub repository; the tokenizer is read from
# its "tokenizer" subfolder.
REPO_ID = "theartificialis/OthelloGPT-Synthetic-20m"

# Prefer the GPU, but fall back to CPU so the notebook still runs on machines
# without CUDA instead of failing on the `.to(...)` call.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model, model_config = GameGPT.from_pretrained(repo_id=REPO_ID)
tokenizer = AutoTokenizer.from_pretrained(REPO_ID, subfolder="tokenizer")

# Inference only: move the weights to the selected device and switch to eval mode.
# `model.eval()` stays the last expression so the cell still displays the
# module summary.
model = model.to(device)
model.eval()

GameGPT(
  (token_emb_hook): HookPoint()
  (pos_emb_hook): HookPoint()
  (pre_logits_hook): HookPoint()
  (transformer): ModuleDict(
    (wte): Embedding(61, 512)
    (wpe): Embedding(60, 512)
    (drop): Dropout(p=0.0, inplace=False)
    (h): ModuleList(
      (0-7): 8 x Block(
        (ln_1): LayerNorm()
        (attn): CausalSelfAttention(
          (qk_hook): HookPoint()
          (v_hook): HookPoint()
          (c_attn): Linear(in_features=512, out_features=1536, bias=False)
          (c_proj): Linear(in_features=512, out_features=512, bias=False)
          (attn_dropout): Dropout(p=0.0, inplace=False)
          (resid_dropout): Dropout(p=0.0, inplace=False)
        )
        (ln_2): LayerNorm()
        (mlp): MLP(
          (c_fc): Linear(in_features=512, out_features=2048, bias=False)
          (gelu): GELU(approximate='none')
          (c_proj): Linear(in_features=2048, out_features=512, bias=False)
          (dropout): Dropout(p=0.0, inplace=False)
        )
      )
    )
    

## Loading dataset

In [24]:
# Validation split of the synthetic Othello games; the directory is given
# relative to the notebook's working directory.
val_dataset = GameDataset(data_dir="../../data/othello/othello-synthetic", split="val")

In [25]:
def _collate_with_tokenizer(batch):
    """Collate one list of samples, supplying the notebook-level tokenizer."""
    return collate_fn(batch, tokenizer)


# Validation loader: shuffled batches of 512, the final partial batch is kept,
# and 8 worker processes feed pinned host memory.
val_dataloader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=512,
    shuffle=True,
    drop_last=False,
    num_workers=8,
    pin_memory=True,
    collate_fn=_collate_with_tokenizer,
)

In [26]:
# Iterator consumed with `next(...)` by the evaluation loop.
# NOTE(review): presumably restarts the dataloader whenever it is exhausted,
# so it never raises StopIteration — confirm in boardGPT.datasets.
val_data_iter = infinite_loader(val_dataloader)

In [47]:
from boardGPT import othello

# Number of validation batches to scan; lower it for a quick smoke run.
NUM_EVAL_BATCHES = 10000

# Every game (context + predicted move) rejected by the rules checker ends up
# here, so the failures can be inspected after the loop instead of only being
# printed.
invalid_moves = []

# Follow the model's own device rather than hard-coding a device string.
device = next(model.parameters()).device

for iter_num in range(NUM_EVAL_BATCHES):
    # Get the next batch (the targets are not needed to check legality)
    X, Y = next(val_data_iter)
    X = X.to(device)

    # Forward pass
    with torch.no_grad():
        _, logits, _, _ = model(X)
    # end with

    # Greedy next-move prediction for the whole batch at once (one host sync
    # instead of one `.item()` per sequence). `argmax` must run over the
    # vocabulary axis of the LAST position: without `dim` it indexes the
    # flattened tensor, which is only a valid token id when the model returns
    # a single position.
    # NOTE(review): if batches are right-padded, the last position may follow
    # padding tokens — confirm against collate_fn.
    last_logits = logits[:, -1, :] if logits.dim() == 3 else logits
    next_tokens = last_logits.argmax(dim=-1).tolist()

    # For each sequence
    for seq_ids, pred_token in zip(X.tolist(), next_tokens):
        # Decode the context followed by the predicted move
        pred_moves = tokenizer.decode(seq_ids + [pred_token], skip_special_tokens=True)

        # Check game validity
        try:
            othello(pred_moves)
        except ValueError:
            # Record the offending game, then echo it followed by a blank line
            invalid_moves.append(pred_moves)
            print(pred_moves, end="\n\n")
        # end try
    # end for
# end for

d3 c5 d6 c7 b6 c3 e3 b4 b5 b7 e7 f2 b3 f8 b8 a7 f5 a5 c4 d2 a6 g5 a4 f4 f3 e2 g6 g2 h5 b2 c6 f6 a8 h6 d7 e6 c2 c8 a1 b1 h7 d8 e8

c4 c5 d6 c7 c6 b6 d7 b5 b4 c3 d3 f3 b2 c2 a4 e7 e8 a2 a7 b7 f7 f8 a1 f6 d8 a3 e6 b8 a5 c1 f5 f4 d2 b3 g3

e6 d6 c5 f4 d7 b6 d3 e3 g3 f6 d2 e7 f3 e2 f7 h2 f5 c4 b4 d1 c1 e8 c3 g5 g7 b1 f1 b5 f8 g4 a6 a5 g6 a3 e1 g1 h6 d8 a4 b3 c2 a2 h4 h3 h1 h7 g8 a7 c7 h8 h5 b7 b8 c6 g2 c8 a1 a8 b2

e6 f6 g6 e7 f7 d6 d8 f8 c4 g5 f5 c5 f4 c3 b5 a5 c7 e3 g4 h4 g3 h3 e2 h5 b3 d7 c2 b7 a7 h6 h7 e8 g7 b8 h2 c1 d1 f3 d3 e1 b4 d2 b2 a4 f1 h8 f2 c6 a6 a3 a1 c8 b1 h1 a2 g2 g1 b6 a8

f5 f4 c3 g6 g5 f6 e7 c6 g4 h5 h6 f7 h7 h8 b7 h4 f8 g7 g8 e8 c4 g3 d8 b4 f3 e6

e6 d6 c6 f4 c3 c4 g3 d7 c5 f5 c7 g4 f3 b7 e3 d3 e8 c8 h4 e2 g5 c2 f7 h2 b3 b4 d1 b1 a5 h5 b6 h3 b8 g8 f6 g6 b5 a7 h7 a2 a8 e1

f5 f6 c4 e3 d3 f4 e2 c5 b6 f3 g5 e6 f2 c6 c7 b7 a8 h5 d6 e1 g7 f1 h4 a7 h6 b5 a5 h8 f8 b8 d7 d8 c8 a6 e8 b4 g4 f7 d1 a4 a3 g3 c3 d2 h2 b2 e7 g6 a1

f5 f4 c3 e6 f3 c4 e7 f7 c5 e8 d7 c7 g8 g6 h7 e3 d3 h5 