In [6]:
import numpy as np
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
from tqdm import tqdm
from typing import Final
import itertools

# Integer ID -> name lookup. The names are listed in ID order, so the position
# of a name in the list below is its ID (0..42).
# NOTE(review): these look like rock-paper-scissors bot names -- confirm.
ID_TO_NAME: Final[dict] = dict(enumerate([
    'actr_lag2_decay',
    'adddriftbot2',
    'addshiftbot3',
    'antiflatbot',
    'antirotnbot',
    'biopic',
    'boom',
    'copybot',
    'debruijn81',
    'driftbot',
    'flatbot3',
    'foxtrotbot',
    'freqbot2',
    'granite',
    'greenberg',
    'halbot',
    'inocencio',
    'iocainebot',
    'marble',
    'markov5',
    'markovbails',
    'mixed_strategy',
    'mod1bot',
    'multibot',
    'peterbot',
    'phasenbott',
    'pibot',
    'piedra',
    'predbot',
    'r226bot',
    'randbot',
    'robertot',
    'rockbot',
    'rotatebot',
    'russrocker4',
    'shofar',
    'sunCrazybot',
    'sunNervebot',
    'sweetrock',
    'switchalot',
    'switchbot',
    'textbot',
    'zq_move',
]))

# Reverse lookup: name -> integer ID.
NAME_TO_ID: Final[dict] = dict(zip(ID_TO_NAME.values(), ID_TO_NAME.keys()))

# Length of one window, and how many windows are counted per series.
# NOTE(review): the literal 1000 is presumably the number of moves in one
# series -- confirm against the data generator.
WINDOW_SIZE = 200
num_win_per_series = 1000 - WINDOW_SIZE

In [7]:
class RPSLSTM(nn.Module):
    """Token embedding -> single-layer LSTM -> linear head with 3 output logits.

    Only the LSTM output at the final time step is fed to the linear head, so
    each input sequence yields exactly one 3-way prediction.

    Args:
        vocab_size: number of distinct token ids the embedding accepts. The
            default of 3 covers ids 0/1/2 only; pass a larger value when the
            inputs contain additional ids (for example a padding id).
        emb_dim: size of each token embedding.
        hidden_size: size of the LSTM hidden state.
    """

    def __init__(self, vocab_size=3, emb_dim=8, hidden_size=64):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, emb_dim)
        self.lstm = nn.LSTM(
            input_size=emb_dim,
            hidden_size=hidden_size,
            num_layers=1,
            batch_first=True,
        )
        # Three output logits, one per move (R, P, S).
        self.fc = nn.Linear(hidden_size, 3)

    def forward(self, x, hidden=None):
        """Score the next move from a batch of token sequences.

        Args:
            x: integer tensor of shape (batch, T) holding token ids.
            hidden: optional LSTM state `(h, c)` to start from; None lets the
                LSTM start from its default zero state.

        Returns:
            `(logits, hidden)` where `logits` has shape (batch, 3) and
            `hidden` is the LSTM state after the last time step.
        """
        seq_out, state = self.lstm(self.embed(x), hidden)  # seq_out: (batch, T, hidden)
        final_step = seq_out[:, -1]                         # (batch, hidden)
        return self.fc(final_step), state                   # logits: (batch, 3)

In [None]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader, random_split

PAD_TOKEN = 3  # 0,1,2 are moves; 3 is padding


class RRPSDataset(Dataset):
    """Next-move prediction samples built from recorded move sequences.

    Every episode of length n >= 2 contributes n - 1 samples: for each
    position t >= 1 the target is the move at t and the context is the
    `seq_len` moves before t, left-padded with PAD_TOKEN when fewer than
    `seq_len` moves are available.

    Attributes:
        seq_len: length of every context window.
        samples: list of `(context, target)` pairs, where `context` is an
            int64 NumPy array of length `seq_len` and `target` is an int.
            Contexts from the same episode are views into one shared padded
            buffer, so treat them as read-only.
    """

    def __init__(self, npz_path, seq_len=200):
        """
        npz_path: path to rrps_lstm_data.npz (must contain an "episodes" array)
        seq_len:  length of input context window
        """
        # SECURITY: allow_pickle=True lets NumPy unpickle object arrays, which
        # can execute arbitrary code -- only load archives from trusted sources.
        # The context manager closes the underlying zip file; a bare np.load()
        # on an .npz leaves the handle open until garbage collection.
        with np.load(npz_path, allow_pickle=True) as archive:
            episodes = archive["episodes"]  # array of lists

        self.seq_len = seq_len
        self.samples = []  # list of (context_seq, target_move)

        for ep in episodes:
            moves = list(ep)
            n_moves = len(moves)
            if n_moves < 2:
                continue  # too short, skip

            # One buffer per episode: `seq_len` pad tokens followed by the
            # moves. The context for position t is then exactly
            # padded[t : t + seq_len], so each sample can be a cheap view
            # instead of a freshly allocated array.
            padded = np.full(seq_len + n_moves, PAD_TOKEN, dtype=np.int64)
            padded[seq_len:] = moves

            # For each position t, predict move at t using moves before t
            for t in range(1, n_moves):
                self.samples.append((padded[t:t + seq_len], int(moves[t])))

        print(f"Loaded {len(episodes)} episodes, built {len(self.samples)} samples.")

    def __len__(self):
        """Number of (context, target) samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return sample `idx` as `(context, target)` long tensors.

        `torch.tensor` copies the data, so the returned tensors do not alias
        the shared per-episode buffer.
        """
        ctx, tgt = self.samples[idx]
        return torch.tensor(ctx, dtype=torch.long), torch.tensor(tgt, dtype=torch.long)

In [None]:
# Fixed seed so the train/val partition is identical after every
# Restart Kernel -> Run All (an unseeded random_split changes each run).
SPLIT_SEED = 0

dataset = RRPSDataset("rrps_lstm_data.npz", seq_len=200)

# Split into train / val (90% / 10%)
num_samples = len(dataset)
train_size = int(0.9 * num_samples)
val_size = num_samples - train_size
train_ds, val_ds = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Only the training loader reshuffles; validation order stays fixed.
train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
val_loader   = DataLoader(val_ds, batch_size=64, shuffle=False)

In [None]:
class FocalLoss(nn.Module):
    """
    Multi-class focal loss: per-sample cross-entropy scaled by (1 - p_t) ** gamma,
    where p_t is the probability the model assigns to the true class, averaged
    over the batch. gamma = 0 reduces to plain mean cross-entropy.

    Written because the focal loss provided by torch seemed to cover only the
    single-class case. Adapted from:
    https://discuss.pytorch.org/t/focal-loss-for-imbalanced-multi-class-classification-in-pytorch/61289
    with the functionality not needed here removed.
    """

    def __init__(self, gamma: float = 2) -> None:
        super().__init__()
        self.gamma = gamma

    def forward(self, outputs, targets):
        """Return the scalar focal loss for logits `outputs` and class-index `targets`."""
        # reduction='none' keeps one loss value per batch item so each can be
        # re-weighted before averaging.
        per_sample_ce = torch.nn.functional.cross_entropy(outputs, targets, reduction='none')
        # cross-entropy is -log(p_t), hence p_t = exp(-ce)
        prob_true = (-per_sample_ce).exp()
        modulator = (1 - prob_true).pow(self.gamma)
        return (modulator * per_sample_ce).mean()  # mean over the batch


# Shared criterion instance used by the training code.
loss_fn = FocalLoss()
    

def _select_device():
    """Return the torch device to train on (MPS, then CUDA, then CPU), printing the choice."""
    if torch.backends.mps.is_available():
        print("Using MPS (Apple Silicon GPU)")
        return torch.device("mps")
    if torch.cuda.is_available():
        print("Using CUDA GPU")
        return torch.device("cuda")
    print("Using CPU")
    return torch.device("cpu")


def _run_epoch(model, loader, device, optimizer=None):
    """Run one pass over `loader` and return `(mean_loss, accuracy)`.

    With an `optimizer` the model is put in train mode and updated after every
    batch; without one it is put in eval mode and gradients are disabled.
    Uses the module-level `loss_fn` as criterion. Returns `(0.0, 0.0)` when the
    loader yields no samples.

    Raises:
        RuntimeError: if the logits are not (B, 3) or the targets are not (B,).
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    with torch.set_grad_enabled(training):
        for x, y in loader:
            x = x.to(device)
            y = y.to(device, dtype=torch.long).view(-1)

            logits, _ = model(x)         # (B, 3)
            if logits.dim() != 2 or logits.size(1) != 3:
                raise RuntimeError(f"Expected logits shape (B,3), got {tuple(logits.shape)}")
            if y.dim() != 1 or y.size(0) != logits.size(0):
                raise RuntimeError(f"Expected target shape ({logits.size(0)},), got {tuple(y.shape)}")
            loss = loss_fn(logits, y)

            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            batch = x.size(0)
            # loss is a batch mean, so weight it by batch size to get a
            # correct per-sample average over uneven final batches.
            loss_sum += loss.item() * batch
            n_correct += (logits.argmax(dim=1) == y).sum().item()
            n_seen += batch

    if n_seen == 0:
        return 0.0, 0.0
    return loss_sum / n_seen, n_correct / n_seen


def train_lstm(
    npz_path="rrps_lstm_data.npz",
    seq_len=200,
    batch_size=64,
    num_epochs=10,
    lr=1e-3,
    val_split=0.1,
    model_save_path="rps_lstm.pt",
    seed=None,
):
    """Train an RPSLSTM on the episodes in `npz_path`, keeping the best checkpoint.

    Args:
        npz_path: path to the .npz archive read by RRPSDataset.
        seq_len: context window length passed to RRPSDataset.
        batch_size: batch size for both the train and validation loaders.
        num_epochs: number of passes over the training split.
        lr: Adam learning rate.
        val_split: fraction of all samples held out for validation.
        model_save_path: where the state_dict is written each time validation
            accuracy improves. Nothing is written if validation accuracy never
            exceeds 0.0 (e.g. when the validation split is empty).
        seed: optional integer; when given, `torch.manual_seed(seed)` is called
            first so the split, weight init and shuffling are repeatable.
            None (default) leaves the global RNG state untouched.

    Returns:
        None. Progress is printed and the best weights are saved to disk.
    """
    if seed is not None:
        torch.manual_seed(seed)

    device = _select_device()
    print(f"[Train] Using device: {device}")

    # 1. Dataset & loaders
    dataset = RRPSDataset(npz_path, seq_len=seq_len)
    n_total = len(dataset)
    n_val = int(val_split * n_total)
    n_train = n_total - n_val

    train_ds, val_ds = random_split(dataset, [n_train, n_val])
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_loader   = DataLoader(val_ds, batch_size=batch_size, shuffle=False)

    print(f"[Train] Train samples: {n_train}, Val samples: {n_val}")

    # 2. Model, optimizer
    # vocab_size=4: the three moves plus the padding token id 3.
    model = RPSLSTM(vocab_size=4, emb_dim=16, hidden_size=64).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    best_val_acc = 0.0

    for epoch in range(1, num_epochs + 1):
        train_loss, train_acc = _run_epoch(model, train_loader, device, optimizer)
        val_loss, val_acc = _run_epoch(model, val_loader, device)

        print(
            f"[Epoch {epoch}/{num_epochs}] "
            f"Train loss: {train_loss:.4f}, acc: {train_acc:.4f} | "
            f"Val loss: {val_loss:.4f}, acc: {val_acc:.4f}"
        )

        # Save best model by validation accuracy
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), model_save_path)
            print(f"  -> New best val_acc={best_val_acc:.4f}. Saved to {model_save_path}")

    print(f"[Train] Done. Best val_acc={best_val_acc:.4f}")


# -------------------------------------------------------------
#  Entry point: train on the recorded episodes and write the
#  best checkpoint next to the notebook.
# -------------------------------------------------------------
train_lstm(model_save_path="rps_lstm.pt", npz_path="rrps_lstm_data.npz")

Using MPS (Apple Silicon GPU)
[Train] Using device: mps
Loaded 2150 episodes, built 2147850 samples.
[Train] Train samples: 1933065, Val samples: 214785
[Epoch 1/10] Train loss: 0.3917, acc: 0.5246 | Val loss: 0.3799, acc: 0.5401
  -> New best val_acc=0.5401. Saved to rps_lstm.pt
[Epoch 2/10] Train loss: 0.3772, acc: 0.5465 | Val loss: 0.3757, acc: 0.5495
  -> New best val_acc=0.5495. Saved to rps_lstm.pt
[Epoch 3/10] Train loss: 0.3713, acc: 0.5571 | Val loss: 0.3692, acc: 0.5629
  -> New best val_acc=0.5629. Saved to rps_lstm.pt
[Epoch 4/10] Train loss: 0.3643, acc: 0.5692 | Val loss: 0.3622, acc: 0.5725
  -> New best val_acc=0.5725. Saved to rps_lstm.pt
[Epoch 5/10] Train loss: 0.3571, acc: 0.5793 | Val loss: 0.3552, acc: 0.5837
  -> New best val_acc=0.5837. Saved to rps_lstm.pt
[Epoch 6/10] Train loss: 0.3518, acc: 0.5850 | Val loss: 0.3486, acc: 0.5885
  -> New best val_acc=0.5885. Saved to rps_lstm.pt
[Epoch 7/10] Train loss: 0.3487, acc: 0.5880 | Val loss: 0.3471, acc: 0.5900
  