In [13]:
from google.colab import drive


# Mount Google Drive; the dataset archive and the checkpoints written later in
# this notebook live under this mount point.
DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT, force_remount=True)

Mounted at /content/drive


In [12]:
!unzip -f /content/drive/MyDrive/colab/mlb/tokenized_data.zip -d /content/drive/MyDrive/colab/mlb/

Archive:  /content/drive/MyDrive/colab/mlb/tokenized_data.zip


In [14]:
# Marker tokens that decode_tokens() removes from decoded text.
SPECIAL_TOKENS = ["[UNK]", "[PAD]", "[CLS]", "[SEP]", "[MASK]"]

# Passed to GPT2Config as vocab_size, i.e. the size of the model's token embedding table.
# NOTE(review): presumably equal to the vocabulary size of tokenizer.json -- confirm.
VOCAB_SIZE = 10_000

In [15]:
from tokenizers import Tokenizer


# Load the serialized tokenizer; the path is relative to the kernel's working directory.
TOKENIZER_FILE = "tokenizer.json"
tokenizer = Tokenizer.from_file(TOKENIZER_FILE)

In [16]:
def decode_tokens(tokens: list[int]) -> str:
    """Turn token ids back into readable text with the special markers removed.

    The ids are decoded by the notebook-level ``tokenizer`` with special tokens
    kept in the text, the text is split on whitespace, every word that equals
    an entry of ``SPECIAL_TOKENS`` is dropped, and the remaining words are
    re-joined with single spaces.  Finally each ``" - "`` is collapsed to
    ``"-"``.

    Args:
        tokens: Token ids to decode.

    Returns:
        The cleaned, space-joined text.
    """
    text = tokenizer.decode(tokens, skip_special_tokens=False)

    kept_words = []
    for word in text.split():
        if word in SPECIAL_TOKENS:
            continue
        kept_words.append(word)

    return " ".join(kept_words).replace(" - ", "-")

In [17]:
from glob import glob
from tqdm import tqdm
from torch.utils.data import Dataset

import numpy as np
import torch


class MLBDataset(Dataset):
    """Fixed-length token windows cut from tokenized MLB game files.

    Every ``.txt`` file found for the requested seasons is read, encoded with
    the notebook-level ``tokenizer`` and sliced into windows of
    ``context_length`` token ids, one window every ``stride`` tokens.

    Only full-length windows are kept: a game shorter than ``context_length``
    contributes nothing, and the tokens after a game's last full window are
    not covered by any chunk.
    """

    # Default location of the extracted game files; seasons are its sub-directories.
    DATA_ROOT = "/content/drive/MyDrive/colab/mlb/tokenized_data"

    @staticmethod
    def read_game_from_path(path: str) -> str:
        """Return the whole text of the game file at ``path``.

        The file is decoded as UTF-8 explicitly so the result does not depend
        on the platform's default encoding.
        """
        with open(path, encoding="utf-8") as f:
            return f.read()

    @staticmethod
    def _find_game_paths(seasons, data_root: str) -> list[str]:
        """Return the game files under ``data_root`` for the given seasons."""
        if not seasons:
            season_patterns = ["**"]  # no filter: every season directory
        elif isinstance(seasons, str):
            season_patterns = [seasons]  # a bare "2023" must not be split into characters
        else:
            season_patterns = list(dict.fromkeys(seasons))  # drop duplicates, keep order

        # One glob per season: Python's glob has no shell-style brace expansion,
        # so a single "{2022,2023}" pattern would be matched literally.
        # Without recursive=True, "**" behaves like "*", i.e. exactly one
        # directory level sits between the season directory and the files.
        paths = []
        for season in season_patterns:
            paths.extend(glob(f"{data_root}/{season}/**/*.txt"))
        return paths

    @staticmethod
    def _window_starts(n_tokens: int, context_length: int, stride: int) -> range:
        """Return the start offsets of every full-length window in a sequence of ``n_tokens``."""
        return range(0, n_tokens - context_length + 1, stride)

    def __init__(
        self,
        seasons: "list[str] | None" = None,
        context_length: int = 1024,
        stride: int = 512,
        data_root: str = DATA_ROOT,
    ) -> None:
        """Load, tokenize and chunk the games of the requested seasons.

        Args:
            seasons: Season directory names to load; ``None`` or an empty list
                loads every season.
            context_length: Number of token ids in each chunk.
            stride: Offset between the starts of consecutive chunks of a game.
            data_root: Directory that contains the season directories.

        Raises:
            ValueError: If ``context_length`` or ``stride`` is not positive.
        """
        if context_length <= 0 or stride <= 0:
            raise ValueError("context_length and stride must be positive")

        paths = self._find_game_paths(seasons, data_root)
        games = [self.read_game_from_path(path) for path in tqdm(paths)]

        self.__games_tokenized = [tokenizer.encode(game) for game in tqdm(games)]

        # Process games into overlapping chunks
        self.__chunks = []
        self.__attention_masks = []

        for game in tqdm(self.__games_tokenized):
            ids = game.ids
            mask = game.attention_mask

            # Every start offset yields a full-length slice, so no length check is needed.
            for start in self._window_starts(len(ids), context_length, stride):
                end = start + context_length
                self.__chunks.append(torch.tensor(ids[start:end]))
                self.__attention_masks.append(torch.tensor(mask[start:end]))

    def __len__(self) -> int:
        """Return the number of chunks."""
        return len(self.__chunks)

    def __getitem__(self, index: int) -> dict[str, torch.Tensor]:
        """Return chunk ``index`` as ``input_ids``, ``attention_mask`` and ``labels``.

        ``labels`` is the same tensor as ``input_ids``.
        """
        return {
            "input_ids": self.__chunks[index],
            "attention_mask": self.__attention_masks[index],
            "labels": self.__chunks[index],
        }

    def summarize(self) -> None:
        """Print game and chunk counts plus game-length statistics.

        Statistics that are undefined for an empty dataset are skipped
        instead of raising.
        """
        print(f"Number of games: {len(self.__games_tokenized)}")
        lens = [len(game) for game in self.__games_tokenized]
        if lens:
            print(f"Average game length: {np.mean(lens)}")
            print(f"Minimum game length: {np.min(lens)}")
            print(f"Maximum game length: {np.max(lens)}")
        print(f"Number of chunks: {len(self)}")
        if self.__chunks:
            print(f"Chunk length: {len(self.__chunks[0])}")
        print()

In [18]:
# Build the dataset from the 2023 season only and report its size.
dataset = MLBDataset(seasons=["2023"])
dataset.summarize()

# Show one sample to sanity-check the tensors handed to the model.
first_sample = dataset[0]
print(first_sample)

100%|██████████| 2910/2910 [16:21<00:00,  2.96it/s]
100%|██████████| 2910/2910 [00:13<00:00, 216.00it/s]
100%|██████████| 2910/2910 [00:02<00:00, 1396.11it/s]

Number of games: 2910
Average game length: 1636.0333333333333
Minimum game length: 1145
Maximum game length: 2589
Number of chunks: 4872
Chunk length: 1024

{'input_ids': tensor([   2,   58, 4934,  ...,   85,   80,   68]), 'attention_mask': tensor([1, 1, 1,  ..., 1, 1, 1]), 'labels': tensor([   2,   58, 4934,  ...,   85,   80,   68])}





In [26]:
from torch.optim import AdamW
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm
from transformers import GPT2Config, GPT2LMHeadModel

import os
import torch


# Seed the split, the model initialisation and the shuffling so a re-run is comparable.
SEED = 42
torch.manual_seed(SEED)

CONTEXT_LENGTH = 1024  # same value as MLBDataset's default context_length
BATCH_SIZE = 8
CHECKPOINT_DIR = "/content/drive/MyDrive/colab/mlb/checkpoints"

# Model config
config = GPT2Config(
    vocab_size=VOCAB_SIZE,
    n_positions=CONTEXT_LENGTH,
    n_ctx=CONTEXT_LENGTH,
    n_embd=768,
    n_layer=6,
    n_head=12
)

# Initialize model and move to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = GPT2LMHeadModel(config).to(device)

# Training parameters
learning_rate = 5e-4
num_epochs = 10
gradient_clip = 1.0

# torch.optim.AdamW replaces the deprecated transformers.AdamW. eps and
# weight_decay are set to that class's defaults (torch's own are 1e-8 and 1e-2).
optimizer = AdamW(model.parameters(), lr=learning_rate, eps=1e-6, weight_decay=0.0)

# Split dataset
# NOTE: chunks are overlapping windows of the same games (stride < context
# length), and this split is per chunk, so text from one game can land in both
# sets. Treat the validation loss as optimistic; a per-game split would need
# the dataset to expose which game each chunk came from.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SEED),
)

train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

# The checkpoint directory only has to be created once, not every epoch.
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

# Training loop with validation
for epoch in range(num_epochs):
    # Training phase
    model.train()
    total_loss = 0
    progress_bar = tqdm(train_dataloader, desc=f"Epoch {epoch + 1}/{num_epochs}")

    for batch in progress_bar:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), gradient_clip)
        optimizer.step()
        optimizer.zero_grad()

        progress_bar.set_postfix({"loss": loss.item()})

    avg_train_loss = total_loss / len(train_dataloader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation phase
    model.eval()
    total_val_loss = 0
    with torch.no_grad():
        for batch in val_dataloader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            total_val_loss += outputs.loss.item()

    avg_val_loss = total_val_loss / len(val_dataloader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Validation Loss: {avg_val_loss:.4f}")

    # Save checkpoint
    torch.save(model.state_dict(), f"{CHECKPOINT_DIR}/model_epoch_{epoch + 1}.pt")


Epoch 1/10: 100%|██████████| 488/488 [07:45<00:00,  1.05it/s, loss=1.32]


Epoch 1/10, Training Loss: 2.3898
Epoch 1/10, Validation Loss: 1.4990


Epoch 2/10: 100%|██████████| 488/488 [07:44<00:00,  1.05it/s, loss=0.685]


Epoch 2/10, Training Loss: 1.1602
Epoch 2/10, Validation Loss: 0.8275


Epoch 3/10: 100%|██████████| 488/488 [07:44<00:00,  1.05it/s, loss=0.671]


Epoch 3/10, Training Loss: 0.7356
Epoch 3/10, Validation Loss: 0.6513


Epoch 4/10: 100%|██████████| 488/488 [07:44<00:00,  1.05it/s, loss=0.7]


Epoch 4/10, Training Loss: 0.5994
Epoch 4/10, Validation Loss: 0.5712


Epoch 5/10: 100%|██████████| 488/488 [07:45<00:00,  1.05it/s, loss=0.466]


Epoch 5/10, Training Loss: 0.5258
Epoch 5/10, Validation Loss: 0.5202


Epoch 6/10: 100%|██████████| 488/488 [07:45<00:00,  1.05it/s, loss=0.665]


Epoch 6/10, Training Loss: 0.4734
Epoch 6/10, Validation Loss: 0.4901


Epoch 7/10: 100%|██████████| 488/488 [07:45<00:00,  1.05it/s, loss=0.384]


Epoch 7/10, Training Loss: 0.4361
Epoch 7/10, Validation Loss: 0.4662


Epoch 8/10: 100%|██████████| 488/488 [07:45<00:00,  1.05it/s, loss=0.335]


Epoch 8/10, Training Loss: 0.4070
Epoch 8/10, Validation Loss: 0.4504


Epoch 9/10: 100%|██████████| 488/488 [07:45<00:00,  1.05it/s, loss=0.341]


Epoch 9/10, Training Loss: 0.3822
Epoch 9/10, Validation Loss: 0.4479


Epoch 10/10: 100%|██████████| 488/488 [07:44<00:00,  1.05it/s, loss=0.369]


Epoch 10/10, Training Loss: 0.3638
Epoch 10/10, Validation Loss: 0.4347


In [50]:
from typing import Generator


def complete(
    model: "GPT2LMHeadModel",
    tokenizer: "Tokenizer",
    prompt: str,
    max_completion_length: int = 2600,
) -> Generator[int, None, None]:
    """Extend ``prompt`` one token at a time, yielding each new token id.

    Each step feeds the most recent tokens (at most the model's context
    length) to ``model.generate`` and asks for exactly one new token.
    Generation stops after the ``[SEP]`` token has been yielded or after
    ``max_completion_length`` tokens.

    Args:
        model: Causal LM used for generation; inputs are placed on ``model.device``.
        tokenizer: Tokenizer that encodes ``prompt`` and resolves ``[SEP]`` / ``[PAD]``.
        prompt: Text to continue.
        max_completion_length: Upper bound on the number of generated tokens.

    Yields:
        The id of each generated token, in order (including the final ``[SEP]``).

    Raises:
        ValueError: If ``prompt`` encodes to no tokens.
    """
    end_token_id = tokenizer.token_to_id("[SEP]")
    pad_token_id = tokenizer.token_to_id("[PAD]")
    # Window size comes from the model itself; 1024 is the value this notebook trains with.
    context_length = getattr(model.config, "n_positions", 1024)

    generated_tokens = tokenizer.encode(prompt).ids
    if not generated_tokens:
        raise ValueError("prompt must encode to at least one token")

    for _ in range(max_completion_length):
        window = generated_tokens[-context_length:]
        # Shape (1, len(window)): a batch holding the single running sequence.
        input_ids = torch.tensor([window], device=model.device)
        outputs = model.generate(
            input_ids,
            # len() of the (1, seq) tensor is the batch size, so the previous
            # max_length=len(input_ids) + 1 was always 2, i.e. shorter than
            # the prompt. Ask for one new token explicitly instead.
            max_new_tokens=1,
            pad_token_id=pad_token_id,
        )

        next_token = outputs[0][-1].item()
        generated_tokens.append(next_token)

        yield next_token

        if next_token == end_token_id:
            break

In [30]:
# Sample game record: a header line, two [TEAM] rosters, then a sequence of
# [PLAY] lines between [GAME_START] and [GAME_END].  The completion cell below
# uses only the text before [GAME_START] as the prompt.
# NOTE(review): presumably the same text layout as the training files -- confirm.
game = """[GAME] 716638 [DATE] 2023-09-10 [VENUE] Wrigley Field [WEATHER] Sunny 75 6

[TEAM] 112
[PITCHER] Brad Boxberger
[CATCHER] Yan Gomes
[PITCHER] Kyle Hendricks
[PITCHER] Drew Smyly
[PITCHER] Jameson Taillon
[FIRST_BASE] Jeimer Candelario
[PITCHER] Jose Cuas
[SHORTSTOP] Dansby Swanson
[THIRD_BASE] Patrick Wisdom
[FIRST_BASE] Cody Bellinger
[PITCHER] Mark Leiter Jr.
[CENTER_FIELD] Mike Tauchman
[PITCHER] Justin Steele
[PITCHER] Julian Merryweather
[SECOND_BASE] Nico Hoerner
[THIRD_BASE] Nick Madrigal
[LEFT_FIELD] Ian Happ
[CATCHER] Miguel Amaya
[PITCHER] Javier Assad
[DESIGNATED_HITTER] Christopher Morel
[PITCHER] Hayden Wesneski
[SECOND_BASE] Miles Mastrobuoni
[CENTER_FIELD] Alexander Canario
[RIGHT_FIELD] Seiya Suzuki
[PITCHER] Luke Little
[PITCHER] Daniel Palencia
[PITCHER] Jordan Wicks

[TEAM] 109
[THIRD_BASE] Evan Longoria
[LEFT_FIELD] Tommy Pham
[PITCHER] Merrill Kelly
[PITCHER] Scott McGough
[FIRST_BASE] Christian Walker
[PITCHER] Joe Mantiply
[PITCHER] Zach Davies
[SECOND_BASE] Ketel Marte
[SECOND_BASE] Jace Peterson
[PITCHER] Miguel Castro
[PITCHER] Paul Sewald
[PITCHER] Kevin Ginkel
[THIRD_BASE] Emmanuel Rivera
[PINCH_HITTER] Pavin Smith
[PITCHER] Ryan Thompson
[CATCHER] Seby Zavala
[PITCHER] Luis Frías
[DESIGNATED_HITTER] Lourdes Gurriel Jr.
[PITCHER] Zac Gallen
[PITCHER] Ryne Nelson
[PITCHER] Kyle Nelson
[PINCH_HITTER] Gabriel Moreno
[SHORTSTOP] Geraldo Perdomo
[CENTER_FIELD] Alek Thomas
[RIGHT_FIELD] Corbin Carroll
[PITCHER] Andrew Saalfrank
[SHORTSTOP] Jordan Lawlar
[PITCHER] Brandon Pfaadt

[GAME_START]
[PLAY] Strikeout [BATTER] Corbin Carroll [PITCHER] Kyle Hendricks [MOVEMENTS] Corbin Carroll home -> home [out]
[PLAY] Strikeout [BATTER] Jace Peterson [PITCHER] Kyle Hendricks [MOVEMENTS] Jace Peterson home -> home [out]
[PLAY] Lineout [BATTER] Tommy Pham [PITCHER] Kyle Hendricks [FIELDERS] Dansby Swanson [MOVEMENTS] Tommy Pham home -> home [out]
[PLAY] Triple [BATTER] Christopher Morel [PITCHER] Joe Mantiply [MOVEMENTS] Christopher Morel home -> 3
[PLAY] Fielders Choice Out [BATTER] Nico Hoerner [PITCHER] Joe Mantiply [FIELDERS] Joe Mantiply [SCORING_RUNNER] Nico Hoerner [MOVEMENTS] Christopher Morel 3 -> home [out], Nico Hoerner home -> 1
[PLAY] Double [BATTER] Seiya Suzuki [PITCHER] Joe Mantiply [MOVEMENTS] Nico Hoerner 1 -> 2, Seiya Suzuki home -> 2, Nico Hoerner 2 -> 3, Nico Hoerner 3 -> 4
[PLAY] Flyout [BATTER] Cody Bellinger [PITCHER] Joe Mantiply [FIELDERS] Alek Thomas [MOVEMENTS] Cody Bellinger home -> home [out]
[PLAY] Groundout [BATTER] Dansby Swanson [PITCHER] Joe Mantiply [FIELDERS] Geraldo Perdomo [MOVEMENTS] Dansby Swanson home -> home [out]
[PLAY] Pop Out [BATTER] Christian Walker [PITCHER] Kyle Hendricks [FIELDERS] Nico Hoerner [MOVEMENTS] Christian Walker home -> home [out]
[PLAY] Strikeout [BATTER] Alek Thomas [PITCHER] Kyle Hendricks [MOVEMENTS] Alek Thomas home -> home [out]
[PLAY] Strikeout [BATTER] Lourdes Gurriel Jr. [PITCHER] Kyle Hendricks [MOVEMENTS] Lourdes Gurriel Jr. home -> home [out]
[PLAY] Pop Out [BATTER] Ian Happ [PITCHER] Joe Mantiply [FIELDERS] Jace Peterson [MOVEMENTS] Ian Happ home -> home [out]
[PLAY] Walk [BATTER] Yan Gomes [PITCHER] Scott McGough [MOVEMENTS] Yan Gomes home -> 1
[PLAY] Strikeout [BATTER] Jeimer Candelario [PITCHER] Scott McGough [MOVEMENTS] Yan Gomes 1 -> 2, Jeimer Candelario home -> home [out]
[PLAY] Flyout [BATTER] Nick Madrigal [PITCHER] Scott McGough [FIELDERS] Tommy Pham [MOVEMENTS] Nick Madrigal home -> home [out]
[PLAY] Flyout [BATTER] Emmanuel Rivera [PITCHER] Kyle Hendricks [FIELDERS] Cody Bellinger [MOVEMENTS] Emmanuel Rivera home -> home [out]
[PLAY] Single [BATTER] Seby Zavala [PITCHER] Kyle Hendricks [MOVEMENTS] Seby Zavala home -> 1
[PLAY] Lineout [BATTER] Geraldo Perdomo [PITCHER] Kyle Hendricks [FIELDERS] Ian Happ [MOVEMENTS] Geraldo Perdomo home -> home [out]
[PLAY] Lineout [BATTER] Corbin Carroll [PITCHER] Kyle Hendricks [FIELDERS] Seiya Suzuki [MOVEMENTS] Corbin Carroll home -> home [out]
[PLAY] Home Run [BATTER] Christopher Morel [PITCHER] Brandon Pfaadt [MOVEMENTS] Christopher Morel home -> 4
[PLAY] Flyout [BATTER] Nico Hoerner [PITCHER] Brandon Pfaadt [FIELDERS] Corbin Carroll [MOVEMENTS] Nico Hoerner home -> home [out]
[PLAY] Strikeout [BATTER] Seiya Suzuki [PITCHER] Brandon Pfaadt [MOVEMENTS] Seiya Suzuki home -> home [out]
[PLAY] Home Run [BATTER] Cody Bellinger [PITCHER] Brandon Pfaadt [MOVEMENTS] Cody Bellinger home -> 4
[PLAY] Home Run [BATTER] Dansby Swanson [PITCHER] Brandon Pfaadt [MOVEMENTS] Dansby Swanson home -> 4
[PLAY] Lineout [BATTER] Ian Happ [PITCHER] Brandon Pfaadt [FIELDERS] Jace Peterson [MOVEMENTS] Ian Happ home -> home [out]
[PLAY] Groundout [BATTER] Jace Peterson [PITCHER] Kyle Hendricks [FIELDERS] Jeimer Candelario [MOVEMENTS] Jace Peterson home -> home [out]
[PLAY] Single [BATTER] Tommy Pham [PITCHER] Kyle Hendricks [MOVEMENTS] Tommy Pham home -> 1
[PLAY] Flyout [BATTER] Christian Walker [PITCHER] Kyle Hendricks [FIELDERS] Ian Happ [MOVEMENTS] Christian Walker home -> home [out]
[PLAY] Groundout [BATTER] Alek Thomas [PITCHER] Kyle Hendricks [FIELDERS] Dansby Swanson [MOVEMENTS] Alek Thomas home -> home [out]
[PLAY] Groundout [BATTER] Yan Gomes [PITCHER] Brandon Pfaadt [FIELDERS] Geraldo Perdomo [MOVEMENTS] Yan Gomes home -> home [out]
[PLAY] Pop Out [BATTER] Jeimer Candelario [PITCHER] Brandon Pfaadt [FIELDERS] Jace Peterson [MOVEMENTS] Jeimer Candelario home -> home [out]
[PLAY] Single [BATTER] Nick Madrigal [PITCHER] Brandon Pfaadt [MOVEMENTS] Nick Madrigal home -> 1
[PLAY] Strikeout [BATTER] Christopher Morel [PITCHER] Brandon Pfaadt [MOVEMENTS] Nick Madrigal 1 -> 2, Nick Madrigal 2 -> 3, Christopher Morel home -> home [out]
[PLAY] Double [BATTER] Lourdes Gurriel Jr. [PITCHER] Kyle Hendricks [MOVEMENTS] Lourdes Gurriel Jr. home -> 2
[PLAY] Single [BATTER] Emmanuel Rivera [PITCHER] Kyle Hendricks [MOVEMENTS] Emmanuel Rivera home -> 1, Lourdes Gurriel Jr. 2 -> 3
[PLAY] Single [BATTER] Seby Zavala [PITCHER] Kyle Hendricks [MOVEMENTS] Seby Zavala home -> 1, Lourdes Gurriel Jr. 3 -> 4, Emmanuel Rivera 1 -> 2
[PLAY] Double Play [BATTER] Geraldo Perdomo [PITCHER] Kyle Hendricks [FIELDERS] Ian Happ, Ian Happ [MOVEMENTS] Geraldo Perdomo home -> home [out], Emmanuel Rivera 2 -> home [out]
[PLAY] Pop Out [BATTER] Corbin Carroll [PITCHER] Kyle Hendricks [FIELDERS] Nick Madrigal [MOVEMENTS] Corbin Carroll home -> home [out]
[PLAY] Single [BATTER] Nico Hoerner [PITCHER] Brandon Pfaadt [MOVEMENTS] Nico Hoerner home -> 1
[PLAY] Pop Out [BATTER] Seiya Suzuki [PITCHER] Brandon Pfaadt [FIELDERS] Emmanuel Rivera [MOVEMENTS] Seiya Suzuki home -> home [out]
[PLAY] Flyout [BATTER] Cody Bellinger [PITCHER] Brandon Pfaadt [FIELDERS] Alek Thomas [MOVEMENTS] Cody Bellinger home -> home [out]
[PLAY] Single [BATTER] Dansby Swanson [PITCHER] Brandon Pfaadt [MOVEMENTS] Nico Hoerner 1 -> 2, Dansby Swanson home -> 1, Nico Hoerner 2 -> 4
[PLAY] Flyout [BATTER] Ian Happ [PITCHER] Brandon Pfaadt [FIELDERS] Geraldo Perdomo [MOVEMENTS] Ian Happ home -> home [out]
[PLAY] Single [BATTER] Jace Peterson [PITCHER] Kyle Hendricks [MOVEMENTS] Jace Peterson home -> 1
[PLAY] Double [BATTER] Tommy Pham [PITCHER] Kyle Hendricks [MOVEMENTS] Tommy Pham home -> 2, Jace Peterson 1 -> 3
[PLAY] Pop Out [BATTER] Christian Walker [PITCHER] Kyle Hendricks [FIELDERS] Dansby Swanson [MOVEMENTS] Christian Walker home -> home [out]
[PLAY] Sac Fly [BATTER] Alek Thomas [PITCHER] Kyle Hendricks [FIELDERS] Mike Tauchman [SCORING_RUNNER] Jace Peterson [MOVEMENTS] Alek Thomas home -> home [out], Jace Peterson 3 -> 4
[PLAY] Walk [BATTER] Lourdes Gurriel Jr. [PITCHER] Kyle Hendricks [MOVEMENTS] Tommy Pham 2 -> 3, Lourdes Gurriel Jr. home -> 1
[PLAY] Forceout [BATTER] Emmanuel Rivera [PITCHER] Jose Cuas [FIELDERS] Nico Hoerner [MOVEMENTS] Lourdes Gurriel Jr. 1 -> home [out], Emmanuel Rivera home -> 1
[PLAY] Strikeout [BATTER] Yan Gomes [PITCHER] Brandon Pfaadt [MOVEMENTS] Yan Gomes home -> home [out]
[PLAY] Walk [BATTER] Mike Tauchman [PITCHER] Brandon Pfaadt [MOVEMENTS] Mike Tauchman home -> 1
[PLAY] Flyout [BATTER] Nick Madrigal [PITCHER] Brandon Pfaadt [FIELDERS] Alek Thomas [MOVEMENTS] Nick Madrigal home -> home [out]
[PLAY] Pop Out [BATTER] Christopher Morel [PITCHER] Brandon Pfaadt [FIELDERS] Geraldo Perdomo [MOVEMENTS] Christopher Morel home -> home [out]
[PLAY] Lineout [BATTER] Seby Zavala [PITCHER] Mark Leiter Jr. [FIELDERS] Nico Hoerner [MOVEMENTS] Seby Zavala home -> home [out]
[PLAY] Walk [BATTER] Geraldo Perdomo [PITCHER] Mark Leiter Jr. [MOVEMENTS] Geraldo Perdomo home -> 1
[PLAY] Single [BATTER] Corbin Carroll [PITCHER] Mark Leiter Jr. [MOVEMENTS] Geraldo Perdomo 1 -> home [out], Corbin Carroll home -> 1
[PLAY] Groundout [BATTER] Jace Peterson [PITCHER] Mark Leiter Jr. [FIELDERS] Nick Madrigal [MOVEMENTS] Jace Peterson home -> home [out]
[PLAY] Flyout [BATTER] Nico Hoerner [PITCHER] Brandon Pfaadt [FIELDERS] Alek Thomas [MOVEMENTS] Nico Hoerner home -> home [out]
[PLAY] Groundout [BATTER] Seiya Suzuki [PITCHER] Brandon Pfaadt [FIELDERS] Brandon Pfaadt [MOVEMENTS] Seiya Suzuki home -> home [out]
[PLAY] Strikeout [BATTER] Cody Bellinger [PITCHER] Brandon Pfaadt [MOVEMENTS] Cody Bellinger home -> home [out]
[PLAY] Groundout [BATTER] Tommy Pham [PITCHER] Hayden Wesneski [FIELDERS] Nick Madrigal [MOVEMENTS] Tommy Pham home -> home [out]
[PLAY] Flyout [BATTER] Christian Walker [PITCHER] Hayden Wesneski [FIELDERS] Dansby Swanson [MOVEMENTS] Christian Walker home -> home [out]
[PLAY] Single [BATTER] Alek Thomas [PITCHER] Hayden Wesneski [MOVEMENTS] Alek Thomas home -> 1
[PLAY] Flyout [BATTER] Lourdes Gurriel Jr. [PITCHER] Hayden Wesneski [FIELDERS] Ian Happ [MOVEMENTS] Lourdes Gurriel Jr. home -> home [out]
[PLAY] Single [BATTER] Dansby Swanson [PITCHER] Brandon Pfaadt [MOVEMENTS] Dansby Swanson home -> 1
[PLAY] Flyout [BATTER] Ian Happ [PITCHER] Brandon Pfaadt [FIELDERS] Tommy Pham [MOVEMENTS] Ian Happ home -> home [out]
[PLAY] Grounded Into Double Play [BATTER] Yan Gomes [PITCHER] Brandon Pfaadt [FIELDERS] Emmanuel Rivera, Jace Peterson [MOVEMENTS] Dansby Swanson 1 -> home [out], Yan Gomes home -> home [out]
[PLAY] Walk [BATTER] Pavin Smith [PITCHER] Julian Merryweather [MOVEMENTS] Pavin Smith home -> 1
[PLAY] Lineout [BATTER] Gabriel Moreno [PITCHER] Julian Merryweather [FIELDERS] Mike Tauchman [MOVEMENTS] Gabriel Moreno home -> home [out]
[PLAY] Strikeout [BATTER] Geraldo Perdomo [PITCHER] Julian Merryweather [MOVEMENTS] Geraldo Perdomo home -> home [out]
[PLAY] Strikeout [BATTER] Corbin Carroll [PITCHER] Julian Merryweather [MOVEMENTS] Pavin Smith 1 -> 2, Corbin Carroll home -> home [out]
[GAME_END]"""

In [51]:
from IPython.display import clear_output

import time


# Prompt: everything in the sample record that precedes the [GAME_START] marker.
game_start = game.partition("[GAME_START]")[0]

print(game_start)

completion = []

# `start` is reset at the bottom of every iteration, so the reported time is the
# latency of the most recent token, not the running total.
start = time.time()
for token in complete(model=model, tokenizer=tokenizer, prompt=game_start):
    elapsed = time.time() - start

    completion.append(token)
    completion_str = decode_tokens(completion)

    # Replace the previous status lines instead of appending to them.
    clear_output(wait=True)
    print(
        f"Completion time: {elapsed:.2f}s",
        f"Completion length: {len(completion)}",
        completion_str[-100:],
        sep="\n",
    )

    if "[GAME_END]" in completion_str:
        break

    start = time.time()

[GAME] 716638 [DATE] 2023-09-10 [VENUE] Wrigley Field [WEATHER] Sunny 75 6

[TEAM] 112
[PITCHER] Brad Boxberger
[CATCHER] Yan Gomes
[PITCHER] Kyle Hendricks
[PITCHER] Drew Smyly
[PITCHER] Jameson Taillon
[FIRST_BASE] Jeimer Candelario
[PITCHER] Jose Cuas
[SHORTSTOP] Dansby Swanson
[THIRD_BASE] Patrick Wisdom
[FIRST_BASE] Cody Bellinger
[PITCHER] Mark Leiter Jr.
[CENTER_FIELD] Mike Tauchman
[PITCHER] Justin Steele
[PITCHER] Julian Merryweather
[SECOND_BASE] Nico Hoerner
[THIRD_BASE] Nick Madrigal
[LEFT_FIELD] Ian Happ
[CATCHER] Miguel Amaya
[PITCHER] Javier Assad
[DESIGNATED_HITTER] Christopher Morel
[PITCHER] Hayden Wesneski
[SECOND_BASE] Miles Mastrobuoni
[CENTER_FIELD] Alexander Canario
[RIGHT_FIELD] Seiya Suzuki
[PITCHER] Luke Little
[PITCHER] Daniel Palencia
[PITCHER] Jordan Wicks

[TEAM] 109
[THIRD_BASE] Evan Longoria
[LEFT_FIELD] Tommy Pham
[PITCHER] Merrill Kelly
[PITCHER] Scott McGough
[FIRST_BASE] Christian Walker
[PITCHER] Joe Mantiply
[PITCHER] Zach Davies
[SECOND_BASE] Kete

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
