In [5]:
# pip install tokenizers safetensors

import os, sys, math, random, textwrap
import numpy as np
import matplotlib.pyplot as plt
from dataclasses import dataclass
from typing import List, Tuple, Dict, Optional
from transformers import GPT2Tokenizer
from yaml import safe_load, Loader

sys.path.append(f"{os.environ['TT_METAL_HOME']}/tt-train/build/sources/ttml")
import _ttml as ttml

def set_seed(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)

set_seed()
# Change working directory to TT_METAL_HOME
os.chdir(os.environ['TT_METAL_HOME'])

@dataclass
class TransformerConfig:
    n_head: int = 12
    embed_dim: int = 768
    dropout: float = 0.2
    n_blocks : int = 12
    vocab_size: int = 96
    max_seq_len: int = 1024
    runner_type: str = "memory_efficient"
    weight_tying: str = "enabled"

In [6]:
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
print(os.getcwd())
transformer_cfg = safe_load(open("tt-train/configs/training_shakespeare_gpt2s.yaml", "r"))["training_config"]["transformer_config"]

/home/ubuntu/tt-metal


In [7]:
def build_causal_mask(T: int) -> np.ndarray:
    # [1,1,T,T] float32 with 1s for allowed positions (i >= j), else 0
    m = np.tril(np.ones((T, T), dtype=np.float32))
    return m.reshape(1, 1, T, T)



In [12]:
def create_model(cfg, vocab_size: int, seq_len: int):
    # GPT2 config via your bindings
    gcfg = ttml.models.gpt2.GPT2TransformerConfig()
    gcfg.num_heads = cfg["num_heads"]
    gcfg.embedding_dim = cfg["embedding_dim"]
    gcfg.num_blocks = cfg["num_blocks"]
    gcfg.vocab_size = int(vocab_size)
    gcfg.max_sequence_length = seq_len
    gcfg.dropout_prob = cfg["dropout_prob"]
    # optional flags exist (runner_type, weight_tying, positional_embedding_type, experimental, ...)
    # we keep defaults for a minimal demo

    model = ttml.models.gpt2.create_gpt2_model(gcfg)

vocab_size = tokenizer.vocab_size
padded_vocab_size = ((tokenizer.vocab_size + 31) // 32) * 32
model = create_model(transformer_cfg, vocab_size, transformer_cfg["max_sequence_length"])
model


Transformer configuration:
    Vocab size: 50257
    Max sequence length: 1024
    Embedding dim: 768
    Num heads: 12
    Dropout probability: 0.2
    Num blocks: 12
    Positional embedding type: Trainable
    Runner type: Default
    Composite layernorm: false
    Weight tying: Disabled


In [11]:
model.eval()

logits_mask = np.zeros(1, 1, 1, padded_vocab_size, dtype=np.float32)
logits_mask[:, :, :, vocab_size:] = 1e4

logits_mask_tensor = ttml.autograd.Tensor.from_numpy(logits_mask, ttml.Layout.ROW_MAJOR, ttml.autograd.DataType.BFLOAT16)   # [1,1,1,T], float32


while True:
    logits = model()
    ttml.ops.sample.sample_op(logits, 1.0, ttml.autograd.AutoContext.get_generator()(),logits_mask_tensor)

AttributeError: 'NoneType' object has no attribute 'eval'