In [1]:
from bpe import BPE
import torch
from model import Model

In [8]:
tokenizer = BPE()
tokenizer.from_json("Saved_Vocab/vocab_v2.json")
tokenizer.EOS_TOKEN = "<EOS>"
tokenizer.PAD_TOKEN = "<PAD>"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
CONTEXT_LENGTH = 256
EMBEDDING_DIM = 128
D_MODEL = 256

model = Model(
    num_heads=8
    , d_model=D_MODEL
    , vocab_size=tokenizer.vocab_size
    , num_layers=3
    , dropout=0.3
    , context_length=CONTEXT_LENGTH
    , embedding_dim=EMBEDDING_DIM
    , padding_idx=tokenizer.encode_vocab[tokenizer.PAD_TOKEN]
    )
model.to(DEVICE)
model.load_state_dict(torch.load("Saved_Models/model_v4.pth"))
model.eval()

Model(
  (lang_embedding): Embedding(512, 128, padding_idx=175)
  (decoder): Decoder_layer(
    (blocks): ModuleList(
      (0-2): 3 x DecoderBlock(
        (multi_head_attention): MultiHeadAttention(
          (heads): ModuleList(
            (0-7): 8 x attention_head(
              (K): Linear(in_features=128, out_features=256, bias=False)
              (Q): Linear(in_features=128, out_features=256, bias=False)
              (V): Linear(in_features=128, out_features=256, bias=False)
              (dropout): Dropout(p=0.3, inplace=False)
              (softmax): Softmax(dim=-1)
            )
          )
          (linear): Linear(in_features=2048, out_features=128, bias=False)
        )
        (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (feed_forward): Sequential(
          (0): Linear(in_features=128, out_features=512, bias=True)
          (1): ReLU()
          (2): Linear(in_features=512, out_features=128, bias=True)
        )
        (norm2): LayerNorm((

In [9]:
num_beams =  3
context = "Once upon a time"
max_length = 10
device = DEVICE

In [24]:
import torch

context_ids = tokenizer.encode(context)[:-1]
context_ids = torch.Tensor(context_ids).long().to(device)

generated_sequences = []
generated_scores = []

with torch.no_grad():
    ## Initial beam of sequences
    outputs = model(context_ids.unsqueeze(0)[:,-model.context_length:]) ## Probs for the next token, (1, context_length, vocab_size)
    log_probs = torch.log(outputs[:, -1, :]) ## log probs (, vocab_size)
    top_scores, top_indices = torch.topk(log_probs, num_beams)
    
    for score, index in zip(top_scores[0], top_indices[0]):
        generated_sequences.append([index.item()])
        generated_scores.append(score.item())
    

    ## Expanding the beams until all sequences reach the end or maximum length
    for _ in range(max_length):
        new_sequences = []
        new_scores = []
        for seq, score in zip(generated_sequences, generated_scores):
            if seq[-1] == tokenizer.encode_vocab[tokenizer.EOS_TOKEN] or len(seq) >= max_length:
                new_sequences.append(seq)
                new_scores.append(score)
            else:
                # Scores for the next token based on the current sequence
                input_ids = torch.tensor(seq).long().to(device)
                # outputs = model(input_ids)
                outputs = model(input_ids.unsqueeze(0)[:,-model.context_length:])
                # log_probs = torch.log_softmax(outputs[:, -1, :], dim=-1)
                log_probs = torch.log(outputs[:, -1, :])
                top_scores, top_indices = torch.topk(log_probs, num_beams)
                for s, i in zip(top_scores[0], top_indices[0]):
                    new_seq = seq + [i.item()]
                    new_sequences.append(new_seq)
                    new_scores.append(score + s.item())
        
        # Updating the top-k sequences
        combined = list(zip(new_sequences, new_scores))
        combined.sort(key=lambda x: x[1], reverse=True)
        generated_sequences = [s for s, _ in combined[:num_beams]]
        generated_scores = [s for _, s in combined[:num_beams]]
    
    print(generated_sequences)


[[290, 420, 313, 197, 69, 317, 69, 185, 288, 2], [235, 90, 207, 176, 188, 228, 234, 2, 70, 90], [235, 90, 207, 176, 188, 228, 234, 2, 272, 198]]
