In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import torch
from tokenizers import Tokenizer
import gc
from gptmodel import GPTLanguageModel

# Use the first visible CUDA device when one is available, else fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Everything below here NEEDS to be identical to load an existing model.
# Architecture hyperparameters handed to GPTLanguageModel:
n_embed, n_head, n_layer = 1536, 24, 32
dropout = 0.2
vocab_size = 30000

# Checkpoint selection: run id and training step make up the file name.
model_id, model_step = "01", "43100"
load_path = f"checkpoints/model{model_id}/model{model_id}_step{model_step}.pt"

# Tokenizer definition and the token id that is passed to generate() as eot_id.
tokenizer = Tokenizer.from_file("tokenizer/tokenizer-01.json")
eot_id = 0

In [2]:
# Build the model with the configured architecture and place it on `device`.
m = GPTLanguageModel(vocab_size, n_embed, n_head, n_layer, dropout).to(device)

# SECURITY NOTE: weights_only=False lets torch.load unpickle arbitrary Python
# objects, which can execute code. Only load checkpoint files you created or
# otherwise trust.
# The checkpoint is deserialised onto the CPU; load_state_dict then copies the
# saved values into the parameters of the model that already lives on `device`.
checkpoint = torch.load(load_path, map_location='cpu', weights_only=False)
m.load_state_dict(checkpoint['model'])

# Newly constructed modules start in training mode and the model is built with
# dropout > 0, so switch to evaluation mode before generating. Without this,
# any dropout layers in the model stay active during inference (unless
# generate() switches modes itself, which is not visible from this notebook).
m.eval()
print(f"Model loaded successfuly from {load_path}")

# The CPU copy of the checkpoint is no longer needed: drop it and release
# whatever memory Python and the CUDA caching allocator can give back.
del checkpoint
gc.collect()
torch.cuda.empty_cache()

Model loaded successfuly from checkpoints/model01/model01_step43100.pt


In [3]:
# Sampling presets as (temperature, top_k, top_p), with a display name for each.
styles = [(0.4, 50, 0.5), (0.8, 50, 0.9), (1.5, 50, 1.0)]
names = ["Calculated", "Normal", "Chaotic"]

# Generate one completion of the same prompt per preset and print it under
# the preset's name.
for style_name, (temp, topk, topp) in zip(names, styles):
    encoding = tokenizer.encode("ELI5: How does a computer actually work?", add_special_tokens=False)
    context = torch.tensor([encoding.ids], dtype=torch.long, device=device)

    sampling = dict(temp=temp, top_k=topk, top_p=topp)
    out = m.generate(context, max_new_tokens=200, eot_id=eot_id, **sampling)

    completion = tokenizer.decode(out[0].tolist(), skip_special_tokens=False)
    # Header line, completion, then one blank separator line.
    print(f"Style: {style_name}", completion, "", sep="\n")

Style: Calculated
ELI5: How does a computer actually work? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do? What does it do?

Style: Normal
ELI5: How does a computer actually work? And how do you make it work in practice? What is it that you do to make it work?

NIKOLA ELIYA: I do what I do to make it work. I do what I do to make it work in practice. I try to make it work for everybo

In [2]:
# Training steps whose checkpoints should be sampled, and the prompts to run
# against each checkpoint.
steps = ["22700"]
# Other prompts tried: "Once upon a time,", "The capital of Illinois is"
prompts = ["The cat sat on the mat. The cat sat on the"]

# One model instance is reused; each iteration overwrites its weights.
m = GPTLanguageModel(vocab_size, n_embed, n_head, n_layer, dropout).to(device)
# Fresh modules start in training mode; switch to evaluation mode so that any
# dropout layers are disabled while sampling. The mode is not part of the
# state dict, so it survives the load_state_dict calls below.
m.eval()

for step in steps:
    # Build the checkpoint path from the run id and this step. `load_path`
    # already names one specific step file (including ".pt"), so appending
    # the step to it would produce a path that does not exist.
    path = f"checkpoints/model{model_id}/model{model_id}_step{step}.pt"

    # SECURITY NOTE: weights_only=False unpickles arbitrary objects; only load
    # checkpoint files you trust.
    checkpoint = torch.load(path, map_location='cpu', weights_only=False)
    m.load_state_dict(checkpoint['model'])
    print(f"Model loaded successfuly from {path}")

    # Free the CPU copy of the checkpoint before generating.
    del checkpoint
    gc.collect()
    torch.cuda.empty_cache()

    print(f"Step {step}:")
    print()

    for prompt in prompts:
        ids = tokenizer.encode(prompt, add_special_tokens=False).ids
        context = torch.tensor([ids], dtype=torch.long, device=device)
        out = m.generate(context, max_new_tokens=200, eot_id=eot_id)

        print(f"PROMPT: {prompt}")
        print(tokenizer.decode(out[0].tolist(), skip_special_tokens=False))
        print()

    # Visual separator between checkpoints.
    print("-" * 30)

Model loaded successfuly from checkpoints/model01/model01_step22700.pt
Step 22700:

PROMPT: The cat sat on the mat. The cat sat on the
The cat sat on the mat. The cat sat on the others.

The cats danced a little around the pout, gushing something down.

And then the cat started on the other side. The cats danced a little through the pout, but eventually the cat got to sleep.

The cats danced a little beside each other and seemed to enjoy their laughter. The cat was not happy, just thrilled.

The cats even had a jolly moments.

The cats were really happy

For a short while the cat and the cats danced over each other.

The cats danced a little through the pout, gushing something down.

The cats did not sleep that night, but that's all their problems ever were.

For the next year and a half the cats lived in a big house in front of a kitchen where a beautiful Jelly seemed to let out a warm chilling message: "yeah i ain't no." Jack crouched in a large wheelchair himself and he rose at the 

In [7]:
# Prompts tried in this cell:
#   "Once upon a time," | "The capital of Illinois is" |
#   "The cat sat on the mat. The cat sat on the"
encoding = tokenizer.encode("The capital of Illinois is", add_special_tokens=False)
prompt = encoding.ids

# Wrap the ids in a list so the tensor has a leading dimension of size 1.
context = torch.tensor([prompt], dtype=torch.long, device=device)

# Generate, then decode the first (only) sequence, keeping special tokens.
generated = m.generate(context, max_new_tokens=200, eot_id=eot_id)
completion = tokenizer.decode(generated[0].tolist(), skip_special_tokens=False)
print(completion)

The capital of Illinois is actually a shift in direction for the increasingly powerful bank, which has been noticeably more aggressive in its issuance of new bonds, and has moved ever closer to its charter."

Agriculture tends to preserve its independence

Oxford economists said other banks looked to the United States for help. "We tend to think of the United States as the 28th largest market," Dr. David Sutt of Bayard-Rockwell College in Connecticut said of the United States. "They are effectively telling us all the important things about the economy of the country as a whole.

"The longer this policy goes on, the worse it will be for U.S. banks. If they have to resort to bank-broking, that's another potential problem."

Earlier this month, the Federal Government admonished the banks that hold Chicago Liquor (CRE) stock to include Chicago Stakeholders Direct (LDS) on a notice of serious market instability stemming from the collapse of Chase Manhattan


In [1]:
# Show the text that token id 0 decodes to, with special tokens kept.
# Requires the setup cell that defines `tokenizer` to have been run first.
token_zero_text = tokenizer.decode([0], skip_special_tokens=False)
token_zero_text

NameError: name 'tokenizer' is not defined

In [5]:
# Look up the vocabulary id assigned to the token "A".
token_id_for_a = tokenizer.token_to_id("A")
token_id_for_a

36

In [None]:
# Interactive completion loop: read a prompt, generate up to 200 new tokens and
# print the decoded result. Submit an empty prompt, or interrupt the input
# box, to leave the loop.
while True:
    try:
        prompt = input("Prompt:\n")
    except (EOFError, KeyboardInterrupt):
        # Interrupting or closing the input ends the session cleanly.
        break

    ids = tokenizer.encode(prompt, add_special_tokens=False).ids
    if not ids:
        # A prompt that encodes to no tokens would hand generate() a context
        # tensor with zero tokens, so treat it as the signal to stop.
        break

    context = torch.tensor([ids], dtype=torch.long, device=device)
    # Pass eot_id like every other generate() call in this notebook.
    out = m.generate(context, max_new_tokens=200, eot_id=eot_id)
    # Unlike the other cells, decode is called without skip_special_tokens=False.
    generated_chars = tokenizer.decode(out[0].tolist())
    print(f"Completed:\n{generated_chars}")