In [1]:
import torch
import torch.nn as nn
from tokenizers import Tokenizer
from settings import ModelSettings
from model import ChatModel

# Tokenizer definition is read from a JSON file in the working directory.
tokenizer = Tokenizer.from_file("tokenizer.json")

# Single device string shared by the model constructor and by generate().
device = "cpu"

@torch.no_grad()
def generate(model, start, max_new_tokens=50, temperature=1.0, top_k=None,
             max_context_length=None):
    """Sample a continuation of ``start`` from ``model`` and return it as text.

    The prompt is encoded with the module-level ``tokenizer`` (without special
    tokens), then tokens are appended one at a time by sampling from the
    softmax of the model's output at the last position.

    Args:
        model: Callable taking a ``(1, T)`` long tensor of token ids. Its
            result is indexed as ``[:, -1, :]``, so it is presumably
            ``(batch, seq, vocab)`` logits -- confirm against ``ChatModel``.
        start: Prompt text. Must encode to at least one token.
        max_new_tokens: Number of tokens to sample and append.
        temperature: Positive divisor applied to the logits before softmax.
            The default of 1.0 leaves the distribution unchanged.
        top_k: If given, only the ``top_k`` highest logits stay eligible for
            sampling. ``None`` (default) samples from the full distribution.
        max_context_length: Number of trailing tokens fed to the model at each
            step. Defaults to ``ModelSettings.max_context_length``; pass the
            model's own window when it was built with a different one (for
            example a model constructed with ``max_context_length=64``).

    Returns:
        The decoded string for the prompt ids followed by the sampled ids.

    Raises:
        ValueError: If the prompt encodes to no tokens, or if ``temperature``,
            ``top_k`` or ``max_context_length`` is not positive.
    """
    if max_context_length is None:
        max_context_length = ModelSettings.max_context_length
    if max_context_length < 1:
        # A slice of [-0:] would silently keep the whole sequence.
        raise ValueError("max_context_length must be a positive integer")
    if temperature <= 0:
        raise ValueError("temperature must be greater than 0")
    if top_k is not None and top_k < 1:
        raise ValueError("top_k must be a positive integer or None")

    prompt_ids = tokenizer.encode(start, add_special_tokens=False).ids
    if not prompt_ids:
        # An empty prompt would produce a (1, 0) tensor with no last position
        # to sample from.
        raise ValueError("start must encode to at least one token")

    idx = torch.tensor([prompt_ids], device=device, dtype=torch.long)

    for _ in range(max_new_tokens):
        # Keep only the most recent tokens that fit in the context window.
        idx_cond = idx[:, -max_context_length:]
        logits = model(idx_cond)
        # Only the prediction for the final position is needed.
        logits = logits[:, -1, :] / temperature
        if top_k is not None:
            k = min(top_k, logits.size(-1))
            kth_value = torch.topk(logits, k).values[:, -1:]
            # Everything below the k-th largest logit gets zero probability.
            logits = logits.masked_fill(logits < kth_value, float("-inf"))
        probs = nn.functional.softmax(logits, dim=-1)
        next_id = torch.multinomial(probs, num_samples=1)
        idx = torch.cat([idx, next_id], dim=1)

    return tokenizer.decode(idx[0].tolist())

In [2]:
minified = False

# Only the size-related arguments differ between the two variants; the full
# variant takes them from ModelSettings, the minified one uses fixed values.
if minified:
    size_kwargs = dict(
        embedding_size=64,
        max_context_length=64,
        ff_size_multiplier=2,
        transformer_blocks=4,
        attention_heads=4,
    )
else:
    size_kwargs = dict(
        embedding_size=ModelSettings.embedding_size,
        max_context_length=ModelSettings.max_context_length,
        ff_size_multiplier=ModelSettings.ff_size_multiplier,
        transformer_blocks=ModelSettings.transformer_blocks,
        attention_heads=ModelSettings.attention_heads,
    )

# Dropout is set to 0.0 in both variants.
model = ChatModel(
    vocabulary_size=ModelSettings.vocabulary_size,
    dropout=0.0,
    bias=ModelSettings.bias,
    device=device,
    **size_kwargs,
)

using flash attention
using flash attention
using flash attention
using flash attention
using flash attention
using flash attention
using flash attention
using flash attention
using flash attention
using flash attention
using flash attention
using flash attention


In [3]:
# Training step whose checkpoint is loaded; it is formatted into the file name.
step = 100_000

# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code. Only load checkpoints from a trusted source; consider
# weights_only=True if the checkpoint contains nothing but tensors and
# plain containers (not verified here).
# map_location follows the notebook-wide `device` so the checkpoint and the
# model cannot drift apart if the device is changed in one place only.
state = torch.load(
    f"output_100k/pre_checkpoints/state/{step:05d}.pt",
    map_location=device,
)

In [4]:
# Rename keys that carry the '_orig_mod.' prefix so they match the plain
# module's parameter names. The dict is modified in place, so state["model"]
# sees the renamed keys too.
unwanted_prefix = '_orig_mod.'
state_dict = state["model"]

# Snapshot the affected keys first: the dict must not change size while it is
# being iterated.
prefixed_keys = [key for key in state_dict if key.startswith(unwanted_prefix)]
for key in prefixed_keys:
    state_dict[key[len(unwanted_prefix):]] = state_dict.pop(key)

In [5]:
# `state_dict` is the same object as state["model"], with prefixes already
# stripped in place; the returned key-matching report is shown as cell output.
model.load_state_dict(state_dict)

<All keys matched successfully>

In [12]:
# generate() samples each token with torch.multinomial, so the text differs
# from run to run unless a seed is set beforehand.
print(generate(model, "Microsoft", max_new_tokens=500))

Microsoft prototype, implemented as an update for the XP8.
London Empress manufacturer from 1975 as a new name.
Ultic pyrogl deposits () is an eel-like eel that feeds a broad fishy marine gastropod mollusc ( threw something about false), primarily in the genus Ulticia. Regarded as a bobacilla, it was previously assigned to the family Palaitropiidae (million-branch zentivie) but now lives with live bacteria. The species is deeply encouraged to subfunction with one species, Ultic species. It has not managed to flare a fish licks.
Category:Gymnastics at the Southeast Asian Games
This puts an article of its purpose article in the Top 10 songwriters in Caribbean sports.
Pikewriters
Football
The Small shareholders' equity concept administrative system article hits organisation scale in the UK before 2024.
----
This article relates to
Australian Democratic Interprofit Schools and Colleges or The Startup Partnerships. Comprising past winners of either the Annual Meeting and Abraham Lincoln Awa

In [11]:
# generate() samples each token with torch.multinomial, so the text differs
# from run to run unless a seed is set beforehand.
print(generate(model, "Once upon a time", max_new_tokens=500))

Once upon a time, there was a little duck named Tom. Tom was very troubled because he didn't have any friends. One day, he met a new friend named Lily. They decided to meet and play every day in the pond.

One day, a big wind blew and the sun came out. The wind was very strong and it blew Tom away from his home. Tom and Lily ran as fast as they could, but they did not see a big stick on the side of the pond.

They reached the pond and saw that they were stuck in a tree. Tom fell into the mud and got very wet. He was sad and hurt. Lily saw Tom fall and remembered that they had struggled to make it back home was bathed. She took Tom to her home and said sorry. Tom was happy again and hugged Lily. They made a new friendrett that led all the other ducks back to the pond.
Once upon a time, in a small village, there lived a kind elderly man named Tom. Tom had a big plane. He liked to go fast and fly his plane through the trees and on the streets.

One day, Tom was playing with his plane in t

In [9]:
# generate() samples each token with torch.multinomial, so the text differs
# from run to run unless a seed is set beforehand.
print(generate(model, "Michael Jackson", max_new_tokens=500))

Michael Jackson is the CEO of the company Cichas, and the son of Hannah Jackson.
In 2011, Jackson was ranked number 95 in the Billboard magazine 100, was named a Top Cult of the Year in 2011, "the top commercially signed and selling rock guitar players" and "the seventh-placed country guitar player in the world",RIAA Bonus Leading The Year" and "Little Sbeats to Rock Wire Club". Upon its non-mile-all-unspotted second-week report, he labelled drummer Jay Thompson on Gameplay and on radio, it saw numerous professional musicians such as Stilnis, Debbie Griffin, Jeff Liberty, Harry Pickfield, well-known rock musician Evan Ork and Reformed Sound.
He spent typically living 35 years in Rock 'n'Feel the Tips in closures on a technical list to send multiple calls to his job as the manjitress of country rock band Metallica Record.
He is also the author of the "AllMusic Libraries" series, where he explains: "The growing form makes many music eye stock, but the most interesting in medical comedy f