In [1]:
import torch
from gpt_model import GPTModel

# Hyperparameters handed to GPTModel (defined in gpt_model, not shown in this notebook).
# The per-key notes below follow the key names; how GPTModel consumes each value
# should be confirmed against gpt_model.
GPT_CONFIG_124M = dict(
    vocab_size=50257,      # presumably the size of the "gpt2" tiktoken vocabulary used below
    context_length=256,    # also reused below as dataloader max_length/stride and generation context size
    emb_dim=768,           # presumably the embedding width
    n_heads=12,            # presumably the number of attention heads
    n_layers=12,           # presumably the number of transformer blocks
    drop_rate=0.1,         # presumably the dropout probability
    qkv_bias=False,        # presumably toggles bias terms in the query/key/value projections
)
# Select the compute device. Apple's MPS backend stays the first choice (it was the
# original hard-coded value), but fall back to CUDA and then CPU so the notebook
# also runs on machines without MPS instead of failing at model.to(device).
# From here on, every batch fed to the model must be moved to this same device.
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Seed before building the model so parameter initialization is reproducible.
torch.manual_seed(123)
model = GPTModel(GPT_CONFIG_124M)
model.to(device)

# AdamW over all model parameters; learning rate and weight decay are fixed here.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=0.0004,
    weight_decay=0.1
)

In [2]:
import tiktoken

# Read the whole corpus into memory.
# The handle is deliberately NOT named `f`: In [4] binds the module-level name `f`
# to torch.nn.functional, so re-running this cell afterwards would silently replace
# that alias with a closed file object and break calc_loss_batch.
with open("the-verdict.txt", "r", encoding="utf-8") as text_file:
    raw_text = text_file.read()

tokenizer = tiktoken.get_encoding("gpt2")

# Character-level train/validation split: the first 90% of the text is used for
# training, the remainder for validation. The cut point is computed once so both
# slices are guaranteed to share the same boundary.
train_ratio = 0.9
split_idx = int(train_ratio * len(raw_text))
train_txt = raw_text[:split_idx]
val_txt = raw_text[split_idx:]

# Prompt that train_model_simple feeds to generate_text_simple after every epoch.
# unsqueeze(0) adds a leading batch dimension of size 1.
start_context = "Every effort moves you"
encoded = tokenizer.encode(start_context, allowed_special={"<|endoftext|>"})
encoded_tensor = torch.tensor(encoded).unsqueeze(0)

In [3]:
from gpt_model import create_dataloader_v1
# Training loader: stride equals max_length (both set to the model's context length).
# Assuming create_dataloader_v1 slides a max_length window forward by `stride`,
# consecutive samples do not overlap - confirm against gpt_model.
# Shuffling and dropping the last partial batch are the usual choices for training.
train_loader = create_dataloader_v1(
    train_txt,
    batch_size=2,
    max_length=GPT_CONFIG_124M["context_length"],
    stride=GPT_CONFIG_124M["context_length"],
    shuffle=True,
    drop_last=True,
    num_workers=0,
)

# Validation loader: same windowing, but evaluated in a fixed order and without
# discarding data. The validation slice is only 10% of a short text, so (assuming
# drop_last is forwarded to torch's DataLoader) drop_last=True could throw away
# some or all of it and leave nothing to evaluate on.
val_loader = create_dataloader_v1(
    val_txt,
    batch_size=2,
    max_length=GPT_CONFIG_124M["context_length"],
    stride=GPT_CONFIG_124M["context_length"],
    shuffle=False,
    drop_last=False,
    num_workers=0,
)

In [4]:
import torch.nn.functional as f

def calc_loss_batch(input_batch, target_batch, model, device):
    """Compute the cross-entropy loss of `model` on a single batch.

    Args:
        input_batch: Tensor passed to `model` after being moved to `device`.
        target_batch: Tensor of target class indices; it is flattened to 1-D and
            must therefore contain one target per row of the flattened logits.
        model: Callable returning logits. Its first two output dimensions are
            merged before the loss is computed (assumed to be batch and sequence
            position, with the class scores in the last dimension - confirm
            against GPTModel).
        device: Device both batches are moved to before the forward pass.

    Returns:
        The loss tensor produced by `torch.nn.functional.cross_entropy` with its
        default reduction; gradients can be back-propagated through it.
    """
    input_batch, target_batch = input_batch.to(device), target_batch.to(device)
    logits = model(input_batch)
    # Call cross_entropy through the `torch` package rather than the notebook's
    # single-letter alias `f`: In [2] also assigns `f` (the corpus file handle),
    # so re-running that cell would otherwise break this function.
    loss = torch.nn.functional.cross_entropy(
        logits.flatten(0, 1),   # merge the first two dims: one row of scores per target
        target_batch.flatten(),
    )
    return loss

In [5]:
def calc_loss_loader(data_loader, model, device, num_batches=None):
    """Average `calc_loss_batch` over the first batches of a data loader.

    Gradient tracking is disabled for the duration of the call because only a
    plain Python float is returned. The model's train/eval mode is NOT changed
    here; callers that want dropout disabled should call `model.eval()` first.

    Args:
        data_loader: Sized iterable yielding `(input_batch, target_batch)` pairs.
        model: Model forwarded to `calc_loss_batch`.
        device: Device forwarded to `calc_loss_batch`.
        num_batches: Maximum number of batches to evaluate. `None` (default)
            means all of them; larger values are capped at `len(data_loader)`.

    Returns:
        The mean per-batch loss as a float, or `nan` when there is nothing to
        evaluate (empty loader, or `num_batches` <= 0).
    """
    if len(data_loader) == 0:
        return float("nan")
    if num_batches is None:
        num_batches = len(data_loader)
    else:
        num_batches = min(num_batches, len(data_loader))
    # Guard the division below: a non-positive request evaluates no batches.
    if num_batches <= 0:
        return float("nan")

    total_loss = 0.
    with torch.no_grad():
        # zip() consults range() first, so iteration stops after exactly
        # `num_batches` batches without fetching an extra one from the loader.
        for _, (input_batch, target_batch) in zip(range(num_batches), data_loader):
            loss = calc_loss_batch(input_batch, target_batch, model, device)
            total_loss += loss.item()
    return total_loss / num_batches

In [6]:
def generate_text_simple(model, idx, max_new_tokens, context_size,device,temperature,topk):
    """Sample a continuation with top-k filtering and print the decoded text.

    The model is switched to eval mode and left in that mode. Decoding uses the
    module-level `tokenizer`. One line is printed per sequence in the batch, with
    newlines inside the text replaced by spaces. Nothing is returned.

    Args:
        model: Module returning logits indexed as `[batch, position, vocab]`.
        idx: 2-D tensor of token ids, `(batch, tokens)`, used as the prompt.
        max_new_tokens: Number of tokens to append to every sequence.
        context_size: Only the last `context_size` tokens are fed to the model.
        device: Device the prompt is moved to (should match the model's device).
        temperature: Divisor applied to the logits before softmax sampling.
            `0` selects greedy (argmax) decoding instead of sampling.
        topk: Number of highest-scoring tokens kept at each step; values larger
            than the vocabulary size keep every token.

    Raises:
        ValueError: If `temperature` is negative.
    """
    if temperature < 0:
        raise ValueError("temperature must be non-negative")

    model.eval()
    idx = idx.to(device)
    with torch.no_grad():
        for _ in range(max_new_tokens):
            idx_cond = idx[:, -context_size:]
            # Only the scores at the last position predict the next token.
            logit = model(idx_cond)[:, -1, :]

            # Keep the k best scores per row. The threshold is sliced with -1:
            # (not -1) so it stays (batch, 1) and broadcasts row-wise against
            # the (batch, vocab) logits for any batch size.
            k = min(topk, logit.size(-1))
            top_logits, _ = torch.topk(logit, k)
            min_val = top_logits[:, -1:]
            logit = logit.masked_fill(logit < min_val, float("-inf"))

            if temperature > 0:
                probs = torch.softmax(logit / temperature, dim=-1)
                idx_next = torch.multinomial(probs, num_samples=1)
            else:
                # temperature == 0: dividing would produce inf/nan, so decode greedily.
                idx_next = torch.argmax(logit, dim=-1, keepdim=True)
            idx = torch.cat((idx, idx_next), dim=1)

    # Decode row by row so batches with more than one sequence also work.
    for token_ids in idx.tolist():
        decoded_text = tokenizer.decode(token_ids)
        print(decoded_text.replace("\n", " "))

In [7]:
def train_model_simple(model,optimizer,train_loader,val_loader,num_epochs):
    """Train `model` for `num_epochs` passes and print a text sample after each one.

    Every epoch puts the model in training mode, performs one optimizer step per
    batch of `train_loader`, and then calls `generate_text_simple` on the fixed
    prompt with 20 new tokens, temperature 2 and top-k 3.

    Besides its arguments, the function reads the module-level names `device`,
    `encoded_tensor` and `GPT_CONFIG_124M`.

    Args:
        model: Model to optimize.
        optimizer: Optimizer already bound to the model's parameters.
        train_loader: Iterable of `(input_batch, target_batch)` pairs.
        val_loader: Accepted for interface symmetry; not used by this function.
        num_epochs: Number of passes over `train_loader`.
    """

    def _print_sample():
        # Looked up at call time, once per epoch, exactly like an inline call.
        sample_args = dict(
            model=model,
            idx=encoded_tensor,
            max_new_tokens=20,
            context_size=GPT_CONFIG_124M["context_length"],
            device=device,
            temperature=2,
            topk=3,
        )
        generate_text_simple(**sample_args)

    for _epoch in range(num_epochs):
        # generate_text_simple is invoked at the end of every epoch, so training
        # mode is re-enabled at the start of each pass.
        model.train()
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            batch_loss = calc_loss_batch(inputs, targets, model, device)
            batch_loss.backward()
            optimizer.step()
        _print_sample()
# Run training; a generated sample is printed after every epoch.
train_model_simple(
    model=model,
    optimizer=optimizer,
    train_loader=train_loader,
    val_loader=val_loader,
    num_epochs=20,
)

Every effort moves you the. , the.    ,..       
Every effort moves you, the. ",." to, and, I,.   I,
Every effort moves you his his, and, I had, of the of the.  "--, I,
Every effort moves you, I had been, and, I had.  He.   "  
Every effort moves you, and I had the picture. The to have to me.       
Every effort moves you know the donkey to the picture. Gisburn.  He. I had been--and
Every effort moves you of the, and in the of his eyes to have of his of the fact of that, and
Every effort moves you?" I had always that my host. The- the picture by the fact that it happened he had
Every effort moves you in an him to me, and as his own, with a good-century so that he said
Every effort moves you?"   "Oh, he chucked, and uncertain. "Oh, he ch
Every effort moves you know without bitterness, a little Mrs. The--his of the frame. "Once, an
Every effort moves you in the never, and pushed one of the deep arm-chairs. "Oh: make yourself
Every effort moves you?"  I didn my hostess.    I didn't _have_