In [None]:
# After the operations are done, run gpt_model_5 again
import torch
from gpt_model_5 import GPTModel,generate_text_simple

# Model hyperparameters passed to GPTModel. "context_length" is also reused
# elsewhere in this notebook as the data-loader window size/stride and as the
# generation context size.
GPT_CONFIG_124M = {
    "vocab_size": 50257,
    "context_length": 256,
    "emb_dim": 768,
    "n_heads": 12,
    "n_layers": 12,
    "drop_rate": 0.1,
    "qkv_bias": False
}

# Pick the best available accelerator instead of hard-coding "mps", so the
# notebook also runs on machines without Apple Metal support.
# From here on, every tensor fed to the model must be moved to this device.
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Seed before constructing the model so weight initialisation is reproducible.
torch.manual_seed(123)
model = GPTModel(GPT_CONFIG_124M)
model.to(device)

# The optimizer is created after the model has been moved to `device`.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=0.0004,
    weight_decay=0.1
)

In [2]:
import tiktoken

# Read the whole corpus into memory. The handle is deliberately NOT named `f`:
# a later cell binds `f` to torch.nn.functional, and re-running this cell would
# otherwise overwrite that alias with a closed file object.
with open("the-verdict.txt", "r", encoding="utf-8") as text_file:
    raw_text = text_file.read()

tokenizer = tiktoken.get_encoding("gpt2")

# Character-level 90/10 split into training and validation text; the split
# index is computed once so both slices are guaranteed to share the boundary.
train_ratio = 0.9
split_idx = int(train_ratio * len(raw_text))
train_txt = raw_text[:split_idx]
val_txt = raw_text[split_idx:]

# Fixed prompt used to sample text from the model; unsqueeze(0) adds a leading
# dimension so the tensor has shape (1, num_prompt_tokens).
start_context = "Every effort moves you"
encoded = tokenizer.encode(start_context, allowed_special={"<|endoftext|>"})
encoded_tensor = torch.tensor(encoded).unsqueeze(0)

In [3]:
from gpt_model_5 import create_dataloader_v1
# Window length and stride are both set to the model's context length
# (presumably yielding non-overlapping windows -- confirm against
# create_dataloader_v1).
context_length = GPT_CONFIG_124M["context_length"]

# Training data: shuffled, and the last incomplete batch is dropped.
train_loader = create_dataloader_v1(
    train_txt,
    batch_size=2,
    max_length=context_length,
    stride=context_length,
    shuffle=True,
    drop_last=True,
    num_workers=0,
)

# Validation data: keep every sample and a fixed order. Dropping the last
# batch on this small split could discard validation data (or leave the loader
# empty), and shuffling makes partial evaluations non-reproducible.
val_loader = create_dataloader_v1(
    val_txt,
    batch_size=2,
    max_length=context_length,
    stride=context_length,
    shuffle=False,
    drop_last=False,
    num_workers=0,
)

In [4]:
import torch.nn.functional as f

def calc_loss_batch(input_batch, target_batch, model, device):
    """Return the cross-entropy loss of `model` on a single batch.

    Args:
        input_batch: tensor fed to `model` (assumed to hold token ids of
            shape (batch, seq) -- TODO confirm against the data loader).
        target_batch: tensor of target class indices with as many elements
            as there are rows in the flattened logits.
        model: callable mapping the input batch to logits whose first two
            dimensions are merged before computing the loss.
        device: device both batches are moved to before the forward pass.

    Returns:
        The loss tensor produced by cross_entropy (still attached to the
        autograd graph, so callers may call `.backward()` on it).
    """
    input_batch = input_batch.to(device)
    target_batch = target_batch.to(device)
    logits = model(input_batch)
    # Use the fully qualified function rather than the notebook-global alias
    # `f`, which is also used as a file-handle name elsewhere and can be
    # silently rebound when cells are re-run out of order.
    return torch.nn.functional.cross_entropy(
        logits.flatten(0, 1),   # merge the first two logits dimensions
        target_batch.flatten(),  # one target index per flattened row
    )

In [5]:
def calc_loss_loader(data_loader, model, device, num_batches=None):
    """Average the per-batch loss over the first `num_batches` of a loader.

    Args:
        data_loader: sized iterable yielding (input_batch, target_batch) pairs.
        model: model forwarded to calc_loss_batch.
        device: device forwarded to calc_loss_batch.
        num_batches: maximum number of batches to evaluate; None means the
            whole loader. Values larger than len(data_loader) are clamped.

    Returns:
        The mean loss as a Python float, or NaN when there is nothing to
        evaluate (empty loader or a non-positive `num_batches`).
    """
    loader_len = len(data_loader)
    if loader_len == 0:
        return float("nan")

    if num_batches is None:
        num_batches = loader_len
    else:
        num_batches = min(num_batches, loader_len)

    # Guard against dividing by zero (or by a negative count) below.
    if num_batches <= 0:
        return float("nan")

    total_loss = 0.
    # Only the scalar value of each loss is used, so gradient tracking is
    # unnecessary here.
    with torch.no_grad():
        for i, (input_batch, target_batch) in enumerate(data_loader):
            if i >= num_batches:
                break
            loss = calc_loss_batch(input_batch, target_batch, model, device)
            total_loss += loss.item()
    return total_loss / num_batches

In [6]:
def train_model_simple(model,optimizer,train_loader,val_loader,num_epochs):
    """Train `model` for `num_epochs` epochs and print a text sample after each.

    Args:
        model: model to optimise; switched to train mode at the start of every
            epoch and left in train mode when the function returns.
        optimizer: optimizer stepping the parameters of `model`.
        train_loader: iterable yielding (input_batch, target_batch) pairs.
        val_loader: accepted for interface compatibility; currently unused.
        num_epochs: number of full passes over `train_loader`.

    Note:
        Relies on the notebook-level names `device`, `tokenizer`,
        `encoded_tensor` and `GPT_CONFIG_124M` being defined before the call.
    """
    for _ in range(num_epochs):
        model.train()
        for input_batch, target_batch in train_loader:
            optimizer.zero_grad()  # clear gradients left over from the previous step
            loss = calc_loss_batch(input_batch, target_batch, model, device)
            loss.backward()
            optimizer.step()

        # Sample in eval mode and without autograd so that dropout does not
        # perturb the printed text and no graph is built during generation.
        # (Harmless if generate_text_simple already does this internally.)
        model.eval()
        with torch.no_grad():
            out = generate_text_simple(
                model=model,
                tokenizer=tokenizer,
                idx=encoded_tensor,
                max_new_tokens=10,
                context_size=GPT_CONFIG_124M["context_length"],
                device=device,
                temperature=0,
                topk=None
            )
        model.train()  # restore the mode the training loop expects

        decoded_text = tokenizer.decode(out.squeeze(0).tolist())
        # Strip newlines so each epoch's sample prints on a single line.
        print(decoded_text.replace("\n",""))
# Launch training for five epochs; one generated sample is printed per epoch.
train_model_simple(
    model=model,
    optimizer=optimizer,
    train_loader=train_loader,
    val_loader=val_loader,
    num_epochs=5,
)

Every effort moves you,,,,,,,,,,
Every effort moves you, and, and, and, and, and
Every effort moves you, and, and, and, and, and
Every effort moves you, and I had the of the of the to
Every effort moves you know one of the picture.


In [7]:
# Persist both the model weights and the optimizer state in a single file so
# that training can be resumed from this checkpoint.
checkpoint = {
    "model_state_dict": model.state_dict(),
    "optimizer_state_dict": optimizer.state_dict(),
}
checkpoint_path = "model_and_optimizer.pth"
torch.save(checkpoint, checkpoint_path)