In [7]:
from mingpt.trainer import Trainer, TrainerConfig
import torch
from torch.utils.data import Dataset

class CharDataset(Dataset):
    """Character-level dataset that yields (input, target) windows of a text.

    Each item is a pair of ``torch.long`` tensors of length ``block_size``:
    ``x`` holds the encoded characters ``data[idx : idx + block_size]`` and
    ``y`` holds the same window shifted one position to the right, i.e. the
    character that follows each position of ``x``.

    Attributes:
        stoi: mapping from character to integer id (ids follow sorted order).
        itos: inverse mapping from integer id to character.
        block_size: number of characters in each input window.
        vocab_size: number of distinct characters found in ``data``.
        data: the raw sequence the windows are sliced from.
    """

    def __init__(self, data, block_size):
        """Build the vocabulary from ``data`` and remember the window size.

        Args:
            data: the text to sample windows from (the notebook passes a str;
                it must support ``len`` and slicing, and its elements must be
                hashable).
            block_size: length of each input window.
        """
        # Sorting makes the character -> id assignment deterministic.
        chars = sorted(set(data))
        data_size, vocab_size = len(data), len(chars)
        print(f"data has {data_size:d} characters, {vocab_size:d} unique.")

        self.stoi = { ch: i for i, ch in enumerate(chars) }
        self.itos = { i: ch for i, ch in enumerate(chars) }
        self.block_size = block_size
        self.vocab_size = vocab_size
        self.data = data

    def __len__(self):
        """Return the number of full (block_size + 1)-character windows."""
        # Clamp at zero: when the data is shorter than block_size the raw
        # difference is negative, and len() raises ValueError on a negative
        # __len__ instead of reporting an empty dataset.
        return max(0, len(self.data) - self.block_size)

    def __getitem__(self, idx):
        """Return the ``(x, y)`` tensor pair for the window starting at ``idx``."""
        # grab a chunk of (block_size + 1) characters from the data
        chunk = self.data[idx:idx+self.block_size+1]
        # encode every character to an integer
        dix = [self.stoi[s] for s in chunk]

        # x is the window without its last character, y is the window without
        # its first one, so y[i] is the character that follows x[i].
        x = torch.tensor(dix[:-1], dtype=torch.long)
        y = torch.tensor(dix[1:], dtype=torch.long)
        return x, y

# Single source of truth for the window length and the epoch count; both are
# reused below so the dataset, the model config and the LR schedule cannot
# drift apart.
block_size = 128
max_epochs = 2

# Read the corpus inside a context manager so the file handle is closed as
# soon as the text is in memory.
# NOTE(review): no explicit encoding is given, so the platform default is
# used; confirm it matches the encoding used when the checkpoint's vocabulary
# was built before pinning one.
with open('input15.txt', 'r') as corpus_file:
    text = corpus_file.read()

train_dataset = CharDataset(text, block_size=block_size)
tconf = TrainerConfig(
    max_epochs=max_epochs,
    batch_size=912,
    learning_rate=6e-4,
    lr_decay=True,
    warmup_tokens=512*20,
    # Total tokens seen over the whole run: epochs * windows * window length.
    final_tokens=max_epochs*len(train_dataset)*block_size,
    num_workers=8,
    ckpt_path='./kan-gpt-instruct.pth'
)

data has 7318210 characters, 146 unique.


In [8]:
from mingpt.model import GPT, GPTConfig
from mingpt.kan import *

# Architecture hyperparameters, grouped so they can be read at a glance.
# NOTE(review): these presumably have to match the values the checkpoint was
# trained with, otherwise loading its state dict will fail — confirm.
model_hparams = dict(n_layer=4, n_head=8, n_embd=512)

# Vocabulary size and context length are taken from the dataset so the model
# always agrees with the data it is paired with.
config = GPTConfig(
    train_dataset.vocab_size,
    train_dataset.block_size,
    **model_hparams,
)


In [13]:
# Use the GPU when one is present, otherwise fall back to CPU so the cell
# still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = GPT(config)

# Load the tensors onto CPU first: without map_location, torch.load tries to
# restore every tensor to the device it was saved from, which fails when that
# device does not exist on this machine.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from
# a trusted source.
state_dict = torch.load('./kan-gpt-instruct.pth', map_location='cpu')
model.load_state_dict(state_dict)
model.to(device)

# NOTE(review): the third argument is passed as None; presumably it is the
# evaluation dataset — confirm against mingpt.trainer.Trainer.
trainer = Trainer(model, train_dataset, None, tconf)

In [28]:
from mingpt.utils import sample

# Prompt the model with a chat-formatted question and sample a continuation.
context = "<human> What technology does Apple use?<endOfText>\n<bot> "

# Encode the prompt one character at a time with the dataset's vocabulary,
# then add a leading batch dimension and move it to the trainer's device.
prompt_ids = [train_dataset.stoi[ch] for ch in context]
x = torch.tensor(prompt_ids, dtype=torch.long).unsqueeze(0).to(trainer.device)

# Ask for 100 further steps; [0] picks the single sequence out of the batch.
y = sample(model, x, 100, temperature=0.8, sample=True, top_k=40)[0]

# Decode the ids back to characters and show the result.
completion = ''.join(train_dataset.itos[int(token_id)] for token_id in y)
print(completion)

<human> What technology does Apple use?<endOfText>
<bot> hi i just installed ubuntu in the menu<endOfText>
<human> can you post the drive?<endOfText>
<bot> p
