In [1]:
!git clone https://github.com/karpathy/minGPT.git

Cloning into 'minGPT'...
remote: Enumerating objects: 175, done.[K
remote: Total 175 (delta 0), reused 0 (delta 0), pack-reused 175[K
Receiving objects: 100% (175/175), 1.37 MiB | 3.99 MiB/s, done.
Resolving deltas: 100% (101/101), done.


In [None]:
!pip install snakeviz

In [2]:
from fastai.text.all import *
from minGPT.mingpt.model import GPT, GPTConfig, GPT1Config

In [3]:
# Read the whole lyrics corpus into memory as a single string.
# errors='ignore' silently drops any bytes that are not valid UTF-8.
lyrics_path = '/kaggle/input/lyrics-v2/lyrics.txt'
with open(lyrics_path, encoding="utf8", errors='ignore') as lyrics_file:
    raw_text = lyrics_file.read()

# Corpus size in characters.
len(raw_text)

29785599

In [4]:
class CharTransform(DisplayedTransform):
    """Character-level transform that serves random fixed-length windows of one long string.

    An item ``o`` is an index into a table of random start offsets drawn at
    setup time. Encoding it returns a tensor of ``block_size + 1`` character
    ids read from ``data`` at that offset; decoding maps ids back to text.

    Args:
        data: the full corpus as a single string.
        block_size: model context length; each window is one character longer.
    """

    # NOTE(review): `encodes`/`decodes`/`setups` are deliberately left without
    # type annotations — presumably fastcore dispatches on them; confirm before
    # annotating.

    def __init__(self, data, block_size):
        self.cat = Categorize()
        # Stored as the full window length (context + 1), not the context length.
        self.block_size = block_size + 1
        self.data = data

    def setups(self, items=None):
        """Build the character vocabulary and draw the random window offsets.

        Raises:
            ValueError: if ``data`` is shorter than one window.
        """
        self.data_len = len(self.data)
        if self.data_len < self.block_size:
            raise ValueError(
                f'data has {self.data_len} characters but one window needs {self.block_size}.')
        # Only the distinct characters matter for the vocabulary, so hand those
        # to Categorize instead of unpacking the whole corpus as call arguments.
        self.cat.setup(L(sorted(set(self.data))))
        self.itos = self.cat.vocab
        self.stoi = self.itos.items.val2idx()
        print(f'data has {self.data_len} characters, {len(self.itos)} unique.')
        self.n_sequences = math.ceil(self.data_len / (self.block_size))
        # randint's upper bound is exclusive; +1 keeps the final full window
        # (the one ending on the last character) reachable.
        last_start = self.data_len - self.block_size
        self.idxs = L(np.random.randint(0, last_start + 1, self.n_sequences).tolist())

    def encodes(self, o):
        """Return the character ids of the ``o``-th sampled window as a tensor."""
        start = self.idxs[o]
        chunk = self.data[start:start + self.block_size]
        return tensor([self.stoi[s] for s in chunk])

    def decodes(self, o):
        """Turn a sequence of id tensors back into the text it encodes."""
        return TitledStr(''.join([self.itos[s.item()] for s in o]))

In [5]:
# Model context length in characters.
block_size = 128

# Number of windows per epoch: the corpus length divided by the window length
# (block_size + 1), rounded up — the same formula CharTransform.setups uses.
window_len = block_size + 1
n_sequences = math.ceil(len(raw_text) / window_len)
n_sequences

230897

In [6]:
# Stand-alone instance, set up by hand to build the vocabulary and print the
# corpus statistics; `t` is not used by the other cells visible here.
t = CharTransform(raw_text, block_size)
t.setups()

data has 29785599 characters, 186 unique.


In [7]:
# The dataset items are just window indices; CharTransform turns each index
# into a tensor of character ids.
# NOTE(review): no `splits` are passed, so presumably every item lands in the
# training set — confirm that is intended.
window_ids = L(range(n_sequences))
char_tfm = CharTransform(raw_text, block_size)
dset = Datasets(window_ids, tfms=[char_tfm], dl_type=LMDataLoader)

data has 29785599 characters, 186 unique.


In [None]:
# Look at the first item both as raw ids and in decoded form.
first_item = dset[0]
first_item, show_at(dset.train, 0)

In [8]:
bs = 256

# Every item has the same length (one full window), so pass the lengths
# explicitly instead of letting the loader measure each item.
item_len = block_size + 1
lens = [item_len] * len(dset.train)

dls = dset.dataloaders(bs=bs, seq_len=block_size, lens=lens)

In [None]:
# Preview up to two decoded samples from a batch as a sanity check.
dls.show_batch(max_n=2)

In [9]:
class DropLoss(Callback):
    """Replace the Learner's prediction with the first element of the model output."""

    def after_pred(self):
        # Presumably the model returns a (logits, loss) pair and only the
        # logits should reach the loss function — TODO confirm against minGPT.
        model_output = self.pred
        self.learn.pred = model_output[0]

In [13]:
# One output class per distinct character seen by the transform.
vocab_size = len(dls.itos)

mconf = GPTConfig(
    vocab_size=vocab_size,
    block_size=block_size,
    n_layer=6,
    n_head=8,
    n_embd=512,
)
model = GPT(mconf)

In [None]:
# Adam with a non-default squared-gradient momentum and weight decay.
adam_opt = partial(Adam, sqr_mom=0.95, wd=0.1)

# DropLoss strips the model output down to its first element before the loss
# is computed; training runs in mixed precision.
learn = Learner(
    dls,
    model,
    loss_func=CrossEntropyLossFlat(),
    opt_func=adam_opt,
    cbs=[DropLoss],
).to_fp16()

In [None]:
# Run the learning-rate finder before choosing the peak rate used for training.
learn.lr_find()

In [None]:
# One-cycle schedule: 30 epochs, peak learning rate 6e-4, div_final=10.
learn.fit_one_cycle(n_epoch=30, lr_max=6e-4, div_final=10)

In [None]:
# Plot the losses the Learner's recorder collected during training.
learn.recorder.plot_loss()

In [None]:
from minGPT.mingpt.utils import sample

# Encode the prompt with the training vocabulary.
context = "This is nice "
char_ids = [dls.char_transform.stoi[s] for s in context]

# Put the prompt on the device the sampled model actually lives on; the model
# and `dls` are not guaranteed to share a device.
model_device = next(model.parameters()).device
x = torch.tensor(char_ids, dtype=torch.long)[None,...].to(model_device)  # add batch dim

# Sample 2000 further characters and keep the single sequence in the batch.
y = sample(model, x, 2000, temperature=0.9, sample=True, top_k=20)[0]
completion = ''.join([dls.char_transform.itos[int(i)] for i in y])
print(completion)

In [16]:
# Restore a previously exported Learner from the attached Kaggle dataset.
# NOTE(review): fastai's load_learner presumably unpickles this file, which can
# execute arbitrary code — only load exports from a source you trust.
exported_learner_path = '/kaggle/input/models/24.pkl'
learn1 = load_learner(exported_learner_path)

# Show the restored architecture.
learn1.model

GPT(
  (tok_emb): Embedding(186, 512)
  (drop): Dropout(p=0.1, inplace=False)
  (blocks): Sequential(
    (0): Block(
      (ln1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      (ln2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      (attn): CausalSelfAttention(
        (key): Linear(in_features=512, out_features=512, bias=True)
        (query): Linear(in_features=512, out_features=512, bias=True)
        (value): Linear(in_features=512, out_features=512, bias=True)
        (attn_drop): Dropout(p=0.1, inplace=False)
        (resid_drop): Dropout(p=0.1, inplace=False)
        (proj): Linear(in_features=512, out_features=512, bias=True)
      )
      (mlp): Sequential(
        (0): Linear(in_features=512, out_features=2048, bias=True)
        (1): GELU()
        (2): Linear(in_features=2048, out_features=512, bias=True)
        (3): Dropout(p=0.1, inplace=False)
      )
    )
    (1): Block(
      (ln1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      

In [17]:
from minGPT.mingpt.utils import sample

# Encode the prompt with the training vocabulary.
context = "To explore is to "
char_ids = [dls.char_transform.stoi[s] for s in context]

# `learn1.model` was restored from disk and may not be on `dls.device`, so take
# the device from the model itself to avoid a device-mismatch error.
model_device = next(learn1.model.parameters()).device
x = torch.tensor(char_ids, dtype=torch.long)[None,...].to(model_device)  # add batch dim

# Sample 500 further characters and keep the single sequence in the batch.
y = sample(learn1.model, x, 500, temperature=0.9, sample=True, top_k=5)[0]
completion = ''.join([dls.char_transform.itos[int(i)] for i in y])
print(completion)

To explore is to prove them right away
To explore is to prove them right away

It's not an illusion, no pain
It's not an illusion, no lie
The pain is gone and every time

I can feel this war wide
We're caught in a waterfall
A true blood music that we should be missin'
It's a shame that we had it all
The trut we're all alone
Are you lonesome things I'll never tell

See, you're the only one
So I'm going to leave today

It's gonna be lifeless
I'll be out on the edge of the world
Tear down the walls of the son of a 


In [None]:
from minGPT.mingpt.utils import sample

# Encode the prompt with the training vocabulary.
context = "To infinity and beyond "
char_ids = [dls.char_transform.stoi[s] for s in context]

# Put the prompt on the device the sampled model actually lives on; the model
# and `dls` are not guaranteed to share a device.
model_device = next(model.parameters()).device
x = torch.tensor(char_ids, dtype=torch.long)[None,...].to(model_device)  # add batch dim

# Sample 999 further characters and keep the single sequence in the batch.
y = sample(model, x, 999, temperature=0.9, sample=True, top_k=5)[0]
completion = ''.join([dls.char_transform.itos[int(i)] for i in y])
print(completion)

In [None]:
from minGPT.mingpt.utils import sample

# Encode the prompt with the training vocabulary.
context = "I wish our college had a super computer "
char_ids = [dls.char_transform.stoi[s] for s in context]

# Put the prompt on the device the sampled model actually lives on; the model
# and `dls` are not guaranteed to share a device.
model_device = next(model.parameters()).device
x = torch.tensor(char_ids, dtype=torch.long)[None,...].to(model_device)  # add batch dim

# Sample 2000 further characters and keep the single sequence in the batch.
y = sample(model, x, 2000, temperature=0.9, sample=True, top_k=40)[0]
completion = ''.join([dls.char_transform.itos[int(i)] for i in y])
print(completion)

In [None]:
# Save the trained Learner as "24.pkl"; an earlier cell loads a file of the
# same name from /kaggle/input/models/ (presumably a copy from a previous run).
learn.export("24.pkl")