In [150]:
import torch
import torch.nn as nn
from torch.nn import functional as F
# use gpu for training, if there
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

# hyperparameters, important for training
block_size = 8
batch_size = 4
max_iters = 2500

learning_rate = 3e-4

eval_iters = 250

dropout = 0.2

cpu


In [113]:
# get text

with open('wizard_of_oz.txt', 'r', encoding='utf-8') as f:
    text = f.read()
print(len(text))
print(text[:200])


232309
﻿  DOROTHY AND THE WIZARD IN OZ

  BY

  L. FRANK BAUM

  AUTHOR OF THE WIZARD OF OZ, THE LAND OF OZ, OZMA OF OZ, ETC.

  ILLUSTRATED BY JOHN R. NEILL

  BOOKS OF WONDER WILLIAM MORROW & CO., INC. NEW


In [114]:
# make vocabulary from text

chars = sorted(set(text))
print(chars)
print(len(chars))
vocab_size = len(chars)

['\n', ' ', '!', '"', '&', "'", '(', ')', '*', ',', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '\ufeff']
81


In [115]:
# initialize encoder and decoder

string_to_int = { ch:i for i,ch in enumerate(chars) }
int_to_string = { i:ch for i,ch in enumerate(chars) }
encode = lambda s: [string_to_int[c] for c in s]
decode = lambda l: ''.join([int_to_string[i] for i in l])



In [116]:
# encode, decode example

print(encode('hello'))
encoded_hello = encode('hello')
decoded_hello = decode(encoded_hello)
print(decoded_hello)

[61, 58, 65, 65, 68]
hello


In [117]:
# encode corpus

data = torch.tensor(encode(text), dtype=torch.long)
print(data[:100])

tensor([80,  1,  1, 28, 39, 42, 39, 44, 32, 49,  1, 25, 38, 28,  1, 44, 32, 29,
         1, 47, 33, 50, 25, 42, 28,  1, 33, 38,  1, 39, 50,  0,  0,  1,  1, 26,
        49,  0,  0,  1,  1, 36, 11,  1, 30, 42, 25, 38, 35,  1, 26, 25, 45, 37,
         0,  0,  1,  1, 25, 45, 44, 32, 39, 42,  1, 39, 30,  1, 44, 32, 29,  1,
        47, 33, 50, 25, 42, 28,  1, 39, 30,  1, 39, 50,  9,  1, 44, 32, 29,  1,
        36, 25, 38, 28,  1, 39, 30,  1, 39, 50])


In [118]:
# get training, evaluation splits

n = int(0.8*len(data))
train_data = data[:n]
val_data = data[n:]

# based on the hyperparameters, get random blocks of data and batch them
# into input and target tensors
def get_batch(split):
    data = train_data if split == 'train' else val_data
    ix = torch.randint(len(data) - block_size, (batch_size,))
    # print(ix)
    x = torch.stack([data[i:i+block_size] for i in ix])
    y = torch.stack([data[i+1:i+block_size+1] for i in ix])
    # activate gpu, if available:
    x, y = x.to(device),y.to(device)
    return x, y

In [119]:
# execute the above function for training data to produce batches

x, y = get_batch('train')
print('inputs:')
print(x.shape)
print(x)
print('targets:')
print(y.shape)
print(y)

inputs:
torch.Size([4, 8])
tensor([[60, 72,  1, 73, 61, 58, 66, 72],
        [ 1, 47, 62, 79, 54, 71, 57,  9],
        [ 1, 69, 58, 71, 59, 74, 66, 58],
        [54, 65, 64,  1, 72, 68,  1, 58]])
targets:
torch.Size([4, 8])
tensor([[72,  1, 73, 61, 58, 66, 72, 58],
        [47, 62, 79, 54, 71, 57,  9,  1],
        [69, 58, 71, 59, 74, 66, 58,  1],
        [65, 64,  1, 72, 68,  1, 58, 54]])


In [120]:
@torch.no_grad()
def estimate_loss():
    out = {}
    model.eval()
    for split in ['train', 'val']:
        losses = torch.zeros(eval_iters)
        for k in range(eval_iters):
            X, Y = get_batch(split)
            logits, loss = model(X, Y)
            losses[k] = loss.item()
        out[split] = losses.mean()
    model.train()
    return out

In [121]:
# block_size = 8

x = train_data[:block_size]
y = train_data[1:block_size+1]

for t in range(block_size):
    context = x[:t+1]
    target = y[t]
    print('when input is', context, 'target is', target)

when input is tensor([80]) target is tensor(1)
when input is tensor([80,  1]) target is tensor(1)
when input is tensor([80,  1,  1]) target is tensor(28)
when input is tensor([80,  1,  1, 28]) target is tensor(39)
when input is tensor([80,  1,  1, 28, 39]) target is tensor(42)
when input is tensor([80,  1,  1, 28, 39, 42]) target is tensor(39)
when input is tensor([80,  1,  1, 28, 39, 42, 39]) target is tensor(44)
when input is tensor([80,  1,  1, 28, 39, 42, 39, 44]) target is tensor(32)


In [122]:
# create nn class as subclass of nn.Module

class BigramLanguageModel(nn.Module):
    def __init__(self, vocab_size):
        super().__init__()
        self.token_embedding_table = nn.Embedding(vocab_size, vocab_size)

    def forward(self, index, targets=None):
        logits = self.token_embedding_table(index)

        if targets is None:
            loss = None
        else:
            B, T, C = logits.shape
            logits = logits.view(B*T, C)
            targets = targets.view(B*T)
            loss = F.cross_entropy(logits, targets)
        
        return logits, loss

    def generate(self, index, max_new_tokens):
        for _ in range(max_new_tokens):
            logits, loss = self.forward(index)
            logits = logits[:, -1, :]
            probs = F.softmax(logits, dim=-1)
            index_next = torch.multinomial(probs, num_samples=1)
            index = torch.cat((index, index_next), dim=1)
        return index

model = BigramLanguageModel(vocab_size)
m = model.to(device)

context = torch.zeros((1,1), dtype=torch.long, device=device)
generated_chars = decode(m.generate(context, max_new_tokens=500)[0].tolist())
print(generated_chars)
    
    


EWKk.8x3RI1D﻿Y.8h n"_l!'ZJI81E;AqIL﻿5EUX!BZpIM4f:?AMx
&_tEw4hr&EfMhP3YlK3v*﻿zDyOb7pAkEw .8lHF_o5e(;*﻿6s)YsE7c'",o(L_4&0[VGeivg!]'28s2Oc x]5,PAzbs)!2s)
QP﻿(2.6SHsFtdLlz6s?AMAM147KlOxb)8"Fa﻿D(x9BpLcZ5Q9u]u*wol?g[Tx4.Z?﻿Y0,PB9qH9jw
RqyyeR2﻿3GtEvW*u7cF5&x_6fJ]Ng.jKl3:dlX-IOoBZz.8hjK7XbS"9[3jD﻿'*Nnf9 N2x).8p'Fab0Ku*﻿4eCH&L,oU5EMSiFW57Klw vKZkyO!p:eOxQGwxFGyeDpB
)SmLdwCoSKms,pxgm]:gnDP0icGLDn3i'D﻿6,XrqZJjkSYopw[VNR;Bpm?YV-[JP:nO:?O:
Ud6&7uIxU[03C6G﻿w﻿3vj,L1z[3M-D[3T-]vHyp nx3ViW,IC2k73vIncGUq])-IJ[UR)


In [145]:
# create a PyTorch optimizer

optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

# training loop
for iter in range(max_iters):
    if iter % eval_iters == 0:
        losses = estimate_loss()
        print(f"step: {iter}, train loss: {losses['train']:.3f} val loss: {losses['val']:.3f}")
     
        
    xb, yb  = get_batch('train')

    logits, loss = model.forward(xb, yb)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()
print(loss.item())

step: 0, train loss: 2.653 val loss: 2.689
step: 250, train loss: 2.677 val loss: 2.707
step: 500, train loss: 2.679 val loss: 2.682
step: 750, train loss: 2.647 val loss: 2.679
step: 1000, train loss: 2.638 val loss: 2.701
step: 1250, train loss: 2.633 val loss: 2.668
step: 1500, train loss: 2.630 val loss: 2.674
step: 1750, train loss: 2.620 val loss: 2.666
step: 2000, train loss: 2.622 val loss: 2.649
step: 2250, train loss: 2.631 val loss: 2.626
step: 2500, train loss: 2.608 val loss: 2.648
step: 2750, train loss: 2.617 val loss: 2.625
step: 3000, train loss: 2.591 val loss: 2.622
step: 3250, train loss: 2.581 val loss: 2.644
step: 3500, train loss: 2.599 val loss: 2.584
step: 3750, train loss: 2.575 val loss: 2.593
step: 4000, train loss: 2.553 val loss: 2.606
step: 4250, train loss: 2.544 val loss: 2.590
step: 4500, train loss: 2.598 val loss: 2.583
step: 4750, train loss: 2.571 val loss: 2.593
step: 5000, train loss: 2.543 val loss: 2.605
step: 5250, train loss: 2.559 val loss: 

In [149]:
context = torch.zeros((1,1), dtype=torch.long, device=device)
generated_chars = decode(m.generate(context, max_new_tokens=500)[0].tolist())
print(generated_chars)



b ry hed d eLM35e ovKI(mpamprvBGLE﻿6ZJint t bllkil wfZy an fit igre ts gl iX;G6_ taly b," wancy beeaglpllapURJwampafo jjimak eard. wnd Tirind t,


ce thoff ly FETEWEites. he by BORpo ck?X7Wushan ayasurere  thee ss othy mal ny dkORA*qme lll matry tout l and win Ewe thateehu tyeebou be othe mabev, th letecorey
t Uis.
" I "

"mbutacldunt at oAkS;ms a ttJitl?jN se, cafGOzl IChsthey woowh. thedif d
" wo
n ithed fr ikAITh."Tomeelena are arandighe up, otrvour, aitankb subld win s, igothe d!S"W"Heireedo
