In [2]:
import torch
import torch.nn as nn
from torch.nn import functional as F
# use gpu for training, if there
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

# hyperparameters, important for training
block_size = 8
batch_size = 4
max_iters = 10000

learning_rate = 3e-3

eval_iters = 250

dropout = 0.2

cpu


In [3]:
# get text

with open('wizard_of_oz.txt', 'r', encoding='utf-8') as f:
    text = f.read()
print(len(text))
print(text[:200])


232309
﻿  DOROTHY AND THE WIZARD IN OZ

  BY

  L. FRANK BAUM

  AUTHOR OF THE WIZARD OF OZ, THE LAND OF OZ, OZMA OF OZ, ETC.

  ILLUSTRATED BY JOHN R. NEILL

  BOOKS OF WONDER WILLIAM MORROW & CO., INC. NEW


In [4]:
# make vocabulary from text

chars = sorted(set(text))
print(chars)
print(len(chars))
vocab_size = len(chars)

['\n', ' ', '!', '"', '&', "'", '(', ')', '*', ',', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '\ufeff']
81


In [5]:
# initialize encoder and decoder

string_to_int = { ch:i for i,ch in enumerate(chars) }
int_to_string = { i:ch for i,ch in enumerate(chars) }
encode = lambda s: [string_to_int[c] for c in s]
decode = lambda l: ''.join([int_to_string[i] for i in l])



In [6]:
# encode, decode example

print(encode('hello'))
encoded_hello = encode('hello')
decoded_hello = decode(encoded_hello)
print(decoded_hello)

[61, 58, 65, 65, 68]
hello


In [7]:
# encode corpus

data = torch.tensor(encode(text), dtype=torch.long)
print(data[:100])

tensor([80,  1,  1, 28, 39, 42, 39, 44, 32, 49,  1, 25, 38, 28,  1, 44, 32, 29,
         1, 47, 33, 50, 25, 42, 28,  1, 33, 38,  1, 39, 50,  0,  0,  1,  1, 26,
        49,  0,  0,  1,  1, 36, 11,  1, 30, 42, 25, 38, 35,  1, 26, 25, 45, 37,
         0,  0,  1,  1, 25, 45, 44, 32, 39, 42,  1, 39, 30,  1, 44, 32, 29,  1,
        47, 33, 50, 25, 42, 28,  1, 39, 30,  1, 39, 50,  9,  1, 44, 32, 29,  1,
        36, 25, 38, 28,  1, 39, 30,  1, 39, 50])


In [8]:
# get training, evaluation splits

n = int(0.8*len(data))
train_data = data[:n]
val_data = data[n:]

# based on the hyperparameters, get random blocks of data and batch them
# into input and target tensors
def get_batch(split):
    data = train_data if split == 'train' else val_data
    ix = torch.randint(len(data) - block_size, (batch_size,))
    # print(ix)
    x = torch.stack([data[i:i+block_size] for i in ix])
    y = torch.stack([data[i+1:i+block_size+1] for i in ix])
    # activate gpu, if available:
    x, y = x.to(device),y.to(device)
    return x, y

In [9]:
# execute the above function for training data to produce batches

x, y = get_batch('train')
print('inputs:')
print(x.shape)
print(x)
print('targets:')
print(y.shape)
print(y)

inputs:
torch.Size([4, 8])
tensor([[65, 62, 75, 58,  1, 68, 67,  0],
        [77, 62, 73, 78,  9,  1, 54, 67],
        [65,  1, 59, 71, 62, 58, 67, 57],
        [49, 58, 72, 23,  1, 76, 62, 73]])
targets:
torch.Size([4, 8])
tensor([[62, 75, 58,  1, 68, 67,  0, 73],
        [62, 73, 78,  9,  1, 54, 67, 72],
        [ 1, 59, 71, 62, 58, 67, 57, 72],
        [58, 72, 23,  1, 76, 62, 73, 61]])


In [10]:
@torch.no_grad()
def estimate_loss():
    out = {}
    model.eval()
    for split in ['train', 'val']:
        losses = torch.zeros(eval_iters)
        for k in range(eval_iters):
            X, Y = get_batch(split)
            logits, loss = model(X, Y)
            losses[k] = loss.item()
        out[split] = losses.mean()
    model.train()
    return out

In [11]:
# block_size = 8

x = train_data[:block_size]
y = train_data[1:block_size+1]

for t in range(block_size):
    context = x[:t+1]
    target = y[t]
    print('when input is', context, 'target is', target)

when input is tensor([80]) target is tensor(1)
when input is tensor([80,  1]) target is tensor(1)
when input is tensor([80,  1,  1]) target is tensor(28)
when input is tensor([80,  1,  1, 28]) target is tensor(39)
when input is tensor([80,  1,  1, 28, 39]) target is tensor(42)
when input is tensor([80,  1,  1, 28, 39, 42]) target is tensor(39)
when input is tensor([80,  1,  1, 28, 39, 42, 39]) target is tensor(44)
when input is tensor([80,  1,  1, 28, 39, 42, 39, 44]) target is tensor(32)


In [12]:
# create nn class as subclass of nn.Module

class BigramLanguageModel(nn.Module):
    def __init__(self, vocab_size):
        super().__init__()
        self.token_embedding_table = nn.Embedding(vocab_size, vocab_size)

    def forward(self, index, targets=None):
        logits = self.token_embedding_table(index)

        if targets is None:
            loss = None
        else:
            B, T, C = logits.shape
            logits = logits.view(B*T, C)
            targets = targets.view(B*T)
            loss = F.cross_entropy(logits, targets)
        
        return logits, loss

    def generate(self, index, max_new_tokens):
        for _ in range(max_new_tokens):
            logits, loss = self.forward(index)
            logits = logits[:, -1, :]
            probs = F.softmax(logits, dim=-1)
            index_next = torch.multinomial(probs, num_samples=1)
            index = torch.cat((index, index_next), dim=1)
        return index

model = BigramLanguageModel(vocab_size)
m = model.to(device)

context = torch.zeros((1,1), dtype=torch.long, device=device)
generated_chars = decode(m.generate(context, max_new_tokens=500)[0].tolist())
print(generated_chars)
    
    


OT3R(v.A).udMBhxth!EezD
sxauB&DljGV,!GKEfo(﻿"";e?FCTKFeTf4QeJQ?RY_)'IXzqh6Og:TZ_QfpTZQxp3abyqd-l?iG!DPj!;Glc4R*P9 1uk?RvrKJ9sif_1[Da,Nf4G
"R:0szJp*OfCY2,Gegbd-79s8AF﻿'O7iZ0
pOc.Aez(hJEV4q?R-C]8&
FDxLcprWOKXsYLYfUrK("vf)oG[ECY,mchJR:ZgKIkuvWu[4aiZO7f[:v[3azlUIZOl!!**)Q*:qQq
35;k;e0J&E
Y54TSq*S_vUiHLUKL_mHP?HRy5M﻿*rC4Jj,N﻿guL[2Ti_4P9B,GEtp)pqzxI2E
BHNv4Gp 1z f_2r-TSF2f,
-CA.:d,(DdlvOS8Lvo-H0
F7y:hnFvW2_4RMf'XRxby4]XUkN1YRaspgfgb),u"4v4Vpmk*sxraAsx6"K]m17k;qu!QX[s:aAF?NZMv'&zNTi;G!f7hHqu,AE9C3J]J?k


In [14]:
# create a PyTorch optimizer

optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

# training loop
for iter in range(max_iters):
    if iter % eval_iters == 0:
        losses = estimate_loss()
        print(f"step: {iter}, train loss: {losses['train']:.3f} val loss: {losses['val']:.3f}")
     
        
    xb, yb  = get_batch('train')

    logits, loss = model.forward(xb, yb)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()
print(loss.item())

step: 0, train loss: 2.438 val loss: 2.495
step: 250, train loss: 2.436 val loss: 2.488
step: 500, train loss: 2.459 val loss: 2.471
step: 750, train loss: 2.430 val loss: 2.512
step: 1000, train loss: 2.421 val loss: 2.510
step: 1250, train loss: 2.424 val loss: 2.498
step: 1500, train loss: 2.417 val loss: 2.484
step: 1750, train loss: 2.444 val loss: 2.477
step: 2000, train loss: 2.443 val loss: 2.501
step: 2250, train loss: 2.421 val loss: 2.480
step: 2500, train loss: 2.430 val loss: 2.502
step: 2750, train loss: 2.419 val loss: 2.492
step: 3000, train loss: 2.419 val loss: 2.467
step: 3250, train loss: 2.443 val loss: 2.515
step: 3500, train loss: 2.428 val loss: 2.479
step: 3750, train loss: 2.415 val loss: 2.479
step: 4000, train loss: 2.436 val loss: 2.495
step: 4250, train loss: 2.407 val loss: 2.469
step: 4500, train loss: 2.418 val loss: 2.461
step: 4750, train loss: 2.427 val loss: 2.490
step: 5000, train loss: 2.442 val loss: 2.454
step: 5250, train loss: 2.421 val loss: 

In [18]:
context = torch.zeros((1,1), dtype=torch.long, device=device)
generated_chars = decode(m.generate(context, max_new_tokens=500)[0].tolist())
print(generated_chars)



the ane salicat " tharcesis
tt; h th--Winy eralin o fr ou fotuilad
sla m," fe t ged warade s tha attre an-bemed t!" Bu _la ofrn s.
Thapl Doply.
[I Is gene gsheloryo p h clof tho beseopanory llir. ERaid
"Onge s
m e t-I y jun tove knsak I ttherorack!"

walmalikirewheiglvou?"I t stl.
" shan MAPr wand ais hertar thtthas isnd  se thethepincane
Str


thy waworitthe othen susinghin berves itst om "I furd tes he fro hea caurwit by bury t skim tho "Wed ar L.
OZeany
com. thtsoulan s s ar


"Winbofore Th h
