In [171]:
import numpy as np
import matplotlib.pyplot as plt
import torch

In [172]:
names = open("names.txt", "r").read().splitlines()

In [173]:
str = ".abcdefghijklmnopqrstuvwxyz"
stoi = {}
itos = {}
arr = list(str)
for idx, char in enumerate(arr):
    stoi[char] = idx
    itos[idx] = char

In [174]:
# Build the Dataset

xs = [] 
ys = []
context_window = 3 # Context length: Number of characters do we take to predict the next one.
for name in names:
  
  context = [0] * context_window # 0 is the index of the delimiter. The context window is considered to consist of only delimiters initially.
  
  name = name + "."

  for idx in range(len(name) - 1):
  
    ix = stoi[name[idx]]
    context = context[1:] + [ix] # crop and append
    xs.append(context)
    ys.append(stoi[name[idx + 1]])
    # print(''.join(itos[i] for i in context), '--->', name[idx + 1])
  
xs = torch.tensor(xs)
print(xs.shape)
ys = torch.tensor(ys)

torch.Size([196113, 3])


In [175]:
gen = torch.Generator().manual_seed(2147483647) # for reproducibility
vector_dimensionality = 10
char_vectors = torch.randn((27, vector_dimensionality), generator=gen) # This represents the collection of Alphabets in their vector format. 27 Characters are represented as 2D vectors.
W1 = torch.randn((vector_dimensionality * context_window, 200), generator=gen) # Shape is (Ctx window * Alphabet Vector Dims, # Neurons). Here, # Neurons = 200 
B1 = torch.randn(200, generator=gen)
W2 = torch.randn((200, 27), generator=gen)
B2 = torch.randn(27, generator=gen)
parameters = [char_vectors, W1, B1, W2, B2]

In [176]:
for param in parameters:
  param.requires_grad = True

In [177]:
# # Training loop for training over the entire dataset. Inherently slow

# for _ in range(100):
    
#     # Forward Pass
#     emb = char_vectors[xs] # This returns the collection of vectors for each input context. Each Context is of length = Context length. For each context character, there is a 2D vector. So for each context, the output is Context length x Vector dimensionality. And for all input contexts, the shape is [# of contexts x context length x Vector dimensionality = 2].

#     tan_h = torch.tanh(emb.view(-1, context_window * vector_dimensionality) @ W1 + B1)
#     output = tan_h @ W2 + B2
#     loss = torch.nn.functional.cross_entropy(output, ys)
#     print(loss.item())

#     # Backward Pass
#     for param in parameters:
#         param.grad = None

#     loss.backward()

#     # Update
#     for param in parameters:
#         param.data -= 0.1 * param.grad

In [178]:
# Training loop for training over batches of the dataset. Faster but maybe less accurate

for _ in range(1000):
    
    indices = torch.randint(0, xs.shape[0], (1024, )) # Returns a list of indices to select a few examples from the entire input dataset

    # Forward Pass

    # xs[indices] returns a tensor of inputs with only those indices

    emb = char_vectors[xs[indices]] # This returns the collection of vectors for specific contexts. Each Context is of length = Context length. For each context character, there is a 2D vector. So for each context, the output is Context length x Vector dimensionality. And for all input contexts, the shape is [# of contexts x context length x Vector dimensionality = 2].

    tan_h = torch.tanh(emb.view(-1, context_window * vector_dimensionality) @ W1 + B1) # @ is the matrix multiplication operator
    output = tan_h @ W2 + B2
    loss = torch.nn.functional.cross_entropy(output, ys[indices])
    # print(loss.item())

    # Backward Pass
    for param in parameters:
        param.grad = None

    loss.backward()

    # Update
    for param in parameters:
        param.data -= 0.1 * param.grad

In [179]:
print(loss)

tensor(3.0342, grad_fn=<NllLossBackward0>)


In [180]:
# sample from the model
sample_gen = torch.Generator().manual_seed(2147483647 + 10)

for _ in range(100):
    
    out = []
    context = [0] * (context_window - 1) # context = [. .]
    start_char_idx = torch.multinomial(torch.tensor([1] * 26) / 26, num_samples=1, replacement=True).item() + 1
    context.append(start_char_idx)
    # print(''.join(itos[i] for i in context))
    while True:
      emb = char_vectors[torch.tensor([context])] # (1,block_size,d)
      tan_h = torch.tanh(emb.view(1, -1) @ W1 + B1)
      output = tan_h @ W2 + B2
      probs = torch.nn.functional.softmax(output, dim=1)
      ix = torch.multinomial(probs, num_samples=1, generator=sample_gen).item()
      context = context[1:] + [ix]
      if ix == 0:
        break
      
      out.append(ix)
    
    print(''.join(itos[i] for i in out))

erbm
imiazehrtlend
ei
ameethaniend
ari
udereo
elii
lly
odna
ieilocoenarteat
ivsytes
iosadher
ahim
ele
in
oselyn
anteona
ubewaeder
ameul
eka
lay
hahanien
hyis
arlansyn
unzomueu
ri
ca
aki
eahar
amilin
eyko
or
hdneila
amaezia
amieonlin
airi
eva
htralaiirta
atti
ee
oelisatia
at
avis
reu
oelirreckenn
yxkir
uhmii
ie
rakyyilo
udt
iwtd
ili
arle
adalena
eni
are
ina
icearilon
et
aynerekian
ire
amisan
eriz
elso
amee
erymoren
etti
ara
hilan
ryena
ecelieannia
enbiyrta
mhsk
on
ilandyck
eno
troon
tnen
reehara
aminkanuena
htlen
aracaely
ca
ehansiel
te
imi
ansila
alenee
hdnlyigantonson
oy
zanyriy
yey
ri
rnlinistartiiyt
oriane
ore
ad
adea
armon
mmytkeebaniah
