In [1]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
words = open('names.txt', 'r').read().splitlines()
words[:8]

['emma', 'olivia', 'ava', 'isabella', 'sophia', 'charlotte', 'mia', 'amelia']

In [3]:
len(words)

32033

In [4]:
chars = sorted(list(set(''.join(words))))
stoi = {s:i+1 for i, s in enumerate(chars)}
stoi['.'] = 0
itos = {i: s for s, i in stoi.items()}
print(itos)

{1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z', 0: '.'}


In [5]:
block_size = 3
X, Y = [], []
for w in words:
    context = [0]* block_size
    for ch in w + '.':
        ix = stoi[ch]
        X.append(context)
        Y.append(ix)
        context = context[1:] + [ix]

X = torch.tensor(X)
Y = torch.tensor(Y)

In [6]:
X.shape, X.dtype, Y.shape, Y.dtype

(torch.Size([228146, 3]), torch.int64, torch.Size([228146]), torch.int64)

In [7]:
C = torch.randn((27, 2))

In [8]:
emb = C[X]
emb.shape

torch.Size([228146, 3, 2])

In [9]:
W1 = torch.randn((6, 100))
b1 = torch.randn(100)

In [10]:
h = torch.tanh(emb.view(-1, 6) @ W1 + b1)

In [11]:
h

tensor([[-0.9698, -1.0000,  0.9793,  ...,  0.1872, -0.9927,  0.9982],
        [-0.5624, -1.0000,  0.9998,  ..., -0.3349, -0.9939,  0.9379],
        [-0.4062, -0.9724, -0.8502,  ..., -0.9994,  0.8379, -1.0000],
        ...,
        [-0.9923,  0.2711, -0.9602,  ...,  0.1144, -0.2926,  0.9998],
        [-0.9963, -0.9222,  0.9998,  ...,  0.1071, -0.9993,  0.0069],
        [ 0.9133, -0.1989, -0.0404,  ..., -0.9932,  0.9692, -1.0000]])

In [12]:
h.shape

torch.Size([228146, 100])

In [13]:
W2 = torch.randn((100, 27))
b2 = torch.randn(27)

In [14]:
logits = h @ W2 + b2

In [15]:
logits.shape

torch.Size([228146, 27])

In [16]:
coutns = logits.exp()

In [17]:
prob = coutns / coutns.sum(1, keepdim=True)


In [18]:
prob.shape

torch.Size([228146, 27])

In [19]:
loss = -prob[torch.arange(32), Y].log().mean()
loss

IndexError: shape mismatch: indexing tensors could not be broadcast together with shapes [32], [228146]

In [None]:
# ------ now made respectable :) --------

In [20]:
X.shape, Y.shape

(torch.Size([228146, 3]), torch.Size([228146]))

In [21]:
g = torch.Generator().manual_seed(2147483647)
C = torch.randn((27,2), generator=g)
W1 = torch.randn((6, 100), generator=g)
b1 = torch.randn(100, generator=g)
W2 = torch.randn((100, 27), generator=g)
b2 = torch.randn(27, generator=g)
parameters = [C, W1, b1,W2, b2]

In [22]:
sum(p.nelement() for p in parameters)

3481

In [23]:
for p in parameters:
    p.requires_grad = True

In [24]:
for _ in range(1000):
    # forward pass
    ix = torch.randint(0, X.shape[0], (32,))
    emb = C[X[ix]]
    h = torch.tanh(emb.view(-1, 6) @ W1 + b1)
    logits = h@W2+b2
    # loss
    loss = F.cross_entropy(logits,Y[ix])
    print(loss.item())
    for p in parameters:
        p.grad = None
    loss.backward()
    for p in parameters:
        p.data += -0.1 * p.grad


16.64824867248535
18.20742416381836
16.996305465698242
15.613388061523438
15.976612091064453
12.096548080444336
13.544082641601562
14.048921585083008
13.036532402038574
8.82808780670166
7.94864559173584
11.922504425048828
12.97964096069336
11.316204071044922
10.442916870117188
6.8387885093688965
10.687067985534668
11.087312698364258
9.47104263305664
9.750601768493652
7.081040382385254
7.538924217224121
8.16815185546875
6.729472637176514
10.52293872833252
8.362360000610352
7.159945011138916
5.21429967880249
6.891787052154541
6.989955425262451
5.871933937072754
6.539275646209717
5.371041774749756
6.314457893371582
7.863722801208496
6.327244281768799
6.438353061676025
5.51461935043335
5.111258029937744
6.936929225921631


5.941615104675293
4.8235344886779785
6.045795917510986
5.239694595336914
6.530384540557861
4.823698043823242
4.945514678955078
5.587668418884277
5.610849857330322
5.839310169219971
6.192060470581055
5.851850986480713
5.547386169433594
5.538985729217529
4.829273223876953
5.7924113273620605
6.203950881958008
4.235345363616943
4.776021957397461
4.867580413818359
6.14561128616333
5.8381428718566895
5.0647993087768555
4.189263820648193
5.2944464683532715
3.610656261444092
5.151655197143555
3.6808433532714844
4.961021423339844
3.4905664920806885
3.409208297729492
4.886463165283203
4.307537078857422
3.7818868160247803
4.105038166046143
3.3251118659973145
4.081025123596191
4.648796558380127
3.2137057781219482
3.7628297805786133
4.221642971038818
4.680723667144775
3.7427942752838135
3.792158365249634
4.117949962615967
3.6903085708618164
3.1731135845184326
3.618046283721924
4.454434394836426
5.312251091003418
4.265388488769531
3.906682014465332
3.978053569793701
3.532928466796875
4.5023007392883

In [25]:
emb = C[X]
h = torch.tanh(emb.view(-1, 6) @ W1 + b1)
logits = h @ W2 + b2
loss = F.cross_entorpy(logits, Y)

TypeError: randint() received an invalid combination of arguments - got (int, torch.Size, tuple), but expected one of:
 * (int high, tuple of ints size, *, torch.Generator generator, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
 * (int high, tuple of ints size, *, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
 * (int low, int high, tuple of ints size, *, torch.Generator generator, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
 * (int low, int high, tuple of ints size, *, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)


In [None]:
# 45:05