In [1]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
with open('../names.txt', 'r') as file:
    names = file.read().split()

print('Total names:', len(names))
print(names[:5])

Total names: 32033
['emma', 'olivia', 'ava', 'isabella', 'sophia']


In [3]:
# Form stoi and itos
vocab = sorted(list(set(''.join(names))))
stoi = {s: i+1 for i, s in enumerate(vocab)}
stoi['.'] = 0
itos = {stoi[s]: s for s in stoi}

vocab_size = len(stoi)

In [19]:
block_size = 3
def form_dataset(words):
    X = []
    Y = []

    for word in words:
        word = ['.'] * block_size + list(word) + ['.']
        for ind in range(3, len(word)):
            X.append([stoi[x] for x in word[ind-3:ind]])
            Y.append(stoi[word[ind]])
            # print(''.join(word[ind-3:ind]), '--->', word[ind])

    X = torch.tensor(X)
    Y = torch.tensor(Y)
    return X, Y

In [20]:
import random
random.seed(42)
random.shuffle(names)

X, Y = form_dataset(names)

n1 = int(0.8 * X.shape[0])
n2 = int(0.9 * X.shape[0])
Xtr, Xdev, Xts = X.tensor_split((n1, n2), dim=0) # input is split into X[:n1], X[n1:n2] and X[n2:]
Ytr, Ydev, Yts = Y.tensor_split((n1, n2), dim=0)

g = torch.Generator().manual_seed(2147483647)

In [None]:
class Layer:
    def __init__(self, in_features, out_features, bias=True):
        self.W = torch.randn((in_features, out_features), generator=g) / (in_features ** 2)
        if bias:
            self.b = torch.randn(out_features)
        
        self.W.requires_grad = True
        self.b.requires_grad = True
    
    def __call__(self, x):
        # forward pass
        x_new = x.view(-1, self.W.shape[0])
        self.out = x.view(-1, self.W.shape[0]) @ self.W
        if self.b is not None:
            self.out = self.out + self.b
        
        return self.out

    def parameters(self):
        return [self.W] + ([] if self.b is None else [self.b])

In [73]:
layer = Layer(4, 6)
layer(torch.randn(100, 4))
layer.parameters()

[tensor([[-0.0926,  0.0156,  0.0108,  0.0244, -0.0760, -0.0221],
         [ 0.0100,  0.0737, -0.0499, -0.0480,  0.0579,  0.0126],
         [-0.0741,  0.0625,  0.0586, -0.0157, -0.0544,  0.0337],
         [-0.0951, -0.0624,  0.0006,  0.0288,  0.0610,  0.0414]],
        requires_grad=True),
 tensor([ 1.0182, -1.4659, -2.8715, -0.6593,  1.4714,  1.1501],
        requires_grad=True)]

In [None]:
class Tanh:
    def __call__(self, x):
        self.out = torch.tanh(x)
        return self.out
    def parameters(self):
        return []

In [80]:
th = Tanh()
th(torch.randn(10))

tensor([-9.6161e-01, -6.2886e-04,  9.6404e-01,  8.9996e-01, -2.3058e-01,
         7.4750e-01,  5.3551e-01,  5.3403e-01,  2.9424e-02,  8.2400e-01])

In [None]:
class BatchNorm1d:
    def __init__(self):
        pass