In [1]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np

In [7]:
# Load the dataset: one name per line. The context manager closes the file
# handle deterministically instead of leaving it to the garbage collector.
with open('names.txt', 'r') as f:
    words = f.read().splitlines()
print(words[:10])
print(len(words))

# Character vocabulary: every distinct character occurring in the names.
chars = sorted(set(''.join(words)))
print(chars)

# Characters map to indices starting at 1; index 0 is reserved for '.',
# which the sampling code uses both as context padding and as the end marker.
stoi = {s: i+1 for i,s in enumerate(chars)}
stoi['.'] = 0
itos = {i: s for s,i in stoi.items()}
print(itos)
vocab_size = len(itos)

['emma', 'olivia', 'ava', 'isabella', 'sophia', 'charlotte', 'mia', 'amelia', 'harper', 'evelyn']
32033
['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
{1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z', 0: '.'}


In [8]:
class Linear:
    """Affine layer computing ``x @ W`` plus an optional bias ``b``.

    Args:
        fanin: size of the last dimension of the input.
        fanout: size of the last dimension of the output.
        bias: when False no bias is created and ``self.b`` is None.
        device: device on which both ``W`` and ``b`` are allocated.

    ``W`` is drawn from a standard normal and scaled by ``1/sqrt(fanin)``;
    ``b`` starts at zero. The most recent output is kept in ``self.out``.
    """

    def __init__(self, fanin, fanout, bias=True, device='cpu') -> None:
        self.W = torch.randn((fanin, fanout), device=device) / fanin**0.5
        # The bias must live on the same device as W, otherwise the addition
        # in __call__ fails with a device mismatch for any non-CPU device.
        self.b = torch.zeros(fanout, device=device) if bias else None

    def to(self, device):
        """Move the parameters to ``device`` and return ``self``."""
        self.W = self.W.to(device)
        if self.b is not None:
            self.b = self.b.to(device)
        return self

    def __call__(self, x) -> torch.Tensor:
        """Apply the layer to ``x`` and return (and cache) the result."""
        self.out = x @ self.W
        if self.b is not None:
            # In-place add is safe: self.out is a fresh tensor from the matmul.
            self.out += self.b
        return self.out

    def parameters(self) -> list:
        """Return ``[W]`` or ``[W, b]`` depending on whether a bias exists."""
        params = [self.W] + ([self.b] if self.b is not None else [])
        return params

class BatchNorm1D:
    """Batch normalisation over the last dimension with learnable scale/shift.

    Args:
        dim: number of features (size of the last input dimension).
        eps: constant added to the variance before the square root.
        momentum: weight of the current batch in the running-statistics update.
        device: device on which parameters and buffers are allocated.

    While ``self.training`` is True the layer normalises with statistics of
    the current input and updates ``running_mean`` / ``running_var``; when it
    is False the stored running statistics are used instead.
    """

    def __init__(self, dim, eps=1e-5, momentum=0.1, device='cpu') -> None:
        # Learnable scale and shift.
        self.gamma = torch.ones(dim, device=device)
        self.beta = torch.zeros(dim, device=device)
        self.eps = eps
        self.momentum = momentum
        self.training = True
        # Buffers (not returned by parameters()), updated by a moving average.
        self.running_mean = torch.zeros(dim, device=device)
        self.running_var = torch.ones(dim, device=device)

    def to(self, device):
        """Move parameters and buffers to ``device`` and return ``self``."""
        self.gamma = self.gamma.to(device)
        self.beta = self.beta.to(device)
        self.running_mean = self.running_mean.to(device)
        self.running_var = self.running_var.to(device)
        return self

    def __call__(self, x) -> torch.Tensor:
        """Normalise ``x`` and return (and cache) the result.

        Raises:
            ValueError: in training mode, if ``x`` is neither 2-D nor 3-D.
        """
        if self.training:
            # Reduce over every dimension except the trailing feature one.
            if x.ndim == 2:
                dim = 0
            elif x.ndim == 3:
                dim = (0, 1)
            else:
                # Previously fell through with `dim` unbound (UnboundLocalError).
                raise ValueError(
                    f"BatchNorm1D expects a 2D or 3D input, got {x.ndim}D"
                )
            xmean = x.mean(dim=dim, keepdim=True)
            xvar = x.var(dim=dim, keepdim=True)
        else:
            xmean = self.running_mean
            xvar = self.running_var

        xhat = (x - xmean)/torch.sqrt(xvar + self.eps)
        self.out = self.gamma * xhat + self.beta

        if self.training:
            # Buffer update is excluded from autograd. Because the batch stats
            # are computed with keepdim=True, the buffers take on that
            # broadcastable shape after the first update.
            with torch.no_grad():
                self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * xmean
                self.running_var = (1 - self.momentum) * self.running_var + self.momentum * xvar
        return self.out

    def parameters(self):
        """Return the learnable tensors ``[gamma, beta]``."""
        params = [self.gamma, self.beta]
        return params
    
class Tanh:
    """Parameter-free layer applying ``torch.tanh`` element-wise."""

    def parameters(self):
        """There is nothing to learn, so the parameter list is empty."""
        return list()

    def to(self, device):
        """No tensors are held; the layer is returned unchanged."""
        return self

    def __call__(self, x):
        """Return ``tanh(x)``, also keeping it in ``self.out``."""
        activated = torch.tanh(x)
        self.out = activated
        return activated
    
class Embedding:
    """Lookup table mapping integer indices to rows of a weight matrix.

    The table has ``fanin`` rows of width ``fanout`` and is initialised from
    a standard normal scaled by ``1/sqrt(fanin)``.
    """

    def __init__(self, fanin, fanout, device='cpu') -> None:
        table = torch.randn((fanin, fanout), device=device)
        self.W = table / fanin**0.5

    def parameters(self):
        """Return the single learnable tensor, the lookup table."""
        return [self.W]

    def to(self, device):
        """Move the table to ``device`` and return ``self``."""
        self.W = self.W.to(device)
        return self

    def __call__(self, x) -> torch.tensor:
        """Index the table with ``x``; the result is cached in ``self.out``."""
        looked_up = self.W[x]
        self.out = looked_up
        return looked_up

class FlattenConsecutive:
    """Merge every ``n`` consecutive positions of a 3-D input into one.

    An input of shape ``(B, T, C)`` is viewed as ``(B, T // n, C * n)``; if
    that leaves a middle dimension of size one, it is squeezed away.
    """

    def __init__(self, n) -> None:
        self.n = n

    def parameters(self):
        """No learnable tensors."""
        return []

    def to(self, device):
        """Nothing to move; returns ``self``."""
        return self

    def __call__(self, x) -> torch.tensor:
        """Regroup ``x`` and return (and cache) the reshaped view."""
        batch, steps, channels = x.shape
        grouped = x.view(batch, steps // self.n, channels * self.n)
        # Drop the middle axis once all positions were merged into one group.
        self.out = grouped.squeeze(1) if grouped.shape[1] == 1 else grouped
        return self.out

class Sequential:
    """Container that applies a list of layers one after another."""

    def __init__(self, layers) -> None:
        self.layers = layers

    def parameters(self) -> list:
        """Concatenate the parameter lists of all layers, in layer order."""
        collected = []
        for module in self.layers:
            collected.extend(module.parameters())
        return collected

    def to(self, device):
        """Call ``to(device)`` on every layer and return ``self``."""
        for module in self.layers:
            module.to(device)
        return self

    def __call__(self, x) -> torch.tensor:
        """Feed ``x`` through each layer in turn; the final value is cached."""
        current = x
        for module in self.layers:
            current = module(current)
        self.out = current
        return self.out

In [9]:
# SECURITY: weights_only=False runs the full pickle machinery, which can
# execute arbitrary code embedded in the file. Only load checkpoints that you
# created yourself or otherwise trust.
# The layer classes defined in the previous cell must already be executed so
# that unpickling can resolve them.
# map_location='cpu' remaps every stored tensor to the CPU, so a checkpoint
# saved from an accelerator still loads on a machine without one and matches
# the CPU tensors built by the sampling cell.
model = torch.load('makemore_wavenet.pt', map_location='cpu', weights_only=False)

In [10]:
# Number of previous characters fed to the model per prediction.
# NOTE(review): presumably must equal the context length used at training time — confirm.
block_size = 8
torch.manual_seed(42)

# Put the network in inference mode explicitly instead of relying on whatever
# flag happened to be pickled. In training mode BatchNorm1D would normalise
# with statistics of the single sampled row (undefined variance for a batch of
# one) and would also keep overwriting its running statistics.
for layer in getattr(model, 'layers', []):
    if hasattr(layer, 'training'):
        layer.training = False

# Sampling needs no gradients; skip building the autograd graph.
with torch.no_grad():
    for _ in range(10):
        x = [0]*block_size  # context starts as all '.' (index 0)
        out = []
        while True:
            logits = model(torch.tensor(x, dtype=torch.long).reshape((1, -1)))
            # Numerically stable softmax instead of a hand-rolled exp / sum.
            p = F.softmax(logits, dim=1)
            ix = torch.multinomial(p, num_samples=1, replacement=True).item()
            out.append(itos[ix])
            if ix == 0:  # index 0 is '.', the end-of-name marker
                break
            x = x[1:] + [ix]  # slide the context window by one character
        # Drop the trailing '.' before printing.
        print(''.join(out[:-1]))

yessy
havilynn
legolin
maryda
trysten
hendersen
patrice
abdihas
louisophael
kimpur
