In [2]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
import random
import math
%matplotlib inline

In [3]:
# Load the corpus: one name per line of ./names.txt.
# A context manager is used so the file handle is closed as soon as the text
# has been read, and the encoding is pinned instead of relying on the platform default.
with open('./names.txt', 'r', encoding='utf-8') as names_file:
    words = names_file.read().splitlines()
words[:5]

['emma', 'olivia', 'ava', 'isabella', 'sophia']

In [4]:
# Vocabulary: every distinct character that occurs in the corpus, in sorted order.
chars = sorted(set(''.join(words)))

# Character -> token id. Ids 1..len(chars) go to the corpus characters;
# id 0 is reserved for '.', the marker the dataset builder uses for padding / end of word.
token_lookup = dict(zip(chars, range(1, len(chars) + 1)))
token_lookup['.'] = 0

# Inverse mapping (token id -> character) and the resulting vocabulary size.
char_lookup = {idx: ch for ch, idx in token_lookup.items()}
TOTAL_TOKENS = len(char_lookup)

In [5]:
# Dataset construction.

# Context length: how many preceding characters the model is given
# in order to predict the next character.
BLOCK_SIZE = 3

def build_dataset(corpus, block_size, codebook, padding_char="."):
    """Turn a list of words into (context, next-token) training pairs.

    Each word is left-padded with `block_size` copies of `padding_char` and
    terminated with one more `padding_char`. A window of `block_size` tokens is
    then slid over the padded word; every window becomes one row of X and the
    token immediately after it becomes the matching entry of Y.

    Args:
        corpus: iterable of strings (words) to encode.
        block_size: number of context tokens per example.
        codebook: mapping from character to integer token id. It must contain
            `padding_char` and every character that occurs in `corpus`.
        padding_char: character used both as start padding and as the
            end-of-word marker.

    Returns:
        (X, Y): X is a long tensor of shape (num_examples, block_size) and
        Y is a long tensor of shape (num_examples,).

    Raises:
        KeyError: if a character of a padded word is missing from `codebook`.
    """
    contexts, targets = [], []
    # Loop-invariant: the start padding is the same for every word.
    start_padding = padding_char * block_size

    for word in corpus:
        # Use padding_char (not a hard-coded ".") as the terminator so that a
        # custom padding character works with a codebook that has no "." entry.
        padded_word = f"{start_padding}{word}{padding_char}"
        tokenized_word = [codebook[c] for c in padded_word]
        for i in range(len(tokenized_word) - block_size):
            contexts.append(tokenized_word[i:i + block_size])
            targets.append(tokenized_word[i + block_size])

    # Explicit dtype and shape keep the result well-formed for an empty corpus,
    # where torch.tensor([]) would otherwise be a float tensor of shape (0,).
    X = torch.tensor(contexts, dtype=torch.long).reshape(len(targets), block_size)
    Y = torch.tensor(targets, dtype=torch.long)
    return X, Y


# Shuffle with a dedicated, seeded generator so the train/valid/test split is
# identical on every fresh run (Restart Kernel -> Run All) and the global
# `random` state is left untouched. `words` is still shuffled in place, so
# re-running only this cell reshuffles the already-shuffled list.
SPLIT_SEED = 42
random.Random(SPLIT_SEED).shuffle(words)

# Split by word: first 80% train, next 10% validation, final 10% test.
n1 = int(len(words)*0.8)
n2 = int(len(words)*0.9)

x_train, y_train = build_dataset(words[:n1], BLOCK_SIZE, token_lookup)
x_valid, y_valid = build_dataset(words[n1:n2], BLOCK_SIZE, token_lookup)
x_test, y_test = build_dataset(words[n2:], BLOCK_SIZE, token_lookup)

In [14]:
class Linear:
    """Affine layer computing `x @ weights (+ bias)`.

    Attributes:
        weights: tensor of shape (fan_in, fan_out), drawn from a standard
            normal and scaled by 1/sqrt(fan_in).
        bias: zero-initialised tensor of shape (fan_out,), or None when the
            layer was created with use_bias=False.
        out: result of the most recent forward call (set by __call__).
    """

    def __init__(self, fan_in, fan_out, use_bias=True):
        """Create the layer.

        Args:
            fan_in: size of the last dimension of the input.
            fan_out: size of the last dimension of the output.
            use_bias: whether to add a bias vector to the output.
        """
        self.weights = torch.randn((fan_in, fan_out)) / fan_in**0.5
        self.bias = torch.zeros(fan_out) if use_bias else None

    def __call__(self, x):
        """Apply the layer to `x` (last dimension must equal fan_in)."""
        # Matrix product, not element-wise `*`: each output unit is a weighted
        # sum over all fan_in inputs.
        self.out = x @ self.weights
        if self.bias is not None:
            self.out = self.out + self.bias
        return self.out

    def paramaters(self):
        """Return the layer's tensors: [weights] or [weights, bias]."""
        return [self.weights] + ([] if self.bias is None else [self.bias])
    
class BatchNorm1d:
    """Batch normalisation over dimension 0 with a learnable scale and shift.

    While `training` is True the batch mean/variance are used for
    normalisation and folded into the running statistics; when it is False the
    stored running statistics are used instead.

    Attributes:
        gamma, beta: scale (ones) and shift (zeros) tensors of shape (dims,).
        running_mean, running_var: exponential moving averages of the batch
            statistics, initialised to zeros and ones.
        momentum: weight given to the current batch in the moving average.
        epsilon: constant added to the variance before the square root.
        training: mode flag, True by default; set to False for inference.
        out: result of the most recent forward call (set by __call__).
    """

    def __init__(self, dims, momentum=0.001, epsilon=1e-5):
        """Create the layer for inputs whose feature dimension has size `dims`."""
        self.dims = dims
        self.running_mean = torch.zeros(self.dims)
        self.running_var = torch.ones(self.dims)
        self.momentum = momentum
        self.epsilon = epsilon
        self.gamma = torch.ones(self.dims)
        self.beta = torch.zeros(self.dims)
        self.training = True

    def __call__(self, x):
        """Normalise `x` along dimension 0, then apply gamma and beta."""
        if self.training:
            cur_mean = x.mean(0, keepdim=True)
            cur_var = x.var(0, keepdim=True, unbiased=True)
            # The running statistics are bookkeeping only, so they are updated
            # outside of autograd.
            with torch.no_grad():
                self.running_mean = ((1-self.momentum) * self.running_mean) + (self.momentum * cur_mean)
                self.running_var = ((1-self.momentum) * self.running_var) + (self.momentum * cur_var)
        else:
            cur_mean = self.running_mean
            # Read the attribute that actually exists: the running statistic
            # is stored as `running_var`.
            cur_var = self.running_var

        self.out = (x - cur_mean) / torch.sqrt(cur_var + self.epsilon)
        self.out = (self.gamma * self.out) + self.beta

        return self.out

    def paramaters(self):
        """Return the trainable tensors: [gamma, beta]."""
        return [self.gamma, self.beta]
    
class Tanh:
    """Activation layer that applies the hyperbolic tangent; it owns no tensors."""

    def __call__(self, x):
        """Return `torch.tanh` of `x`."""
        activated = torch.tanh(x)
        return activated

    def paramaters(self):
        """Return an empty list, since this layer has nothing to train."""
        no_params = []
        return no_params

In [12]:
class Model:
    """Character-level MLP: embedding lookup -> Linear -> BatchNorm1d -> Tanh -> Linear.

    The embeddings of all `block_size` context tokens are concatenated into a
    single vector per example before entering the first Linear layer, so the
    model returns one row of `vocab_size` scores per example.

    Attributes:
        embedding_layer: tensor of shape (vocab_size, embedding_dims) holding
            one embedding row per token id.
        layers: the layers applied, in order, to the flattened embeddings.
        out: result of the most recent forward call (set by __call__).
    """

    def __init__(self, embedding_dims, vocab_size, hidden_states, device='cpu', block_size=3):
        """Build the network.

        Args:
            embedding_dims: length of each token embedding.
            vocab_size: number of distinct token ids (also the output width).
            hidden_states: width of the hidden layer.
            device: device the trainable tensors and running statistics are moved to.
            block_size: number of context tokens per example. The default of 3
                matches the BLOCK_SIZE constant used to build the dataset.
        """
        self.embedding_dims = embedding_dims
        self.hidden_states = hidden_states
        self.vocab_size = vocab_size
        self.block_size = block_size
        self.device = device

        self.embedding_layer = torch.randn((self.vocab_size, self.embedding_dims))

        self.layers = [
            # The first layer sees the concatenation of block_size embeddings.
            Linear(self.block_size * self.embedding_dims, self.hidden_states, False),
            BatchNorm1d(self.hidden_states),
            Tanh(),
            Linear(self.hidden_states, self.vocab_size)
        ]

        for param in self.paramaters():
            # Tensor.to() returns a tensor rather than moving in place, so the
            # storage is reassigned through .data; this keeps the tensor object
            # each layer already holds a reference to.
            param.data = param.data.to(device)
            param.requires_grad = True

        # Running statistics are not trainable but must sit on the same device
        # as the activations they are combined with.
        for layer in self.layers:
            for stat_name in ('running_mean', 'running_var'):
                stat = getattr(layer, stat_name, None)
                if stat is not None:
                    setattr(layer, stat_name, stat.to(device))

    def __call__(self, x):
        """Return scores of shape (num_examples, vocab_size) for token-id contexts `x`.

        `x` holds integer token ids with `block_size` ids per example, either as
        a batch of shape (num_examples, block_size) or as a single context of
        shape (block_size,).
        """
        embedded = self.embedding_layer[x]
        # Concatenate the per-token embeddings of each example into one row.
        self.out = embedded.reshape(-1, self.block_size * self.embedding_dims)
        for layer in self.layers:
            self.out = layer(self.out)
        return self.out

    def paramaters(self):
        """Return every trainable tensor: the embedding table, then each layer's tensors."""
        return [self.embedding_layer] + [p for layer in self.layers for p in layer.paramaters()]
            
            

In [15]:
# Instantiate the network: 10-dimensional token embeddings, a 200-unit hidden
# layer, and one output score per token in the vocabulary.
model = Model(
    embedding_dims=10,
    vocab_size=TOTAL_TOKENS,
    hidden_states=200,
)