In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
#from mytorch.optim import SGD
#from mytorch.utils import DataLoader
import matplotlib.pyplot as plt
import pytorch_lightning as pl
import random
from torch.utils.data import DataLoader
from pytorch_lightning.callbacks import RichProgressBar
from pytorch_lightning.callbacks import TQDMProgressBar

In [8]:
# Load makemore data
words = open('names.txt', 'r').read().splitlines()
print(f'Number of names: {len(words)}')
print(
    f'Median name length: {torch.median(torch.tensor(list(len(w) for w in words))).item()}')
print(f'Max name length: {max(len(w) for w in words)}')
print(f'Example names: {words[:8]}')

# build the vocapbulary of charactors mapping to/from integers
chars = sorted(list(set(''.join(words))))
stoi = {s: i+1 for i, s in enumerate(chars)}
stoi['.'] = 0
itos = {i: s for s, i in stoi.items()}
vocab_size = len(itos)
print(itos)
print(vocab_size)

Number of names: 32033
Median name length: 6
Max name length: 15
Example names: ['emma', 'olivia', 'ava', 'isabella', 'sophia', 'charlotte', 'mia', 'amelia']
{1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z', 0: '.'}
27


In [9]:
random.seed(42)
random.shuffle(words)
block_size = 3


def build_dataset(words):
    X, Y = [], []

    for w in words:
        context = [0] * block_size
        for ch in w + '.':
            ix = stoi[ch]
            X.append(context)
            Y.append(ix)
            context = context[1:] + [ix]

    X = torch.tensor(X)
    Y = torch.tensor(Y)
    print(X.shape, Y.shape)
    return X, Y


random.seed(42)
random.shuffle(words)
n1 = int(0.8*len(words))
n2 = int(0.9*len(words))


Xtr, Ytr = build_dataset(words[:n1])  # 80%
Xdev, Ydev = build_dataset(words[n1:n2])  # 10%
Xte, Yte = build_dataset(words[n2:])  # 10%

torch.Size([182580, 3]) torch.Size([182580])
torch.Size([22767, 3]) torch.Size([22767])
torch.Size([22799, 3]) torch.Size([22799])


In [10]:
torch.manual_seed(42)
n_embd = 10
n_hidden = 200
batch_size = 64

class MakeMore(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Embedding(vocab_size, n_embd),
            nn.Flatten(),
            nn.Linear(n_embd*block_size, n_hidden,    
                   bias=False),nn.BatchNorm1d(n_hidden), nn.Tanh(),
            nn.Linear(n_hidden, vocab_size))
        self.batch_size = 64

    def training_step(self, batch, batch_idx):
        # training_step defines the train loop.
        x, y = batch
        #x = x.view(x.size(0), -1)
        logits = self.model(x)
        loss = F.cross_entropy(logits, y)
        #self.log('train_loss', loss)
        self.log("train_loss", loss, prog_bar=False, on_step=False, on_epoch=True)
        return loss

    def validation_step(self, batch, batch_idx):
        # training_step defines the train loop.
        x, y = batch
        #x = x.view(x.size(0), -1)
        logits = self.model(x)
        loss = F.cross_entropy(logits, y)
        #self.log('train_loss', loss)
        self.log("train_loss", loss, prog_bar=False, on_step=False, on_epoch=True)
        return loss

    def predict_step(self, batch, batch_idx):
        # training_step defines the train loop.
        #x, y = batch
        x = batch
        #x = x.view(x.size(0), -1)
        logits = self.model(x)
        return logits

    def configure_optimizers(self):
        optimizer = torch.optim.SGD(self.parameters(), lr=0.05)
        return optimizer

#data = DataLoader(torch.concatenate((Xtr,torch.unsqueeze(Ytr,dim=1)),dim=1), batch_size=batch_size)

class Dataset(torch.utils.data.Dataset):
  'Characterizes a dataset for PyTorch'
  def __init__(self, features, targets):
        'Initialization'
        self.features = features
        self.targets = targets

  def __len__(self):
        'Denotes the total number of samples'
        return len(self.targets)

  def __getitem__(self, index):
        'Generates one sample of data'
        # Select sample
        #ID = self.target[index]

        # Load data and get label
        X = self.features[index]
        y = self.targets[index]

        return X, y

train_data = Dataset(Xtr,Ytr)
valid_data = Dataset(Xdev,Ydev)

data = DataLoader(train_data, batch_size=batch_size)
data_dev = DataLoader(valid_data, batch_size=batch_size)




In [11]:
makemore = MakeMore()
#trainer = pl.Trainer(max_epochs=5,accelerator="mps",callbacks=[RichProgressBar(refresh_rate=200)])
#trainer = pl.Trainer(max_epochs=10,accelerator="mps",callbacks=[TQDMProgressBar(refresh_rate=1000)])
#trainer = pl.Trainer(max_epochs=10,enable_progress_bar=False)
trainer = pl.Trainer(max_epochs=40,  enable_progress_bar=False)
trainer.fit(model=makemore, train_dataloaders=data)

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type       | Params
-------------------------------------
0 | model | Sequential | 12.1 K
-------------------------------------
12.1 K    Trainable params
0         Non-trainable params
12.1 K    Total params
0.048     Total estimated model params size (MB)
`Trainer.fit` stopped: `max_epochs=40` reached.


In [12]:
#trainer.validate(model=model, dataloaders=data_dev)
trainer.validate(model=makemore, dataloaders=data)

[{'train_loss': 2.1033780574798584}]

In [15]:
#context = [0,0,0]
#x = torch.tensor([context])
#trainer.predict(model=model, dataloaders=x);

In [13]:
# This Should work
x = Xtr[:1]
print (x, x.shape)

# What about this?
context = [0,0,0]
x = torch.tensor([context])
print (x, x.shape)

makemore.model.eval()

print (makemore.model(x))

#batch = next(iter(data))
#x, y = batch
#print (x)
#trainer.predict(makemore,x)

#trainer.predict()


tensor([[0, 0, 0]]) torch.Size([1, 3])
tensor([[0, 0, 0]]) torch.Size([1, 3])
tensor([[-5.8411,  2.5711,  0.6888,  0.9979,  0.7877,  0.8133, -0.7523,  0.0363,
          0.2019,  0.2049,  1.4215,  1.4555,  1.1140,  1.5603,  0.1062, -0.2693,
         -0.7146, -1.8764,  0.4274,  1.3341,  0.9290, -2.0732, -0.2959, -0.3547,
         -1.5642, -0.1308,  0.4757]], grad_fn=<AddmmBackward0>)


In [15]:
with torch.no_grad():
    for _ in range(20):

        out = []
        context = [0] * block_size
        while True:
            x = torch.tensor([context])
            logits = makemore.model(x)

            probs = F.softmax(logits, dim=1)
            ix = torch.multinomial(probs, num_samples=1).item()

            context = context[1:] + [ix]
            out.append(ix)
            if ix == 0:
                break

        print(''.join(itos[i] for i in out))

ant.
mel.
ter.
javayis.
sobhia.
kaeptassey.
avanyelelny.
lazoluwashrtisausiah.
yar.
albeth.
yia.
mala.
vilynn.
dhe.
alden.
kalyn.
tos.
caytlous.
ablynn.
lynn.
