In [1]:
from tinygrad import Tensor, nn, TinyJit

In [2]:
# Load the dataset: one name per line. The context manager guarantees the
# file handle is closed as soon as the contents have been read, instead of
# leaving it open until the garbage collector reclaims it.
with open("./names.txt") as names_file:
    names = names_file.read().splitlines()

In [3]:
# Token that marks both the start and the end of a name.
SPECIAL_TOKEN = "."

# Vocabulary: the special token at index 0 followed by the lowercase
# letters "a" through "z" in order.
vocab = [SPECIAL_TOKEN, *map(chr, range(ord("a"), ord("z") + 1))]
vocab_size = len(vocab)

In [4]:
# Build bigram training pairs. Each name is padded with SPECIAL_TOKEN on both
# ends; for every adjacent pair of characters, x receives the vocabulary index
# of the first character and y the index of the character that follows it.

# Character -> vocabulary index, built once so each lookup is a dict access
# rather than a linear `vocab.index` scan per character. Entries in `vocab`
# are unique, so this yields the same indices as `vocab.index`.
char_to_index = {char: index for index, char in enumerate(vocab)}

x, y = [], []

for name in names:
    name_chars = [SPECIAL_TOKEN] + list(name) + [SPECIAL_TOKEN]
    for char_a, char_b in zip(name_chars, name_chars[1:]):
        x.append(char_to_index[char_a])
        y.append(char_to_index[char_b])

# First 80% of the pairs (in file order, no shuffling) are used for training,
# the remainder for testing. Inputs are one-hot encoded; targets stay as
# integer class indices.
split = int(0.8 * len(x))
X_train, y_train, X_test, y_test = (
    Tensor(x[:split]).one_hot(vocab_size),
    Tensor(y[:split]),
    Tensor(x[split:]).one_hot(vocab_size),
    Tensor(y[split:]),
)

In [5]:
class FCNN:
    """Model consisting of a single `nn.Linear` layer."""

    def __init__(self, input_size, output_size):
        """Create the linear layer mapping `input_size` features to `output_size` outputs."""
        # The attribute name is part of the model's parameter path, so it is
        # kept as `linear`.
        self.linear = nn.Linear(input_size, output_size)

    def __call__(self, x: Tensor) -> Tensor:
        """Apply the linear layer to `x` and return its output."""
        scores = self.linear(x)
        return scores


# Inputs are one-hot vectors over the vocabulary and the outputs are one score
# per vocabulary entry, so both dimensions equal vocab_size.
fcnn = FCNN(input_size=vocab_size, output_size=vocab_size)

In [6]:
# Number of examples drawn per training step.
batch_size = 128

# Adam optimizer over every parameter of the model.
trainable_params = nn.state.get_parameters(fcnn)
optim = nn.optim.Adam(trainable_params)


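# Unused, hand-written negative log-likelihood loss, kept for reference only;
# train_step uses Tensor.sparse_categorical_crossentropy instead.
# NOTE(review): `[y_true]` appears to index whole rows rather than pick each
# sample's target-class entry, and y_pred would have to be probabilities
# (not raw logits) for `.log()` to be meaningful -- confirm before reviving.
# def nll_loss(y_pred: Tensor, y_true: Tensor) -> Tensor:
#     return -y_pred.log()[y_true].mean()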


@TinyJit
@Tensor.train()
def train_step():
    """Perform one optimizer update on a random mini-batch.

    Draws `batch_size` random row indices below `X_train.shape[0]`, computes
    the sparse categorical cross-entropy between the model output for those
    rows and their targets, backpropagates, and applies one optimizer step.

    Returns:
        The loss tensor for the sampled mini-batch.
    """
    optim.zero_grad()
    batch_indices = Tensor.randint(batch_size, high=X_train.shape[0])
    batch_inputs = X_train[batch_indices]
    batch_targets = y_train[batch_indices]
    batch_scores = fcnn(batch_inputs)
    loss = batch_scores.sparse_categorical_crossentropy(batch_targets).backward()
    optim.step()
    return loss

In [7]:
# Run 700 training steps. On the first step and on every 100th step, measure
# accuracy on the full test split and print it alongside the latest batch loss.
for step in range(1, 701):
    loss = train_step()
    is_report_step = step == 1 or step % 100 == 0
    if not is_report_step:
        continue
    # Evaluate with the global training flag switched off.
    Tensor.training = False
    predictions = fcnn(X_test).argmax(axis=1)
    acc = (predictions == y_test).mean().item()
    print(f"step {step}, loss {loss.item():.2f}, acc {acc*100.:.2f}%")

step 1, loss 3.30, acc 3.40%
step 100, loss 3.17, acc 7.89%
step 200, loss 3.10, acc 14.23%
step 300, loss 3.00, acc 15.22%
step 400, loss 2.87, acc 17.40%
step 500, loss 2.86, acc 18.61%
step 600, loss 2.76, acc 18.61%
step 700, loss 2.66, acc 19.41%


In [10]:
def generate_word(starting_chars="", max_len=20):
    """Greedily extend `starting_chars` one character at a time using `fcnn`.

    At each step the last character generated so far is one-hot encoded, fed
    through `fcnn`, and the highest-scoring vocabulary entry is appended.
    Generation stops when the model predicts SPECIAL_TOKEN or when the word
    (excluding the leading start marker) reaches `max_len` characters.

    Args:
        starting_chars: Prefix the generated word begins with. Every character
            must be in `vocab`, since `vocab.index` is used to look it up.
        max_len: Maximum length of the returned word. A prefix that is already
            this long or longer is returned unchanged.

    Returns:
        The generated word, without the leading SPECIAL_TOKEN.
    """
    generated = SPECIAL_TOKEN + starting_chars
    # The leading SPECIAL_TOKEN does not count towards max_len.
    while len(generated) - 1 < max_len:
        last_index = vocab.index(generated[-1])
        scores = fcnn(Tensor([last_index]).one_hot(vocab_size))
        next_index = scores.argmax().item()
        if next_index == vocab.index(SPECIAL_TOKEN):
            # The model predicts the end of the word.
            break
        generated = generated + vocab[next_index]
    return generated[1:]

In [11]:
# Generate one word per vocabulary entry, using that entry as the prefix.
# SPECIAL_TOKEN is stripped, so its entry becomes an empty prefix.
prefixes = [token.replace(SPECIAL_TOKEN, "") for token in vocab]
for prefix in prefixes:
    print(generate_word(prefix))

a
a
be
ca
da
e
fa
ga
h
i
ja
ka
le
ma
n
on
pa
qa
ri
sa
t
u
vi
wa
x
ya
za
