In [2]:
import csv
import pandas as pd
import torch

# load the names from dataset/indianames.csv
names = []
with open('dataset/indianames.csv', 'r') as f:
    names = f.readlines()
names = [name.strip() for name in names]
names.sort()

In [3]:
xs = []
ys = []
stoi = {c: i for i, c in enumerate('.abcdefghijklmnopqrstuvwxyz')}

for name in names:
    chars  = list(f".{name}.")
    for c1,c2 in zip(chars, chars[1:]):
        xs.append(stoi[c1])
        ys.append(stoi[c2])

xs = torch.tensor(xs)
ys = torch.tensor(ys)

In [12]:
# we will need to convert the numbers to one-hot encoding
# instead of using a vector of numbers, we will use a matrix of one-hot vectors
# each row of the matrix will be a one-hot vector
# the number of rows will be the same as the number of characters in the dataset
# the number of columns will be the same as the number of characters in the dataset
import torch.nn.functional as F
ws = torch.randn((27, 27), requires_grad=True)


# we convert this into float so we can feed it to the neural network
# the neural network expects float values
x_enc = F.one_hot(xs, num_classes=27).float()
for i in range(1000):
    logits = x_enc @ ws
    counts = logits.exp()
    probs = counts / counts.sum(1, keepdims=True)
    # the probabilies are good when they are high for the given label
    # and low for the other labels
    # we calculate the loss by taking the negative log of the probability of the correct label
    # the loss is high when the probability of the correct label is low
    # the loss is low when the probability of the correct label is high
    # we want to minimize the loss
    # we want to maximize the probability of the correct label

    loss = - probs[torch.arange(0, len(probs)), ys].log().mean() + 0.01 * (ws**2).mean()
    print(loss.item())
    # we set the gradient to 0
    ws.grad = None
    loss.backward()
    lr = 50
    
    if loss.item() < 2.4:
        lr = 1
    ws.data += - lr * ws.grad

3.8058674335479736
3.354444980621338
3.092466354370117
2.938866376876831
2.838832139968872
2.765158176422119
2.7088215351104736
2.6650397777557373
2.63059663772583
2.6029953956604004
2.580413341522217
2.5615854263305664
2.5456361770629883
2.531947374343872
2.5200722217559814
2.5096821784973145
2.5005264282226562
2.4924111366271973
2.4851789474487305
2.478703737258911
2.4728803634643555
2.4676201343536377
2.462850570678711
2.4585094451904297
2.4545445442199707
2.4509117603302
2.447573184967041
2.4444968700408936
2.441655158996582
2.439023733139038
2.436582326889038
2.4343130588531494
2.432199001312256
2.430227518081665
2.428385019302368
2.426661491394043
2.425046443939209
2.4235310554504395
2.422107696533203
2.4207687377929688
2.4195079803466797
2.4183189868927
2.4171969890594482
2.4161365032196045
2.415133237838745
2.4141831398010254
2.4132819175720215
2.412426710128784
2.411614179611206
2.410841464996338
2.4101059436798096
2.409404993057251
2.4087367057800293
2.4080989360809326
2.4074

In [5]:
start_char = 0 # '.'
output = []

while True:
    # encode the start character
    x_enc = F.one_hot(torch.tensor([start_char]), num_classes=27).float()
    # calculate the logits
    logits = x_enc @ ws
    # calculate the probabilities
    counts = logits.exp()
    # normalize the probabilities
    probs = counts / counts.sum(1, keepdims=True)
    # sample from the probabilities
    # this will give us the next character
    next_char = torch.multinomial(probs, 1).item()
    # if the next character is not the start character
    # we add it to the output
    if next_char != start_char:
        output.append(next_char)
        start_char = next_char
    if next_char == 0 or len(output) > 20:
        break
    
''.join([list(stoi.keys())[i] for i in output])



'jul.'