In [78]:
# Implement Layers of a Nueral Net by hand
import torch
import os
import matplotlib.pyplot as plt

In [79]:
def get_absolute_directory_path(folder_name):
    current_dir = os.path.abspath(__file__)
    while not os.path.exists(os.path.join(current_dir, folder_name)):
        current_dir = os.path.dirname(current_dir)
    return os.path.join(current_dir, folder_name)

In [134]:
dataset = open('../names.txt', 'r').read().splitlines()
chars = sorted(list(set(''.join(dataset))))
index_lookup_table = {index + 1: char for index, char in enumerate(chars)}
index_lookup_table[0] = '.'

# index_lookup_table

In [94]:
xenc = torch.nn.functional.one_hot(torch.tensor([0]), num_classes=27).float()
elems = xenc.clone().detach()
print(elems)
# plt.plot(elems)


tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0.]])


In [132]:
g = torch.Generator().manual_seed(2147483647)

# completely untrained
def nueral_net_algorithm(
        seed_generator, 
        num_of_obj_classes, 
        W, 
        lookup_table
):
    out = []
    ix = 0
    while True:
        # forward pass
        xenc = torch.nn.functional.one_hot(torch.tensor([ix]), num_classes=num_of_obj_classes).float()
        # used to predict log counts , why does tensor multiplication automatically return log count?
        logits = xenc @ W
        # manual softmax function (make nueral net from probs),  e^z_i / from j->k sum(e^z_j) aka 'normalization function'
        counts = logits.exp()  # equivalent to N matrix in counting bigram or P matrix in normalized matrix of probabilities based on count
        p = counts / counts.sum(1, keepdims=True)  # 1 doesn't work, but -1 does? why? 
        ix = torch.multinomial(p, num_samples=1, replacement=True, generator=seed_generator).item()
        out.append(lookup_table[ix])
        if ix == 0:
            break
    return ''.join(out)

# Sample NueroNet
nueral_net_algorithm(g, 27, torch.randn((27, 27), generator=g, requires_grad=True), index_lookup_table)

'yofvwqjkdktcnblz.'

In [187]:
# invert atoi into i2c
ctoi = {c: ix+1 for ix, c in enumerate(chars)}
itoc = {ix:c for c, ix in ctoi.items()}
itoc[0]='.'
ctoi["."]=0

In [242]:
blocksize = 3 
# X is input, 
# Y is the output. in test Y is the expected value
X,Y = [], []
for word in dataset[:1]:
    print(word)
    context = [0] * blocksize # A zero vector
    for ch in word + '.':
        ix = ctoi[ch]
        X.append(context)
        Y.append(ix) # the solution in space. 
        print(
            ''.join(itoc[i] for i in context),
            '----->',
            itoc[ix]
        )
        context = context[1:] + [ix]

X = torch.tensor(X)
Y = torch.tensor(Y)

emma
... -----> e
..e -----> m
.em -----> m
emm -----> a
mma -----> .


In [282]:
print(X)
print(X.shape)

tensor([[ 0,  0,  0],
        [ 0,  0,  5],
        [ 0,  5, 13],
        [ 5, 13, 13],
        [13, 13,  1]])
torch.Size([5, 3])


In [283]:
print(Y)
print(Y.shape)

tensor([ 5, 13, 13,  1,  0])
torch.Size([5])


In [245]:
print(f"{X.shape}: {X.dtype}", f"{Y.shape}: { Y.dtype}")

torch.Size([5, 3]): torch.int64 torch.Size([5]): torch.int64


In [258]:
C = torch.randn((27,2)) # embedding
# C
# C.shape

In [280]:
emb = C[X]
print(emb.shape)
emb
# torch.nn.functional.one_hot(torch.tensor(5),num_classes=27).float() @ C

torch.Size([5, 3, 2])


tensor([[[ 0.3766,  0.8133],
         [ 0.3766,  0.8133],
         [ 0.3766,  0.8133]],

        [[ 0.3766,  0.8133],
         [ 0.3766,  0.8133],
         [ 0.3327, -1.6047]],

        [[ 0.3766,  0.8133],
         [ 0.3327, -1.6047],
         [-0.0398, -0.5801]],

        [[ 0.3327, -1.6047],
         [-0.0398, -0.5801],
         [-0.0398, -0.5801]],

        [[-0.0398, -0.5801],
         [-0.0398, -0.5801],
         [ 0.2930, -0.2716]]])

In [290]:
# x = 3*2, because 3 wrds each for the 2d embedding
# y = any number of nuerons
W1 = torch.randn((6,100)) 

# biases
b1 = torch.randn(100)

# torch.cat([emb[:,0,:], emb[:,1,:], emb[:,2,:]],1) # wont scale

# scales better when inputs>3 or n inputs
# torch.cat(torch.unbind(emb,1),1)

# hidden layer 1
# (emb @ W1) + b1 # cant multiply 15x2 by 6x100
# hlyr1_v = emb.view(5,6) @ W1 + b1 # cast emb to 5,6 by
hlyr1_v = emb.view(-1,6) @ W1 + b1 # cast emb to 5,6 by

h = torch.tan(hlyr1_v)
print(h)
print(h.shape)

tensor([[ 2.1817e+00, -1.9138e+00,  5.5493e+00, -1.2683e+00,  8.6979e-02,
         -1.1546e+00, -3.2126e-02,  1.9304e+00,  9.8445e-01, -5.6624e+00,
         -4.6631e+00, -1.6535e+00,  1.2791e-01, -1.9631e-01, -1.3475e+01,
         -1.0500e+00, -2.4372e+00, -5.1074e-01,  2.2103e-01,  2.4169e-01,
          1.1571e+00,  6.3173e-01,  3.0266e+01,  4.1182e+00, -6.4970e-01,
         -3.9281e+00, -2.0625e+00, -1.1188e+00, -5.9488e-01,  4.1467e-01,
         -1.3135e+00, -1.1390e+00,  4.2340e-01,  6.5654e-01, -4.7994e+00,
         -2.7832e-02, -4.0230e-01,  9.5865e-01, -1.0010e+00,  4.3827e-02,
          2.9399e-01,  3.0620e+00,  2.0305e-01, -3.6356e-01,  6.0252e-01,
          3.4954e-01, -1.6969e+00, -3.8861e-01,  3.0887e+00,  2.4025e+00,
          2.0700e+00,  5.8174e-01,  5.4589e+00, -1.1988e+01, -8.1238e+00,
          7.9179e-01,  1.6649e+00, -6.7288e+01,  3.0885e-01, -7.0528e-01,
         -4.6119e+01, -3.2396e-01,  7.0800e-02,  1.5717e+00, -8.8467e-01,
          2.2254e-01,  2.2337e+00, -5.