In [1]:
import math
import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import trange
import matplotlib.pyplot as plt
from visualize import draw_dot
import requests
from nn import Embedding
from nn import Linear, Model
from optimize import Adam



In [3]:
from optimize import Momentum


l1 = Linear(1, 10, activation='Relu')
l2 = Linear(10, 1)
batch_size=1

model = Model([l1, l2])
optim = Momentum(model)

In [4]:
X = np.random.uniform(0, 10000000, 1000)
Y = 2*X

X_norm = (X - np.mean(X)) / np.std(X)
Y_norm = (Y - np.mean(Y)) / np.std(Y)

X_norm = X_norm.astype(np.float32)
Y_norm = Y_norm.astype(np.float32)

In [5]:
for _ in (t:=trange(1000)):
    ix = np.random.randint(0, X_norm.shape[0], (batch_size,)).flatten()

    out = model(X_norm[ix])
    
    optim.zero_grad()

    if (batch_size > 1):
        batch_loss = sum([((val-Y_norm[ix][i])**2)/batch_size for i, val in enumerate(out)])
        batch_loss.backward()
    
    else:
        
        loss = (out-Y_norm[ix][0])**2
        loss.backward()

    optim.step()

    t.set_description(f'loss: {loss.data}')

loss: 0.2827460467815399:   0%|          | 0/1000 [00:00<?, ?it/s] 

loss: 0.007062779273837805: 100%|██████████| 1000/1000 [00:01<00:00, 975.95it/s]  


In [208]:
import onnx
from star2onnx import export_to_onnx


onnx_model = export_to_onnx(model.input_shape, model.output_shape, model)
onnx.checker.check_model(onnx_model, True)
onnx.save(onnx_model, 'model.onnx')

In [209]:
import onnx2pytorch

# Load the ONNX model
onnx_model = onnx.load('model.onnx')

pytorch_model = onnx2pytorch.ConvertModel(onnx_model)

In [2]:
names = open('datasets/names.txt', 'r').read().splitlines()
random.shuffle(names)
content_len = 2
feature_count = 5
w_size = 20

chars = sorted(set('.'.join(names)))

itos = {i: c for i, c in enumerate(chars)}
stoi = {c: i for i, c in enumerate(chars)}

def build_dataset(names):
    X, Y = [], []
    context = [0] * content_len
    for name in names:
        name += '.'
        for c in name:
            X.append(context)
            Y.append(stoi[c])
            context = context[1:] + [stoi[c]]
    
    X = np.array(X)
    Y = np.array(Y)
    return X, Y

In [3]:
from optimize import Momentum


Xtrain, Ytrain = build_dataset(names)

C = Embedding(27, feature_count)
l1 = Linear(content_len * feature_count, w_size, activation='Tanh')
l2 = Linear(w_size, 27)

model = Model([C, l1, l2])

optim = Adam(model=model)
batch_size = 16

In [212]:
for _ in (t:=trange(100)):   
    ix = np.random.randint(0, Xtrain.shape[0], (1,)).flatten()

    out_pytorch = pytorch_model(torch.tensor(Xtrain[ix]))
    torch_int = F.softmax(out_pytorch, dim=1).argmax(dim=1).item()

    out_star = model(Xtrain[ix])

    maxVal = max([num.data for num in out_star])
    exp = [(math.e**(num-maxVal)) for num in out_star]
    count = sum([num.data for num in exp])
    prob = [val.data/count for val in exp]
    star_int = prob.index(max(prob))

    if (torch_int != star_int):
        raise Exception(f'error! Not equal! torch: {torch_int}, star: {star_int}')

    t.set_description(f'torch: {itos[torch_int]}, star: {itos[star_int]}')

torch: a, star: a: 100%|██████████| 100/100 [00:01<00:00, 62.26it/s]


In [207]:
from value import Value


for _ in (t:=trange(100)):
    ix = np.random.randint(0, Xtrain.shape[0], (batch_size,))

    out = model(Xtrain[ix])

    if type(out[0]) == Value:
        out = [out]

    expected = Ytrain[ix]

    losses = []

    for i, o in enumerate(out):
        maxVal = max([num.data for num in o])

        exp = [(math.e**(num-maxVal)) for num in o]

        count = sum([num for num in exp])

        prob = [val/count for val in exp]

        loss = prob[int(Ytrain[ix][i])].log()*-1

        losses.append(loss)
    
    batch_loss = sum(losses)/batch_size

    optim.zero_grad()

    batch_loss.backward()

    optim.step()
    
    t.set_description(f'loss: {batch_loss.data}')

loss: 2.2498700618743896: 100%|██████████| 100/100 [00:28<00:00,  3.50it/s]


In [None]:
avg_loss = 0
countt = 0
random.shuffle(names)
for name in names[:10]:
    context = [0] * content_len
    for ch in name:

        out = model([context])

        maxVal = max([num.data for num in out])

        exp = [(2**(num-maxVal)) for num in out]

        count = sum([num.data for num in exp])

        prob = [val/count for val in exp]

        loss = prob[stoi[ch]].log()*-1

        context = context[1:] + [stoi[ch]]

        avg_loss += loss.data
        countt += 1

print(f'loss: {avg_loss/countt}')

loss: 2.842767604484911


In [None]:
for _ in range(10):
    name = ''
    context = [0] * content_len
    while True:
        out = model([context])
        maxVal = max([num.data for num in out])
        exp = [(math.e**(num-maxVal)) for num in out]
        count = sum([num.data for num in exp])
        prob = [val.data/count for val in exp]
        ix = np.random.choice(len(prob), p=prob)
        name += itos[ix]
        context = context[1:] + [ix]
        if name[-1] == '.':
            print(name)
            break

ianyaridine.
ersed.
jiyiya.
kimdaralays.
rianina.
ryam.
abuca.
emlaeseyaravian.
miaynon.
kilaemsaharesal.
