# Introducing automatic optimization - deep learning framework

In [3]:
import sys
sys.path.append('../')
import numpy as np # noqa: E402
from dl_framework.tensor import Tensor # noqa: E402
from dl_framework.optimisers import SGD # noqa: E402
from dl_framework.layers import Linear, Sequential, Tanh, Sigmoid, Embedding, RNNCell # noqa: E402
from dl_framework.loss import MSE, CrossEntropy # noqa: E402

## Creating tensors

In [4]:
a = Tensor([1,2,3,4,5], autograd=True)
b = Tensor([2,2,2,2,2], autograd=True)
c = Tensor([5,4,3,2,1], autograd=True)

d = a + (-b)
e = (-b) + c
f = d + e

f.backward(Tensor(np.array([1,1,1,1,1])))

b.grad.data == np.array([-2,-2,-2,-2,-2])

array([ True,  True,  True,  True,  True])

## Using tensors

In [3]:
data = Tensor(np.array([[0,0],[0,1],[1,0],[1,1]]), autograd=True)
target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True)

w = [
    Tensor(np.random.randn(2,3), autograd=True),
    Tensor(np.random.randn(3,1), autograd=True)
]

for i in range(10):
    # predict: 
    pred = data.mm(w[0]).mm(w[1])
    # compare:
    loss = ((pred - target)*(pred - target)).sum(0)
    # learn:
    
    loss.backward(Tensor(np.ones_like(loss.data)))
    for w_ in w:
        w_.data -= w_.grad.data * 0.1
        w_.grad.data *= 0
        
    print(loss)

[22.63845013]
[118.90031279]
[3149.2825024]
[2.32355657e+08]
[1.79813859e+23]
[8.37034314e+67]
[8.44466488e+201]
[inf]
[nan]
[nan]


  return Tensor(self.data * other.data, autograd=True, creators=[self, other], creation_op="mul")
  w_.grad.data *= 0


## Adding automacic optimization

In [4]:
data = Tensor(np.array([[0,0],[0,1],[1,0],[1,1]]), autograd=True)
target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True)

w = [
    Tensor(np.random.randn(2,3), autograd=True),
    Tensor(np.random.randn(3,1), autograd=True)
]

optim = SGD(parameters=w, alpha=0.1)

for i in range(10):
    # predict: 
    pred = data.mm(w[0]).mm(w[1])
    # compare:
    loss = ((pred - target)*(pred - target)).sum(0)
    # learn:
    loss.backward(Tensor(np.ones_like(loss.data)))
    optim.step()
        
    print(loss)

[21.01333338]
[5.14996252]
[0.07354472]
[0.00206634]
[0.00057971]
[0.00021449]
[7.91933347e-05]
[2.91593458e-05]
[1.07185403e-05]
[3.93592422e-06]


## Adding layers

In [65]:
data = Tensor(np.array([[0,0],[0,1],[1,0],[1,1]]), autograd=True)
target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True)

model = Sequential([
    Linear(2,3),
    Tanh(),
    Linear(3,1),
    Sigmoid(),
])

loss_func = MSE()
optim = SGD(parameters=model.get_parameters(), alpha=1)

for i in range(10):
    # predict: 
    pred = model.forward(data)
    # compare:
    loss = loss_func.forward(pred, target)
    # learn:
    loss.backward(Tensor(np.ones_like(loss.data)))
    optim.step()
        
    print(loss)

[0.60298668]
[0.4046423]
[0.25937405]
[0.16806312]
[0.11504623]
[0.08385281]
[0.06443996]
[0.05159964]
[0.04264263]
[0.03611468]


## Embedding

In [81]:
data = Tensor(np.array([1,2,1,2]), autograd=True)
target = Tensor(np.array([0,1,0,1]), autograd=True)

model = Sequential([
    Embedding(3,3),
    Tanh(),
    Linear(3,4),
])

loss_func = CrossEntropy()
optim = SGD(parameters=model.get_parameters(), alpha=0.1)

for i in range(10):
    # predict: 
    pred = model.forward(data)
    # compare:
    loss = loss_func.forward(pred, target)
    # learn:
    loss.backward(Tensor(np.ones_like(loss.data)))
    optim.step()
        
    print(loss)

1.3476557905994486
0.94522014909927
0.7137810427562177
0.5648645150998857
0.45972076269814255
0.3814834100264345
0.32156384376170194
0.2748283464255008
0.23785275500108136
0.20821862904375293


## RNN

In [7]:
f = open('../data/tasksv11/en/qa1_single-supporting-fact_train.txt','r')
raw = f.readlines()
f.close()

tokens = list()
for line in raw[0:1000]:
    tokens.append(line.lower().replace("\n","").split(" ")[1:])

new_tokens = list()
for line in tokens:
    new_tokens.append(['-'] * (6 - len(line)) + line)

tokens = new_tokens

vocab = set()
for sent in tokens:
    for word in sent:
        vocab.add(word)

vocab = list(vocab)

word2index = {}
for i,word in enumerate(vocab):
    word2index[word]=i
    
def words2indices(sentence):
    idx = list()
    for word in sentence:
        idx.append(word2index[word])
    return idx

indices = list()
for line in tokens:
    idx = list()
    for w in line:
        idx.append(word2index[w])
    indices.append(idx)

data = np.array(indices)

In [8]:
embed = Embedding(vocab_size=len(vocab),dim=16)
model = RNNCell(n_inputs=16, n_hidden=16, n_output=len(vocab))

criterion = CrossEntropy()
optim = SGD(parameters=model.get_parameters() + embed.get_parameters(), alpha=0.05)

for iter in range(1000):
    batch_size = 100
    total_loss = 0
    
    hidden = model.init_hidden(batch_size=batch_size)

    for t in range(5):
        input = Tensor(data[0:batch_size,t], autograd=True)
        rnn_input = embed.forward(input=input)
        output, hidden = model.forward(input=rnn_input, hidden=hidden)

    target = Tensor(data[0:batch_size,t+1], autograd=True)    
    loss = criterion.forward(output, target)
    loss.backward()
    optim.step()
    total_loss += loss.data
    if(iter % 200 == 0):
        p_correct = (target.data == np.argmax(output.data,axis=1)).mean()
        print("Loss:",total_loss / (len(data)/batch_size),"% Correct:",p_correct)


Loss: 0.516188684878311 % Correct: 0.0
Loss: 0.18015247092953957 % Correct: 0.21
Loss: 0.1615806150053133 % Correct: 0.31
Loss: 0.14675377673074025 % Correct: 0.36
Loss: 0.14176057097488015 % Correct: 0.36


In [13]:
batch_size = 1
hidden = model.init_hidden(batch_size=batch_size)
for t in range(5):
    input = Tensor(data[0:batch_size,t], autograd=True)
    rnn_input = embed.forward(input=input)
    output, hidden = model.forward(input=rnn_input, hidden=hidden)

target = Tensor(data[0:batch_size,t+1], autograd=True)    
loss = criterion.forward(output, target)

ctx = ""
for idx in data[0:batch_size][0][0:-1]:
    ctx += vocab[idx] + " "
print("Context:",ctx)
print("True:",vocab[target.data[0]])
print("Pred:", vocab[output.data.argmax()])

Context: - mary moved to the 
True: bathroom.
Pred: hallway.
