# Test om koden er riktig implementert

Her er et forslag til testfunksjoner for å sjekke om koden er riktig implementert.
```assert variabel``` vil gi en feilmelding med mindre variabelen ```variabel = True```. For eksempel vil ```assert a == b``` gi en feilmelding med mindre ```a``` og ```b``` er like.

In [1]:
#For eksempel:
variable = True
assert variable, "You need to change 'variable' to True"

In [2]:
from layers import *
from neural_network import NeuralNetwork
from utils import onehot
import numpy as np

In [3]:
#We choose some arbitrary values for the dimensions
b = 6
n_max = 7
m = 8
n = 5

d = 10
k = 5
p = 20

#Create an arbitrary dataset
x = np.random.randint(0, m, (b,n))
y = np.random.randint(0, m, (b,n_max))

print(x)
print(y)

#initialize the layers
feed_forward = FeedForward(d,p)
attention = Attention(d,k) # Changed from k to n to n_max
embed_pos = EmbedPosition(n_max,m,d)
un_embed = LinearLayer(d,m)
softmax = Softmax()


#a manual forward pass
X = onehot(x, m)
z0 = embed_pos.forward(X)
z1 = feed_forward.forward(z0)
z2 = attention.forward(z1)
z3 = un_embed.forward(z2)
Z = softmax.forward(z3) 

z = np.argmax(Z,axis=1)
print(f"big z: {Z[0]}")
print(f"z: {z[0]}")

#check the shapes
assert X.shape == (b,m,n), f"X.shape={X.shape}, expected {(b,m,n)}"
assert z0.shape == (b,d,n), f"z0.shape={z0.shape}, expected {(b,d,n)}"
assert z1.shape == (b,d,n), f"z1.shape={z1.shape}, expected {(b,d,n)}"
assert z2.shape == (b,d,n), f"z2.shape={z2.shape}, expected {(b,d,n)}"
assert z3.shape == (b,m,n), f"z3.shape={z3.shape}, expected {(b,m,n)}"
assert Z.shape == (b,m,n), f"Z.shape={Z.shape}, expected {(b,m,n)}"

#is X one-hot?
assert X.sum() == b*n, f"X.sum()={X.sum()}, expected {b*n}"


assert np.allclose(Z.sum(axis=1), 1), f"Z.sum(axis=1)={Z.sum(axis=1)}, expected {np.ones(b)}"
assert np.abs(Z.sum() - b*n) < 1e-5, f"Z.sum()={Z.sum()}, expected {b*n}"
assert np.all(Z>=0), f"Z={Z}, expected all entries to be non-negative"


[[4 2 1 4 5]
 [3 7 4 4 2]
 [7 7 5 3 4]
 [2 7 6 0 2]
 [7 4 3 4 4]
 [1 7 4 3 5]]
[[7 1 7 4 7 5 7]
 [5 3 6 6 1 6 3]
 [4 4 2 7 4 3 5]
 [0 3 3 1 2 7 4]
 [7 5 6 2 7 3 5]
 [5 5 0 0 3 5 1]]
w_Q
w_K
w_O
w_V
Shape of A: (6, 5, 5)
big z: [[0.12051205 0.12750496 0.12535268 0.12944061 0.12079102]
 [0.13318627 0.13432582 0.12533915 0.12686629 0.12486061]
 [0.12711141 0.12725518 0.12986862 0.12104472 0.1285896 ]
 [0.11478912 0.1230089  0.11762892 0.11784457 0.12262771]
 [0.12857727 0.12437798 0.12698345 0.12558873 0.13415843]
 [0.13114463 0.11927612 0.11452461 0.13086574 0.12365433]
 [0.1215084  0.1243745  0.13066867 0.12625427 0.12318838]
 [0.12317084 0.11987654 0.1296339  0.12209507 0.12212991]]
z: [1 1 6 5 4]


In [4]:

#test the forward pass
x = np.random.randint(0, m, (b,n_max))
X = onehot(x, m)

#we test with a y that is shorter than the maximum length
#COMMENT 2P&1P: WTF!? Skal ikke disse alltid være like lange???????
n_y = n_max - 1
y = np.random.randint(0, m, (b,n_y))
print(y)

#initialize a neural network based on the layers above
network = NeuralNetwork([embed_pos, feed_forward, attention, un_embed, softmax])
#and a loss function
loss = CrossEntropy()

#do a forward pass
Z = network.forward(X)

#compute the loss
print(Z.shape)
print(y.shape)
L = loss.forward(Z, y)

#get the derivative of the loss wrt Z
grad_Z = loss.backward()
print(f"Grad_z: {grad_Z.shape}")

#and perform a backward pass
_ = network.backward(grad_Z)

#and and do a gradient descent step
_ = network.step_gd(0.01)

[[7 0 4 7 5 5]
 [0 6 1 3 3 1]
 [4 0 4 0 1 0]
 [6 5 6 4 3 4]
 [1 6 2 1 6 7]
 [0 5 5 5 6 6]]
w_Q
w_K
w_O
w_V
Shape of A: (6, 7, 7)
(6, 8, 7)
(6, 6)
onehotshape: (6, 8, 6)
x shape: (6, 8, 7)
[[0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]]
Grad_z: (6, 8, 7)
z_l:(6, 8, 7)
grad: (6, 8, 7)
P: (6, 8, 7)
Q: (6, 1, 7)
g_OV: (6, 10, 7)
z_l:(6, 7, 7)
grad: (6, 7, 7)
P: (6, 7, 7)
Q: (6, 1, 7)
g_S: (6, 7, 7)
wpd shape: (10, 7)
grad shape: (6, 10, 7)


In [5]:
"""
Here you may add additional tests to for example:

- Check if the ['d'] keys in the parameter dictionaries are not None, or receive something when running backward pass
- Check if the parameters change when you perform a gradient descent step
- Check if the loss decreases when you perform a gradient descent step

This is voluntary, but could be useful.
"""

"\nHere you may add additional tests to for example:\n\n- Check if the ['d'] keys in the parameter dictionaries are not None, or receive something when running backward pass\n- Check if the parameters change when you perform a gradient descent step\n- Check if the loss decreases when you perform a gradient descent step\n\nThis is voluntary, but could be useful.\n"

In [6]:
#check if loss is non-negative
assert L >= 0, f"L={L}, expected L>=0"
assert grad_Z.shape == Z.shape, f"grad_Z.shape={grad_Z.shape}, expected {Z.shape}"

#check if onehot(y) gives zero loss
Y = onehot(y, m)
L = loss.forward(Y, y)
assert L < 1e-5, f"L={L}, expected L<1e-5"


onehotshape: (6, 8, 6)
x shape: (6, 8, 6)


In [7]:
m = np.array([[[.29,.15,.10],
     [.21,.5,.04],
     [.15,.03,.11],
     [.13,.21,.43],
     [.22,.11,.32]],
     [
     [1,0,0],
     [0,1,0],
     [0,0,0],
     [0,0,0],
     [0,0,1]]])



loss = CrossEntropy()

loss.forward(m,np.array([[0,1,4],[0,1,4]]))
loss.backward()

onehotshape: (2, 5, 3)
x shape: (2, 5, 3)
[[1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]]


array([[[-1.14942525, -0.        , -0.        ],
        [-0.        , -0.66666665, -0.        ],
        [-0.        , -0.        , -0.        ],
        [-0.        , -0.        , -0.        ],
        [-0.        , -0.        , -1.04166663]],

       [[-0.33333333, -0.        , -0.        ],
        [-0.        , -0.33333333, -0.        ],
        [-0.        , -0.        , -0.        ],
        [-0.        , -0.        , -0.        ],
        [-0.        , -0.        , -0.33333333]]])

In [8]:
x_T = np.array([[[0,0,1],
      [0,0,0]]
     ,[[0,0,0],
       [1,0,0]]])

a = np.ones((3,2))
b = a.T

s = np.einsum('bad,ds,sq,bqk -> bak',x_T,a, b, np.transpose(x_T, axes=(0,2,1)), optimize=True)
for i in range(len(x_T)):
    print(s == (x_T[i]@a)@(b@x_T[i].T))

print(s)


[[[ True  True]
  [ True  True]]

 [[False  True]
  [ True False]]]
[[[False  True]
  [ True False]]

 [[ True  True]
  [ True  True]]]
[[[2. 0.]
  [0. 0.]]

 [[0. 0.]
  [0. 2.]]]
