# Test om koden er riktig implementert

Her er et forslag til testfunksjoner for å sjekke om koden er riktig implementert.
```assert variabel``` vil gi en feilmelding med mindre variabelen ```variabel = True```. For eksempel vil ```assert a == b``` gi en feilmelding med mindre ```a``` og ```b``` er like.

In [1]:
#For eksempel:
variable = True
assert variable, "You need to change 'variable' to True"

In [2]:
from layers import *
from neural_network import NeuralNetwork
from utils import onehot
import numpy as np

In [3]:
#We choose some arbitrary values for the dimensions
b = 6
n_max = 7
m = 8
n = 5

d = 10
k = 3
p = 20

#Create an arbitrary dataset
x = np.random.randint(0, m, (b,n))
y = np.random.randint(0, m, (b,n_max))

print(x)
print(y)

#initialize the layers
feed_forward = FeedForward(d,p)
attention = Attention(d,k) # Changed from k to n to n_max
embed_pos = EmbedPosition(n_max,m,d)
un_embed = LinearLayer(d,m)
softmax = Softmax()


#a manual forward pass
X = onehot(x, m)
z0 = embed_pos.forward(X)
z1 = feed_forward.forward(z0)
z2 = attention.forward(z1)
z3 = un_embed.forward(z2)
Z = softmax.forward(z3) 

z = np.argmax(Z,axis=1)
print(f"big z: {Z[0]}")
print(f"z: {z[0]}")

#check the shapes
assert X.shape == (b,m,n), f"X.shape={X.shape}, expected {(b,m,n)}"
assert z0.shape == (b,d,n), f"z0.shape={z0.shape}, expected {(b,d,n)}"
assert z1.shape == (b,d,n), f"z1.shape={z1.shape}, expected {(b,d,n)}"
assert z2.shape == (b,d,n), f"z2.shape={z2.shape}, expected {(b,d,n)}"
assert z3.shape == (b,m,n), f"z3.shape={z3.shape}, expected {(b,m,n)}"
assert Z.shape == (b,m,n), f"Z.shape={Z.shape}, expected {(b,m,n)}"

#is X one-hot?
assert X.sum() == b*n, f"X.sum()={X.sum()}, expected {b*n}"


assert np.allclose(Z.sum(axis=1), 1), f"Z.sum(axis=1)={Z.sum(axis=1)}, expected {np.ones(b)}"
assert np.abs(Z.sum() - b*n) < 1e-5, f"Z.sum()={Z.sum()}, expected {b*n}"
assert np.all(Z>=0), f"Z={Z}, expected all entries to be non-negative"


[[7 7 4 1 0]
 [7 4 0 5 4]
 [6 5 7 7 2]
 [6 1 4 4 2]
 [0 5 0 5 7]
 [4 2 4 4 7]]
[[5 6 1 7 1 3 3]
 [1 5 0 3 0 2 2]
 [7 6 1 5 7 7 7]
 [6 4 1 7 1 5 6]
 [1 6 1 5 2 1 6]
 [5 4 7 5 7 1 7]]
softmax input shape: (6, 5, 5)
softmax input shape: (6, 8, 5)
big z: [[0.12865772 0.12252745 0.12045068 0.11636366 0.12084251]
 [0.12210292 0.12562365 0.1340768  0.11965655 0.123948  ]
 [0.12112009 0.12457227 0.11981894 0.12056444 0.12256235]
 [0.13171911 0.12588557 0.13534263 0.13524617 0.13283516]
 [0.11719596 0.12186541 0.12735415 0.12607371 0.12617547]
 [0.12142782 0.12123568 0.13337525 0.13416313 0.13194341]
 [0.13034176 0.12857848 0.11661605 0.12756998 0.12230866]
 [0.12743462 0.12971149 0.11296551 0.12036236 0.11938443]]
z: [3 7 3 3 3]


In [4]:

#test the forward pass
x = np.random.randint(0, m, (b,n_max))
X = onehot(x, m)

#we test with a y that is shorter than the maximum length
#COMMENT 2P&1P: WTF!? Skal ikke disse alltid være like lange???????
n_y = n_max - 1
y = np.random.randint(0, m, (b,n_y))
print(y)

#initialize a neural network based on the layers above
network = NeuralNetwork([embed_pos, feed_forward, attention, un_embed, softmax])
#and a loss function
loss = CrossEntropy()

#do a forward pass
Z = network.forward(X[:,:,:-1])

#compute the loss
print(Z.shape)
print(y.shape)
L = loss.forward(Z, y)

#get the derivative of the loss wrt Z
grad_Z = loss.backward()
print(grad_Z.shape)

#and perform a backward pass
_ = network.backward(grad_Z)

#and and do a gradient descent step
_ = network.step_gd(0.01)

[[2 1 6 4 5 6]
 [7 2 2 7 0 7]
 [0 4 3 2 3 2]
 [6 3 1 0 2 0]
 [7 2 5 6 6 1]
 [2 6 7 1 0 7]]
softmax input shape: (6, 6, 6)
softmax input shape: (6, 8, 6)
(6, 8, 6)
(6, 6)
6
onehotshape: (6, 8, 6)
(6, 8, 6)
z_l:(6, 8, 6)
grad: (6, 8, 6)
P: (6, 8, 6)
Q: (6, 1, 6)
z_l:(6, 6, 6)
grad: (6, 6, 6)
P: (6, 6, 6)
Q: (6, 1, 6)
wpd shape: (10, 6)
grad shape: (6, 10, 6)


TypeError: isinstance expected 2 arguments, got 5

In [None]:
"""
Here you may add additional tests to for example:

- Check if the ['d'] keys in the parameter dictionaries are not None, or receive something when running backward pass
- Check if the parameters change when you perform a gradient descent step
- Check if the loss decreases when you perform a gradient descent step

This is voluntary, but could be useful.
"""

In [None]:
#check if loss is non-negative
assert L >= 0, f"L={L}, expected L>=0"
assert grad_Z.shape == Z.shape, f"grad_Z.shape={grad_Z.shape}, expected {Z.shape}"

#check if onehot(y) gives zero loss
Y = onehot(y, m)
L = loss.forward(Y, y)
assert L < 1e-5, f"L={L}, expected L<1e-5"


In [None]:
m = np.array([[[.29,.15,.10],
     [.21,.5,.04],
     [.15,.03,.11],
     [.13,.21,.43],
     [.22,.11,.32]],
     [
     [1,0,0],
     [0,1,0],
     [0,0,0],
     [0,0,0],
     [0,0,1]]])



loss = CrossEntropy()

loss.forward(m,np.array([[0,1,4],[0,1,4]]))
loss.backward()

array([[[-1.14942525, -0.        , -0.        ],
        [-0.        , -0.66666665, -0.        ],
        [-0.        , -0.        , -0.        ],
        [-0.        , -0.        , -0.        ],
        [-0.        , -0.        , -1.04166663]],

       [[-0.33333333, -0.        , -0.        ],
        [-0.        , -0.33333333, -0.        ],
        [-0.        , -0.        , -0.        ],
        [-0.        , -0.        , -0.        ],
        [-0.        , -0.        , -0.33333333]]])

In [None]:
x_T = np.array([[[0,0,1],
      [0,0,0]]
     ,[[0,0,0],
       [1,0,0]]])

a = np.ones((3,2))
b = a.T

s = np.einsum('bad,ds,sq,bqk -> bak',x_T,a, b, np.transpose(x_T, axes=(0,2,1)), optimize=True)
for i in range(len(x_T)):
    print(s == (x_T[i]@a)@(b@x_T[i].T))

print(s)


[[[ True  True]
  [ True  True]]

 [[False  True]
  [ True False]]]
[[[False  True]
  [ True False]]

 [[ True  True]
  [ True  True]]]
[[[2. 0.]
  [0. 0.]]

 [[0. 0.]
  [0. 2.]]]
