In [1]:
import autograd.numpy as np
import matplotlib.pyplot
from autograd import grad, elementwise_grad, hessian, jacobian

import sys
sys.path.insert(0, "../../project_2/src")
from SGD import minibatch

In [2]:
# Inspired by Morten's example code from week 43

def activation_function(z):
    """Hidden-layer activation: the rectified linear unit (ReLU).

    Returns the element-wise maximum of 0 and ``z``, so negative entries
    become 0 and non-negative entries pass through unchanged.
    """
    lower_bound = 0
    return np.maximum(lower_bound, z)

def activation_out(z):
    """Output-layer activation: the identity, so ``z`` is returned untouched."""
    output = z
    return output

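"""
Parameter layout used by the functions below.

network_shape = [inputs, w1, w2, ..., wL, outputs]

Each layer is stored as one matrix W with one row per neuron: the weights (w)
come first and the bias (b) sits in the last column, e.g. for a layer with
two neurons and four inputs:

W = wwwwb
    wwwwb
"""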
def initialize_params(network_shape):
    """Create randomly initialised parameter matrices, one per layer.

    For each consecutive pair ``(k, j)`` in ``network_shape`` a ``(j, k + 1)``
    array is built: the first ``k`` columns hold weights drawn from a standard
    normal distribution and scaled by ``sqrt(2) / sqrt(k)``; the last column
    holds the biases, which start at zero.

    Returns a list with one such array per layer (empty when
    ``network_shape`` has fewer than two entries).
    """
    layer_sizes = zip(network_shape[:-1], network_shape[1:])
    params = []
    for fan_in, fan_out in layer_sizes:
        weights = np.random.randn(fan_out, fan_in) * np.sqrt(2) / np.sqrt(fan_in)
        biases = np.zeros((fan_out, 1))
        # Bias goes in the trailing column so it lines up with the constant 1
        # that the forward pass appends to each layer input.
        params.append(np.concatenate((weights, biases), axis=1))

    return params

def Network(x, P):
    """Evaluate the network at a single input point and return a scalar.

    ``x`` is reshaped to one row. Every layer appends a constant 1 to its
    input (so the last column of the layer matrix acts as the bias),
    multiplies by the transposed layer matrix and applies
    ``activation_function``. The returned value is ``activation_out`` of the
    last layer's pre-activation, reduced with ``[0][0]`` so that the jacobian
    and hessian taken of this function see a scalar output. Use
    ``Network_predict`` for whole-matrix predictions after training.
    """
    bias_input = np.ones((1, 1))
    activations = x.reshape(1, -1)
    for layer_params in P:
        augmented = np.concatenate((activations, bias_input), axis=1)
        z = np.matmul(augmented, np.transpose(layer_params))
        activations = activation_function(z)

    # The output layer uses activation_out on its pre-activation z; the hidden
    # activation computed for the last layer is not used.
    return activation_out(z)[0][0]

def Network_predict(x, P):
    """Evaluate the network on a whole input matrix in one pass.

    Intended for predictions after training: ``x`` holds one input point per
    row. Every layer appends a column of ones to its input (matching the bias
    stored in the last column of each layer matrix), multiplies by the
    transposed layer matrix and applies ``activation_function``. The result is
    ``activation_out`` of the last layer's pre-activation, one row per input
    row.
    """
    activations = x
    for layer_params in P:
        n_rows = np.size(activations, 0)
        bias_column = np.ones((n_rows, 1))
        augmented = np.concatenate((activations, bias_column), axis=1)
        z = np.matmul(augmented, np.transpose(layer_params))
        activations = activation_function(z)

    return activation_out(z)

def optimize(X, P, N_minibatches, learning_rate, n_epochs):
    """Train the network parameters with mini-batch gradient descent.

    Parameters
    ----------
    X : array with one collocation point per row.
    P : list of per-layer parameter arrays; updated IN PLACE.
    N_minibatches : number of mini-batches per epoch; must not exceed the
        number of rows in ``X``.
    learning_rate : step size applied to every gradient.
    n_epochs : number of passes over the mini-batches.

    Returns
    -------
    The same list ``P`` that was passed in, after the updates.

    Raises
    ------
    AssertionError
        If ``N_minibatches`` is larger than the number of rows in ``X``.
    """
    assert N_minibatches <= np.size(X, 0)

    # Gradient of the cost with respect to its second argument (the parameters).
    cost_func_grad = grad(costfunction, 1)

    for epoch in range(n_epochs):
        # A fresh split is requested every epoch; mb[i] is used to index rows of X.
        mb = minibatch(X, N_minibatches)
        for i in range(N_minibatches):
            X_mb = X[mb[i]]
            cost_grad = cost_func_grad(X_mb, P)

            # One gradient array per layer, matched to the layer matrices by position.
            for layer_idx in range(len(P)):
                P[layer_idx] -= learning_rate * cost_grad[layer_idx]
    return P

def g_trial(x, P):
    """Trial solution at a single point ``x = (x[0], x[1])``.

    The first term reduces to ``sin(pi * x[0])`` when ``x[1] == 0``. The second
    term multiplies the network output by ``x[0] * (1 - x[0]) * x[1]``, which
    is zero whenever ``x[0]`` is 0 or 1 or ``x[1]`` is 0, so the network cannot
    change the value there.
    """
    pos, time = x[0], x[1]
    fixed_part = (1 - time) * np.sin(np.pi * pos)
    network_weight = pos * (1 - pos) * time
    return fixed_part + network_weight * Network(x, P)

def g_trial_predict(x, P):
    """Trial solution evaluated on a whole matrix of points at once.

    ``x`` is a 2-D array with one point per row; column 0 and column 1 play the
    roles that ``x[0]`` and ``x[1]`` play in ``g_trial``. The result has one
    row per input row, with the same shape as the ``Network_predict`` output.
    """
    # Slice COLUMNS, keeping them 2-D so they broadcast row-wise against the
    # Network_predict output. Plain x[0] / x[1] would pick the first two rows
    # of the matrix instead of the two coordinates of every point.
    pos = x[:, 0:1]
    time = x[:, 1:2]
    return (1 - time) * np.sin(np.pi * pos) + pos * (1 - pos) * time * Network_predict(x, P)

def costfunction(x, P):
    """Mean squared residual of the trial solution over the points in ``x``.

    For every row ``point`` of ``x`` the residual is the derivative of
    ``g_trial`` with respect to ``point[1]`` minus its second derivative with
    respect to ``point[0]``. The squared residuals are summed and divided by
    the number of rows in ``x``.
    """
    second_derivatives = hessian(g_trial)
    first_derivatives = jacobian(g_trial)

    def squared_residual(point):
        # Entry [0, 0] of the Hessian: second derivative along coordinate 0.
        d2_dx2 = second_derivatives(point, P)[0, 0]
        # Entry [1] of the Jacobian: first derivative along coordinate 1.
        d_dt = first_derivatives(point, P)[1]
        return (d_dt - d2_dx2) ** 2

    total = sum(squared_residual(point) for point in x)
    return total / np.size(x, 0)

In [3]:
# Build a 10 x 10 grid on [0, 1] x [0, 1] and flatten both coordinate arrays.
x, t = (axis.flatten() for axis in np.meshgrid(np.linspace(0, 1, 10), np.linspace(0, 1, 10)))
# One row per grid point: column 0 holds x, column 1 holds t.
X = np.concatenate((x.reshape(-1, 1), t.reshape(-1, 1)), axis=1)

# Input width is read from X; one hidden layer with 10 units; a single output.
network_shape = [np.size(X, 1), 10, 1]
P = initialize_params(network_shape)

In [4]:
# Train on the grid points. NOTE: P is both input and output, so re-running
# this cell continues training from the current parameters.
P = optimize(X, P, N_minibatches=10, learning_rate=0.002, n_epochs=10)

In [10]:
# Show the raw network output for every row of X (one output row per grid point).
Network_predict(X,P)

array([[-0.15754312],
       [-0.17524231],
       [-0.19143896],
       [-0.20209225],
       [-0.19808909],
       [-0.19408593],
       [-0.19008277],
       [-0.18607961],
       [-0.18207645],
       [-0.17807329],
       [-0.19856022],
       [-0.21967623],
       [-0.23756207],
       [-0.25333206],
       [-0.26911943],
       [-0.2849068 ],
       [-0.30069417],
       [-0.31648154],
       [-0.3322689 ],
       [-0.34805627],
       [-0.24336743],
       [-0.26151958],
       [-0.28263559],
       [-0.3010364 ],
       [-0.31697383],
       [-0.33271659],
       [-0.34850396],
       [-0.36429133],
       [-0.3800787 ],
       [-0.39586607],
       [-0.29026999],
       [-0.30336292],
       [-0.32447893],
       [-0.34559495],
       [-0.36378445],
       [-0.38067306],
       [-0.3963856 ],
       [-0.41210113],
       [-0.42788849],
       [-0.44367586],
       [-0.33717255],
       [-0.34520626],
       [-0.36632227],
       [-0.38743829],
       [-0.4085543 ],
       [-0

In [128]:
# network_shape = [inputs, w1, w2, ..., wL, outputs]
def initialize_params(network_shape):
    """Create per-layer parameter matrices, printing each layer's sizes.

    Same construction as the ``initialize_params`` defined earlier in this
    notebook (which this definition replaces once the cell runs), with a trace
    line per layer: the 1-based layer index, the input width ``k`` and the
    output width ``j``. Each returned array has shape ``(j, k + 1)``: scaled
    standard-normal weights followed by a zero bias column.
    """
    P = []
    for i, (k, j) in enumerate(zip(network_shape[:-1], network_shape[1:]), start=1):
        print(i, "->", f"k={k}", f"j={j}")
        weights = np.random.randn(j, k) * np.sqrt(2) / np.sqrt(k)
        bias_column = np.zeros((j, 1))
        P.append(np.concatenate((weights, bias_column), axis=1))

    return P

# Build a small 2-3-1 network and print both layer matrices, one after the other.
# NOTE: this rebinds the notebook-level P.
P = initialize_params([2, 3, 1])
print(*P[:2], sep="\n")

# X_i = [x, t, 1]: each layer input is extended with a constant 1, so the last
# column of every matrix in P acts as the bias.

1 -> k=2 j=3
2 -> k=3 j=1
[[ 1.53955784 -0.65574862  0.        ]
 [-0.25409971 -1.06275851  0.        ]
 [-1.2672916  -0.99436508  0.        ]]
[[-1.03736743 -0.64167878  0.18840454  0.        ]]


In [117]:
# Scratch check: append a bias column of ones to a (j, k) matrix of zeros.
k, j = 2, 4

P = np.zeros((j, k))
print(P)
b = np.ones((j, 1))
print(b)
P = np.concatenate((P, b), axis=1)
print(P)

[[0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]]
[[1.]
 [1.]
 [1.]
 [1.]]
[[0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]]
