In [1]:
import autograd.numpy as np
import matplotlib.pyplot as plt
from autograd import grad, elementwise_grad, hessian, jacobian

import sys
sys.path.insert(0, "../../project_2/src")
from SGD import minibatch

In [29]:
# Inspired by mortens example code in week 43

def activation_function(z):
    return np.maximum(0, z)

def activation_out(z):
    return z

"""
network_shape = [inputs, w1, w2, ..., wL, outputs]

W = wwwwb
    wwwwb
"""
def initialize_params(network_shape):
    P = []
    for i in range(1, len(network_shape)):
        k = network_shape[i-1]
        j = network_shape[i]
        P.append(np.random.randn(j, k) * np.sqrt(2) / np.sqrt(k))
        P[i-1] = np.concatenate((P[i-1], np.zeros(j).reshape(-1,1)), axis=1)

    return P

def Network(t, P):
    # Assumes the input t to be a scalar, returns a 2d-row vector e.g. shape=[1,6]
    a = t.reshape(-1,1)
    for P_i in P:
        #a = np.concatenate((a, np.ones(np.size(a, 0)).reshape(-1, 1)), axis=1) 
        a = np.concatenate((a, np.ones((1,1))), axis=1) 
        z = np.matmul(a, np.transpose(P_i))
        a = activation_function(z)
    
    return activation_out(z) 

def Network_predict(t, P):
    # For predictions
    # Assumes the input t to be a 1d-array, 
    # returns a matrix where each row corresponds to a vector for a particular t
    a = t.reshape(-1,1)
    for P_i in P:
        a = np.concatenate((a, np.ones(np.size(a, 0)).reshape(-1, 1)), axis=1) 
        #a = np.concatenate((a, np.ones((1,1))), axis=1) 
        z = np.matmul(a, np.transpose(P_i))
        a = activation_function(z)
    
    return activation_out(z) 

def optimize(t, P, A, x_0, N_minibatches, learning_rate, n_epochs):
    # Assumes t is a 1d-array.
    assert N_minibatches <= np.size(t, 0)

    cost_func_grad = grad(costfunction, 1) # Check which grad-call is correct.
    
    for epoch in range(n_epochs):
        mb = minibatch(t, N_minibatches)
        for i in range(N_minibatches):
            t_mb = t[mb[i]]
            M = np.size(t_mb, 0)
            # compute gradients of weights
            cost_grad = cost_func_grad(t_mb, P, A, x_0)
            for l in range(len(P)):
                P[l] -= learning_rate * cost_grad[l]
    return P

def g_trial(t, P, x_0):
    #assumes the input t to be a scalar, x_0 is 1d-row-vector which broadcasts to e.g. (1,6)
    
    return np.exp(-t)*x_0 + (1-np.exp(-t))*Network(t,P)

def g_trial_predict(t, P, x_0):
    # For predictions
    #assumes the input t to be a 1d-array, broadcasts along the rows of Network_predict's output
    t = t.reshape(-1,1)
    return np.exp(-t)*x_0 + (1-np.exp(-t))*Network_predict(t,P)

def costfunction(t, P, A, x_0):
    
    cost = 0
    g_grad = jacobian(g_trial,0) # Check that this is the correct grad-call
    
    for time in t:
        d_dt = g_grad(time,P,x_0).reshape(-1,1) # should have shape (eigenvector_length,1)
        x_t = g_trial(time,P,x_0).reshape(-1,1) #check shape, should be (eigenvector_length,1)
        # Right hand side is: -x + [x.T @ x @ A + (1 - x.T @ A @ x) @ I] @ x where x is x_t is a column vector
        right_side = -x_t + np.matmul( # -x + [
                                np.matmul(np.transpose(x_t), np.matmul(A, x_t)) #x.T @ A @ x
                                + ( 1 - np.matmul(np.transpose(x_t),np.matmul(A, x_t))) # + (1-x.t @ A @ x)
                                * np.identity(np.size(x_t, 0)) # * I
                                , x_t) # ] @ x
        cost = cost + np.sum((d_dt - right_side)**2) / np.size(x_t) 
                                                              
        

    return cost / np.size(t)

In [48]:
t = np.linspace(0,1,20)
Q = np.random.randn(6,6)
A = (np.transpose(Q) + Q) /2 #standard trick for making symmetric real
x_0 = np.random.randn(6) #initial guess for eigenvector.

network_shape = [1, 10, np.size(x_0)] #output must match eigenvector length
P = initialize_params(network_shape)

In [49]:
P = optimize(t, P, A, x_0,10, 0.002, 10)
#y = Network(X[0, :].reshape(1, 2), P)

In [50]:
g_trial_predict(t,P,x_0)

array([[ 0.37038099, -0.03053606, -0.63589588, -0.1285855 ,  0.50985643,
        -0.46647811],
       [ 0.34960479, -0.0322334 , -0.60725409, -0.1236994 ,  0.48766743,
        -0.44635143],
       [ 0.32604887, -0.03221664, -0.57528048, -0.12132387,  0.46617283,
        -0.42410407],
       [ 0.29992456, -0.02936822, -0.53759712, -0.12114634,  0.44699049,
        -0.39792149],
       [ 0.27148759, -0.02430781, -0.49550677, -0.12301638,  0.4295234 ,
        -0.36877857],
       [ 0.24093133, -0.01720412, -0.44940244, -0.12679486,  0.41362508,
        -0.33692905],
       [ 0.2084465 , -0.00821474, -0.39963155, -0.13233796,  0.39919966,
        -0.30260806],
       [ 0.17420988,  0.00251351, -0.34651645, -0.13951155,  0.38615697,
        -0.26603379],
       [ 0.13838515,  0.01484392, -0.29035607, -0.14819066,  0.3744122 ,
        -0.22740852],
       [ 0.10112384,  0.02864922, -0.2314275 , -0.15825881,  0.36388561,
        -0.18691973],
       [ 0.06256612,  0.04381099, -0.16998739, -0.

In [78]:
eigval, eigvec = np.linalg.eigh(A)
print(eigval)
print(eigvec)

[-2.43535591 -0.86674269  0.54381493  0.64100287  1.23398329  2.28431894]
[[-0.39857791 -0.484146   -0.05187932  0.37540123  0.68051778  0.00403822]
 [-0.36878851  0.00343064  0.34339448 -0.4610389   0.07126209 -0.72693076]
 [-0.69474421 -0.14977827  0.25445235 -0.0401098  -0.47461927  0.45086439]
 [ 0.15702803  0.25173375  0.62447677 -0.29661133  0.47961362  0.45165648]
 [ 0.00801069  0.32019802  0.50963524  0.74607829 -0.13874704 -0.24859087]
 [-0.44467874  0.75977674 -0.4061323   0.01723845  0.23931955  0.04985641]]


In [87]:
x_t = x_0.reshape(-1,1)
dt = 1
print(x_0)
for i in range(100):
    x_t = x_t + (-x_t + np.matmul(
                                np.matmul(np.transpose(x_t), np.matmul(A, x_t))
                                + ( 1 - np.matmul(np.transpose(x_t),np.matmul(A, x_t)))
                                * np.identity(np.size(x_t, 0))
                                , x_t))*dt
    print(x_t)

[ 0.37038099 -0.03053606 -0.63589588 -0.1285855   0.50985643 -0.46647811]
[[ 0.23484955]
 [-0.09377635]
 [-0.589981  ]
 [-0.17414605]
 [ 0.34917553]
 [-0.45111171]]
[[ 0.45837711]
 [ 0.05322078]
 [-0.55854008]
 [-0.04586543]
 [ 0.59932735]
 [-0.3873309 ]]
[[ 0.24349344]
 [ 0.09501306]
 [-0.12918307]
 [ 0.05870027]
 [ 0.29514842]
 [-0.06643888]]
[[ 0.29039866]
 [ 0.16941991]
 [-0.01325048]
 [ 0.139833  ]
 [ 0.33248608]
 [ 0.03787219]]
[[0.38258108]
 [0.27833818]
 [0.12093788]
 [0.25284423]
 [0.41884625]
 [0.16498839]]
[[0.32711584]
 [0.21819489]
 [0.05373103]
 [0.19155687]
 [0.36500846]
 [0.09975837]]
[[0.38652989]
 [0.28458059]
 [0.13064349]
 [0.25964757]
 [0.42199713]
 [0.17372478]]
[[0.30159963]
 [0.19283574]
 [0.02860903]
 [0.16623613]
 [0.33943761]
 [0.07457   ]]
[[0.37230116]
 [0.27312923]
 [0.1233858 ]
 [0.24887546]
 [0.40680217]
 [0.16529344]]
[[0.30399011]
 [0.19925382]
 [0.04110854]
 [0.17363921]
 [0.34042692]
 [0.08536754]]
[[0.36271484]
 [0.26530249]
 [0.11821589]
 [0.241479

In [88]:
x_t.T @ A @ x_t

array([[4.50616948e-05]])

In [86]:
#from eigh
x_t = eigvec[:,-1].reshape(-1,1)
print(x_t)
x_t.T @ A @ x_t

[[ 0.00403822]
 [-0.72693076]
 [ 0.45086439]
 [ 0.45165648]
 [-0.24859087]
 [ 0.04985641]]


array([[2.28431894]])