In [1]:
import autograd.numpy as np
import matplotlib.pyplot as plt
from autograd import grad, elementwise_grad, hessian, jacobian

import sys
sys.path.insert(0, "../../project_2/src")
from SGD import minibatch

In [2]:
# Inspired by Morten's example code from week 43

def activation_function(z):
    """Hidden-layer activation: rectified linear unit, applied element-wise.

    Every negative entry of ``z`` is replaced by 0; non-negative entries are
    passed through unchanged.
    """
    rectified = np.maximum(0, z)
    return rectified

def activation_out(z):
    """Output-layer activation: the identity, so ``z`` is returned untouched."""
    return z

"""
network_shape = [inputs, w1, w2, ..., wL, outputs]

W = wwwwb
    wwwwb
"""
def initialize_params(network_shape):
    """Create one parameter matrix per layer transition of the network.

    ``network_shape`` lists the layer widths as [inputs, w1, ..., wL, outputs].
    For a transition from ``k`` units to ``j`` units the returned matrix has
    shape (j, k + 1): the first ``k`` columns hold weights drawn from a standard
    normal and scaled by sqrt(2) / sqrt(k); the last column is the bias and
    starts at zero.

    Returns a list with ``len(network_shape) - 1`` matrices (empty when fewer
    than two layer widths are given).
    """
    P = []
    for k, j in zip(network_shape[:-1], network_shape[1:]):
        weights = np.random.randn(j, k) * np.sqrt(2) / np.sqrt(k)
        bias = np.zeros((j, 1))
        P.append(np.concatenate((weights, bias), axis=1))
    return P

def Network(x, P):
    """Evaluate the network at a single input point and return a scalar.

    ``x`` is reshaped to one row, so exactly one point is processed per call.
    Each matrix in ``P`` carries its bias in the last column, which is why a
    constant 1 is appended to the layer input before the matrix product.

    The trailing ``[0][0]`` unwraps the (1, 1) result so that the jacobian and
    hessian taken of this function are of a scalar output. Use
    ``Network_predict`` to evaluate many points at once after training.
    """
    a = x.reshape(1, -1)
    bias_input = np.ones((1, 1))
    for P_i in P:
        z = np.matmul(np.concatenate((a, bias_input), axis=1), P_i.T)
        a = activation_function(z)

    # The output layer uses activation_out on the last pre-activation z,
    # not the hidden activation computed in the final loop pass.
    return activation_out(z)[0][0]

def Network_predict(x, P):
    """Evaluate the network on a whole matrix of points in one pass.

    Intended for predictions after training: ``x`` holds one point per row and
    the result of the last layer is returned for every row. A column of ones is
    appended to each layer input so the last column of every matrix in ``P``
    acts as the bias.
    """
    a = x
    for P_i in P:
        bias_column = np.ones((np.size(a, 0), 1))
        z = np.matmul(np.concatenate((a, bias_column), axis=1), P_i.T)
        a = activation_function(z)

    # Output layer: activation_out applied to the last pre-activation
    return activation_out(z)

def optimize(X, P, N_minibatches, learning_rate, n_epochs):
    """Train the parameters ``P`` with plain mini-batch gradient descent.

    Parameters
    ----------
    X : array with one collocation point per row.
    P : list of parameter matrices; updated IN PLACE and also returned.
    N_minibatches : number of mini-batches per epoch; must not exceed the
        number of rows in ``X``.
    learning_rate : step size of each gradient-descent update.
    n_epochs : number of passes over all mini-batches.

    Returns
    -------
    The same list ``P`` holding the updated matrices.

    Raises
    ------
    AssertionError if ``N_minibatches`` is larger than the number of points.
    """
    assert N_minibatches <= np.size(X, 0)

    # Gradient of the cost with respect to its second argument, the parameters
    cost_func_grad = grad(costfunction, 1)

    for epoch in range(n_epochs):
        # NOTE(review): minibatch comes from the project's SGD module; it is
        # presumably a fresh split of the row indices of X into N_minibatches
        # groups each epoch - confirm against SGD.minibatch.
        mb = minibatch(X, N_minibatches)
        for i in range(N_minibatches):
            # compute gradients of the cost on this mini-batch
            cost_grad = cost_func_grad(X[mb[i]], P)
            for l in range(len(P)):
                P[l] -= learning_rate * cost_grad[l]
    return P

def g_trial(x, P):
    """Trial solution at a single point ``x`` (x[0] = position, x[1] = time).

    The first term reduces to sin(pi * x[0]) when x[1] == 0. The network term
    is multiplied by x[0] * (1 - x[0]) * x[1], so it vanishes at x[0] = 0,
    x[0] = 1 and x[1] = 0 and cannot disturb those conditions.
    """
    initial_part = (1 - x[1]) * np.sin(np.pi * x[0])
    vanishing_factor = x[0] * (1 - x[0]) * x[1]
    return initial_part + vanishing_factor * Network(x, P)

def g_trial_predict(x, P):
    """Trial solution for a matrix of points (column 0 = position, column 1 = time).

    Same formula as ``g_trial`` but evaluated for all rows at once through
    ``Network_predict``; returns a 1D array with one value per row of ``x``.
    """
    pos = x[:, 0]
    time = x[:, 1]
    network_out = Network_predict(x, P).reshape(-1)
    return (1 - time) * np.sin(np.pi * pos) + pos * (1 - pos) * time * network_out

def costfunction(x, P):
    """Mean squared residual of the trial solution over the points in ``x``.

    For every row of ``x`` the residual is the derivative of ``g_trial`` with
    respect to its second coordinate minus its second derivative with respect
    to the first coordinate. The squared residuals are averaged over the
    number of rows.
    """
    u_hess = hessian(g_trial)
    u_jacob = jacobian(g_trial)

    def squared_residual(point):
        # [0, 0] -> second derivative in the first coordinate,
        # [1]    -> first derivative in the second coordinate
        dudx2 = u_hess(point, P)[0, 0]
        dudt = u_jacob(point, P)[1]
        return (dudt - dudx2) ** 2

    return sum(squared_residual(point) for point in x) / np.size(x, 0)

In [3]:
# Collocation grid: 100 x 100 points on the unit square, flattened so that
# every row of X is one (x, t) pair.
x, t = (
    axis.flatten()
    for axis in np.meshgrid(np.linspace(0, 1, 100), np.linspace(0, 1, 100))
)
X = np.concatenate((x[:, None], t[:, None]), axis=1)

# Two inputs (x, t), one hidden layer of 50 units, one output
network_shape = [X.shape[1], 50, 1]
P = initialize_params(network_shape)


#y = Network(X[0, :].reshape(1, 2), P)

In [4]:
# Train with mini-batches of roughly 32 points each.
# Note: optimize updates P in place, so re-running this cell continues training.
P = optimize(
    X,
    P,
    N_minibatches=int(np.size(X, 0) / 32),
    learning_rate=0.002,
    n_epochs=10,
)

KeyboardInterrupt: 

In [None]:
# Heat map of the trial solution on the 100 x 100 grid
x = np.linspace(0, 1, 100)
t = np.linspace(0, 1, 100)
x_plot, t_plot = np.meshgrid(x, t)

# One prediction per row of X, folded back into the 100 x 100 grid shape
Z = g_trial_predict(X, P).reshape(100, 100)

fig, ax = plt.subplots(figsize=(4, 3))
cf = ax.pcolormesh(x_plot, t_plot, Z, cmap=plt.get_cmap("inferno"))
fig.colorbar(cf, ax=ax)
ax.set_xlabel("$x$")
ax.set_ylabel("$t$")

In [None]:
# network_shape = [inputs, w1, w2, ..., wL, outputs]
def initialize_params(network_shape):
    """Debug variant of the initialiser that prints every layer transition.

    For each transition from ``k`` units to ``j`` units a (j, k + 1) matrix is
    built: ``k`` columns of standard-normal weights scaled by
    sqrt(2) / sqrt(k), followed by one bias column of zeros. Returns the list
    of these matrices.

    NOTE: this redefinition replaces any earlier ``initialize_params`` in the
    same kernel.
    """
    P = []
    for i, (k, j) in enumerate(zip(network_shape[:-1], network_shape[1:]), start=1):
        print(i, "->", f"k={k}", f"j={j}")
        weights = np.random.randn(j, k) * np.sqrt(2) / np.sqrt(k)
        bias = np.zeros((j, 1))
        P.append(np.concatenate((weights, bias), axis=1))

    return P

# Inspect the layout of a tiny 2 -> 3 -> 1 network (last column of each matrix is the bias)
P = initialize_params([2, 3, 1])
print(P[0], P[1], sep="\n")

#X_i = [x, t, 1]

In [None]:
# Scratch check: appending a bias column to a (j, k) weight matrix gives (j, k + 1).
# Note: this overwrites the global P with a toy matrix.
k, j = 2, 4

P = np.zeros((j, k))
print(P)
b = np.ones((j, 1))
print(b)
P = np.concatenate((P, b), axis=1)
print(P)

In [1]:
import autograd.numpy as np
from autograd import jacobian,hessian,grad
import autograd.numpy.random as npr
from matplotlib import cm
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import axes3d

## Set up the network

def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), applied element-wise."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def deep_neural_network(deep_params, x):
    """Forward pass of a fully connected network for a single point.

    Parameters
    ----------
    deep_params : list of 2D arrays, one per layer (all hidden layers followed
        by the output layer). The first column of each matrix multiplies the
        constant 1 that is stacked on top of the layer input, i.e. it is the
        bias.
    x : 1D array with the coordinates of one point.

    Returns
    -------
    Element [0][0] of the output layer's result (a scalar for a single-output
    network). Hidden layers use ``sigmoid``; the output layer is linear.
    """
    # x is a point given as a 1D numpy array; make it a column vector
    num_coordinates = np.size(x, 0)
    x = x.reshape(num_coordinates, -1)

    num_points = np.size(x, 1)

    # Number of hidden layers = number of parameter matrices minus the output
    # layer. len() counts the matrices themselves. np.size() would first try to
    # pack the list into one array, which fails for differently shaped layers
    # on current NumPy and counts elements instead of layers otherwise.
    N_hidden = len(deep_params) - 1

    # Assume that the input layer does nothing to the input x
    x_prev = x

    ## Hidden layers:
    for l in range(N_hidden):
        # Weights and bias of this layer
        w_hidden = deep_params[l]

        # Add a row of ones to include bias
        x_prev = np.concatenate((np.ones((1, num_points)), x_prev), axis=0)

        # The activated output becomes the input of the next layer
        x_prev = sigmoid(np.matmul(w_hidden, x_prev))

    ## Output layer (linear):
    w_output = deep_params[-1]

    # Include bias:
    x_prev = np.concatenate((np.ones((1, num_points)), x_prev), axis=0)

    x_output = np.matmul(w_output, x_prev)

    return x_output[0][0]

## Define the trial solution and cost function
def u(x):
    """Profile sin(pi * x) that the trial solution reduces to at t = 0."""
    angle = np.pi * x
    return np.sin(angle)

def g_trial(point, P):
    """Trial solution at ``point = (x, t)`` for network parameters ``P``.

    Equals ``u(x)`` at t = 0; the network contribution is multiplied by
    x * (1 - x) * t and therefore vanishes at x = 0, x = 1 and t = 0.
    """
    x, t = point
    initial_part = (1 - t) * u(x)
    network_part = x * (1 - x) * t * deep_neural_network(P, point)
    return initial_part + network_part

# The right-hand side of the PDE:
def f(point):
    """Source term of the equation; identically zero for every ``point``."""
    return 0.0

# The cost function:
def cost_function(P, x, t):
    """Mean squared PDE residual of the trial solution on the grid ``x`` x ``t``.

    Parameters
    ----------
    P : list of parameter matrices passed on to ``g_trial``.
    x, t : 1D iterables of coordinates; every combination (x_, t_) is used.

    Returns
    -------
    The average over all grid points of ((dg/dt - d2g/dx2) - f(point))**2,
    where the derivatives of ``g_trial`` are taken with respect to the point.
    """
    cost_sum = 0

    g_t_jacobian_func = jacobian(g_trial)
    g_t_hessian_func = hessian(g_trial)

    for x_ in x:
        for t_ in t:
            point = np.array([x_, t_])

            # Only derivatives of the trial solution enter the residual, so the
            # plain forward value g_trial(point, P) is not evaluated here.
            g_t_jacobian = g_t_jacobian_func(point, P)
            g_t_hessian = g_t_hessian_func(point, P)

            g_t_dt = g_t_jacobian[1]      # derivative in the second coordinate (t)
            g_t_d2x = g_t_hessian[0][0]   # second derivative in the first coordinate (x)

            func = f(point)

            err_sqr = ((g_t_dt - g_t_d2x) - func)**2
            cost_sum += err_sqr

    return cost_sum / (np.size(x) * np.size(t))

## For comparison, define the analytical solution
def g_analytic(point):
    """Closed-form reference exp(-pi^2 * t) * sin(pi * x) at ``point = (x, t)``."""
    x, t = point
    decay = np.exp(-np.pi**2*t)
    profile = np.sin(np.pi*x)
    return decay * profile

## Set up a function for training the network to solve for the equation
def solve_pde_deep_neural_network(x,t, num_neurons, num_iter, lmb):
    """Train a network with full-batch gradient descent on ``cost_function``.

    Parameters
    ----------
    x, t : coordinate arrays forwarded to ``cost_function``.
    num_neurons : sequence with the width of each hidden layer.
    num_iter : number of gradient-descent updates.
    lmb : step size of each update.

    Returns
    -------
    List of parameter matrices (hidden layers first, output layer last).
    Prints the cost before and after training.
    """
    N_hidden = np.size(num_neurons)

    ## Set up initial weights and biases (one matrix per layer, +1 column for the bias)

    # First hidden layer: 2 input coordinates (x and t)
    P = [npr.randn(num_neurons[0], 2 + 1)]
    # Remaining hidden layers take the previous hidden layer as input
    P.extend(
        npr.randn(num_neurons[l], num_neurons[l-1] + 1) for l in range(1, N_hidden)
    )
    # Output layer: a single output unit
    P.append(npr.randn(1, num_neurons[-1] + 1))

    print('Initial cost: ',cost_function(P, x, t))

    # Gradient with respect to the first argument, the parameter list
    cost_function_grad = grad(cost_function,0)

    # Let the update be done num_iter times
    for _ in range(num_iter):
        cost_grad = cost_function_grad(P, x, t)
        P = [layer - lmb * layer_grad for layer, layer_grad in zip(P, cost_grad)]

    print('Final cost: ',cost_function(P, x, t))

    return P

def _plot_surface(T, X, values, title):
    """Draw ``values`` over the (t, x) grid as a 3D surface in a new figure."""
    fig = plt.figure(figsize=(10,10))
    # fig.gca(projection='3d') was deprecated in Matplotlib 3.4 and is rejected
    # by newer releases; add_subplot is the supported way to get 3D axes.
    ax = fig.add_subplot(111, projection='3d')
    ax.set_title(title)
    ax.plot_surface(T,X,values,linewidth=0,antialiased=False,cmap=cm.viridis)
    ax.set_xlabel('Time $t$')
    ax.set_ylabel('Position $x$')
    return ax


def _plot_time_slice(x, computed, analytical, time):
    """Plot the network and analytical solutions against x at one fixed time."""
    plt.figure(figsize=(10,10))
    plt.title("Computed solutions at time = %g"%time)
    plt.plot(x, computed)
    plt.plot(x, analytical)
    plt.legend(['dnn','analytical'])


if __name__ == '__main__':
    ### Use the neural network:
    npr.seed(15)

    ## Decide the values of arguments to the function to solve
    Nx = 100; Nt = 100
    x = np.linspace(0, 1, Nx)
    t = np.linspace(0,1,Nt)

    ## Set up the parameters for the network
    num_hidden_neurons = [100]
    num_iter = 10
    lmb = 0.01

    P = solve_pde_deep_neural_network(x,t, num_hidden_neurons, num_iter, lmb)

    ## Store the results: rows follow x, columns follow t
    g_dnn_ag = np.zeros((Nx, Nt))
    G_analytical = np.zeros((Nx, Nt))
    for i,x_ in enumerate(x):
        for j, t_ in enumerate(t):
            point = np.array([x_, t_])
            g_dnn_ag[i,j] = g_trial(point,P)

            G_analytical[i,j] = g_analytic(point)

    # Find the maximum absolute difference between the analytical and the computed solution
    diff_ag = np.abs(g_dnn_ag - G_analytical)
    print('Max absolute difference between the analytical solution and the network: %g'%np.max(diff_ag))

    ## Plot the solutions in two dimensions, that being in position and time

    T,X = np.meshgrid(t,x)

    _plot_surface(T, X, g_dnn_ag,
                  'Solution from the deep neural network w/ %d layer'%len(num_hidden_neurons))
    _plot_surface(T, X, G_analytical, 'Analytical solution')
    _plot_surface(T, X, diff_ag, 'Difference')

    ## Take some slices of the 3D plots just to see the solutions at particular times
    # (first, middle and last time step)
    for indx in (0, int(Nt/2), Nt-1):
        _plot_time_slice(x, g_dnn_ag[:,indx], G_analytical[:,indx], t[indx])

    plt.show()

Initial cost:  26.212887827901884
Final cost:  8.886877320097636
Max absolute difference between the analytical solution and the network: 0.576223


ImportError: cannot import name '_png' from 'matplotlib' (/home/nick/anaconda3/lib/python3.8/site-packages/matplotlib/__init__.py)

<Figure size 720x720 with 1 Axes>

ImportError: cannot import name '_png' from 'matplotlib' (/home/nick/anaconda3/lib/python3.8/site-packages/matplotlib/__init__.py)

<Figure size 720x720 with 1 Axes>

ImportError: cannot import name '_png' from 'matplotlib' (/home/nick/anaconda3/lib/python3.8/site-packages/matplotlib/__init__.py)

<Figure size 720x720 with 1 Axes>

ImportError: cannot import name '_png' from 'matplotlib' (/home/nick/anaconda3/lib/python3.8/site-packages/matplotlib/__init__.py)

<Figure size 720x720 with 1 Axes>

ImportError: cannot import name '_png' from 'matplotlib' (/home/nick/anaconda3/lib/python3.8/site-packages/matplotlib/__init__.py)

<Figure size 720x720 with 1 Axes>

ImportError: cannot import name '_png' from 'matplotlib' (/home/nick/anaconda3/lib/python3.8/site-packages/matplotlib/__init__.py)

<Figure size 720x720 with 1 Axes>