# Test of Network architecture in the 1D case

We test different numbers of hidden layers and different numbers of nodes per layer, and analyze which architecture leads to the best results. Only the one-dimensional heat equation is considered here. We expect the findings to carry over, at least qualitatively, to the two-dimensional case.

In [1]:
import torch
import numpy as np
from heat_612 import heat_nn
import matplotlib.pyplot as plt
import time

We consider the one-dimensional heat equation
$$
\partial_t u - \kappa \Delta u = f
$$
where $\kappa = 0.1$ and $f(x) = \sin (\pi x)$.

In [2]:
### PDE parameters

# spatial dimension of the problem
dim = 1

# diffusion coefficient kappa in  u_t - kappa * u_xx = f
kappa = 0.1


def u_0(x):
    """Initial condition u(x, 0) = sin(pi x) + sin(4 pi x)."""
    return torch.sin(torch.pi * x) + torch.sin(4 * torch.pi * x)


def rhs(x, t):
    """Right-hand side f of the equation: f = sin(pi x), independent of t."""
    return torch.sin(torch.pi * x)


def u_analytic(x, t):
    """Closed-form solution of u_t - kappa * u_xx = sin(pi x) with u(x, 0) = u_0(x).

    The solution is the steady state sin(pi x) / (kappa * pi^2) plus two
    decaying modes: the sin(pi x) mode decays like exp(-kappa * pi^2 * t)
    and the sin(4 pi x) mode like exp(-16 * kappa * pi^2 * t).

    The module-level ``kappa`` is read at call time (it used to be
    hard-coded as 0.1 here), so the reference solution stays consistent
    with the diffusion coefficient handed to the network.
    """
    # amplitude of the steady-state sin(pi x) component
    steady_amp = 1 / (kappa * torch.pi**2)
    return ((1 - steady_amp) * torch.sin(torch.pi * x)
            * torch.exp(-torch.pi**2 * kappa * t)
            + torch.sin(4 * torch.pi * x)
            * torch.exp(-16 * torch.pi**2 * kappa * t)
            + steady_amp * torch.sin(torch.pi * x))

Test different numbers of layers and different layer widths.

In [3]:
### candidate architectures, one list of layer widths per network
### (the last entry is the output layer and always has size 1)

# earlier experiment, kept for reference: it was meant to show that widths
# growing with depth do better than widths shrinking with depth
# layers_list1 = [[32, 64, 1],
#                 [64, 64, 1],
#                 [32, 64, 64, 1],
#                 [64, 64, 32, 1],
#                 [64, 32, 1],
#                 [128, 64, 32, 1],
#                 [128, 64, 1],
#                 [128, 128, 64, 64, 32, 1],
#                 [128, 128, 64, 32, 1],
#                 [32, 64, 128, 128, 1]]
#
# layers_list1 = [[128, 128, 1],
#                 [128, 128, 128, 1]]
layers_list = [
    [512, 1],
    [256, 256, 1],
    [128, 128, 128, 128, 1],
]

for layers in layers_list:

    # re-seed both generators with fixed values on every pass so that all
    # architectures are compared from the same random state
    np.random.seed(238)
    torch.manual_seed(301)

    # timer starts here, so the runtime covers setup and both training calls
    start = time.perf_counter()

    # tanh for every layer except the last one, which gets no activation
    activations = [torch.tanh] * (len(layers) - 1)
    activations.append(None)

    pde_nn = heat_nn(layers, activations, dim, u_0, kappa, rhs)
    pde_nn.set_analytic_solution(u_analytic)

    N_colloc = 100
    pde_nn.set_data(N_colloc)

    # author's note: these settings give relatively good results compared
    # to other parameter choices (still not great, though)
    train_kwargs = dict(
        lr=1e-2,
        weight_decay=0.0,
        epochs=400,
        opt_time_scale=True,
        print_epochs=50,
    )
    pde_nn.train(**train_kwargs)

    # author's note: for kappa = 1, LBFGS needs roughly 100 epochs with
    # 30 iterations; for kappa = 0.1 prefer more iterations and fewer epochs
    lbfgs_kwargs = dict(lr=1, opt_time_scale=True, epochs=10, max_iter=50)
    pde_nn.train_lbfgs(**lbfgs_kwargs)

    end = time.perf_counter()

    ### report the error in the L^2 and L^{\infty} norm, plus the runtime
    L_2_err = pde_nn.L_2_error()
    L_infty_err = pde_nn.L_infty_error()
    for label, value in (
        ("Layers:            ", layers),
        ("L^2 error:         ", L_2_err),
        ("L_^{infty}_error:  ", L_infty_err),
    ):
        print(label, value)
    print("Runtime:           ", end - start, "\n")

Epoch 0, Loss: 47.562794, MSE: 0.174730
Epoch 50, Loss: 40.943607, MSE: 0.255055
Epoch 100, Loss: 30.745449, MSE: 0.145521
Epoch 150, Loss: 11.181912, MSE: 0.033161
Epoch 200, Loss: 6.445726, MSE: 0.017188
Epoch 250, Loss: 3.838291, MSE: 0.010264
Epoch 300, Loss: 2.465052, MSE: 0.008530
Epoch 350, Loss: 1.746549, MSE: 0.010089
Epoch 399, Loss: 1.380572, MSE: 0.006300
Epoch 0, Loss: 0.994634, MSE: 0.000758
Epoch 1, Loss: 0.658590, MSE: 0.000276
Epoch 2, Loss: 0.464263, MSE: 0.000508
Epoch 3, Loss: 0.381068, MSE: 0.000501
Epoch 4, Loss: 0.350143, MSE: 0.000306
Epoch 5, Loss: 0.321022, MSE: 0.000254
Epoch 6, Loss: 0.285766, MSE: 0.000164
Epoch 7, Loss: 0.260816, MSE: 0.000134
Epoch 8, Loss: 0.241848, MSE: 0.000094
Epoch 9, Loss: 0.229247, MSE: 0.000085
Layers:             [512, 1]
L^2 error:          0.0041156937
L_^{infty}_error:   0.042084187
Runtime:            490.85416423599236 

Epoch 0, Loss: 47.448982, MSE: 0.337690
Epoch 50, Loss: 8.724582, MSE: 0.022453
Epoch 100, Loss: 4.913282