# Deep Ritz Method

## Introduction of Example 2

We use the Deep Ritz Method to solve the following PDE problem (a Poisson equation in high dimension):

$$
\begin{align}
-\Delta u(x) &= 0, &x \in (0,1)^{10} \\
u(x) &= \sum_{k=1}^5x_{2k-1}x_{2k}, &x \in \partial(0,1)^{10}
\end{align}
$$

> Note:   
> This code is adapted from [xdfeng7370](https://github.com/xdfeng7370/Deep-Ritz-Method); some contents have been modified.

## 1. Import modules and libraries

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim, autograd
from matplotlib import pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable

## 2. Define functions and Deep Ritz Network

In [2]:
class PowerReLU(nn.Module):
    """
    Element-wise activation sigma(x)^power, where sigma is the rectified
    linear unit: the input goes through ReLU and the result is raised to
    the given power.

    NOTE: the original author flagged the inplace option as possibly not
    working. It is simply forwarded to F.relu.

    Parameters:
        inplace -- forwarded to F.relu as its inplace argument
        power   -- exponent applied to the rectified input

    INPUT:
        x -- size (N, *) tensor where * is any number of additional dimensions
    OUTPUT:
        y -- size (N, *)
    """

    def __init__(self, inplace=False, power=3):
        super().__init__()
        self.power = power
        self.inplace = inplace

    def forward(self, input):
        # rectify first, then apply the exponent
        rectified = F.relu(input, inplace=self.inplace)
        return rectified.pow(self.power)


class Block(nn.Module):
    """
    Implementation of the block used in the Deep Ritz Paper,
    it contains two fully-connected layers, each followed by the activation
    function, and a residual connection:

        y = phi(L2(phi(L1(x)))) + x

    Because the input is added to the output, the output of the second layer
    must be shape-compatible with x (in this file in_N == out_N).

    Parameters:
        in_N  -- dimension of the input
        width -- number of nodes in the interior middle layer
        out_N -- dimension of the output
        phi   -- activation module applied after each linear layer;
                 None (default) selects nn.ReLU()
    """

    def __init__(self, in_N, width, out_N, phi=None):
        super(Block, self).__init__()
        # create the necessary linear layers
        self.L1 = nn.Linear(in_N, width)
        self.L2 = nn.Linear(width, out_N)
        # Use the caller's activation when one is given. The fallback is ReLU,
        # the activation this block applied unconditionally before `phi` was
        # honoured, so calls that omit `phi` behave exactly as they used to.
        self.phi = nn.ReLU() if phi is None else phi

    def forward(self, x):
        hidden = self.phi(self.L1(x))
        # residual (skip) connection
        return self.phi(self.L2(hidden)) + x


class drrnn(nn.Module):
    """
    drrnn -- Deep Ritz Residual Neural Network

    Implements a network with the architecture used in the deep ritz method
    paper: an input linear layer (in_N -> m), `depth` residual blocks of
    width m, and an output linear layer (m -> out_N).

    Parameters:
        in_N  -- input dimension
        m     -- width of layers that form blocks
        out_N -- output dimension
        depth -- number of blocks to be stacked
        phi   -- activation module handed to every Block through its `phi`
                 argument; None (default) selects nn.ReLU(). The same module
                 instance is shared by all blocks.
    """

    def __init__(self, in_N, m, out_N, depth=4, phi=None):
        super(drrnn, self).__init__()
        # set parameters
        self.in_N = in_N
        self.m = m
        self.out_N = out_N
        self.depth = depth
        # ReLU is the fallback so that calls omitting `phi` keep the
        # activation this network has always been built with.
        self.phi = nn.ReLU() if phi is None else phi
        # list for holding all the layers
        self.stack = nn.ModuleList()

        # add first layer to list
        self.stack.append(nn.Linear(in_N, m))

        # add middle blocks to list
        for _ in range(depth):
            self.stack.append(Block(m, m, m, phi=self.phi))

        # add output linear layer
        self.stack.append(nn.Linear(m, out_N))

    def forward(self, x):
        # apply input layer, residual blocks and output layer in order
        for layer in self.stack:
            x = layer(x)
        return x

Then, we randomly sample training points

In [3]:
def get_interior_points(N=1000, d=10):
    """
    randomly sample N points uniformly from the unit cube [0,1)^d

    The PDE of this example is posed on (0,1)^10, and both the boundary
    sampler and the test cell work on the unit cube, so the collocation
    points for the energy term are drawn from the same cube (previously they
    were drawn from [-1,1)^d, which does not match the problem domain).

    Parameters:
        N -- number of points
        d -- spatial dimension
    Returns:
        tensor of size (N, d)
    """
    return torch.rand(N, d)


def get_boundary_points(N=100, d=10):
    """
    randomly sample points from the boundary of the unit cube [0,1]^d

    The cube has 2*d faces (x_i = 0 and x_i = 1 for each coordinate i).
    N points are drawn per face: all coordinates are uniform in [0,1) and
    the i-th coordinate is then pinned to 0 or 1.

    Parameters:
        N -- number of points per face
        d -- spatial dimension (default 10, the dimension of this example)
    Returns:
        tensor of size (2*d*N, d); rows [2*i*N, (2*i+1)*N) lie on x_i = 0 and
        rows [(2*i+1)*N, (2*i+2)*N) lie on x_i = 1
    """
    xb = torch.rand(2 * d * N, d)
    for i in range(d):
        start = 2 * i * N
        xb[start: start + N, i] = 0.
        xb[start + N: start + 2 * N, i] = 1.

    return xb


def u(x):
    """
    Boundary data / exact solution of the example:
    x_1*x_2 + x_3*x_4 + ... + x_9*x_10, evaluated row-wise.

    Only the first ten columns of x are used. Column slices of width one
    are multiplied, so for a 2-D input of size (N, >=10) the result has
    size (N, 1).
    """
    # builtin sum starts at 0 and adds the five pair products left to right
    return sum(x[:, k:k + 1] * x[:, k + 1:k + 2] for k in range(0, 10, 2))

## 3. Train

In [4]:
def init_weights(m):
    """
    Initialise a single module; meant to be used as model.apply(init_weights).

    For nn.Conv2d and nn.Linear modules the weight is filled with Kaiming
    normal values and the bias, when the layer has one, is set to zero.
    Any other module is left untouched.

    Parameters:
        m -- the module visited by nn.Module.apply
    """
    if not isinstance(m, (nn.Conv2d, nn.Linear)):
        return
    nn.init.kaiming_normal_(m.weight)  # Initialize the weight Tensor with values using kaiming initialization.
    # Layers created with bias=False have m.bias set to None; skip them
    # instead of crashing inside nn.init.constant_.
    if m.bias is not None:
        nn.init.constant_(m.bias, 0.0)  # Fill the bias Tensor with 0.0


def train(epochs=20000, save_path='assets/weights/deep_ritz_example2_best.mdl'):
    """
    Train the Deep Ritz network for Example 2 and checkpoint the best model.

    Every epoch a fresh set of interior and boundary points is sampled and
    the loss

        mean(0.5 * |grad u(xr)|^2) + boundary_weight * mean((u(xb) - g(xb))^2)

    is minimised with Adam (lr = 1e-3), where g is the function `u` defined
    in this file. Progress is printed every 100 epochs. During the last fifth
    of training, the model with the lowest logged loss is written to
    `save_path`.

    Parameters:
        epochs    -- number of optimisation steps (the loop runs epochs + 1 times)
        save_path -- file the best state_dict is saved to; its directory is
                     created if it does not exist
    """
    import os

    in_N = 10
    m = 10
    out_N = 1
    # Weight of the boundary penalty term, value kept from the original code.
    # NOTE(review): presumably 20 boundary faces x penalty 500 -- confirm.
    boundary_weight = 20 * 500

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = drrnn(in_N, m, out_N).to(device)
    model.apply(init_weights)

    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    # torch.save does not create missing directories
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    # Start from +inf so that a checkpoint is written even when the loss
    # never drops below some fixed threshold.
    best_loss, best_epoch = float('inf'), 0
    for epoch in range(epochs + 1):

        # generate the data set
        xr = get_interior_points().to(device)
        xr.requires_grad_()  # the energy term needs d(model)/d(xr)
        xb = get_boundary_points().to(device)

        output_r = model(xr)  # u(xr)
        output_b = model(xb)  # u(xb)
        # create_graph=True keeps the graph of the gradient itself so that
        # the loss, which depends on grads, can be back-propagated.
        grads = autograd.grad(outputs=output_r, inputs=xr,
                              grad_outputs=torch.ones_like(output_r),
                              create_graph=True)[0]
        loss_r = torch.mean(0.5 * torch.sum(torch.pow(grads, 2), dim=1))
        loss_b = boundary_weight * torch.mean(torch.pow(output_b - u(xb), 2))
        loss = loss_r + loss_b

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch % 100 == 0:
            loss_value = loss.item()
            print('epoch:', epoch, 'loss:', loss_value, 'loss_r:', loss_r.item(), 'loss_b:', loss_b.item())
            # only the last fifth of training is eligible for checkpointing;
            # the loss is a sum of squares, so no abs() is needed
            if epoch > int(4 * epochs / 5) and loss_value < best_loss:
                best_loss = loss_value
                best_epoch = epoch
                torch.save(model.state_dict(), save_path)
    print('best epoch:', best_epoch, 'best loss:', best_loss)


In [5]:
# Run the training loop defined in train(); it prints the loss every 100 epochs.
train()

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


epoch: 0 loss: 1216833.5 loss_r: 166.364990234375 loss_b: 1216667.125
epoch: 100 loss: 9444.369140625 loss_r: 46.557254791259766 loss_b: 9397.8115234375
epoch: 200 loss: 5679.72998046875 loss_r: 31.830673217773438 loss_b: 5647.8994140625
epoch: 300 loss: 3653.927490234375 loss_r: 32.61579895019531 loss_b: 3621.311767578125
epoch: 400 loss: 2526.60302734375 loss_r: 26.98415184020996 loss_b: 2499.618896484375
epoch: 500 loss: 1786.626220703125 loss_r: 25.05324363708496 loss_b: 1761.572998046875
epoch: 600 loss: 1452.3428955078125 loss_r: 22.45516014099121 loss_b: 1429.8876953125
epoch: 700 loss: 1210.302490234375 loss_r: 23.71978187561035 loss_b: 1186.582763671875
epoch: 800 loss: 1097.178466796875 loss_r: 22.216724395751953 loss_b: 1074.9617919921875
epoch: 900 loss: 921.888916015625 loss_r: 24.267675399780273 loss_b: 897.6212158203125
epoch: 1000 loss: 858.8582153320312 loss_r: 20.839433670043945 loss_b: 838.018798828125
epoch: 1100 loss: 856.8363037109375 loss_r: 25.811494827270508 lo

## 4. Test

In [10]:
# Evaluate the saved checkpoint: relative L^2 error against the exact solution
# on random points of the unit cube.
in_N = 10
m = 10
out_N = 1

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = drrnn(in_N, m, out_N).to(device)

# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
# weights_only=True restricts unpickling to tensors and plain containers, which
# is all a state_dict holds, and avoids the torch.load FutureWarning.
state_dict = torch.load('assets/weights/deep_ritz_example2_best.mdl',
                        map_location=device, weights_only=True)
model.load_state_dict(state_dict)
print('load weights from ckpt!')

with torch.no_grad():
    x = torch.rand(100000, 10)
    x = x.to(device)
    u_exact = u(x)
    u_pred = model(x)
# relative error: RMS of (prediction - exact) divided by RMS of exact
err_l2 = torch.sqrt(torch.mean(torch.pow(u_pred - u_exact, 2))) / torch.sqrt(torch.mean(torch.pow(u_exact, 2)))
print('L^2 relative error:', err_l2.item())


load weights from ckpt!
L^2 relative error: 0.026047656312584877


  model.load_state_dict(torch.load('assets/weights/deep_ritz_example2_best.mdl'))


## References

1. [The Deep Ritz Method: A Deep Learning-Based Numerical Algorithm for Solving Variational Problems](http://link.springer.com/10.1007/s40304-018-0127-z)
2. [Deep-Ritz-Method GitHub](https://github.com/xdfeng7370/Deep-Ritz-Method)
3. [DeepRitz GitHub](https://github.com/junbinhuang/DeepRitz)