# FBSDE

Ji, Shaolin, Shige Peng, Ying Peng, and Xichuan Zhang. “Three Algorithms for Solving High-Dimensional Fully-Coupled FBSDEs through Deep Learning.” ArXiv:1907.05327 [Cs, Math], February 2, 2020. http://arxiv.org/abs/1907.05327.

In [66]:
import numpy as np
import tensorflow as tf
from keras.layers import Input, Dense, Lambda, Reshape, concatenate
from keras import Model, initializers
from keras.callbacks import ModelCheckpoint
from keras.metrics import mean_squared_error
import matplotlib.pyplot as plt

In [2]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices("GPU")))

Num GPUs Available:  4


2022-03-17 19:19:01.385130: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1
2022-03-17 19:19:01.569975: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: 
pciBusID: 0000:03:00.0 name: NVIDIA A100-SXM4-40GB computeCapability: 8.0
coreClock: 1.41GHz coreCount: 108 deviceMemorySize: 39.59GiB deviceMemoryBandwidth: 1.41TiB/s
2022-03-17 19:19:01.571665: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 1 with properties: 
pciBusID: 0000:44:00.0 name: NVIDIA A100-SXM4-40GB computeCapability: 8.0
coreClock: 1.41GHz coreCount: 108 deviceMemorySize: 39.59GiB deviceMemoryBandwidth: 1.41TiB/s
2022-03-17 19:19:01.573331: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 2 with properties: 
pciBusID: 0000:84:00.0 name: NVIDIA A100-SXM4-40GB computeCapability: 8.0
coreClock: 1.41GHz coreCount: 108 deviceMemorySize: 39.59GiB deviceMemoryBandwidth: 1.41TiB/s
2

In [3]:
n_paths = 256
n_timesteps = 10
n_dimensions = 4
n_factors = 2
T = 1.
dt = T / n_timesteps
batch_size = 128
epochs = 100

In [4]:
# convention: x = (s, alpha, q, c)

In [168]:
eta = 1.
lp = 1.
lm = 1.
k = 100.
sigma = 1.
zeta = 1.
phi = 1.
psi = 1.

In [1]:
def b(t, x, y, z):
    return [
        x[1],
        -eta * x[0],
        lp * tf.exp(-1 + k * y[2] / y[3] + x[0] * k) - lm * tf.exp(-1 - k * y[2] / y[3] - x[0] * k),
        lp * (1./k - y[2] / y[3]) * tf.exp(-1 + k * y[2] / y[3] + x[0] * k) - lm * (-1./k - y[2] / y[3]) * tf.exp(-1. - k * y[2] / y[3] - x[0] * k),
    ]

def s(t, x, y, z):
    return [[sigma, 0], [0, zeta], [0, 0], [0, 0]]

def dH_dx(t, x, y, z):
    return [
        y[3] * lp * tf.exp(-1. + k * y[2] / y[3] + x[0] * k) - y[3] * lm * tf.exp(-1. - k * y[2] / y[3] - x[0] * k),
        y[0] - eta * y[1],
        -2. * phi * x[2],
        0.
    ]

def dg_dx(x):
    return [x[2], 0., x[0] - 2 * psi * x[2], 1.]

In [170]:
paths = []

inputs_dW = Input(shape=(n_timesteps, n_factors))

x0 = tf.constant([[0., 0., 0., 0.]])
y0 = tf.Variable([[10., 10., 10., 10.]])

x = x0
y = y0

z = concatenate([x, y])
z = Dense(10, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-2))(z)
z = Dense(n_dimensions * n_factors, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-2))(z)
z = Reshape((n_dimensions, n_factors))(z)

paths += [[x, y, z]]

for i in range(n_timesteps):
            
    def dX(x, y, z, dw):
        
        def drift(arg):
            x, y, z = arg
            return tf.math.multiply(b(i*dt, x, y, z), dt)
        a0 = tf.vectorized_map(drift, (x, y, z))

        def noise(arg):
            x, y, z, dw = arg
            return tf.tensordot(s(i*dt, x, y, z), dw[i], [[1], [0]])
        a1 = tf.vectorized_map(noise, (x, y, z, dw))
        
        return a0 + a1

    def dY(x, y, z, dw):
        
        def drift(arg):
            x, y, z = arg
            return tf.math.multiply(dH_dx(i*dt, x, y, z), -dt)
        a0 = tf.vectorized_map(drift, (x, y, z))

        def noise(arg):
            x, y, z, dw = arg
            return tf.tensordot(z, dw[i], [[1], [0]])
        a1 = tf.vectorized_map(noise, (x, y, z, dw))
        
        return a0 + a1
    
    x, y = (
        Lambda(lambda r: r[0] + dX(r[0], r[1], r[2], r[3]))([x, y, z, inputs_dW]),
        Lambda(lambda r: r[1] + dY(r[0], r[1], r[2], r[3]))([x, y, z, inputs_dW]),
    )
    
    # we don't train z for the last time step; keep for consistency
    z = concatenate([x, y])
    z = Dense(10, activation='relu')(z)
    z = Dense(n_dimensions * n_factors, activation='relu')(z)
    z = Reshape((n_dimensions, n_factors))(z)

    paths += [[x, y, z]]
    
outputs_loss = Lambda(lambda r: r[1] - tf.transpose(tf.vectorized_map(dg_dx, r[0])))([x, y])
model_loss = Model(inputs_dW, outputs_loss)
model_loss.compile(loss='mse', optimizer='adam')

# (n_sample, n_timestep, x/y/z_k, n_dimension)
# skips the first time step
outputs_paths = tf.stack([tf.stack([p[0] for p in paths[1:]], axis=1), tf.stack([p[1] for p in paths[1:]], axis=1)] + [tf.stack([p[2][:, :, i] for p in paths[1:]], axis=1) for i in range(n_factors)], axis=2)
model_paths = Model(inputs_dW, outputs_paths)

The following Variables were used a Lambda layer's call (lambda_399), but
are not present in its tracked objects:
  <tf.Variable 'Variable:0' shape=(1, 4) dtype=float32>
It is possible that this is intended behavior, but it is more likely
an omission. This is a strong indication that this layer should be
formulated as a subclassed Layer rather than a Lambda layer.
The following Variables were used a Lambda layer's call (lambda_400), but
are not present in its tracked objects:
  <tf.Variable 'Variable:0' shape=(1, 4) dtype=float32>
It is possible that this is intended behavior, but it is more likely
an omission. This is a strong indication that this layer should be
formulated as a subclassed Layer rather than a Lambda layer.


# Training

In [171]:
dW = tf.sqrt(dt) * tf.random.normal((n_paths, n_timesteps, n_factors))
target = tf.zeros((n_paths, 4))
callback = ModelCheckpoint('_models/weights{epoch:03d}.h5', period=1, save_weights_only=True, overwrite=True)
model_loss.fit(dW, target, batch_size=batch_size, epochs=epochs, callbacks=[callback])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x153d8a1b25e0>

# Display paths and loss

In [173]:
# load bad model
model_loss.load_weights('_models/weights001.h5')

In [174]:
loss = model_loss(dW).numpy()
loss

array([[      nan,       nan,       nan,       nan],
       [      nan,       nan,       nan,       nan],
       [      nan,       nan,       nan,       nan],
       [      nan,       nan,       nan,       nan],
       [      nan,       nan,       nan,       nan],
       [      nan,       nan,       nan,       nan],
       [      nan,       nan,       nan,       nan],
       [      nan,       nan,       nan,       nan],
       [      nan,       nan,       nan,       nan],
       [      nan,       nan,       nan,       nan],
       [      nan,       nan,       nan,       nan],
       [      nan,       nan,       nan,       nan],
       [      nan,       nan,       nan,       nan],
       [      nan,       nan,       nan,       nan],
       [      nan,       nan,       nan,       nan],
       [      nan,       nan,       nan,       nan],
       [      nan,       nan,       nan,       nan],
       [      nan,       nan,       nan,       nan],
       [      nan,       nan,       nan,      

In [176]:
paths = model_paths(dW).numpy()

In [177]:
np.set_printoptions(edgeitems=30, linewidth=100000, formatter=dict(float=lambda x: "%9.4g" % x))
tf.transpose(paths[112, :, :, :], (1, 2, 0)).numpy()

array([[[   0.0298,   0.06413,    0.1027,    0.1452,    0.1911,    0.2402,    0.2918,    0.3456,     0.401,    0.4575],
        [  0.04532,   0.08766,    0.1266,    0.1616,    0.1924,    0.2186,    0.2399,    0.2561,    0.2668,     0.272],
        [      inf,       inf,       nan,       nan,       nan,       nan,       nan,       nan,       nan,       nan],
        [     -inf,      -inf,       nan,       nan,       nan,       nan,       nan,       nan,       nan,       nan]],

       [[     -inf,       nan,       nan,       nan,       nan,       nan,       nan,       nan,       nan,       nan],
        [       10,       nan,       nan,       nan,       nan,       nan,       nan,       nan,       nan,       nan],
        [       10,       nan,       nan,       nan,       nan,       nan,       nan,       nan,       nan,       nan],
        [       10,       nan,       nan,       nan,       nan,       nan,       nan,       nan,       nan,       nan]],

       [[      nan,       nan,      