# FBSDE

Ji, Shaolin, Shige Peng, Ying Peng, and Xichuan Zhang. “Three Algorithms for Solving High-Dimensional Fully-Coupled FBSDEs through Deep Learning.” ArXiv:1907.05327 [Cs, Math], February 2, 2020. http://arxiv.org/abs/1907.05327.

In [1]:
import numpy as np
import tensorflow as tf
from keras.layers import Input, Dense, Lambda, Reshape, concatenate, Layer
from keras import Model, initializers
from keras.callbacks import ModelCheckpoint
from keras.metrics import mean_squared_error
import matplotlib.pyplot as plt

In [2]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices("GPU")))

Num GPUs Available:  3


In [276]:
# numerical parameters
n_paths = 2 ** 20
n_timesteps = 4
n_dimensions = 4
n_diffusion_factors = 2
n_jump_factors = 2
T = 1.
dt = T / n_timesteps
batch_size = 128
epochs = 1000

In [278]:
# model parameters
eta = 1.
lp = 1.
lm = 1.
k = 100.
sigma = 1.
zeta = 1.
phi = 1.
psi = 1.
epsilon = 5e-3

# Initial value layer

In [279]:
class InitialValue(Layer):
    
    def __init__(self, y0, **kwargs):
        super().__init__(**kwargs)
        self.y0 = y0
    
    def call(self, inputs):
        return self.y0

# Coefficients

In [280]:
def b(t, x, y, z, r):
    
    ad = y[2] / y[3] + x[0]
    dp = tf.maximum(0., 1./k + ad)
    dm = tf.maximum(0., 1./k - ad)
    
    return [
        x[1],
        -eta * x[0],
        lp * tf.exp(-k * dp) - lm * tf.exp(-k * dm),
        lp * (x[0] + dp) * tf.exp(-k * dp) - lm * (x[0] - dp) * tf.exp(-k * dm),
    ]

def s(t, x, y, z, r):
    return [[sigma, 0], [0, zeta], [0, 0], [0, 0]]

# - dH_dx
def f(t, x, y, z, r):
    
    ad = y[2] / y[3] + x[0]
    dp = tf.maximum(0., 1./k + ad)
    dm = tf.maximum(0., 1./k - ad)

    return [
        -(y[3] * lp * tf.exp(-k * dp) - y[3] * lm * tf.exp(-k * dm)),
        -(y[0] - eta * y[1]),
        -(-2. * phi * x[2]),
        -(0.)
    ]

def v(t, x, y, z, r):
    return [[0, 0], [epsilon, -epsilon], [0, 0], [0, 0]]

# dg_dx
def g(x):
    return [x[2], 0., x[0] - 2 * psi * x[2], 1.]

In [281]:
def dX(i, x, y, z, r, dW, dN):
    
    t = i * dt
    
    def drift(arg):
        x, y, z, r = arg
        return tf.math.multiply(b(t, x, y, z, r), dt)
    a0 = tf.vectorized_map(drift, (x, y, z, r))
        
    def noise(arg):
        x, y, z, r, dW = arg
        return tf.tensordot(s(t, x, y, z ,r), dW[i], [[1], [0]])
    a1 = tf.vectorized_map(noise, (x, y, z, r, dW))

    def jump(arg):
        x, y, z, r, dN = arg
        return tf.tensordot(v(t, x, y, z ,r), dN[i], [[1], [0]])
    a2 = tf.vectorized_map(jump, (x, y, z, r, dN))
    
    return a0 + a1 + a2

def dY(i, x, y, z, r, dW, dN):
    
    t = i * dt

    def drift(arg):
        x, y, z, r = arg
        return tf.math.multiply(f(t, x, y, z, r), dt)
    a0 = tf.vectorized_map(drift, (x, y, z, r))

    def noise(arg):
        x, y, z, r, dW = arg
        return tf.tensordot(z, dW[i], [[1], [0]])
    a1 = tf.vectorized_map(noise, (x, y, z, r, dW))
    
    def jump(arg):
        x, y, z, r, dN = arg
        return tf.tensordot(r, dN[i], [[1], [0]])
    a2 = tf.vectorized_map(jump, (x, y, z, r, dN))
    
    return a0 + a1 + a2

# Model

In [282]:
paths = []

inputs_dW = Input(shape=(n_timesteps, n_diffusion_factors))
inputs_dN = Input(shape=(n_timesteps, n_jump_factors))

x0 = tf.Variable([[0., 0., 0., 0.]], trainable=False)
y0 = tf.Variable([[5., 5., 5., 5.]], trainable=True)

x = InitialValue(x0, name='x_0')(inputs_dW)
y = InitialValue(y0, name='y_0')(inputs_dW)

z = concatenate([x, y])
z = Dense(10, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-1), name='z1_0')(z)
z = Dense(n_dimensions * n_diffusion_factors, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-1), name='z2_0')(z)
z = Reshape((n_dimensions, n_diffusion_factors), name='zr_0')(z)

r = concatenate([x, y])
r = Dense(10, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-1), name='r1_0')(r)
r = Dense(n_dimensions * n_jump_factors, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-1), name='r2_0')(r)
r = Reshape((n_dimensions, n_jump_factors), name='rr_0')(r)

paths += [[x, y, z, r]]

@tf.function
def hx(i, r):
    return r[0] + dX(i, *r)

@tf.function
def hy(i, r):
    return r[1] + dY(i, *r)

for i in range(n_timesteps):
    
    x, y = (
        Lambda(lambda r: hx(i, r), name=f'x_{i+1}')([x, y, z, r, inputs_dW, inputs_dN]),
        Lambda(lambda r: hy(i, r), name=f'y_{i+1}')([x, y, z, r, inputs_dW, inputs_dN]),
    )
    
    # we don't train z for the last time step; keep for consistency
    z = concatenate([x, y])
    z = Dense(10, activation='relu', name=f'z1_{i+1}')(z)
    z = Dense(n_dimensions * n_diffusion_factors, activation='relu', name=f'z2_{i+1}')(z)
    z = Reshape((n_dimensions, n_diffusion_factors), name=f'zr_{i+1}')(z)
    
    # we don't train r for the last time step; keep for consistency
    r = concatenate([x, y])
    r = Dense(10, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-1), name=f'r1_{i+1}')(r)
    r = Dense(n_dimensions * n_jump_factors, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-1), name=f'r2_{i+1}')(r)
    r = Reshape((n_dimensions, n_jump_factors), name=f'rr_{i+1}')(r)

    paths += [[x, y, z, r]]
    
outputs_loss = Lambda(lambda r: r[1] - tf.transpose(tf.vectorized_map(g, r[0])))([x, y])
outputs_paths = tf.stack(
    [tf.stack([p[0] for p in paths[1:]], axis=1), tf.stack([p[1] for p in paths[1:]], axis=1)] + 
    [tf.stack([p[2][:, :, i] for p in paths[1:]], axis=1) for i in range(n_diffusion_factors)] +
    [tf.stack([p[3][:, :, i] for p in paths[1:]], axis=1) for i in range(n_jump_factors)], axis=2)

model_loss = Model([inputs_dW, inputs_dN], outputs_loss)
model_loss.compile(loss='mse', optimizer='adam')

# (n_sample, n_timestep, x/y/z_k, n_dimension)
# skips the first time step
model_paths = Model([inputs_dW, inputs_dN], outputs_paths)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'


In [271]:
model_loss.summary()

Model: "model_45"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_85 (InputLayer)           [(None, 4, 2)]       0                                            
__________________________________________________________________________________________________
x_0 (InitialValue)              (1, 4)               4           input_85[0][0]                   
__________________________________________________________________________________________________
y_0 (InitialValue)              (1, 4)               4           input_85[0][0]                   
__________________________________________________________________________________________________
concatenate_657 (Concatenate)   (1, 8)               0           x_0[0][0]                        
                                                                 y_0[0][0]                 

# Training

In [272]:
dW = tf.sqrt(dt) * tf.random.normal((n_paths, n_timesteps, n_diffusion_factors))
dN = tf.random.poisson((n_paths, n_timesteps), [dt * lp, dt * lm])
target = tf.zeros((n_paths, n_dimensions))

In [273]:
# check for exploding gradients before training

with tf.GradientTape() as tape:
    loss = model_loss([dW, dN])

# bias of the last dense layer
variables = model_loss.variables[-1]
tape.gradient(loss, variables)

The following Variables were used a Lambda layer's call (x_1), but
are not present in its tracked objects:
  <tf.Variable 'Variable:0' shape=(1, 4) dtype=float32, numpy=array([[        5,         5,         5,         5]], dtype=float32)>
It is possible that this is intended behavior, but it is more likely
an omission. This is a strong indication that this layer should be
formulated as a subclassed Layer rather than a Lambda layer.
The following Variables were used a Lambda layer's call (y_1), but
are not present in its tracked objects:
  <tf.Variable 'Variable:0' shape=(1, 4) dtype=float32, numpy=array([[        5,         5,         5,         5]], dtype=float32)>
It is possible that this is intended behavior, but it is more likely
an omission. This is a strong indication that this layer should be
formulated as a subclassed Layer rather than a Lambda layer.


<tf.Tensor: shape=(8,), dtype=float32, numpy=array([       14,         0,         0,         0,      2031,         0,      2031,         0], dtype=float32)>

In [274]:
callback = ModelCheckpoint('_models/weights{epoch:04d}.h5', save_weights_only=True, overwrite=True)
model_loss.save_weights('_models/weights0000.h5')
model_loss.fit([dW, dN], target, batch_size=batch_size, epochs=1000, callbacks=[callback])


Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

KeyboardInterrupt: 

In [275]:
model_loss.variables[1]

<tf.Variable 'Variable:0' shape=(1, 4) dtype=float32, numpy=array([[    3.387,     3.427,     3.703,     3.433]], dtype=float32)>

# Display paths and loss

In [183]:
# load bad model
model_loss.load_weights('_models/weights0001.h5')

In [184]:
loss = model_loss([dW, dN]).numpy()
loss



array([[-2.7892856e+05,  5.7580199e+04,  2.9229816e+04,  2.2272785e+04],
       [-3.0421126e+11,  5.4906335e+02,  2.0158292e+11, -2.4858957e+01],
       [-1.4093776e+02,  7.8264332e+00,  1.3364545e+02, -2.5493622e-01],
       [-2.7537908e+03,  6.6549384e+02,  3.6594989e+02,  2.9560254e+02],
       [-3.4703342e+01,  9.8967619e+00,  3.0825493e+01,  2.0972550e+00],
       [-3.3865441e+05,  7.6042847e+01,  1.9492478e+05, -5.9934139e-01],
       [-1.1157483e+01,  4.2878218e+00,  2.1506187e+01, -4.2990202e-01],
       [-2.3254272e+03,  1.1686725e+02,  7.2250928e+02,  2.4598133e+01],
       [-2.4142380e+01,  6.7904444e+00,  3.3732925e+01, -1.9588035e-01],
       [-8.0501183e+01,  1.8984909e+01,  5.0932274e+01,  5.5728073e+00],
       [-5.8120212e+05,  1.1134502e+01,  6.7598969e+05, -2.3334920e+00],
       [-3.7009297e+04,  1.0674642e+04,  2.2117695e+03,  4.3615493e+03],
       [-7.7775528e+01,  1.8671093e+01,  4.9698235e+01,  5.4542475e+00],
       [-1.4375246e+01,  5.8834391e+00,  2.1985283e

In [185]:
paths = model_paths([dW, dN]).numpy()

In [188]:
np.set_printoptions(edgeitems=30, linewidth=100000, formatter=dict(float=lambda x: "%9.4g" % x))
tf.transpose(paths[0, :, :, :], (1, 2, 0)).numpy()

array([[[    1.548,     2.081,     2.594,     3.082,     3.539,      3.96,     4.341,     4.678,     4.966,     5.203],
        [   0.8524,    0.6499,    0.3942,   0.08711,   -0.2687,   -0.6702,    -1.114,    -1.596,    -2.111,    -2.655],
        [    1.267,     1.784,     2.698,     4.522,     10.15,     26.01,     117.8,     128.1, 1.113e+04,  1.34e+04],
        [     1.01,    0.9629,    0.8447,    0.2821,    -5.056,    -29.29,      -292,    -295.1, -7.657e+04, -8.803e+04]],

       [[    1.466,    0.3521,    -1.664,    -5.541,    -17.31,    -48.15,    -271.1,    -439.2, -2.043e+05, -2.655e+05],
        [        2,     2.252,      2.99,     3.405,       4.3,     6.539,     20.14,     61.24,     132.6, 5.758e+04],
        [    2.197,      2.45,     2.807,     4.071,     5.281,     9.444,     24.07,     146.9,     212.5,      2439],
        [    1.996,     2.164,     2.144,     2.089,     2.089,     2.445,     18.52,     18.52,     35.06, 2.227e+04]],

       [[        0,         0,  