# FBSDE

Ji, Shaolin, Shige Peng, Ying Peng, and Xichuan Zhang. “Three Algorithms for Solving High-Dimensional Fully-Coupled FBSDEs through Deep Learning.” ArXiv:1907.05327 [Cs, Math], February 2, 2020. http://arxiv.org/abs/1907.05327.

In [1]:
%load_ext tensorboard

In [9]:
import numpy as np
import tensorflow as tf
from keras.layers import Input, Dense, Lambda, Reshape, concatenate, Layer, RepeatVector
from keras import Model, initializers
from keras.callbacks import ModelCheckpoint
from keras.metrics import mean_squared_error
import matplotlib.pyplot as plt
from datetime import datetime

In [34]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices("GPU")))

Num GPUs Available:  3


In [68]:
# numerical parameters
n_paths = 2 ** 18
n_timesteps = 8
n_dimensions = 4
n_diffusion_factors = 2
n_jump_factors = 2
T = 1.
dt = T / n_timesteps
batch_size = 128
epochs = 1000

In [69]:
# model parameters
eta = 1.
lp = 1.
lm = 1.
k = 1.
sigma = 1.
zeta = 1.
phi = 1.
psi = 1e-2
epsilon = 5e-3

# Coefficients

In [70]:
def b(t, x, y, z, r):
    
    ad = y[2] / y[3] + x[0]
    dp = tf.maximum(0., 1./k + ad)
    dm = tf.maximum(0., 1./k - ad)
    
    return [
        x[1],
        -eta * x[0],
        lm * tf.exp(-k * dm) - lp * tf.exp(-k * dp),
        lp * (x[0] + dp) * tf.exp(-k * dp) - lm * (x[0] - dp) * tf.exp(-k * dm),
    ]

def s(t, x, y, z, r):
    return [[sigma, 0], [0, zeta], [0, 0], [0, 0]]

# - dH_dx
def f(t, x, y, z, r):
    
    ad = y[2] / y[3] + x[0]
    dp = tf.maximum(0., 1./k + ad)
    dm = tf.maximum(0., 1./k - ad)

    return [
        -(y[3] * lp * tf.exp(-k * dp) - y[3] * lm * tf.exp(-k * dm)),
        -(y[0] - eta * y[1]),
        -(-2. * phi * x[2]),
        -(0.)
    ]

def v(t, x, y, z, r):
    return [[0, 0], [epsilon, -epsilon], [0, 0], [0, 0]]

# dg_dx
def g(x):
    return [x[2], 0., x[0] - 2 * psi * x[2], 1.]

In [71]:
def dX(t, x, y, z, r, dW, dN):
            
    def drift(arg):
        x, y, z, r = arg
        return tf.math.multiply(b(t, x, y, z, r), dt)
    a0 = tf.vectorized_map(drift, (x, y, z, r))
        
    def noise(arg):
        x, y, z, r, dW = arg
        return tf.tensordot(s(t, x, y, z ,r), dW, [[1], [0]])
    a1 = tf.vectorized_map(noise, (x, y, z, r, dW))

    def jump(arg):
        x, y, z, r, dN = arg
        return tf.tensordot(v(t, x, y, z ,r), dN, [[1], [0]])
    a2 = tf.vectorized_map(jump, (x, y, z, r, dN))
    
    return a0 + a1 + a2

def dY(t, x, y, z, r, dW, dN):
    
    def drift(arg):
        x, y, z, r = arg
        return tf.math.multiply(f(t, x, y, z, r), dt)
    a0 = tf.vectorized_map(drift, (x, y, z, r))

    def noise(arg):
        x, y, z, r, dW = arg
        return tf.tensordot(z, dW, [[1], [0]])
    a1 = tf.vectorized_map(noise, (x, y, z, r, dW))
    
    def jump(arg):
        x, y, z, r, dN = arg
        return tf.tensordot(r, dN, [[1], [0]])
    a2 = tf.vectorized_map(jump, (x, y, z, r, dN))
    
    return a0 + a1 + a2

# Model

In [72]:
class InitialValue(Layer):
    
    def __init__(self, y0, **kwargs):
        super().__init__(**kwargs)
        self.y0 = y0
    
    def call(self, inputs):
        return self.y0


class Adjoint(Layer):
    
    def __init__(self, n_hidden_units, n_dimensions, n_factors):
        super(Adjoint, self).__init__()
        self.first = Dense(n_hidden_units, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-1))
        self.second = Dense(n_hidden_units, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-1))
        self.third = Dense(n_dimensions * n_factors, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-1))
        self.reshape = Reshape((n_dimensions, n_factors))

    def call(self, inputs):
        x = self.first(inputs)
        x = self.second(x)
        x = self.third(x)
        return self.reshape(x)


In [73]:
paths = []

inputs_dW = Input(shape=(n_timesteps, n_diffusion_factors))
inputs_dN = Input(shape=(n_timesteps, n_jump_factors))

x0 = tf.Variable([[0., 0., 0., 0.]], trainable=False)
y0 = tf.Variable([[5., 5., 5., 5.]], trainable=True)

# anti-pattern
x = InitialValue(x0, name='x_0')(inputs_dW)
x = Lambda(lambda x: tf.repeat(x, batch_size, 0))(x)

y = InitialValue(y0, name='y_0')(inputs_dW)
y = Lambda(lambda x: tf.repeat(x, batch_size, 0))(y)

adj_z = Adjoint(100, n_dimensions, n_diffusion_factors)
adj_r = Adjoint(100, n_dimensions, n_jump_factors)

t = InitialValue(tf.constant([[0.]], dtype=tf.float32))(inputs_dW)
t = Lambda(lambda x: tf.repeat(x, batch_size, 0))(t)

concat = concatenate([t, x, y])
z = adj_z(concat)
r = adj_r(concat)

paths += [[x, y, z, r]]

@tf.function
def hx(args):
    t, x, y, z, r, dW, dN = args
    return x + dX(t, x, y, z, r, dW, dN)

@tf.function
def hy(args):
    t, x, y, z, r, dW, dN = args
    return y + dY(t, x, y, z, r, dW, dN)

for i in range(n_timesteps):
    
    dW = Lambda(lambda x: x[:, i])(inputs_dW)
    dN = Lambda(lambda x: x[:, i])(inputs_dN)
    
    x, y = (
        Lambda(hx, name=f'x_{i+1}')([t, x, y, z, r, dW, dN]),
        Lambda(hy, name=f'y_{i+1}')([t, x, y, z, r, dW, dN]),
    )
    
    t = InitialValue(tf.constant([[(i+1) * dt]], dtype=tf.float32))(inputs_dW)
    t = Lambda(lambda x: tf.repeat(x, batch_size, 0))(t)

    # we don't train z and r for the last time step; keep for consistency
    concat = concatenate([t, x, y])
    z = adj_z(concat)
    r = adj_r(concat)

    paths += [[x, y, z, r]]
    
outputs_loss = Lambda(lambda r: r[1] - tf.transpose(tf.vectorized_map(g, r[0])))([x, y])
outputs_paths = tf.stack(
    [tf.stack([p[0] for p in paths[1:]], axis=1), tf.stack([p[1] for p in paths[1:]], axis=1)] + 
    [tf.stack([p[2][:, :, i] for p in paths[1:]], axis=1) for i in range(n_diffusion_factors)] +
    [tf.stack([p[3][:, :, i] for p in paths[1:]], axis=1) for i in range(n_jump_factors)], axis=2)

model_loss = Model([inputs_dW, inputs_dN], outputs_loss)
model_loss.compile(loss='mse', optimizer='adam')

# (n_sample, n_timestep, x/y/z_k, n_dimension)
# skips the first time step
model_paths = Model([inputs_dW, inputs_dN], outputs_paths)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'


In [74]:
model_loss.summary()

Model: "model_10"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_27 (InputLayer)           [(None, 8, 2)]       0                                            
__________________________________________________________________________________________________
initial_value_52 (InitialValue) (1, 1)               0           input_27[0][0]                   
__________________________________________________________________________________________________
x_0 (InitialValue)              (1, 4)               4           input_27[0][0]                   
__________________________________________________________________________________________________
y_0 (InitialValue)              (1, 4)               4           input_27[0][0]                   
___________________________________________________________________________________________

# Training

In [75]:
dW = tf.sqrt(dt) * tf.random.normal((n_paths, n_timesteps, n_diffusion_factors))
dN = tf.random.poisson((n_paths, n_timesteps), [dt * lp, dt * lm])
target = tf.zeros((n_paths, n_dimensions))

In [None]:
# check for exploding gradients before training

with tf.GradientTape() as tape:
    loss = model_loss([dW, dN])

# bias of the last dense layer
variables = model_loss.variables[-1]
tape.gradient(loss, variables)

In [None]:
log_dir = "_logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
checkpoint_callback = ModelCheckpoint('_models/weights{epoch:04d}.h5', save_weights_only=True, overwrite=True)
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
model_loss.save_weights('_models/weights0000.h5')
history = model_loss.fit([dW, dN], target, batch_size=batch_size, epochs=1000, callbacks=[checkpoint_callback, tensorboard_callback])


Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000

In [80]:
model_loss.variables[1]

<tf.Variable 'Variable:0' shape=(1, 4) dtype=float32, numpy=array([[    1.392,     1.303,    0.2493,    0.6378]], dtype=float32)>

# Display paths and loss

In [29]:
# load bad model
model_loss.load_weights('_models/weights0050.h5')

In [31]:
loss = model_loss([dW, dN]).numpy()
loss

array([[ 1.9774437e-03,  6.4851180e-02,  9.2710733e-02, -3.0249357e-03],
       [ 3.1286180e-03,  4.5552850e-05, -1.6318083e-02, -3.0249357e-03],
       [ 1.8336773e-03,  2.3638418e-01,  1.4692068e-01, -3.0249357e-03],
       ...,
       [ 2.0097494e-03,  7.7560700e-02,  5.1658750e-02, -3.0249357e-03],
       [ 2.5312901e-03, -5.8633961e-02,  4.3590486e-02, -3.0249357e-03],
       [ 4.2875633e-03,  3.0241426e-02, -1.2896877e-02, -3.0249357e-03]],
      dtype=float32)

In [84]:
paths = model_paths([dW, dN]).numpy()

In [85]:
np.set_printoptions(edgeitems=30, linewidth=100000, formatter=dict(float=lambda x: "%9.4g" % x))
tf.transpose(paths[50, :, :, :], (1, 2, 0)).numpy()

array([[[  -0.2869,   -0.6836,    -1.182,    -1.772,    -2.438,    -3.165],
        [  -0.6591,     -1.27,    -1.815,    -2.277,    -2.641,    -2.894],
        [  0.04917,   0.06195,    0.0477,   -0.0256,   -0.1851,   -0.3518],
        [   0.1838,    0.3236,    0.4229,    0.3982,    0.1156,   -0.2907]],

       [[    1.423,     1.333,     1.045,    0.1809,    0.1118,   -0.3538],
        [    1.288,     1.266,    0.9309,     0.301,    0.0147, -0.001478],
        [   0.2493,     0.246,    0.2667,   -0.1633,    -2.254,    -2.315],
        [   0.6378,    0.4334,    0.4334,    0.4334,    0.4334,    0.4334]],

       [[  0.05013,    0.9826,         0,         0,     1.371,     1.001],
        [        0,     1.129,         0,     1.068,         0,    0.3909],
        [   0.0687,         0,     1.323,     5.107,         0,     1.194],
        [   0.7125,         0,         0,         0,         0,    0.5149]],

       [[   0.1272,         0,     1.263,         0,         0,    0.7206],
      

In [95]:
n = 2000
x = tf.transpose(paths[n, :, 0, :], (1, 0))
dp = tf.maximum(0., 1./k - (paths[n, :, 1, 2] / paths[n, :, 1, 3] + paths[n, :, 0, 0]))
dm = tf.maximum(0., 1./k + (paths[n, :, 1, 2] / paths[n, :, 1, 3] + paths[n, :, 0, 0]))
tf.concat([x, tf.expand_dims(dp, 0), tf.expand_dims(dm, 0)], axis=0)

<tf.Tensor: shape=(6, 6), dtype=float32, numpy=
array([[  -0.2024,   -0.5271,   -0.9685,    -1.518,    -2.162,    -2.887],
       [   -0.734,    -1.434,     -2.08,    -2.653,    -3.134,    -3.508],
       [  0.04917,   0.07243,   0.07184,   0.02179,   -0.1339,   -0.3004],
       [   0.1838,    0.3368,    0.4585,    0.4899,    0.2536,   -0.1065],
       [   0.8114,     1.005,     1.398,     2.718,     7.387,     8.202],
       [    1.189,    0.9952,    0.6024,         0,         0,         0]], dtype=float32)>

In [51]:
print(x0)
print(x1)

tf.Tensor(
[[  -0.3752   -0.8029    -1.273    -1.773     -2.29     -2.81]
 [  -0.3148   -0.5671   -0.7481   -0.8508   -0.8701   -0.8033]
 [ 0.005608  -0.03553   -0.1664   -0.3181   -0.4762   -0.6413]
 [   0.1284    0.1997    0.1074  -0.08566   -0.3659   -0.7438]
 [     1.33     1.928      2.41     2.966      4.66     5.339]
 [   0.6705   0.07164         0         0         0         0]], shape=(6, 6), dtype=float32)
tf.Tensor(
[[   0.4054    0.8068     1.193     1.553     1.875     2.151]
 [ -0.02386   -0.1153   -0.2736   -0.4963   -0.7789    -1.115]
 [ 0.005608   0.06282    0.1797    0.3353    0.4947    0.6566]
 [   0.1284    0.3015    0.5202    0.8169     1.115     1.419]
 [   0.5489    0.1524         0         0         0         0]
 [    1.451     1.848     2.714     3.134     3.546     4.003]], shape=(6, 6), dtype=float32)
