# FBSDE

Ji, Shaolin, Shige Peng, Ying Peng, and Xichuan Zhang. “Three Algorithms for Solving High-Dimensional Fully-Coupled FBSDEs through Deep Learning.” ArXiv:1907.05327 [Cs, Math], February 2, 2020. http://arxiv.org/abs/1907.05327.

In [1]:
%load_ext tensorboard

In [2]:
import numpy as np
import tensorflow as tf
from keras.layers import Input, Dense, Lambda, Reshape, concatenate, Layer
from keras import Model, initializers
from keras.callbacks import ModelCheckpoint
from keras.metrics import mean_squared_error
import matplotlib.pyplot as plt
from datetime import datetime
from keras.metrics import mse
from keras.optimizers import Adam

In [3]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices("GPU")))

Num GPUs Available:  3


# Inputs

In [329]:
# numerical parameters
n_paths = 2 ** 18
n_timesteps = 8
n_dimensions = 4
n_diffusion_factors = 2
n_jump_factors = 2
T = 1.
dt = T / n_timesteps
batch_size = 128
epochs = 1000
learning_rate = 1e-6

In [330]:
# model parameters
nu = 1.
eta = 1.
zeta = 1.
epsilon = 1.
lp = 1.
lm = 1.
k = 1.
phi = 1e-2
psi = 1e-2

In [331]:
# coefficients
def b(t, x, y, z, r):
    
    ad = y[2] / y[3] - x[0]
    dp = tf.maximum(0., 1./k + ad)
    dm = tf.maximum(0., 1./k - ad)
    
    return [
        x[1],
        -eta * x[1],
        lm * tf.exp(-k * dm) - lp * tf.exp(-k * dp),
        lp * (x[0] + dp) * tf.exp(-k * dp) - lm * (x[0] - dp) * tf.exp(-k * dm),
    ]

def s(t, x, y, z, r):
    return [[nu, 0], [0, zeta], [0, 0], [0, 0]]

# - dH_dx
def f(t, x, y, z, r):
    
    ad = y[2] / y[3] - x[0]
    dp = tf.maximum(0., 1./k + ad)
    dm = tf.maximum(0., 1./k - ad)

    return [
        -(y[3] * lp * tf.exp(-k * dp) - y[3] * lm * tf.exp(-k * dm)),
        -(y[0] - eta * y[1]),
        -(-2. * phi * x[2]),
        -(0.)
    ]

def v(t, x, y, z, r):
    return [[0, 0], [epsilon, -epsilon], [0, 0], [0, 0]]

# dg_dx
def g(x):
    return [x[2], 0., x[0] - 2 * psi * x[2], 1.]

# Initial value layer

In [332]:
class InitialValue(Layer):
    
    def __init__(self, y0, **kwargs):
        super().__init__(**kwargs)
        self.y0 = y0
    
    def call(self, inputs):
        return self.y0

# Model

In [333]:
def dX(t, x, y, z, r, dW, dN):
    
    def drift(arg):
        x, y, z, r = arg
        return tf.math.multiply(b(t, x, y, z, r), dt)
    a0 = tf.vectorized_map(drift, (x, y, z, r))
        
    def noise(arg):
        x, y, z, r, dW = arg
        return tf.tensordot(s(t, x, y, z ,r), dW, [[1], [0]])
    a1 = tf.vectorized_map(noise, (x, y, z, r, dW))

    def jump(arg):
        x, y, z, r, dN = arg
        return tf.tensordot(v(t, x, y, z ,r), dN, [[1], [0]])
    a2 = tf.vectorized_map(jump, (x, y, z, r, dN))
    
    return a0 + a1 + a2

def dY(t, x, y, z, r, dW, dN):
    
    def drift(arg):
        x, y, z, r = arg
        return tf.math.multiply(f(t, x, y, z, r), dt)
    a0 = tf.vectorized_map(drift, (x, y, z, r))

    def noise(arg):
        x, y, z, r, dW = arg
        return tf.tensordot(z, dW, [[1], [0]])
    a1 = tf.vectorized_map(noise, (x, y, z, r, dW))
    
    def jump(arg):
        x, y, z, r, dN = arg
        return tf.tensordot(r, dN, [[1], [0]])
    a2 = tf.vectorized_map(jump, (x, y, z, r, dN))
    
    return a0 + a1 + a2

In [334]:
paths = []

n_hidden_units = n_dimensions + n_diffusion_factors + n_jump_factors + 10

inputs_dW = Input(shape=(n_timesteps, n_diffusion_factors))
inputs_dN = Input(shape=(n_timesteps, n_jump_factors))

x0 = tf.Variable([[10., 0., 0., 0.]], trainable=False)
y0 = tf.Variable([g(x0[0])], trainable=True)

x = InitialValue(x0, name='x_0')(inputs_dW)
y = InitialValue(y0, name='y_0')(inputs_dW)

z = concatenate([x, y])
z = Dense(n_hidden_units, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-1), name='z1_0')(z)
z = Dense(n_dimensions * n_diffusion_factors, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-1), name='z2_0')(z)
z = Reshape((n_dimensions, n_diffusion_factors), name='zr_0')(z)

r = concatenate([x, y])
r = Dense(n_hidden_units, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-1), name='r1_0')(r)
r = Dense(n_dimensions * n_jump_factors, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-1), name='r2_0')(r)
r = Reshape((n_dimensions, n_jump_factors), name='rr_0')(r)

paths += [[x, y, z, r]]

# pre-compile lambda layers

@tf.function
def hx(args):
    i, x, y, z, r, dW, dN = args
    return x + dX(i * dt, x, y, z, r, dW, dN)

@tf.function
def hy(args):
    i, x, y, z, r, dW, dN = args
    return y + dY(i * dt, x, y, z, r, dW, dN)

for i in range(n_timesteps):
    
    step = InitialValue(tf.Variable(i, dtype=tf.float32, trainable=False))(inputs_dW)
    
    dW = Lambda(lambda x: x[0][:, tf.cast(x[1], tf.int32)])([inputs_dW, step])
    dN = Lambda(lambda x: x[0][:, tf.cast(x[1], tf.int32)])([inputs_dN, step])
    
    x, y = (
        Lambda(hx, name=f'x_{i+1}')([step, x, y, z, r, dW, dN]),
        Lambda(hy, name=f'y_{i+1}')([step, x, y, z, r, dW, dN]),
    )
    
    # we don't train z for the last time step; keep for consistency
    z = concatenate([x, y])
    z = Dense(n_hidden_units, activation='relu', name=f'z1_{i+1}')(z)
    z = Dense(n_dimensions * n_diffusion_factors, activation='relu', name=f'z2_{i+1}')(z)
    z = Reshape((n_dimensions, n_diffusion_factors), name=f'zr_{i+1}')(z)
    
    # we don't train r for the last time step; keep for consistency
    r = concatenate([x, y])
    r = Dense(n_hidden_units, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-1), name=f'r1_{i+1}')(r)
    r = Dense(n_dimensions * n_jump_factors, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-1), name=f'r2_{i+1}')(r)
    r = Reshape((n_dimensions, n_jump_factors), name=f'rr_{i+1}')(r)

    paths += [[x, y, z, r]]
    
outputs_loss = Lambda(lambda r: r[1] - tf.transpose(tf.vectorized_map(g, r[0])))([x, y])
outputs_paths = tf.stack(
    [tf.stack([p[0] for p in paths[1:]], axis=1), tf.stack([p[1] for p in paths[1:]], axis=1)] + 
    [tf.stack([p[2][:, :, i] for p in paths[1:]], axis=1) for i in range(n_diffusion_factors)] +
    [tf.stack([p[3][:, :, i] for p in paths[1:]], axis=1) for i in range(n_jump_factors)], axis=2)

adam = Adam(learning_rate=learning_rate)

model_loss = Model([inputs_dW, inputs_dN], outputs_loss)
model_loss.compile(loss='mse', optimizer='adam')

# (n_sample, n_timestep, x/y/z_k, n_dimension)
# skips the first time step
model_paths = Model([inputs_dW, inputs_dN], outputs_paths)


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'


In [335]:
model_loss.summary()

Model: "model_61"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_79 (InputLayer)           [(None, 8, 2)]       0                                            
__________________________________________________________________________________________________
x_0 (InitialValue)              (1, 4)               4           input_79[0][0]                   
__________________________________________________________________________________________________
y_0 (InitialValue)              (1, 4)               4           input_79[0][0]                   
__________________________________________________________________________________________________
concatenate_670 (Concatenate)   (1, 8)               0           x_0[0][0]                        
                                                                 y_0[0][0]                 

# Transfer weights (if needed)

In [None]:
# try transfer learning from another starting point

model_loss.get_layer('y_0').set_weights(m_large.get_layer('y_0').get_weights())

for i in range(n_timesteps):
    model_loss.get_layer(f'z1_{i}').set_weights(m_large.get_layer(f'z1_{i}').get_weights())
    model_loss.get_layer(f'z2_{i}').set_weights(m_large.get_layer(f'z2_{i}').get_weights())

In [340]:
# transfer learning from cruder discretization

model_loss.get_layer('y_0').set_weights(m_small.get_layer('y_0').get_weights())

n_small = 4

for i in range(n_small):
    for j in range(n_timesteps // n_small):
        model_loss.get_layer(f'z1_{n_timesteps // n_small * i}').set_weights(m_small.get_layer(f'z1_{i}').get_weights())
        model_loss.get_layer(f'z2_{n_timesteps // n_small * i}').set_weights(m_small.get_layer(f'z2_{i}').get_weights())
        model_loss.get_layer(f'z1_{n_timesteps // n_small * i + j}').set_weights(m_small.get_layer(f'z1_{i}').get_weights())
        model_loss.get_layer(f'z2_{n_timesteps // n_small * i + j}').set_weights(m_small.get_layer(f'z2_{i}').get_weights())

# Training

In [341]:
dW = tf.sqrt(dt) * tf.random.normal((n_paths, n_timesteps, n_diffusion_factors))
dN = tf.random.poisson((n_paths, n_timesteps), [dt * lp, dt * lm])
target = tf.zeros((n_paths, n_dimensions))

In [342]:
# check for exploding gradients before training

with tf.GradientTape() as tape:
    loss = mse(model_loss([dW, dN]), target)

# bias of the last dense layer
variables = model_loss.variables[-1]
tape.gradient(loss, variables)

The following Variables were used a Lambda layer's call (x_1), but
are not present in its tracked objects:
  <tf.Variable 'Variable:0' shape=(1, 4) dtype=float32, numpy=array([[  0.02798,  -0.05875,     10.03,     1.012]], dtype=float32)>
It is possible that this is intended behavior, but it is more likely
an omission. This is a strong indication that this layer should be
formulated as a subclassed Layer rather than a Lambda layer.
The following Variables were used a Lambda layer's call (y_1), but
are not present in its tracked objects:
  <tf.Variable 'Variable:0' shape=(1, 4) dtype=float32, numpy=array([[  0.02798,  -0.05875,     10.03,     1.012]], dtype=float32)>
It is possible that this is intended behavior, but it is more likely
an omission. This is a strong indication that this layer should be
formulated as a subclassed Layer rather than a Lambda layer.


<tf.Tensor: shape=(8,), dtype=float32, numpy=array([1.319e+04,  1.39e+04,     -7438,    -852.6,      5397,      8211, 1.022e+04,      5612], dtype=float32)>

In [None]:
log_dir = "_logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
checkpoint_callback = ModelCheckpoint('_models/weights{epoch:04d}.h5', save_weights_only=True, overwrite=True)
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
model_loss.save_weights('_models/weights0000.h5')
history = model_loss.fit([dW, dN], target, batch_size=128, epochs=1000, callbacks=[checkpoint_callback, tensorboard_callback])


# Display paths and loss

In [92]:
# load bad model
model_loss.load_weights('_models/weights0011.h5')

In [94]:
loss = model_loss([dW, dN]).numpy()
loss



array([[ 8.79636407e-03, -1.04610287e-02, -3.50279510e-02,
        -1.47234201e-02],
       [ 2.21365094e-02,  3.79096717e-04, -3.30638885e-03,
        -1.25616789e-02],
       [-1.51470482e-01, -3.59187759e-02, -1.55644417e-01,
        -1.74591959e-01],
       ...,
       [ 1.63220763e-02,  1.37822665e-02,  1.04904175e-05,
        -9.77623463e-03],
       [ 5.40584326e-04,  5.68848103e-03,  5.95724583e-02,
        -8.07791948e-03],
       [ 2.24162936e-02, -5.05838916e-03, -3.53908539e-03,
        -1.08971000e-02]], dtype=float32)

In [344]:
paths = model_paths([dW, dN]).numpy()

In [345]:
np.set_printoptions(edgeitems=30, linewidth=100000, formatter=dict(float=lambda x: "%9.4g" % x))
tf.transpose(paths[80, :, :, :], (1, 2, 0)).numpy()

array([[[    9.983,     9.653,     9.423,      9.31,     8.863,     9.127,     8.867,     8.719],
        [  -0.5404,   -0.7483,   -0.7346,   -0.3764,   -0.2514,   -0.3425,    0.4348,    0.4913],
        [ -0.04421,  -0.08673,  -0.09749,   -0.1032,  -0.09104,  -0.07072,  -0.05018,   -0.0248],
        [   0.4968,    0.9773,     1.163,     1.303,     1.295,      1.23,     1.157,     1.054]],

       [[ -0.06553,   -0.1086,   -0.1195,   -0.1253,    -0.113,  -0.09238,  -0.07157,  -0.04586],
        [ -0.09727,   -0.1012,   -0.1003,  -0.09791,  -0.09449,  -0.09217,  -0.09215,  -0.09472],
        [    9.663,     9.663,     9.485,     9.568,     9.203,     9.473,     9.261,     9.054],
        [    1.013,     1.013,     1.013,     1.013,     1.013,     1.013,     1.013,     1.013]],

       [[        0,         0,         0,         0,         0,         0,         0,    0.4277],
        [        0,         0,         0,         0,         0,         0,         0,         0],
        [       

In [346]:
def output(n0):
    x = tf.transpose(paths[n0, :, 0, :], (1, 0))
    dp = tf.maximum(0., 1./k + (paths[n0, :, 1, 2] / paths[n0, :, 1, 3] - paths[n0, :, 0, 0]))
    dm = tf.maximum(0., 1./k - (paths[n0, :, 1, 2] / paths[n0, :, 1, 3] - paths[n0, :, 0, 0]))
    x0 = tf.concat([x, tf.expand_dims(dp, 0), tf.expand_dims(dm, 0)], axis=0)
    print(x0)

for i in range(20):
    output(i)

tf.Tensor(
[[    10.44      10.1     10.65     11.29     11.46     11.63     11.02        11]
 [   0.5333    0.6352     0.585    0.8268     0.704     0.555    0.2945    0.8612]
 [ -0.04421   -0.1387   -0.1936   -0.2567    -0.312   -0.3793   -0.4588   -0.5446]
 [   0.4968     1.496     2.097     2.809      3.48     4.288     5.238     6.205]
 [  0.09959    0.4337    0.3585    0.4303    0.3211    0.2183    0.1665    0.1272]
 [      1.9     1.566     1.641      1.57     1.679     1.782     1.834     1.873]], shape=(6, 8), dtype=float32)
tf.Tensor(
[[    9.739     9.539     9.826     9.519     9.803     9.953     10.11     9.784]
 [   -1.147   -0.4714   -0.7391   -0.3375    -1.256    -1.128    -1.316    -1.166]
 [ -0.04421  -0.06301  -0.06333   -0.0725  -0.06296  -0.07968  -0.08153  -0.07164]
 [   0.4968    0.7547    0.8494     1.023     1.034     1.274     1.383     1.385]
 [   0.7971    0.9965    0.9004     1.104    0.8192    0.9799     1.107     1.269]
 [    1.203     1.004       1.1   