# FBSDE

Ji, Shaolin, Shige Peng, Ying Peng, and Xichuan Zhang. “Three Algorithms for Solving High-Dimensional Fully-Coupled FBSDEs through Deep Learning.” ArXiv:1907.05327 [Cs, Math], February 2, 2020. http://arxiv.org/abs/1907.05327.

In [None]:
%load_ext tensorboard

In [1]:
import numpy as np
import tensorflow as tf
from keras.layers import Input, Dense, Lambda, Reshape, concatenate, Layer
from keras import Model, initializers
from keras.callbacks import ModelCheckpoint
from keras.metrics import mean_squared_error
import matplotlib.pyplot as plt

In [2]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices("GPU")))

Num GPUs Available:  3


In [94]:
# numerical parameters
n_paths = 2 ** 18
n_timesteps = 10
n_dimensions = 4
n_diffusion_factors = 2
n_jump_factors = 2
T = 1.
dt = T / n_timesteps
batch_size = 128
epochs = 1000

In [95]:
# model parameters
eta = 1.
lp = 1.
lm = 1.
k = 1.
sigma = 1.
zeta = 1.
phi = 1.
psi = 1.
epsilon = 5e-3

# Initial value layer

In [96]:
class InitialValue(Layer):
    
    def __init__(self, y0, **kwargs):
        super().__init__(**kwargs)
        self.y0 = y0
    
    def call(self, inputs):
        return self.y0

# Coefficients

In [97]:
def b(t, x, y, z, r):
    
    ad = y[2] / y[3] + x[0]
    dp = tf.maximum(0., 1./k + ad)
    dm = tf.maximum(0., 1./k - ad)
    
    return [
        x[1],
        -eta * x[0],
        lm * tf.exp(-k * dm) - lp * tf.exp(-k * dp),
        lp * (x[0] + dp) * tf.exp(-k * dp) - lm * (x[0] - dp) * tf.exp(-k * dm),
    ]

def s(t, x, y, z, r):
    return [[sigma, 0], [0, zeta], [0, 0], [0, 0]]

# - dH_dx
def f(t, x, y, z, r):
    
    ad = y[2] / y[3] + x[0]
    dp = tf.maximum(0., 1./k + ad)
    dm = tf.maximum(0., 1./k - ad)

    return [
        -(y[3] * lp * tf.exp(-k * dp) - y[3] * lm * tf.exp(-k * dm)),
        -(y[0] - eta * y[1]),
        -(-2. * phi * x[2]),
        -(0.)
    ]

def v(t, x, y, z, r):
    return [[0, 0], [epsilon, -epsilon], [0, 0], [0, 0]]

# dg_dx
def g(x):
    return [x[2], 0., x[0] - 2 * psi * x[2], 1.]

In [98]:
def dX(i, x, y, z, r, dW, dN):
    
    t = i * dt
    
    def drift(arg):
        x, y, z, r = arg
        return tf.math.multiply(b(t, x, y, z, r), dt)
    a0 = tf.vectorized_map(drift, (x, y, z, r))
        
    def noise(arg):
        x, y, z, r, dW = arg
        return tf.tensordot(s(t, x, y, z ,r), dW[i], [[1], [0]])
    a1 = tf.vectorized_map(noise, (x, y, z, r, dW))

    def jump(arg):
        x, y, z, r, dN = arg
        return tf.tensordot(v(t, x, y, z ,r), dN[i], [[1], [0]])
    a2 = tf.vectorized_map(jump, (x, y, z, r, dN))
    
    return a0 + a1 + a2

def dY(i, x, y, z, r, dW, dN):
    
    t = i * dt

    def drift(arg):
        x, y, z, r = arg
        return tf.math.multiply(f(t, x, y, z, r), dt)
    a0 = tf.vectorized_map(drift, (x, y, z, r))

    def noise(arg):
        x, y, z, r, dW = arg
        return tf.tensordot(z, dW[i], [[1], [0]])
    a1 = tf.vectorized_map(noise, (x, y, z, r, dW))
    
    def jump(arg):
        x, y, z, r, dN = arg
        return tf.tensordot(r, dN[i], [[1], [0]])
    a2 = tf.vectorized_map(jump, (x, y, z, r, dN))
    
    return a0 + a1 + a2

# Model

In [99]:
paths = []

inputs_dW = Input(shape=(n_timesteps, n_diffusion_factors))
inputs_dN = Input(shape=(n_timesteps, n_jump_factors))

x0 = tf.Variable([[0., 0., 0., 0.]], trainable=False)
y0 = tf.Variable([[5., 5., 5., 5.]], trainable=True)

x = InitialValue(x0, name='x_0')(inputs_dW)
y = InitialValue(y0, name='y_0')(inputs_dW)

z = concatenate([x, y])
z = Dense(10, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-1), name='z1_0')(z)
z = Dense(n_dimensions * n_diffusion_factors, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-1), name='z2_0')(z)
z = Reshape((n_dimensions, n_diffusion_factors), name='zr_0')(z)

r = concatenate([x, y])
r = Dense(10, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-1), name='r1_0')(r)
r = Dense(n_dimensions * n_jump_factors, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-1), name='r2_0')(r)
r = Reshape((n_dimensions, n_jump_factors), name='rr_0')(r)

paths += [[x, y, z, r]]

@tf.function
def hx(i, r):
    return r[0] + dX(i, *r)

@tf.function
def hy(i, r):
    return r[1] + dY(i, *r)

for i in range(n_timesteps):
    
    x, y = (
        Lambda(lambda r: hx(i, r), name=f'x_{i+1}')([x, y, z, r, inputs_dW, inputs_dN]),
        Lambda(lambda r: hy(i, r), name=f'y_{i+1}')([x, y, z, r, inputs_dW, inputs_dN]),
    )
    
    # we don't train z for the last time step; keep for consistency
    z = concatenate([x, y])
    z = Dense(10, activation='relu', name=f'z1_{i+1}')(z)
    z = Dense(n_dimensions * n_diffusion_factors, activation='relu', name=f'z2_{i+1}')(z)
    z = Reshape((n_dimensions, n_diffusion_factors), name=f'zr_{i+1}')(z)
    
    # we don't train r for the last time step; keep for consistency
    r = concatenate([x, y])
    r = Dense(10, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-1), name=f'r1_{i+1}')(r)
    r = Dense(n_dimensions * n_jump_factors, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-1), name=f'r2_{i+1}')(r)
    r = Reshape((n_dimensions, n_jump_factors), name=f'rr_{i+1}')(r)

    paths += [[x, y, z, r]]
    
outputs_loss = Lambda(lambda r: r[1] - tf.transpose(tf.vectorized_map(g, r[0])))([x, y])
outputs_paths = tf.stack(
    [tf.stack([p[0] for p in paths[1:]], axis=1), tf.stack([p[1] for p in paths[1:]], axis=1)] + 
    [tf.stack([p[2][:, :, i] for p in paths[1:]], axis=1) for i in range(n_diffusion_factors)] +
    [tf.stack([p[3][:, :, i] for p in paths[1:]], axis=1) for i in range(n_jump_factors)], axis=2)

model_loss = Model([inputs_dW, inputs_dN], outputs_loss)
model_loss.compile(loss='mse', optimizer='adam')

# (n_sample, n_timestep, x/y/z_k, n_dimension)
# skips the first time step
model_paths = Model([inputs_dW, inputs_dN], outputs_paths)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'


In [100]:
model_loss.summary()

Model: "model_6"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            [(None, 10, 2)]      0                                            
__________________________________________________________________________________________________
x_0 (InitialValue)              (1, 4)               4           input_7[0][0]                    
__________________________________________________________________________________________________
y_0 (InitialValue)              (1, 4)               4           input_7[0][0]                    
__________________________________________________________________________________________________
concatenate_30 (Concatenate)    (1, 8)               0           x_0[0][0]                        
                                                                 y_0[0][0]                  

# Training

In [101]:
dW = tf.sqrt(dt) * tf.random.normal((n_paths, n_timesteps, n_diffusion_factors))
dN = tf.random.poisson((n_paths, n_timesteps), [dt * lp, dt * lm])
target = tf.zeros((n_paths, n_dimensions))

In [102]:
# check for exploding gradients before training

with tf.GradientTape() as tape:
    loss = model_loss([dW, dN])

# bias of the last dense layer
variables = model_loss.variables[-1]
tape.gradient(loss, variables)

The following Variables were used a Lambda layer's call (x_1), but
are not present in its tracked objects:
  <tf.Variable 'Variable:0' shape=(1, 4) dtype=float32, numpy=array([[        5,         5,         5,         5]], dtype=float32)>
It is possible that this is intended behavior, but it is more likely
an omission. This is a strong indication that this layer should be
formulated as a subclassed Layer rather than a Lambda layer.
The following Variables were used a Lambda layer's call (y_1), but
are not present in its tracked objects:
  <tf.Variable 'Variable:0' shape=(1, 4) dtype=float32, numpy=array([[        5,         5,         5,         5]], dtype=float32)>
It is possible that this is intended behavior, but it is more likely
an omission. This is a strong indication that this layer should be
formulated as a subclassed Layer rather than a Lambda layer.


<tf.Tensor: shape=(8,), dtype=float32, numpy=array([2.398e+04,      5836, 2.603e+04, 2.325e+04,      6304, 2.493e+04,        16, 2.593e+04], dtype=float32)>

In [103]:
log_dir = "_logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
checkpoint_callback = ModelCheckpoint('_models/weights{epoch:04d}.h5', save_weights_only=True, overwrite=True)
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=LOG_DIR, histogram_freq=1)
model_loss.save_weights('_models/weights0000.h5')
history = model_loss.fit([dW, dN], target, batch_size=batch_size, epochs=1000, callbacks=[checkpoint_callback, tensorboard_callback])


Epoch 1/1000




Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
Epoch 73/1000


KeyboardInterrupt: 

In [109]:
model_loss.get_layer('z1_3').variables

[<tf.Variable 'z1_3/kernel:0' shape=(8, 10) dtype=float32, numpy=
 array([[  -0.4973,   -0.0301,   -0.6337,      -0.6,   -0.5832,   -0.8468,    0.3204,   -0.4777,    0.2092,   -0.6144],
        [   0.6544,    -0.328,    0.4384,   -0.5906,    0.6096,    -0.507,   -0.1359,    0.3445,    0.9457,    0.5788],
        [   0.4381,   -0.1769,    0.2016,   -0.2961,   -0.1777,     0.432,   -0.3021,    -0.737,   -0.4246,     0.149],
        [   0.2678,    0.3145,   -0.3807,    0.4027,   -0.5968,   -0.1678,    0.1973,    0.5364,   0.08427,    0.1412],
        [   0.1113,  -0.03577,   -0.3366,   -0.3748,    -0.299,    0.1799,   -0.2483,    0.0734,   -0.0555,   0.05282],
        [   0.4557,     1.116,    0.4912,    0.3476,    0.4156,   -0.2231,    0.3507,    0.3502,   -0.1516,    0.6344],
        [  -0.1386,   -0.6493,    0.1872,   -0.3514,    0.3694,   -0.8408,    0.6017,   -0.7619,     1.017,   -0.5158],
        [   0.1138,   0.09502,    0.4001,    0.3738,   -0.4724,     0.282,    0.1602,   -0.130

In [108]:
model_loss.get_layer('z1_2').variables

[<tf.Variable 'z1_2/kernel:0' shape=(8, 10) dtype=float32, numpy=
 array([[  -0.7859,     0.159,    0.1963,   -0.1493,  -0.08391,   0.05727,    0.4855,    0.1415,   -0.3941,    0.2776],
        [  -0.3786,   -0.5218,    -0.093,   -0.3241,   -0.8467,    0.8853,   0.03522,   -0.0938,    0.6889,    0.5398],
        [   0.2856,    0.4262,  -0.07518,   -0.5991,     0.135,   -0.3077,   0.09879,    0.2155,   -0.6053,    -0.113],
        [  -0.2753,    0.3501,    0.2572,    0.1297,     0.425,   0.07822,   -0.5407,    0.2009,    0.6369,   0.02404],
        [   0.4903,    0.0663,   -0.2649,    0.2267,    0.2515,   -0.4859,  -0.09234,   -0.3989,    0.9512,   -0.2294],
        [   0.3351,   -0.5152,    0.3408,    0.2511,    0.3455,    0.5638,    0.4539,    0.5087,    -0.194,    -0.271],
        [   0.3137,    0.2679,    0.5888,    0.1672,   -0.5069,    0.1477,    0.1488,   -0.2026,    0.5874,   -0.4026],
        [   0.5251,    0.3946,    0.1898,   0.09845,      0.57,   0.03063,   -0.1624,  -0.0974

In [57]:
model_loss.variables[1]

<tf.Variable 'Variable:0' shape=(1, 4) dtype=float32, numpy=array([[   -1.813,   -0.9162,     2.426,     1.005]], dtype=float32)>

# Display paths and loss

In [183]:
# load bad model
model_loss.load_weights('_models/weights0001.h5')

In [58]:
loss = model_loss([dW, dN]).numpy()
loss



array([[   0.1175,  -0.06752,  -0.04785,  0.005087],
       [   0.0842,  -0.02167,   0.01194,  0.005087],
       [ -0.03125,  -0.01088,    0.1787,  0.005087],
       [   0.1584,  -0.05617,  -0.04919,  0.005087],
       [-0.008478,  -0.08928,  -0.04812,  0.005087],
       [   0.1481,  -0.05505,   0.02531,  0.005087],
       [  0.01221,  -0.02262,  -0.01552,  0.005087],
       [  0.01707,   -0.0229,   0.03127,  0.005087],
       [    -0.09,  -0.04197,   0.02347,  -0.07824],
       [ -0.07179,   -0.0568,   0.05714,     -0.13],
       [   0.1572,  -0.02351,   0.01054,  0.005087],
       [   0.1518,  -0.05537,   0.02432,  0.005087],
       [ -0.06203,  -0.05759,   0.05918,   -0.1234],
       [ -0.01509,  -0.05822,   0.03213,  -0.04095],
       [   0.1185,  -0.02271,    0.1097,  0.005087],
       [   0.1568,  -0.02159,  -0.03076,  0.005087],
       [   0.1514,  -0.05589, 0.0002482,  0.005087],
       [  0.04962,  -0.02952,   0.02787,  0.005087],
       [   0.1588, -0.001569,  -0.06206,  0.00

In [52]:
paths = model_paths([dW, dN]).numpy()

In [53]:
np.set_printoptions(edgeitems=30, linewidth=100000, formatter=dict(float=lambda x: "%9.4g" % x))
tf.transpose(paths[50, :, :, :], (1, 2, 0)).numpy()

array([[[  -0.4723,   -0.8721,     -1.17,    -1.341],
        [   0.2901,    0.6984,     1.207,     1.789],
        [  -0.2418,   -0.4786,   -0.7199,   -0.9551],
        [   0.8814,     1.767,     2.848,     3.871]],

       [[    -1.57,    -1.445,    -1.207,   -0.9758],
        [  -0.6921,   -0.4911,   -0.2527,  -0.01401],
        [    2.426,     3.186,     2.947,    0.5812],
        [    1.005,    0.9844,    0.9844,    0.9844]],

       [[   0.2397,         0,         0,    0.6617],
        [  0.03881,         0,         0,    0.1683],
        [        0,         0,     4.246,         0],
        [   0.0438,         0,         0,    0.5734]],

       [[        0,         0,         0,     0.666],
        [        0,         0,         0,         0],
        [     3.09,         0,         0,         0],
        [        0,         0,         0,    0.3042]],

       [[        0,         0,         0,         0],
        [        0,         0,         0,         0],
        [        0, 

In [93]:
n = 600
x = tf.transpose(paths[n, :, 0, :], (1, 0))
dp = tf.maximum(0., 1./k - (paths[n, :, 1, 2] / paths[n, :, 1, 3] + paths[n, :, 0, 0]))
dm = tf.maximum(0., 1./k + (paths[n, :, 1, 2] / paths[n, :, 1, 3] + paths[n, :, 0, 0]))
tf.concat([x, tf.expand_dims(dp, 0), tf.expand_dims(dm, 0)], axis=0)

<tf.Tensor: shape=(6, 4), dtype=float32, numpy=
array([[  -0.7081,    -1.436,    -2.139,    -2.773],
       [ -0.07885,   0.01933,    0.2995,    0.7554],
       [  -0.2418,   -0.4751,   -0.7006,   -0.8407],
       [   0.8814,     1.768,      2.73,     3.423],
       [        0,         0,    0.2975,     5.203],
       [    2.705,     2.323,     1.702,         0]], dtype=float32)>