# FBSDE

Ji, Shaolin, Shige Peng, Ying Peng, and Xichuan Zhang. “Three Algorithms for Solving High-Dimensional Fully-Coupled FBSDEs through Deep Learning.” ArXiv:1907.05327 [Cs, Math], February 2, 2020. http://arxiv.org/abs/1907.05327.

In [1]:
%load_ext tensorboard

In [14]:
import numpy as np
import tensorflow as tf
from keras.layers import Input, Dense, Lambda, Reshape, concatenate, Layer
from keras import Model, initializers
from keras.callbacks import ModelCheckpoint
from keras.metrics import mean_squared_error
import matplotlib.pyplot as plt
from datetime import datetime

In [3]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices("GPU")))

Num GPUs Available:  3


In [101]:
# numerical parameters
n_paths = 2 ** 18
n_timesteps = 6
n_dimensions = 4
n_diffusion_factors = 2
n_jump_factors = 2
T = 1.
dt = T / n_timesteps
batch_size = 128
epochs = 1000

In [102]:
# model parameters
eta = 1.
lp = 1.
lm = 1.
k = 1.
sigma = 1.
zeta = 1.
phi = 1.
psi = 1e-2
epsilon = 5e-3

# Initial value layer

In [103]:
class InitialValue(Layer):
    
    def __init__(self, y0, **kwargs):
        super().__init__(**kwargs)
        self.y0 = y0
    
    def call(self, inputs):
        return self.y0

# Coefficients

In [124]:
def b(t, x, y, z, r):
    
    ad = y[2] / y[3] + x[0]
    dp = tf.maximum(0., 1./k + ad)
    dm = tf.maximum(0., 1./k - ad)
    
    return [
        x[1],
        -eta * x[0],
        lm * tf.exp(-k * dm) - lp * tf.exp(-k * dp),
        lp * (x[0] + dp) * tf.exp(-k * dp) - lm * (x[0] - dp) * tf.exp(-k * dm),
    ]

def s(t, x, y, z, r):
    return [[sigma, 0], [0, zeta], [0, 0], [0, 0]]

# - dH_dx
def f(t, x, y, z, r):
    
    ad = y[2] / y[3] + x[0]
    dp = tf.maximum(0., 1./k + ad)
    dm = tf.maximum(0., 1./k - ad)

    return [
        -(y[3] * lp * tf.exp(-k * dp) - y[3] * lm * tf.exp(-k * dm)),
        -(y[0] - eta * y[1]),
        -(-2. * phi * x[2]),
        -(0.)
    ]

def v(t, x, y, z, r):
    return [[0, 0], [epsilon, -epsilon], [0, 0], [0, 0]]

# dg_dx
def g(x):
    return [x[2], 0., x[0] - 2 * psi * x[2], 1.]

In [125]:
def dX(i, x, y, z, r, dW, dN):
    
    t = i * dt
        
    def drift(arg):
        x, y, z, r = arg
        return tf.math.multiply(b(t, x, y, z, r), dt)
    a0 = tf.vectorized_map(drift, (x, y, z, r))
        
    def noise(arg):
        x, y, z, r, dW = arg
        return tf.tensordot(s(t, x, y, z ,r), dW, [[1], [0]])
    a1 = tf.vectorized_map(noise, (x, y, z, r, dW))

    def jump(arg):
        x, y, z, r, dN = arg
        return tf.tensordot(v(t, x, y, z ,r), dN, [[1], [0]])
    a2 = tf.vectorized_map(jump, (x, y, z, r, dN))
    
    return a0 + a1 + a2

def dY(i, x, y, z, r, dW, dN):
    
    t = i * dt

    def drift(arg):
        x, y, z, r = arg
        return tf.math.multiply(f(t, x, y, z, r), dt)
    a0 = tf.vectorized_map(drift, (x, y, z, r))

    def noise(arg):
        x, y, z, r, dW = arg
        return tf.tensordot(z, dW, [[1], [0]])
    a1 = tf.vectorized_map(noise, (x, y, z, r, dW))
    
    def jump(arg):
        x, y, z, r, dN = arg
        return tf.tensordot(r, dN, [[1], [0]])
    a2 = tf.vectorized_map(jump, (x, y, z, r, dN))
    
    return a0 + a1 + a2

# Model

In [131]:
paths = []

inputs_dW = Input(shape=(n_timesteps, n_diffusion_factors))
inputs_dN = Input(shape=(n_timesteps, n_jump_factors))

x0 = tf.Variable([[0., 0., 0., 0.]], trainable=False)
y0 = tf.Variable([[5., 5., 5., 5.]], trainable=True)

x = InitialValue(x0, name='x_0')(inputs_dW)
y = InitialValue(y0, name='y_0')(inputs_dW)

z = concatenate([x, y])
z = Dense(10, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-1), name='z1_0')(z)
z = Dense(n_dimensions * n_diffusion_factors, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-1), name='z2_0')(z)
z = Reshape((n_dimensions, n_diffusion_factors), name='zr_0')(z)

r = concatenate([x, y])
r = Dense(10, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-1), name='r1_0')(r)
r = Dense(n_dimensions * n_jump_factors, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-1), name='r2_0')(r)
r = Reshape((n_dimensions, n_jump_factors), name='rr_0')(r)

paths += [[x, y, z, r]]

@tf.function
def hx(args):
    i, x, y, z, r, dW, dN = args
    return x + dX(i, x, y, z, r, dW, dN)

@tf.function
def hy(args):
    i, x, y, z, r, dW, dN = args
    return y + dY(i, x, y, z, r, dW, dN)

for i in range(n_timesteps):
    
    step = InitialValue(tf.constant(i, dtype=tf.float32))(inputs_dW)
    
    dW = Lambda(lambda x: x[:, i])(inputs_dW)
    dN = Lambda(lambda x: x[:, i])(inputs_dN)
    
    x, y = (
        Lambda(hx, name=f'x_{i+1}')([step, x, y, z, r, dW, dN]),
        Lambda(hy, name=f'y_{i+1}')([step, x, y, z, r, dW, dN]),
    )
    
    # we don't train z for the last time step; keep for consistency
    z = concatenate([x, y])
    z = Dense(10, activation='relu', name=f'z1_{i+1}')(z)
    z = Dense(n_dimensions * n_diffusion_factors, activation='relu', name=f'z2_{i+1}')(z)
    z = Reshape((n_dimensions, n_diffusion_factors), name=f'zr_{i+1}')(z)
    
    # we don't train r for the last time step; keep for consistency
    r = concatenate([x, y])
    r = Dense(10, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-1), name=f'r1_{i+1}')(r)
    r = Dense(n_dimensions * n_jump_factors, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-1), name=f'r2_{i+1}')(r)
    r = Reshape((n_dimensions, n_jump_factors), name=f'rr_{i+1}')(r)

    paths += [[x, y, z, r]]
    
outputs_loss = Lambda(lambda r: r[1] - tf.transpose(tf.vectorized_map(g, r[0])))([x, y])
outputs_paths = tf.stack(
    [tf.stack([p[0] for p in paths[1:]], axis=1), tf.stack([p[1] for p in paths[1:]], axis=1)] + 
    [tf.stack([p[2][:, :, i] for p in paths[1:]], axis=1) for i in range(n_diffusion_factors)] +
    [tf.stack([p[3][:, :, i] for p in paths[1:]], axis=1) for i in range(n_jump_factors)], axis=2)

model_loss = Model([inputs_dW, inputs_dN], outputs_loss)
model_loss.compile(loss='mse', optimizer='adam')

# (n_sample, n_timestep, x/y/z_k, n_dimension)
# skips the first time step
model_paths = Model([inputs_dW, inputs_dN], outputs_paths)

In [132]:
model_loss.summary()

Model: "model_14"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_27 (InputLayer)           [(None, 6, 2)]       0                                            
__________________________________________________________________________________________________
x_0 (InitialValue)              (1, 4)               4           input_27[0][0]                   
__________________________________________________________________________________________________
y_0 (InitialValue)              (1, 4)               4           input_27[0][0]                   
__________________________________________________________________________________________________
concatenate_118 (Concatenate)   (1, 8)               0           x_0[0][0]                        
                                                                 y_0[0][0]                 

# Training

In [133]:
dW = tf.sqrt(dt) * tf.random.normal((n_paths, n_timesteps, n_diffusion_factors))
dN = tf.random.poisson((n_paths, n_timesteps), [dt * lp, dt * lm])
target = tf.zeros((n_paths, n_dimensions))

In [134]:
# check for exploding gradients before training

with tf.GradientTape() as tape:
    loss = model_loss([dW, dN])

# bias of the last dense layer
variables = model_loss.variables[-1]
tape.gradient(loss, variables)

The following Variables were used a Lambda layer's call (x_1), but
are not present in its tracked objects:
  <tf.Variable 'Variable:0' shape=(1, 4) dtype=float32, numpy=array([[        5,         5,         5,         5]], dtype=float32)>
It is possible that this is intended behavior, but it is more likely
an omission. This is a strong indication that this layer should be
formulated as a subclassed Layer rather than a Lambda layer.
The following Variables were used a Lambda layer's call (y_1), but
are not present in its tracked objects:
  <tf.Variable 'Variable:0' shape=(1, 4) dtype=float32, numpy=array([[        5,         5,         5,         5]], dtype=float32)>
It is possible that this is intended behavior, but it is more likely
an omission. This is a strong indication that this layer should be
formulated as a subclassed Layer rather than a Lambda layer.


<tf.Tensor: shape=(8,), dtype=float32, numpy=array([4.385e+04, 4.198e+04,      7849,      4557, 4.346e+04,      8410, 2.845e+04,      6886], dtype=float32)>

In [135]:
log_dir = "_logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
checkpoint_callback = ModelCheckpoint('_models/weights{epoch:04d}.h5', save_weights_only=True, overwrite=True)
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
model_loss.save_weights('_models/weights0000.h5')
history = model_loss.fit([dW, dN], target, batch_size=batch_size, epochs=1000, callbacks=[checkpoint_callback, tensorboard_callback])


Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

KeyboardInterrupt: 

In [None]:
model_loss.variables[1]

# Display paths and loss

In [None]:
# load bad model
model_loss.load_weights('_models/weights0040.h5')

In [None]:
loss = model_loss([dW, dN]).numpy()
loss

In [None]:
paths = model_paths([dW, dN]).numpy()

In [None]:
np.set_printoptions(edgeitems=30, linewidth=100000, formatter=dict(float=lambda x: "%9.4g" % x))
tf.transpose(paths[50, :, :, :], (1, 2, 0)).numpy()

In [None]:
n = 2500
x = tf.transpose(paths[n, :, 0, :], (1, 0))
dp = tf.maximum(0., 1./k - (paths[n, :, 1, 2] / paths[n, :, 1, 3] + paths[n, :, 0, 0]))
dm = tf.maximum(0., 1./k + (paths[n, :, 1, 2] / paths[n, :, 1, 3] + paths[n, :, 0, 0]))
tf.concat([x, tf.expand_dims(dp, 0), tf.expand_dims(dm, 0)], axis=0)

In [51]:
print(x0)
print(x1)

tf.Tensor(
[[  -0.3752   -0.8029    -1.273    -1.773     -2.29     -2.81]
 [  -0.3148   -0.5671   -0.7481   -0.8508   -0.8701   -0.8033]
 [ 0.005608  -0.03553   -0.1664   -0.3181   -0.4762   -0.6413]
 [   0.1284    0.1997    0.1074  -0.08566   -0.3659   -0.7438]
 [     1.33     1.928      2.41     2.966      4.66     5.339]
 [   0.6705   0.07164         0         0         0         0]], shape=(6, 6), dtype=float32)
tf.Tensor(
[[   0.4054    0.8068     1.193     1.553     1.875     2.151]
 [ -0.02386   -0.1153   -0.2736   -0.4963   -0.7789    -1.115]
 [ 0.005608   0.06282    0.1797    0.3353    0.4947    0.6566]
 [   0.1284    0.3015    0.5202    0.8169     1.115     1.419]
 [   0.5489    0.1524         0         0         0         0]
 [    1.451     1.848     2.714     3.134     3.546     4.003]], shape=(6, 6), dtype=float32)
