# FBSDE

Ji, Shaolin, Shige Peng, Ying Peng, and Xichuan Zhang. “Three Algorithms for Solving High-Dimensional Fully-Coupled FBSDEs through Deep Learning.” ArXiv:1907.05327 [Cs, Math], February 2, 2020. http://arxiv.org/abs/1907.05327.

In [1]:
import numpy as np
import tensorflow as tf
from keras.layers import Input, Dense, Lambda, Reshape, concatenate
from keras import Model, initializers
from keras.callbacks import ModelCheckpoint
from keras.metrics import mean_squared_error
import matplotlib.pyplot as plt

In [2]:
# Report how many GPU devices TensorFlow can see.
gpu_devices = tf.config.list_physical_devices("GPU")
print("Num GPUs Available: ", len(gpu_devices))

Num GPUs Available:  0


In [227]:
# Sizes of the simulated Brownian increments: (n_paths, n_timesteps, n_factors).
n_paths = 256
n_timesteps = 10
n_factors = 4

# Number of components of the forward and backward state vectors.
n_dimensions = 4

# Time horizon and the resulting uniform step size.
T = 1.0
dt = T / n_timesteps

# Optimiser settings.
batch_size = 128
epochs = 100

In [275]:
def b(t, x, y, z):
    """Drift coefficient of the forward process.

    All arguments are ignored; the result is a zero vector with
    ``n_dimensions`` entries.
    """
    return tf.zeros((n_dimensions,))

def s(t, x, y, z):
    """Diffusion coefficient of the forward process.

    Returns ``n_dimensions * exp(-sum(x) / n_dimensions) * z``; ``t`` and ``y``
    are accepted for a uniform signature but not used.
    """
    decay = tf.exp(tf.reduce_sum(x) * (-1 / n_dimensions))
    scale = n_dimensions * decay
    return scale * z

def f(t, x, y, z):
    """Generator (driver) of the backward process.

    Computes the scalar ``exp(-sum(x) / n_dimensions) * sum(diag(z) ** 2)`` and
    repeats it ``n_dimensions`` times; ``t`` and ``y`` are not used.
    """
    decay = tf.exp(tf.reduce_sum(x) * (-1. / n_dimensions))
    diag_energy = tf.reduce_sum(tf.square(tf.linalg.diag_part(z)))
    return tf.repeat(decay * diag_energy, n_dimensions)

def g(x):
    """Terminal condition of the backward process.

    Computes the scalar ``exp(sum(x) / n_dimensions)`` and repeats it
    ``n_dimensions`` times.
    """
    terminal_value = tf.exp(tf.reduce_sum(x) * (1. / n_dimensions))
    return tf.repeat(terminal_value, n_dimensions)

In [276]:
def dX(t, x, y, z, dw, step=None):
    """Increment of the forward process over one time step.

    For every sample (leading axis of the inputs) this evaluates
    ``b(t, x, y, z) * dt + s(t, x, y, z) @ dw[step]`` using
    ``tf.vectorized_map``.

    Args:
        t: Time at the start of the step; forwarded to ``b`` and ``s``.
        x: Forward state, batched along axis 0.
        y: Backward state, batched along axis 0.
        z: Control tensor, batched along axis 0.
        dw: Brownian increments, batched along axis 0; each per-sample slice
            is indexed by time step along its first axis.
        step: Index of the time step whose increment is applied. When omitted
            it is recovered from ``t`` as ``round(t / dt)``, in which case
            ``t`` must be a Python number lying on the time grid.

    Returns:
        Drift contribution plus diffusion contribution.
    """
    if step is None:
        # The increment index used to be read from a module-level loop
        # variable `i`, which made the result depend on hidden notebook state.
        # Deriving it from `t` keeps the function self-contained.
        step = int(round(t / dt))

    def drift(sample):
        xs, ys, zs = sample
        return tf.math.multiply(b(t, xs, ys, zs), dt)

    def noise(sample):
        xs, ys, zs, dws = sample
        # Matrix-vector product of the diffusion matrix with this step's increment.
        return tf.tensordot(s(t, xs, ys, zs), dws[step], [[1], [0]])

    drift_part = tf.vectorized_map(drift, (x, y, z))
    noise_part = tf.vectorized_map(noise, (x, y, z, dw))

    return drift_part + noise_part

def dY(t, x, y, z, dw, step=None):
    """Increment of the backward process over one time step.

    For every sample (leading axis of the inputs) this evaluates
    ``-f(t, x, y, z) * dt + z @ dw[step]`` using ``tf.vectorized_map``.

    Args:
        t: Time at the start of the step; forwarded to ``f``.
        x: Forward state, batched along axis 0.
        y: Backward state, batched along axis 0.
        z: Control tensor, batched along axis 0.
        dw: Brownian increments, batched along axis 0; each per-sample slice
            is indexed by time step along its first axis.
        step: Index of the time step whose increment is applied. When omitted
            it is recovered from ``t`` as ``round(t / dt)``, in which case
            ``t`` must be a Python number lying on the time grid.

    Returns:
        Drift contribution plus diffusion contribution.
    """
    if step is None:
        # The increment index used to be read from a module-level loop
        # variable `i`, which made the result depend on hidden notebook state.
        # Deriving it from `t` keeps the function self-contained.
        step = int(round(t / dt))

    def drift(sample):
        xs, ys, zs = sample
        return tf.math.multiply(f(t, xs, ys, zs), -dt)

    def noise(sample):
        xs, ys, zs, dws = sample
        # Matrix-vector product of z with this step's increment.
        return tf.tensordot(zs, dws[step], [[1], [0]])

    drift_part = tf.vectorized_map(drift, (x, y, z))
    noise_part = tf.vectorized_map(noise, (x, y, z, dw))

    return drift_part + noise_part

In [288]:
# Symbolic [x, y, z] triple for the initial state and for every time step.
paths = []

# Brownian increments are the only model input.
inputs_dW = Input(shape=(n_timesteps, n_factors))

x0 = tf.constant([[0.] * n_dimensions])
# NOTE(review): y0 is a tf.Variable consumed inside Lambda layers. Keras warns
# that such variables are not tracked by the layer, so y0 is presumably not
# updated during training -- confirm, and consider a subclassed Layer.
y0 = tf.Variable([[1.] * n_dimensions])

x = x0
y = y0

# Small network producing the initial z from (x0, y0).
z = concatenate([x, y])
z = Dense(10, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-2))(z)
z = Dense(n_dimensions * n_factors, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-2))(z)
z = Reshape((n_dimensions, n_factors))(z)

paths += [[x, y, z]]

for i in range(n_timesteps):

    # `i=i` freezes the current step inside each lambda. A plain closure looks
    # `i` up when the layer is executed, so every layer would see the final
    # loop value whenever the model is re-run after construction.
    x, y = (
        Lambda(lambda r, i=i: r[0] + dX(i * dt, r[0], r[1], r[2], r[3]), name=f'dx_{i}')([x, y, z, inputs_dW]),
        Lambda(lambda r, i=i: r[1] + dY(i * dt, r[0], r[1], r[2], r[3]), name=f'dy_{i}')([x, y, z, inputs_dW]),
    )

    # we don't train z for the last time step; keep for consistency
    z = concatenate([x, y])
    z = Dense(10, activation='relu')(z)
    z = Dense(n_dimensions * n_factors, activation='relu')(z)
    z = Reshape((n_dimensions, n_factors))(z)

    paths += [[x, y, z]]

# Terminal residual y_T - g(x_T); training drives it towards zero.
outputs_loss = Lambda(lambda r: r[1] - tf.vectorized_map(g, r[0]))([x, y])
model_loss = Model(inputs_dW, outputs_loss)
model_loss.compile(loss='mse', optimizer='adam')

# (n_sample, n_timestep, x/y/z_k, n_dimension)
# skips the first time step
simulated_steps = paths[1:]
x_over_time = tf.stack([p[0] for p in simulated_steps], axis=1)
y_over_time = tf.stack([p[1] for p in simulated_steps], axis=1)
z_over_time = [
    tf.stack([p[2][:, :, k] for p in simulated_steps], axis=1)
    for k in range(n_factors)
]
outputs_paths = tf.stack([x_over_time, y_over_time] + z_over_time, axis=2)
model_paths = Model(inputs_dW, outputs_paths)

The following Variables were used a Lambda layer's call (dx_0), but
are not present in its tracked objects:
  <tf.Variable 'Variable:0' shape=(1, 4) dtype=float32>
It is possible that this is intended behavior, but it is more likely
an omission. This is a strong indication that this layer should be
formulated as a subclassed Layer rather than a Lambda layer.
The following Variables were used a Lambda layer's call (dy_0), but
are not present in its tracked objects:
  <tf.Variable 'Variable:0' shape=(1, 4) dtype=float32>
It is possible that this is intended behavior, but it is more likely
an omission. This is a strong indication that this layer should be
formulated as a subclassed Layer rather than a Lambda layer.


# Training

In [289]:
# Brownian increments: standard normal draws scaled by sqrt(dt), one per
# path, time step and factor.
dW = tf.sqrt(dt) * tf.random.normal((n_paths, n_timesteps, n_factors))
# The loss model outputs the terminal residual, so the regression target is zero.
target = tf.zeros((n_paths, n_dimensions))
# Save the weights after every epoch. That is ModelCheckpoint's default
# frequency, so the deprecated `period=1` is dropped, together with
# `overwrite=True`, which ModelCheckpoint does not define.
callback = ModelCheckpoint('_models/weights{epoch:03d}.h5', save_weights_only=True)
model_loss.fit(dW, target, batch_size=batch_size, epochs=epochs, callbacks=[callback])


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100


KeyboardInterrupt: 

# Display paths and loss

In [282]:
# Restore the weights checkpointed after epoch 3 (flagged by the author as a "bad" model).
model_loss.load_weights('_models/weights003.h5')

In [None]:
# Evaluate the loss model on the sampled increments and convert the result to a NumPy array.
loss = model_loss(dW).numpy()
loss

In [284]:
# Evaluate the path model on the sampled increments.
# Note: this rebinds `paths`, which held the list of symbolic [x, y, z] triples
# while the model was being built, to a NumPy array.
paths = model_paths(dW).numpy()



In [285]:
# Wide, compact float formatting so that a whole path fits on one line.
np.set_printoptions(
    edgeitems=30,
    linewidth=100000,
    formatter={'float': lambda value: "%9.4g" % value},
)
# Show sample 112 with the time axis moved last.
tf.transpose(paths[112], perm=(1, 2, 0)).numpy()

array([[[-0.0005017,       nan,       nan,       nan,       nan,       nan,       nan,       nan,       nan,       nan],
        [ 0.000991,       nan,       nan,       nan,       nan,       nan,       nan,       nan,       nan,       nan],
        [ 0.001027,       nan,       nan,       nan,       nan,       nan,       nan,       nan,       nan,       nan],
        [-0.0002361,       nan,       nan,       nan,       nan,       nan,       nan,       nan,       nan,       nan]],

       [[   0.9999,       nan,       nan,       nan,       nan,       nan,       nan,       nan,       nan,       nan],
        [        1,       nan,       nan,       nan,       nan,       nan,       nan,       nan,       nan,       nan],
        [        1,       nan,       nan,       nan,       nan,       nan,       nan,       nan,       nan,       nan],
        [   0.9999,       nan,       nan,       nan,       nan,       nan,       nan,       nan,       nan,       nan]],

       [[      nan,       nan,    