# FBSDE

Ji, Shaolin, Shige Peng, Ying Peng, and Xichuan Zhang. “Three Algorithms for Solving High-Dimensional Fully-Coupled FBSDEs through Deep Learning.” ArXiv:1907.05327 [Cs, Math], February 2, 2020. http://arxiv.org/abs/1907.05327.

In [180]:
import numpy as np
import tensorflow as tf
from keras.layers import Input, Dense, Lambda, Reshape, concatenate, Layer
from keras import Model, initializers
from keras.callbacks import ModelCheckpoint
from keras.metrics import mean_squared_error
import matplotlib.pyplot as plt

In [181]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices("GPU")))

Num GPUs Available:  3


In [303]:
n_paths = 4096
n_timesteps = 25
n_dimensions = 100
n_factors = 100
T = 1.
dt = T / n_timesteps
batch_size = n_paths
epochs = 1000

# Initial value layer

In [304]:
class InitialValue(Layer):
    
    def __init__(self, y0):
        super().__init__()
        self.y0 = tf.Variable(y0, trainable=True)
    
    def call(self, inputs):
        print(type(inputs))
        return self.y0

In [305]:
inputs = Input(shape=(1,))
y0 = InitialValue(10.)(inputs)
outputs = Lambda(lambda x: x[0] + x[1])([y0, inputs])
model = Model([inputs], outputs)
model.compile(loss='mse', optimizer='adam')
model.fit(tf.zeros((10000,)), tf.random.normal((10000,)), epochs=1)

<class 'tensorflow.python.framework.ops.Tensor'>
<class 'tensorflow.python.framework.ops.Tensor'>
The following Variables were used a Lambda layer's call (lambda_52), but
are not present in its tracked objects:
  <tf.Variable 'Variable:0' shape=() dtype=float32>
It is possible that this is intended behavior, but it is more likely
an omission. This is a strong indication that this layer should be
formulated as a subclassed Layer rather than a Lambda layer.
<class 'tensorflow.python.framework.ops.Tensor'>


<tensorflow.python.keras.callbacks.History at 0x7f5e610542b0>

# Coefficients

In [306]:
def b(t, x, y, z):
    return tf.fill((n_dimensions,), 0.)

def s(t, x, y, z):
    # discard non-diagonal elements of z
    return n_dimensions * tf.exp(-1/n_dimensions * tf.reduce_sum(x)) * tf.linalg.diag(tf.linalg.diag_part(z))

def f(t, x, y, z):
    # take only diagonal elements of z
    return tf.repeat(tf.exp(-1./n_dimensions * tf.reduce_sum(x)) * tf.reduce_sum(tf.square(tf.linalg.diag_part(z))), n_dimensions)

def g(x):
    return tf.repeat(tf.exp(1./n_dimensions * tf.reduce_sum(x)), n_dimensions)

In [307]:
def dX(t, x, y, z, dw):
    
    def drift(arg):
        x, y, z = arg
        return tf.math.multiply(b(t, x, y, z), dt)
    a0 = tf.vectorized_map(drift, (x, y, z))
        
    def noise(arg):
        x, y, z, dw = arg
        return tf.tensordot(s(t, x, y, z), dw[i], [[1], [0]])
    a1 = tf.vectorized_map(noise, (x, y, z, dw))
            
    return a0 + a1

def dY(t, x, y, z, dw):

    def drift(arg):
        x, y, z = arg
        return tf.math.multiply(f(t, x, y, z), dt)
    a0 = tf.vectorized_map(drift, (x, y, z))

    def noise(arg):
        x, y, z, dw = arg
        return tf.tensordot(z, dw[i], [[1], [0]])
    a1 = tf.vectorized_map(noise, (x, y, z, dw))
    
    return a0 + a1

# Model

In [317]:
paths = []

inputs_dW = Input(shape=(n_timesteps, n_factors))

x0 = tf.constant([[1.] * n_dimensions])
y0 = [[2.] * n_dimensions]

x = x0
y = InitialValue(y0)(inputs_dW)

z = concatenate([x, y])
z = Dense(10, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-2), name='z1_0')(z)
z = Dense(n_dimensions * n_factors, activation='relu', kernel_initializer=initializers.RandomNormal(stddev=1e-2), name='z2_0')(z)
z = Reshape((n_dimensions, n_factors), name='reshape_0')(z)

paths += [[x, y, z]]

@tf.function
def hx(r):
    return r[0] + dX(i * dt, r[0], r[1], r[2], r[3])

@tf.function
def hy(r):
    return r[1] + dY(i * dt, r[0], r[1], r[2], r[3])

for i in range(n_timesteps):
    
    x, y = (
        Lambda(hx, name=f'dx_{i}')([x, y, z, inputs_dW]),
        Lambda(hy, name=f'dy_{i}')([x, y, z, inputs_dW]),
    )
    
    # we don't train z for the last time step; keep for consistency
    z = concatenate([x, y])
    z = Dense(10, activation='relu', name=f'z1_{i+1}')(z)
    z = Dense(n_dimensions * n_factors, activation='relu', name=f'z2_{i+1}')(z)
    z = Reshape((n_dimensions, n_factors), name=f'reshape_{i+1}')(z)

    paths += [[x, y, z]]

outputs_loss = Lambda(lambda r: r[1] - tf.vectorized_map(g, r[0]))([x, y])
outputs_paths = tf.stack([tf.stack([p[0] for p in paths[1:]], axis=1), tf.stack([p[1] for p in paths[1:]], axis=1)] + [tf.stack([p[2][:, :, i] for p in paths[1:]], axis=1) for i in range(n_factors)], axis=2)

model_loss = Model(inputs_dW, outputs_loss)
model_loss.compile(loss='mse', optimizer='adam')

# (n_sample, n_timestep, x/y/z_k, n_dimension)
# skips the first time step
model_paths = Model(inputs_dW, outputs_paths)

<class 'tensorflow.python.framework.ops.Tensor'>
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'


# Training

In [318]:
dW = tf.sqrt(dt) * tf.random.normal((n_paths, n_timesteps, n_factors))
target = tf.zeros((n_paths, n_dimensions))

In [319]:
# check for exploding gradients before training

with tf.GradientTape() as tape:
    loss = model_loss(dW)
    
variables = model_loss.variables[-1]
tape.gradient(loss, variables)

<class 'tensorflow.python.framework.ops.EagerTensor'>
The following Variables were used a Lambda layer's call (dx_0), but
are not present in its tracked objects:
  <tf.Variable 'Variable:0' shape=(1, 100) dtype=float32, numpy=array([[        2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,         2,        

<tf.Tensor: shape=(10000,), dtype=float32, numpy=array([      nan,      5.66,     5.942,    -23.54,     42.16,  -0.07529,     8.756,     -32.3,     30.13,    -13.68,    -29.93,     4.265,     18.97,     -39.3,    0.3957,    -11.22,    -3.341,     -11.4,     -4.85,     16.21,     37.96,    -3.696,      20.3,     27.29,     -8.65,    -9.411,     22.47,      41.2,      25.1,     10.14, ...,     20.97,    -32.85,    -4.268,    -23.43,    -12.89,    -14.14,      19.3,    -3.287,     26.91,    -18.63,     17.82,     42.09,     19.93,    -26.41,     7.683,     1.232,   -0.8981,   -0.2112,    -6.968,     5.219,     23.84,     29.57,     17.95,     6.645,     3.439,     18.98,     17.14,    -27.03,    -22.66,       nan], dtype=float32)>

In [None]:
callback = ModelCheckpoint('_models/weights{epoch:03d}.h5', period=1, save_weights_only=True, overwrite=True)
model_loss.fit(dW, target, batch_size=batch_size, epochs=1000, callbacks=[callback])


# Display paths and loss

In [None]:
# load bad model
model_loss.load_weights('_models/weights001.h5')

In [295]:
loss = model_loss(dW).numpy()
loss

<class 'tensorflow.python.framework.ops.EagerTensor'>
The following Variables were used a Lambda layer's call (dx_0), but
are not present in its tracked objects:
  <tf.Variable 'Variable:0' shape=(1, 4) dtype=float32, numpy=array([[        2,         2,         2,         2]], dtype=float32)>
It is possible that this is intended behavior, but it is more likely
an omission. This is a strong indication that this layer should be
formulated as a subclassed Layer rather than a Lambda layer.
The following Variables were used a Lambda layer's call (dy_0), but
are not present in its tracked objects:
  <tf.Variable 'Variable:0' shape=(1, 4) dtype=float32, numpy=array([[        2,         2,         2,         2]], dtype=float32)>
It is possible that this is intended behavior, but it is more likely
an omission. This is a strong indication that this layer should be
formulated as a subclassed Layer rather than a Lambda layer.


array([[  -0.6775,    0.4698,     1.808,     0.314],
       [   -1.284,    0.1527,   -0.5152,   -0.6432],
       [   0.6276,  -0.02144,  -0.05538,     0.198],
       [ -0.01756,   -0.7259,   -0.6613,    0.1513],
       [   -0.837,    -1.986,     -1.49,    -1.563],
       [   -3.305,    -3.932,    -2.581,    -3.619],
       [   0.8182,    -1.574,     -1.22,  -0.03084],
       [    1.065,    0.6896,    0.3729,    0.7629],
       [   -0.405,   -0.8818,    -0.792,   -0.2468],
       [  0.05031,    -2.038,    -1.549,   -0.3511],
       [   -7.949,    -13.99,    -14.17,    -18.09],
       [   0.2198,   -0.1332,    -2.649,   -0.2122],
       [   0.2255,    0.6244,   -0.3724,    0.5045],
       [ -0.08177,     1.052,    -2.819,    -1.141],
       [    1.363,    0.2058,  -0.08906,    0.3074],
       [  -0.2603,   -0.4994,   -0.8175,   -0.8452],
       [  -0.1544,     0.193,   -0.1946,    0.1098],
       [    1.345,     1.538,    -1.182,  -0.07162],
       [   0.0834,   -0.8688,   -0.3108,   -0.

In [299]:
paths = model_paths(dW).numpy()

<class 'tensorflow.python.framework.ops.EagerTensor'>


In [300]:
np.set_printoptions(edgeitems=30, linewidth=100000, formatter=dict(float=lambda x: "%9.4g" % x))
tf.transpose(paths[262, :, :, :], (1, 2, 0)).numpy()

array([[[        1,         1,         1,         1,    0.1636,    -1.572,    -2.478,    -2.478, -3.005e+04,       nan],
        [        1,         1,         1,         1,         1,     1.028,     1.028,     1.028,     1.028,       nan],
        [        1,    0.7938,    0.7938,    0.2201,   -0.4654,   -0.8909,    -1.686,    -10.62,     -2075,       nan],
        [        1,         1,         1,     0.801,     0.801,     0.801,    -4.139,    -16.56,    -16.56,       nan]],

       [[        2,     1.605,     1.605,     1.171,    0.8077,  -0.04179,    0.1739,     0.385,      2758,       inf],
        [        2,     1.819,     1.819,     1.354,    0.5568,    0.3871,   -0.3507,  -0.02773,      2761,       inf],
        [        2,    0.9443,    0.9443,    0.5957,  0.005049,   -0.9188,    -1.236,    -1.056,      2762,       inf],
        [        2,     1.971,     1.971,    0.2938,    0.2504,   -0.3511,    -1.047,    -1.385,      2762,       inf]],

       [[        0,         0,     