## M203 Electronic markets project

### Exercise 4 - Deep learning approach to optimal liquidation

Marchessaux François, Collin Thibault

### Loading libraries and initializing parameters

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import plotly.express as px
import pandas as pd

### Splitting the deep learning algorithm into several subfunctions

In [None]:
def create_model():
    """
    Assembles the policy network as a Keras Sequential model

    The network takes 3 input features and ends in a single sigmoid unit.
    In between sit two identical hidden blocks, each made of
    Dense(50, relu, L2 = 1e-4) -> BatchNormalization -> Dropout(0.8).
    """

    n_hidden_blocks = 2
    hidden_units = 50
    l2_strength = 0.0001
    dropout_rate = 0.8

    # Layers are created in the same order in which they are stacked
    stack = [keras.Input(shape=(3,))]
    for _ in range(n_hidden_blocks):
        stack.append(layers.Dense(hidden_units, activation='relu',
                                  kernel_regularizer=tf.keras.regularizers.l2(l2_strength)))
        stack.append(layers.BatchNormalization())
        stack.append(layers.Dropout(dropout_rate))
    stack.append(layers.Dense(1, activation="sigmoid"))

    return keras.Sequential(stack)

In [None]:
def plot_simulation_results(simulation_results):
    """
    Draws the simulated liquidation strategies: remaining inventory against time,
    one coloured line per value of the 'lbd' field

    simulation_results is turned into a DataFrame and must provide the
    't', 'x' and 'lbd' columns.
    """

    axis_labels = {"x": "Remaining Inventory", "t": "Time", "lbd": "Market Aversion"}
    frame = pd.DataFrame(simulation_results)

    figure = px.line(frame, x='t', y='x', color='lbd', template='plotly_white', labels=axis_labels)

    # Figure size and the displayed time window [0, 1] are set in a single layout update
    figure.update_layout(width=900, height=300, xaxis=dict(range=[0, 1]))

    figure.show()

In [None]:
def run_episode(order, model, optimizer, n_traj, n_time_steps, x0, lbd_value, sigma, eta, action_noise_std, episode, sto_vol):
    """
    Runs one 'epoch' of the global network so as to learn the optimal strategy

    Simulates n_traj liquidation trajectories over n_time_steps steps with the
    current policy network, builds the policy-gradient style loss, and then
    applies one optimizer step to the model weights.

    Parameters:
        order: "IS", "TC" or "POV"; selects the running penalty formula.
        model: network called on rows (normalized time, inventory / x0, lbd_value);
            column 0 of its output is used as the fraction of inventory to sell.
        optimizer: Keras optimizer used to apply the gradients of the loss.
        n_traj: number of trajectories simulated in parallel.
        n_time_steps: number of time steps per trajectory.
        x0: initial inventory.
        lbd_value: risk aversion used in the penalties and fed to the network.
        sigma: volatility input (see the review note on units below).
        eta: coefficient of the liquidation penalty.
        action_noise_std: relative standard deviation of the exploration noise.
        episode: index of the current episode (currently unused).
        sto_vol: if True, the volatility path is simulated with a CIR recursion,
            otherwise it is constant.

    Returns:
        The scalar loss tensor computed for this episode.

    Raises:
        ValueError: if order is not one of "IS", "TC", "POV".
    """

    import warnings

    with tf.GradientTape() as tape:

        # Initializing parameters for trajectory generation
        r = np.zeros((n_time_steps, n_traj), dtype=np.float32)
        x = tf.zeros((n_time_steps, n_traj), dtype=tf.float32)
        log_density = tf.zeros((n_time_steps, n_traj), dtype=tf.float32)

        # Initializing the action grid: every trajectory starts with inventory x0
        x = tf.tensor_scatter_nd_update(x, [[0]], [x0 * tf.ones(n_traj)])

        # Parameters for the CIR model
        dt = 1 / n_time_steps  # Time step size (the horizon is split into n_time_steps steps)
        kappa = 2  # Speed of mean reversion
        theta = 0.2  # Long-term mean volatility
        sigma_vol = 0.4  # Volatility of volatility (sigma in the CIR model)
        V0 = sigma ** 2

        # Pre-allocate the sigma array
        sigmas = np.zeros((n_time_steps, 1), dtype=np.float32)

        # The global NumPy seed is reset on every call, so the CIR draws below are
        # identical in every episode (tf.random.normal is not affected by this seed)
        np.random.seed(42)

        # NOTE(review): both branches below are squared again in the penalties
        # (sigmas[i] ** 2). The CIR branch starts from sigma ** 2 while the constant
        # branch stores sqrt(sigma); the units look inconsistent - confirm the intent.
        # CIR stochastic volatility process
        if sto_vol:
            sigmas[0] = V0
            Z = np.random.normal(size=(n_time_steps - 1, 1))
            for t in range(1, n_time_steps):
                sigmas[t] = sigmas[t-1] + kappa * (theta - sigmas[t-1]) * dt + sigma_vol * np.sqrt(np.maximum(sigmas[t-1], 0)) * np.sqrt(dt) * Z[t-1]
        else:
            sigmas.fill(np.sqrt(sigma))

        for i in range(1, n_time_steps):

            # Filling the current normalized time and spatial position
            current_time = tf.fill([n_traj], i / n_time_steps, name='current_time')
            current_position = x[i-1] / x0

            # Input time and normalized position into the policy network
            inputs = tf.stack([current_time, current_position, tf.fill([n_traj], lbd_value)], axis=1)

            # Neural network predicts the action to take, without noise
            # NOTE(review): the model is called without training=True, so Dropout and
            # BatchNormalization run in inference mode here - confirm this is intended.
            output_no_noise = model(inputs)[:, 0]

            # Convert neural network output to a proposed selling action
            proposed_sell_amount = output_no_noise * x[i-1]

            # Add exploration noise
            noise = action_noise_std * proposed_sell_amount * tf.random.normal(shape=(n_traj,))
            sell_amount = proposed_sell_amount + noise

            # Inventory constraints: never buy, never sell more than what is left
            sell_amount = tf.maximum(sell_amount, 0.0)
            sell_amount = tf.minimum(sell_amount, x[i-1])

            # Inventory update
            new_inventory = x[i-1] - sell_amount
            x = tf.tensor_scatter_nd_update(x, [[i]], [new_inventory])

            # Gaussian log-density of the action, up to an additive constant.
            # With action_noise_std == 0 this divides by zero and becomes non-finite.
            # NOTE(review): the density is centred on the raw network output (a fraction)
            # and scaled by x0 * action_noise_std, whereas the noise above is centred on
            # proposed_sell_amount and scaled by it - confirm the intended formula.
            log_density_i = -((sell_amount - output_no_noise) / (x0 * action_noise_std)) ** 2 / 2
            log_density = tf.tensor_scatter_nd_update(log_density, [[i]], [log_density_i])

            x_dot = (x[i] - x[i-1])

            if order == "IS":
                inventory_penalty = lbd_value * (sigmas[i] ** 2) * (x[i] ** 2)
                liquidation_penalty = eta * (x_dot ** 2)

            elif order == "TC":
                inventory_penalty = lbd_value * (sigmas[i] ** 2) * ((x[i] - x0) ** 2)
                liquidation_penalty = eta * (x_dot ** 2)

            elif order == "POV":
                v = np.sqrt(((x0 ** 2) * lbd_value * (sigmas[i] ** 2)) / (3 * eta))
                inventory_penalty = (lbd_value * (sigmas[i] ** 2) * (x0 ** 2)) / (3 * v)
                liquidation_penalty = eta * v

            else:
                # Fail with a clear message instead of an unbound-variable error below
                raise ValueError(f"Unknown order {order!r}; expected 'IS', 'TC' or 'POV'")

            # Any inventory left at the last step is charged a very large penalty
            if i == n_time_steps - 1:
                final_penalty = tf.where(x[i] > 0, 10e18 * tf.ones_like(x[i]), tf.zeros_like(x[i]))
            else:
                final_penalty = tf.zeros_like(x[i])

            r_i = - (inventory_penalty + liquidation_penalty + final_penalty)
            r = tf.tensor_scatter_nd_update(r, [[i]], [r_i])

        # Reward-to-go: for each step, the sum of rewards from that step to the end
        payoff = tf.reverse(tf.cumsum(tf.reverse(r, axis=[0]), axis=0), axis=[0])

        loss = -tf.reduce_mean(log_density * tf.cast(payoff, dtype=tf.float32))

    # Learning step: differentiate the loss w.r.t. the network weights and let the
    # optimizer update them. Without this the loss is computed but nothing is learnt.
    trainable = model.trainable_variables
    gradients = tape.gradient(loss, trainable)
    grads_and_vars = [(g, v) for g, v in zip(gradients, trainable) if g is not None]
    gradients_are_finite = all(
        bool(tf.reduce_all(tf.math.is_finite(g))) for g, _ in grads_and_vars)

    if grads_and_vars and gradients_are_finite:
        optimizer.apply_gradients(grads_and_vars)
    else:
        # Applying NaN/inf gradients would destroy the weights, so the step is skipped.
        warnings.warn(
            "run_episode: gradients are missing or non-finite (this happens when "
            "action_noise_std is 0); the optimizer step was skipped.",
            RuntimeWarning)

    return loss

In [None]:
def train_model(order, model, optimizer, episodes, sto_vol, **kwargs):
    """
    Calls run_episode once per episode index (0 .. episodes - 1) and returns the model

    Extra keyword arguments are forwarded unchanged to run_episode.
    The value returned by run_episode is not used here.
    """

    for episode_index in range(episodes):
        run_episode(order, model, optimizer, episode=episode_index, sto_vol=sto_vol, **kwargs)

    return model

In [None]:
def simulate_trajectory_for_lambda(lbd_value, model, n_time_steps=20, T=1, x0=1):
    """
    Rolls the policy forward once, without noise, for a single risk aversion value

    Returns a list of dicts {'t', 'x', 'lbd'}: one entry per point of the time grid
    np.linspace(0, T, n_time_steps), where 'x' is the remaining inventory.
    """

    inventory = np.zeros(n_time_steps)
    inventory[0] = x0
    time_grid = np.linspace(0, T, n_time_steps)

    trajectory = [{'t': 0, 'x': inventory[0], 'lbd': lbd_value}]

    # The first grid point is the initial state, so stepping starts at index 1
    for step in range(1, n_time_steps):
        now = time_grid[step]
        held = inventory[step - 1]

        # Network features: time, inventory normalized by x0, risk aversion
        features = tf.convert_to_tensor([[now, held / x0, lbd_value]], dtype=tf.float32)
        raw_fraction = model(features)[0, 0].numpy()
        fraction = tf.clip_by_value(raw_fraction, 0, 1)

        # Sell the predicted fraction of what is held; inventory is floored at zero
        inventory[step] = max(held - fraction * held, 0)
        trajectory.append({'t': now, 'x': inventory[step], 'lbd': lbd_value})

    return trajectory

In [None]:
def run_simulations_for_lambdas(lambda_values, model, n_time_steps, T, x0):
    """
    Simulates one trajectory per risk aversion value with the same model and
    returns all trajectory points concatenated in a single flat list
    """

    collected = []

    for risk_aversion in lambda_values:
        path = simulate_trajectory_for_lambda(risk_aversion, model, n_time_steps, T, x0)

        # Tag every point with the lambda it was simulated for
        for point in path:
            point['lbd'] = risk_aversion

        collected += path

    return collected

In [None]:
def run_simulation(lr=1e-3, n_traj=100, n_time_steps=20, x0=1, sigma=0.2, eta=1.1e-4, episodes=100, order="IS",
                   lbd_value=0.01, action_noise_std=0.0, T=1, lambda_values=None):
    """
    Runs the global deep learning algorithm for all lambda parameters, and given initializing market parameters

    Builds a fresh model and Adam optimizer, trains with constant volatility
    (sto_vol=False), then simulates one trajectory per evaluation lambda.

    Parameters:
        lr: learning rate of the Adam optimizer.
        n_traj: number of trajectories simulated per training episode.
        n_time_steps: number of time steps, for training and for the final simulation.
        x0: initial inventory.
        sigma: volatility input forwarded to the training loop.
        eta: liquidation penalty coefficient forwarded to the training loop.
        episodes: number of training episodes.
        order: order type forwarded to the training loop ("IS", "TC" or "POV").
        lbd_value: risk aversion used during training (default 0.01).
        action_noise_std: exploration noise used during training (default 0.0).
            The log-density term in run_episode divides by x0 * action_noise_std,
            so the default of 0 makes that term non-finite; pass a positive value
            to obtain a usable training signal.
        T: horizon of the simulated trajectories (default 1).
        lambda_values: risk aversion values to simulate after training;
            defaults to [0.0001, 0.001, 0.01, 1, 10].

    Returns:
        A flat list of {'t', 'x', 'lbd'} dicts covering all simulated trajectories.
    """

    # None sentinel avoids a shared mutable default argument
    if lambda_values is None:
        lambda_values = [0.0001, 0.001, 0.01, 1, 10]

    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    model = create_model()

    # Model training
    trained_model = train_model(
        order, model, optimizer, episodes,
        n_traj=n_traj, n_time_steps=n_time_steps, x0=x0,
        lbd_value=lbd_value, sigma=sigma, eta=eta, action_noise_std=action_noise_std, sto_vol=False)

    # Running simulations
    all_results = run_simulations_for_lambdas(lambda_values, trained_model, n_time_steps, T, x0)

    return all_results

### Plotting the resulting optimal liquidation strategy for both orders

In [None]:
# Run the constant-volatility pipeline for the "IS" order and keep the simulated trajectories
simulation_results = run_simulation(order="IS")

In [None]:
# Plot remaining inventory against time, one line per simulated lambda
plot_simulation_results(simulation_results)

### Plotting the optimal liquidation strategy given stochastic volatility of the asset

In [None]:
def run_simulation_sto_vol(lr=1e-3, n_traj=100, n_time_steps=20, x0=1, sigma=0.2, eta=1.1e-4, episodes=100, order="IS",
                           lbd_value=0.01, action_noise_std=0.0, T=1, lambda_values=None):
    """
    Leveraging ANNs to introduce stochastic diffusion for the tradeable asset volatility

    Same pipeline as the constant-volatility run, except that training is done with
    sto_vol=True: builds a fresh model and Adam optimizer, trains, then simulates one
    trajectory per evaluation lambda.

    Parameters:
        lr: learning rate of the Adam optimizer.
        n_traj: number of trajectories simulated per training episode.
        n_time_steps: number of time steps, for training and for the final simulation.
        x0: initial inventory.
        sigma: volatility input forwarded to the training loop.
        eta: liquidation penalty coefficient forwarded to the training loop.
        episodes: number of training episodes.
        order: order type forwarded to the training loop ("IS", "TC" or "POV").
        lbd_value: risk aversion used during training (default 0.01).
        action_noise_std: exploration noise used during training (default 0.0).
            The log-density term in run_episode divides by x0 * action_noise_std,
            so the default of 0 makes that term non-finite; pass a positive value
            to obtain a usable training signal.
        T: horizon of the simulated trajectories (default 1).
        lambda_values: risk aversion values to simulate after training;
            defaults to [0.0001, 0.001, 0.01, 1, 10].

    Returns:
        A flat list of {'t', 'x', 'lbd'} dicts covering all simulated trajectories.
    """

    # None sentinel avoids a shared mutable default argument
    if lambda_values is None:
        lambda_values = [0.0001, 0.001, 0.01, 1, 10]

    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    model = create_model()

    # Model training with the stochastic volatility path enabled
    trained_model = train_model(
        order, model, optimizer, episodes,
        n_traj=n_traj, n_time_steps=n_time_steps, x0=x0,
        lbd_value=lbd_value, sigma=sigma, eta=eta, action_noise_std=action_noise_std, sto_vol=True)

    # Running simulations
    all_results = run_simulations_for_lambdas(lambda_values, trained_model, n_time_steps, T, x0)

    return all_results

In [None]:
# Run the stochastic-volatility pipeline for the "IS" order and keep the simulated trajectories
simulation_results = run_simulation_sto_vol(order="IS")

In [None]:
# Plot remaining inventory against time, one line per simulated lambda
plot_simulation_results(simulation_results)