## M203 Electronic markets project

### Exercise 3 - Liquidating two assets

Marchessaux François, Collin Thibault

### Loading libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

### Implementing the backward Bellman optimization algorithm

In [2]:
def Bellman_IS_2D(lamb, sigma1, sigma2, eta1, eta2, rho, nb_stocks1, nb_stocks2, nb_periods):
    """Compute a two-asset liquidation schedule by backward induction (Bellman recursion).

    The state is ``(psi1, psi2)``, the number of shares of each asset already sold;
    the inventory still held is ``x1 = nb_stocks1 - psi1`` and ``x2 = nb_stocks2 - psi2``.
    In every period except the last one, any whole number of remaining shares
    ``(i, j)`` may be sold, and the period reward is::

        - lamb * (sigma1**2 * x1**2 + sigma2**2 * x2**2 + 2 * rho * sigma1 * sigma2 * x1 * x2)
        - eta1 * i**2 - eta2 * j**2

    i.e. a risk penalty on the inventory held at the start of the period plus a
    quadratic cost on the quantities traded during the period. The last period
    (index ``nb_periods - 1``) is terminal: no trade takes place, and being there
    with unsold inventory is infeasible (value ``-inf``).

    Parameters
    ----------
    lamb : float
        Weight of the inventory risk penalty.
    sigma1, sigma2 : float
        Coefficients of each asset in the risk penalty.
    eta1, eta2 : float
        Quadratic trading-cost coefficients of each asset.
    rho : float
        Coefficient of the cross term between the two assets in the risk penalty.
    nb_stocks1, nb_stocks2 : int
        Initial number of shares of each asset (non-negative).
    nb_periods : int
        Number of periods, including the terminal one, so there are
        ``nb_periods - 1`` periods in which trading is possible.

    Returns
    -------
    list of tuple of int
        ``nb_periods + 1`` pairs ``(remaining1, remaining2)``: the initial inventory
        followed by the inventory after each period.

    Raises
    ------
    ValueError
        If an initial inventory is negative, or if ``nb_periods == 1`` while there
        is inventory to sell (the only period is terminal, so nothing can be traded).
    """
    if nb_stocks1 < 0 or nb_stocks2 < 0:
        raise ValueError("nb_stocks1 and nb_stocks2 must be non-negative")
    if nb_periods == 1 and (nb_stocks1 or nb_stocks2):
        raise ValueError("nb_periods must be at least 2 to liquidate a non-zero inventory")

    # Value function, indexed by (shares sold of asset 1, shares sold of asset 2, period).
    # Start from -inf so that any state the recursion never reaches stays infeasible:
    # rewards are maximised, so the terminal penalty must be -inf (not +inf).
    reward = np.full((nb_stocks1 + 1, nb_stocks2 + 1, nb_periods), -np.inf)
    # Optimal quantities to sell in each state; share counts are integers.
    policy = np.zeros((nb_stocks1 + 1, nb_stocks2 + 1, nb_periods, 2), dtype=int)

    # Once everything is sold there is nothing left to pay, whatever the period.
    reward[nb_stocks1, nb_stocks2, :] = 0.0

    # trade_cost[i, j] is the cost of selling i shares of asset 1 and j shares of
    # asset 2 within one period. It depends on the traded quantities only, so it is
    # computed once for the whole grid.
    trade1 = np.arange(nb_stocks1 + 1)
    trade2 = np.arange(nb_stocks2 + 1)
    trade_cost = eta1 * trade1[:, None] ** 2 + eta2 * trade2[None, :] ** 2

    # Backward induction over the trading periods (the last period is terminal).
    for period in range(nb_periods - 2, -1, -1):
        next_reward = reward[:, :, period + 1]
        for psi1 in range(nb_stocks1 + 1):
            x1 = nb_stocks1 - psi1
            for psi2 in range(nb_stocks2 + 1):
                x2 = nb_stocks2 - psi2
                if x1 == 0 and x2 == 0:
                    # Fully liquidated: reward 0 and policy (0, 0) are already set.
                    continue

                # Risk penalty on the inventory carried into this period.
                risk = lamb * ((sigma1 ** 2) * (x1 ** 2) + (sigma2 ** 2) * (x2 ** 2)
                               + 2 * rho * sigma1 * sigma2 * x1 * x2)

                # candidates[i, j] = continuation value after selling (i, j) minus the
                # cost of that trade. Selling everything is always feasible, so at
                # least one entry is finite.
                candidates = next_reward[psi1:, psi2:] - trade_cost[:x1 + 1, :x2 + 1]
                # argmax returns the first maximum in row-major order, i.e. the
                # smallest trade among ties.
                i, j = np.unravel_index(np.argmax(candidates), candidates.shape)

                reward[psi1, psi2, period] = candidates[i, j] - risk
                policy[psi1, psi2, period] = (i, j)

    # Roll the optimal policy forward from the initial state (nothing sold yet).
    trajectory = [(nb_stocks1, nb_stocks2)]
    sold1, sold2 = 0, 0
    for period in range(nb_periods):
        sell1, sell2 = policy[sold1, sold2, period]
        sold1 += int(sell1)
        sold2 += int(sell2)
        trajectory.append((nb_stocks1 - sold1, nb_stocks2 - sold2))

    return trajectory

### Running our algorithms

In [21]:
# Initial inventories of the two assets (passed to Bellman_IS_2D as nb_stocks1 / nb_stocks2)
x0_1, x0_2 = 100, 0

# Risk-term coefficients of each asset and the cross-term coefficient between them
sigma_1 = sigma_2 = 0.20
rho = 0.9

# Number of periods handed to the solver
steps = 25

# Quadratic trading-cost coefficients of each asset
eta_1 = 5 * 1e-2
eta_2 = 5

# Risk-aversion levels to compare, one trajectory per value
lambdas = [5e-4, 1e-3, 1e-2, 1e-1, 1, 10]

In [22]:
# Collect one liquidation trajectory per risk-aversion level in long ("tidy") format
tab = {'t': [], 'x1_t': [], 'x2_t': [], 'lambda': []}
for lamb_value in lambdas:
    traj = Bellman_IS_2D(lamb_value, sigma_1, sigma_2, eta_1, eta_2, rho, x0_1, x0_2, steps)
    n_points = len(traj)
    # traj always holds at least the initial inventory, so the unzip is safe
    inventory_1, inventory_2 = zip(*traj)
    tab['t'] += list(range(n_points))
    tab['x1_t'] += list(inventory_1)
    tab['x2_t'] += list(inventory_2)
    # Risk aversion is stored as a string so that plotly treats it as a category
    tab['lambda'] += [str(lamb_value)] * n_points

df = pd.DataFrame(tab)

# One figure per asset: remaining inventory over time, one line per risk aversion
for column, axis_label in (('x1_t', "Asset 1 Remaining Inventory"),
                           ('x2_t', "Asset 2 Remaining Inventory")):
    fig = px.line(df, x='t', y=column, color='lambda', template='plotly_white',
                  labels={column: axis_label, "t": "Time", "lambda": "Risk Aversion"})
    # Same two trace updates as before, chained (each update_* call returns the figure)
    fig.update_traces(hoverinfo='skip').update_traces(hovertemplate=None)
    fig.update_layout(width=900, height=300)
    fig.show()