# Import and Utils

We import the necessary libraries and define some utility functions.

- `compute_black_scholes_deltas`: computes the Black-Scholes deltas for a given sequence of prices.
- `compute_prices`: computes the stock prices at each day given the jump sequences and other parameters.
- `compute_rewards`: computes the rewards given a sequence of prices and deltas.
- `compute_utility`: computes the utility of the sequence of rewards using exponential utility.

In [1]:
import sys

if "google.colab" in sys.modules:
  ! rm -rf deep-hedging
  ! git clone https://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@github.com/SnehalRaj/jpmc-qcware-deephedging deep-hedging
  ! cp -r deep-hedging/* .

In [2]:
import jax
import jax.numpy as jnp
import pickle
import numpy as np
# Print every float in an array with exactly three digits after the decimal point.
np.set_printoptions(formatter={"float": lambda value: f"{value:0.3f}"})
def load_params(file_name):
    """Load a pickled pytree from disk and return it with JAX-array leaves.

    Args:
        file_name: Path of the pickle file to read (opened in binary mode).

    Returns:
        The unpickled object with the same container structure, where every
        leaf has been passed through ``jnp.array``.
    """
    with open(file_name, "rb") as handle:
        # NOTE: unpickling can execute arbitrary code, so only load files
        # that come from a trusted source.
        loaded = pickle.load(handle)
    # Leaves come back as stored (e.g. NumPy arrays); turn them into JAX arrays.
    return jax.tree_util.tree_map(jnp.array, loaded)
def compute_prices(
    seq_jumps,
    *,
    num_trading_days=252,
    mu=0.0,
    sigma=0.5,
    initial_price=100.0,
):
    """Turn per-day jump draws into price paths.

    The jumps are centred and scaled with the mean and standard deviation of
    a Bernoulli(0.5) variable (presumably the inputs are 0/1 draws; confirm
    against the data files), accumulated into a scaled random walk, and fed
    into the log-price formula
    ``(mu - sigma**2 / 2) * t + sigma * walk`` with ``t = day / num_trading_days``.

    Args:
        seq_jumps: 2-D array of shape ``(num_paths, num_days)``.
        num_trading_days: Number of days that make up one unit of time.
        mu: Drift coefficient.
        sigma: Volatility coefficient.
        initial_price: Price placed in the first column of every path.

    Returns:
        Array of shape ``(num_paths, num_days + 1)``; column 0 equals
        ``initial_price`` and column ``k`` is the price after day ``k``.
    """
    jumps_per_day = 1
    p_up = 0.5

    # Centre to mean 0 and scale to unit standard deviation.
    standardised = (seq_jumps - p_up) / np.sqrt(p_up * (1 - p_up))
    num_paths, num_days = standardised.shape
    standardised = standardised.reshape(num_paths, num_days * jumps_per_day)

    # Running sum of the standardised jumps, rescaled to the time grid.
    walk = jnp.cumsum(standardised, axis=1)
    walk = walk / np.sqrt(jumps_per_day * num_trading_days)

    elapsed = jnp.arange(1, 1 + num_days) / num_trading_days
    growth = jnp.exp((mu - sigma**2 / 2) * elapsed + sigma * walk)

    # Prepend the starting point (relative price 1) before scaling.
    start_column = jnp.ones((num_paths, 1))
    return jnp.concatenate([start_column, growth], axis=1) * initial_price


def compute_rewards(seq_prices, seq_deltas, *, strike=0.9, cost_eps=0.0):
    """Per-step cash flows of a hedging strategy plus the terminal payoff.

    Args:
        seq_prices: Array of shape ``(num_paths, num_steps + 1)``.
        seq_deltas: Array of shape ``(num_paths, num_steps)`` with the
            position held after each trading step.
        strike: Strike as a multiple of each path's first price.
        cost_eps: Proportional cost charged on the absolute size of a trade.

    Returns:
        Array shaped like ``seq_prices``. Each column holds the cash flow of
        the trade made at that price; the last column additionally carries
        ``-max(final_price - strike * first_price, 0)``.
    """
    # Trades: open the first position, rebalance in between, then unwind.
    opening = seq_deltas[:, :1]
    rebalancing = jnp.diff(seq_deltas, axis=1)
    closing = -seq_deltas[:, -1:]
    trades = jnp.concatenate([opening, rebalancing, closing], axis=1)

    # Buying costs cash (negative flow); cost_eps adds a fee on trade size.
    cash_flows = -(jnp.abs(trades) * cost_eps + trades) * seq_prices

    # Payoff owed at the final price, struck relative to the first price.
    terminal_payoff = -jnp.maximum(
        seq_prices[:, -1] - strike * seq_prices[:, 0], 0.0
    )
    return cash_flows.at[:, -1].add(terminal_payoff)
def compute_utility(seq_rewards, *, utility_lambda=1.0):
    """Exponential-utility value of the total reward across paths.

    Computes ``-1 / utility_lambda * log(mean(exp(-utility_lambda * R)))``
    where ``R`` is the sum of rewards along axis 1.

    The log-mean-exp is evaluated with a log-sum-exp so that large losses do
    not overflow: in float32 ``exp(x)`` becomes ``inf`` for ``x`` above
    roughly 88.7, which made the direct formula return ``-inf``.

    Args:
        seq_rewards: Array whose axis 1 indexes the per-step rewards.
        utility_lambda: Risk-aversion coefficient of the exponential utility.

    Returns:
        Scalar utility value.
    """
    # Function-scope import so the block does not depend on extra file-level imports.
    from jax.scipy.special import logsumexp

    returns = seq_rewards.sum(axis=1)
    exponents = -utility_lambda * returns
    # log(mean(exp(x))) == logsumexp(x) - log(number of elements)
    log_mean_exp = logsumexp(exponents) - jnp.log(exponents.size)
    return -log_mean_exp / utility_lambda

def compute_black_scholes_deltas(
    seq_prices,
    *,
    num_days=8,
    num_trading_days=252,
    mu=0.0,
    sigma=0.5,
    strike=1.0,
):
    """Black-Scholes call deltas along each price path.

    Args:
        seq_prices: 2-D array of shape ``(num_paths, num_steps + 1)``; the
            last column is not hedged and is dropped.
        num_days: Number of hedging steps; must be compatible with
            ``num_steps`` for the arithmetic below to broadcast.
        num_trading_days: Number of days that make up one unit of time.
        mu: Rate used in the ``d1`` drift term.
        sigma: Volatility.
        strike: Strike as a multiple of ``seq_prices[0, 0]``.

    Returns:
        Array of ``N(d1)`` values, one per path and hedging step.
    """
    # Deltas are only needed at prices where a trade can still follow.
    hedged_prices = seq_prices[:, :-1]
    # The strike level is derived from the first path's initial price.
    strike_level = seq_prices[0, 0] * strike

    # Time remaining at each step: num_days/N, ..., 1/N (one row, broadcast
    # across all paths).
    time_left = (jnp.arange(num_days, 0, -1) / num_trading_days)[None, :]

    log_moneyness = jnp.log(hedged_prices / strike_level)
    d1 = (log_moneyness + (mu + 0.5 * sigma**2) * time_left) / (
        sigma * jnp.sqrt(time_left)
    )
    return jax.scipy.stats.norm.cdf(d1, loc=0.0, scale=1.0)

# Loading actions from hardware experiments

We create jnp arrays from the actions observed in the hardware experiments and in their respective classical simulations. These deltas are combined from the notebooks:
- `JPMC_Part_2_inference_hardware.ipynb`
- `JPMC_Part_2_inference_hardware_2.ipynb`
- `JPMC_Part_2_inference_hardware_3.ipynb`

In [3]:
# Deltas of the "exp" models: classical simulation and hardware runs.
classical_deltas_exp, hardware_deltas_exp = (
    load_params("data/classical_deltas_exp"),
    load_params("data/hardware_deltas_exp"),
)

# Deltas of the "dist" models: classical simulation and hardware runs.
# NOTE(review): "dist" presumably stands for the distributional models - confirm.
classical_deltas_dist, hardware_deltas_dist = (
    load_params("data/classical_deltas_dist"),
    load_params("data/hardware_deltas_dist"),
)



# Load data for inference

In [4]:
# Jump sequences that were used for the hardware experiments.
batch_jumps = jnp.array(load_params("data/seq_jumps_10_days_hardware_exp"))

# Jumps for the final day. The price on the final day has no influence on the
# hedging actions, so a random sample was drawn once and stored on disk to
# keep the results below reproducible.
day_jumps = load_params("data/final_day_jumps_10_days_hardware_exp")

# Append the final-day jumps along the last axis.
seq_jumps = jnp.concatenate((batch_jumps, day_jumps), axis=-1)

In [5]:


# Price paths generated from the loaded jump sequences.
seq_prices = compute_prices(
    seq_jumps, num_trading_days=30, mu=0.0, sigma=0.2, initial_price=100.0
)

# Black-Scholes benchmark deltas under the same market parameters.
bs_deltas = compute_black_scholes_deltas(
    seq_prices, num_days=10, num_trading_days=30, mu=0.0, sigma=0.2, strike=1.0
)

# Iterating over the transpose yields one array per hedging step; stacking
# them along axis 1 puts the steps on the second axis for compute_rewards.
seq_rewards_bs = compute_rewards(
    seq_prices, jnp.stack(bs_deltas.T, axis=1), strike=1.0, cost_eps=0.002
)
pnl_bs = seq_rewards_bs.sum(axis=1)
utility_bs = compute_utility(seq_rewards_bs, utility_lambda=0.1)

print(f"PnL = {pnl_bs}")
print(f"Utility = {utility_bs}")

PnL = [-4.602 -5.373 -5.441 -4.263 -5.173 -5.036 -4.139 -4.962]
Utility = -4.883941650390625


In [6]:

# "exp" models, classical simulation: rewards, per-path PnL and utility.
seq_rewards_exp_sim = compute_rewards(
    seq_prices, jnp.stack(classical_deltas_exp, axis=1), strike=1.0, cost_eps=0.002
)
pnl_exp_sim = seq_rewards_exp_sim.sum(axis=1)
utility_exp_sim = compute_utility(seq_rewards_exp_sim, utility_lambda=0.1)

# "exp" models, hardware runs: same evaluation on the hardware deltas.
seq_rewards_exp_hw = compute_rewards(
    seq_prices, jnp.stack(hardware_deltas_exp, axis=1), strike=1.0, cost_eps=0.002
)
pnl_exp_hw = seq_rewards_exp_hw.sum(axis=1)
utility_exp_hw = compute_utility(seq_rewards_exp_hw, utility_lambda=0.1)

print(f"Simulator PnL = {pnl_exp_sim}")
print(f"Hardware PnL = {pnl_exp_hw}")

print(f"Simulator Utility = {utility_exp_sim}")
print(f"Hardware Utility = {utility_exp_hw}")

Simulator PnL = [0.082 -6.207 -3.651 0.967 -6.770 -4.740 2.286 -6.701]
Hardware PnL = [0.002 -6.074 -3.634 0.888 -6.736 -4.804 2.256 -6.830]
Simulator Utility = -3.6507270336151123
Hardware Utility = -3.667651414871216


In [7]:

# "dist" models, classical simulation: rewards, per-path PnL and utility.
seq_rewards_dist_sim = compute_rewards(
    seq_prices, jnp.stack(classical_deltas_dist, axis=1), strike=1.0, cost_eps=0.002
)
pnl_dist_sim = seq_rewards_dist_sim.sum(axis=1)
utility_dist_sim = compute_utility(seq_rewards_dist_sim, utility_lambda=0.1)

# "dist" models, hardware runs: same evaluation on the hardware deltas.
seq_rewards_dist_hw = compute_rewards(
    seq_prices, jnp.stack(hardware_deltas_dist, axis=1), strike=1.0, cost_eps=0.002
)
pnl_dist_hw = seq_rewards_dist_hw.sum(axis=1)
utility_dist_hw = compute_utility(seq_rewards_dist_hw, utility_lambda=0.1)

print(f"Simulator PnL = {pnl_dist_sim}")
print(f"Hardware PnL = {pnl_dist_hw}")

print(f"Simulator Utility = {utility_dist_sim}")
print(f"Hardware Utility = {utility_dist_hw}")

Simulator PnL = [-1.813 -8.317 -5.111 1.469 -2.728 -4.790 0.391 -3.948]
Hardware PnL = [-1.799 -8.213 -4.940 1.364 -2.995 -4.669 0.366 -4.206]
Simulator Utility = -3.541973829269409
Hardware Utility = -3.55318546295166
