In [12]:
import itertools
import sys
import warnings
from math import factorial
from typing import Callable, NamedTuple

import jax
import jax.numpy as jnp
import numpy as np
import optax
import pickle

# Silence every warning for the rest of the session.
warnings.filterwarnings("ignore")
# Render NumPy floats with exactly three decimals when arrays are printed.
np.set_printoptions(formatter={"float": lambda value: "{0:0.3f}".format(value)})

class Agent(NamedTuple):
    """Immutable bundle of the three callables that make up an agent.

    In this notebook ``make_agent`` only fills ``eval_step``; it passes
    ``None`` for ``init`` and ``train_step``.
    """

    # Initialisation callable (set to None by make_agent in this notebook).
    init: Callable
    # Training-step callable (set to None by make_agent in this notebook).
    train_step: Callable
    # Evaluation callable; make_agent's version returns a metrics dict.
    eval_step: Callable

def load_params(file_name):
    """Load a pickled pytree from ``file_name`` with every leaf as a JAX array.

    Args:
        file_name: path of the pickle file to read.

    Returns:
        The unpickled object with ``jnp.array`` applied to each leaf.

    Note:
        ``pickle.load`` can execute arbitrary code while loading; only call
        this on files you trust.
    """
    with open(file_name, "rb") as handle:
        pytree = pickle.load(handle)
    # Leaves are stored as NumPy arrays; convert them to JAX arrays.
    return jax.tree_util.tree_map(jnp.array, pytree)

def compute_black_scholes_deltas(
    seq_prices,
    *,
    num_days=8,
    num_trading_days=252,
    mu=0.0,
    sigma=0.5,
    strike=1.0,
):
    """Evaluate the Black-Scholes ``N(d1)`` delta on every day except the last.

    Args:
        seq_prices: price paths indexed as ``[path, day]``; the final column
            is dropped before the deltas are computed.
        num_days: number of decision days; must equal the number of columns
            left after dropping the final one.
        num_trading_days: day count used to convert days into year fractions.
        mu: drift term added inside ``d1``.
        sigma: volatility.
        strike: strike as a multiple of the first path's first price.

    Returns:
        Array of deltas with one row per path and ``num_days`` columns.
    """
    # Trailing axis is added first, then the terminal price is removed.
    expanded = seq_prices[..., None][:, :-1]
    spot = expanded[..., 0]
    # The strike is anchored on the first price of the first path only.
    strike_price = expanded[0, 0] * strike

    # Time remaining to expiry, largest first (num_days, ..., 1 days).
    remaining = jnp.arange(num_days, 0, -1) / num_trading_days
    T = jnp.tile(remaining, (expanded.shape[0], 1))

    numerator = jnp.log(spot / strike_price) + (mu + 0.5 * sigma**2) * T
    denominator = sigma * jnp.sqrt(T)
    d1 = numerator / denominator
    return jax.scipy.stats.norm.cdf(d1, 0.0, 1.0)


def compute_prices(
    seq_jumps,
    *,
    num_trading_days=252,
    mu=0.0,
    sigma=0.5,
    initial_price=100.0,
):
    """Turn a grid of 0/1 jumps into price paths.

    The jumps are standardised (mean 0, std 1 for a fair coin), accumulated
    into a scaled random walk and exponentiated with a drift correction.

    Args:
        seq_jumps: 2-D array indexed ``[path, day]`` of Bernoulli outcomes.
        num_trading_days: day count used to scale time and the random walk.
        mu: drift.
        sigma: volatility.
        initial_price: price assigned to the first column of every path.

    Returns:
        Array with one more column than ``seq_jumps``; column 0 is
        ``initial_price``.
    """
    jumps_per_day = 1
    p_up = 0.5
    # Centre, then rescale by the Bernoulli standard deviation.
    standardised = (seq_jumps - p_up) / np.sqrt(p_up * (1 - p_up))
    num_paths, num_days = standardised.shape
    standardised = standardised.reshape(num_paths, num_days * jumps_per_day)

    walk = jnp.cumsum(standardised, axis=1) / np.sqrt(jumps_per_day * num_trading_days)
    elapsed = jnp.arange(1, 1 + num_days) / num_trading_days
    growth = jnp.exp((mu - sigma**2 / 2) * elapsed + sigma * walk)

    # Prepend the (normalised) starting price before scaling the whole path.
    start = jnp.ones((num_paths, 1))
    return jnp.concatenate([start, growth], axis=1) * initial_price


def compute_rewards(seq_prices, seq_deltas, *, strike=0.9, cost_eps=0.0):
    """Per-day cash flows of holding ``seq_deltas`` against a short call.

    Args:
        seq_prices: prices indexed ``[path, day]`` with one more column than
            ``seq_deltas``.
        seq_deltas: holdings indexed ``[path, day]``.
        strike: strike as a multiple of each path's first price.
        cost_eps: proportional transaction cost applied to the absolute
            traded amount.

    Returns:
        Array shaped like ``seq_prices``; the option payoff is added to the
        last column.
    """
    # Trades: open the first position, rebalance day by day, unwind at the end.
    opening = seq_deltas[:, :1]
    rebalancing = jnp.diff(seq_deltas, axis=1)
    closing = -seq_deltas[:, -1:]
    trades = jnp.concatenate((opening, rebalancing, closing), axis=1)

    option_payoff = -jnp.maximum(seq_prices[:, -1] - strike * seq_prices[:, 0], 0.0)
    cash_flows = -(jnp.abs(trades) * cost_eps + trades) * seq_prices
    return cash_flows.at[:, -1].add(option_payoff)


def compute_bounds(
    num_days=8,
    num_trading_days=252,
    mu=0.0,
    sigma=0.5,
    initial_price=100.0,
    strike=0.9,
    cost_eps=0.0,
):
    """Bounds derived from the all-down and all-up price paths.

    Prices are generated for a path of only 0-jumps and a path of only
    1-jumps; the returned array stacks a lower and an upper value per day,
    in reversed day order with the entry for the initial price removed.
    (The name ``Gt_range`` in the original suggests these bound the
    return-to-go -- TODO confirm against the caller.)

    Args:
        num_days: number of jump days.
        num_trading_days, mu, sigma, initial_price: forwarded to
            ``compute_prices``.
        strike: strike as a multiple of ``initial_price``.
        cost_eps: currently unused.

    Returns:
        Array with two rows (min, max) and ``num_days`` columns.
    """
    # TODO: add cost_eps
    extreme_jumps = jnp.stack([jnp.zeros(num_days), jnp.ones(num_days)], axis=0)
    prices_min, prices_max = compute_prices(
        extreme_jumps,
        num_trading_days=num_trading_days,
        mu=mu,
        sigma=sigma,
        initial_price=initial_price,
    )
    strike_level = strike * initial_price
    payoffs_min = -jnp.maximum(prices_max - strike_level, 0)

    upper = 2 * (prices_max - strike_level)
    lower = 2 * (prices_min - strike_level) + payoffs_min
    # Reverse the day order and drop what was originally the first entry.
    values_max = upper[:0:-1]
    values_min = lower[:0:-1]
    return jnp.stack((values_min, values_max), axis=0)


def compute_returns(seq_rewards):
    """Reverse cumulative sum along axis 1: entry ``t`` is the sum of rewards from ``t`` onward."""
    backwards = jnp.flip(seq_rewards, axis=1)
    return jnp.flip(jnp.cumsum(backwards, axis=1), axis=1)


def compute_utility(seq_rewards, *, utility_lambda=1.0):
    """Exponential (entropic) utility of the per-path total rewards.

    Computes ``-1/lambda * log(mean(exp(-lambda * R)))`` where ``R`` is the
    sum of ``seq_rewards`` along axis 1.

    The log-mean-exp is evaluated with ``logsumexp`` so that strongly
    negative returns (large ``-lambda * R``) do not overflow ``exp`` and
    turn the result into ``-inf``.

    Args:
        seq_rewards: rewards indexed ``[path, day]``.
        utility_lambda: risk-aversion coefficient (must be non-zero).

    Returns:
        Scalar utility.
    """
    from jax.scipy.special import logsumexp

    returns = seq_rewards.sum(axis=1)
    exponents = -utility_lambda * returns
    # log(mean(exp(x))) == logsumexp(x) - log(number of elements)
    log_mean_exp = logsumexp(exponents) - jnp.log(exponents.size)
    return -1 / utility_lambda * log_mean_exp




In [13]:
def get_square_idxs(num_qubits, num_layers=None):
    """Nearest-neighbour qubit pairs arranged in alternating even/odd sub-layers.

    Args:
        num_qubits: number of qubits on the line.
        num_layers: how many (even, odd) pairs of sub-layers to emit;
            defaults to ``1 + floor(log2(num_qubits))``.

    Returns:
        A list of ``2 * num_layers`` sub-layers, each a list of ``(i, i + 1)``
        tuples; even-start and odd-start sub-layers alternate.
    """
    if num_layers is None:
        num_layers = 1 + int(np.log2(num_qubits))
    last_start = num_qubits - 1
    even_pairs = [(left, left + 1) for left in range(0, last_start, 2)]
    odd_pairs = [(left, left + 1) for left in range(1, last_start, 2)]
    return [even_pairs, odd_pairs] * num_layers


def make_ortho_fn(rbs_idxs, num_qubits):
    """Build a function mapping a flat angle vector to an orthogonal matrix.

    Each entry of ``rbs_idxs`` is one layer: a list of index pairs that each
    receive a 2x2 rotation. Angles are consumed layer by layer, in order.

    Args:
        rbs_idxs: list of layers, each a list of ``(i, j)`` index pairs.
        num_qubits: size of the resulting square matrix.

    Returns:
        ``orthogonal_fn(thetas)`` which returns the product of all layer
        matrices (later layers applied on the left) with both axes reversed.
    """
    layers = [[list(pair) for pair in layer] for layer in rbs_idxs]
    # offsets[k]:offsets[k + 1] is the slice of angles owned by layer k.
    offsets = np.cumsum([0] + [len(layer) for layer in layers])

    def get_rbs_unary(theta):
        # Stack [[cos, sin], [-sin, cos]] and move the 2x2 axes to the end.
        cos_theta, sin_theta = jnp.cos(theta), jnp.sin(theta)
        blocks = jnp.array([[cos_theta, sin_theta], [-sin_theta, cos_theta]])
        leading_axes = tuple(range(2, blocks.ndim))
        return blocks.transpose(*leading_axes, 0, 1)

    def get_layer_unary(layer_no, thetas):
        touched = [index for pair in layers[layer_no] for index in pair]
        untouched = [index for index in range(num_qubits) if index not in touched]
        angles = thetas[offsets[layer_no] : offsets[layer_no + 1]]
        rotation_blocks = get_rbs_unary(angles)
        identity_block = jnp.eye(num_qubits - len(touched), dtype=thetas.dtype)
        # Block-diagonal matrix is laid out as (touched..., untouched...);
        # the inverse permutation puts rows/columns back in natural order.
        restore_order = np.argsort(touched + untouched)
        layer_unary = jax.scipy.linalg.block_diag(*rotation_blocks, identity_block)
        return layer_unary[restore_order][:, restore_order]

    def orthogonal_fn(thetas):
        unitaries = jnp.stack(
            [get_layer_unary(layer_no, thetas) for layer_no in range(len(layers))]
        )
        if len(unitaries) > 1:
            product = jnp.linalg.multi_dot(unitaries[::-1])
        else:
            product = unitaries[0]
        # Reverse both the row and the column order.
        return product[::-1][:, ::-1]

    return orthogonal_fn


def compute_compound(unary, order=1):
    """Compound matrix of ``unary``: determinants of all ``order`` x ``order`` minors.

    Args:
        unary: square matrix.
        order: size of the minors.

    Returns:
        * ``jnp.ones((1, 1))`` when ``order`` is 0 or equals the matrix size,
        * ``unary`` itself when ``order`` is 1,
        * otherwise the matrix whose ``[a, b]`` entry is the determinant of
          the minor with rows from the ``a``-th and columns from the ``b``-th
          ``order``-subset (subsets in ``itertools.combinations`` order).
    """
    dim = unary.shape[-1]
    if order == 0 or order == dim:
        return jnp.ones((1, 1))
    if order == 1:
        return unary

    subsets = list(itertools.combinations(range(dim), order))
    # Axes after both gathers: (row subset, row pos, col subset, col pos).
    minors = unary[subsets, ...][..., subsets]
    # Bring the two subset axes to the front so det acts on each minor.
    return jnp.linalg.det(minors.swapaxes(1, 2))

def decompose_state(state):
    """Split a state vector into its fixed-Hamming-weight components.

    Basis index ``sum(2**q for q in subset)`` is assigned to the subspace
    whose weight is ``len(subset)``; within a subspace the entries follow
    ``itertools.combinations`` order.

    Args:
        state: array whose last axis has length ``2**num_qubits``; any
            leading axes are treated as batch dimensions.

    Returns:
        ``(alphas, betas)`` where ``alphas[..., w]`` is the norm of the
        weight-``w`` component and ``betas`` is a list (one entry per weight)
        of the components divided by ``alpha + 1e-6``.
    """
    num_qubits = int(np.log2(state.shape[-1]))
    batch_dims = state.shape[:-1]
    flat_states = state.reshape(-1, 2**num_qubits)

    # Basis indices of every weight-w subspace, w = 0 .. num_qubits.
    subspace_idxs = [
        [
            sum(1 << qubit for qubit in subset)
            for subset in itertools.combinations(range(num_qubits), weight)
        ]
        for weight in range(num_qubits + 1)
    ]

    alphas, betas = [], []
    for idxs in subspace_idxs:
        component = flat_states[..., np.asarray(idxs)]
        alpha = jnp.linalg.norm(component, axis=-1)
        # The small constant keeps empty subspaces (alpha == 0) finite.
        beta = component / (alpha[..., None] + 1e-6)
        alphas.append(alpha.reshape(*batch_dims, -1))
        betas.append(beta.reshape(*batch_dims, -1))

    # Each alpha carries a singleton axis from the reshape; drop it after stacking.
    alphas = jnp.stack(alphas, -1)[..., 0, :]
    return alphas, betas


In [16]:
from hardware_utils import prepare_circuit_compound, run_circuit_compound
import config
def make_agent(
    num_days=14,
    num_jumps=1,
    num_trading_days=252,
    mu=0.0,
    sigma=0.2,
    initial_price=100.0,
    strike=1.0,
    cost_eps=0.0,
    train_num_paths=32,
    eval_num_paths=32,
    utility_lambda=0.1,
    model="vanilla",
):
    """Build an evaluation-only ``Agent``.

    The returned agent's ``eval_step`` computes one delta per day twice --
    once classically from compound matrices and once through
    ``hardware_utils`` -- and compares the resulting rewards with each other
    and with Black-Scholes deltas.

    Args:
        num_days: number of decision days.
        num_trading_days, mu, sigma, initial_price: price-model settings
            forwarded to ``compute_prices`` / ``compute_black_scholes_deltas``.
        strike: strike as a multiple of the initial price.
        cost_eps: proportional transaction cost.
        num_jumps, train_num_paths, eval_num_paths, utility_lambda, model:
            accepted so a full hyper-parameter dict can be splatted in, but
            not read by this function.

    Returns:
        ``Agent(init=None, train_step=None, eval_step=eval_step)``.
    """

    def _prepare_step(time_step):
        """Circuit layout and decomposed input state for one decision day."""
        num_qubits = num_days - time_step + 2
        log_qubits = int(np.log2(num_qubits))
        # Use fewer layers once the accumulated depth exceeds 10.
        depth = 2 * max(1, time_step) * log_qubits
        if depth <= 10:
            num_layers = log_qubits
        else:
            num_layers = max(log_qubits // 2, 1)
        rbs_idxs = get_square_idxs(num_qubits, num_layers=num_layers)
        num_params = sum(map(len, rbs_idxs))
        # Uniform state over the remaining days, tensored with a fixed
        # two-qubit basis state.
        num_basis = 2 ** (num_days - time_step)
        state = jnp.ones((num_basis,)) / np.sqrt(num_basis)
        state = jnp.kron(state, jnp.array([0.0, 1.0, 0.0, 0.0]))
        alphas, betas = decompose_state(state)
        return num_qubits, rbs_idxs, num_params, alphas, betas

    def _expected_deltas(alphas, deltas_betas):
        """Average a [0, 1] grid under each subspace distribution, weighted by alpha**2."""
        deltas_dist = [
            beta**2 @ jnp.linspace(0, 1, beta.shape[-1]) for beta in deltas_betas
        ]
        deltas_exp = [alpha**2 * dist for alpha, dist in zip(alphas, deltas_dist)]
        return jnp.array(deltas_exp).sum(0)

    def net_fn_apply(params, key, batch_jumps):
        """Classical evaluation: returns (jumps seen on the last day, list of per-day deltas)."""
        seq_deltas_exp = []
        for time_step in range(num_days):
            seq_jumps = batch_jumps[:, :time_step]
            num_qubits, rbs_idxs, num_params, alphas, betas = _prepare_step(time_step)
            thetas = params[0]["actor_thetas_{}".format(time_step)]
            thetas = thetas.reshape(-1, num_params)
            unaries = jax.vmap(make_ortho_fn(rbs_idxs, num_qubits))(thetas)
            if time_step == 0:
                # No history yet: every path shares the single matrix.
                seq_unaries = jnp.repeat(unaries, seq_jumps.shape[0], axis=0)
            else:
                # unaries[1][t] is used where jump t is 1, unaries[0][t] where it is 0.
                unaries = unaries.reshape(2, time_step, num_qubits, num_qubits)
                seq_unaries = jnp.einsum("bt,tij->btij", seq_jumps, unaries[1])
                seq_unaries += jnp.einsum("bt,tij->btij", 1 - seq_jumps, unaries[0])
                if time_step > 1:
                    seq_unaries = jax.vmap(jnp.linalg.multi_dot)(seq_unaries[:, ::-1, :, :])
                else:
                    seq_unaries = seq_unaries[:, 0]
            compounds = [
                jax.vmap(compute_compound, in_axes=(0, None))(seq_unaries, order)
                for order in range(num_qubits + 1)
            ]
            deltas_betas = [compound @ beta for compound, beta in zip(compounds, betas)]
            seq_deltas_exp.append(_expected_deltas(alphas, deltas_betas))
        return (
            seq_jumps,
            seq_deltas_exp,
        )

    def hardware_net_fn_apply(params, key, batch_jumps, device_id, backend_name):
        """Hardware/emulator evaluation with the same return structure as ``net_fn_apply``."""
        seq_deltas_exp = []
        for time_step in range(num_days):
            seq_jumps = batch_jumps[:, :time_step]
            num_qubits, rbs_idxs, _, alphas, _ = _prepare_step(time_step)
            thetas = params[0]["actor_thetas_{}".format(time_step)]
            # Begin Quantum-HW
            circs = [
                prepare_circuit_compound(rbs_idxs, time_step, num_qubits, jumps, thetas)
                for jumps in seq_jumps
            ]
            results = jnp.array(run_circuit_compound(circs, num_qubits, device_id, backend_name))
            # NOTE(review): the alphas measured on the device are discarded and
            # the input-state alphas are used for the weighting below, exactly
            # as before -- confirm this is intended.
            _, deltas_betas = decompose_state(results)
            # End Quantum-HW
            seq_deltas_exp.append(_expected_deltas(alphas, deltas_betas))

        return (
            seq_jumps,
            seq_deltas_exp,
        )

    def eval_step(params, batch_jumps, device_id, backend_name):
        """Evaluate simulator, hardware and Black-Scholes hedges on ``batch_jumps``.

        Returns a dict with keys ``returns``, ``hw_returns``, ``seq_deltas``,
        ``seq_deltas_hw``, ``U_0.1``, ``U_hw_0.1``, ``U_bs_0.1`` and
        ``seq_prices``.
        """
        key = jax.random.PRNGKey(123)
        keys = jax.random.split(key, 4)
        net_params = params

        seq_jumps, seq_deltas_exp = net_fn_apply(net_params, keys[0], batch_jumps)
        seq_jumps, seq_deltas_exp_hw = hardware_net_fn_apply(
            net_params, keys[0], batch_jumps, device_id, backend_name
        )

        # Backends whose name ends in "E" get a fair-coin final-day jump;
        # all others get a final-day jump that is always 1.
        if backend_name[-1] == "E":
            final_day_prob = 0.5
        else:
            final_day_prob = 1.
        day_jumps = jax.random.bernoulli(
            keys[1], final_day_prob, (seq_jumps.shape[0], 1)
        )
        seq_jumps = jnp.concatenate([seq_jumps, day_jumps], axis=-1)
        seq_prices = compute_prices(
            seq_jumps,
            num_trading_days=num_trading_days,
            mu=mu,
            sigma=sigma,
            initial_price=initial_price,
        )
        seq_deltas_hw = jnp.stack(seq_deltas_exp_hw, axis=1)
        seq_deltas = jnp.stack(seq_deltas_exp, axis=1)
        seq_bs_deltas = compute_black_scholes_deltas(
            seq_prices,
            num_days=num_days,
            num_trading_days=num_trading_days,
            mu=mu,
            sigma=sigma,
            strike=strike,
        )
        seq_rewards = compute_rewards(
            seq_prices, seq_deltas, strike=strike, cost_eps=cost_eps
        )
        seq_hw_rewards = compute_rewards(
            seq_prices, seq_deltas_hw, strike=strike, cost_eps=cost_eps
        )
        seq_bs_rewards = compute_rewards(
            seq_prices, seq_bs_deltas, strike=strike, cost_eps=cost_eps
        )
        returns = seq_rewards.sum(axis=1)
        hw_returns = seq_hw_rewards.sum(axis=1)
        metrics = {
            "returns": jnp.array(returns),
            "hw_returns": jnp.array(hw_returns),
            "seq_deltas": jnp.array(seq_deltas_exp),
            "seq_deltas_hw": jnp.array(seq_deltas_exp_hw),
        }
        # NOTE(review): the evaluation risk aversion is fixed at 0.1 and the
        # ``utility_lambda`` argument of make_agent is not consulted; the
        # metric keys below are kept because ``experiment`` looks them up by
        # this exact value.
        eval_lambda = 1E-1
        utility = compute_utility(seq_rewards, utility_lambda=eval_lambda)
        hw_utility = compute_utility(seq_hw_rewards, utility_lambda=eval_lambda)
        bs_utility = compute_utility(seq_bs_rewards, utility_lambda=eval_lambda)
        metrics[f'U_{eval_lambda}'] = utility
        metrics[f'U_hw_{eval_lambda}'] = hw_utility
        metrics[f'U_bs_{eval_lambda}'] = bs_utility
        metrics['seq_prices'] = seq_prices
        return metrics

    return Agent(init=None, train_step=None, eval_step=eval_step)


def experiment(hparams, seed, params_save_loc, jumps_save_loc, device_id, backend_name):
    """Evaluate one saved agent on simulator and hardware and print a comparison.

    Args:
        hparams: keyword arguments for ``make_agent``; must also contain
            ``"model"``, ``"cost_eps"`` and ``"num_trading_days"``.
        seed: accepted for call-site compatibility; not used here.
        params_save_loc: pickle file with the agent parameters.
        jumps_save_loc: pickle file with the batch of jumps to evaluate on.
        device_id: forwarded to the agent's ``eval_step``.
        backend_name: forwarded to the agent's ``eval_step``.

    Side effects:
        Resets ``config.global_number_of_circuits_executed`` and
        ``config.global_hardware_run_results_dict`` and prints utilities,
        deltas and terminal PnL.
    """
    # Reset the shared run state kept on the ``config`` module.
    config.global_number_of_circuits_executed = 0
    config.global_hardware_run_results_dict = {
        'model_type' : hparams["model"],
        'measurementRes' : None,
        'epsilon' : hparams["cost_eps"],
        'layer_type' : 'actor-critic',
        'backend_name' : None,
        'num_trading_days' : hparams["num_trading_days"],
        'batch_idx' : 0,
    }
    agent = make_agent(**hparams)
    params = load_params(params_save_loc)
    batch_jumps = np.array(load_params(jumps_save_loc))
    eval_metrics = agent.eval_step(params, batch_jumps, device_id, backend_name)
    eval_metrics = jax.device_get(eval_metrics)
    # Read the counter back from ``config``: a local int copy can never
    # change and would always print 0. (Presumably hardware_utils updates
    # this attribute while running circuits -- verify.)
    print(f'Total number of circuits executed = {config.global_number_of_circuits_executed}')

    utility_lambda = 1E-1
    utility_agent = eval_metrics[f'U_{utility_lambda}']
    utility_hw_agent = eval_metrics[f'U_hw_{utility_lambda}']

    print("---"*10+'Utility'+"---"*10)
    print("Agent {:.2f}, Hardware Agent {:,.2f}".format(utility_agent, utility_hw_agent ))
    print("---"*10+'Deltas'+"---"*10)
    print(f'Agent :\n {eval_metrics["seq_deltas"]}')
    print(f'Hardware Agent :\n {eval_metrics["seq_deltas_hw"]}')
    print("---"*10+'Terminal PnL'+"---"*10)
    print(f'Agent :\n {eval_metrics["returns"]}')
    print(f'Hardware Agent :\n {eval_metrics["hw_returns"]}')

# Hardware Emulator Backend

In [17]:
# Environment shared by all three saved agents.
num_days = 10
env_kwargs = {
    "num_days": num_days,
    "num_jumps": 1,
    "num_trading_days": 30,
    "mu": 0.0,
    "sigma": 0.2,
    "initial_price": 100.0,
    "strike": 1.,
    "cost_eps": 0.002,
    "utility_lambda": 0.1,
}

# Work on a copy so env_kwargs itself never gains a "model" entry.
hparams = dict(env_kwargs)

# Saved parameters, keyed by model type.
params_dict = {
    "distributional": 'params/20221116-121451_10-0.002-1.0_distributional.pkl',
    "expected": 'params/20221117-131056_10-0.002-1.0_expected.pkl',
    "vanilla": 'params/20221117-123435_10-0.002-1.0_vanilla.pkl'
}

# Run every model once on the emulator backend.
for key, params_path in params_dict.items():
    hparams["model"] = key
    experiment(
        hparams,
        seed=19983,
        params_save_loc=params_path,
        jumps_save_loc='data/seq_jumps_10_days',
        device_id="1128_part_2",
        backend_name='quantinuum_H1-1E',
    )

Using precomputed counts from data/1128_part_2_distributional_quantinuum_H1-1E_actor-critic_0.002_0.json


: 

: 

# Hardware Backend

In [21]:
# Same environment as the emulator runs.
num_days = 10
env_kwargs = {
    "num_days": num_days,
    "num_jumps": 1,
    "num_trading_days": 30,
    "mu": 0.0,
    "sigma": 0.2,
    "initial_price": 100.0,
    "strike": 1.,
    "cost_eps": 0.002,
    "utility_lambda": 0.1,
}

hparams = dict(env_kwargs)

# One entry per hardware experiment, executed in order.
hardware_runs = [
    # Experiment 1
    {
        "model": 'distributional',
        "params_save_loc": 'params/20221116-121451_10-0.002-1.0_distributional.pkl',
        "jumps_save_loc": 'data/seq_jumps_10_days_hardware_exp_1',
        "device_id": "1118_device",
        "backend_name": 'quantinuum_H1-1',
    },
    # Experiment 2
    {
        "model": 'distributional',
        "params_save_loc": 'params/20221116-121451_10-0.002-1.0_distributional.pkl',
        "jumps_save_loc": 'data/seq_jumps_10_days_hardware_exp_2',
        "device_id": "1122_part2_2_device",
        "backend_name": 'quantinuum_H1-1',
    },
    # Experiment 3
    {
        "model": 'expected',
        "params_save_loc": 'params/20221117-131056_10-0.002-1.0_expected.pkl',
        "jumps_save_loc": 'data/seq_jumps_10_days_hardware_exp',
        "device_id": "1207_part_3_device",
        "backend_name": 'quantinuum_H1-2',
    },
]

for run in hardware_runs:
    hparams["model"] = run["model"]
    experiment(
        hparams,
        seed=19983,
        params_save_loc=run["params_save_loc"],
        jumps_save_loc=run["jumps_save_loc"],
        device_id=run["device_id"],
        backend_name=run["backend_name"],
    )

Using precomputed counts from data/1118_device_distributional_quantinuum_H1-1_actor-critic_0.002_0.json
Using precomputed counts from data/1118_device_distributional_quantinuum_H1-1_actor-critic_0.002_1.json
Using precomputed counts from data/1118_device_distributional_quantinuum_H1-1_actor-critic_0.002_2.json
Using precomputed counts from data/1118_device_distributional_quantinuum_H1-1_actor-critic_0.002_3.json
Using precomputed counts from data/1118_device_distributional_quantinuum_H1-1_actor-critic_0.002_4.json
Using precomputed counts from data/1118_device_distributional_quantinuum_H1-1_actor-critic_0.002_5.json
Using precomputed counts from data/1118_device_distributional_quantinuum_H1-1_actor-critic_0.002_6.json
Using precomputed counts from data/1118_device_distributional_quantinuum_H1-1_actor-critic_0.002_7.json
Using precomputed counts from data/1118_device_distributional_quantinuum_H1-1_actor-critic_0.002_8.json
Using precomputed counts from data/1118_device_distributional_qu

: 

: 

#### Use actions to compute utility

In [9]:
# Deltas from the classical simulation and from hardware for the expected model
classical_deltas_exp = load_params("data/classical_deltas_exp")
hardware_deltas_exp = load_params("data/hardware_deltas_exp")

# Deltas from the classical simulation and from hardware for the distributional model
classical_deltas_dist = load_params("data/classical_deltas_dist")
hardware_deltas_dist = load_params("data/hardware_deltas_dist")

# Jumps for the 10 days used in the hardware experiments
batch_jumps = jnp.array(load_params('data/seq_jumps_10_days_hardware_exp'))

# The final-day jump does not influence any action, so a random sample was
# drawn once and stored on disk for reproducibility.
day_jumps = load_params("data/final_day_jumps_10_days_hardware_exp")

seq_jumps = jnp.concatenate([batch_jumps, day_jumps], axis=-1)

seq_prices = compute_prices(
    seq_jumps,
    num_trading_days=30,
    mu=0.,
    sigma=0.2,
    initial_price=100.,
)

bs_deltas = compute_black_scholes_deltas(
    seq_prices,
    num_days=10,
    num_trading_days=30,
    mu=0.0,
    sigma=0.2,
    strike=1.0,
)


def _rewards_pnl_utility(deltas):
    """Return (per-day rewards, per-path PnL, utility) for ``deltas`` on ``seq_prices``."""
    rewards = compute_rewards(seq_prices, deltas, strike=1., cost_eps=0.002)
    return rewards, rewards.sum(axis=1), compute_utility(rewards, utility_lambda=0.1)


# Black Scholes PnL and Utility
seq_rewards_bs, pnl_bs, utility_bs = _rewards_pnl_utility(
    jnp.stack(bs_deltas.T, axis=1)
)

print(f'Black-Scholes PnL = {pnl_bs}')
print(f'Black-Scholes Utility = {utility_bs}')

# Expected Actor-Critic PnL and Utility
seq_rewards_exp_sim, pnl_exp_sim, utility_exp_sim = _rewards_pnl_utility(
    jnp.stack(classical_deltas_exp, axis=1)
)
seq_rewards_exp_hw, pnl_exp_hw, utility_exp_hw = _rewards_pnl_utility(
    jnp.stack(hardware_deltas_exp, axis=1)
)

print(f'Expected Actor-Critic Simulator PnL = {pnl_exp_sim}')
print(f'Expected Actor-Critic Hardware PnL = {pnl_exp_hw}')

print(f'Expected Actor-Critic Simulator Utility = {utility_exp_sim}')
print(f'Expected Actor-Critic Hardware Utility = {utility_exp_hw}')

# Distributional Actor-Critic PnL and Utility
seq_rewards_dist_sim, pnl_dist_sim, utility_dist_sim = _rewards_pnl_utility(
    jnp.stack(classical_deltas_dist, axis=1)
)
seq_rewards_dist_hw, pnl_dist_hw, utility_dist_hw = _rewards_pnl_utility(
    jnp.stack(hardware_deltas_dist, axis=1)
)

print(f'Distributional Actor-Critic Simulator PnL = {pnl_dist_sim}')
print(f'Distributional Actor-Critic Hardware PnL = {pnl_dist_hw}')

print(f'Distributional Actor-Critic Simulator Utility = {utility_dist_sim}')
print(f'Distributional Actor-Critic Hardware Utility = {utility_dist_hw}')



Black-Scholes PnL = [-4.602 -5.373 -5.441 -4.263 -5.173 -5.036 -4.139 -4.962]
Black-Scholes Utility = -4.883941650390625
Expected Actor-Critic Simulator PnL = [0.082 -6.207 -3.651 0.967 -6.770 -4.740 2.286 -6.701]
Expected Actor-Critic Hardware PnL = [0.002 -6.074 -3.634 0.888 -6.736 -4.804 2.256 -6.830]
Expected Actor-Critic Simulator Utility = -3.6507270336151123
Expected Actor-Critic Hardware Utility = -3.667651414871216
Distributional Actor-Critic Simulator PnL = [-1.813 -8.317 -5.111 1.469 -2.728 -4.790 0.391 -3.948]
Distributional Actor-Critic Hardware PnL = [-1.799 -8.213 -4.940 1.364 -2.995 -4.669 0.366 -4.206]
Distributional Actor-Critic Simulator Utility = -3.541973829269409
Distributional Actor-Critic Hardware Utility = -3.55318546295166
