In [1]:
import torch
import gym
import time
import numpy as np
import importlib
import matplotlib.pyplot as plt
from machin.frame.algorithms import DQN

In [2]:
# Import the project-local modules and then reload each of them, so that edits
# made to the .py files after they were first imported into this kernel take
# effect. The return value of importlib.reload is discarded into `_`.
import hedging_gym, asset_price_models, option_price_models
_ = importlib.reload(hedging_gym)
_ = importlib.reload(asset_price_models)
_ = importlib.reload(option_price_models)

# Pull the names out only after the reloads so they bind to the reloaded definitions.
from hedging_gym import HedgingEnv
from asset_price_models import GBM
from option_price_models import BSM

In [3]:
# Fix the PyTorch and NumPy global seeds before any model or environment is built.
torch.manual_seed(0)
np.random.seed(0)

# --- option model parameters ---
strike_price = 100
rf_interest_rate = 0.01
T = 1.0

# --- asset model parameters ---
mu = 0
vol = 0.15
dt = T / 8  # time increment: one eighth of T
S_0 = 100

# --- hedging gym parameters ---
max_steps = 4
option_contract_size = 100
initial_holding_fraction = 0.5
trading_cost_parameter = 0.01
trading_cost_gradient = 0.01

# Build the two price models and hand them to the hedging environment.
# All arguments are passed positionally, so their order matters.
asset_price_model = GBM(mu, vol, S_0, dt)
option_price_model = BSM(strike_price, rf_interest_rate, vol, T, dt)
env = HedgingEnv(
    asset_price_model,
    option_price_model,
    max_steps,
    option_contract_size,
    initial_holding_fraction,
    trading_cost_parameter,
    trading_cost_gradient,
)

In [4]:
# DQN hyperparameters
num_hidden = 1          # number of hidden layers in QNet
hidden_dim = 20         # width of every hidden layer
discount = 0.8          # forwarded to DQN as `discount`
epsilon_decay = 0.999   # forwarded to DQN as `epsilon_decay`
learning_rate = 0.001   # forwarded to DQN as `learning_rate`

# TODO add batch norm?
class QNet(torch.nn.Module):
    """Fully connected network mapping a state vector to one output per action.

    Architecture: ``Linear(state_dim, hidden_dim)`` -> ReLU ->
    ``num_hidden`` x (``Linear(hidden_dim, hidden_dim)`` -> ReLU) ->
    ``Linear(hidden_dim, action_dim)`` (no activation on the output).

    Args:
        state_dim: size of the last dimension of the input state tensor.
        num_hidden: number of hidden ``hidden_dim x hidden_dim`` layers (may be 0).
        hidden_dim: width of the input layer's output and of every hidden layer.
        action_dim: number of outputs produced per state.
    """

    def __init__(self, state_dim, num_hidden, hidden_dim, action_dim):
        super().__init__()
        self.input = torch.nn.Linear(state_dim, hidden_dim)
        # The hidden layers must live in a ModuleList: a plain Python list is
        # invisible to nn.Module, so its weights would be missing from
        # parameters() (never updated by the optimizer), state_dict() and
        # .to(device). Each layer is created separately so that the hidden
        # layers do not all share one set of weights.
        self.hidden = torch.nn.ModuleList(
            [torch.nn.Linear(hidden_dim, hidden_dim) for _ in range(num_hidden)]
        )
        self.output = torch.nn.Linear(hidden_dim, action_dim)

    def forward(self, state):
        """Return the network output for ``state`` (last dimension ``state_dim``)."""
        res = torch.relu(self.input(state))
        for hidden_layer in self.hidden:
            res = torch.relu(hidden_layer(res))
        return self.output(res)

# Two separately initialised networks with the same architecture: qnet is built
# first, then qnet_t. The output size is option_contract_size + 1 (presumably one
# action per integer holding from 0 to option_contract_size — TODO confirm).
qnet, qnet_t = (
    QNet(env.state_size(), num_hidden, hidden_dim, option_contract_size + 1)
    for _ in range(2)
)

# TODO add LR scheduler?
dqn = DQN(
    qnet,
    qnet_t,
    torch.optim.Adam,
    torch.nn.MSELoss(reduction='sum'),
    discount=discount,
    epsilon_decay=epsilon_decay,
    learning_rate=learning_rate,
)

In [5]:
# Import the project-local helper module and reload it so edits made to
# dqn_utils.py since its first import into this kernel take effect.
import dqn_utils
_ = importlib.reload(dqn_utils)

# Bind the helper names only after the reload so they refer to the reloaded functions.
from dqn_utils import predict_while_training, add_to_training_set, train

In [19]:
# Scratch check: difference between two back-to-back time.time() readings.
time.time() - time.time()



-9.5367431640625e-07

In [20]:

num_episodes = 1000
st = time.time()

for episode in range(num_episodes):
    # Every 25 episodes, report the wall-clock seconds elapsed since the last report.
    if episode % 25 == 0:
        now = time.time()
        tt = now - st
        st = now
        print(f"processing episode {episode} {tt}")

    # Roll out one episode and hand every transition to add_to_training_set.
    state = env.reset()
    done = False
    while not done:
        new_h = predict_while_training(dqn, state)
        new_state, pnls, done = env.step(new_h.item())

        # Reward: 0.01 * (summed P&L) minus the squared summed P&L, scaled by 1e-2.
        total_pnl = np.sum(pnls)
        reward = 1e-2 * (0.01 * total_pnl - total_pnl ** 2)

        add_to_training_set(dqn, state, new_h, new_state, reward, done)
        state = new_state

    # train() is only called once more than half of the episodes have run;
    # from then on it is called env.max_steps times after each episode.
    if episode > num_episodes / 2:
        for update_idx in range(int(env.max_steps)):
            train(dqn)


processing episode 0 0.0003440380096435547
processing episode 25 0.08294796943664551
processing episode 50 0.0507810115814209
processing episode 75 0.050363779067993164
processing episode 100 0.051980018615722656
processing episode 125 0.05080127716064453
processing episode 150 0.0505979061126709
processing episode 175 0.050306081771850586
processing episode 200 0.05003190040588379
processing episode 225 0.05168914794921875
processing episode 250 0.05069088935852051
processing episode 275 0.05037498474121094
processing episode 300 0.05017495155334473
processing episode 325 0.05007815361022949
processing episode 350 0.0521998405456543
processing episode 375 0.0506742000579834
processing episode 400 0.05067706108093262
processing episode 425 0.05099678039550781
processing episode 450 0.05210304260253906
processing episode 475 0.05331301689147949
processing episode 500 0.055185794830322266
processing episode 525 0.32958006858825684
processing episode 550 0.3349609375
processing episode 57

In [7]:
# Advance the asset price model by one step and display the resulting price.
# NOTE(review): this is the same asset_price_model instance that was passed to
# HedgingEnv, so running this cell changes state the environment shares —
# confirm env.reset() restores it.
asset_price_model.step()
asset_price_model.get_asset_price()

127.58290634295207

In [8]:
# Spot-check the option delta reported by the BSM model.
# Arguments are presumably (time or step index, asset price) — TODO confirm the signature.
option_price_model.get_option_delta(0, 100)

0.5563283472595516

In [9]:
# Inspect the model's K attribute; the recorded value (100) equals strike_price,
# so this is presumably the strike stored by BSM.
option_price_model.K

100

In [10]:
# Reset the environment and display the initial state it returns
# (the recorded output is an array with five entries).
env.reset()

array([ 50.        ,   1.        , 100.        ,   6.45948318,
         0.55632835])

In [11]:
# NOTE(review): this cell does not run as shown — its recorded output is
# "NameError: name 'num_eps' is not defined". Besides num_eps, the names
# num_actions, norm_factor, rew, test, test_delta and num_steps are not defined
# anywhere in the visible notebook. It looks like an earlier version of the
# training loop; confirm whether to restore the missing definitions or drop it.
# NOTE(review): the state is cut down to entries 0, 1, 2 and 4 below, while qnet
# is built with env.state_size() inputs — confirm the two dimensions agree.
for j in range(num_eps):
    print("episode: ", j)
    state = env.reset()
    done = False
    # Keep only entries 0, 1, 2 and 4 of the state returned by the environment.
    state = state[[0,1,2,4]]
    while not done:
        # Query the DQN for a (noisy) discrete action on a batch of one state.
        out = dqn.act_discrete_with_noise({"state": torch.tensor(state, dtype=torch.float32).unsqueeze(0)})
        temp = out.squeeze().detach().numpy()
#         print(temp)
        # Scale the action index by num_actions and subtract env.h before stepping.
        action = temp/num_actions - env.h

        new_state, reward, done = env.step(action)
        #print(action)
        #print(reward)
        # Collapse the value returned by env.step into a single scalar.
        reward = np.sum(reward)
        #print(reward)
        new_state = new_state[[0, 1, 2, 4]]
        #print(state)
        # Reward: negative of (squared sum + sum / 1000), scaled by norm_factor.
        reward = -norm_factor*((reward) ** 2 + 1 / 1000 * reward)
        rew.append(reward)

        # Store the single transition as a one-step episode.
        dqn.store_episode([{
            "state": {"state": torch.tensor(state, dtype=torch.float32).unsqueeze(0)},
            "action": {"action": out},
            "next_state": {"state": torch.tensor(new_state, dtype=torch.float32).unsqueeze(0)},
            "reward": float(reward),  # norm factor
            "terminal": done
        }])
        state = new_state

    # Every 50 episodes (except episode 0): print test results and the mean reward, then clear rew.
    if j % 50 == 0 and j != 0:
        print(test(10), test_delta(10))
        print("reward: ", np.mean(rew), np.mean(rew)/norm_factor)
        rew = []

    # After the first 100 episodes, run num_steps DQN updates per episode.
    if j > 100:
        for k in range(int(num_steps)):
            dqn.update()

NameError: name 'num_eps' is not defined