In [1]:
import sys
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
sys.path.append("../src")

from simulator import rBergomi
from deep_hedge import dnn

In [2]:
# --- Simulation set-up ------------------------------------------------------
seed = 1234          # RNG seed handed to rBergomi.get_path
grid_num = 30        # number of time steps (the hedging loops run range(grid_num))
path_num = 10_000    # number of simulated paths
T = 30.0 / 365.0     # horizon passed to the simulator; presumably 30 days in years

# --- Model parameters passed to rBergomi.get_path ---------------------------
# NOTE(review): meanings assumed from the usual rough-Bergomi notation
# (H: Hurst exponent, eta: vol-of-vol, xi: initial variance level,
# rho: spot/vol correlation) -- confirm against ../src/simulator.
H = 0.1
eta = 1.9
xi = 0.235 * 0.235
rho = -0.7

# --- Contract ----------------------------------------------------------------
S0 = 100.0           # initial value passed to the simulator
K = 100.0            # strike used in the call payoff max(S_T - K, 0)

In [3]:
# Simulate the paths once; every later cell reuses them.
# S and V are indexed as [time step][path] further down (S[-1] is used as the
# terminal value in the payoff).  V is presumably the variance process -- TODO confirm.
S, V = rBergomi.get_path(
    seed, grid_num, path_num,
    T, S0,
    H, eta, xi, rho,
)

In [4]:
# Monte Carlo reference price: plain average of the call payoff max(S_T - K, 0)
# over all simulated paths (no discount factor is applied).
call_payoff = np.maximum(S[-1] - K, 0)
MC_price = np.average(call_payoff)

In [5]:
# Display the Monte Carlo estimate; the y_network outputs printed further down
# can be compared against this number.
MC_price

2.013439648833331

In [6]:
# One-step increments along the time axis: row t holds X[t + 1] - X[t].
# These are the quantities the hedge positions are multiplied with during training.
dS, dV = (path[1:] - path[:-1] for path in (S, V))

In [7]:
# Seed torch's global RNG before the networks are built so that the initial
# weights -- and therefore the whole training curve -- are the same on every
# Restart & Run All.  Previously only the path simulator was seeded.
# NOTE(review): assumes dnn.FeedForwardNetwork uses torch's default
# initialisers (i.e. draws from the global torch RNG) -- confirm.
torch.manual_seed(seed)

# y_network: fed the time-0 state (S, V); its single output is used as the
# initial portfolio value.
y_network = dnn.FeedForwardNetwork(input_size=2, output_size=1, hidden_size=100)
# z_network: fed the current state (S, V); its two outputs are the positions
# multiplied with dS and dV respectively.
z_network = dnn.FeedForwardNetwork(input_size=2, output_size=2, hidden_size=100)

In [8]:
# Training, variant 1: memoryless hedge.
#
# The terminal portfolio value
#     p = y(S_0, V_0) + sum_t [ z_0(S_t, V_t) * dS_t + z_1(S_t, V_t) * dV_t ]
# is regressed (MSE) on the call payoff max(S_T - K, 0).  Even epochs update
# y_network only ("critic"), odd epochs update z_network only ("actor").
learning_rate = 0.001
opt_y = optim.Adam(params=y_network.parameters(), lr=learning_rate)
opt_z = optim.Adam(params=z_network.parameters(), lr=learning_rate)

# NOTE(review): the schedulers are stepped once every 10 epochs, i.e. 40 times
# in total, while patience=500 -- so the learning rate is never reduced in this
# run.  The `verbose` flag was dropped: it is deprecated in recent PyTorch
# releases and could never have printed anything with these settings.
scheduler_y = torch.optim.lr_scheduler.ReduceLROnPlateau(opt_y, 'min', factor=0.5, patience=500, min_lr=1e-7)
scheduler_z = torch.optim.lr_scheduler.ReduceLROnPlateau(opt_z, 'min', factor=0.5, patience=500, min_lr=1e-7)

# Everything below depends only on the simulated paths, so it is converted to
# tensors once instead of once per epoch and time step (the original rebuilt
# the state array with a Python loop over all paths at every step).
# states_all[t] has one row per path holding (S[t], V[t]); this relies on S and
# V being indexed as [time step][path], as the rest of the notebook does.
states_all = torch.FloatTensor(np.stack((S, V), axis=-1))
dS_all = torch.FloatTensor(dS)
dV_all = torch.FloatTensor(dV)
targets = torch.FloatTensor(np.maximum(S[-1] - K, 0))
loss_fn = torch.nn.MSELoss()

for ep in range(400):
    # Initial portfolio value predicted from the time-0 state.
    y = y_network(states_all[0])
    p = torch.squeeze(y)
    # Accumulate the trading gains along each path.
    for t in range(grid_num):
        z = z_network(states_all[t])
        p = p + z[:, 0] * dS_all[t] + z[:, 1] * dV_all[t]

    if ep % 2 == 0:
        critic_loss = loss_fn(p, targets)
        opt_y.zero_grad()
        critic_loss.backward()
        opt_y.step()
    else:
        actor_loss = loss_fn(p, targets)
        opt_z.zero_grad()
        actor_loss.backward()
        opt_z.step()

    if ep % 10 == 9:
        # ep is odd here, so actor_loss is from this epoch while critic_loss
        # is the value computed in the previous (even) epoch.
        scheduler_y.step(critic_loss.item())
        scheduler_z.step(actor_loss.item())
        print('ep: {}'.format(ep),
              'time: {}'.format(t),
              'Training critic loss: {:.8e}'.format(critic_loss.item()),
              'Training actor loss: {:.8e}'.format(actor_loss.item()))

ep: 9 time: 29 Training critic loss: 3.93597937e+00 Training actor loss: 3.83796835e+00
ep: 19 time: 29 Training critic loss: 4.06941414e+00 Training actor loss: 4.01097250e+00
ep: 29 time: 29 Training critic loss: 2.82034540e+00 Training actor loss: 2.85649014e+00
ep: 39 time: 29 Training critic loss: 2.54890656e+00 Training actor loss: 2.57082486e+00
ep: 49 time: 29 Training critic loss: 2.41235328e+00 Training actor loss: 2.41900492e+00
ep: 59 time: 29 Training critic loss: 2.29462624e+00 Training actor loss: 2.29046059e+00
ep: 69 time: 29 Training critic loss: 2.21148109e+00 Training actor loss: 2.20773602e+00
ep: 79 time: 29 Training critic loss: 2.19302177e+00 Training actor loss: 2.18981290e+00
ep: 89 time: 29 Training critic loss: 2.19599175e+00 Training actor loss: 2.19370937e+00
ep: 99 time: 29 Training critic loss: 2.18041301e+00 Training actor loss: 2.17865086e+00
ep: 109 time: 29 Training critic loss: 2.15960741e+00 Training actor loss: 2.15867901e+00
ep: 119 time: 29 Trai

In [9]:
# Evaluate the trained y_network on the time-0 state of every path.
# One row per path: (S[0], V[0]).
states = np.stack((S[0][:path_num], V[0][:path_num]), axis=1)
y = y_network(torch.FloatTensor(states))
# Column 0 is the network's estimate of the initial portfolio value.
print(y[:, 0])

tensor([2.2338, 2.2338, 2.2338,  ..., 2.2338, 2.2338, 2.2338],
       grad_fn=<SelectBackward0>)


In [10]:
# Seed torch's global RNG before the networks are built so that this
# experiment starts from the same weights on every Restart & Run All,
# independently of which cells ran before it.
# NOTE(review): assumes dnn.FeedForwardNetwork uses torch's default
# initialisers (i.e. draws from the global torch RNG) -- confirm.
torch.manual_seed(seed)

# y_network: fed the time-0 state (S, V).  Output 0 is the initial portfolio
# value; outputs 1: are fed to z_network as its extra inputs at t = 0.
y_network = dnn.FeedForwardNetwork(input_size=2, output_size=3, hidden_size=100)
# z_network: fed (S, V) plus its own two outputs from the previous step;
# its two outputs are the positions multiplied with dS and dV.
z_network = dnn.FeedForwardNetwork(input_size=4, output_size=2, hidden_size=100)

In [11]:
# Training, variant 2: recurrent hedge.
#
# Same objective as before (MSE between the terminal portfolio value and the
# call payoff), but z_network additionally receives its own previous output as
# input.  At t = 0 that slot is filled with outputs 1: of y_network.
# Even epochs update y_network only ("critic"), odd epochs z_network ("actor").
learning_rate = 0.001
opt_y = optim.Adam(params=y_network.parameters(), lr=learning_rate)
opt_z = optim.Adam(params=z_network.parameters(), lr=learning_rate)

# NOTE(review): the schedulers are stepped once every 10 epochs, i.e. 40 times
# in total, while patience=500 -- so the learning rate is never reduced in this
# run.  The `verbose` flag was dropped: it is deprecated in recent PyTorch
# releases and could never have printed anything with these settings.
scheduler_y = torch.optim.lr_scheduler.ReduceLROnPlateau(opt_y, 'min', factor=0.5, patience=500, min_lr=1e-7)
scheduler_z = torch.optim.lr_scheduler.ReduceLROnPlateau(opt_z, 'min', factor=0.5, patience=500, min_lr=1e-7)

# Path-dependent inputs are converted to tensors once, not once per epoch and
# time step (the original rebuilt the state array with a Python loop over all
# paths at every step).  states_all[t] has one row per path holding
# (S[t], V[t]); this relies on S and V being indexed as [time step][path].
states_all = torch.FloatTensor(np.stack((S, V), axis=-1))
dS_all = torch.FloatTensor(dS)
dV_all = torch.FloatTensor(dV)
targets = torch.FloatTensor(np.maximum(S[-1] - K, 0))
loss_fn = torch.nn.MSELoss()

for ep in range(400):
    y = y_network(states_all[0])
    # Output 0: initial portfolio value.
    p = y[:, 0]
    # Outputs 1: stand in for "the previous z" at the first time step.
    z_prev = y[:, 1:]
    for t in range(grid_num):
        z = z_network(torch.cat((states_all[t], z_prev), 1))
        p = p + z[:, 0] * dS_all[t] + z[:, 1] * dV_all[t]
        # The full output is fed back in at the next step (gradients flow
        # through this recurrence).
        z_prev = z

    if ep % 2 == 0:
        critic_loss = loss_fn(p, targets)
        opt_y.zero_grad()
        critic_loss.backward()
        opt_y.step()
    else:
        actor_loss = loss_fn(p, targets)
        opt_z.zero_grad()
        actor_loss.backward()
        opt_z.step()

    if ep % 10 == 9:
        # ep is odd here, so actor_loss is from this epoch while critic_loss
        # is the value computed in the previous (even) epoch.
        scheduler_y.step(critic_loss.item())
        scheduler_z.step(actor_loss.item())
        print('ep: {}'.format(ep),
              'time: {}'.format(t),
              'Training critic loss: {:.8e}'.format(critic_loss.item()),
              'Training actor loss: {:.8e}'.format(actor_loss.item()))

ep: 9 time: 29 Training critic loss: 1.07903862e+01 Training actor loss: 1.08887835e+01
ep: 19 time: 29 Training critic loss: 3.43784952e+00 Training actor loss: 3.48486567e+00
ep: 29 time: 29 Training critic loss: 2.30514455e+00 Training actor loss: 2.30508709e+00
ep: 39 time: 29 Training critic loss: 2.36897540e+00 Training actor loss: 2.34845376e+00
ep: 49 time: 29 Training critic loss: 2.40398884e+00 Training actor loss: 2.38689804e+00
ep: 59 time: 29 Training critic loss: 2.32226443e+00 Training actor loss: 2.31199169e+00
ep: 69 time: 29 Training critic loss: 2.22488093e+00 Training actor loss: 2.22086310e+00
ep: 79 time: 29 Training critic loss: 2.18199444e+00 Training actor loss: 2.17992640e+00
ep: 89 time: 29 Training critic loss: 2.17379260e+00 Training actor loss: 2.17331505e+00
ep: 99 time: 29 Training critic loss: 2.16418481e+00 Training actor loss: 2.16365671e+00
ep: 109 time: 29 Training critic loss: 2.14014077e+00 Training actor loss: 2.13985324e+00
ep: 119 time: 29 Trai

In [12]:
# Evaluate the trained y_network on the time-0 state of every path.
# One row per path: (S[0], V[0]).
states = np.stack((S[0][:path_num], V[0][:path_num]), axis=1)
y = y_network(torch.FloatTensor(states))

In [13]:
# Column 0 of the y_network output: the value the training loop uses as the
# initial portfolio value (p = y[:,0]).  Compare with MC_price.
y[:,0]

tensor([2.2296, 2.2296, 2.2296,  ..., 2.2296, 2.2296, 2.2296],
       grad_fn=<SelectBackward0>)

In [14]:
# Seed torch's global RNG before the networks are built so that this
# experiment starts from the same weights on every Restart & Run All,
# independently of which cells ran before it.
# NOTE(review): assumes dnn.FeedForwardNetwork uses torch's default
# initialisers (i.e. draws from the global torch RNG) -- confirm.
torch.manual_seed(seed)

# y_network: fed the time-0 state (S, V).  Output 0 is the initial portfolio
# value; outputs 1: are fed to z_network as its extra inputs at t = 0.
y_network = dnn.FeedForwardNetwork(input_size=2, output_size=3, hidden_size=100)
# z_network: fed (S, V) plus two carried-over values.  Outputs 0 and 1 are the
# positions multiplied with dS and dV; outputs 2: are carried to the next step.
z_network = dnn.FeedForwardNetwork(input_size=4, output_size=4, hidden_size=100)

In [15]:
# Training, variant 3: recurrent hedge with a separate carried state.
#
# Same objective as before (MSE between the terminal portfolio value and the
# call payoff).  z_network emits four values per step: columns 0 and 1 are the
# positions applied to dS and dV, columns 2: are fed back as extra inputs at
# the next step.  At t = 0 the extra inputs are outputs 1: of y_network.
# Even epochs update y_network only ("critic"), odd epochs z_network ("actor").
learning_rate = 0.001
opt_y = optim.Adam(params=y_network.parameters(), lr=learning_rate)
opt_z = optim.Adam(params=z_network.parameters(), lr=learning_rate)

# NOTE(review): the schedulers are stepped once every 10 epochs, i.e. 40 times
# in total, while patience=500 -- so the learning rate is never reduced in this
# run.  The `verbose` flag was dropped: it is deprecated in recent PyTorch
# releases and could never have printed anything with these settings.
scheduler_y = torch.optim.lr_scheduler.ReduceLROnPlateau(opt_y, 'min', factor=0.5, patience=500, min_lr=1e-7)
scheduler_z = torch.optim.lr_scheduler.ReduceLROnPlateau(opt_z, 'min', factor=0.5, patience=500, min_lr=1e-7)

# Path-dependent inputs are converted to tensors once, not once per epoch and
# time step (the original rebuilt the state array with a Python loop over all
# paths at every step).  states_all[t] has one row per path holding
# (S[t], V[t]); this relies on S and V being indexed as [time step][path].
states_all = torch.FloatTensor(np.stack((S, V), axis=-1))
dS_all = torch.FloatTensor(dS)
dV_all = torch.FloatTensor(dV)
targets = torch.FloatTensor(np.maximum(S[-1] - K, 0))
loss_fn = torch.nn.MSELoss()

for ep in range(400):
    y = y_network(states_all[0])
    # Output 0: initial portfolio value.
    p = y[:, 0]
    # Outputs 1: initialise the carried state.
    carried = y[:, 1:]
    for t in range(grid_num):
        z = z_network(torch.cat((states_all[t], carried), 1))
        p = p + z[:, 0] * dS_all[t] + z[:, 1] * dV_all[t]
        # Only the last two outputs are passed on; the positions themselves
        # are not fed back.
        carried = z[:, 2:]

    if ep % 2 == 0:
        critic_loss = loss_fn(p, targets)
        opt_y.zero_grad()
        critic_loss.backward()
        opt_y.step()
    else:
        actor_loss = loss_fn(p, targets)
        opt_z.zero_grad()
        actor_loss.backward()
        opt_z.step()

    if ep % 10 == 9:
        # ep is odd here, so actor_loss is from this epoch while critic_loss
        # is the value computed in the previous (even) epoch.
        scheduler_y.step(critic_loss.item())
        scheduler_z.step(actor_loss.item())
        print('ep: {}'.format(ep),
              'time: {}'.format(t),
              'Training critic loss: {:.8e}'.format(critic_loss.item()),
              'Training actor loss: {:.8e}'.format(actor_loss.item()))

ep: 9 time: 29 Training critic loss: 1.25398092e+01 Training actor loss: 1.24094105e+01
ep: 19 time: 29 Training critic loss: 4.81526852e+00 Training actor loss: 4.76439667e+00
ep: 29 time: 29 Training critic loss: 2.26286530e+00 Training actor loss: 2.26988316e+00
ep: 39 time: 29 Training critic loss: 2.71872020e+00 Training actor loss: 2.73342609e+00
ep: 49 time: 29 Training critic loss: 2.77288103e+00 Training actor loss: 2.78337812e+00
ep: 59 time: 29 Training critic loss: 2.43854141e+00 Training actor loss: 2.44344091e+00
ep: 69 time: 29 Training critic loss: 2.23457503e+00 Training actor loss: 2.23569679e+00
ep: 79 time: 29 Training critic loss: 2.23852372e+00 Training actor loss: 2.23717427e+00
ep: 89 time: 29 Training critic loss: 2.24491549e+00 Training actor loss: 2.24310231e+00
ep: 99 time: 29 Training critic loss: 2.22562504e+00 Training actor loss: 2.22481155e+00
ep: 109 time: 29 Training critic loss: 2.21270752e+00 Training actor loss: 2.21282220e+00
ep: 119 time: 29 Trai

In [16]:
# Evaluate the trained y_network on the time-0 state of every path.
# One row per path: (S[0], V[0]).
states = np.stack((S[0][:path_num], V[0][:path_num]), axis=1)
y = y_network(torch.FloatTensor(states))
# Column 0 is the network's estimate of the initial portfolio value.
print(y[:, 0])

tensor([2.2322, 2.2322, 2.2322,  ..., 2.2322, 2.2322, 2.2322],
       grad_fn=<SelectBackward0>)


In [17]:
# Full y_network output, one row per path: column 0 is the initial portfolio
# value, columns 1: are the values fed to z_network as extra inputs at t = 0.
y

tensor([[ 2.2322,  6.7361, -3.7967],
        [ 2.2322,  6.7361, -3.7967],
        [ 2.2322,  6.7361, -3.7967],
        ...,
        [ 2.2322,  6.7361, -3.7967],
        [ 2.2322,  6.7361, -3.7967],
        [ 2.2322,  6.7361, -3.7967]], grad_fn=<AddmmBackward0>)