In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

from networks import *
from markethistory import *

In [5]:
%%javascript
// Toggle the visibility of every element with class `nbp-app-bar` (jQuery .toggle()).
$('.nbp-app-bar').toggle()

<IPython.core.display.Javascript object>

In [28]:
# Hyperparameters shared by the sampling, observation and training cells below.
NUM_ASSETS = 11  # width of the portfolio weight vector; also passed to DecisionNetworks_CNN(asset=...)
OBS_WINDOW = 50  # number of consecutive time steps stacked into one observation
EPISOD_WINDOW = 50  # number of trading steps simulated per training episode
TXN_FEE = 0.0025  # fee rate `c` used by calculate_mu
SAMPLING_BIAS = 5e-3  # `bias` handed to the geometric start-time sampler in train
BATCH_SIZE = 128  # number of episodes simulated side by side per optimizer step

In [4]:
# Date range of the market history to load, written as 'YYYY/MM/DD' strings.
# NOTE(review): whether `end` is inclusive is decided inside MarketHistory,
# which is not visible in this notebook — confirm before relying on it.
start = '2018/04/10'
end = '2018/04/20'
# MarketHistory is presumably provided by `from markethistory import *`;
# later cells read the loaded values from `markethistory.data`.
markethistory = MarketHistory(start, end)



In [22]:
# Chronological split of the history along its last axis (axis 2):
# the oldest ~70% is used for training, the next 20% for validation and
# the most recent 10% for testing.
N = markethistory.data.shape[2]
N_global = N  # alias kept because the original cell bound this name
N_test = int(0.1 * N)
N_valid = int(0.2 * N)
# Training takes whatever is left so the three parts always add up to N,
# even when the two int() truncations above drop a fractional step.
N_train = N - N_test - N_valid

train_data = markethistory.data[:, :, :N_train]
valid_data = markethistory.data[:, :, N_train:N_train + N_valid]
test_data = markethistory.data[:, :, N_train + N_valid:]

# The three slices must tile the time axis exactly once.
assert train_data.shape[2] + valid_data.shape[2] + test_data.shape[2] == N

print(train_data.shape)
print(valid_data.shape)
print(test_data.shape)

(3, 11, 337)
(3, 11, 96)
(3, 11, 48)


In [106]:
def sample(start, end, bias):
    """
    Geometrically sample a number in [START, END) by rejection.

    An offset is drawn from a geometric distribution with success
    probability BIAS and subtracted from END. Offsets that would land
    below START are discarded and redrawn, so the result is never clamped.

    Input:
    - start: the start (inclusive)
    - end: the end (exclusive)
    - bias: a number between 0 to 1. The closer the bias to 1, the more
      likely to generate a sample closer to END.

    Returns:
    - t: a value with START <= t < END

    Raises:
    - ValueError: if END - START < 1. Every geometric draw is at least 1,
      so no offset could ever be accepted and the loop would never end.
    """
    span = end - start
    if span < 1:
        raise ValueError(
            "cannot sample from an empty range [%s, %s)" % (start, end))
    while True:
        offset = np.random.geometric(bias)
        # Accept only offsets that keep the sample at or above START.
        if offset <= span:
            return end - offset

def sample_fast(start, end, bias):
    """
    Sample a number in [START, END) using a single geometric draw.

    The drawn offset is subtracted from END. If that would fall below
    START the result is clamped to START instead of being redrawn, so the
    tail of the distribution accumulates on START.

    Input:
    - start: the start (inclusive)
    - end: the end (exclusive)
    - bias: a number between 0 to 1. The closer the bias to 1, the more
      likely to generate a sample closer to END.
    """
    candidate = end - np.random.geometric(bias)
    if candidate < start:
        return start
    return candidate

def sample_batch(batch_size, start, end, bias):
    """
    Draw BATCH_SIZE samples in [START, END) in one vectorised call.

    Each element is END minus a geometric offset, clamped from below to
    START (the same rule sample_fast applies to a single draw).

    Input:
    - batch_size: number of samples to draw
    - start: the start (inclusive); draws that fall below it are clamped
    - end: the end (exclusive)
    - bias: success probability of the geometric distribution

    Returns:
    - a numpy array of shape [batch_size]. It has an integer dtype when
      START and END are integers, including for an empty batch, so the
      result can always be used as an index array.
    """
    offsets = np.random.geometric(bias, size=batch_size)
    return np.maximum(end - offsets, start)

def get_observation(end_t_batch, data, window=None):
    """
    Stack the WINDOW most recent time steps ending at each batch index.

    Input:
    - end_t_batch: 1-D integer index array, one entry per batch element,
      giving the last (inclusive) time step of each observation
    - data: torch tensor indexed as data[:, :, t]; per the return shape
      its axes are treated as [feature, asset, time]
    - window: number of time steps per observation; defaults to the
      module-level OBS_WINDOW when omitted

    Returns:
    - obs: A torch tensor of shape [batch, feature, asset, window],
      ordered from the oldest step to the step at END_T_BATCH

    Raises:
    - ValueError: if any observation would start before time step 0.
      Negative indices would otherwise wrap around to the end of DATA
      and silently mix unrelated time steps into the observation.
    """
    if window is None:
        window = OBS_WINDOW
    if len(end_t_batch) > 0 and int(end_t_batch.min()) - (window - 1) < 0:
        raise ValueError(
            "observation window of %d steps reaches before t=0" % window)
    # One [batch, feature, asset] slice per lag, oldest lag first.
    frames = [
        data[:, :, end_t_batch - lag].permute(2, 0, 1)
        for lag in range(window - 1, -1, -1)
    ]
    return torch.stack(frames, dim=-1)

def calculate_mu(w, w_prev, txn_fee=None, tol=1e-10, max_iter=100):
    """
    Solve for the per-sample fee factor mu by fixed-point iteration.

    For every row the function iterates

        mu <- (1 - c*w_prev[0] - (2c - c^2) * sum_i relu(w_prev[i] - mu*w[i]))
              / (1 - c*w[0])

    where the sum runs over columns 1.. and column 0 is handled separately
    (presumably the cash position — confirm against the data layout).
    NOTE(review): this looks like the transaction remainder factor of
    Jiang, Xu & Liang (2017); verify against the paper before relying on it.

    Input:
    - w: new portfolio weights, tensor of shape [batch, asset]
    - w_prev: weights before rebalancing, tensor of shape [batch, asset]
    - txn_fee: fee rate c; defaults to the module-level TXN_FEE
    - tol: stop once the largest change between two iterates is <= tol
    - max_iter: hard cap on the number of iterations

    Returns:
    - mu: tensor of shape [batch]. When the cap is reached before the
      tolerance is met (for example float32 round-off that never drops
      below TOL, or NaN inputs), the latest iterate is returned instead of
      looping forever.
    """
    c = TXN_FEE if txn_fee is None else txn_fee
    w0_0, w0_m = w_prev[:, 0], w_prev[:, 1:]
    w1_0, w1_m = w[:, 0], w[:, 1:]

    const1 = 1 - c*w0_0
    const2 = 2*c - c*c
    const3 = 1 - c*w1_0

    # Initial guess, one value per sample: reduce over the asset axis only,
    # so a sample's starting point does not depend on the rest of the batch.
    u = c * torch.sum(torch.abs(w0_m - w1_m), dim=1)
    for _ in range(max_iter):
        # u is [batch]; unsqueeze so it scales each row of w1_m.
        sold = F.relu(w0_m - u.unsqueeze(1) * w1_m)
        u_next = (const1 - const2*torch.sum(sold, dim=1)) / const3
        if torch.max(torch.abs(u - u_next)) <= tol:
            return u_next
        u = u_next
    return u

In [108]:
# Sanity check for calculate_mu on a batch of two hand-written portfolios.
# Each row has six weights that sum to 1: `w` is the target allocation and
# `w_prev` the allocation before rebalancing.
w = torch.tensor([[0.5,0.25,0.05,0.1,0.0,0.1],[0.2,0.05,0.17,0.08,0.25,0.25]])
w_prev = torch.tensor([[0.5,0.,0.5,0.,0.,0.],[0.2,0.,0.,0.,0.8,0.]])
# Left as the last expression so the notebook displays the result
# (recorded output: tensor([0.9977, 0.9972])).
calculate_mu(w, w_prev)

tensor([ 0.9977,  0.9972])

In [None]:
# TODO: instead of a fixed window size, try a randomized window size
# TODO: modify data matrix so that it includes a row of 1 for Cash
# TODO: think of better way to initialize the initial pf weights
def train(policy, data, lr, episods=10000):
    """
    Train POLICY by simulating batches of short trading episodes.

    Each iteration samples BATCH_SIZE start times, rolls the policy forward
    for EPISOD_WINDOW steps while tracking portfolio weights and values, and
    takes one Adam step on the negative mean per-step log return.

    Input:
    - policy: module called as policy(obs, pf_w) that returns new portfolio
      weights of shape [batch, asset]
    - data: array or tensor indexed as [feature, asset, time]; feature 0 is
      used as the price series. NumPy input is converted to a tensor.
    - lr: learning rate for the Adam optimizer
    - episods: number of training iterations (one optimizer step each)

    Returns:
    - None; POLICY is updated in place.

    NOTE(review): calculate_mu treats column 0 of the weights separately
    (presumably cash), while the weights here have NUM_ASSETS columns taken
    straight from DATA — see the TODO about adding a cash row and confirm.
    """
    optimizer = torch.optim.Adam(policy.parameters(), lr=lr)
    # get_observation needs tensor methods, and the arithmetic below mixes
    # DATA with the policy output, so convert to the policy's dtype up front.
    data = torch.as_tensor(data, dtype=next(policy.parameters()).dtype)
    T = data.shape[-1]
    prices = data[0]  # [asset, time]

    for _ in range(episods):
        # geometrically sample start times: [batch]
        start_indices = sample_batch(BATCH_SIZE, OBS_WINDOW, T-EPISOD_WINDOW, SAMPLING_BIAS)
        # initialize portfolio weights uniformly: [batch, asset]
        pf_w = (torch.ones(NUM_ASSETS, dtype=data.dtype) / NUM_ASSETS).repeat(BATCH_SIZE, 1)
        # initialize portfolio values: [batch]
        pf_v = torch.ones(BATCH_SIZE, dtype=data.dtype)

        # simulate one episode of live trading with the policy
        loss = 0
        # Indexing yields [asset, batch]; transpose to the [batch, asset]
        # layout that the weight arithmetic below expects.
        price_curr = prices[:, start_indices].t()
        for t in range(EPISOD_WINDOW):
            price_next = prices[:, start_indices+t+1].t()  # [batch, asset]
            obs = get_observation(start_indices+t, data)

            # Rebalance: the policy proposes weights and fees shrink the value.
            pf_w_t_start = policy(obs, pf_w)
            # calculate_mu is the fee-factor routine defined in this notebook.
            shrinkage = calculate_mu(pf_w_t_start, pf_w)
            pf_v_t_start = pf_v * shrinkage

            # Let prices move over the period; weights drift with them.
            w_tmp = (price_next / price_curr) * pf_w_t_start  # [batch, asset]
            w_tmp_sum = torch.sum(w_tmp, dim=1)  # [batch]
            pf_v_t_end = w_tmp_sum * pf_v_t_start
            pf_w_t_end = w_tmp / w_tmp_sum.unsqueeze(1)

            # Reward is the log return over the step, fees included.
            batch_reward = torch.log(pf_v_t_end / pf_v)
            loss -= torch.sum(batch_reward) / BATCH_SIZE

            # update variables
            pf_w = pf_w_t_end
            pf_v = pf_v_t_end
            price_curr = price_next
        loss /= EPISOD_WINDOW

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

In [None]:
# Instantiate the policy network. DecisionNetworks_CNN is presumably
# provided by `from networks import *`; it is sized for NUM_ASSETS assets.
policy = DecisionNetworks_CNN(asset=NUM_ASSETS)