In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import os

import add_features

from statsmodels.tsa.stattools import adfuller
from tensorboardX import SummaryWriter
# tensorboardX event writer, created with default arguments as soon as this cell runs.
# NOTE(review): not referenced by any other cell visible in this notebook.
writer = SummaryWriter()


%matplotlib inline

# Debug switch read by get_profit(): when True it prints the first sample of its
# inputs and the first two computed profits.
PRINT = False

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
import keras
import torch
import torchvision

In [3]:
# Data locations. The defaults are the original author's local Windows paths;
# set VINDISH_DATA_DIR / VINDISH_SAMPLES_DIR to run the notebook on another machine.
# os.path.join(path, "") guarantees a trailing separator, which matters because
# later cells build file names by plain string concatenation (e.g. e_dir + "X.npy").
c_dir = os.path.join(
    os.environ.get("VINDISH_DATA_DIR", "C://Users/shlomi/Documents/Work/vindish/data/"), ""
)
e_dir = os.path.join(
    os.environ.get("VINDISH_SAMPLES_DIR", "E:\\Work/Vindish/created_samples/"), ""
)

In [4]:
# Load the pre-built arrays from e_dir. X and y become float32 torch tensors;
# features is kept as the NumPy array stored on disk.
X = torch.from_numpy(np.load(e_dir + "X.npy")).float()
y = torch.from_numpy(np.load(e_dir + "y.npy")).float()
features = np.load(e_dir + "features.npy")

In [5]:
# Sanity check: show the shapes, then the tensor types, of the loaded X and y.
for report in ((X.shape, y.shape), (X.type(), y.type())):
    print(*report)

torch.Size([49938, 10, 20]) torch.Size([49938, 5])
torch.FloatTensor torch.FloatTensor


In [6]:
# Print every entry of `features` next to its position in the array.
for i, j in enumerate(features):
    print(f"{i} {j}")

0 Time_To_Expiration
1 UX1
2 UX2
3 UX3
4 UX4
5 UX5
6 SP500
7 dow
8 dom
9 doy
10 UX1_diff
11 UX2_diff
12 UX3_diff
13 UX4_diff
14 UX5_diff
15 SP500_diff
16 day_of_month
17 day_of_week
18 day_of_year
19 time_of_day


### Define Model

In [7]:
class MiniConv2d(torch.nn.Module):
    """Seven stacked 3x3 convolutions (padding 1), each followed by a ReLU.

    The first layer maps 1 input channel to 5 channels and the remaining six
    keep 5 channels. Kernel 3 with padding 1 and the default stride of 1 keeps
    the spatial size, so an input of shape (N, 1, H, W) yields (N, 5, H, W).
    """

    def __init__(self):
        super().__init__()
        channels_in = 1
        # Register the layers as conv1 .. conv7 so that state_dict keys stay
        # compatible with existing checkpoints.
        for layer_no in range(1, 8):
            setattr(self, f"conv{layer_no}", torch.nn.Conv2d(channels_in, 5, 3, padding=1))
            channels_in = 5

    def forward(self, x):
        """Run x through conv1 .. conv7, applying a ReLU after every layer."""
        for layer_no in range(1, 8):
            conv = getattr(self, f"conv{layer_no}")
            x = torch.relu(conv(x))

        return x

In [8]:
class MiniConv1d(torch.nn.Module):
    """Seven stacked Conv2d layers, each followed by a ReLU.

    Despite the name, every layer is a torch.nn.Conv2d. Only the first layer's
    kernel is configurable (`init_kernel_size`, default (3, 2)); it maps
    1 channel to 5 with padding (1, 1). Layers 2-7 are 3x3 convolutions with
    padding (1, 1) on 5 channels. With the default (3, 2) kernel the first
    layer widens the last dimension by one (W + 2 - 2 + 1 = W + 1).
    """

    def __init__(self, init_kernel_size=(3, 2)):
        super().__init__()
        # Layer 1 uses the caller-supplied kernel; layers 2-7 are fixed at 3x3.
        kernel_sizes = [init_kernel_size] + [(3, 3)] * 6
        channels_in = 1
        # Names conv1 .. conv7 are kept so state_dict keys remain unchanged.
        for layer_no, kernel in enumerate(kernel_sizes, start=1):
            setattr(
                self,
                f"conv{layer_no}",
                torch.nn.Conv2d(channels_in, 5, kernel, padding=(1, 1)),
            )
            channels_in = 5

    def forward(self, x):
        """Run x through conv1 .. conv7, applying a ReLU after every layer."""
        for layer_no in range(1, 8):
            conv = getattr(self, f"conv{layer_no}")
            x = torch.relu(conv(x))

        return x

In [9]:
class Embeddings(torch.nn.Module):
    """Thin wrapper around torch.nn.Embedding.

    Args:
        n_categories: size of the lookup table (number of distinct indices).
        n_dims: length of the vector returned for each index.
    """

    def __init__(self, n_categories, n_dims):
        super().__init__()
        self.embed = torch.nn.Embedding(num_embeddings=n_categories, embedding_dim=n_dims)

    def forward(self, x):
        """Look up the embedding vector of every integer index in x."""
        return self.embed(x)

In [10]:
class Model(torch.nn.Module):
    """Multi-branch network mapping a feature window to 5 outputs.

    The input is sliced along its last axis into feature groups (indices refer
    to the feature listing printed earlier in the notebook):

    * 1:6       UX1..UX5                         -> MiniConv2d
    * 10:15     UX1_diff..UX5_diff               -> MiniConv2d
    * [6, 15]   SP500, SP500_diff                -> MiniConv1d (first kernel (3, 2))
    * [0,18,19] Time_To_Expiration, day_of_year, time_of_day
                                                 -> MiniConv1d (first kernel (3, 3))
    * 17        day_of_week                      -> 3-dim embedding (7 categories)
    * 16        day_of_month                     -> 5-dim embedding (31 categories)

    All branch outputs are flattened per sample, concatenated and passed through
    three fully connected layers (880 -> 64 -> 32 -> 5).

    The fc1 width of 880 is tied to windows of 10 time steps:
    250 (UX) + 250 (UX diffs) + 150 (S&P) + 50 (dom) + 30 (dow) + 150 (singles).
    """

    def __init__(self):
        super(Model, self).__init__()
        self.mini_conv_ux = MiniConv2d()
        self.mini_conv_ux_diffs = MiniConv2d()
        self.mini_conv_snp = MiniConv1d()
        self.mini_conv_singles = MiniConv1d(init_kernel_size=(3,3))
        self.embed_dow = Embeddings(7, 3)
        # NOTE(review): 31 categories only cover indices 0..30; if day_of_month is
        # stored 1-based, the value 31 would be out of range - confirm the encoding.
        self.embed_dom = Embeddings(31, 5)

        self.fc1 = torch.nn.Linear(880, 64)
        self.fc2 = torch.nn.Linear(64, 32)
        self.fc3 = torch.nn.Linear(32, 5)

    def forward(self, x):
        """Compute the 5 outputs for a batch of windows.

        Args:
            x: tensor indexed as (batch, time, feature); at least 20 features are
                required by the slicing below.

        Returns:
            Tensor of shape (batch, 5).
        """
        n_batch = x.shape[0]

        # Out-of-place unsqueeze adds the channel axis expected by Conv2d. The
        # original in-place unsqueeze_ mutated views of the caller's tensor, which
        # is unnecessary and can be rejected by autograd when the input requires grad.
        ux_vals = x[:, :, 1:6].unsqueeze(1)
        ux_diffs = x[:, :, 10:15].unsqueeze(1)
        snp_data = x[:, :, [6,15]].unsqueeze(1)
        singles = x[:, :,  [0, 18, 19]].unsqueeze(1) # time_to_expiration, doy, time_of_day

        # Categorical calendar features are cast to integer indices for the embeddings.
        dow = x[:, :, 17].type(torch.long)
        dom = x[:, :, 16].type(torch.long)

        # Each branch output is flattened to (batch, -1).
        x_ux = self.mini_conv_ux(ux_vals).view(n_batch, -1)
        x_diffs = self.mini_conv_ux_diffs(ux_diffs).view(n_batch, -1)
        x_snp = self.mini_conv_snp(snp_data).view(n_batch, -1)
        x_dow = self.embed_dow(dow).view(n_batch, -1)
        x_dom = self.embed_dom(dom).view(n_batch, -1)
        x_singles = self.mini_conv_singles(singles).view(n_batch, -1)

        # Concatenation order must not change: fc1's trained weights depend on it.
        x = torch.cat((x_ux, x_diffs, x_snp, x_dom, x_dow, x_singles), 1)

        x = torch.nn.functional.relu(self.fc1(x))
        x = torch.nn.functional.relu(self.fc2(x))
        x = self.fc3(x)

        return x

### Split train / validation / test

In [11]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [12]:
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset

In [13]:
# Mini-batch size for the training and validation loaders.
BATCH_SIZE = 64
# Five fixed float32 weights, placed on `device`.
# NOTE(review): presumably the `betas` argument of calc_loss / get_hedging_score - confirm.
betas = torch.tensor([0.62, 0.44, 0.32, 0.26, 0.21], dtype=torch.float32, device=device)

In [14]:
# Contiguous split by position: first 60% train, next 20% validation, last 20% test.
n_samples = X.shape[0]
n_train = int(n_samples*0.6)
n_val = int(n_samples*0.8)

X_train, y_train = X[:n_train], y[:n_train]
X_val, y_val = X[n_train:n_val], y[n_train:n_val]
X_test, y_test = X[n_val:], y[n_val:]

# Dropping all end-of-period samples from train and validation: keep only the
# windows whose last time step has feature 0 (Time_To_Expiration) above 0.3.
# The test split is deliberately left unfiltered.
ser = pd.Series(X_train[:, -1, 0].detach().numpy())
idx_to_keep = np.flatnonzero((ser > 0.3).values)
X_train, y_train = X_train[idx_to_keep], y_train[idx_to_keep]

ser = pd.Series(X_val[:, -1, 0].detach().numpy())
idx_to_keep = np.flatnonzero((ser > 0.3).values)
X_val, y_val = X_val[idx_to_keep], y_val[idx_to_keep]

In [15]:
# Pair inputs with targets for each split.
train_ds, val_ds, test_ds = (
    TensorDataset(X_train, y_train),
    TensorDataset(X_val, y_val),
    TensorDataset(X_test, y_test),
)

# Only the training loader shuffles; the test loader yields one sample per batch.
train_dl = DataLoader(train_ds, shuffle=True, batch_size=BATCH_SIZE)
val_dl = DataLoader(val_ds, shuffle=False, batch_size=BATCH_SIZE)
test_dl = DataLoader(test_ds, batch_size=1)

### Run Model

In [16]:
# Build a freshly initialised network and move it to the selected device.
# A disabled experiment here used to zero fc3's weights and set its bias to
# [200., 0., 0., 0., 0.] before training.
model = Model().to(device)

In [17]:
def get_profit(y, x, alphas):
    """Return the per-sample profit, sum over dim 1 of (y - x) * alphas.

    Args:
        y: values subtracted from (calc_loss passes the target batch here).
        x: values subtracted (calc_loss passes the last-step UX columns here).
        alphas: per-column weights multiplied with the difference.

    Returns:
        Tensor holding one value per row.

    When the module-level PRINT flag is true, the first row of x and y and the
    first two results are printed for debugging.
    """
    debug = PRINT
    if debug:
        print("Xs:\n", x[0].cpu())
        print("ys:\n", y[0].cpu())

    weighted_move = (y - x) * alphas
    profit = weighted_move.sum(dim=1)

    if debug:
        print("L3s:\n", profit[:2].cpu())
    return profit

In [18]:
def get_dist_from_200(alphas):
    """Return, per row, the signed gap between sum(|alphas|) over dim 1 and 200."""
    total_abs = alphas.abs().sum(dim=1)
    return total_abs - 200.

In [19]:
def get_hedging_score(alphas, betas):
    """Return, per row, the sum over dim 1 of the element-wise product alphas * betas."""
    weighted = alphas * betas
    return weighted.sum(dim=1)

In [20]:
def calc_loss(alphas, betas, x_batch, y_batch, a=1, b=10, c=10000):
    """Return the summed training loss for one batch.

    Per sample the loss is ``a*L1 + b*L2 - c*L3`` where

    * L1 = get_dist_from_200(alphas) ** 2
    * L2 = get_hedging_score(alphas, betas) ** 2
    * L3 = get_profit(y_batch, x_batch[:, -1, 1:6], alphas), which is subtracted
      so that a larger profit lowers the loss.

    Args:
        alphas: model outputs for the batch, one row per sample.
        betas: weights forwarded to get_hedging_score.
        x_batch: input windows; only columns 1:6 of the last time step are used.
        y_batch: targets forwarded to get_profit.
        a: weight of the L1 term (default 1, the previously hard-coded value).
        b: weight of the L2 term (default 10, as before).
        c: weight of the profit term (default 10000, as before).

    The weights are parameters so a training loop can change them between
    epochs; calling without them reproduces the original behaviour.

    Returns:
        Scalar tensor: the sum of the per-sample losses over the batch.
    """
    L1 = get_dist_from_200(alphas)**2
    L2 = get_hedging_score(alphas, betas)**2
    L3 = get_profit(y_batch, x_batch[:, -1, 1:6], alphas)
    L = a*L1 + b*L2 - c*L3

    return L.sum()

TODO: vary the loss weights `a` and `b` across epochs so that the profit term is also optimized during the initial steps of training.

In [21]:
### testing

In [22]:
# Fresh network; its weights are overwritten by the checkpoint loaded in the next cell.
model = Model()
# PATH = save_path
import os
from glob import glob

# glob() returns matches in arbitrary order, so indexing with [-1] does not
# reliably pick a particular file. Select the most recently modified checkpoint
# explicitly and fail with a clear message when none exists.
checkpoint_paths = glob("model*.tar")
if not checkpoint_paths:
    raise FileNotFoundError("no checkpoint matching 'model*.tar' found in " + os.getcwd())
PATH = max(checkpoint_paths, key=os.path.getmtime)

In [23]:
# PATH = "model_vindish_epoch_18_train_loss_-280_val_loss107.pth.tar"
# map_location remaps the stored tensors onto `device`, so a checkpoint saved on
# a GPU can also be restored on a CPU-only machine.
model.load_state_dict(torch.load(PATH, map_location=device))
model.to(device)
# Switch to inference mode; as the last expression this also displays the module summary.
model.eval()

Model(
  (mini_conv_ux): MiniConv2d(
    (conv1): Conv2d(1, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv2): Conv2d(5, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv3): Conv2d(5, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv4): Conv2d(5, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv5): Conv2d(5, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv6): Conv2d(5, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv7): Conv2d(5, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (mini_conv_ux_diffs): MiniConv2d(
    (conv1): Conv2d(1, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv2): Conv2d(5, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv3): Conv2d(5, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv4): Conv2d(5, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv5): Conv2d(5, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

In [24]:
# Containers for the back-test.
profits = []
costs = []
trans_cost = 0.01  # cost charged per unit of absolute position traded
alphas_test = []

# Seed alphas_test with the model output for the first test sample so the loop
# below always has a "previous" allocation at alphas_test[-2].
x, y = next(iter(test_dl))
x, y = x.to(device), y.to(device)
# Inference only: no_grad keeps the stored output from holding an autograd graph.
with torch.no_grad():
    alphas_test.append(model(x).type(torch.Tensor).to(device))

In [25]:
# Cash-flow ledgers: income starts at 0, and outcome starts with the (negative)
# transaction cost of opening the first allocation.
income = [0]
opening_cost = (alphas_test[-1].abs().sum()*trans_cost).detach().cpu().numpy()
outcome = [-opening_cost]
outcome

[-2.0723355]

In [26]:
# Smallest feature-0 value (Time_To_Expiration in the feature listing) across the
# time steps of the sample currently held in x.
x[0, :, 0].min()

tensor(33.9028, device='cuda:0')

In [27]:
# Back-test over the test split, one sample per step.
# State:
#   flag_re_buy   True while no position is held (start, or after a sell-off).
#   alphas_test   model outputs; [-1] is the current step, [-2] the previous one.
#   income / outcome   cash flows and (negative) transaction costs.
i = 0
flag_re_buy = True

# Inference only: without no_grad every stored output keeps its autograd graph
# alive, which needlessly grows memory over the ~10k test steps.
with torch.no_grad():
    for x, y in tqdm(test_dl): # batch size is 1 for testing
        i += 1

        x, y = x.to(device), y.to(device)
        alphas_test.append(model(x).type(torch.Tensor).to(device))

        # Sell everything once the window contains a Time_To_Expiration below 0.3.
        # NOTE(review): the `i==1` alternative can never fire because flag_re_buy
        # is True on the first iteration - confirm whether it is still needed.
        if (x[0, :, 0].min()<0.3 or i==1) and not flag_re_buy:
            print(f"selling all in i={i}")
            # May also be negative if the contract that was bought is not profitable.
            income.append((alphas_test[-2]*y).sum().detach().cpu().numpy())
            flag_re_buy = True
            outcome.append(-(alphas_test[-2].abs().sum()*trans_cost).detach().cpu().numpy())
            continue

        # Re-enter the market once the whole window is past the 0.3 threshold.
        if flag_re_buy and x[0, :, 0].min()>0.3:
            print(f"re-buying all in i={i}")
            flag_re_buy = False
            outcome.append(-(alphas_test[-1].abs().sum()*trans_cost).detach().cpu().numpy())
            # NOTE(review): execution falls through to the rebalancing lines below,
            # so the re-buy step is also charged for the change versus
            # alphas_test[-2] - confirm this is intended.

        if flag_re_buy: # the case when we're in between months so we don't hold any contract
            continue

        # Regular step: pay transaction cost on the change in allocation.
        outcome.append(-np.abs(alphas_test[-1].cpu().detach().numpy()-alphas_test[-2].cpu().detach().numpy()).sum()*trans_cost)
        # NOTE(review): this uses .mean() over the 5 contracts while every other
        # cash flow uses .sum() - confirm which aggregation is intended.
        income.append(((alphas_test[-1]-alphas_test[-2])*x[:, -1, 1:6]).cpu().detach().numpy().mean())

print("Total profit including costs: {:.4f}".format(sum(income+outcome)))

  0%|                                                                                                                                                                         | 0/9988 [00:00<?, ?it/s]

re-buying all in i=1


 33%|███████████████████████████████████████████████████▍                                                                                                          | 3248/9988 [00:44<01:31, 73.61it/s]

selling all in i=3256


 33%|████████████████████████████████████████████████████▎                                                                                                         | 3308/9988 [00:44<01:30, 73.68it/s]

re-buying all in i=3309


 59%|█████████████████████████████████████████████████████████████████████████████████████████████▉                                                                | 5938/9988 [01:20<00:54, 74.11it/s]

selling all in i=5943


 60%|██████████████████████████████████████████████████████████████████████████████████████████████▊                                                               | 5995/9988 [01:20<00:53, 74.10it/s]

re-buying all in i=5996


 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                     | 8608/9988 [01:55<00:18, 74.21it/s]

selling all in i=8610


 87%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                     | 8658/9988 [01:56<00:17, 74.22it/s]

re-buying all in i=8663


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9988/9988 [02:14<00:00, 74.03it/s]


Total profit including costs: 2179.5163


In [38]:
# Income summary: total, minimum, the five largest entries as (value, position)
# pairs, and the number of entries.
top_income = sorted(((val, pos) for pos, val in enumerate(income)), reverse=True)[:5]
print(sum(income), min(income), top_income, len(income))
# Cost summary: total, minimum, maximum and the number of entries.
print(sum(outcome), min(outcome), max(outcome), len(outcome))

2200.024586277913 -0.6312307 [(array(746.3181, dtype=float32), 5891), (array(733.1569, dtype=float32), 8506), (array(721.41315, dtype=float32), 3256), (0.7093135, 8460), (0.583943, 8464)] 9833
-20.508282203674263 -2.1753435 -0.0 9837


In [None]:
# Allocation predicted for test sample 50. alphas_test holds one extra leading
# entry (appended before the back-test loop), hence position 51.
# Indexing the list directly avoids wrapping a list of tensors in a pandas Series.
alpha_50 = alphas_test[51]

In [None]:
# Profit of the stored allocation on test sample 50: slice it out as a batch of
# one, move it to the device, and compare the targets with the last-step UX columns.
x, y = X_test[50:51].to(device), y_test[50:51].to(device)
last_step_ux = x[:, -1, 1:6]
get_profit(y, last_step_ux, alpha_50).cpu().detach().numpy().mean()

In [None]:
# NOTE(review): in the visible cells `profits` is created as an empty list and
# never appended to, so this lookup would raise IndexError - confirm or remove.
profits[50]

In [None]:
# NOTE(review): `x0` is not defined in any visible cell, and this assignment
# replaces the list of per-step outputs in alphas_test with a single tensor -
# confirm this scratch cell is still needed.
alphas_test = model(x0)
