In [1]:
import gc
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn
import torch
from sklearn import svm
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from tqdm.notebook import tqdm

In [2]:
# Load the backtest dataset that every later cell reads. Per the frame displayed
# further down, it has a `datetime` column followed by one numeric column per series.
data = pd.read_csv("Workshop_Backtest.csv")

In [3]:
def sharpe_ratio(actualReturn, signals, periods_per_year=240):
    """Annualised Sharpe ratio of a signal-weighted return series.

    Returns are multiplied element-wise by their signals. When the product is
    2-D with more than one column, the columns are averaged row by row into a
    single series. The result is ``mean / std`` of that series, scaled by
    ``sqrt(periods_per_year)``.

    Parameters
    ----------
    actualReturn : array-like or DataFrame
        Realised returns, 1-D ``(n_periods,)`` or 2-D ``(n_periods, n_assets)``.
    signals : array-like or DataFrame
        Position signals with exactly the same shape as ``actualReturn``.
    periods_per_year : int, default 240
        Number of periods per year used for annualisation.

    Returns
    -------
    The annualised ratio. The standard deviation comes from the container's own
    ``.std()``: pandas objects use the sample estimate (ddof=1), NumPy arrays the
    population estimate (ddof=0).

    Raises
    ------
    AssertionError
        If the two inputs do not have the same shape.
    """
    assert actualReturn.shape == signals.shape
    dailyReturns = actualReturn * signals
    # A 1-D product is already a single series; indexing shape[1] on it would
    # raise IndexError, so check the rank before looking at the column count.
    if dailyReturns.ndim > 1 and dailyReturns.shape[1] > 1:
        dailyReturns = dailyReturns.mean(axis=1)
    return dailyReturns.mean() / dailyReturns.std() * np.sqrt(periods_per_year)

In [4]:
# Rows on/after the backtest start date with the first column (`datetime`) dropped.
# Later cells pass this frame to sharpe_ratio() as the realised returns.
# NOTE(review): the filter compares the raw `datetime` values; if they are
# un-padded 'Y/M/D' strings the ordering is lexicographic — confirm this is intended.
actual_returns = data[data['datetime'] >= '2017/1/1'].iloc[:, 1:]

In [27]:
data

Unnamed: 0,datetime,CF,SR,TA,al,au,c,cu,l,m,p,rb,ru,v,y,zn
0,2010/1/5,0.011700,0.010348,0.011220,0.048953,0.011921,0.012156,0.034943,0.005915,0.011528,0.012012,0.010009,0.028698,0.000648,0.010202,0.041029
1,2010/1/6,-0.024954,-0.035594,-0.031634,-0.043162,-0.016297,-0.009922,-0.028508,-0.033712,-0.030935,-0.040734,-0.033175,-0.010413,-0.019430,-0.033251,-0.038521
2,2010/1/7,0.006552,-0.029944,0.007801,0.009022,0.029701,0.015823,0.019672,0.006085,0.005376,0.006187,0.000446,0.018066,-0.004624,0.002293,0.002316
3,2010/1/8,0.003254,-0.000179,-0.007741,0.013691,-0.005040,-0.006231,-0.009486,-0.000806,-0.011698,-0.015651,0.001782,0.007605,-0.009954,-0.018302,-0.006932
4,2010/1/11,-0.025361,-0.007719,-0.013896,-0.049063,-0.026535,-0.014107,-0.037981,-0.050040,-0.029422,-0.037763,-0.020231,-0.027869,-0.022788,-0.031072,-0.046999
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2785,2021/6/23,0.000945,-0.002906,-0.002797,-0.004259,-0.001391,-0.001161,-0.003336,0.008197,-0.003260,-0.015468,-0.006221,0.006939,-0.013458,-0.012977,-0.001369
2786,2021/6/24,0.008816,0.008927,0.036859,0.018979,-0.000107,0.002713,0.002329,0.013759,0.010110,0.022112,0.028271,0.007657,0.013642,0.019102,0.002742
2787,2021/6/25,0.002809,0.000181,0.002318,-0.011280,-0.003323,0.007731,-0.005663,0.016039,0.013247,0.003985,0.006088,-0.003419,0.016969,-0.010224,-0.010255
2788,2021/6/28,0.000000,0.007222,-0.016962,0.000796,-0.014412,0.002301,-0.007886,-0.006072,0.007844,0.012759,-0.010346,-0.018300,0.001726,0.012051,0.006447


In [6]:
# Requirements:
# - get multi-dimensional data
# - optionally include the other columns' data
# - get test data

In [124]:
# 2-D NumPy array of every column after the first one (`datetime`, per the frame shown above).
X = data.iloc[:, 1:].values

In [140]:
# Pair every row of X with the row that follows it and flatten each pair into a
# single feature vector. The window width is taken from X itself instead of
# being hard-coded, so the cell keeps working if the column count changes.
n_cols = X.shape[1]
stride_array = np.lib.stride_tricks.sliding_window_view(X, (2, n_cols), axis=(0, 1))

stride_array = stride_array.reshape(-1, 1, 2 * n_cols)
stride_array.shape

(2789, 1, 30)

In [141]:
stride_array[0:3]

array([[[ 0.0117  ,  0.010348,  0.01122 ,  0.048953,  0.011921,
          0.012156,  0.034943,  0.005915,  0.011528,  0.012012,
          0.010009,  0.028698,  0.000648,  0.010202,  0.041029,
         -0.024954, -0.035594, -0.031634, -0.043162, -0.016297,
         -0.009922, -0.028508, -0.033712, -0.030935, -0.040734,
         -0.033175, -0.010413, -0.01943 , -0.033251, -0.038521]],

       [[-0.024954, -0.035594, -0.031634, -0.043162, -0.016297,
         -0.009922, -0.028508, -0.033712, -0.030935, -0.040734,
         -0.033175, -0.010413, -0.01943 , -0.033251, -0.038521,
          0.006552, -0.029944,  0.007801,  0.009022,  0.029701,
          0.015823,  0.019672,  0.006085,  0.005376,  0.006187,
          0.000446,  0.018066, -0.004624,  0.002293,  0.002316]],

       [[ 0.006552, -0.029944,  0.007801,  0.009022,  0.029701,
          0.015823,  0.019672,  0.006085,  0.005376,  0.006187,
          0.000446,  0.018066, -0.004624,  0.002293,  0.002316,
          0.003254, -0.000179, -0.

In [145]:
HDwindow.shape

(2787, 1, 1, 3, 30)

In [93]:
HDwindow[0]

array([[ 0.0117  ,  0.010348,  0.01122 ,  0.048953,  0.011921,  0.012156,
         0.034943,  0.005915,  0.011528,  0.012012,  0.010009,  0.028698,
         0.000648,  0.010202,  0.041029, -0.024954, -0.035594, -0.031634,
        -0.043162, -0.016297, -0.009922, -0.028508, -0.033712, -0.030935,
        -0.040734, -0.033175, -0.010413, -0.01943 , -0.033251, -0.038521],
       [-0.024954, -0.035594, -0.031634, -0.043162, -0.016297, -0.009922,
        -0.028508, -0.033712, -0.030935, -0.040734, -0.033175, -0.010413,
        -0.01943 , -0.033251, -0.038521,  0.006552, -0.029944,  0.007801,
         0.009022,  0.029701,  0.015823,  0.019672,  0.006085,  0.005376,
         0.006187,  0.000446,  0.018066, -0.004624,  0.002293,  0.002316],
       [ 0.006552, -0.029944,  0.007801,  0.009022,  0.029701,  0.015823,
         0.019672,  0.006085,  0.005376,  0.006187,  0.000446,  0.018066,
        -0.004624,  0.002293,  0.002316,  0.003254, -0.000179, -0.007741,
         0.013691, -0.00504 , -0.006

In [94]:
HDwindow[1]

array([[-0.024954, -0.035594, -0.031634, -0.043162, -0.016297, -0.009922,
        -0.028508, -0.033712, -0.030935, -0.040734, -0.033175, -0.010413,
        -0.01943 , -0.033251, -0.038521,  0.006552, -0.029944,  0.007801,
         0.009022,  0.029701,  0.015823,  0.019672,  0.006085,  0.005376,
         0.006187,  0.000446,  0.018066, -0.004624,  0.002293,  0.002316],
       [ 0.006552, -0.029944,  0.007801,  0.009022,  0.029701,  0.015823,
         0.019672,  0.006085,  0.005376,  0.006187,  0.000446,  0.018066,
        -0.004624,  0.002293,  0.002316,  0.003254, -0.000179, -0.007741,
         0.013691, -0.00504 , -0.006231, -0.009486, -0.000806, -0.011698,
        -0.015651,  0.001782,  0.007605, -0.009954, -0.018302, -0.006932],
       [ 0.003254, -0.000179, -0.007741,  0.013691, -0.00504 , -0.006231,
        -0.009486, -0.000806, -0.011698, -0.015651,  0.001782,  0.007605,
        -0.009954, -0.018302, -0.006932, -0.025361, -0.007719, -0.013896,
        -0.049063, -0.026535, -0.014

In [108]:
# Rolling windows of 10 consecutive values from the single "CF" column.
# The column gets its own name so the 2-D `X` array built in an earlier cell
# is not overwritten (re-running that cell's consumers would otherwise break).
cf_values = data["CF"].to_numpy()
stride_array_single = np.lib.stride_tricks.sliding_window_view(cf_values, 10)
stride_array_single.shape

(2781, 10)

In [103]:
stride_array_single[:3]

array([[ 0.0117  , -0.024954,  0.006552,  0.003254, -0.025361,  0.004236,
        -0.00934 , -0.009428,  0.009211, -0.005172],
       [-0.024954,  0.006552,  0.003254, -0.025361,  0.004236, -0.00934 ,
        -0.009428,  0.009211, -0.005172, -0.020489],
       [ 0.006552,  0.003254, -0.025361,  0.004236, -0.00934 , -0.009428,
         0.009211, -0.005172, -0.020489,  0.013113]])

In [104]:
# Group the 10-wide samples into blocks of 5 consecutive samples, then drop the
# singleton axis that the 2-D sliding window leaves behind.
window_array_single = np.lib.stride_tricks.sliding_window_view(
    stride_array_single, (5, 10), axis=(0, 1)
).reshape(-1, 5, 10)
window_array_single.shape

(2777, 5, 10)

In [105]:
window_array_single[0]

array([[ 0.0117  , -0.024954,  0.006552,  0.003254, -0.025361,  0.004236,
        -0.00934 , -0.009428,  0.009211, -0.005172],
       [-0.024954,  0.006552,  0.003254, -0.025361,  0.004236, -0.00934 ,
        -0.009428,  0.009211, -0.005172, -0.020489],
       [ 0.006552,  0.003254, -0.025361,  0.004236, -0.00934 , -0.009428,
         0.009211, -0.005172, -0.020489,  0.013113],
       [ 0.003254, -0.025361,  0.004236, -0.00934 , -0.009428,  0.009211,
        -0.005172, -0.020489,  0.013113, -0.000616],
       [-0.025361,  0.004236, -0.00934 , -0.009428,  0.009211, -0.005172,
        -0.020489,  0.013113, -0.000616, -0.006167]])

In [106]:
window_array_single[1]

array([[-0.024954,  0.006552,  0.003254, -0.025361,  0.004236, -0.00934 ,
        -0.009428,  0.009211, -0.005172, -0.020489],
       [ 0.006552,  0.003254, -0.025361,  0.004236, -0.00934 , -0.009428,
         0.009211, -0.005172, -0.020489,  0.013113],
       [ 0.003254, -0.025361,  0.004236, -0.00934 , -0.009428,  0.009211,
        -0.005172, -0.020489,  0.013113, -0.000616],
       [-0.025361,  0.004236, -0.00934 , -0.009428,  0.009211, -0.005172,
        -0.020489,  0.013113, -0.000616, -0.006167],
       [ 0.004236, -0.00934 , -0.009428,  0.009211, -0.005172, -0.020489,
         0.013113, -0.000616, -0.006167, -0.010549]])

In [152]:
class SequenceDataLoader:
    """Rolling training windows for a walk-forward backtest of one column.

    A *sample* is ``feature_size`` consecutive rows (flattened) and its target
    is the value of ``col_name`` on the row right after those rows. A *window*
    is ``window_size`` consecutive samples. Item ``i`` is the window whose last
    target sits on the row just before the ``i``-th backtest row, together with
    the sample needed to predict that backtest row.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``datetime`` column first and the value columns after it.
    date_from :
        First backtest date; compared against ``df['datetime']`` with ``>=``.
        NOTE(review): this compares raw column values, so string dates are
        ordered lexicographically — confirm the column format makes that safe.
    window_size : int
        Number of samples in each training window.
    feature_size : int
        Number of consecutive rows that make up one sample.
    include_other : bool
        If true, a sample contains every value column (``feature_size * n_cols``
        numbers); otherwise only ``col_name`` (``feature_size`` numbers).
    col_name : str
        Column used as the prediction target.

    Attributes
    ----------
    X : ndarray, shape (n_windows, window_size, sample_width)
    y : ndarray, shape (n_windows, window_size)

    Raises
    ------
    ValueError
        If a size is not positive, or if fewer than ``window_size + feature_size``
        rows precede the first backtest row.
    IndexError
        If no row satisfies ``datetime >= date_from``.
    """

    def __init__(self, df, date_from, window_size, feature_size, include_other, col_name):
        if window_size < 1 or feature_size < 1:
            raise ValueError("window_size and feature_size must be positive integers")

        self.feature_size = feature_size
        self.window_size = window_size

        # Use the *position* of the first backtest row rather than its index
        # label, so a non-default DataFrame index cannot skew the iloc
        # arithmetic below.
        backtest_positions = np.flatnonzero((df['datetime'] >= date_from).to_numpy())
        if backtest_positions.size == 0:
            raise IndexError(f"no rows with datetime >= {date_from!r}")
        first_backtest_row = int(backtest_positions[0])

        offset = window_size + feature_size
        bt_index = first_backtest_row - offset
        if bt_index < 0:
            # A negative start would make iloc slice from the END of the frame
            # and silently train on the wrong rows.
            raise ValueError(
                f"window_size + feature_size = {offset} exceeds the "
                f"{first_backtest_row} rows available before {date_from!r}"
            )

        sliding = np.lib.stride_tricks.sliding_window_view
        columns = len(df.columns) - 1  # every column except the leading datetime one

        if include_other:
            sample_width = feature_size * columns
            values = df.iloc[bt_index:, 1:].values
            # (n_samples, 1, feature_size, columns) -> (n_samples, feature_size * columns)
            samples = sliding(values, (feature_size, columns), axis=(0, 1))
            samples = samples.reshape(-1, sample_width)
        else:
            sample_width = feature_size
            # (n_samples, feature_size)
            samples = sliding(df[col_name].iloc[bt_index:].values, feature_size)

        # (n_windows, 1, window_size, sample_width) -> (n_windows, window_size, sample_width)
        windows = sliding(samples, (window_size, sample_width), axis=(0, 1))
        self.X = windows.reshape(-1, window_size, sample_width)

        # Targets start feature_size rows later than the samples, so y[w][k] is
        # the value on the row immediately after sample X[w][k].
        self.y = sliding(df[col_name].iloc[bt_index + feature_size:].values, window_size)

    def __len__(self):
        # The last window is only ever used as the source of a test sample
        # (see __getitem__), so it does not count as an item of its own.
        return self.y.shape[0] - 1

    def __getitem__(self, index):
        """Return ``(train_X, train_y, test_X)`` for item ``index``.

        ``train_X`` is ``(window_size, sample_width)``, ``train_y`` is
        ``(window_size,)`` and ``test_X`` is ``(1, sample_width)``: the newest
        sample of the following window, i.e. the one sample whose target is not
        part of ``train_y``. Negative indices count from the end.
        """
        length = len(self)
        if index < 0:
            index += length
        if not 0 <= index < length:
            raise IndexError("SequenceDataLoader index out of range")
        return self.X[index], self.y[index], self.X[index + 1][-1].reshape(1, -1)
    
#     def _get_include_other(self, index):
    
#     def _get_single_col(self, index):
#         window_train_X = []

#         actual_window_size = self.window_size if self.window_size <= self.bt_index + index else self.bt_index + index # 
#         window_begin_index = self.bt_index + index - actual_window_size
#         actual_feature_size = self.feature_size if self.feature_size < actual_window_size else actual_window_size - 1
#         actual_sample_num = actual_window_size - actual_feature_size

#         for s in range(actual_sample_num):
#             window_train_X.append(self.X[window_begin_index + s: window_begin_index + actual_feature_size + s])
#         window_train_X = np.array(window_train_X)
#         if self.require_2d:
#             window_train_X = window_train_X.reshape(window_train_X.shape[0], -1)
#         window_train_y = self.y[window_begin_index + actual_feature_size: window_begin_index + actual_window_size].reshape(-1, 1)

#         window_test_X = self.X[window_begin_index + actual_sample_num: window_begin_index + actual_window_size]
#         if self.require_2d or len(window_test_X.shape) == 1:
#             window_test_X = window_test_X.reshape(1, -1)
#         window_test_y = self.y[window_begin_index + actual_window_size]
        
#         return window_train_X, window_train_y, window_test_X, window_test_y

In [116]:
seq = SequenceDataLoader(data, date_from='2017/1/1', window_size=20, feature_size=10, include_other=True, col_name="CF")

In [118]:
x, y, tx = seq[0]

In [134]:
tx.shape

(150,)

In [8]:
# def get_data(df, date_from, window_size, feature_size, include_other, col_name, require_2D=False):
#     bt_index = df[df['datetime'] >= date_from].iloc[0].name
#     sample_num = window_size - feature_size
#     window_num = len(df[df['datetime'] >= date_from])
    
#     train_X = []
#     train_y = []
#     test_X = []
#     test_y = []
    
#     X = df.iloc[:, 1:].to_numpy() if include_other else df[col_name].to_numpy()
#     y = df[col_name].to_numpy()
    

#     for w in range(window_num):
#         window_train_X = []

#         actual_window_size = window_size if window_size <= bt_index + w else bt_index + w # 
#         window_begin_index = bt_index + w - actual_window_size
#         actual_feature_size = feature_size if feature_size < actual_window_size else actual_window_size - 1
#         actual_sample_num = actual_window_size - actual_feature_size

#         for s in range(actual_sample_num):
#             window_train_X.append(X[window_begin_index + s: window_begin_index + actual_feature_size + s])
#         window_train_X = np.array(window_train_X)
        
#         if require_2D:
#             window_train_X = window_train_X.reshape(window_train_X.shape[0], -1)
        
#         window_train_y = y[window_begin_index + actual_feature_size: window_begin_index + actual_window_size].reshape(-1, 1)

#         window_test_X = X[window_begin_index + actual_sample_num: window_begin_index + actual_window_size].reshape(1, -1)
#         if require_2D or len(window_test_X.shape) == 1:
#             window_test_X = window_test_X.reshape(1, -1)
#         window_test_y = y[window_begin_index + actual_window_size]

#         train_X.append(window_train_X)
#         train_y.append(window_train_y)
#         test_X.append(window_test_X)
#         test_y.append(window_test_y)

#     return train_X, train_y, test_X, test_y

In [132]:
def evaluate_loader(estimator, data, date_from, window_size, feature_size, include_other):
    """Walk-forward backtest of ``estimator`` on every value column of ``data``.

    For each column and each backtest row the estimator is refit on the rolling
    window supplied by ``SequenceDataLoader`` and asked for a one-step
    prediction. Predictions are turned into +1 (prediction > 0) / -1 (otherwise)
    signals and scored with ``sharpe_ratio``.

    Parameters
    ----------
    estimator :
        Object exposing ``fit(X, y)`` and ``predict(X)``; it is refit in place
        for every window.
    data : pandas.DataFrame
        Frame with ``datetime`` as its first column and value columns after it.
    date_from :
        First backtest date (rows with ``datetime >= date_from`` are scored).
    window_size, feature_size, include_other :
        Forwarded unchanged to ``SequenceDataLoader``.

    Returns
    -------
    Whatever ``sharpe_ratio`` returns for the realised returns and the signals.
    """
    # Score against the rows selected by THIS call's data/date_from. The
    # notebook-level `actual_returns` only lines up when date_from is '2017/1/1'.
    backtest_returns = data[data['datetime'] >= date_from].iloc[:, 1:]
    signal = np.zeros(backtest_returns.shape)
    for col_idx, col_name in enumerate(backtest_returns.columns):
        dataLoader = SequenceDataLoader(data, date_from, window_size, feature_size, include_other, col_name)
        for i in range(len(dataLoader)):
            train_X, train_y, test_X = dataLoader[i]
            estimator.fit(train_X, train_y)
            # predict() returns a length-1 array for the single test row; pull
            # the scalar out explicitly instead of relying on NumPy's implicit
            # array-to-scalar conversion (deprecated since NumPy 1.25).
            signal[i, col_idx] = np.ravel(estimator.predict(test_X))[0]
    signal = np.where(signal > 0, 1, -1)
    return sharpe_ratio(backtest_returns, signal)

In [10]:
# def evaluate(estimator, data_arrays):
#     # start_all = time.time()
#     signal = np.zeros_like(actual_returns)
#     # fit_time = []
#     # round_time = []
#     for col_idx, col_name in enumerate(actual_returns.columns):
#         # round_begin = time.time()
#         train_X, train_y, test_X, test_y = data_arrays[col_idx]
#         col_signal = np.zeros((len(signal)))
#         for i in range(len(test_y)):
#             # fit_begin = time.time()
#             estimator.fit(train_X[i], train_y[i])
#             col_signal[i] = estimator.predict(test_X[i])
#             # fit_end = time.time()
#             # fit_time.append(fit_end-fit_begin)
#         signal[:,col_idx] = col_signal
#         # round_end = time.time()
#         # round_time.append(round_end-round_begin)
#     signal = np.where(signal > 0, 1, -1)
#     # end_all = time.time()
#     # print(f"all time {end_all-start_all}, round time {np.mean(round_time)}, fit time {np.mean(fit_time)}")
#     return sharpe_ratio(actual_returns, signal)

In [11]:
# start = time.time()
# data_array = []
# for i in actual_returns.columns:
#     data_array.append(get_data(data, '2017/1/1', 450, 1, False, i))
# end = time.time()
# print(end-start)

In [12]:
# evaluate(LinearRegression(), data_array)

In [13]:
# evaluate(LinearRegression(), data, '2017/1/1', 450, 1, False)

In [14]:
# # param:
# #   window size
# #   feature size
# #   include other?
# #   L1, L2
# #   
# perf = {}
# date_from = '2017/1/1'
# for window_size in tqdm(range(20, len(data), 50), position=0, leave=False): # window_size
#     perf[window_size] = {}
#     for feature_size in tqdm(range(1, window_size, 10),position=1, leave=False): # feature_size
#         perf[window_size][feature_size] = {}
#         for include_other in tqdm((True, False),position=2, leave=False):
#             include_other_name = "T" if include_other else "F"
#             data_arrays = []
#             for col in actual_returns.columns:
#                 data_arrays.append(get_data(data, date_from, ))
#             perf[window_size][feature_size][include_other_name] = {}
#             perf[window_size][feature_size][include_other_name]["None"] = evaluate(LinearRegression(), data, date_from, window_size, feature_size, include_other, require_2D=True)
#             perf[window_size][feature_size][include_other_name]["L1"] = {}
#             for i in tqdm(range(5),position=3, leave=False):
#                 c = 10 ** (-i)
#                 perf[window_size][feature_size][include_other_name]["L1"][c] = evaluate(Lasso(alpha=c), data, date_from, window_size, feature_size, include_other, require_2D=True)
                
#             perf[window_size][feature_size][include_other_name]["L2"] = {}
#             for i in tqdm(range(5),position=4, leave=False):
#                 c = 10 ** (-i)
#                 perf[window_size][feature_size][include_other_name]["L2"][c] = evaluate(Ridge(alpha=c), data, date_from, window_size, feature_size, include_other, require_2D=True)

In [155]:
evaluate_loader(LinearRegression(n_jobs=-1), data, '2017/1/1', 1500, 1, True)

1.8155104487733784

In [154]:
# Grid search over the loader hyper-parameters, scored by Sharpe ratio:
#   window_size   - number of samples per training window
#   feature_size  - number of consecutive rows per sample
#   include_other - use every column as features ("T") or only the target column ("F")
# TODO: L1 / L2 regularisation (Lasso / Ridge) is not part of the sweep yet.
perf = {}
date_from = '2017/1/1'
# Index label of the first backtest row; with the default 0..n-1 index this is
# also the number of history rows available before the backtest starts.
first_bt_row = data[data['datetime'] >= date_from].iloc[0].name
max_window_size = first_bt_row - 1
for window_size in tqdm(range(20, max_window_size, 100), position=0, leave=False): # window_size
    perf[window_size] = {}
    interval = window_size // 5
    for feature_size in tqdm(range(1, window_size, interval), position=1, leave=False): # feature_size
        # The loader needs window_size + feature_size rows of history. Asking
        # for more makes its start index negative, which iloc treats as "count
        # from the end", so the model would silently train on the wrong rows.
        # Feature sizes only grow from here, so stop this inner sweep.
        if window_size + feature_size > first_bt_row:
            break
        perf[window_size][feature_size] = {}
        for include_other in tqdm((True, False), position=2, leave=False):
            include_other_name = "T" if include_other else "F"
            sharpe = evaluate_loader(LinearRegression(n_jobs=-1), data, date_from, window_size, feature_size, include_other)
            perf[window_size][feature_size][include_other_name] = sharpe
            print(f"{window_size} {feature_size} {include_other} {sharpe}")

  0%|          | 0/17 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

20 1 True 0.4183074416588965
20 1 False 0.2793429335746856


  0%|          | 0/2 [00:00<?, ?it/s]

20 5 True 0.001970612567856043
20 5 False -0.3571384871145669


  0%|          | 0/2 [00:00<?, ?it/s]

20 9 True 0.7244231498887432
20 9 False 0.356124539725068


  0%|          | 0/2 [00:00<?, ?it/s]

20 13 True 0.5196672105008787
20 13 False -0.5175758283167443


  0%|          | 0/2 [00:00<?, ?it/s]

20 17 True 0.46950754557323104
20 17 False -0.39519925126382066


  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

120 1 True 0.16393440403900728
120 1 False 0.3621974212063328


  0%|          | 0/2 [00:00<?, ?it/s]

120 25 True 0.649863321759965
120 25 False 0.4813452590469126


  0%|          | 0/2 [00:00<?, ?it/s]

120 49 True 0.2327943130616561
120 49 False -0.13483594251217912


  0%|          | 0/2 [00:00<?, ?it/s]

120 73 True 0.01893373581414685
120 73 False -0.8631457671437897


  0%|          | 0/2 [00:00<?, ?it/s]

120 97 True 0.2518192125922907
120 97 False -0.5779448269048526


  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

220 1 True 1.1245121348098372
220 1 False 0.6079379064289578


  0%|          | 0/2 [00:00<?, ?it/s]

220 45 True 0.3893515126809338
220 45 False 3.2553096158714344e-05


  0%|          | 0/2 [00:00<?, ?it/s]

220 89 True 0.520864971103561
220 89 False -0.8091705875496665


  0%|          | 0/2 [00:00<?, ?it/s]

220 133 True -0.0686378616227746
220 133 False -0.5905318796425928


  0%|          | 0/2 [00:00<?, ?it/s]

220 177 True -0.5941330744917603
220 177 False -0.09537779240558363


  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

320 1 True 1.1013867149463357
320 1 False 0.5149638136947313


  0%|          | 0/2 [00:00<?, ?it/s]

320 65 True 0.5990285235784686
320 65 False -0.3679809225189313


  0%|          | 0/2 [00:00<?, ?it/s]

320 129 True -0.6701518975957211
320 129 False -0.4447814674498815


  0%|          | 0/2 [00:00<?, ?it/s]

320 193 True -1.2365182868528524
320 193 False 0.09681602177190594


  0%|          | 0/2 [00:00<?, ?it/s]

320 257 True -0.3114962022388775
320 257 False 0.12565916797707752


  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

420 1 True 0.9737314188988818
420 1 False 0.6205677459310573


  0%|          | 0/2 [00:00<?, ?it/s]

420 85 True -0.23900840118619573
420 85 False -0.3083095874404663


  0%|          | 0/2 [00:00<?, ?it/s]

420 169 True -1.5272602505373485
420 169 False -0.3255643685124339


  0%|          | 0/2 [00:00<?, ?it/s]

420 253 True -0.9493214420197787
420 253 False -0.4495637445438915


  0%|          | 0/2 [00:00<?, ?it/s]

420 337 True -0.8860643499783846
420 337 False 0.2433462496739627


  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

520 1 True 0.7730575382063822
520 1 False 0.43795037026503536


  0%|          | 0/2 [00:00<?, ?it/s]

520 105 True -0.5647885085400781
520 105 False -0.4054649808302586


  0%|          | 0/2 [00:00<?, ?it/s]

520 209 True -1.4827723795877696
520 209 False -0.7176910795860243


  0%|          | 0/2 [00:00<?, ?it/s]

520 313 True -1.0623781518652085
520 313 False -0.8652304199159835


  0%|          | 0/2 [00:00<?, ?it/s]

520 417 True -0.7912510035228278
520 417 False -0.37667763755781997


  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

620 1 True 1.1992526873694003
620 1 False 0.2943264267809084


  0%|          | 0/2 [00:00<?, ?it/s]

620 125 True -0.04410899739552633
620 125 False 0.2738796093895155


  0%|          | 0/2 [00:00<?, ?it/s]

620 249 True -0.7555929177794315
620 249 False -0.5297760941383891


  0%|          | 0/2 [00:00<?, ?it/s]

620 373 True -1.0539629666558514
620 373 False -1.2991808205030069


  0%|          | 0/2 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
print(perf)

In [None]:
# Plot Sharpe ratio against feature size for each SVR kernel and record the
# best feature size per kernel.
# NOTE(review): `kernels`, `featureSizes` and `sharpeRatioForSVR_featureDim` are
# not defined in any earlier visible cell; this cell fails on a fresh kernel.
bestFeatureDim_SVR = {}
for k in kernels:
    # Use a dedicated name: rebinding `perf` here would overwrite the
    # grid-search results dictionary built earlier in the notebook.
    kernel_sharpes = sharpeRatioForSVR_featureDim[k]
    plt.plot(featureSizes, kernel_sharpes, label=k)
    bestFeatureDim_SVR[k] = featureSizes[np.argmax(kernel_sharpes)]
    print(f"Best feature dim for {k} kernel is: {bestFeatureDim_SVR[k]}")
    print(f"Best sharpe is {np.max(kernel_sharpes)}")
plt.xlabel("feature size")
plt.ylabel("Sharpe ratio")
plt.legend();

In [None]:
# Sweep the training-window size for a linear-kernel SVR with a fixed feature
# size and record the Sharpe ratio of each setting.
# NOTE(review): `DataLoaderV2` is not defined in any visible cell.
sharpeRatioForSVR_windowsize_t1 = []
featureSize_t1 = 29
windowSize_t1 = list(range(30, 200, 10))
kernel = 'linear'
# The backtest rows do not depend on the window size, so filter the frame once.
bt_rows_t1 = data[data['datetime'] >= '2017/1/1']
bt_start_t1 = bt_rows_t1.iloc[0].name
for windowSize in tqdm(windowSize_t1):
    # Keep windowSize + 1 rows of history ahead of the first backtest row.
    bt_data = data.iloc[bt_start_t1 - windowSize - 1:, 1:]
    signals = np.zeros((len(bt_rows_t1), len(bt_data.columns)))
    dataloader = DataLoaderV2(bt_data, feature_size=featureSize_t1, window_size=windowSize)
    for col_index, col_name in enumerate(bt_data.columns):
        for i in range(len(dataloader)):
            x, y = dataloader.__getitem__(i, col_index)
            y = y.reshape(-1)
            model = svm.SVR(kernel=kernel).fit(x, y)
            test_x = dataloader.get_test_axis_1(i, col_index).reshape(1, -1)
            # predict() returns a length-1 array for the single test row; take
            # the scalar explicitly (implicit conversion is deprecated in NumPy 1.25+).
            signals[i, col_index] = model.predict(test_x)[0]
    signals = np.where(signals > 0, 1, -1)
    sharpeRatioForSVR_windowsize_t1.append(sharpe_ratio(actual_returns, signals))

In [None]:
# Plot Sharpe ratio against window size and report the best-performing size.
plt.plot(windowSize_t1, sharpeRatioForSVR_windowsize_t1)
plt.xlabel("window size")
plt.ylabel("Sharpe ratio")
bestWindowSize_29D = windowSize_t1[np.argmax(sharpeRatioForSVR_windowsize_t1)]
print(f"Best window size for 29D is: {bestWindowSize_29D}")
print(f"Best sharpe is {np.max(sharpeRatioForSVR_windowsize_t1)}")

In [None]:
bestFeatureDim_SVR = 29

## Regularization

In [None]:
# Sweep the SVR regularisation strength C for every kernel and record the
# Sharpe ratio of each (kernel, C) pair.
# NOTE(review): `bestWindowSize_SVR` and `DataLoaderV2` are not defined in any
# visible cell (only `bestWindowSize_29D` is assigned above) — confirm.
sharpeRatioForSVR_regularization = {}
kernels = ["linear", 'poly', 'rbf', 'sigmoid']
c_candidate = [i/10. for i in range(1, 6)]
# Neither the backtest rows nor the history slice depend on the kernel or on C,
# so build them once instead of once per (kernel, C) pair.
bt_rows_reg = data[data['datetime'] >= '2017/1/1']
bt_data = data.iloc[bt_rows_reg.iloc[0].name - bestWindowSize_SVR - 1:, 1:]
for k in kernels:
    sharpeRatioForSVR_regularization[k] = []
    for c in tqdm(c_candidate):
        signals = np.zeros((len(bt_rows_reg), len(bt_data.columns)))
        dataloader = DataLoaderV2(bt_data, feature_size=bestFeatureDim_SVR, window_size=bestWindowSize_SVR)
        for col_index, col_name in enumerate(bt_data.columns):
            for i in range(len(dataloader)):
                x, y = dataloader.__getitem__(i, col_index)
                y = y.reshape(-1)
                # Use the loop variable `k`. The stale global `kernel` left over
                # from an earlier cell made every kernel run as 'linear'.
                model = svm.SVR(kernel=k, C=c).fit(x, y)
                test_x = dataloader.get_test_axis_1(i, col_index).reshape(1, -1)
                # predict() returns a length-1 array; take the scalar explicitly.
                signals[i, col_index] = model.predict(test_x)[0]
        signals = np.where(signals > 0, 1, -1)
        sharpeRatioForSVR_regularization[k].append(sharpe_ratio(actual_returns, signals))

In [None]:
# Plot Sharpe ratio against C for each kernel and report the best C per kernel.
for k in kernels:
    # Use a dedicated name: rebinding `perf` here would overwrite the
    # grid-search results dictionary built earlier in the notebook.
    kernel_sharpes = sharpeRatioForSVR_regularization[k]
    plt.plot(c_candidate, kernel_sharpes, label=k)
    best_c = c_candidate[np.argmax(kernel_sharpes)]
    print(f"Best C for {k} kernel is: {best_c}")
    print(f"Best sharpe is {np.max(kernel_sharpes)}")
plt.xlabel("C")
plt.ylabel("Sharpe ratio")
plt.legend();