In [1]:
import gc
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn
import torch
from sklearn import svm
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from tqdm.notebook import tqdm

In [2]:
# Load the backtest table from a CSV file expected in the working directory.
data = pd.read_csv("Workshop_Backtest.csv")

In [3]:
def sharpe_ratio(actualReturn, signals):
    """Annualised Sharpe ratio of a signal-weighted strategy.

    Every return is multiplied element-wise by its signal. For 2-D inputs the
    products are averaged across columns, giving one portfolio return per row.
    The result is ``mean / std * sqrt(240)`` of that per-row series.

    Parameters
    ----------
    actualReturn : numpy.ndarray or pandas.DataFrame/Series
        Realised returns, either 1-D (one instrument) or 2-D (rows x instruments).
    signals : numpy.ndarray or pandas object
        Positions with exactly the same shape as ``actualReturn``.

    Returns
    -------
    float
        The scaled mean/std ratio. A zero standard deviation follows normal
        floating-point division (inf or nan).

    Raises
    ------
    AssertionError
        If the two inputs do not have the same shape.

    Notes
    -----
    ``std`` is whatever the container provides: numpy uses the population
    formula, pandas the sample formula. The factor 240 is presumably the
    number of trading days per year (TODO confirm).
    """
    assert actualReturn.shape == signals.shape
    dailyReturns = actualReturn * signals
    # Decide on dimensionality, not on ``shape[1] > 1``: that test raised
    # IndexError for 1-D input and left a one-column DataFrame un-reduced, so
    # the function returned a Series instead of a scalar.
    if dailyReturns.ndim > 1:
        dailyReturns = dailyReturns.mean(axis=1)
    return dailyReturns.mean() / dailyReturns.std() * np.sqrt(240)

In [4]:
# Returns used to score the backtest: rows whose 'datetime' is >= '2017/1/1',
# with the first column dropped.
# NOTE(review): if 'datetime' holds strings this is a lexicographic comparison;
# confirm it selects the intended rows.
actual_returns = data.loc[data['datetime'] >= '2017/1/1'].iloc[:, 1:]

In [27]:
data

Unnamed: 0,datetime,CF,SR,TA,al,au,c,cu,l,m,p,rb,ru,v,y,zn
0,2010/1/5,0.011700,0.010348,0.011220,0.048953,0.011921,0.012156,0.034943,0.005915,0.011528,0.012012,0.010009,0.028698,0.000648,0.010202,0.041029
1,2010/1/6,-0.024954,-0.035594,-0.031634,-0.043162,-0.016297,-0.009922,-0.028508,-0.033712,-0.030935,-0.040734,-0.033175,-0.010413,-0.019430,-0.033251,-0.038521
2,2010/1/7,0.006552,-0.029944,0.007801,0.009022,0.029701,0.015823,0.019672,0.006085,0.005376,0.006187,0.000446,0.018066,-0.004624,0.002293,0.002316
3,2010/1/8,0.003254,-0.000179,-0.007741,0.013691,-0.005040,-0.006231,-0.009486,-0.000806,-0.011698,-0.015651,0.001782,0.007605,-0.009954,-0.018302,-0.006932
4,2010/1/11,-0.025361,-0.007719,-0.013896,-0.049063,-0.026535,-0.014107,-0.037981,-0.050040,-0.029422,-0.037763,-0.020231,-0.027869,-0.022788,-0.031072,-0.046999
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2785,2021/6/23,0.000945,-0.002906,-0.002797,-0.004259,-0.001391,-0.001161,-0.003336,0.008197,-0.003260,-0.015468,-0.006221,0.006939,-0.013458,-0.012977,-0.001369
2786,2021/6/24,0.008816,0.008927,0.036859,0.018979,-0.000107,0.002713,0.002329,0.013759,0.010110,0.022112,0.028271,0.007657,0.013642,0.019102,0.002742
2787,2021/6/25,0.002809,0.000181,0.002318,-0.011280,-0.003323,0.007731,-0.005663,0.016039,0.013247,0.003985,0.006088,-0.003419,0.016969,-0.010224,-0.010255
2788,2021/6/28,0.000000,0.007222,-0.016962,0.000796,-0.014412,0.002301,-0.007886,-0.006072,0.007844,0.012759,-0.010346,-0.018300,0.001726,0.012051,0.006447


In [6]:
# Requirements for the data loader:
# - return multi-dimensional (windowed) samples
# - optionally include the other columns' data as features
# - return the test sample for each window

In [32]:
# Every column after the first, as a plain array.
X = data.iloc[:, 1:].to_numpy()

In [74]:
# Pair each row of X with the row that follows it: slide a (2, n_columns)
# window down the rows, then flatten every window into one feature vector.
# The sizes are read from X rather than hard-coded as 15 / 30, so the cell
# keeps working if the number of columns changes.
stride_array = np.lib.stride_tricks.sliding_window_view(X, (2, X.shape[1]), axis=(0, 1))

stride_array = stride_array.reshape(-1, 2 * X.shape[1])
stride_array.shape

(2789, 30)

In [75]:
stride_array[0:3]

array([[ 0.0117  ,  0.010348,  0.01122 ,  0.048953,  0.011921,  0.012156,
         0.034943,  0.005915,  0.011528,  0.012012,  0.010009,  0.028698,
         0.000648,  0.010202,  0.041029, -0.024954, -0.035594, -0.031634,
        -0.043162, -0.016297, -0.009922, -0.028508, -0.033712, -0.030935,
        -0.040734, -0.033175, -0.010413, -0.01943 , -0.033251, -0.038521],
       [-0.024954, -0.035594, -0.031634, -0.043162, -0.016297, -0.009922,
        -0.028508, -0.033712, -0.030935, -0.040734, -0.033175, -0.010413,
        -0.01943 , -0.033251, -0.038521,  0.006552, -0.029944,  0.007801,
         0.009022,  0.029701,  0.015823,  0.019672,  0.006085,  0.005376,
         0.006187,  0.000446,  0.018066, -0.004624,  0.002293,  0.002316],
       [ 0.006552, -0.029944,  0.007801,  0.009022,  0.029701,  0.015823,
         0.019672,  0.006085,  0.005376,  0.006187,  0.000446,  0.018066,
        -0.004624,  0.002293,  0.002316,  0.003254, -0.000179, -0.007741,
         0.013691, -0.00504 , -0.006

In [91]:
# Group three consecutive 30-wide rows of stride_array into one sample,
# producing an array of shape (n_windows, 3, 30).
HDwindow = (
    np.lib.stride_tricks
    .sliding_window_view(stride_array, (3, 30))
    .reshape(-1, 3, 30)
)

In [92]:
HDwindow.shape

(2787, 3, 30)

In [93]:
HDwindow[0]

array([[ 0.0117  ,  0.010348,  0.01122 ,  0.048953,  0.011921,  0.012156,
         0.034943,  0.005915,  0.011528,  0.012012,  0.010009,  0.028698,
         0.000648,  0.010202,  0.041029, -0.024954, -0.035594, -0.031634,
        -0.043162, -0.016297, -0.009922, -0.028508, -0.033712, -0.030935,
        -0.040734, -0.033175, -0.010413, -0.01943 , -0.033251, -0.038521],
       [-0.024954, -0.035594, -0.031634, -0.043162, -0.016297, -0.009922,
        -0.028508, -0.033712, -0.030935, -0.040734, -0.033175, -0.010413,
        -0.01943 , -0.033251, -0.038521,  0.006552, -0.029944,  0.007801,
         0.009022,  0.029701,  0.015823,  0.019672,  0.006085,  0.005376,
         0.006187,  0.000446,  0.018066, -0.004624,  0.002293,  0.002316],
       [ 0.006552, -0.029944,  0.007801,  0.009022,  0.029701,  0.015823,
         0.019672,  0.006085,  0.005376,  0.006187,  0.000446,  0.018066,
        -0.004624,  0.002293,  0.002316,  0.003254, -0.000179, -0.007741,
         0.013691, -0.00504 , -0.006

In [94]:
HDwindow[1]

array([[-0.024954, -0.035594, -0.031634, -0.043162, -0.016297, -0.009922,
        -0.028508, -0.033712, -0.030935, -0.040734, -0.033175, -0.010413,
        -0.01943 , -0.033251, -0.038521,  0.006552, -0.029944,  0.007801,
         0.009022,  0.029701,  0.015823,  0.019672,  0.006085,  0.005376,
         0.006187,  0.000446,  0.018066, -0.004624,  0.002293,  0.002316],
       [ 0.006552, -0.029944,  0.007801,  0.009022,  0.029701,  0.015823,
         0.019672,  0.006085,  0.005376,  0.006187,  0.000446,  0.018066,
        -0.004624,  0.002293,  0.002316,  0.003254, -0.000179, -0.007741,
         0.013691, -0.00504 , -0.006231, -0.009486, -0.000806, -0.011698,
        -0.015651,  0.001782,  0.007605, -0.009954, -0.018302, -0.006932],
       [ 0.003254, -0.000179, -0.007741,  0.013691, -0.00504 , -0.006231,
        -0.009486, -0.000806, -0.011698, -0.015651,  0.001782,  0.007605,
        -0.009954, -0.018302, -0.006932, -0.025361, -0.007719, -0.013896,
        -0.049063, -0.026535, -0.014

In [78]:
# Single-column variant: each row of the result holds 10 consecutive CF values.
# The column gets its own name so the matrix bound to X by the earlier cell is
# not overwritten (rebinding X made the multi-column cells fail on re-run).
cf_returns = data["CF"].to_numpy()
stride_array_single = np.lib.stride_tricks.sliding_window_view(cf_returns, 10, axis=0)
stride_array_single.shape

(2781, 10)

In [103]:
stride_array_single[:3]

array([[ 0.0117  , -0.024954,  0.006552,  0.003254, -0.025361,  0.004236,
        -0.00934 , -0.009428,  0.009211, -0.005172],
       [-0.024954,  0.006552,  0.003254, -0.025361,  0.004236, -0.00934 ,
        -0.009428,  0.009211, -0.005172, -0.020489],
       [ 0.006552,  0.003254, -0.025361,  0.004236, -0.00934 , -0.009428,
         0.009211, -0.005172, -0.020489,  0.013113]])

In [104]:
# Stack five consecutive 10-lag rows into one training window:
# the result has shape (n_windows, 5, 10).
window_array_single = (
    np.lib.stride_tricks
    .sliding_window_view(stride_array_single, (5, 10), axis=(0, 1))
    .reshape(-1, 5, 10)
)
window_array_single.shape

(2777, 5, 10)

In [105]:
window_array_single[0]

array([[ 0.0117  , -0.024954,  0.006552,  0.003254, -0.025361,  0.004236,
        -0.00934 , -0.009428,  0.009211, -0.005172],
       [-0.024954,  0.006552,  0.003254, -0.025361,  0.004236, -0.00934 ,
        -0.009428,  0.009211, -0.005172, -0.020489],
       [ 0.006552,  0.003254, -0.025361,  0.004236, -0.00934 , -0.009428,
         0.009211, -0.005172, -0.020489,  0.013113],
       [ 0.003254, -0.025361,  0.004236, -0.00934 , -0.009428,  0.009211,
        -0.005172, -0.020489,  0.013113, -0.000616],
       [-0.025361,  0.004236, -0.00934 , -0.009428,  0.009211, -0.005172,
        -0.020489,  0.013113, -0.000616, -0.006167]])

In [106]:
window_array_single[1]

array([[-0.024954,  0.006552,  0.003254, -0.025361,  0.004236, -0.00934 ,
        -0.009428,  0.009211, -0.005172, -0.020489],
       [ 0.006552,  0.003254, -0.025361,  0.004236, -0.00934 , -0.009428,
         0.009211, -0.005172, -0.020489,  0.013113],
       [ 0.003254, -0.025361,  0.004236, -0.00934 , -0.009428,  0.009211,
        -0.005172, -0.020489,  0.013113, -0.000616],
       [-0.025361,  0.004236, -0.00934 , -0.009428,  0.009211, -0.005172,
        -0.020489,  0.013113, -0.000616, -0.006167],
       [ 0.004236, -0.00934 , -0.009428,  0.009211, -0.005172, -0.020489,
         0.013113, -0.000616, -0.006167, -0.010549]])

In [7]:
class SequenceDataLoader:
    """Walk-forward sample generator for one target column.

    Item ``i`` describes the prediction of row ``bt_index + i`` of ``df``,
    where ``bt_index`` is the position of the first row whose ``'datetime'``
    is ``>= date_from``. Each item is a tuple
    ``(train_X, train_y, test_X, test_y)``:

    * the training window is the ``window_size`` rows immediately before the
      target row (fewer when not enough history exists);
    * every training sample is ``feature_size`` consecutive rows, and its
      label is the target column's value on the following row;
    * ``test_X`` is the last ``feature_size`` rows of the window and
      ``test_y`` the target column's value on the target row.

    Lagged samples are built with ``sliding_window_view`` instead of a Python
    loop; the arrays handed back are copies.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``'datetime'`` column. When ``include_other`` is true,
        every column after the first is used as a feature.
    date_from : same type as the ``'datetime'`` values
        First date of the backtest period (compared with ``>=``).
    window_size : int
        Number of history rows available to each fit.
    feature_size : int
        Number of consecutive rows forming one sample. It is reduced to
        ``window - 1`` when it would not leave room for a training label.
    include_other : bool
        Use all columns (True) or only ``col_name`` (False) as features.
    col_name : str
        Target column.
    require_2D : bool, default False
        Flatten every sample to one row so the output suits estimators that
        expect ``(n_samples, n_features)``.

    Raises
    ------
    IndexError
        From the constructor when no row satisfies ``datetime >= date_from``;
        from indexing when the index is outside ``range(len(self))`` or the
        target row has fewer than two rows of history.
    """

    def __init__(self, df, date_from, window_size, feature_size, include_other, col_name, require_2D=False):
        in_backtest = (df['datetime'] >= date_from).to_numpy()

        self.window_size = window_size
        self.feature_size = feature_size
        self.include_other = include_other
        self.require_2d = require_2D
        # Positional index, so slicing the arrays below is correct whatever
        # labels the DataFrame index carries.
        self.bt_index = int(np.flatnonzero(in_backtest)[0])
        self.window_num = int(in_backtest.sum())
        self.X = df.iloc[:, 1:].to_numpy() if include_other else df[col_name].to_numpy()
        self.y = df[col_name].to_numpy()

    def __len__(self):
        """Number of backtest rows, i.e. number of items."""
        return self.window_num

    def __getitem__(self, index):
        """Return ``(train_X, train_y, test_X, test_y)`` for backtest row ``index``."""
        if self.include_other:
            return self._get_include_other(index)
        return self._get_single_col(index)

    def _window_bounds(self, index):
        """Return ``(begin, target, n_lags)`` for item ``index``."""
        if not 0 <= index < self.window_num:
            raise IndexError(f"index {index} out of range for {self.window_num} windows")
        target = self.bt_index + index
        actual_window_size = min(self.window_size, target)  # truncated near the start of the data
        n_lags = min(self.feature_size, actual_window_size - 1)  # keep at least one training label
        if n_lags < 1:
            raise IndexError(f"not enough history before row {target} to build a sample")
        return target - actual_window_size, target, n_lags

    def _split(self, lagged, begin, target, n_lags):
        """Split lagged samples into train/test parts and attach the labels."""
        # The final lagged sample ends on the row just before ``target``: it
        # is the test input. Every earlier sample has a known label.
        train_X = lagged[:-1].copy()
        test_X = lagged[-1].copy()
        if self.require_2d:
            train_X = train_X.reshape(train_X.shape[0], -1)
        if self.require_2d or test_X.ndim == 1:
            test_X = test_X.reshape(1, -1)
        train_y = self.y[begin + n_lags:target].reshape(-1, 1)
        return train_X, train_y, test_X, self.y[target]

    def _get_include_other(self, index):
        """Item built from all feature columns; samples are (n_lags, n_columns)."""
        begin, target, n_lags = self._window_bounds(index)
        history = self.X[begin:target]
        # The window spans every column, so axis 1 of the view has length 1.
        lagged = np.lib.stride_tricks.sliding_window_view(
            history, (n_lags, history.shape[1]), axis=(0, 1)
        )[:, 0]
        return self._split(lagged, begin, target, n_lags)

    def _get_single_col(self, index):
        """Item built from the target column only; samples are (n_lags,)."""
        begin, target, n_lags = self._window_bounds(index)
        lagged = np.lib.stride_tricks.sliding_window_view(self.X[begin:target], n_lags, axis=0)
        return self._split(lagged, begin, target, n_lags)

In [7]:
class SequenceDataLoader2:
    """Loop-based walk-forward loader for a single target column.

    Item ``i`` targets the row labelled ``bt_index + i``, where ``bt_index``
    is the index label of the first row with ``'datetime' >= date_from``
    (labels are used directly as array positions). Each item is
    ``(train_X, train_y, test_X, test_y)``, taken from the ``window_size``
    rows before the target, with samples of ``feature_size`` consecutive rows.
    """

    def __init__(self, df, date_from, window_size, feature_size, include_other, col_name, require_2D=False):
        backtest_rows = df[df['datetime'] >= date_from]

        self.date_from = date_from
        self.window_size = window_size
        self.feature_size = feature_size
        self.require_2d = require_2D

        self.bt_index = backtest_rows.iloc[0].name
        self.sample_num = window_size - feature_size
        self.window_num = len(backtest_rows)
        if include_other:
            self.X = df.iloc[:, 1:].to_numpy()
        else:
            self.X = df[col_name].to_numpy()
        self.y = df[col_name].to_numpy()

    def __len__(self):
        """Number of rows in the backtest period."""
        return self.window_num

    def __getitem__(self, index):
        """Build the training set and the single test sample for item ``index``."""
        target = self.bt_index + index
        # Use a shorter window when fewer than window_size rows precede the target.
        span = min(self.window_size, target)
        start = target - span
        # Cap the lag count so at least one labelled sample fits in the window.
        lags = min(self.feature_size, span - 1)
        n_train = span - lags

        train_X = np.array([self.X[start + k: start + k + lags] for k in range(n_train)])
        if self.require_2d:
            train_X = train_X.reshape(train_X.shape[0], -1)
        train_y = self.y[start + lags: target].reshape(-1, 1)

        test_X = self.X[start + n_train: target]
        if self.require_2d or test_X.ndim == 1:
            test_X = test_X.reshape(1, -1)

        return train_X, train_y, test_X, self.y[target]

In [8]:
def get_data(df, date_from, window_size, feature_size, include_other, col_name, require_2D=False):
    """Materialise every walk-forward window for one target column.

    For each row from the first one with ``'datetime' >= date_from`` onward,
    the ``window_size`` preceding rows (fewer if the data does not reach back
    that far) are cut into samples of ``feature_size`` consecutive rows. The
    label of a sample is the target column's value on the row after it.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``'datetime'`` column. When ``include_other`` is true,
        every column after the first is used as a feature.
    date_from : same type as the ``'datetime'`` values
        Start of the backtest period (compared with ``>=``).
    window_size : int
        Number of history rows per window.
    feature_size : int
        Rows per sample, capped at ``window - 1`` so one label always remains.
    include_other : bool
        Use all columns (True) or only ``col_name`` (False) as features.
    col_name : str
        Target column.
    require_2D : bool, default False
        Flatten each sample to a single row.

    Returns
    -------
    tuple of four lists
        ``(train_X, train_y, test_X, test_y)`` with one entry per backtest row.

    Raises
    ------
    IndexError
        If no row satisfies ``datetime >= date_from``.
    """
    in_backtest = (df['datetime'] >= date_from).to_numpy()
    # Positional start of the backtest, independent of the index labels.
    bt_index = int(np.flatnonzero(in_backtest)[0])
    window_num = int(in_backtest.sum())

    X = df.iloc[:, 1:].to_numpy() if include_other else df[col_name].to_numpy()
    y = df[col_name].to_numpy()

    train_X, train_y, test_X, test_y = [], [], [], []
    for w in range(window_num):
        target = bt_index + w
        actual_window_size = min(window_size, target)  # truncated near the start of the data
        window_begin_index = target - actual_window_size
        actual_feature_size = min(feature_size, actual_window_size - 1)
        actual_sample_num = actual_window_size - actual_feature_size

        window_train_X = np.array([
            X[window_begin_index + s: window_begin_index + s + actual_feature_size]
            for s in range(actual_sample_num)
        ])
        if require_2D:
            window_train_X = window_train_X.reshape(window_train_X.shape[0], -1)
        window_train_y = y[window_begin_index + actual_feature_size: target].reshape(-1, 1)

        window_test_X = X[window_begin_index + actual_sample_num: target]
        # Flatten only on request (or for single-column features). The test
        # sample used to be flattened unconditionally, so with
        # require_2D=False it no longer matched the layout of the
        # multi-column training samples.
        if require_2D or window_test_X.ndim == 1:
            window_test_X = window_test_X.reshape(1, -1)

        train_X.append(window_train_X)
        train_y.append(window_train_y)
        test_X.append(window_test_X)
        test_y.append(y[target])

    return train_X, train_y, test_X, test_y

In [9]:
def evaluate_loader(estimator, data, date_from, window_size, feature_size, include_other, require_2D=False):
    """Walk-forward backtest of ``estimator`` using ``SequenceDataLoader``.

    For every column after the first in ``data``, and every row with
    ``'datetime' >= date_from``, the estimator is re-fitted on that row's
    training window and asked to predict the row. Positive predictions become
    a +1 signal and everything else -1. The signals are scored against the
    realised returns with ``sharpe_ratio``.

    Parameters
    ----------
    estimator : object with ``fit(X, y)`` and ``predict(X)``
        Re-fitted in place for every window.
    data : pandas.DataFrame
        Must contain a ``'datetime'`` column; every column after the first is
        treated as an instrument.
    date_from, window_size, feature_size, include_other, require_2D
        Forwarded unchanged to ``SequenceDataLoader``.

    Returns
    -------
    float
        Value returned by ``sharpe_ratio`` for the generated signals.
    """
    # Take the realised returns from the arguments rather than the global
    # ``actual_returns``, so the scored rows always match the rows the loader
    # iterates over, whatever ``data`` / ``date_from`` is passed.
    returns = data[data['datetime'] >= date_from].iloc[:, 1:]
    signal = np.zeros(returns.shape)
    for col_idx, col_name in enumerate(returns.columns):
        dataLoader = SequenceDataLoader(data, date_from, window_size, feature_size, include_other, col_name, require_2D=require_2D)
        for i in range(len(dataLoader)):
            train_X, train_y, test_X, _ = dataLoader[i]
            estimator.fit(train_X, train_y)
            # predict() returns an array; take its single element explicitly,
            # because assigning a non-0-d array to a scalar slot is deprecated
            # in NumPy.
            signal[i, col_idx] = np.ravel(estimator.predict(test_X))[0]
    signal = np.where(signal > 0, 1, -1)
    return sharpe_ratio(returns, signal)

In [10]:
def evaluate(estimator, data_arrays):
    """Walk-forward backtest of ``estimator`` on pre-built windows.

    Parameters
    ----------
    estimator : object with ``fit(X, y)`` and ``predict(X)``
        Re-fitted in place for every window.
    data_arrays : sequence
        One ``(train_X, train_y, test_X, test_y)`` tuple per column of the
        module-level ``actual_returns``, in column order. Each element is
        indexed by window number.

    Returns
    -------
    float
        Value returned by ``sharpe_ratio`` for the +1/-1 signals derived from
        the predictions. Windows that are never predicted keep a 0 forecast
        and therefore a -1 signal.
    """
    signal = np.zeros(actual_returns.shape)
    for col_idx in range(actual_returns.shape[1]):
        train_X, train_y, test_X, test_y = data_arrays[col_idx]
        for i in range(len(test_y)):
            estimator.fit(train_X[i], train_y[i])
            # predict() returns an array; take its single element explicitly,
            # because assigning a non-0-d array to a scalar slot is deprecated
            # in NumPy.
            signal[i, col_idx] = np.ravel(estimator.predict(test_X[i]))[0]
    signal = np.where(signal > 0, 1, -1)
    return sharpe_ratio(actual_returns, signal)

In [11]:
# start = time.time()
# data_array = []
# for i in actual_returns.columns:
#     data_array.append(get_data(data, '2017/1/1', 450, 1, False, i))
# end = time.time()
# print(end-start)

In [12]:
# evaluate(LinearRegression(), data_array)

In [13]:
# evaluate(LinearRegression(), data, '2017/1/1', 450, 1, False)

In [14]:
# # param:
# #   window size
# #   feature size
# #   include other?
# #   L1, L2
# #   
# perf = {}
# date_from = '2017/1/1'
# for window_size in tqdm(range(20, len(data), 50), position=0, leave=False): # window_size
#     perf[window_size] = {}
#     for feature_size in tqdm(range(1, window_size, 10),position=1, leave=False): # feature_size
#         perf[window_size][feature_size] = {}
#         for include_other in tqdm((True, False),position=2, leave=False):
#             include_other_name = "T" if include_other else "F"
#             data_arrays = []
#             for col in actual_returns.columns:
#                 data_arrays.append(get_data(data, date_from, ))
#             perf[window_size][feature_size][include_other_name] = {}
#             perf[window_size][feature_size][include_other_name]["None"] = evaluate(LinearRegression(), data, date_from, window_size, feature_size, include_other, require_2D=True)
#             perf[window_size][feature_size][include_other_name]["L1"] = {}
#             for i in tqdm(range(5),position=3, leave=False):
#                 c = 10 ** (-i)
#                 perf[window_size][feature_size][include_other_name]["L1"][c] = evaluate(Lasso(alpha=c), data, date_from, window_size, feature_size, include_other, require_2D=True)
                
#             perf[window_size][feature_size][include_other_name]["L2"] = {}
#             for i in tqdm(range(5),position=4, leave=False):
#                 c = 10 ** (-i)
#                 perf[window_size][feature_size][include_other_name]["L2"][c] = evaluate(Ridge(alpha=c), data, date_from, window_size, feature_size, include_other, require_2D=True)

In [18]:
# Grid search over the walk-forward settings:
#   window size    - 20, 120, 220, ... (step 100, below len(data))
#   feature size   - from 1 upward in steps of window_size // 5
#   include other? - features from every column (T) or the target column only (F)
# Each combination is scored with an unregularised LinearRegression and stored
# as perf[window_size][feature_size]["T" | "F"].
# Not run here: the L1 (Lasso) / L2 (Ridge) sweeps over alpha = 10 ** -i, i in 0..4.
perf = {}
date_from = '2017/1/1'
for window_size in tqdm(range(20, len(data), 100), position=0, leave=False):
    interval = window_size // 5
    perf[window_size] = {}
    for feature_size in tqdm(range(1, window_size, interval), position=1, leave=False):
        perf[window_size][feature_size] = {}
        for include_other in tqdm((True, False), position=2, leave=False):
            include_other_name = "T" if include_other else "F"
            score = evaluate_loader(
                LinearRegression(n_jobs=-1),
                data,
                date_from,
                window_size,
                feature_size,
                include_other,
                require_2D=True,
            )
            perf[window_size][feature_size][include_other_name] = score
            print(f"{window_size} {feature_size} {include_other} {score}")

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

20 1 True -0.023914614476982034
20 1 False 0.3376670805595338


  0%|          | 0/2 [00:00<?, ?it/s]

20 5 True 0.4245809336594035
20 5 False -0.6730519711455123


  0%|          | 0/2 [00:00<?, ?it/s]

20 9 True 0.004587367703742813
20 9 False -0.13331851305310674


  0%|          | 0/2 [00:00<?, ?it/s]

20 13 True 0.2637674103265159
20 13 False -0.7915778437138901


  0%|          | 0/2 [00:00<?, ?it/s]

20 17 True 0.7477421890911298
20 17 False 0.5926584870867033


  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

120 1 True 0.3704319910190538
120 1 False 0.31636419978649655


  0%|          | 0/2 [00:00<?, ?it/s]

120 25 True 0.42672831485193413
120 25 False -0.03684074732613662


  0%|          | 0/2 [00:00<?, ?it/s]

120 49 True -0.5464305791882368
120 49 False 0.16198097675868933


  0%|          | 0/2 [00:00<?, ?it/s]

120 73 True -0.22544849908485554
120 73 False 0.034340525191703906


  0%|          | 0/2 [00:00<?, ?it/s]

120 97 True 0.14160683425371928
120 97 False 0.5273432335113816


  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

220 1 True 1.0753666221967655
220 1 False 0.6049504306890393


  0%|          | 0/2 [00:00<?, ?it/s]

220 45 True 0.3744808670423521
220 45 False -0.18516511650926984


  0%|          | 0/2 [00:00<?, ?it/s]

220 89 True 0.16740010127411517
220 89 False -1.223683033088171


  0%|          | 0/2 [00:00<?, ?it/s]

220 133 True -0.10949220054902756
220 133 False -1.016676272269851


  0%|          | 0/2 [00:00<?, ?it/s]

220 177 True -0.24549410840066863
220 177 False -0.21476645828497093


  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

320 1 True 0.9368487856816016
320 1 False 0.5594516178143129


  0%|          | 0/2 [00:00<?, ?it/s]

320 65 True 0.36489272809206297
320 65 False -0.285232964605806


  0%|          | 0/2 [00:00<?, ?it/s]

320 129 True -0.782765044546158
320 129 False -0.10141402564644211


  0%|          | 0/2 [00:00<?, ?it/s]

320 193 True -0.6930464017225149
320 193 False -0.7053698944261311


  0%|          | 0/2 [00:00<?, ?it/s]

320 257 True -0.30378935824991987
320 257 False -0.0973557435828054


  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

420 1 True 1.0069769353400968
420 1 False 0.5811384867395044


  0%|          | 0/2 [00:00<?, ?it/s]

420 85 True -0.4125182046709498
420 85 False -0.5077421607549962


  0%|          | 0/2 [00:00<?, ?it/s]

420 169 True -0.5087746793396947
420 169 False 0.6498883533344653


  0%|          | 0/2 [00:00<?, ?it/s]

420 253 True -0.47915540355824127
420 253 False 0.46743523438262946


  0%|          | 0/2 [00:00<?, ?it/s]

420 337 True -0.029957029464337894
420 337 False -0.5656498758518459


  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

520 1 True 0.7047639865651654
520 1 False 0.4430098843151339


  0%|          | 0/2 [00:00<?, ?it/s]

520 105 True -0.5355582068039819
520 105 False -0.5030125303759236


  0%|          | 0/2 [00:00<?, ?it/s]

520 209 True -0.9676945769664718
520 209 False -0.15616222074529645


  0%|          | 0/2 [00:00<?, ?it/s]

520 313 True -0.5359014186370542
520 313 False -0.27544546580650736


  0%|          | 0/2 [00:00<?, ?it/s]

520 417 True 0.0031576755837213837
520 417 False -0.8227087808415442


  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

620 1 True 1.1383305478079337
620 1 False 0.2826685597969747


  0%|          | 0/2 [00:00<?, ?it/s]

620 125 True -0.6453274082363899
620 125 False -0.4627719095913462


  0%|          | 0/2 [00:00<?, ?it/s]

620 249 True -0.7403257851549248
620 249 False -0.5278813642564134


  0%|          | 0/2 [00:00<?, ?it/s]

620 373 True -0.1949039464806376
620 373 False -0.7908385664887485


  0%|          | 0/2 [00:00<?, ?it/s]

620 497 True -0.14060623047070836
620 497 False -0.16811693773612751


  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

720 1 True 1.2447581997301216
720 1 False -0.035224034301786326


  0%|          | 0/2 [00:00<?, ?it/s]

720 145 True -0.8904721869598198
720 145 False -0.4832027034127437


  0%|          | 0/2 [00:00<?, ?it/s]

720 289 True -0.7404795180565876


KeyboardInterrupt: 

In [None]:
print(perf)

In [None]:
# Plot Sharpe ratio against feature size for each SVR kernel and record the
# best feature size per kernel.
# NOTE(review): kernels, featureSizes and sharpeRatioForSVR_featureDim are not
# defined in any earlier cell of this notebook - confirm where they come from.
bestFeatureDim_SVR = {}
for k in kernels:
    # Separate name: rebinding `perf` here would discard the grid-search
    # results stored under that name by an earlier cell.
    kernel_perf = sharpeRatioForSVR_featureDim[k]
    plt.plot(featureSizes, kernel_perf, label=k)
    bestFeatureDim_SVR[k] = featureSizes[np.argmax(kernel_perf)]
    print(f"Best feature dim for {k} kernel is: {bestFeatureDim_SVR[k]}")
    print(f"Best sharpe is {np.max(kernel_perf)}")
plt.xlabel("feature size")
plt.ylabel("Sharpe ratio")
plt.legend()

In [None]:
# Sweep the training-window size for a linear-kernel SVR with 29 lagged
# features and record the backtest Sharpe ratio of each setting.
# NOTE(review): DataLoaderV2 is not defined in any earlier cell of this
# notebook - confirm where it comes from.
sharpeRatioForSVR_windowsize_t1 = []
featureSize_t1 = 29
windowSize_t1 = list(range(30, 200, 10))
kernel = 'linear'
# The backtest rows do not depend on the window size: filter once.
backtest_rows = data[data['datetime'] >= '2017/1/1']
bt_start = backtest_rows.iloc[0].name
for windowSize in tqdm(windowSize_t1):
    # Keep windowSize + 1 rows of history ahead of the first backtest row.
    bt_data = data.iloc[bt_start - windowSize - 1:, 1:]
    signals = np.zeros((len(backtest_rows), len(bt_data.columns)))
    dataloader = DataLoaderV2(bt_data, feature_size=featureSize_t1, window_size=windowSize)
    for col_index, col_name in enumerate(bt_data.columns):
        for i in range(len(dataloader)):
            x, y = dataloader.__getitem__(i, col_index)
            y = y.reshape(-1)
            model = svm.SVR(kernel=kernel).fit(x, y)
            test_x = dataloader.get_test_axis_1(i, col_index).reshape(1, -1)
            # predict() returns an array; take its single element explicitly,
            # because assigning a non-0-d array to a scalar slot is deprecated
            # in NumPy.
            signals[i, col_index] = np.ravel(model.predict(test_x))[0]
    signals = np.where(signals > 0, 1, -1)
    sharpeRatioForSVR_windowsize_t1.append(sharpe_ratio(actual_returns, signals))

In [None]:
# Sharpe ratio for each candidate window size, and the window size that
# gave the highest one.
plt.plot(windowSize_t1, sharpeRatioForSVR_windowsize_t1)
best_pos = np.argmax(sharpeRatioForSVR_windowsize_t1)
bestWindowSize_29D = windowSize_t1[best_pos]
print(f"Best window size for 29D is: {bestWindowSize_29D}")
print(f"Best sharpe is {np.max(sharpeRatioForSVR_windowsize_t1)}")

In [None]:
# Feature size passed as feature_size to the regularization sweep below.
# NOTE(review): an earlier cell fills bestFeatureDim_SVR as a per-kernel dict;
# this rebinds the same name to a plain int.
bestFeatureDim_SVR = 29

## Regularization

In [None]:
# Sweep the SVR regularization strength C for every kernel and record the
# backtest Sharpe ratio of each (kernel, C) pair.
# NOTE(review): bestWindowSize_SVR and DataLoaderV2 are not defined in any
# earlier cell of this notebook (only bestWindowSize_29D is) - confirm.
sharpeRatioForSVR_regularization = {}
kernels = ["linear", 'poly', 'rbf', 'sigmoid']
c_candidate = [i/10. for i in range(1, 6)]
# The backtest slice depends on neither the kernel nor C: build it once.
backtest_rows = data[data['datetime'] >= '2017/1/1']
bt_data = data.iloc[backtest_rows.iloc[0].name-bestWindowSize_SVR-1:,1:]
for k in kernels:
    sharpeRatioForSVR_regularization[k] = []
    for c in tqdm(c_candidate):
        signals = np.zeros((len(backtest_rows), len(bt_data.columns)))
        dataloader = DataLoaderV2(bt_data ,feature_size=bestFeatureDim_SVR, window_size=bestWindowSize_SVR)
        for col_index, col_name in enumerate(bt_data.columns):
            for i in range(len(dataloader)):
                x, y = dataloader.__getitem__(i, col_index)
                y = y.reshape(-1)
                # Use the loop's kernel `k`. The previous `kernel=kernel`
                # picked up the value left by an earlier cell, so every
                # kernel's curve was computed with the same kernel.
                model = svm.SVR(kernel=k, C=c).fit(x, y)
                test_x = dataloader.get_test_axis_1(i, col_index).reshape(1, -1)
                # predict() returns an array; take its single element
                # explicitly, because assigning a non-0-d array to a scalar
                # slot is deprecated in NumPy.
                signals[i, col_index] = np.ravel(model.predict(test_x))[0]
        signals = np.where(signals > 0, 1, -1)
        sharpeRatioForSVR_regularization[k].append(sharpe_ratio(actual_returns, signals))

In [None]:
# Plot Sharpe ratio against C for each kernel and report the best C per kernel.
for k in kernels:
    # Separate name: rebinding `perf` here would discard the grid-search
    # results stored under that name by an earlier cell.
    kernel_perf = sharpeRatioForSVR_regularization[k]
    plt.plot(c_candidate, kernel_perf, label=k)
    best_c = c_candidate[np.argmax(kernel_perf)]
    print(f"Best C for {k} kernel is: {best_c}")
    print(f"Best sharpe is {np.max(kernel_perf)}")
plt.xlabel("C")
plt.ylabel("Sharpe ratio")
plt.legend()