In [1]:
%%capture
!pip install sagemaker==1.72.0

In [2]:
import contextlib
import io
import os
import warnings

import pandas as pd

import source
# Ignore every warning raised through the `warnings` module for the rest of the session.
warnings.filterwarnings("ignore")

In [3]:
# Render inline matplotlib figures as SVG.
%config InlineBackend.figure_format = 'svg'

In [4]:
# Load the dataset bundle from the project's `source` package and unpack the
# two entries read from it here.
# NOTE(review): `symbols` is not referenced again in the visible cells — confirm it is still needed.
data_dict = source.load_data()
data = data_dict['data']
symbols = data_dict['symbols']

In [5]:
# Registry of the model wrappers created below, keyed by experiment name.
collection = {}

# Every experiment saves its predictions under ./results with DataFrame.to_csv,
# which does not create missing directories, so make sure the folder exists.
os.makedirs('./results', exist_ok=True)

<h2> SMA Evaluation </h2>

<h5> --- SMA Iteration 1 [SMA-1]</h5>

In [None]:
# SMA-1 baseline. W is forwarded to load_sets
# (presumably the moving-average window length — confirm in source.create).
W = 3
target = 'price'
name = 'SMA-1'
sets = source.create.load_sets(data, target, W)

# Work on a copy so that adding `pred` and re-indexing cannot modify the frame
# that `sets` still holds.
test_result = sets['test']['ori'].copy()
# The baseline prediction is the `price_mean` column taken as-is.
test_result['pred'] = test_result['price_mean']
test_result = test_result.set_index('time', drop=True)

# Run the trading evaluation, then report RMSE and plot the trading result.
trade_result = source.metric.trade(test_result, target)

print('RMSE: ', source.metric.RMSE(test_result, target))
source.plot.plot_history({name: trade_result}, 'Trading Result - ' + name)

<h5> --- SMA Iteration 2 [SMA-2]</h5>

In [None]:
# SMA-2 baseline. W is forwarded to load_sets
# (presumably the moving-average window length — confirm in source.create).
W = 5
target = 'price'
name = 'SMA-2'
sets = source.create.load_sets(data, target, W)

# Work on a copy so that adding `pred` and re-indexing cannot modify the frame
# that `sets` still holds.
test_result = sets['test']['ori'].copy()
# The baseline prediction is the `price_mean` column taken as-is.
test_result['pred'] = test_result['price_mean']
test_result = test_result.set_index('time', drop=True)

# Run the trading evaluation, then report RMSE and plot the trading result.
trade_result = source.metric.trade(test_result, target)

print('RMSE: ', source.metric.RMSE(test_result, target))
source.plot.plot_history({name: trade_result}, 'Trading Result - ' + name)

<h5> --- SMA Iteration 3 [SMA-3] - Best Model - Benchmark</h5>

In [None]:
# SMA-3 baseline (the benchmark run). W is forwarded to load_sets
# (presumably the moving-average window length — confirm in source.create).
W = 7
target = 'price'
name = 'SMA-3'
sets = source.create.load_sets(data, target, W)

# Work on a copy so that adding `pred` and re-indexing cannot modify the frame
# that `sets` still holds.
test_result = sets['test']['ori'].copy()
# The baseline prediction is the `price_mean` column taken as-is.
test_result['pred'] = test_result['price_mean']
test_result = test_result.set_index('time', drop=True)

# Run the trading evaluation, then report RMSE and plot the trading result.
trade_result = source.metric.trade(test_result, target)

print('RMSE: ', source.metric.RMSE(test_result, target))
source.plot.plot_history({name: trade_result}, 'Trading Result - ' + name)

In [None]:
# Save the benchmark predictions to ./results/<name>_unscaled.csv. This relies on
# `name` and `test_result` still holding the values set by the SMA-3 cell above.
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result.to_csv(test_result_loc)

<h2> XGBoost Evaluation </h2>

<h5> --- XGBoost Iteration 1 [XGB-1]</h5>

In [None]:
# Settings for this run: W and target are forwarded to the model wrapper;
# name is its prefix and the stem of the results file.
W = 5
target = 'price'
name = 'XGB-1'

In [None]:
# Hyperparameters passed to source.xgb for this run.
hyperparams = {'max_depth':10,
               'eta':0.1,
               'gamma':0.2,
               'min_child_weight':5,
               'subsample':0.8,
               'objective':'reg:linear',
               'early_stopping_rounds':20,
               'num_round':300,
               'seed':100}

# Create the wrapper and register it in `collection` under this run's name.
collection[name] = source.xgb(prefix=name,
                              data=data,
                              W=W,
                              target=target,
                              hyperparams=hyperparams)

# Train the model, then initialise the predictor that predict() is called on in the next cell.
# NOTE(review): presumably these start a SageMaker training job and deploy an endpoint — confirm in source.
collection[name].fit()
collection[name].init_predictor()

In [None]:
# Copy the wrapper's 'trainval'/'ori' frame and index it by time (plotted as "Train" below).
trainval = collection[name].sets['trainval']['ori'].copy()
trainval.set_index('time', drop=True, inplace=True)

# Predict the test split in both forms: unscaled=False and unscaled=True.
test_result = collection[name].predict('test', unscaled=False)
test_result_unscaled = collection[name].predict('test', unscaled=True)
# Save the unscaled predictions under ./results/<name>_unscaled.csv.
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result_unscaled.to_csv(test_result_loc)

In [None]:
# Read the saved predictions back from CSV (first column as index, 'time'
# parsed as dates, default NA strings not converted) and run the trading
# evaluation on them.
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result_unscaled = pd.read_csv(test_result_loc, parse_dates=['time'], index_col=0, keep_default_na=False, header=0)
trade_result = source.metric.trade(test_result_unscaled, target)

In [None]:
# Print the RMSE computed by source.metric.RMSE on the reloaded unscaled predictions.
print('RMSE: ', source.metric.RMSE(test_result_unscaled, target))

In [None]:
# Plot the test predictions obtained with unscaled=False.
source.plot.plot_results({"Test": test_result}, 'BTC Price (Scaled) - ' + name, 'BTC')

In [None]:
# Plot the training series together with the unscaled test predictions on a log scale.
source.plot.plot_results({"Train": trainval, 'Test': test_result_unscaled}, 'BTC Price (Unscaled) - ' + name, 'BTC', scale='log')

In [None]:
# Plot the unscaled test predictions alone on a linear scale.
source.plot.plot_results({'Test': test_result_unscaled}, 'BTC Price (Unscaled) - ' + name, 'BTC', scale='linear')

In [None]:
# Plot the trade_result computed for this run.
source.plot.plot_history({name: trade_result}, 'Trading Result - ' + name)

In [None]:
# Tear down what this run's wrapper set up.
# NOTE(review): presumably deletes the SageMaker endpoint — confirm in source.
collection[name].cleanup()

<h5> --- XGBoost Iteration 2 [XGB-2]</h5>

In [None]:
# Settings for this run: W and target are forwarded to the model wrapper;
# name is its prefix and the stem of the results file.
W = 7
target = 'price'
name = 'XGB-2'

In [None]:
# Hyperparameters passed to source.xgb for this run.
hyperparams = {'max_depth':10,
               'eta':0.1,
               'gamma':0.2,
               'min_child_weight':15,
               'subsample':0.8,
               'objective':'reg:linear',
               'early_stopping_rounds':20,
               'num_round':300,
               'seed':100}

# Create the wrapper and register it in `collection` under this run's name.
collection[name] = source.xgb(prefix=name,
                                  data=data,
                                  W=W,
                                  target=target,
                                  hyperparams=hyperparams)

# Train the model, then initialise the predictor that predict() is called on in the next cell.
# NOTE(review): presumably these start a SageMaker training job and deploy an endpoint — confirm in source.
collection[name].fit()
collection[name].init_predictor()

In [None]:
# Copy the wrapper's 'trainval'/'ori' frame and index it by time (plotted as "Train" below).
trainval = collection[name].sets['trainval']['ori'].copy()
trainval.set_index('time', drop=True, inplace=True)

# Predict the test split in both forms: unscaled=False and unscaled=True.
test_result = collection[name].predict('test', unscaled=False)
test_result_unscaled = collection[name].predict('test', unscaled=True)
# Save the unscaled predictions under ./results/<name>_unscaled.csv.
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result_unscaled.to_csv(test_result_loc)

In [None]:
# Read the saved predictions back from CSV (first column as index, 'time'
# parsed as dates, default NA strings not converted) and run the trading
# evaluation on them.
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result_unscaled = pd.read_csv(test_result_loc, parse_dates=['time'], index_col=0, keep_default_na=False, header=0)
trade_result = source.metric.trade(test_result_unscaled, target)

In [None]:
# Print the RMSE computed by source.metric.RMSE on the reloaded unscaled predictions.
print('RMSE: ', source.metric.RMSE(test_result_unscaled, target))

In [None]:
# Plot the test predictions obtained with unscaled=False.
source.plot.plot_results({"Test": test_result}, 'BTC Price (Scaled) - ' + name, 'BTC')

In [None]:
# Plot the training series together with the unscaled test predictions on a log scale.
source.plot.plot_results({"Train": trainval, 'Test': test_result_unscaled}, 'BTC Price (Unscaled) - ' + name, 'BTC', scale='log')

In [None]:
# Plot the unscaled test predictions alone on a linear scale.
source.plot.plot_results({'Test': test_result_unscaled}, 'BTC Price (Unscaled) - ' + name, 'BTC', scale='linear')

In [None]:
# Plot the trade_result computed for this run.
source.plot.plot_history({name: trade_result}, 'Trading Result - ' + name)

In [None]:
# Tear down what this run's wrapper set up.
# NOTE(review): presumably deletes the SageMaker endpoint — confirm in source.
collection[name].cleanup()

<h5> --- XGBoost Iteration 3 [XGB-3]</h5>

In [None]:
# Settings for this run: W and target are forwarded to the model wrapper;
# name is its prefix and the stem of the results file.
W = 5
target = 'price'
name = 'XGB-3'

In [None]:
# Hyperparameters passed to source.xgb for this run.
hyperparams = {'max_depth':20,
               'eta':0.1,
               'gamma':0.2,
               'min_child_weight':5,
               'subsample':0.8,
               'objective':'reg:linear',
               'early_stopping_rounds':20,
               'num_round':300,
               'seed':100}

# Create the wrapper and register it in `collection` under this run's name.
collection[name] = source.xgb(prefix=name,
                                  data=data,
                                  W=W,
                                  target=target,
                                  hyperparams=hyperparams)

# Train the model, then initialise the predictor that predict() is called on in the next cell.
# NOTE(review): presumably these start a SageMaker training job and deploy an endpoint — confirm in source.
collection[name].fit()
collection[name].init_predictor()

In [None]:
# Copy the wrapper's 'trainval'/'ori' frame and index it by time (plotted as "Train" below).
trainval = collection[name].sets['trainval']['ori'].copy()
trainval.set_index('time', drop=True, inplace=True)

# Predict the test split in both forms: unscaled=False and unscaled=True.
test_result = collection[name].predict('test', unscaled=False)
test_result_unscaled = collection[name].predict('test', unscaled=True)
# Save the unscaled predictions under ./results/<name>_unscaled.csv.
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result_unscaled.to_csv(test_result_loc)

In [None]:
# Read the saved predictions back from CSV (first column as index, 'time'
# parsed as dates, default NA strings not converted) and run the trading
# evaluation on them.
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result_unscaled = pd.read_csv(test_result_loc, parse_dates=['time'], index_col=0, keep_default_na=False, header=0)
trade_result = source.metric.trade(test_result_unscaled, target)

In [None]:
# Print the RMSE computed by source.metric.RMSE on the reloaded unscaled predictions.
print('RMSE: ', source.metric.RMSE(test_result_unscaled, target))

In [None]:
# Plot the test predictions obtained with unscaled=False.
source.plot.plot_results({"Test": test_result}, 'BTC Price (Scaled) - ' + name, 'BTC')

In [None]:
# Plot the training series together with the unscaled test predictions on a log scale.
source.plot.plot_results({"Train": trainval, 'Test': test_result_unscaled}, 'BTC Price (Unscaled) - ' + name, 'BTC', scale='log')

In [None]:
# Plot the unscaled test predictions alone on a linear scale.
source.plot.plot_results({'Test': test_result_unscaled}, 'BTC Price (Unscaled) - ' + name, 'BTC', scale='linear')

In [None]:
# Plot the trade_result computed for this run.
source.plot.plot_history({name: trade_result}, 'Trading Result - ' + name)

In [None]:
# Tear down what this run's wrapper set up.
# NOTE(review): presumably deletes the SageMaker endpoint — confirm in source.
collection[name].cleanup()

<h5> --- XGBoost Iteration 4 [XGB-4]</h5>

In [None]:
# Settings for this run: W and target are forwarded to the model wrapper;
# name is its prefix and the stem of the results file.
W = 7
target = 'price'
name = 'XGB-4'

In [None]:
# Hyperparameters passed to source.xgb for this run.
hyperparams = {'max_depth':20,
               'eta':0.1,
               'gamma':0.2,
               'min_child_weight':15,
               'subsample':0.8,
               'objective':'reg:linear',
               'early_stopping_rounds':20,
               'num_round':300,
               'seed':100}

# Create the wrapper and register it in `collection` under this run's name.
collection[name] = source.xgb(prefix=name,
                                  data=data,
                                  W=W,
                                  target=target,
                                  hyperparams=hyperparams)

# Train the model, then initialise the predictor that predict() is called on in the next cell.
# NOTE(review): presumably these start a SageMaker training job and deploy an endpoint — confirm in source.
collection[name].fit()
collection[name].init_predictor()

In [None]:
# Copy the wrapper's 'trainval'/'ori' frame and index it by time (plotted as "Train" below).
trainval = collection[name].sets['trainval']['ori'].copy()
trainval.set_index('time', drop=True, inplace=True)

# Predict the test split in both forms: unscaled=False and unscaled=True.
test_result = collection[name].predict('test', unscaled=False)
test_result_unscaled = collection[name].predict('test', unscaled=True)
# Save the unscaled predictions under ./results/<name>_unscaled.csv.
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result_unscaled.to_csv(test_result_loc)

In [None]:
# Read the saved predictions back from CSV (first column as index, 'time'
# parsed as dates, default NA strings not converted) and run the trading
# evaluation on them.
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result_unscaled = pd.read_csv(test_result_loc, parse_dates=['time'], index_col=0, keep_default_na=False, header=0)
trade_result = source.metric.trade(test_result_unscaled, target)

In [None]:
# Print the RMSE computed by source.metric.RMSE on the reloaded unscaled predictions.
print('RMSE: ', source.metric.RMSE(test_result_unscaled, target))

In [None]:
# Plot the test predictions obtained with unscaled=False.
source.plot.plot_results({"Test": test_result}, 'BTC Price (Scaled) - ' + name, 'BTC')

In [None]:
# Plot the training series together with the unscaled test predictions on a log scale.
source.plot.plot_results({"Train": trainval, 'Test': test_result_unscaled}, 'BTC Price (Unscaled) - ' + name, 'BTC', scale='log')

In [None]:
# Plot the unscaled test predictions alone on a linear scale.
source.plot.plot_results({'Test': test_result_unscaled}, 'BTC Price (Unscaled) - ' + name, 'BTC', scale='linear')

In [None]:
# Plot the trade_result computed for this run; the title uses the same
# 'Trading Result - <name>' format as the other sections.
source.plot.plot_history({name: trade_result}, 'Trading Result - ' + name)

In [None]:
# Tear down what this run's wrapper set up.
# NOTE(review): presumably deletes the SageMaker endpoint — confirm in source.
collection[name].cleanup()

<h2> Neural Net Evaluation </h2>

<h5> --- Neural Net Iteration 1 [NN-1]</h5>

In [None]:
# Settings for this run: W and target are forwarded to the model wrapper;
# name is its prefix and the stem of the results file.
W = 5
target = 'price'
name = 'NN-1'

In [None]:
# Hyperparameters passed to source.neuralnet for this run.
hyperparams = {'hidden_layers': 50,
               'max_iter': 5000,
               'random_state': 100}
               
# Create the wrapper and register it in `collection` under this run's name.
collection[name] = source.neuralnet(prefix=name,
                        data=data,
                        W=W,
                        target=target,
                        hyperparams=hyperparams)

# Train the model, then initialise the predictor that predict() is called on in the next cell.
# NOTE(review): presumably these start a SageMaker training job and deploy an endpoint — confirm in source.
collection[name].fit()
collection[name].init_predictor()

In [None]:
# Copy the wrapper's 'trainval'/'ori' frame and index it by time (plotted as "Train" below).
trainval = collection[name].sets['trainval']['ori'].copy()
trainval.set_index('time', drop=True, inplace=True)

# Predict the test split in both forms: unscaled=False and unscaled=True.
test_result = collection[name].predict('test', unscaled=False)
test_result_unscaled = collection[name].predict('test', unscaled=True)
# Save the unscaled predictions under ./results/<name>_unscaled.csv.
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result_unscaled.to_csv(test_result_loc)

In [None]:
# Read the saved predictions back from CSV (first column as index, 'time'
# parsed as dates, default NA strings not converted) and run the trading
# evaluation on them.
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result_unscaled = pd.read_csv(test_result_loc, parse_dates=['time'], index_col=0, keep_default_na=False, header=0)
trade_result = source.metric.trade(test_result_unscaled, target)

In [None]:
# Print the RMSE computed by source.metric.RMSE on the reloaded unscaled predictions.
print('RMSE: ', source.metric.RMSE(test_result_unscaled, target))

In [None]:
# Plot the test predictions obtained with unscaled=False.
source.plot.plot_results({"Test": test_result}, 'BTC Price (Scaled) - ' + name, 'BTC')

In [None]:
# Plot the training series together with the unscaled test predictions on a log scale.
source.plot.plot_results({"Train": trainval, 'Test': test_result_unscaled}, 'BTC Price (Unscaled) - ' + name, 'BTC', scale='log')

In [None]:
# Plot the unscaled test predictions alone on a linear scale.
source.plot.plot_results({'Test': test_result_unscaled}, 'BTC Price (Unscaled) - ' + name, 'BTC', scale='linear')

In [None]:
# Plot the trade_result computed for this run.
source.plot.plot_history({name: trade_result}, 'Trading Result - ' + name)

In [None]:
# Tear down what this run's wrapper set up.
# NOTE(review): presumably deletes the SageMaker endpoint — confirm in source.
collection[name].cleanup()

<h5> --- Neural Net Iteration 2 [NN-2]</h5>

In [None]:
# Settings for this run: W and target are forwarded to the model wrapper;
# name is its prefix and the stem of the results file.
W = 7
target = 'price'
name = 'NN-2'

In [None]:
# Hyperparameters passed to source.neuralnet for this run.
hyperparams = {'hidden_layers': 50,
               'max_iter': 5000,
               'random_state': 100}
               
# Create the wrapper and register it in `collection` under this run's name.
collection[name] = source.neuralnet(prefix=name,
                        data=data,
                        W=W,
                        target=target,
                        hyperparams=hyperparams)

# Train the model, then initialise the predictor that predict() is called on in the next cell.
# NOTE(review): presumably these start a SageMaker training job and deploy an endpoint — confirm in source.
collection[name].fit()
collection[name].init_predictor()

In [None]:
# Copy the wrapper's 'trainval'/'ori' frame and index it by time (plotted as "Train" below).
trainval = collection[name].sets['trainval']['ori'].copy()
trainval.set_index('time', drop=True, inplace=True)

# Predict the test split in both forms: unscaled=False and unscaled=True.
test_result = collection[name].predict('test', unscaled=False)
test_result_unscaled = collection[name].predict('test', unscaled=True)
# Save the unscaled predictions under ./results/<name>_unscaled.csv.
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result_unscaled.to_csv(test_result_loc)

In [None]:
# Read the saved predictions back from CSV (first column as index, 'time'
# parsed as dates, default NA strings not converted) and run the trading
# evaluation on them.
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result_unscaled = pd.read_csv(test_result_loc, parse_dates=['time'], index_col=0, keep_default_na=False, header=0)
trade_result = source.metric.trade(test_result_unscaled, target)

In [None]:
# Print the RMSE computed by source.metric.RMSE on the reloaded unscaled predictions.
print('RMSE: ', source.metric.RMSE(test_result_unscaled, target))

In [None]:
# Plot the test predictions obtained with unscaled=False.
source.plot.plot_results({"Test": test_result}, 'BTC Price (Scaled) - ' + name, 'BTC')

In [None]:
# Plot the training series together with the unscaled test predictions on a log scale.
source.plot.plot_results({"Train": trainval, 'Test': test_result_unscaled}, 'BTC Price (Unscaled) - ' + name, 'BTC', scale='log')

In [None]:
# Plot the unscaled test predictions alone on a linear scale.
source.plot.plot_results({'Test': test_result_unscaled}, 'BTC Price (Unscaled) - ' + name, 'BTC', scale='linear')

In [None]:
# Plot the trade_result computed for this run.
source.plot.plot_history({name: trade_result}, 'Trading Result - ' + name)

In [None]:
# Tear down what this run's wrapper set up.
# NOTE(review): presumably deletes the SageMaker endpoint — confirm in source.
collection[name].cleanup()

<h5> --- Neural Net Iteration 3 [NN-3]</h5>

In [None]:
# Settings for this run: W and target are forwarded to the model wrapper;
# name is its prefix and the stem of the results file.
W = 5
target = 'price'
name = 'NN-3'

In [None]:
# Hyperparameters passed to source.neuralnet for this run.
hyperparams = {'hidden_layers': 100,
               'max_iter': 5000,
               'random_state': 100}
               
# Create the wrapper and register it in `collection` under this run's name.
collection[name] = source.neuralnet(prefix=name,
                        data=data,
                        W=W,
                        target=target,
                        hyperparams=hyperparams)

# Train the model, then initialise the predictor that predict() is called on in the next cell.
# NOTE(review): presumably these start a SageMaker training job and deploy an endpoint — confirm in source.
collection[name].fit()
collection[name].init_predictor()

In [None]:
# Copy the wrapper's 'trainval'/'ori' frame and index it by time (plotted as "Train" below).
trainval = collection[name].sets['trainval']['ori'].copy()
trainval.set_index('time', drop=True, inplace=True)

# Predict the test split in both forms: unscaled=False and unscaled=True.
test_result = collection[name].predict('test', unscaled=False)
test_result_unscaled = collection[name].predict('test', unscaled=True)
# Save the unscaled predictions under ./results/<name>_unscaled.csv.
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result_unscaled.to_csv(test_result_loc)

In [None]:
# Read the saved predictions back from CSV (first column as index, 'time'
# parsed as dates, default NA strings not converted) and run the trading
# evaluation on them.
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result_unscaled = pd.read_csv(test_result_loc, parse_dates=['time'], index_col=0, keep_default_na=False, header=0)
trade_result = source.metric.trade(test_result_unscaled, target)

In [None]:
# Print the RMSE computed by source.metric.RMSE on the reloaded unscaled predictions.
print('RMSE: ', source.metric.RMSE(test_result_unscaled, target))

In [None]:
# Plot the test predictions obtained with unscaled=False.
source.plot.plot_results({"Test": test_result}, 'BTC Price (Scaled) - ' + name, 'BTC')

In [None]:
# Plot the training series together with the unscaled test predictions on a log scale.
source.plot.plot_results({"Train": trainval, 'Test': test_result_unscaled}, 'BTC Price (Unscaled) - ' + name, 'BTC', scale='log')

In [None]:
# Plot the unscaled test predictions alone on a linear scale.
source.plot.plot_results({'Test': test_result_unscaled}, 'BTC Price (Unscaled) - ' + name, 'BTC', scale='linear')

In [None]:
# Plot the trade_result computed for this run.
source.plot.plot_history({name: trade_result}, 'Trading Result - ' + name)

In [None]:
# Tear down what this run's wrapper set up.
# NOTE(review): presumably deletes the SageMaker endpoint — confirm in source.
collection[name].cleanup()

<h5> --- Neural Net Iteration 4 [NN-4]</h5>

In [None]:
# Settings for this run: W and target are forwarded to the model wrapper;
# name is its prefix and the stem of the results file.
W = 7
target = 'price'
name = 'NN-4'

In [None]:
# Hyperparameters passed to source.neuralnet for this run.
hyperparams = {'hidden_layers': 100,
               'max_iter': 5000,
               'random_state': 100}
               
# Create the wrapper and register it in `collection` under this run's name.
collection[name] = source.neuralnet(prefix=name,
                        data=data,
                        W=W,
                        target=target,
                        hyperparams=hyperparams)

# Train the model, then initialise the predictor that predict() is called on in the next cell.
# NOTE(review): presumably these start a SageMaker training job and deploy an endpoint — confirm in source.
collection[name].fit()
collection[name].init_predictor()

In [None]:
# Copy the wrapper's 'trainval'/'ori' frame and index it by time (plotted as "Train" below).
trainval = collection[name].sets['trainval']['ori'].copy()
trainval.set_index('time', drop=True, inplace=True)

# Predict the test split in both forms: unscaled=False and unscaled=True.
test_result = collection[name].predict('test', unscaled=False)
test_result_unscaled = collection[name].predict('test', unscaled=True)
# Save the unscaled predictions under ./results/<name>_unscaled.csv.
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result_unscaled.to_csv(test_result_loc)

In [None]:
# Read the saved predictions back from CSV (first column as index, 'time'
# parsed as dates, default NA strings not converted) and run the trading
# evaluation on them.
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result_unscaled = pd.read_csv(test_result_loc, parse_dates=['time'], index_col=0, keep_default_na=False, header=0)
trade_result = source.metric.trade(test_result_unscaled, target)

In [None]:
# Print the RMSE computed by source.metric.RMSE on the reloaded unscaled predictions.
print('RMSE: ', source.metric.RMSE(test_result_unscaled, target))

In [None]:
# Plot the test predictions obtained with unscaled=False.
source.plot.plot_results({"Test": test_result}, 'BTC Price (Scaled) - ' + name, 'BTC')

In [None]:
# Plot the training series together with the unscaled test predictions on a log scale.
source.plot.plot_results({"Train": trainval, 'Test': test_result_unscaled}, 'BTC Price (Unscaled) - ' + name, 'BTC', scale='log')

In [None]:
# Plot the unscaled test predictions alone on a linear scale.
source.plot.plot_results({'Test': test_result_unscaled}, 'BTC Price (Unscaled) - ' + name, 'BTC', scale='linear')

In [None]:
# Plot the trade_result computed for this run.
source.plot.plot_history({name: trade_result}, 'Trading Result - ' + name)

In [None]:
# Tear down what this run's wrapper set up.
# NOTE(review): presumably deletes the SageMaker endpoint — confirm in source.
collection[name].cleanup()

<h2> DeepAR Evaluation </h2>

<h5> --- DeepAR Iteration 1 [DA-1]</h5>

In [None]:
# Settings for this run: W and target are forwarded to the model wrapper;
# name is its prefix and the stem of the results file.
W = 5
target = 'price'
name = 'DA-1'

In [None]:
# Hyperparameters passed to source.deepar for this run; every value is given as a string.
hyperparams = {
    "epochs": "500",
    "time_freq": 'D',
    "prediction_length": '1',
    "context_length": '30',
    "num_cells": "100",
    "num_layers": "4",
    "mini_batch_size": "128",
    "learning_rate": "0.1",
    "early_stopping_patience": "10"
}
               
# Create the wrapper and register it in `collection` under this run's name.
collection[name] = source.deepar(prefix=name,
                        data=data,
                        W=W,
                        target=target,
                        hyperparams=hyperparams)

# Train the model, then initialise the predictor that predict() is called on in the next cell.
# NOTE(review): presumably these start a SageMaker training job and deploy an endpoint — confirm in source.
collection[name].fit()
collection[name].init_predictor()

In [None]:
# Copy the wrapper's 'trainval'/'ori' frame and index it by time (plotted as "Train" below).
trainval = collection[name].sets['trainval']['ori'].copy()
trainval.set_index('time', drop=True, inplace=True)

# Predict the test split in both forms: unscaled=False and unscaled=True.
test_result = collection[name].predict('test', unscaled=False)
test_result_unscaled = collection[name].predict('test', unscaled=True)
# Save the unscaled predictions under ./results/<name>_unscaled.csv.
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result_unscaled.to_csv(test_result_loc)

In [None]:
# Read the saved predictions back from CSV (first column as index, 'time'
# parsed as dates, default NA strings not converted) and run the trading
# evaluation on them.
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result_unscaled = pd.read_csv(test_result_loc, parse_dates=['time'], index_col=0, keep_default_na=False, header=0)
trade_result = source.metric.trade(test_result_unscaled, target)

In [None]:
# Print the RMSE computed by source.metric.RMSE on the reloaded unscaled predictions.
print('RMSE: ', source.metric.RMSE(test_result_unscaled, target))

In [None]:
# Plot the test predictions obtained with unscaled=False.
source.plot.plot_results({"Test": test_result}, 'BTC Price (Scaled) - ' + name, 'BTC')

In [None]:
# Plot the training series together with the unscaled test predictions on a log scale.
source.plot.plot_results({"Train": trainval, 'Test': test_result_unscaled}, 'BTC Price (Unscaled) - ' + name, 'BTC', scale='log')

In [None]:
# Plot the unscaled test predictions alone on a linear scale.
source.plot.plot_results({'Test': test_result_unscaled}, 'BTC Price (Unscaled) - ' + name, 'BTC', scale='linear')

In [None]:
# Plot the trade_result computed for this run.
source.plot.plot_history({name: trade_result}, 'Trading Result - ' + name)

In [None]:
# Tear down what this run's wrapper set up.
# NOTE(review): presumably deletes the SageMaker endpoint — confirm in source.
collection[name].cleanup()

<h5> --- DeepAR Iteration 2 [DA-2]</h5>

In [None]:
# Settings for this run: W and target are forwarded to the model wrapper;
# name is its prefix and the stem of the results file.
W = 7
target = 'price'
name = 'DA-2'

In [None]:
# Hyperparameters passed to source.deepar for this run; every value is given as a string.
hyperparams = {
    "epochs": "500",
    "time_freq": 'D',
    "prediction_length": '1',
    "context_length": '30',
    "num_cells": "100",
    "num_layers": "4",
    "mini_batch_size": "128",
    "learning_rate": "0.1",
    "early_stopping_patience": "10"
}
               
# Create the wrapper and register it in `collection` under this run's name.
collection[name] = source.deepar(prefix=name,
                        data=data,
                        W=W,
                        target=target,
                        hyperparams=hyperparams)

# Train the model, then initialise the predictor that predict() is called on in the next cell.
# NOTE(review): presumably these start a SageMaker training job and deploy an endpoint — confirm in source.
collection[name].fit()
collection[name].init_predictor()

In [None]:
# Copy the wrapper's 'trainval'/'ori' frame and index it by time (plotted as "Train" below).
trainval = collection[name].sets['trainval']['ori'].copy()
trainval.set_index('time', drop=True, inplace=True)

# Predict the test split in both forms: unscaled=False and unscaled=True.
test_result = collection[name].predict('test', unscaled=False)
test_result_unscaled = collection[name].predict('test', unscaled=True)
# Save the unscaled predictions under ./results/<name>_unscaled.csv.
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result_unscaled.to_csv(test_result_loc)

In [None]:
# Read the saved predictions back from CSV (first column as index, 'time'
# parsed as dates, default NA strings not converted) and run the trading
# evaluation on them.
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result_unscaled = pd.read_csv(test_result_loc, parse_dates=['time'], index_col=0, keep_default_na=False, header=0)
trade_result = source.metric.trade(test_result_unscaled, target)

In [None]:
# Print the RMSE computed by source.metric.RMSE on the reloaded unscaled predictions.
print('RMSE: ', source.metric.RMSE(test_result_unscaled, target))

In [None]:
# Plot the test predictions obtained with unscaled=False.
source.plot.plot_results({"Test": test_result}, 'BTC Price (Scaled) - ' + name, 'BTC')

In [None]:
# Plot the training series together with the unscaled test predictions on a log scale.
source.plot.plot_results({"Train": trainval, 'Test': test_result_unscaled}, 'BTC Price (Unscaled) - ' + name, 'BTC', scale='log')

In [None]:
# Plot the unscaled test predictions alone on a linear scale.
source.plot.plot_results({'Test': test_result_unscaled}, 'BTC Price (Unscaled) - ' + name, 'BTC', scale='linear')

In [None]:
# Plot the trade_result computed for this run.
source.plot.plot_history({name: trade_result}, 'Trading Result - ' + name)

In [None]:
# Tear down what this run's wrapper set up.
# NOTE(review): presumably deletes the SageMaker endpoint — confirm in source.
collection[name].cleanup()

<h5> --- DeepAR Iteration 3 [DA-3]</h5>

In [None]:
# Settings for this run: W and target are forwarded to the model wrapper;
# name is its prefix and the stem of the results file.
W = 5
target = 'price'
name = 'DA-3'

In [None]:
# Hyperparameters passed to source.deepar for this run; every value is given as a string.
hyperparams = {
    "epochs": "500",
    "time_freq": 'D',
    "prediction_length": '1',
    "context_length": '30',
    "num_cells": "200",
    "num_layers": "4",
    "mini_batch_size": "128",
    "learning_rate": "0.1",
    "early_stopping_patience": "10"
}
               
# Create the wrapper and register it in `collection` under this run's name.
collection[name] = source.deepar(prefix=name,
                        data=data,
                        W=W,
                        target=target,
                        hyperparams=hyperparams)

# Train the model, then initialise the predictor that predict() is called on in the next cell.
# NOTE(review): presumably these start a SageMaker training job and deploy an endpoint — confirm in source.
collection[name].fit()
collection[name].init_predictor()

In [None]:
# Copy the wrapper's 'trainval'/'ori' frame and index it by time (plotted as "Train" below).
trainval = collection[name].sets['trainval']['ori'].copy()
trainval.set_index('time', drop=True, inplace=True)

# Predict the test split in both forms: unscaled=False and unscaled=True.
test_result = collection[name].predict('test', unscaled=False)
test_result_unscaled = collection[name].predict('test', unscaled=True)
# Save the unscaled predictions under ./results/<name>_unscaled.csv.
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result_unscaled.to_csv(test_result_loc)

In [None]:
# Read the saved predictions back from CSV (first column as index, 'time'
# parsed as dates, default NA strings not converted) and run the trading
# evaluation on them.
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result_unscaled = pd.read_csv(test_result_loc, parse_dates=['time'], index_col=0, keep_default_na=False, header=0)
trade_result = source.metric.trade(test_result_unscaled, target)

In [None]:
# Print the RMSE computed by source.metric.RMSE on the reloaded unscaled predictions.
print('RMSE: ', source.metric.RMSE(test_result_unscaled, target))

In [None]:
# Plot the test predictions obtained with unscaled=False.
source.plot.plot_results({"Test": test_result}, 'BTC Price (Scaled) - ' + name, 'BTC')

In [None]:
# Plot the training series together with the unscaled test predictions on a log scale.
source.plot.plot_results({"Train": trainval, 'Test': test_result_unscaled}, 'BTC Price (Unscaled) - ' + name, 'BTC', scale='log')

In [None]:
# Plot the unscaled test predictions alone on a linear scale.
source.plot.plot_results({'Test': test_result_unscaled}, 'BTC Price (Unscaled) - ' + name, 'BTC', scale='linear')

In [None]:
# Plot the trade_result computed for this run.
source.plot.plot_history({name: trade_result}, 'Trading Result - ' + name)

In [None]:
# Tear down what this run's wrapper set up.
# NOTE(review): presumably deletes the SageMaker endpoint — confirm in source.
collection[name].cleanup()

<h5> --- DeepAR Iteration 4 [DA-4]</h5>

In [None]:
# Settings for this run: W and target are forwarded to the model wrapper;
# name is its prefix and the stem of the results file.
W = 7
target = 'price'
name = 'DA-4'

In [None]:
# Hyperparameters passed to source.deepar for this run; every value is given as a string.
hyperparams = {
    "epochs": "500",
    "time_freq": 'D',
    "prediction_length": '1',
    "context_length": '30',
    "num_cells": "200",
    "num_layers": "4",
    "mini_batch_size": "128",
    "learning_rate": "0.1",
    "early_stopping_patience": "10"
}
               
# Create the wrapper and register it in `collection` under this run's name.
collection[name] = source.deepar(prefix=name,
                        data=data,
                        W=W,
                        target=target,
                        hyperparams=hyperparams)

# Train the model, then initialise the predictor that predict() is called on in the next cell.
# NOTE(review): presumably these start a SageMaker training job and deploy an endpoint — confirm in source.
collection[name].fit()
collection[name].init_predictor()

In [None]:
# Copy the wrapper's 'trainval'/'ori' frame and index it by time (plotted as "Train" below).
trainval = collection[name].sets['trainval']['ori'].copy()
trainval.set_index('time', drop=True, inplace=True)

# Predict the test split in both forms: unscaled=False and unscaled=True.
test_result = collection[name].predict('test', unscaled=False)
test_result_unscaled = collection[name].predict('test', unscaled=True)
# Save the unscaled predictions under ./results/<name>_unscaled.csv.
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result_unscaled.to_csv(test_result_loc)

In [None]:
# Read the saved predictions back from CSV (first column as index, 'time'
# parsed as dates, default NA strings not converted) and run the trading
# evaluation on them. `os` comes from the import cell at the top of the notebook.
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result_unscaled = pd.read_csv(test_result_loc, parse_dates=['time'], index_col=0, keep_default_na=False, header=0)
trade_result = source.metric.trade(test_result_unscaled, target)

In [None]:
# Print the RMSE computed by source.metric.RMSE on the reloaded unscaled predictions.
print('RMSE: ', source.metric.RMSE(test_result_unscaled, target))

In [None]:
# Plot the test predictions obtained with unscaled=False.
source.plot.plot_results({"Test": test_result}, 'BTC Price (Scaled) - ' + name, 'BTC')

In [None]:
# Plot the training series together with the unscaled test predictions on a log scale.
source.plot.plot_results({"Train": trainval, 'Test': test_result_unscaled}, 'BTC Price (Unscaled) - ' + name, 'BTC', scale='log')

In [None]:
# Plot the unscaled test predictions alone on a linear scale.
source.plot.plot_results({'Test': test_result_unscaled}, 'BTC Price (Unscaled) - ' + name, 'BTC', scale='linear')

In [None]:
# Plot the trade_result computed for this run.
source.plot.plot_history({name: trade_result}, 'Trading Result - ' + name)

In [None]:
# Tear down what this run's wrapper set up.
# NOTE(review): presumably deletes the SageMaker endpoint — confirm in source.
collection[name].cleanup()

<h2> XGBoost Optimization </h2>

In [6]:
from sagemaker.tuner import IntegerParameter, ContinuousParameter
from sagemaker.estimator import Estimator

In [7]:
# Settings for the tuning run: W and target are forwarded to the model wrapper;
# name is its prefix and the stem of the results file.
W = 7
target = 'price'
name = 'XGB-Optim'

In [8]:
# Base hyperparameters given to the wrapper; five of them (max_depth, eta,
# min_child_weight, subsample, gamma) also get search ranges in the tuning cell below.
hyperparams = {'max_depth':20,
               'eta':0.1,
               'gamma':0.2,
               'min_child_weight':15,
               'subsample':0.8,
               'objective':'reg:linear',
               'early_stopping_rounds':20,
               'num_round':300,
               'seed':100}

# Create the wrapper; it is kept in its own variable rather than in `collection`,
# and is not fitted in this cell.
xgb_optim = source.xgb(prefix=name,
                           data=data,
                           W=W,
                           target=target,
                           hyperparams=hyperparams)

In [None]:
# Search ranges handed to the wrapper's tuned_fit().
# NOTE(review): the best job recorded further down reports gamma ~1.92,
# min_child_weight 2 and subsample ~0.63, all outside these ranges — presumably
# the ranges were edited after that tuning run; confirm which version is intended.
hyperparameter_ranges = {'max_depth': IntegerParameter(10, 20),
                         'eta'      : ContinuousParameter(0.05, 0.15),
                         'min_child_weight': IntegerParameter(10, 20),
                         'subsample': ContinuousParameter(0.7, 0.9),
                         'gamma': ContinuousParameter(0,0.5)}

# Keep the returned tuner: a later cell calls tuner.best_training_job() on it.
tuner = xgb_optim.tuned_fit(hyperparameter_ranges)

In [None]:
# Initialise the predictor that predict() is called on in the next cell.
# NOTE(review): presumably deploys the best model found by the tuner — confirm in source.
xgb_optim.init_predictor()

In [None]:
# Copy the wrapper's 'trainval'/'ori' frame and index it by time (plotted as "Train" below).
trainval = xgb_optim.sets['trainval']['ori'].copy()
trainval.set_index('time', drop=True, inplace=True)

# Predict the test split in both forms: unscaled=False and unscaled=True.
test_result = xgb_optim.predict('test', unscaled=False)
test_result_unscaled = xgb_optim.predict('test', unscaled=True)
# Save the unscaled predictions under ./results/<name>_unscaled.csv.
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result_unscaled.to_csv(test_result_loc)

In [None]:
# Read the saved predictions back from CSV (first column as index, 'time'
# parsed as dates, default NA strings not converted) and run the trading
# evaluation on them.
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result_unscaled = pd.read_csv(test_result_loc, parse_dates=['time'], index_col=0, keep_default_na=False, header=0)
trade_result = source.metric.trade(test_result_unscaled, target)

In [None]:
# Print the RMSE computed by source.metric.RMSE on the reloaded unscaled predictions.
print('RMSE: ', source.metric.RMSE(test_result_unscaled, target))

In [None]:
# Plot the test predictions obtained with unscaled=False.
source.plot.plot_results({"Test": test_result}, 'BTC Price (Scaled) - ' + name, 'BTC')

In [None]:
# Plot the training series together with the unscaled test predictions on a log scale.
source.plot.plot_results({"Train": trainval, 'Test': test_result_unscaled}, 'BTC Price (Unscaled) - ' + name, 'BTC', scale='log')

In [None]:
# Plot the unscaled test predictions alone on a linear scale.
source.plot.plot_results({'Test': test_result_unscaled}, 'BTC Price (Unscaled) - ' + name, 'BTC', scale='linear')

In [None]:
# Plot the trade_result computed for this run.
source.plot.plot_history({name: trade_result}, 'Trading Result - ' + name)

In [None]:
# Ask the tuner for its best training job, then print the job name and the
# hyperparameters that SageMaker's describe_training_job reports for it.
# Requires `tuner` from the tuning cell above.
import boto3
client = boto3.client('sagemaker')
training_job = str(tuner.best_training_job())
print(training_job)
print(client.describe_training_job(TrainingJobName=training_job)['HyperParameters'])

<b>Best training job:</b>
<br>
xgboost-210207-1105-016-7807dba6

<b>Best iteration hyperparameters:</b>
<br>
{'_tuning_objective_metric': 'validation:rmse',
 'early_stopping_rounds': '20',
 'eta': '0.05410745565672248',
 'gamma': '1.9164529523379112',
 'max_depth': '18',
 'min_child_weight': '2',
 'num_round': '300',
 'objective': 'reg:linear',
 'seed': '100',
 'subsample': '0.6339124917100728'}

In [9]:
# Re-attach the estimator of an already finished training job by name and use it
# as the wrapper's model, so the tuning job does not have to be re-run.
# The hard-coded name is the "Best training job" recorded in the markdown above;
# it only resolves in the AWS account and region that ran that job.
xgb_optim.model = Estimator.attach('xgboost-210207-1105-016-7807dba6')

Parameter image_name will be renamed to image_uri in SageMaker Python SDK v2.


2021-02-07 11:19:21 Starting - Preparing the instances for training
2021-02-07 11:19:21 Downloading - Downloading input data
2021-02-07 11:19:21 Training - Training image download completed. Training in progress.
2021-02-07 11:19:21 Uploading - Uploading generated training model
2021-02-07 11:19:21 Completed - Training job completed[34mArguments: train[0m
[34m[2021-02-07:11:18:54:INFO] Running standalone xgboost training.[0m
[34m[2021-02-07:11:18:54:INFO] Setting up HPO optimized metric to be : rmse[0m
[34m[2021-02-07:11:18:54:INFO] File size need to be processed in the node: 33.83mb. Available memory size in the node: 8427.28mb[0m
[34m[2021-02-07:11:18:54:INFO] Determined delimiter of CSV input is ','[0m
[34m[11:18:54] S3DistributionType set as FullyReplicated[0m
[34m[11:18:54] 24436x49 matrix with 1197364 entries loaded from /opt/ml/input/data/train?format=csv&label_column=0&delimiter=,[0m
[34m[2021-02-07:11:18:54:INFO] Determined delimiter of CSV input is ','[0m
[34

In [None]:
# Initialise the predictor for the attached model; the recorded output below
# shows the existing model being reused.
xgb_optim.init_predictor()

Parameter image will be renamed to image_uri in SageMaker Python SDK v2.
Using already existing model: xgboost-210207-1105-016-7807dba6


---

In [None]:
# Copy the wrapper's 'trainval'/'ori' frame and index it by time (plotted as "Train" below).
trainval = xgb_optim.sets['trainval']['ori'].copy()
trainval.set_index('time', drop=True, inplace=True)

# Predict the test split in both forms: unscaled=False and unscaled=True.
test_result = xgb_optim.predict('test', unscaled=False)
test_result_unscaled = xgb_optim.predict('test', unscaled=True)
# Save the unscaled predictions under ./results/<name>_unscaled.csv
# (the same file the tuned-run cells above write to).
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result_unscaled.to_csv(test_result_loc)

In [None]:
# Read the saved predictions back from CSV (first column as index, 'time'
# parsed as dates, default NA strings not converted) and run the trading
# evaluation on them.
test_result_loc = os.path.join('./results', name + '_unscaled.csv')
test_result_unscaled = pd.read_csv(test_result_loc, parse_dates=['time'], index_col=0, keep_default_na=False, header=0)
trade_result = source.metric.trade(test_result_unscaled, target)

In [None]:
# Print the RMSE computed by source.metric.RMSE on the reloaded unscaled predictions.
print('RMSE: ', source.metric.RMSE(test_result_unscaled, target))

In [None]:
# Plot the test predictions obtained with unscaled=False, with an explicit figure size.
source.plot.plot_results({"Test": test_result}, 'BTC Price (Scaled) - ' + name, 'BTC', figsize=(10,5))

In [None]:
# Plot the training series together with the unscaled test predictions on a log scale.
source.plot.plot_results({"Train": trainval, 'Test': test_result_unscaled}, 'BTC Price (Unscaled) - ' + name, 'BTC', scale='log')

In [None]:
# Plot the unscaled test predictions alone on a linear scale, with an explicit figure size.
source.plot.plot_results({'Test': test_result_unscaled}, 'BTC Price (Unscaled) - ' + name, 'BTC', scale='linear', figsize=(10,5))

In [None]:
# Plot the trade_result computed for this run; the title uses the same
# 'Trading Result - <name>' format as the other sections.
source.plot.plot_history({name: trade_result}, 'Trading Result - ' + name)

In [None]:
# Tear down what the tuning wrapper set up.
# NOTE(review): presumably deletes the SageMaker endpoint — confirm in source.
xgb_optim.cleanup()

<h2> Trade Result Comparison </h2>

In [None]:
# Runs whose saved predictions are compared; each needs ./results/<name>_unscaled.csv
# to have been written by the corresponding section above.
models = ['SMA-3', 'XGB-2', 'NN-3', 'DA-1', 'XGB-Optim']
sets = {}

# Silence only what the evaluation loop writes to stdout/stderr. A cell-level
# %%capture would also capture rich display output, so the comparison figure
# below would never be shown.
# NOTE(review): if source.metric.trade emits display() output rather than
# prints, it will now appear above the figure — confirm in source.
with contextlib.redirect_stdout(io.StringIO()), contextlib.redirect_stderr(io.StringIO()):
    for name in models:
        # Reload this run's saved predictions and recompute its trading result.
        test_result_loc = os.path.join('./results', name + '_unscaled.csv')
        test_result_unscaled = pd.read_csv(test_result_loc, parse_dates=['time'], index_col=0, keep_default_na=False, header=0)
        trade_result = source.metric.trade(test_result_unscaled, target)
        sets[name] = trade_result

# Plot all trading results together in one figure.
source.plot.plot_history(sets, '%Returns', figsize=(10,6))