From 4f317f2fa2efecc5d565c0f7370d82997c5289fc Mon Sep 17 00:00:00 2001 From: lwwang1995 Date: Mon, 30 Nov 2020 18:05:41 +0800 Subject: [PATCH 1/4] Delete Alpha360_denoise --- qlib/contrib/data/handler.py | 73 ------------------------------------ 1 file changed, 73 deletions(-) diff --git a/qlib/contrib/data/handler.py b/qlib/contrib/data/handler.py index e97b00c241..6b3de336dc 100644 --- a/qlib/contrib/data/handler.py +++ b/qlib/contrib/data/handler.py @@ -32,79 +32,6 @@ def check_transform_proc(proc_l, fit_start_time, fit_end_time): return new_l -class ALPHA360_Denoise(DataHandlerLP): - def __init__(self, instruments="csi500", start_time=None, end_time=None, fit_start_time=None, fit_end_time=None): - data_loader = { - "class": "QlibDataLoader", - "kwargs": { - "config": { - "feature": self.get_feature_config(), - "label": self.get_label_config(), - }, - }, - } - - learn_processors = [ - {"class": "DropnaLabel", "kwargs": {"fields_group": "label"}}, - {"class": "CSZScoreNorm", "kwargs": {"fields_group": "label"}}, - ] - infer_processors = [ - {"class": "ProcessInf", "kwargs": {}}, - {"class": "TanhProcess", "kwargs": {}}, - {"class": "Fillna", "kwargs": {}}, - ] - - super().__init__( - instruments, - start_time, - end_time, - data_loader=data_loader, - learn_processors=learn_processors, - infer_processors=infer_processors, - ) - - def get_label_config(self): - return (["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"]) - - def get_feature_config(self): - - fields = [] - names = [] - - for i in range(59, 0, -1): - fields += ["Ref($close, %d)/$close" % (i)] - names += ["CLOSE%d" % (i)] - fields += ["$close/$close"] - names += ["CLOSE0"] - for i in range(59, 0, -1): - fields += ["Ref($open, %d)/$close" % (i)] - names += ["OPEN%d" % (i)] - fields += ["$open/$close"] - names += ["OPEN0"] - for i in range(59, 0, -1): - fields += ["Ref($high, %d)/$close" % (i)] - names += ["HIGH%d" % (i)] - fields += ["$high/$close"] - names += ["HIGH0"] - for i in range(59, 0, -1): - 
fields += ["Ref($low, %d)/$close" % (i)] - names += ["LOW%d" % (i)] - fields += ["$low/$close"] - names += ["LOW0"] - for i in range(59, 0, -1): - fields += ["Ref($vwap, %d)/$close" % (i)] - names += ["VWAP%d" % (i)] - fields += ["$vwap/$close"] - names += ["VWAP0"] - for i in range(59, 0, -1): - fields += ["Ref($volume, %d)/$volume" % (i)] - names += ["VOLUME%d" % (i)] - fields += ["$volume/$volume"] - names += ["VOLUME0"] - - return fields, names - - _DEFAULT_LEARN_PROCESSORS = [ {"class": "DropnaLabel"}, {"class": "CSZScoreNorm", "kwargs": {"fields_group": "label"}}, From 5e53fa32e0d25fecd06cbb7e33ec30cdcab10b86 Mon Sep 17 00:00:00 2001 From: lwwang1995 Date: Mon, 30 Nov 2020 20:51:19 +0800 Subject: [PATCH 2/4] Add random seed. --- qlib/contrib/model/pytorch_alstm.py | 3 +++ qlib/contrib/model/pytorch_gru.py | 3 +++ qlib/contrib/model/pytorch_lstm.py | 3 +++ qlib/contrib/model/pytorch_nn.py | 7 +++++++ qlib/contrib/model/pytorch_sfm.py | 3 +++ 5 files changed, 19 insertions(+) diff --git a/qlib/contrib/model/pytorch_alstm.py b/qlib/contrib/model/pytorch_alstm.py index 40c2f82266..80393c3ecc 100644 --- a/qlib/contrib/model/pytorch_alstm.py +++ b/qlib/contrib/model/pytorch_alstm.py @@ -113,6 +113,9 @@ def __init__( ) ) + np.random.seed(self.seed) + torch.manual_seed(self.seed) + self.ALSTM_model = ALSTMModel( d_feat=self.d_feat, hidden_size=self.hidden_size, diff --git a/qlib/contrib/model/pytorch_gru.py b/qlib/contrib/model/pytorch_gru.py index 5daf4707ed..4ef4e25531 100755 --- a/qlib/contrib/model/pytorch_gru.py +++ b/qlib/contrib/model/pytorch_gru.py @@ -113,6 +113,9 @@ def __init__( ) ) + np.random.seed(self.seed) + torch.manual_seed(self.seed) + self.gru_model = GRUModel( d_feat=self.d_feat, hidden_size=self.hidden_size, diff --git a/qlib/contrib/model/pytorch_lstm.py b/qlib/contrib/model/pytorch_lstm.py index eef1680ecd..59df51e427 100755 --- a/qlib/contrib/model/pytorch_lstm.py +++ b/qlib/contrib/model/pytorch_lstm.py @@ -113,6 +113,9 @@ def __init__( ) ) + 
np.random.seed(self.seed) + torch.manual_seed(self.seed) + self.lstm_model = LSTMModel( d_feat=self.d_feat, hidden_size=self.hidden_size, diff --git a/qlib/contrib/model/pytorch_nn.py b/qlib/contrib/model/pytorch_nn.py index d324e27aac..a60227ff0e 100644 --- a/qlib/contrib/model/pytorch_nn.py +++ b/qlib/contrib/model/pytorch_nn.py @@ -61,6 +61,7 @@ def __init__( optimizer="gd", loss="mse", GPU="0", + seed=0, **kwargs ): # Set logger. @@ -80,6 +81,7 @@ def __init__( self.loss_type = loss self.visible_GPU = GPU self.use_GPU = torch.cuda.is_available() + self.seed = seed self.logger.info( "DNN parameters setting:" @@ -94,6 +96,7 @@ def __init__( "\noptimizer : {}" "\nloss_type : {}" "\neval_steps : {}" + "\nseed : {}" "\nvisible_GPU : {}" "\nuse_GPU : {}".format( layers, @@ -107,11 +110,15 @@ def __init__( optimizer, loss, eval_steps, + seed, GPU, self.use_GPU, ) ) + np.random.seed(self.seed) + torch.manual_seed(self.seed) + if loss not in {"mse", "binary"}: raise NotImplementedError("loss {} is not supported!".format(loss)) self._scorer = mean_squared_error if loss == "mse" else roc_auc_score diff --git a/qlib/contrib/model/pytorch_sfm.py b/qlib/contrib/model/pytorch_sfm.py index 228c0aee5d..d845f62452 100644 --- a/qlib/contrib/model/pytorch_sfm.py +++ b/qlib/contrib/model/pytorch_sfm.py @@ -282,6 +282,9 @@ def __init__( ) ) + np.random.seed(self.seed) + torch.manual_seed(self.seed) + self.sfm_model = SFM_Model( d_feat=self.d_feat, output_dim=self.output_dim, From f35b3d303b84d168593af5264f5b635d7206d76d Mon Sep 17 00:00:00 2001 From: lwwang1995 Date: Mon, 30 Nov 2020 20:55:55 +0800 Subject: [PATCH 3/4] Update format. 
--- qlib/contrib/model/pytorch_alstm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qlib/contrib/model/pytorch_alstm.py b/qlib/contrib/model/pytorch_alstm.py index 80393c3ecc..ba75c70caa 100644 --- a/qlib/contrib/model/pytorch_alstm.py +++ b/qlib/contrib/model/pytorch_alstm.py @@ -115,7 +115,7 @@ def __init__( np.random.seed(self.seed) torch.manual_seed(self.seed) - + self.ALSTM_model = ALSTMModel( d_feat=self.d_feat, hidden_size=self.hidden_size, From 0feb44fa1426b021267d5e6a99147b6ba1bee615 Mon Sep 17 00:00:00 2001 From: meng-ustc Date: Mon, 30 Nov 2020 22:26:59 +0800 Subject: [PATCH 4/4] Add model_path param to gats --- .../benchmarks/GATs/workflow_config_gats.yaml | 2 ++ qlib/contrib/model/pytorch_gats.py | 29 ++++++++++++++----- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/examples/benchmarks/GATs/workflow_config_gats.yaml b/examples/benchmarks/GATs/workflow_config_gats.yaml index c38b4b3129..ef2bbd1864 100644 --- a/examples/benchmarks/GATs/workflow_config_gats.yaml +++ b/examples/benchmarks/GATs/workflow_config_gats.yaml @@ -53,6 +53,8 @@ task: metric: loss loss: mse base_model: LSTM + with_pretrain: True + model_path: "benchmarks/LSTM/model_lstm_csi300.pkl" seed: 0 GPU: 0 dataset: diff --git a/qlib/contrib/model/pytorch_gats.py b/qlib/contrib/model/pytorch_gats.py index e9cbcf9cb0..2e19c0542c 100644 --- a/qlib/contrib/model/pytorch_gats.py +++ b/qlib/contrib/model/pytorch_gats.py @@ -9,10 +9,15 @@ import numpy as np import pandas as pd import copy -from ...utils import create_save_path -from ...log import get_module_logger - - +from sklearn.metrics import roc_auc_score, mean_squared_error +import logging +from ...utils import ( + unpack_archive_with_buffer, + save_multiple_parts_file, + create_save_path, + drop_nan_by_y_index, +) +from ...log import get_module_logger, TimeInspector import torch import torch.nn as nn import torch.optim as optim @@ -54,6 +59,7 @@ def __init__( loss="mse", base_model="GRU", 
with_pretrain=True, + model_path=None, optimizer="adam", GPU="0", seed=0, @@ -76,6 +82,7 @@ def __init__( self.loss = loss self.base_model = base_model self.with_pretrain = with_pretrain + self.model_path = model_path self.visible_GPU = GPU self.use_gpu = torch.cuda.is_available() self.seed = seed @@ -94,6 +101,7 @@ def __init__( "\nloss_type : {}" "\nbase_model : {}" "\nwith_pretrain : {}" + "\nmodel_path : {}" "\nvisible_GPU : {}" "\nuse_GPU : {}" "\nseed : {}".format( @@ -109,12 +117,14 @@ def __init__( loss, base_model, with_pretrain, + model_path, GPU, self.use_gpu, seed, ) ) - + np.random.seed(self.seed) + torch.manual_seed(self.seed) self.GAT_model = GATModel( d_feat=self.d_feat, hidden_size=self.hidden_size, @@ -254,14 +264,17 @@ def fit( # load pretrained base_model if self.with_pretrain: + if self.model_path is None: + raise ValueError("the path of the pretrained model should be given first!") self.logger.info("Loading pretrained model...") if self.base_model == "LSTM": pretrained_model = LSTMModel() - pretrained_model.load_state_dict(torch.load("benchmarks/LSTM/model_lstm_csi300.pkl")) - + pretrained_model.load_state_dict(torch.load(self.model_path)) elif self.base_model == "GRU": pretrained_model = GRUModel() - pretrained_model.load_state_dict(torch.load("benchmarks/GRU/model_gru_csi300.pkl")) + pretrained_model.load_state_dict(torch.load(self.model_path)) + else: + raise ValueError("unknown base model name `%s`" % self.base_model) model_dict = self.GAT_model.state_dict() pretrained_dict = {k: v for k, v in pretrained_model.state_dict().items() if k in model_dict}