diff --git a/.gitignore b/.gitignore
index 0ddd5d21fc..33a2a25303 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,3 +36,5 @@
 tags
 .vscode/
 *.swp
+
+./pretrain
diff --git a/examples/benchmarks/README.md b/examples/benchmarks/README.md
index c5bfced6dc..f1e7437fa1 100644
--- a/examples/benchmarks/README.md
+++ b/examples/benchmarks/README.md
@@ -17,6 +17,7 @@ The numbers shown below demonstrate the performance of the entire `workflow` of
 | ALSTM (Yao Qin, et al.) | Alpha360 | 0.0493±0.01 | 0.3778±0.06| 0.0585±0.00 | 0.4606±0.04 | 0.0513±0.03 | 0.6727±0.38| -0.1085±0.02 |
 | GATs (Petar Velickovic, et al.) | Alpha360 | 0.0475±0.00 | 0.3515±0.02| 0.0592±0.00 | 0.4585±0.01 | 0.0876±0.02 | 1.1513±0.27| -0.0795±0.02 |
 | DoubleEnsemble (Chuheng Zhang, et al.) | Alpha360 | 0.0407±0.00| 0.3053±0.00 | 0.0490±0.00 | 0.3840±0.00 | 0.0380±0.02 | 0.5000±0.21 | -0.0984±0.02 |
+
 ## Alpha158 dataset
 | Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |
 |---|---|---|---|---|---|---|---|---|
@@ -25,7 +26,6 @@ The numbers shown below demonstrate the performance of the entire `workflow` of
 | XGBoost (Tianqi Chen, et al.) | Alpha158 | 0.0481±0.00 | 0.3659±0.00| 0.0495±0.00 | 0.4033±0.00 | 0.1111±0.00 | 1.2915±0.00| -0.0893±0.00 |
 | LightGBM (Guolin Ke, et al.) | Alpha158 | 0.0475±0.00 | 0.3979±0.00| 0.0485±0.00 | 0.4123±0.00 | 0.1143±0.00 | 1.2744±0.00| -0.0800±0.00 |
 | MLP | Alpha158 | 0.0358±0.00 | 0.2738±0.03| 0.0425±0.00 | 0.3221±0.01 | 0.0836±0.02 | 1.0323±0.25| -0.1127±0.02 |
-| TabNet with pretrain (Sercan O. Arikm et al) | Alpha158 | 0.0344±0.00|0.205±0.11|0.0398±0.00 |0.3479±0.01|0.0827±0.02|1.1141±0.32 |-0.0925±0.02 |
 | TFT (Bryan Lim, et al.) | Alpha158 (with selected 20 features) | 0.0343±0.00 | 0.2071±0.02| 0.0107±0.00 | 0.0660±0.02 | 0.0623±0.02 | 0.5818±0.20| -0.1762±0.01 |
 | GRU (Kyunghyun Cho, et al.) | Alpha158 (with selected 20 features) | 0.0311±0.00 | 0.2418±0.04| 0.0425±0.00 | 0.3434±0.02 | 0.0330±0.02 | 0.4805±0.30| -0.1021±0.02 |
 | LSTM (Sepp Hochreiter, et al.) | Alpha158 (with selected 20 features) | 0.0312±0.00 | 0.2394±0.04| 0.0418±0.00 | 0.3324±0.03 | 0.0298±0.02 | 0.4198±0.33| -0.1348±0.03 |
diff --git a/examples/benchmarks/TabNet/pretrain/best.model b/examples/benchmarks/TabNet/pretrain/best.model
deleted file mode 100644
index a85cbe392a..0000000000
Binary files a/examples/benchmarks/TabNet/pretrain/best.model and /dev/null differ
diff --git a/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha158.yaml b/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha158.yaml
index 4e9f0e7e94..243505a895 100644
--- a/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha158.yaml
+++ b/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha158.yaml
@@ -55,7 +55,7 @@ task:
                 kwargs: *data_handler_config
             segments:
                 pretrain: [2008-01-01, 2014-12-31]
-                pretrain_validation: [2015-01-01, 2020-08-01]
+                pretrain_validation: [2015-01-01, 2016-12-31]
                 train: [2008-01-01, 2014-12-31]
                 valid: [2015-01-01, 2016-12-31]
                 test: [2017-01-01, 2020-08-01]
diff --git a/qlib/contrib/model/pytorch_alstm.py b/qlib/contrib/model/pytorch_alstm.py
index 0cb09a05ae..3c008ae9aa 100644
--- a/qlib/contrib/model/pytorch_alstm.py
+++ b/qlib/contrib/model/pytorch_alstm.py
@@ -78,7 +78,6 @@ def __init__(
         self.optimizer = optimizer.lower()
         self.loss = loss
         self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu")
-        self.use_gpu = torch.cuda.is_available()
         self.seed = seed

         self.logger.info(
@@ -94,7 +93,7 @@
             "\nearly_stop : {}"
             "\noptimizer : {}"
             "\nloss_type : {}"
-            "\nvisible_GPU : {}"
+            "\ndevice : {}"
             "\nuse_GPU : {}"
             "\nseed : {}".format(
                 d_feat,
@@ -108,7 +107,7 @@
                 early_stop,
                 optimizer.lower(),
                 loss,
-                GPU,
+                self.device,
                 self.use_gpu,
                 seed,
             )
         )
@@ -137,6 +136,10 @@
         self.fitted = False
         self.ALSTM_model.to(self.device)

+    @property
+    def use_gpu(self):
+        return self.device != torch.device("cpu")
+
     def mse(self, pred, label):
         loss = (pred - label) ** 2
         return torch.mean(loss)
@@ -205,12 +208,13 @@ def test_epoch(self, data_x, data_y):
             feature = torch.from_numpy(x_values[indices[i : i + self.batch_size]]).float().to(self.device)
             label = torch.from_numpy(y_values[indices[i : i + self.batch_size]]).float().to(self.device)

-            pred = self.ALSTM_model(feature)
-            loss = self.loss_fn(pred, label)
-            losses.append(loss.item())
+            with torch.no_grad():
+                pred = self.ALSTM_model(feature)
+                loss = self.loss_fn(pred, label)
+                losses.append(loss.item())

-            score = self.metric_fn(pred, label)
-            scores.append(score.item())
+                score = self.metric_fn(pred, label)
+                scores.append(score.item())

         return np.mean(losses), np.mean(scores)

@@ -292,10 +296,7 @@ def predict(self, dataset):
             x_batch = torch.from_numpy(x_values[begin:end]).float().to(self.device)

             with torch.no_grad():
-                if self.use_gpu:
-                    pred = self.ALSTM_model(x_batch).detach().cpu().numpy()
-                else:
-                    pred = self.ALSTM_model(x_batch).detach().numpy()
+                pred = self.ALSTM_model(x_batch).detach().cpu().numpy()

             preds.append(pred)
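Note on the `use_gpu` hunks above (the same refactor recurs in every model file below): the old code cached `torch.cuda.is_available()` at construction time, which reports True whenever any GPU exists, even when `GPU < 0` pins the model to the CPU. Deriving the flag from `self.device` keeps the two consistent. A minimal standalone sketch of the pattern (the class name `DeviceHolder` is illustrative, not part of the patch):

    import torch

    class DeviceHolder:
        def __init__(self, GPU=0):
            # choose the device once; everything else derives from it
            self.device = torch.device(
                "cuda:%d" % GPU if torch.cuda.is_available() and GPU >= 0 else "cpu"
            )

        @property
        def use_gpu(self):
            # True only when the model actually lives on a CUDA device;
            # GPU=-1 on a CUDA machine now correctly reports False
            return self.device != torch.device("cpu")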
{}" - "\nvisible_GPU : {}" + "\ndevice : {}" "\nn_jobs : {}" "\nuse_GPU : {}" "\nseed : {}".format( @@ -112,7 +111,7 @@ def __init__( early_stop, optimizer.lower(), loss, - GPU, + self.device, n_jobs, self.use_gpu, seed, @@ -142,6 +141,10 @@ def __init__( self.fitted = False self.ALSTM_model.to(self.device) + @property + def use_gpu(self): + return self.device != torch.device("cpu") + def mse(self, pred, label): loss = (pred - label) ** 2 return torch.mean(loss) @@ -192,12 +195,13 @@ def test_epoch(self, data_loader): # feature[torch.isnan(feature)] = 0 label = data[:, -1, -1].to(self.device) - pred = self.ALSTM_model(feature.float()) - loss = self.loss_fn(pred, label) - losses.append(loss.item()) + with torch.no_grad(): + pred = self.ALSTM_model(feature.float()) + loss = self.loss_fn(pred, label) + losses.append(loss.item()) - score = self.metric_fn(pred, label) - scores.append(score.item()) + score = self.metric_fn(pred, label) + scores.append(score.item()) return np.mean(losses), np.mean(scores) @@ -277,10 +281,7 @@ def predict(self, dataset): feature = data[:, :, 0:-1].to(self.device) with torch.no_grad(): - if self.use_gpu: - pred = self.ALSTM_model(feature.float()).detach().cpu().numpy() - else: - pred = self.ALSTM_model(feature.float()).detach().numpy() + pred = self.ALSTM_model(feature.float()).detach().cpu().numpy() preds.append(pred) diff --git a/qlib/contrib/model/pytorch_gats.py b/qlib/contrib/model/pytorch_gats.py index b5330146fb..4edbc8bcff 100644 --- a/qlib/contrib/model/pytorch_gats.py +++ b/qlib/contrib/model/pytorch_gats.py @@ -103,7 +103,7 @@ def __init__( "\nbase_model : {}" "\nwith_pretrain : {}" "\nmodel_path : {}" - "\nvisible_GPU : {}" + "\ndevice : {}" "\nuse_GPU : {}" "\nseed : {}".format( d_feat, @@ -119,7 +119,7 @@ def __init__( base_model, with_pretrain, model_path, - GPU, + self.device, self.use_gpu, seed, ) @@ -149,6 +149,10 @@ def __init__( self.fitted = False self.GAT_model.to(self.device) + @property + def use_gpu(self): + return self.device != torch.device("cpu") + def mse(self, pred, label): loss = (pred - label) ** 2 return torch.mean(loss) @@ -326,10 +330,7 @@ def predict(self, dataset): x_batch = torch.from_numpy(x_values[batch]).float().to(self.device) with torch.no_grad(): - if self.use_gpu: - pred = self.GAT_model(x_batch).detach().cpu().numpy() - else: - pred = self.GAT_model(x_batch).detach().numpy() + pred = self.GAT_model(x_batch).detach().cpu().numpy() preds.append(pred) diff --git a/qlib/contrib/model/pytorch_gats_ts.py b/qlib/contrib/model/pytorch_gats_ts.py index 369d1ca7fe..dd83c00f9c 100644 --- a/qlib/contrib/model/pytorch_gats_ts.py +++ b/qlib/contrib/model/pytorch_gats_ts.py @@ -107,7 +107,6 @@ def __init__( self.model_path = model_path self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu") self.n_jobs = n_jobs - self.use_gpu = torch.cuda.is_available() self.seed = seed self.logger.info( @@ -171,6 +170,10 @@ def __init__( self.fitted = False self.GAT_model.to(self.device) + @property + def use_gpu(self): + return self.device != torch.device("cpu") + def mse(self, pred, label): loss = (pred - label) ** 2 return torch.mean(loss) @@ -347,10 +350,7 @@ def predict(self, dataset): feature = data[:, :, 0:-1].to(self.device) with torch.no_grad(): - if self.use_gpu: - pred = self.GAT_model(feature.float()).detach().cpu().numpy() - else: - pred = self.GAT_model(feature.float()).detach().numpy() + pred = self.GAT_model(feature.float()).detach().cpu().numpy() preds.append(pred) diff --git 
diff --git a/qlib/contrib/model/pytorch_gru.py b/qlib/contrib/model/pytorch_gru.py
index 697b71cc9a..0070d18113 100755
--- a/qlib/contrib/model/pytorch_gru.py
+++ b/qlib/contrib/model/pytorch_gru.py
@@ -78,7 +78,6 @@ def __init__(
         self.optimizer = optimizer.lower()
         self.loss = loss
         self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu")
-        self.use_gpu = torch.cuda.is_available()
         self.seed = seed

         self.logger.info(
@@ -137,6 +136,10 @@
         self.fitted = False
         self.gru_model.to(self.device)

+    @property
+    def use_gpu(self):
+        return self.device != torch.device("cpu")
+
     def mse(self, pred, label):
         loss = (pred - label) ** 2
         return torch.mean(loss)
@@ -205,12 +208,13 @@ def test_epoch(self, data_x, data_y):
             feature = torch.from_numpy(x_values[indices[i : i + self.batch_size]]).float().to(self.device)
             label = torch.from_numpy(y_values[indices[i : i + self.batch_size]]).float().to(self.device)

-            pred = self.gru_model(feature)
-            loss = self.loss_fn(pred, label)
-            losses.append(loss.item())
+            with torch.no_grad():
+                pred = self.gru_model(feature)
+                loss = self.loss_fn(pred, label)
+                losses.append(loss.item())

-            score = self.metric_fn(pred, label)
-            scores.append(score.item())
+                score = self.metric_fn(pred, label)
+                scores.append(score.item())

         return np.mean(losses), np.mean(scores)

@@ -292,10 +296,7 @@ def predict(self, dataset):
             x_batch = torch.from_numpy(x_values[begin:end]).float().to(self.device)

             with torch.no_grad():
-                if self.use_gpu:
-                    pred = self.gru_model(x_batch).detach().cpu().numpy()
-                else:
-                    pred = self.gru_model(x_batch).detach().numpy()
+                pred = self.gru_model(x_batch).detach().cpu().numpy()

             preds.append(pred)
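On the simplified `predict` bodies: `Tensor.cpu()` is documented to return the original tensor, with no copy, when it already resides in CPU memory, so the old `if self.use_gpu:` branch was redundant and `.detach().cpu().numpy()` is safe on either device. A quick illustration (values are arbitrary):

    import torch

    t = torch.ones(3)                # already a CPU tensor
    assert t.cpu() is t              # no copy; the same object comes back
    arr = t.detach().cpu().numpy()   # one code path for CPU and CUDA tensors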
diff --git a/qlib/contrib/model/pytorch_gru_ts.py b/qlib/contrib/model/pytorch_gru_ts.py
index 483f419ce6..4553c7537f 100755
--- a/qlib/contrib/model/pytorch_gru_ts.py
+++ b/qlib/contrib/model/pytorch_gru_ts.py
@@ -81,7 +81,6 @@
         self.loss = loss
         self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu")
         self.n_jobs = n_jobs
-        self.use_gpu = torch.cuda.is_available()
         self.seed = seed

         self.logger.info(
@@ -97,7 +96,7 @@
             "\nearly_stop : {}"
             "\noptimizer : {}"
             "\nloss_type : {}"
-            "\nvisible_GPU : {}"
+            "\ndevice : {}"
             "\nn_jobs : {}"
             "\nuse_GPU : {}"
             "\nseed : {}".format(
@@ -112,7 +111,7 @@
                 early_stop,
                 optimizer.lower(),
                 loss,
-                GPU,
+                self.device,
                 n_jobs,
                 self.use_gpu,
                 seed,
             )
@@ -142,6 +141,10 @@
         self.fitted = False
         self.GRU_model.to(self.device)

+    @property
+    def use_gpu(self):
+        return self.device != torch.device("cpu")
+
     def mse(self, pred, label):
         loss = (pred - label) ** 2
         return torch.mean(loss)
@@ -192,12 +195,13 @@ def test_epoch(self, data_loader):
             # feature[torch.isnan(feature)] = 0
             label = data[:, -1, -1].to(self.device)

-            pred = self.GRU_model(feature.float())
-            loss = self.loss_fn(pred, label)
-            losses.append(loss.item())
+            with torch.no_grad():
+                pred = self.GRU_model(feature.float())
+                loss = self.loss_fn(pred, label)
+                losses.append(loss.item())

-            score = self.metric_fn(pred, label)
-            scores.append(score.item())
+                score = self.metric_fn(pred, label)
+                scores.append(score.item())

         return np.mean(losses), np.mean(scores)

@@ -277,10 +281,7 @@ def predict(self, dataset):
             feature = data[:, :, 0:-1].to(self.device)

             with torch.no_grad():
-                if self.use_gpu:
-                    pred = self.GRU_model(feature.float()).detach().cpu().numpy()
-                else:
-                    pred = self.GRU_model(feature.float()).detach().numpy()
+                pred = self.GRU_model(feature.float()).detach().cpu().numpy()

             preds.append(pred)
diff --git a/qlib/contrib/model/pytorch_lstm.py b/qlib/contrib/model/pytorch_lstm.py
index 648a909c74..c7385c6a7c 100755
--- a/qlib/contrib/model/pytorch_lstm.py
+++ b/qlib/contrib/model/pytorch_lstm.py
@@ -77,7 +77,6 @@
         self.optimizer = optimizer.lower()
         self.loss = loss
         self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu")
-        self.use_gpu = torch.cuda.is_available()
         self.seed = seed

         self.logger.info(
@@ -133,6 +132,10 @@
         self.fitted = False
         self.lstm_model.to(self.device)

+    @property
+    def use_gpu(self):
+        return self.device != torch.device("cpu")
+
     def mse(self, pred, label):
         loss = (pred - label) ** 2
         return torch.mean(loss)
@@ -288,10 +291,7 @@ def predict(self, dataset):
             x_batch = torch.from_numpy(x_values[begin:end]).float().to(self.device)

             with torch.no_grad():
-                if self.use_gpu:
-                    pred = self.lstm_model(x_batch).detach().cpu().numpy()
-                else:
-                    pred = self.lstm_model(x_batch).detach().numpy()
+                pred = self.lstm_model(x_batch).detach().cpu().numpy()

             preds.append(pred)
diff --git a/qlib/contrib/model/pytorch_lstm_ts.py b/qlib/contrib/model/pytorch_lstm_ts.py
index 95476fedfd..288bdc2021 100755
--- a/qlib/contrib/model/pytorch_lstm_ts.py
+++ b/qlib/contrib/model/pytorch_lstm_ts.py
@@ -80,7 +80,6 @@
         self.loss = loss
         self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu")
         self.n_jobs = n_jobs
-        self.use_gpu = torch.cuda.is_available()
         self.seed = seed

         self.logger.info(
@@ -96,7 +95,7 @@
             "\nearly_stop : {}"
             "\noptimizer : {}"
             "\nloss_type : {}"
-            "\nvisible_GPU : {}"
+            "\ndevice : {}"
             "\nn_jobs : {}"
             "\nuse_GPU : {}"
             "\nseed : {}".format(
@@ -111,7 +110,7 @@
                 early_stop,
                 optimizer.lower(),
                 loss,
-                GPU,
+                self.device,
                 n_jobs,
                 self.use_gpu,
                 seed,
             )
@@ -138,6 +137,10 @@
         self.fitted = False
         self.LSTM_model.to(self.device)

+    @property
+    def use_gpu(self):
+        return self.device != torch.device("cpu")
+
     def mse(self, pred, label):
         loss = (pred - label) ** 2
         return torch.mean(loss)
@@ -273,10 +276,7 @@ def predict(self, dataset):
             feature = data[:, :, 0:-1].to(self.device)

             with torch.no_grad():
-                if self.use_gpu:
-                    pred = self.LSTM_model(feature.float()).detach().cpu().numpy()
-                else:
-                    pred = self.LSTM_model(feature.float()).detach().numpy()
+                pred = self.LSTM_model(feature.float()).detach().cpu().numpy()

             preds.append(pred)
diff --git a/qlib/contrib/model/pytorch_nn.py b/qlib/contrib/model/pytorch_nn.py
index 37d8dec3ee..fad4661658 100644
--- a/qlib/contrib/model/pytorch_nn.py
+++ b/qlib/contrib/model/pytorch_nn.py
@@ -82,7 +82,6 @@
         self.optimizer = optimizer.lower()
         self.loss_type = loss
         self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu")
-        self.use_GPU = torch.cuda.is_available()
         self.seed = seed
         self.weight_decay = weight_decay

@@ -100,7 +99,7 @@
             "\nloss_type : {}"
             "\neval_steps : {}"
             "\nseed : {}"
-            "\nvisible_GPU : {}"
+            "\ndevice : {}"
             "\nuse_GPU : {}"
             "\nweight_decay : {}".format(
                 layers,
@@ -115,8 +114,8 @@
                 loss,
                 eval_steps,
                 seed,
-                GPU,
-                self.use_GPU,
+                self.device,
+                self.use_gpu,
                 weight_decay,
             )
         )
@@ -157,6 +156,10 @@
         self.fitted = False
         self.dnn_model.to(self.device)

+    @property
+    def use_gpu(self):
+        return self.device != torch.device("cpu")
+
     def fit(
         self,
         dataset: DatasetH,
@@ -219,7 +222,8 @@ def fit(

             # validation
             train_loss += loss.val
-            if step and step % self.eval_steps == 0:
+            # evaluate the model every `eval_steps` steps, and also at the last step.
+            if step % self.eval_steps == 0 or step + 1 == self.max_steps:
                 stop_steps += 1
                 train_loss /= self.eval_steps

@@ -252,9 +256,9 @@
             # update learning rate
             self.scheduler.step(cur_loss_val)

-        # restore the optimal parameters after training ??
+        # restore the optimal parameters after training
        self.dnn_model.load_state_dict(torch.load(save_path))
-        if self.use_GPU:
+        if self.use_gpu:
             torch.cuda.empty_cache()

     def get_loss(self, pred, w, target, loss_type):
@@ -276,10 +280,7 @@ def predict(self, dataset):
         self.dnn_model.eval()

         with torch.no_grad():
-            if self.use_GPU:
-                preds = self.dnn_model(x_test).detach().cpu().numpy()
-            else:
-                preds = self.dnn_model(x_test).detach().numpy()
+            preds = self.dnn_model(x_test).detach().cpu().numpy()

         return pd.Series(np.squeeze(preds), index=x_test_pd.index)

     def save(self, filename, **kwargs):
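On the evaluation-schedule hunk in `fit` above: the old guard `if step and step % self.eval_steps == 0` skipped step 0 and never scored the model trained in a trailing partial interval, so the final weights could go unevaluated. A small sketch of which steps each condition fires on (the step counts are made up for illustration):

    max_steps, eval_steps = 10, 4  # hypothetical values

    old = [s for s in range(max_steps) if s and s % eval_steps == 0]
    new = [s for s in range(max_steps) if s % eval_steps == 0 or s + 1 == max_steps]
    print(old)  # [4, 8]       -> step 9 trains but is never evaluated
    print(new)  # [0, 4, 8, 9] -> the last step is always evaluated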
diff --git a/qlib/contrib/model/pytorch_sfm.py b/qlib/contrib/model/pytorch_sfm.py
index cc600a9558..f013d81a35 100644
--- a/qlib/contrib/model/pytorch_sfm.py
+++ b/qlib/contrib/model/pytorch_sfm.py
@@ -241,7 +241,6 @@
         self.optimizer = optimizer.lower()
         self.loss = loss
         self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu")
-        self.use_gpu = torch.cuda.is_available()
         self.seed = seed

         self.logger.info(
@@ -260,7 +259,7 @@
             "\neval_steps : {}"
             "\noptimizer : {}"
             "\nloss_type : {}"
-            "\nvisible_GPU : {}"
+            "\ndevice : {}"
             "\nuse_GPU : {}"
             "\nseed : {}".format(
                 d_feat,
@@ -277,7 +276,7 @@
                 eval_steps,
                 optimizer.lower(),
                 loss,
-                GPU,
+                self.device,
                 self.use_gpu,
                 seed,
             )
@@ -309,6 +308,10 @@
         self.fitted = False
         self.sfm_model.to(self.device)

+    @property
+    def use_gpu(self):
+        return self.device != torch.device("cpu")
+
     def test_epoch(self, data_x, data_y):

         # prepare training data
diff --git a/qlib/contrib/model/pytorch_tabnet.py b/qlib/contrib/model/pytorch_tabnet.py
index 93b2a36dac..c1dce93086 100644
--- a/qlib/contrib/model/pytorch_tabnet.py
+++ b/qlib/contrib/model/pytorch_tabnet.py
@@ -55,7 +55,7 @@
         ps=0.3,
         lr=0.01,
         pretrain=True,
-        pretrain_file="./pretrain/best.model",
+        pretrain_file=None,
     ):
         """
         TabNet model for Qlib
@@ -81,13 +81,13 @@
         self.metric = metric
         self.early_stop = early_stop
         self.pretrain = pretrain
-        self.pretrain_file = pretrain_file
+        self.pretrain_file = get_or_create_path(pretrain_file)
         self.logger.info(
             "TabNet:"
             "\nbatch_size : {}"
             "\nvirtual bs : {}"
-            "\nGPU : {}"
-            "\npretrain: {}".format(self.batch_size, vbs, GPU, pretrain)
+            "\ndevice : {}"
+            "\npretrain: {}".format(self.batch_size, vbs, self.device, self.pretrain)
         )
         self.fitted = False
         np.random.seed(self.seed)
@@ -116,6 +116,10 @@
         else:
             raise NotImplementedError("optimizer {} is not supported!".format(optimizer))

+    @property
+    def use_gpu(self):
+        return self.device != torch.device("cpu")
+
     def pretrain_fn(self, dataset=DatasetH, pretrain_file="./pretrain/best.model"):
         get_or_create_path(pretrain_file)
@@ -182,7 +186,7 @@ def fit(

         stop_steps = 0
         train_loss = 0
-        best_score = np.inf
+        best_score = -np.inf
         best_epoch = 0
         evals_result["train"] = []
         evals_result["valid"] = []
@@ -201,7 +205,7 @@

             evals_result["train"].append(train_score)
             evals_result["valid"].append(val_score)
-            if val_score < best_score:
+            if val_score > best_score:
                 best_score = val_score
                 stop_steps = 0
                 best_epoch = epoch_idx
@@ -216,6 +220,9 @@
         self.tabnet_model.load_state_dict(best_param)
         torch.save(best_param, save_path)

+        if self.use_gpu:
+            torch.cuda.empty_cache()
+
     def predict(self, dataset):
         if not self.fitted:
             raise ValueError("model is not fitted yet!")
@@ -264,12 +271,13 @@ def test_epoch(self, data_x, data_y):
             feature = x_values[indices[i : i + self.batch_size]].float().to(self.device)
             label = y_values[indices[i : i + self.batch_size]].float().to(self.device)
             priors = torch.ones(self.batch_size, self.d_feat).to(self.device)
-            pred = self.tabnet_model(feature, priors)
-            loss = self.loss_fn(pred, label)
-            losses.append(loss.item())
+            with torch.no_grad():
+                pred = self.tabnet_model(feature, priors)
+                loss = self.loss_fn(pred, label)
+                losses.append(loss.item())

-            score = self.metric_fn(pred, label)
-            scores.append(score.item())
+                score = self.metric_fn(pred, label)
+                scores.append(score.item())

         return np.mean(losses), np.mean(scores)

@@ -352,10 +360,11 @@ def pretrain_test_epoch(self, x_train):
             label = y_train_values.float().to(self.device)
             S_mask = S_mask.to(self.device)
             priors = 1 - S_mask
-            (vec, sparse_loss) = self.tabnet_model(feature, priors)
-            f = self.tabnet_decoder(vec)
+            with torch.no_grad():
+                (vec, sparse_loss) = self.tabnet_model(feature, priors)
+                f = self.tabnet_decoder(vec)

-            loss = self.pretrain_loss_fn(label, f, S_mask)
+                loss = self.pretrain_loss_fn(label, f, S_mask)
             losses.append(loss.item())

         return np.mean(losses)
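On the TabNet `fit` hunks: `val_score` is a higher-is-better metric, so initializing `best_score = np.inf` and keeping the model on `val_score < best_score` made early stopping track the lowest validation score instead of the highest. Flipping both the initial value and the comparison makes the loop maximize. A minimal sketch of the corrected early-stopping loop (the score sequence is invented):

    import numpy as np

    scores = [0.01, 0.03, 0.02, 0.04, 0.035]  # hypothetical validation scores
    best_score, best_epoch, stop_steps, early_stop = -np.inf, 0, 0, 2

    for epoch, val_score in enumerate(scores):
        if val_score > best_score:            # was `<`, which minimized the metric
            best_score, best_epoch, stop_steps = val_score, epoch, 0
        else:
            stop_steps += 1
            if stop_steps >= early_stop:      # patience exhausted
                break
    print(best_epoch, best_score)             # -> 3 0.04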