Benchmark metrics #331

Closed · wants to merge 8 commits
4 changes: 3 additions & 1 deletion examples/benchmarks/README.md
@@ -17,6 +17,7 @@ The numbers shown below demonstrate the performance of the entire `workflow` of
| ALSTM (Yao Qin, et al.) | Alpha360 | 0.0493±0.01 | 0.3778±0.06| 0.0585±0.00 | 0.4606±0.04 | 0.0513±0.03 | 0.6727±0.38| -0.1085±0.02 |
| GATs (Petar Velickovic, et al.) | Alpha360 | 0.0475±0.00 | 0.3515±0.02| 0.0592±0.00 | 0.4585±0.01 | 0.0876±0.02 | 1.1513±0.27| -0.0795±0.02 |
| DoubleEnsemble (Chuheng Zhang, et al.) | Alpha360 | 0.0407±0.00| 0.3053±0.00 | 0.0490±0.00 | 0.3840±0.00 | 0.0380±0.02 | 0.5000±0.21 | -0.0984±0.02 |
| SFM (Liheng Zhang, et al.)| Alpha360 | 0.0314±0.00 | 0.2389±0.02| 0.0407±0.00 | 0.3245±0.01 | 0.0127±0.01 | 0.1440±0.07| -0.1631±0.01 |
Collaborator: How many experiments did you run before getting this result?

[image]
Contributor (author): I ran it 5 times. I will run the model again and update it.


## Alpha158 dataset
| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |
@@ -32,6 +33,7 @@ The numbers shown below demonstrate the performance of the entire `workflow` of
| ALSTM (Yao Qin, et al.) | Alpha158 (with selected 20 features) | 0.0385±0.01 | 0.3022±0.06| 0.0478±0.00 | 0.3874±0.04 | 0.0486±0.03 | 0.7141±0.45| -0.1088±0.03 |
| GATs (Petar Velickovic, et al.) | Alpha158 (with selected 20 features) | 0.0349±0.00 | 0.2511±0.01| 0.0457±0.00 | 0.3537±0.01 | 0.0578±0.02 | 0.8221±0.25| -0.0824±0.02 |
| DoubleEnsemble (Chuheng Zhang, et al.) | Alpha158 | 0.0544±0.00 | 0.4338±0.01 | 0.0523±0.00 | 0.4257±0.01 | 0.1253±0.01 | 1.4105±0.14 | -0.0902±0.01 |
| TabNet (Sercan O. Arik, et al.)| Alpha158 | 0.0383±0.00 | 0.3414±0.00| 0.0388±0.00 | 0.3460±0.00 | 0.0226±0.00 | 0.2652±0.00| -0.1072±0.00 |

- The selected 20 features are based on the feature importance of a lightgbm-based model.
- The base model of DoubleEnsemble is LGBM.
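
As context for the columns in both tables: IC is the mean of the daily cross-sectional Pearson correlation between model scores and realized labels, Rank IC uses Spearman correlation instead, and each ICIR is the mean of the daily series divided by its standard deviation. A minimal sketch of that computation (the `ic_metrics` helper and the index layout are assumptions, not qlib's exact implementation):

```python
# Hedged sketch of the signal metrics reported above; not qlib's exact code.
# Assumes `pred` and `label` are pd.Series sharing a (datetime, instrument) MultiIndex.
import pandas as pd

def ic_metrics(pred: pd.Series, label: pd.Series) -> dict:
    df = pd.DataFrame({"pred": pred, "label": label}).dropna()
    by_day = df.groupby(level="datetime")
    ic = by_day.apply(lambda d: d["pred"].corr(d["label"]))  # daily Pearson IC
    rank_ic = by_day.apply(lambda d: d["pred"].corr(d["label"], method="spearman"))
    return {
        "IC": ic.mean(),
        "ICIR": ic.mean() / ic.std(),
        "Rank IC": rank_ic.mean(),
        "Rank ICIR": rank_ic.mean() / rank_ic.std(),
    }
```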
81 changes: 81 additions & 0 deletions examples/benchmarks/TabNet/workflow_config_TabNet_Alpha360.yaml
@@ -0,0 +1,81 @@
qlib_init:
    provider_uri: "~/.qlib/qlib_data/cn_data"
    region: cn
market: &market csi300
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
    start_time: 2008-01-01
    end_time: 2020-08-01
    fit_start_time: 2008-01-01
    fit_end_time: 2014-12-31
    instruments: *market
    infer_processors:
        - class: RobustZScoreNorm
          kwargs:
              fields_group: feature
              clip_outlier: true
        - class: Fillna
          kwargs:
              fields_group: feature
    learn_processors:
        - class: DropnaLabel
        - class: CSRankNorm
          kwargs:
              fields_group: label
    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
port_analysis_config: &port_analysis_config
    strategy:
        class: TopkDropoutStrategy
        module_path: qlib.contrib.strategy.strategy
        kwargs:
            topk: 50
            n_drop: 5
    backtest:
        verbose: False
        limit_threshold: 0.095
        account: 100000000
        benchmark: *benchmark
        deal_price: close
        open_cost: 0.0005
        close_cost: 0.0015
        min_cost: 5
task:
    model:
        class: TabnetModel
        module_path: qlib.contrib.model.pytorch_tabnet
        kwargs:
            pretrain: True
            d_feat: 360
            n_d: 8
            n_a: 8
            n_shared: 2
            n_ind: 2
            n_steps: 3
            GPU: "2"
    dataset:
        class: DatasetH
        module_path: qlib.data.dataset
        kwargs:
            handler:
                class: Alpha360
                module_path: qlib.contrib.data.handler
                kwargs: *data_handler_config
            segments:
                pretrain: [2008-01-01, 2014-12-31]
                pretrain_validation: [2015-01-01, 2020-08-01]
                train: [2008-01-01, 2014-12-31]
                valid: [2015-01-01, 2016-12-31]
                test: [2017-01-01, 2020-08-01]
    record:
        - class: SignalRecord
          module_path: qlib.workflow.record_temp
          kwargs: {}
        - class: SigAnaRecord
          module_path: qlib.workflow.record_temp
          kwargs:
              ana_long_short: False
              ann_scaler: 252
        - class: PortAnaRecord
          module_path: qlib.workflow.record_temp
          kwargs:
              config: *port_analysis_config
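
Since the file follows qlib's standard benchmark layout, it can be launched with `qrun workflow_config_TabNet_Alpha360.yaml`. Below is a rough programmatic equivalent as a minimal sketch; the local file name, the default `fit`/`predict` arguments, and the omission of the `record` stages are assumptions:

```python
# Minimal sketch, assuming the YAML above is saved locally as
# workflow_config_TabNet_Alpha360.yaml; the record stages are omitted.
import yaml
import qlib
from qlib.utils import init_instance_by_config

with open("workflow_config_TabNet_Alpha360.yaml") as f:
    config = yaml.safe_load(f)  # resolves the &market / &benchmark anchors

qlib.init(**config["qlib_init"])
model = init_instance_by_config(config["task"]["model"])
dataset = init_instance_by_config(config["task"]["dataset"])

# With pretrain: True, TabnetModel is expected to use the pretrain /
# pretrain_validation segments before the supervised train/valid fit.
model.fit(dataset)
pred = model.predict(dataset)  # scores on the test segment
```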
192 changes: 192 additions & 0 deletions examples/highfreq/high_freq_tree.ipynb
@@ -0,0 +1,192 @@
Collaborator: Why is this file necessary in this PR?

"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Copyright (c) Microsoft Corporation.\n",
"# Licensed under the MIT License.\n",
"import qlib\n",
"import os\n",
"import random\n",
"import pandas as pd\n",
"import numpy as np\n",
"from multiprocessing import Pool\n",
"from qlib.config import REG_CN, HIGH_FREQ_CONFIG\n",
"from qlib.contrib.model.gbdt import LGBModel\n",
"from qlib.contrib.data.handler import Alpha158\n",
"from qlib.contrib.strategy.strategy import TopkDropoutStrategy\n",
"from qlib.contrib.evaluate import (\n",
" backtest as normal_backtest,\n",
" risk_analysis,\n",
")\n",
"from qlib.utils import exists_qlib_data, init_instance_by_config\n",
"from qlib.workflow import R\n",
"from qlib.data import D\n",
"from qlib.data.filter import NameDFilter\n",
"from qlib.workflow.record_temp import SignalRecord, PortAnaRecord\n",
"from qlib.data.dataset.handler import DataHandlerLP\n",
"from qlib.utils import flatten_dict\n",
"import lightgbm as lgb"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Qlib configuration"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"QLIB_INIT_CONFIG = {**HIGH_FREQ_CONFIG}\n",
"qlib.init(**QLIB_INIT_CONFIG)\n",
"instruments = D.instruments(market='all')\n",
"random.seed(710)\n",
"instruments = D.list_instruments(instruments=instruments, freq = '1min', as_list=True)\n",
"# Randomly select instruments to boost the training efficiency\n",
"instruments = random.sample(instruments, 150)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# train model configuration\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"MARKET = 'ALL'\n",
"BENCHMARK = \"SH000300\"\n",
"\n",
"start_time = \"2020-09-15 00:00:00\"\n",
"end_time = \"2021-01-18 16:00:00\"\n",
"train_end_time = \"2020-11-15 16:00:00\"\n",
"valid_start_time = \"2020-11-16 00:00:00\"\n",
"valid_end_time = \"2020-11-30 16:00:00\"\n",
"test_start_time = \"2020-12-01 00:00:00\"\n",
"\n",
"data_handler_config = {\n",
" \"start_time\": start_time,\n",
" \"end_time\": end_time,\n",
" \"fit_start_time\": start_time,\n",
" \"fit_end_time\": train_end_time,\n",
" \"freq\": \"1min\",\n",
" \"instruments\": instruments,\n",
" \"learn_processors\":[\n",
" {\"class\": \"DropnaLabel\"}\n",
" ],\n",
" \"infer_processors\": [ \n",
" {\"class\": \"RobustZScoreNorm\",\n",
" \"kwargs\": {\n",
" \"fields_group\": \"feature\",\n",
" \"clip_outlier\": True,\n",
" }},\n",
" {\"class\": \"Fillna\",\n",
" \"kwargs\": {\n",
" \"fields_group\": \"feature\",\n",
" }},],\n",
" \"label\": [\"Ref($close, -1) / $close - 1\"],\n",
"}\n",
"\n",
"\n",
"task = {\n",
" \"model\": {\n",
" \"class\": \"HF_LGBModel\",\n",
" \"module_path\": \"highfreq_gdbt_model.py\",\n",
" \"kwargs\": {\n",
" \"objective\": 'binary', \n",
" \"metric\": ['binary_logloss','auc'],\n",
" \"verbosity\": -1,\n",
" \"learning_rate\": 0.01,\n",
" \"max_depth\": 8,\n",
" \"num_leaves\": 150, \n",
" \"lambda_l1\": 1.5,\n",
" \"lambda_l2\": 1,\n",
" \"num_threads\": 20\n",
" },\n",
" },\n",
" \"dataset\": {\n",
" \"class\": \"DatasetH\",\n",
" \"module_path\": \"qlib.data.dataset\",\n",
" \"kwargs\": {\n",
" \"handler\": {\n",
" \"class\": \"Alpha158\",\n",
" \"module_path\": \"qlib.contrib.data.handler\",\n",
" \"kwargs\": data_handler_config,\n",
" },\n",
" \"segments\": {\n",
" \"train\": (start_time, train_end_time),\n",
" \"valid\": (train_end_time, valid_end_time),\n",
" \"test\": (\n",
" test_start_time,\n",
" end_time,\n",
" ),\n",
" },\n",
" },\n",
" },\n",
"}\n",
"\n",
"provider_uri = QLIB_INIT_CONFIG.get(\"provider_uri\")\n",
"if not exists_qlib_data(provider_uri):\n",
" print(f\"Qlib data is not found in {provider_uri}\")\n",
" GetData().qlib_data(target_dir=provider_uri, interval=\"1min\", region=REG_CN)\n",
"\n",
"dataset = init_instance_by_config(task[\"dataset\"])\n",
"model = init_instance_by_config(task[\"model\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# train model and back test\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# start exp to train model with signal test\n",
"with R.start(experiment_name=\"train_model\"):\n",
" R.log_params(**flatten_dict(task))\n",
" model.fit(dataset)\n",
" model.hf_signal_test(dataset, 0.1)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:trade]",
"language": "python",
"name": "conda-env-trade-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
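
`highfreq_gdbt_model.py` itself is not included in this diff, so the following is only a hedged sketch of what a binary-objective wrapper such as `HF_LGBModel` plausibly does with the continuous label `Ref($close, -1) / $close - 1`: binarize the next-bar return and train LightGBM as a classifier. The helper names are hypothetical.

```python
# Hedged sketch only; HF_LGBModel's real implementation lives in
# highfreq_gdbt_model.py, which this PR does not show.
import lightgbm as lgb
import pandas as pd

def binarize_label(label: pd.Series) -> pd.Series:
    # Map the continuous next-bar return to an up/down target.
    return (label > 0).astype(int)

def fit_binary_lgbm(x_train, y_train, x_valid, y_valid, **lgb_params):
    dtrain = lgb.Dataset(x_train.values, label=binarize_label(y_train).values)
    dvalid = lgb.Dataset(x_valid.values, label=binarize_label(y_valid).values)
    params = {"objective": "binary", "metric": ["binary_logloss", "auc"], **lgb_params}
    return lgb.train(params, dtrain, valid_sets=[dtrain, dvalid])
```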