diff --git a/examples/highfreq/workflow.py b/examples/highfreq/workflow.py index 5660ab2e9e..7bf5fd09a7 100644 --- a/examples/highfreq/workflow.py +++ b/examples/highfreq/workflow.py @@ -1,24 +1,13 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -import sys import fire -from pathlib import Path import qlib import pickle -import numpy as np -import pandas as pd from qlib.config import REG_CN, HIGH_FREQ_CONFIG -from qlib.contrib.model.gbdt import LGBModel -from qlib.contrib.data.handler import Alpha158 -from qlib.contrib.strategy.strategy import TopkDropoutStrategy -from qlib.contrib.evaluate import ( - backtest as normal_backtest, - risk_analysis, -) - -from qlib.utils import init_instance_by_config, exists_qlib_data + +from qlib.utils import init_instance_by_config from qlib.data.dataset.handler import DataHandlerLP from qlib.data.ops import Operators from qlib.data.data import Cal @@ -96,9 +85,7 @@ def _init_qlib(self): # use yahoo_cn_1min data QLIB_INIT_CONFIG = {**HIGH_FREQ_CONFIG, **self.SPEC_CONF} provider_uri = QLIB_INIT_CONFIG.get("provider_uri") - if not exists_qlib_data(provider_uri): - print(f"Qlib data is not found in {provider_uri}") - GetData().qlib_data(target_dir=provider_uri, interval="1min", region=REG_CN) + GetData().qlib_data(target_dir=provider_uri, interval="1min", region=REG_CN, exists_skip=True) qlib.init(**QLIB_INIT_CONFIG) def _prepare_calender_cache(self): diff --git a/examples/hyperparameter/LightGBM/hyperparameter_158.py b/examples/hyperparameter/LightGBM/hyperparameter_158.py index 5e4887a14f..89cc10cc6a 100644 --- a/examples/hyperparameter/LightGBM/hyperparameter_158.py +++ b/examples/hyperparameter/LightGBM/hyperparameter_158.py @@ -1,46 +1,9 @@ import qlib -from qlib.config import REG_CN -from qlib.utils import exists_qlib_data, init_instance_by_config import optuna - -provider_uri = "~/.qlib/qlib_data/cn_data" -if not exists_qlib_data(provider_uri): - print(f"Qlib data is not found in {provider_uri}") - sys.path.append(str(scripts_dir)) - from get_data import GetData - - GetData().qlib_data(target_dir=provider_uri, region="cn") -qlib.init(provider_uri=provider_uri, region="cn") - -market = "csi300" -benchmark = "SH000300" - -data_handler_config = { - "start_time": "2008-01-01", - "end_time": "2020-08-01", - "fit_start_time": "2008-01-01", - "fit_end_time": "2014-12-31", - "instruments": market, -} -dataset_task = { - "dataset": { - "class": "DatasetH", - "module_path": "qlib.data.dataset", - "kwargs": { - "handler": { - "class": "Alpha158", - "module_path": "qlib.contrib.data.handler", - "kwargs": data_handler_config, - }, - "segments": { - "train": ("2008-01-01", "2014-12-31"), - "valid": ("2015-01-01", "2016-12-31"), - "test": ("2017-01-01", "2020-08-01"), - }, - }, - }, -} -dataset = init_instance_by_config(dataset_task["dataset"]) +from qlib.config import REG_CN +from qlib.utils import init_instance_by_config +from qlib.tests.config import CSI300_DATASET_CONFIG +from qlib.tests.data import GetData def objective(trial): @@ -65,12 +28,19 @@ def objective(trial): }, }, } - evals_result = dict() model = init_instance_by_config(task["model"]) model.fit(dataset, evals_result=evals_result) return min(evals_result["valid"]) -study = optuna.Study(study_name="LGBM_158", storage="sqlite:///db.sqlite3") -study.optimize(objective, n_jobs=6) +if __name__ == "__main__": + + provider_uri = "~/.qlib/qlib_data/cn_data" + GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True) + qlib.init(provider_uri=provider_uri, 
region="cn") + + dataset = init_instance_by_config(CSI300_DATASET_CONFIG) + + study = optuna.Study(study_name="LGBM_158", storage="sqlite:///db.sqlite3") + study.optimize(objective, n_jobs=6) diff --git a/examples/hyperparameter/LightGBM/hyperparameter_360.py b/examples/hyperparameter/LightGBM/hyperparameter_360.py index 8b498e912c..bc0cc245df 100644 --- a/examples/hyperparameter/LightGBM/hyperparameter_360.py +++ b/examples/hyperparameter/LightGBM/hyperparameter_360.py @@ -1,46 +1,11 @@ import qlib -from qlib.config import REG_CN -from qlib.utils import exists_qlib_data, init_instance_by_config import optuna +from qlib.config import REG_CN +from qlib.utils import init_instance_by_config +from qlib.tests.data import GetData +from qlib.tests.config import get_dataset_config, CSI300_MARKET, DATASET_ALPHA360_CLASS -provider_uri = "~/.qlib/qlib_data/cn_data" -if not exists_qlib_data(provider_uri): - print(f"Qlib data is not found in {provider_uri}") - sys.path.append(str(scripts_dir)) - from get_data import GetData - - GetData().qlib_data(target_dir=provider_uri, region="cn") -qlib.init(provider_uri=provider_uri, region="cn") - -market = "csi300" -benchmark = "SH000300" - -data_handler_config = { - "start_time": "2008-01-01", - "end_time": "2020-08-01", - "fit_start_time": "2008-01-01", - "fit_end_time": "2014-12-31", - "instruments": market, -} -dataset_task = { - "dataset": { - "class": "DatasetH", - "module_path": "qlib.data.dataset", - "kwargs": { - "handler": { - "class": "Alpha360", - "module_path": "qlib.contrib.data.handler", - "kwargs": data_handler_config, - }, - "segments": { - "train": ("2008-01-01", "2014-12-31"), - "valid": ("2015-01-01", "2016-12-31"), - "test": ("2017-01-01", "2020-08-01"), - }, - }, - }, -} -dataset = init_instance_by_config(dataset_task["dataset"]) +DATASET_CONFIG = get_dataset_config(market=CSI300_MARKET, dataset_class=DATASET_ALPHA360_CLASS) def objective(trial): @@ -72,5 +37,13 @@ def objective(trial): return min(evals_result["valid"]) -study = optuna.Study(study_name="LGBM_360", storage="sqlite:///db.sqlite3") -study.optimize(objective, n_jobs=6) +if __name__ == "__main__": + + provider_uri = "~/.qlib/qlib_data/cn_data" + GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True) + qlib.init(provider_uri=provider_uri, region=REG_CN) + + dataset = init_instance_by_config(DATASET_CONFIG) + + study = optuna.Study(study_name="LGBM_360", storage="sqlite:///db.sqlite3") + study.optimize(objective, n_jobs=6) diff --git a/examples/model_interpreter/feature.py b/examples/model_interpreter/feature.py new file mode 100644 index 0000000000..a1288e07d2 --- /dev/null +++ b/examples/model_interpreter/feature.py @@ -0,0 +1,32 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ + +import qlib +from qlib.config import REG_CN + +from qlib.utils import init_instance_by_config +from qlib.tests.data import GetData +from qlib.tests.config import CSI300_GBDT_TASK + + +if __name__ == "__main__": + + # use default data + provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir + GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True) + + qlib.init(provider_uri=provider_uri, region=REG_CN) + + ################################### + # train model + ################################### + # model initialization + model = init_instance_by_config(CSI300_GBDT_TASK["model"]) + dataset = init_instance_by_config(CSI300_GBDT_TASK["dataset"]) + model.fit(dataset) + + # get model feature importance + feature_importance = model.get_feature_importance() + print("feature importance:") + print(feature_importance) diff --git a/examples/model_rolling/task_manager_rolling.py b/examples/model_rolling/task_manager_rolling.py index 4f3ac04b15..9ef8694bf4 100644 --- a/examples/model_rolling/task_manager_rolling.py +++ b/examples/model_rolling/task_manager_rolling.py @@ -17,63 +17,7 @@ from qlib.workflow.task.collect import RecorderCollector from qlib.model.ens.group import RollingGroup from qlib.model.trainer import TrainerRM - - -data_handler_config = { - "start_time": "2008-01-01", - "end_time": "2020-08-01", - "fit_start_time": "2008-01-01", - "fit_end_time": "2014-12-31", - "instruments": "csi100", -} - -dataset_config = { - "class": "DatasetH", - "module_path": "qlib.data.dataset", - "kwargs": { - "handler": { - "class": "Alpha158", - "module_path": "qlib.contrib.data.handler", - "kwargs": data_handler_config, - }, - "segments": { - "train": ("2008-01-01", "2014-12-31"), - "valid": ("2015-01-01", "2016-12-31"), - "test": ("2017-01-01", "2020-08-01"), - }, - }, -} - -record_config = [ - { - "class": "SignalRecord", - "module_path": "qlib.workflow.record_temp", - }, - { - "class": "SigAnaRecord", - "module_path": "qlib.workflow.record_temp", - }, -] - -# use lgb -task_lgb_config = { - "model": { - "class": "LGBModel", - "module_path": "qlib.contrib.model.gbdt", - }, - "dataset": dataset_config, - "record": record_config, -} - -# use xgboost -task_xgboost_config = { - "model": { - "class": "XGBModel", - "module_path": "qlib.contrib.model.xgboost", - }, - "dataset": dataset_config, - "record": record_config, -} +from qlib.tests.config import CSI100_RECORD_LGB_TASK_CONFIG, CSI100_RECORD_XGBOOST_TASK_CONFIG class RollingTaskExample: @@ -85,11 +29,13 @@ def __init__( task_db_name="rolling_db", experiment_name="rolling_exp", task_pool="rolling_task", - task_config=[task_xgboost_config, task_lgb_config], + task_config=None, rolling_step=550, rolling_type=RollingGen.ROLL_SD, ): # TaskManager config + if task_config is None: + task_config = [CSI100_RECORD_XGBOOST_TASK_CONFIG, CSI100_RECORD_LGB_TASK_CONFIG] mongo_conf = { "task_url": task_url, "task_db_name": task_db_name, diff --git a/examples/online_srv/online_management_simulate.py b/examples/online_srv/online_management_simulate.py index 4bb5022ee0..8c9e77bf7f 100644 --- a/examples/online_srv/online_management_simulate.py +++ b/examples/online_srv/online_management_simulate.py @@ -13,63 +13,7 @@ from qlib.workflow.online.strategy import RollingStrategy from qlib.workflow.task.gen import RollingGen from qlib.workflow.task.manage import TaskManager - - -data_handler_config = { - "start_time": "2018-01-01", - "end_time": "2018-10-31", - "fit_start_time": "2018-01-01", - "fit_end_time": "2018-03-31", - "instruments": "csi100", -} - 
-dataset_config = { - "class": "DatasetH", - "module_path": "qlib.data.dataset", - "kwargs": { - "handler": { - "class": "Alpha158", - "module_path": "qlib.contrib.data.handler", - "kwargs": data_handler_config, - }, - "segments": { - "train": ("2018-01-01", "2018-03-31"), - "valid": ("2018-04-01", "2018-05-31"), - "test": ("2018-06-01", "2018-09-10"), - }, - }, -} - -record_config = [ - { - "class": "SignalRecord", - "module_path": "qlib.workflow.record_temp", - }, - { - "class": "SigAnaRecord", - "module_path": "qlib.workflow.record_temp", - }, -] - -# use lgb model -task_lgb_config = { - "model": { - "class": "LGBModel", - "module_path": "qlib.contrib.model.gbdt", - }, - "dataset": dataset_config, - "record": record_config, -} - -# use xgboost model -task_xgboost_config = { - "model": { - "class": "XGBModel", - "module_path": "qlib.contrib.model.xgboost", - }, - "dataset": dataset_config, - "record": record_config, -} +from qlib.tests.config import CSI100_RECORD_LGB_TASK_CONFIG, CSI100_RECORD_XGBOOST_TASK_CONFIG class OnlineSimulationExample: @@ -84,7 +28,7 @@ def __init__( rolling_step=80, start_time="2018-09-10", end_time="2018-10-31", - tasks=[task_xgboost_config, task_lgb_config], + tasks=None, ): """ Init OnlineManagerExample. @@ -101,6 +45,8 @@ def __init__( end_time (str, optional): the end time of simulating. Defaults to "2018-10-31". tasks (dict or list[dict]): a set of the task config waiting for rolling and training """ + if tasks is None: + tasks = [CSI100_RECORD_XGBOOST_TASK_CONFIG, CSI100_RECORD_LGB_TASK_CONFIG] self.exp_name = exp_name self.task_pool = task_pool self.start_time = start_time diff --git a/examples/online_srv/rolling_online_management.py b/examples/online_srv/rolling_online_management.py index 25b8b2a0c0..592f1f866c 100644 --- a/examples/online_srv/rolling_online_management.py +++ b/examples/online_srv/rolling_online_management.py @@ -17,62 +17,7 @@ from qlib.workflow.online.strategy import RollingStrategy from qlib.workflow.task.gen import RollingGen from qlib.workflow.online.manager import OnlineManager - -data_handler_config = { - "start_time": "2013-01-01", - "end_time": "2020-09-25", - "fit_start_time": "2013-01-01", - "fit_end_time": "2014-12-31", - "instruments": "csi100", -} - -dataset_config = { - "class": "DatasetH", - "module_path": "qlib.data.dataset", - "kwargs": { - "handler": { - "class": "Alpha158", - "module_path": "qlib.contrib.data.handler", - "kwargs": data_handler_config, - }, - "segments": { - "train": ("2013-01-01", "2014-12-31"), - "valid": ("2015-01-01", "2015-12-31"), - "test": ("2016-01-01", "2020-07-10"), - }, - }, -} - -record_config = [ - { - "class": "SignalRecord", - "module_path": "qlib.workflow.record_temp", - }, - { - "class": "SigAnaRecord", - "module_path": "qlib.workflow.record_temp", - }, -] - -# use lgb model -task_lgb_config = { - "model": { - "class": "LGBModel", - "module_path": "qlib.contrib.model.gbdt", - }, - "dataset": dataset_config, - "record": record_config, -} - -# use xgboost model -task_xgboost_config = { - "model": { - "class": "XGBModel", - "module_path": "qlib.contrib.model.xgboost", - }, - "dataset": dataset_config, - "record": record_config, -} +from qlib.tests.config import CSI100_RECORD_XGBOOST_TASK_CONFIG, CSI100_RECORD_LGB_TASK_CONFIG class RollingOnlineExample: @@ -83,9 +28,13 @@ def __init__( task_url="mongodb://10.0.0.4:27017/", task_db_name="rolling_db", rolling_step=550, - tasks=[task_xgboost_config], - add_tasks=[task_lgb_config], + tasks=None, + add_tasks=None, ): + if add_tasks is None: + 
add_tasks = [CSI100_RECORD_LGB_TASK_CONFIG] + if tasks is None: + tasks = [CSI100_RECORD_XGBOOST_TASK_CONFIG] mongo_conf = { "task_url": task_url, # your MongoDB url "task_db_name": task_db_name, # database name diff --git a/examples/online_srv/update_online_pred.py b/examples/online_srv/update_online_pred.py index 228bc0dacb..8afc665538 100644 --- a/examples/online_srv/update_online_pred.py +++ b/examples/online_srv/update_online_pred.py @@ -7,56 +7,19 @@ Firstly, we will finish the training and set the trained models to the `online` models. Next, we will finish updating online predictions. """ +import copy import fire import qlib from qlib.config import REG_CN from qlib.model.trainer import task_train from qlib.workflow.online.utils import OnlineToolR +from qlib.tests.config import CSI300_GBDT_TASK -data_handler_config = { - "start_time": "2008-01-01", - "end_time": "2020-08-01", - "fit_start_time": "2008-01-01", - "fit_end_time": "2014-12-31", - "instruments": "csi100", -} +task = copy.deepcopy(CSI300_GBDT_TASK) -task = { - "model": { - "class": "LGBModel", - "module_path": "qlib.contrib.model.gbdt", - "kwargs": { - "loss": "mse", - "colsample_bytree": 0.8879, - "learning_rate": 0.0421, - "subsample": 0.8789, - "lambda_l1": 205.6999, - "lambda_l2": 580.9768, - "max_depth": 8, - "num_leaves": 210, - "num_threads": 20, - }, - }, - "dataset": { - "class": "DatasetH", - "module_path": "qlib.data.dataset", - "kwargs": { - "handler": { - "class": "Alpha158", - "module_path": "qlib.contrib.data.handler", - "kwargs": data_handler_config, - }, - "segments": { - "train": ("2008-01-01", "2014-12-31"), - "valid": ("2015-01-01", "2016-12-31"), - "test": ("2017-01-01", "2020-08-01"), - }, - }, - }, - "record": { - "class": "SignalRecord", - "module_path": "qlib.workflow.record_temp", - }, +task["record"] = { + "class": "SignalRecord", + "module_path": "qlib.workflow.record_temp", } diff --git a/examples/rolling_process_data/workflow.py b/examples/rolling_process_data/workflow.py index 5757aaa876..387d5cde70 100644 --- a/examples/rolling_process_data/workflow.py +++ b/examples/rolling_process_data/workflow.py @@ -4,13 +4,11 @@ import qlib import fire import pickle -import pandas as pd from datetime import datetime from qlib.config import REG_CN from qlib.data.dataset.handler import DataHandlerLP -from qlib.contrib.data.handler import Alpha158 -from qlib.utils import exists_qlib_data, init_instance_by_config +from qlib.utils import init_instance_by_config from qlib.tests.data import GetData @@ -25,9 +23,7 @@ def _init_qlib(self): """initialize qlib""" # use yahoo_cn_1min data provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir - if not exists_qlib_data(provider_uri): - print(f"Qlib data is not found in {provider_uri}") - GetData().qlib_data(target_dir=provider_uri, region=REG_CN) + GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True) qlib.init(provider_uri=provider_uri, region=REG_CN) def _dump_pre_handler(self, path): diff --git a/examples/run_all_model.py b/examples/run_all_model.py index d587eff155..c79fee004d 100644 --- a/examples/run_all_model.py +++ b/examples/run_all_model.py @@ -5,13 +5,11 @@ import sys import fire import time -import venv import glob import shutil import signal import inspect import tempfile -import traceback import functools import statistics import subprocess @@ -23,8 +21,7 @@ import qlib from qlib.config import REG_CN from qlib.workflow import R -from qlib.workflow.cli import workflow -from qlib.utils import exists_qlib_data +from 
qlib.tests.data import GetData # init qlib @@ -39,12 +36,8 @@ "default_exp_name": "Experiment", }, } -if not exists_qlib_data(provider_uri): - print(f"Qlib data is not found in {provider_uri}") - sys.path.append(str(Path(__file__).resolve().parent.parent.joinpath("scripts"))) - from get_data import GetData - GetData().qlib_data(target_dir=provider_uri, region=REG_CN) +GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True) qlib.init(provider_uri=provider_uri, region=REG_CN, exp_manager=exp_manager) # decorator to check the arguments diff --git a/examples/workflow_by_code.py b/examples/workflow_by_code.py index d5dab89178..1cdf2ac80f 100644 --- a/examples/workflow_by_code.py +++ b/examples/workflow_by_code.py @@ -1,82 +1,22 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -import sys -from pathlib import Path - import qlib -import pandas as pd from qlib.config import REG_CN -from qlib.contrib.model.gbdt import LGBModel -from qlib.contrib.data.handler import Alpha158 -from qlib.contrib.strategy.strategy import TopkDropoutStrategy -from qlib.contrib.evaluate import ( - backtest as normal_backtest, - risk_analysis, -) -from qlib.utils import exists_qlib_data, init_instance_by_config, flatten_dict +from qlib.utils import init_instance_by_config, flatten_dict from qlib.workflow import R from qlib.workflow.record_temp import SignalRecord, PortAnaRecord from qlib.tests.data import GetData +from qlib.tests.config import CSI300_BENCH, CSI300_GBDT_TASK + if __name__ == "__main__": # use default data provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir - if not exists_qlib_data(provider_uri): - print(f"Qlib data is not found in {provider_uri}") - GetData().qlib_data(target_dir=provider_uri, region=REG_CN) - + GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True) qlib.init(provider_uri=provider_uri, region=REG_CN) - market = "csi300" - benchmark = "SH000300" - - ################################### - # train model - ################################### - data_handler_config = { - "start_time": "2008-01-01", - "end_time": "2020-08-01", - "fit_start_time": "2008-01-01", - "fit_end_time": "2014-12-31", - "instruments": market, - } - - task = { - "model": { - "class": "LGBModel", - "module_path": "qlib.contrib.model.gbdt", - "kwargs": { - "loss": "mse", - "colsample_bytree": 0.8879, - "learning_rate": 0.0421, - "subsample": 0.8789, - "lambda_l1": 205.6999, - "lambda_l2": 580.9768, - "max_depth": 8, - "num_leaves": 210, - "num_threads": 20, - }, - }, - "dataset": { - "class": "DatasetH", - "module_path": "qlib.data.dataset", - "kwargs": { - "handler": { - "class": "Alpha158", - "module_path": "qlib.contrib.data.handler", - "kwargs": data_handler_config, - }, - "segments": { - "train": ("2008-01-01", "2014-12-31"), - "valid": ("2015-01-01", "2016-12-31"), - "test": ("2017-01-01", "2020-08-01"), - }, - }, - }, - } - port_analysis_config = { "strategy": { "class": "TopkDropoutStrategy", @@ -90,7 +30,7 @@ "verbose": False, "limit_threshold": 0.095, "account": 100000000, - "benchmark": benchmark, + "benchmark": CSI300_BENCH, "deal_price": "close", "open_cost": 0.0005, "close_cost": 0.0015, @@ -100,8 +40,8 @@ } # model initialization - model = init_instance_by_config(task["model"]) - dataset = init_instance_by_config(task["dataset"]) + model = init_instance_by_config(CSI300_GBDT_TASK["model"]) + dataset = init_instance_by_config(CSI300_GBDT_TASK["dataset"]) # NOTE: This line is optional # It demonstrates that the dataset can be used 
standalone. @@ -110,7 +50,7 @@ # start exp with R.start(experiment_name="workflow"): - R.log_params(**flatten_dict(task)) + R.log_params(**flatten_dict(CSI300_GBDT_TASK)) model.fit(dataset) R.save_objects(**{"params.pkl": model}) diff --git a/qlib/contrib/model/catboost_model.py b/qlib/contrib/model/catboost_model.py index 98b9b9c2df..5138e0e6f0 100644 --- a/qlib/contrib/model/catboost_model.py +++ b/qlib/contrib/model/catboost_model.py @@ -10,9 +10,10 @@ from ...model.base import Model from ...data.dataset import DatasetH from ...data.dataset.handler import DataHandlerLP +from ...model.interpret.base import FeatureInt -class CatBoostModel(Model): +class CatBoostModel(Model, FeatureInt): """CatBoost Model""" def __init__(self, loss="RMSE", **kwargs): @@ -69,6 +70,18 @@ def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"): x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I) return pd.Series(self.model.predict(x_test.values), index=x_test.index) + def get_feature_importance(self, *args, **kwargs) -> pd.Series: + """get feature importance + + Notes + ----- + parameters references: + https://catboost.ai/docs/concepts/python-reference_catboost_get_feature_importance.html#python-reference_catboost_get_feature_importance + """ + return pd.Series( + data=self.model.get_feature_importance(*args, **kwargs), index=self.model.feature_names_ + ).sort_values(ascending=False) + if __name__ == "__main__": cat = CatBoostModel() diff --git a/qlib/contrib/model/double_ensemble.py b/qlib/contrib/model/double_ensemble.py index 4b267a2b00..d3ca898f87 100644 --- a/qlib/contrib/model/double_ensemble.py +++ b/qlib/contrib/model/double_ensemble.py @@ -1,251 +1,265 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import lightgbm as lgb -import numpy as np -import pandas as pd -from typing import Text, Union -from ...model.base import Model -from ...data.dataset import DatasetH -from ...data.dataset.handler import DataHandlerLP -from ...log import get_module_logger - - -class DEnsembleModel(Model): - """Double Ensemble Model""" - - def __init__( - self, - base_model="gbm", - loss="mse", - num_models=6, - enable_sr=True, - enable_fs=True, - alpha1=1.0, - alpha2=1.0, - bins_sr=10, - bins_fs=5, - decay=None, - sample_ratios=None, - sub_weights=None, - epochs=100, - **kwargs - ): - self.base_model = base_model # "gbm" or "mlp", specifically, we use lgbm for "gbm" - self.num_models = num_models # the number of sub-models - self.enable_sr = enable_sr - self.enable_fs = enable_fs - self.alpha1 = alpha1 - self.alpha2 = alpha2 - self.bins_sr = bins_sr - self.bins_fs = bins_fs - self.decay = decay - if sample_ratios is None: # the default values for sample_ratios - sample_ratios = [0.8, 0.7, 0.6, 0.5, 0.4] - if sub_weights is None: # the default values for sub_weights - sub_weights = [1.0, 0.2, 0.2, 0.2, 0.2, 0.2] - if not len(sample_ratios) == bins_fs: - raise ValueError("The length of sample_ratios should be equal to bins_fs.") - self.sample_ratios = sample_ratios - if not len(sub_weights) == num_models: - raise ValueError("The length of sub_weights should be equal to num_models.") - self.sub_weights = sub_weights - self.epochs = epochs - self.logger = get_module_logger("DEnsembleModel") - self.logger.info("Double Ensemble Model...") - self.ensemble = [] # the current ensemble model, a list contains all the sub-models - self.sub_features = [] # the features for each sub model in the form of pandas.Index - self.params = {"objective": loss} - self.params.update(kwargs) - self.loss = loss - - def fit(self, dataset: DatasetH): - df_train, df_valid = dataset.prepare( - ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L - ) - x_train, y_train = df_train["feature"], df_train["label"] - # initialize the sample weights - N, F = x_train.shape - weights = pd.Series(np.ones(N, dtype=float)) - # initialize the features - features = x_train.columns - pred_sub = pd.DataFrame(np.zeros((N, self.num_models), dtype=float), index=x_train.index) - # train sub-models - for k in range(self.num_models): - self.sub_features.append(features) - self.logger.info("Training sub-model: ({}/{})".format(k + 1, self.num_models)) - model_k = self.train_submodel(df_train, df_valid, weights, features) - self.ensemble.append(model_k) - # no further sample re-weight and feature selection needed for the last sub-model - if k + 1 == self.num_models: - break - - self.logger.info("Retrieving loss curve and loss values...") - loss_curve = self.retrieve_loss_curve(model_k, df_train, features) - pred_k = self.predict_sub(model_k, df_train, features) - pred_sub.iloc[:, k] = pred_k - pred_ensemble = pred_sub.iloc[:, : k + 1].mean(axis=1) - loss_values = pd.Series(self.get_loss(y_train.values.squeeze(), pred_ensemble.values)) - - if self.enable_sr: - self.logger.info("Sample re-weighting...") - weights = self.sample_reweight(loss_curve, loss_values, k + 1) - - if self.enable_fs: - self.logger.info("Feature selection...") - features = self.feature_selection(df_train, loss_values) - - def train_submodel(self, df_train, df_valid, weights, features): - dtrain, dvalid = self._prepare_data_gbm(df_train, df_valid, weights, features) - evals_result = dict() - model = lgb.train( - self.params, - dtrain, - num_boost_round=self.epochs, 
- valid_sets=[dtrain, dvalid], - valid_names=["train", "valid"], - verbose_eval=20, - evals_result=evals_result, - ) - evals_result["train"] = list(evals_result["train"].values())[0] - evals_result["valid"] = list(evals_result["valid"].values())[0] - return model - - def _prepare_data_gbm(self, df_train, df_valid, weights, features): - x_train, y_train = df_train["feature"].loc[:, features], df_train["label"] - x_valid, y_valid = df_valid["feature"].loc[:, features], df_valid["label"] - - # Lightgbm need 1D array as its label - if y_train.values.ndim == 2 and y_train.values.shape[1] == 1: - y_train, y_valid = np.squeeze(y_train.values), np.squeeze(y_valid.values) - else: - raise ValueError("LightGBM doesn't support multi-label training") - - dtrain = lgb.Dataset(x_train.values, label=y_train, weight=weights) - dvalid = lgb.Dataset(x_valid.values, label=y_valid) - return dtrain, dvalid - - def sample_reweight(self, loss_curve, loss_values, k_th): - """ - the SR module of Double Ensemble - :param loss_curve: the shape is NxT - the loss curve for the previous sub-model, where the element (i, t) if the error on the i-th sample - after the t-th iteration in the training of the previous sub-model. - :param loss_values: the shape is N - the loss of the current ensemble on the i-th sample. - :param k_th: the index of the current sub-model, starting from 1 - :return: weights - the weights for all the samples. - """ - # normalize loss_curve and loss_values with ranking - loss_curve_norm = loss_curve.rank(axis=0, pct=True) - loss_values_norm = (-loss_values).rank(pct=True) - - # calculate l_start and l_end from loss_curve - N, T = loss_curve.shape - part = np.maximum(int(T * 0.1), 1) - l_start = loss_curve_norm.iloc[:, :part].mean(axis=1) - l_end = loss_curve_norm.iloc[:, -part:].mean(axis=1) - - # calculate h-value for each sample - h1 = loss_values_norm - h2 = (l_end / l_start).rank(pct=True) - h = pd.DataFrame({"h_value": self.alpha1 * h1 + self.alpha2 * h2}) - - # calculate weights - h["bins"] = pd.cut(h["h_value"], self.bins_sr) - h_avg = h.groupby("bins")["h_value"].mean() - weights = pd.Series(np.zeros(N, dtype=float)) - for i_b, b in enumerate(h_avg.index): - weights[h["bins"] == b] = 1.0 / (self.decay ** k_th * h_avg[i_b] + 0.1) - return weights - - def feature_selection(self, df_train, loss_values): - """ - the FS module of Double Ensemble - :param df_train: the shape is NxF - :param loss_values: the shape is N - the loss of the current ensemble on the i-th sample. 
- :return: res_feat: in the form of pandas.Index - - """ - x_train, y_train = df_train["feature"], df_train["label"] - features = x_train.columns - N, F = x_train.shape - g = pd.DataFrame({"g_value": np.zeros(F, dtype=float)}) - M = len(self.ensemble) - - # shuffle specific columns and calculate g-value for each feature - x_train_tmp = x_train.copy() - for i_f, feat in enumerate(features): - x_train_tmp.loc[:, feat] = np.random.permutation(x_train_tmp.loc[:, feat].values) - pred = pd.Series(np.zeros(N), index=x_train_tmp.index) - for i_s, submodel in enumerate(self.ensemble): - pred += ( - pd.Series( - submodel.predict(x_train_tmp.loc[:, self.sub_features[i_s]].values), index=x_train_tmp.index - ) - / M - ) - loss_feat = self.get_loss(y_train.values.squeeze(), pred.values) - g.loc[i_f, "g_value"] = np.mean(loss_feat - loss_values) / (np.std(loss_feat - loss_values) + 1e-7) - x_train_tmp.loc[:, feat] = x_train.loc[:, feat].copy() - - # one column in train features is all-nan # if g['g_value'].isna().any() - g["g_value"].replace(np.nan, 0, inplace=True) - - # divide features into bins_fs bins - g["bins"] = pd.cut(g["g_value"], self.bins_fs) - - # randomly sample features from bins to construct the new features - res_feat = [] - sorted_bins = sorted(g["bins"].unique(), reverse=True) - for i_b, b in enumerate(sorted_bins): - b_feat = features[g["bins"] == b] - num_feat = int(np.ceil(self.sample_ratios[i_b] * len(b_feat))) - res_feat = res_feat + np.random.choice(b_feat, size=num_feat).tolist() - return pd.Index(res_feat) - - def get_loss(self, label, pred): - if self.loss == "mse": - return (label - pred) ** 2 - else: - raise ValueError("not implemented yet") - - def retrieve_loss_curve(self, model, df_train, features): - if self.base_model == "gbm": - num_trees = model.num_trees() - x_train, y_train = df_train["feature"].loc[:, features], df_train["label"] - # Lightgbm need 1D array as its label - if y_train.values.ndim == 2 and y_train.values.shape[1] == 1: - y_train = np.squeeze(y_train.values) - else: - raise ValueError("LightGBM doesn't support multi-label training") - - N = x_train.shape[0] - loss_curve = pd.DataFrame(np.zeros((N, num_trees))) - pred_tree = np.zeros(N, dtype=float) - for i_tree in range(num_trees): - pred_tree += model.predict(x_train.values, start_iteration=i_tree, num_iteration=1) - loss_curve.iloc[:, i_tree] = self.get_loss(y_train, pred_tree) - else: - raise ValueError("not implemented yet") - return loss_curve - - def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"): - if self.ensemble is None: - raise ValueError("model is not fitted yet!") - x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I) - pred = pd.Series(np.zeros(x_test.shape[0]), index=x_test.index) - for i_sub, submodel in enumerate(self.ensemble): - feat_sub = self.sub_features[i_sub] - pred += ( - pd.Series(submodel.predict(x_test.loc[:, feat_sub].values), index=x_test.index) - * self.sub_weights[i_sub] - ) - return pred - - def predict_sub(self, submodel, df_data, features): - x_data, y_data = df_data["feature"].loc[:, features], df_data["label"] - pred_sub = pd.Series(submodel.predict(x_data.values), index=x_data.index) - return pred_sub +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ +import lightgbm as lgb +import numpy as np +import pandas as pd +from typing import Text, Union +from ...model.base import Model +from ...data.dataset import DatasetH +from ...data.dataset.handler import DataHandlerLP +from ...model.interpret.base import FeatureInt +from ...log import get_module_logger + + +class DEnsembleModel(Model, FeatureInt): + """Double Ensemble Model""" + + def __init__( + self, + base_model="gbm", + loss="mse", + num_models=6, + enable_sr=True, + enable_fs=True, + alpha1=1.0, + alpha2=1.0, + bins_sr=10, + bins_fs=5, + decay=None, + sample_ratios=None, + sub_weights=None, + epochs=100, + **kwargs + ): + self.base_model = base_model # "gbm" or "mlp", specifically, we use lgbm for "gbm" + self.num_models = num_models # the number of sub-models + self.enable_sr = enable_sr + self.enable_fs = enable_fs + self.alpha1 = alpha1 + self.alpha2 = alpha2 + self.bins_sr = bins_sr + self.bins_fs = bins_fs + self.decay = decay + if sample_ratios is None: # the default values for sample_ratios + sample_ratios = [0.8, 0.7, 0.6, 0.5, 0.4] + if sub_weights is None: # the default values for sub_weights + sub_weights = [1.0, 0.2, 0.2, 0.2, 0.2, 0.2] + if not len(sample_ratios) == bins_fs: + raise ValueError("The length of sample_ratios should be equal to bins_fs.") + self.sample_ratios = sample_ratios + if not len(sub_weights) == num_models: + raise ValueError("The length of sub_weights should be equal to num_models.") + self.sub_weights = sub_weights + self.epochs = epochs + self.logger = get_module_logger("DEnsembleModel") + self.logger.info("Double Ensemble Model...") + self.ensemble = [] # the current ensemble model, a list contains all the sub-models + self.sub_features = [] # the features for each sub model in the form of pandas.Index + self.params = {"objective": loss} + self.params.update(kwargs) + self.loss = loss + + def fit(self, dataset: DatasetH): + df_train, df_valid = dataset.prepare( + ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L + ) + x_train, y_train = df_train["feature"], df_train["label"] + # initialize the sample weights + N, F = x_train.shape + weights = pd.Series(np.ones(N, dtype=float)) + # initialize the features + features = x_train.columns + pred_sub = pd.DataFrame(np.zeros((N, self.num_models), dtype=float), index=x_train.index) + # train sub-models + for k in range(self.num_models): + self.sub_features.append(features) + self.logger.info("Training sub-model: ({}/{})".format(k + 1, self.num_models)) + model_k = self.train_submodel(df_train, df_valid, weights, features) + self.ensemble.append(model_k) + # no further sample re-weight and feature selection needed for the last sub-model + if k + 1 == self.num_models: + break + + self.logger.info("Retrieving loss curve and loss values...") + loss_curve = self.retrieve_loss_curve(model_k, df_train, features) + pred_k = self.predict_sub(model_k, df_train, features) + pred_sub.iloc[:, k] = pred_k + pred_ensemble = pred_sub.iloc[:, : k + 1].mean(axis=1) + loss_values = pd.Series(self.get_loss(y_train.values.squeeze(), pred_ensemble.values)) + + if self.enable_sr: + self.logger.info("Sample re-weighting...") + weights = self.sample_reweight(loss_curve, loss_values, k + 1) + + if self.enable_fs: + self.logger.info("Feature selection...") + features = self.feature_selection(df_train, loss_values) + + def train_submodel(self, df_train, df_valid, weights, features): + dtrain, dvalid = self._prepare_data_gbm(df_train, df_valid, weights, features) + evals_result = dict() + model = 
lgb.train( + self.params, + dtrain, + num_boost_round=self.epochs, + valid_sets=[dtrain, dvalid], + valid_names=["train", "valid"], + verbose_eval=20, + evals_result=evals_result, + ) + evals_result["train"] = list(evals_result["train"].values())[0] + evals_result["valid"] = list(evals_result["valid"].values())[0] + return model + + def _prepare_data_gbm(self, df_train, df_valid, weights, features): + x_train, y_train = df_train["feature"].loc[:, features], df_train["label"] + x_valid, y_valid = df_valid["feature"].loc[:, features], df_valid["label"] + + # Lightgbm need 1D array as its label + if y_train.values.ndim == 2 and y_train.values.shape[1] == 1: + y_train, y_valid = np.squeeze(y_train.values), np.squeeze(y_valid.values) + else: + raise ValueError("LightGBM doesn't support multi-label training") + + dtrain = lgb.Dataset(x_train, label=y_train, weight=weights) + dvalid = lgb.Dataset(x_valid, label=y_valid) + return dtrain, dvalid + + def sample_reweight(self, loss_curve, loss_values, k_th): + """ + the SR module of Double Ensemble + :param loss_curve: the shape is NxT + the loss curve for the previous sub-model, where the element (i, t) if the error on the i-th sample + after the t-th iteration in the training of the previous sub-model. + :param loss_values: the shape is N + the loss of the current ensemble on the i-th sample. + :param k_th: the index of the current sub-model, starting from 1 + :return: weights + the weights for all the samples. + """ + # normalize loss_curve and loss_values with ranking + loss_curve_norm = loss_curve.rank(axis=0, pct=True) + loss_values_norm = (-loss_values).rank(pct=True) + + # calculate l_start and l_end from loss_curve + N, T = loss_curve.shape + part = np.maximum(int(T * 0.1), 1) + l_start = loss_curve_norm.iloc[:, :part].mean(axis=1) + l_end = loss_curve_norm.iloc[:, -part:].mean(axis=1) + + # calculate h-value for each sample + h1 = loss_values_norm + h2 = (l_end / l_start).rank(pct=True) + h = pd.DataFrame({"h_value": self.alpha1 * h1 + self.alpha2 * h2}) + + # calculate weights + h["bins"] = pd.cut(h["h_value"], self.bins_sr) + h_avg = h.groupby("bins")["h_value"].mean() + weights = pd.Series(np.zeros(N, dtype=float)) + for i_b, b in enumerate(h_avg.index): + weights[h["bins"] == b] = 1.0 / (self.decay ** k_th * h_avg[i_b] + 0.1) + return weights + + def feature_selection(self, df_train, loss_values): + """ + the FS module of Double Ensemble + :param df_train: the shape is NxF + :param loss_values: the shape is N + the loss of the current ensemble on the i-th sample. 
+ :return: res_feat: in the form of pandas.Index + + """ + x_train, y_train = df_train["feature"], df_train["label"] + features = x_train.columns + N, F = x_train.shape + g = pd.DataFrame({"g_value": np.zeros(F, dtype=float)}) + M = len(self.ensemble) + + # shuffle specific columns and calculate g-value for each feature + x_train_tmp = x_train.copy() + for i_f, feat in enumerate(features): + x_train_tmp.loc[:, feat] = np.random.permutation(x_train_tmp.loc[:, feat].values) + pred = pd.Series(np.zeros(N), index=x_train_tmp.index) + for i_s, submodel in enumerate(self.ensemble): + pred += ( + pd.Series( + submodel.predict(x_train_tmp.loc[:, self.sub_features[i_s]].values), index=x_train_tmp.index + ) + / M + ) + loss_feat = self.get_loss(y_train.values.squeeze(), pred.values) + g.loc[i_f, "g_value"] = np.mean(loss_feat - loss_values) / (np.std(loss_feat - loss_values) + 1e-7) + x_train_tmp.loc[:, feat] = x_train.loc[:, feat].copy() + + # one column in train features is all-nan # if g['g_value'].isna().any() + g["g_value"].replace(np.nan, 0, inplace=True) + + # divide features into bins_fs bins + g["bins"] = pd.cut(g["g_value"], self.bins_fs) + + # randomly sample features from bins to construct the new features + res_feat = [] + sorted_bins = sorted(g["bins"].unique(), reverse=True) + for i_b, b in enumerate(sorted_bins): + b_feat = features[g["bins"] == b] + num_feat = int(np.ceil(self.sample_ratios[i_b] * len(b_feat))) + res_feat = res_feat + np.random.choice(b_feat, size=num_feat, replace=False).tolist() + return pd.Index(set(res_feat)) + + def get_loss(self, label, pred): + if self.loss == "mse": + return (label - pred) ** 2 + else: + raise ValueError("not implemented yet") + + def retrieve_loss_curve(self, model, df_train, features): + if self.base_model == "gbm": + num_trees = model.num_trees() + x_train, y_train = df_train["feature"].loc[:, features], df_train["label"] + # Lightgbm need 1D array as its label + if y_train.values.ndim == 2 and y_train.values.shape[1] == 1: + y_train = np.squeeze(y_train.values) + else: + raise ValueError("LightGBM doesn't support multi-label training") + + N = x_train.shape[0] + loss_curve = pd.DataFrame(np.zeros((N, num_trees))) + pred_tree = np.zeros(N, dtype=float) + for i_tree in range(num_trees): + pred_tree += model.predict(x_train.values, start_iteration=i_tree, num_iteration=1) + loss_curve.iloc[:, i_tree] = self.get_loss(y_train, pred_tree) + else: + raise ValueError("not implemented yet") + return loss_curve + + def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"): + if self.ensemble is None: + raise ValueError("model is not fitted yet!") + x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I) + pred = pd.Series(np.zeros(x_test.shape[0]), index=x_test.index) + for i_sub, submodel in enumerate(self.ensemble): + feat_sub = self.sub_features[i_sub] + pred += ( + pd.Series(submodel.predict(x_test.loc[:, feat_sub].values), index=x_test.index) + * self.sub_weights[i_sub] + ) + return pred + + def predict_sub(self, submodel, df_data, features): + x_data, y_data = df_data["feature"].loc[:, features], df_data["label"] + pred_sub = pd.Series(submodel.predict(x_data.values), index=x_data.index) + return pred_sub + + def get_feature_importance(self, *args, **kwargs) -> pd.Series: + """get feature importance + + Notes + ----- + parameters reference: + https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.Booster.html?highlight=feature_importance#lightgbm.Booster.feature_importance + """ + res = [] + 
for _model, _weight in zip(self.ensemble, self.sub_weights): + res.append(pd.Series(_model.feature_importance(*args, **kwargs), index=_model.feature_name()) * _weight) + return pd.concat(res, axis=1, sort=False).sum(axis=1).sort_values(ascending=False) diff --git a/qlib/contrib/model/gbdt.py b/qlib/contrib/model/gbdt.py index 463cf8f4fa..1a7cf7fba3 100644 --- a/qlib/contrib/model/gbdt.py +++ b/qlib/contrib/model/gbdt.py @@ -8,9 +8,10 @@ from ...model.base import ModelFT from ...data.dataset import DatasetH from ...data.dataset.handler import DataHandlerLP +from ...model.interpret.base import LightGBMFInt -class LGBModel(ModelFT): +class LGBModel(ModelFT, LightGBMFInt): """LightGBM Model""" def __init__(self, loss="mse", **kwargs): @@ -33,8 +34,8 @@ def _prepare_data(self, dataset: DatasetH): else: raise ValueError("LightGBM doesn't support multi-label training") - dtrain = lgb.Dataset(x_train.values, label=y_train) - dvalid = lgb.Dataset(x_valid.values, label=y_valid) + dtrain = lgb.Dataset(x_train, label=y_train) + dvalid = lgb.Dataset(x_valid, label=y_valid) return dtrain, dvalid def fit( diff --git a/qlib/contrib/model/highfreq_gdbt_model.py b/qlib/contrib/model/highfreq_gdbt_model.py index 5a2eeb50a9..04d6ab9d58 100644 --- a/qlib/contrib/model/highfreq_gdbt_model.py +++ b/qlib/contrib/model/highfreq_gdbt_model.py @@ -1,17 +1,18 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. +import warnings import numpy as np import pandas as pd import lightgbm as lgb -from qlib.model.base import ModelFT -from qlib.data.dataset import DatasetH -from qlib.data.dataset.handler import DataHandlerLP -import warnings +from ...model.base import ModelFT +from ...data.dataset import DatasetH +from ...data.dataset.handler import DataHandlerLP +from ...model.interpret.base import LightGBMFInt -class HFLGBModel(ModelFT): +class HFLGBModel(ModelFT, LightGBMFInt): """LightGBM Model for high frequency prediction""" def __init__(self, loss="mse", **kwargs): @@ -97,8 +98,8 @@ def _prepare_data(self, dataset: DatasetH): else: raise ValueError("LightGBM doesn't support multi-label training") - dtrain = lgb.Dataset(x_train.values, label=y_train) - dvalid = lgb.Dataset(x_valid.values, label=y_valid) + dtrain = lgb.Dataset(x_train, label=y_train) + dvalid = lgb.Dataset(x_valid, label=y_valid) return dtrain, dvalid def fit( diff --git a/qlib/contrib/model/xgboost.py b/qlib/contrib/model/xgboost.py index cbba146782..2a38f4fe19 100755 --- a/qlib/contrib/model/xgboost.py +++ b/qlib/contrib/model/xgboost.py @@ -8,9 +8,10 @@ from ...model.base import Model from ...data.dataset import DatasetH from ...data.dataset.handler import DataHandlerLP +from ...model.interpret.base import FeatureInt -class XGBModel(Model): +class XGBModel(Model, FeatureInt): """XGBModel Model""" def __init__(self, **kwargs): @@ -42,8 +43,8 @@ def fit( else: raise ValueError("XGBoost doesn't support multi-label training") - dtrain = xgb.DMatrix(x_train.values, label=y_train_1d) - dvalid = xgb.DMatrix(x_valid.values, label=y_valid_1d) + dtrain = xgb.DMatrix(x_train, label=y_train_1d) + dvalid = xgb.DMatrix(x_valid, label=y_valid_1d) self.model = xgb.train( self._params, dtrain=dtrain, @@ -62,3 +63,13 @@ def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"): raise ValueError("model is not fitted yet!") x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I) return pd.Series(self.model.predict(xgb.DMatrix(x_test.values)), index=x_test.index) + + def get_feature_importance(self, *args, 
**kwargs) -> pd.Series: + """get feature importance + + Notes + ------- + parameters reference: + https://xgboost.readthedocs.io/en/latest/python/python_api.html#xgboost.Booster.get_score + """ + return pd.Series(self.model.get_score(*args, **kwargs)).sort_values(ascending=False) diff --git a/qlib/model/interpret/__init__.py b/qlib/model/interpret/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/qlib/model/interpret/base.py b/qlib/model/interpret/base.py new file mode 100644 index 0000000000..57cc7929a9 --- /dev/null +++ b/qlib/model/interpret/base.py @@ -0,0 +1,40 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +""" +Interfaces to interpret models +""" + +import pandas as pd +from abc import abstractmethod + + +class FeatureInt: + """Feature (Int)erpreter""" + + @abstractmethod + def get_feature_importance(self) -> pd.Series: + """get feature importance + + Returns + ------- + The index is the feature name. + + The greater the value, the higher importance. + """ + + +class LightGBMFInt(FeatureInt): + """LightGBM (F)eature (Int)erpreter""" + + def get_feature_importance(self, *args, **kwargs) -> pd.Series: + """get feature importance + + Notes + ----- + parameters reference: + https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.Booster.html?highlight=feature_importance#lightgbm.Booster.feature_importance + """ + return pd.Series(self.model.feature_importance(*args, **kwargs), index=self.model.feature_name()).sort_values( + ascending=False + ) diff --git a/qlib/tests/__init__.py b/qlib/tests/__init__.py index 8b53bc53a5..7f43cd99ac 100644 --- a/qlib/tests/__init__.py +++ b/qlib/tests/__init__.py @@ -1,6 +1,4 @@ -import sys import unittest -from ..utils import exists_qlib_data from .data import GetData from .. import init from ..config import REG_CN @@ -14,14 +12,13 @@ class TestAutoData(unittest.TestCase): @classmethod def setUpClass(cls) -> None: # use default data - if not exists_qlib_data(cls.provider_uri): - print(f"Qlib data is not found in {cls.provider_uri}") - GetData().qlib_data( - name="qlib_data_simple", - region="cn", - interval="1d", - target_dir=cls.provider_uri, - delete_old=False, - ) + GetData().qlib_data( + name="qlib_data_simple", + region=REG_CN, + interval="1d", + target_dir=cls.provider_uri, + delete_old=False, + exists_skip=True, + ) init(provider_uri=cls.provider_uri, region=REG_CN, **cls._setup_kwargs) diff --git a/qlib/tests/config.py b/qlib/tests/config.py new file mode 100644 index 0000000000..80461f6f9b --- /dev/null +++ b/qlib/tests/config.py @@ -0,0 +1,108 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ +CSI300_MARKET = "csi300" +CSI100_MARKET = "csi100" + +CSI300_BENCH = "SH000300" + +DATASET_ALPHA158_CLASS = "Alpha158" +DATASET_ALPHA360_CLASS = "Alpha360" + +################################### +# config +################################### + + +GBDT_MODEL = { + "class": "LGBModel", + "module_path": "qlib.contrib.model.gbdt", + "kwargs": { + "loss": "mse", + "colsample_bytree": 0.8879, + "learning_rate": 0.0421, + "subsample": 0.8789, + "lambda_l1": 205.6999, + "lambda_l2": 580.9768, + "max_depth": 8, + "num_leaves": 210, + "num_threads": 20, + }, +} + + +RECORD_CONFIG = [ + { + "class": "SignalRecord", + "module_path": "qlib.workflow.record_temp", + }, + { + "class": "SigAnaRecord", + "module_path": "qlib.workflow.record_temp", + }, +] + + +def get_data_handler_config(market=CSI300_MARKET): + return { + "start_time": "2008-01-01", + "end_time": "2020-08-01", + "fit_start_time": "2008-01-01", + "fit_end_time": "2014-12-31", + "instruments": market, + } + + +def get_dataset_config(market=CSI300_MARKET, dataset_class=DATASET_ALPHA158_CLASS): + return { + "class": "DatasetH", + "module_path": "qlib.data.dataset", + "kwargs": { + "handler": { + "class": dataset_class, + "module_path": "qlib.contrib.data.handler", + "kwargs": get_data_handler_config(market), + }, + "segments": { + "train": ("2008-01-01", "2014-12-31"), + "valid": ("2015-01-01", "2016-12-31"), + "test": ("2017-01-01", "2020-08-01"), + }, + }, + } + + +def get_gbdt_task(market=CSI300_MARKET): + return { + "model": GBDT_MODEL, + "dataset": get_dataset_config(market), + } + + +def get_record_lgb_config(market=CSI300_MARKET): + return { + "model": { + "class": "LGBModel", + "module_path": "qlib.contrib.model.gbdt", + }, + "dataset": get_dataset_config(market), + "record": RECORD_CONFIG, + } + + +def get_record_xgboost_config(market=CSI300_MARKET): + return { + "model": { + "class": "XGBModel", + "module_path": "qlib.contrib.model.xgboost", + }, + "dataset": get_dataset_config(market), + "record": RECORD_CONFIG, + } + + +CSI300_DATASET_CONFIG = get_dataset_config(market=CSI300_MARKET) +CSI300_GBDT_TASK = get_gbdt_task(market=CSI300_MARKET) + +CSI100_RECORD_XGBOOST_TASK_CONFIG = get_record_xgboost_config(market=CSI100_MARKET) +CSI100_RECORD_LGB_TASK_CONFIG = get_record_lgb_config(market=CSI100_MARKET) diff --git a/qlib/tests/data.py b/qlib/tests/data.py index 3bf6a2c969..2bfe435906 100644 --- a/qlib/tests/data.py +++ b/qlib/tests/data.py @@ -10,6 +10,7 @@ from tqdm import tqdm from pathlib import Path from loguru import logger +from qlib.utils import exists_qlib_data class GetData: @@ -112,6 +113,7 @@ def qlib_data( interval="1d", region="cn", delete_old=True, + exists_skip=False, ): """download cn qlib data from remote @@ -129,6 +131,8 @@ def qlib_data( data region, value from [cn, us], by default cn delete_old: bool delete an existing directory, by default True + exists_skip: bool + exists skip, by default False Examples --------- @@ -140,6 +144,13 @@ def qlib_data( ------- """ + if exists_skip and exists_qlib_data(target_dir): + logger.warning( + f"Data already exists: {target_dir}, the data download will be skipped\n" + f"\tIf downloading is required: `exists_skip=False` or `change target_dir`" + ) + return + qlib_version = ".".join(re.findall(r"(\d+)\.+", qlib.__version__)) def _get_file_name(v): diff --git a/tests/dataset_tests/test_datalayer.py b/tests/dataset_tests/test_datalayer.py index 9d282b1672..bdd0d915bf 100644 --- a/tests/dataset_tests/test_datalayer.py +++ b/tests/dataset_tests/test_datalayer.py @@ -1,26 +1,10 @@ 
-import sys -from pathlib import Path -import qlib -from qlib.data import D -from qlib.config import REG_CN import unittest import numpy as np -from qlib.utils import exists_qlib_data - - -class TestDataset(unittest.TestCase): - @classmethod - def setUpClass(cls) -> None: - # use default data - provider_uri = "~/.qlib/qlib_data/cn_data_simple" # target_dir - if not exists_qlib_data(provider_uri): - print(f"Qlib data is not found in {provider_uri}") - sys.path.append(str(Path(__file__).resolve().parent.parent.parent.joinpath("scripts"))) - from get_data import GetData +from qlib.data import D +from qlib.tests import TestAutoData - GetData().qlib_data(name="qlib_data_simple", target_dir=provider_uri) - qlib.init(provider_uri=provider_uri, region=REG_CN) +class TestDataset(TestAutoData): def testCSI300(self): close_p = D.features(D.instruments("csi300"), ["$close"]) size = close_p.groupby("datetime").size() diff --git a/tests/test_all_pipeline.py b/tests/test_all_pipeline.py index d34c1773ad..4c20405fa7 100644 --- a/tests/test_all_pipeline.py +++ b/tests/test_all_pipeline.py @@ -12,55 +12,7 @@ from qlib.workflow import R from qlib.workflow.record_temp import SignalRecord, SigAnaRecord, PortAnaRecord from qlib.tests import TestAutoData - - -market = "csi300" -benchmark = "SH000300" - -################################### -# train model -################################### -data_handler_config = { - "start_time": "2008-01-01", - "end_time": "2020-08-01", - "fit_start_time": "2008-01-01", - "fit_end_time": "2014-12-31", - "instruments": market, -} - -task = { - "model": { - "class": "LGBModel", - "module_path": "qlib.contrib.model.gbdt", - "kwargs": { - "loss": "mse", - "colsample_bytree": 0.8879, - "learning_rate": 0.0421, - "subsample": 0.8789, - "lambda_l1": 205.6999, - "lambda_l2": 580.9768, - "max_depth": 8, - "num_leaves": 210, - "num_threads": 20, - }, - }, - "dataset": { - "class": "DatasetH", - "module_path": "qlib.data.dataset", - "kwargs": { - "handler": { - "class": "Alpha158", - "module_path": "qlib.contrib.data.handler", - "kwargs": data_handler_config, - }, - "segments": { - "train": ("2008-01-01", "2014-12-31"), - "valid": ("2015-01-01", "2016-12-31"), - "test": ("2017-01-01", "2020-08-01"), - }, - }, - }, -} +from qlib.tests.config import CSI300_GBDT_TASK, CSI300_BENCH port_analysis_config = { "strategy": { @@ -75,7 +27,7 @@ "verbose": False, "limit_threshold": 0.095, "account": 100000000, - "benchmark": benchmark, + "benchmark": CSI300_BENCH, "deal_price": "close", "open_cost": 0.0005, "close_cost": 0.0015, @@ -96,15 +48,15 @@ def train(): """ # model initiaiton - model = init_instance_by_config(task["model"]) - dataset = init_instance_by_config(task["dataset"]) + model = init_instance_by_config(CSI300_GBDT_TASK["model"]) + dataset = init_instance_by_config(CSI300_GBDT_TASK["dataset"]) # To test __repr__ print(dataset) print(R) # start exp with R.start(experiment_name="workflow"): - R.log_params(**flatten_dict(task)) + R.log_params(**flatten_dict(CSI300_GBDT_TASK)) model.fit(dataset) # prediction @@ -137,12 +89,12 @@ def train_with_sigana(): performance: dict model performance """ - model = init_instance_by_config(task["model"]) - dataset = init_instance_by_config(task["dataset"]) + model = init_instance_by_config(CSI300_GBDT_TASK["model"]) + dataset = init_instance_by_config(CSI300_GBDT_TASK["dataset"]) # start exp with R.start(experiment_name="workflow_with_sigana"): - R.log_params(**flatten_dict(task)) + R.log_params(**flatten_dict(CSI300_GBDT_TASK)) model.fit(dataset) # 
predict and calculate ic and ric @@ -171,7 +123,7 @@ def fake_experiment(): default_uri = R.get_uri() current_uri = "file:./temp-test-exp-mag" with R.start(experiment_name="fake_workflow_for_expm", uri=current_uri): - R.log_params(**flatten_dict(task)) + R.log_params(**flatten_dict(CSI300_GBDT_TASK)) current_uri_to_check = R.get_uri() default_uri_to_check = R.get_uri() diff --git a/tests/test_contrib_workflow.py b/tests/test_contrib_workflow.py index ccd3c6a901..9b1edbd4eb 100644 --- a/tests/test_contrib_workflow.py +++ b/tests/test_contrib_workflow.py @@ -1,73 +1,22 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -import sys import shutil import unittest from pathlib import Path -import qlib -from qlib.config import C from qlib.contrib.workflow import MultiSegRecord, SignalMseRecord from qlib.utils import init_instance_by_config, flatten_dict from qlib.workflow import R from qlib.tests import TestAutoData - - -market = "csi300" -benchmark = "SH000300" - -################################### -# train model -################################### -data_handler_config = { - "start_time": "2008-01-01", - "end_time": "2020-08-01", - "fit_start_time": "2008-01-01", - "fit_end_time": "2014-12-31", - "instruments": market, -} - -task = { - "model": { - "class": "LGBModel", - "module_path": "qlib.contrib.model.gbdt", - "kwargs": { - "loss": "mse", - "colsample_bytree": 0.8879, - "learning_rate": 0.0421, - "subsample": 0.8789, - "lambda_l1": 205.6999, - "lambda_l2": 580.9768, - "max_depth": 8, - "num_leaves": 210, - "num_threads": 20, - }, - }, - "dataset": { - "class": "DatasetH", - "module_path": "qlib.data.dataset", - "kwargs": { - "handler": { - "class": "Alpha158", - "module_path": "qlib.contrib.data.handler", - "kwargs": data_handler_config, - }, - "segments": { - "train": ("2008-01-01", "2014-12-31"), - "valid": ("2015-01-01", "2016-12-31"), - "test": ("2017-01-01", "2020-08-01"), - }, - }, - }, -} +from qlib.tests.config import CSI300_GBDT_TASK def train_multiseg(): - model = init_instance_by_config(task["model"]) - dataset = init_instance_by_config(task["dataset"]) + model = init_instance_by_config(CSI300_GBDT_TASK["model"]) + dataset = init_instance_by_config(CSI300_GBDT_TASK["dataset"]) with R.start(experiment_name="workflow"): - R.log_params(**flatten_dict(task)) + R.log_params(**flatten_dict(CSI300_GBDT_TASK)) model.fit(dataset) recorder = R.get_recorder() sr = MultiSegRecord(model, dataset, recorder) @@ -77,10 +26,10 @@ def train_multiseg(): def train_mse(): - model = init_instance_by_config(task["model"]) - dataset = init_instance_by_config(task["dataset"]) + model = init_instance_by_config(CSI300_GBDT_TASK["model"]) + dataset = init_instance_by_config(CSI300_GBDT_TASK["dataset"]) with R.start(experiment_name="workflow"): - R.log_params(**flatten_dict(task)) + R.log_params(**flatten_dict(CSI300_GBDT_TASK)) model.fit(dataset) recorder = R.get_recorder() sr = SignalMseRecord(recorder, model=model, dataset=dataset) diff --git a/tests/test_get_data.py b/tests/test_get_data.py index c511d1b910..93a852f554 100644 --- a/tests/test_get_data.py +++ b/tests/test_get_data.py @@ -1,16 +1,13 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
-import sys import shutil import unittest from pathlib import Path -sys.path.append(str(Path(__file__).resolve().parent.parent.joinpath("scripts"))) -from get_data import GetData - import qlib from qlib.data import D +from qlib.tests.data import GetData DATA_DIR = Path(__file__).parent.joinpath("test_get_data") SOURCE_DIR = DATA_DIR.joinpath("source") @@ -37,7 +34,9 @@ def tearDownClass(cls) -> None: def test_0_qlib_data(self): - GetData().qlib_data(name="qlib_data_simple", target_dir=QLIB_DIR, region="cn", interval="1d", delete_old=False) + GetData().qlib_data( + name="qlib_data_simple", target_dir=QLIB_DIR, region="cn", interval="1d", delete_old=False, exists_skip=True + ) df = D.features(D.instruments("csi300"), self.FIELDS) self.assertListEqual(list(df.columns), self.FIELDS, "get qlib data failed") self.assertFalse(df.dropna().empty, "get qlib data failed") diff --git a/tests/test_register_ops.py b/tests/test_register_ops.py index 7d3322ddcc..ac86be59ce 100644 --- a/tests/test_register_ops.py +++ b/tests/test_register_ops.py @@ -1,17 +1,12 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -import sys import unittest import numpy as np -import qlib from qlib.data import D from qlib.data.ops import ElemOperator, PairOperator -from qlib.config import REG_CN -from qlib.utils import exists_qlib_data from qlib.tests import TestAutoData -from qlib.tests.data import GetData class Diff(ElemOperator):
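# --- Usage sketch (not part of the patch above) ---
# A minimal, hedged example of how the FeatureInt interface added in
# qlib/model/interpret/base.py can be exercised after training, mirroring the
# new examples/model_interpreter/feature.py in this diff. The `importance_type`
# argument is simply forwarded by LightGBMFInt to
# lightgbm.Booster.feature_importance; using "gain" here is an illustrative
# choice, not something the patch requires.
import qlib
from qlib.config import REG_CN
from qlib.utils import init_instance_by_config
from qlib.tests.data import GetData
from qlib.tests.config import CSI300_GBDT_TASK

if __name__ == "__main__":
    provider_uri = "~/.qlib/qlib_data/cn_data"
    # download the default data only if it is not already present (new exists_skip flag)
    GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
    qlib.init(provider_uri=provider_uri, region=REG_CN)

    # LGBModel now mixes in LightGBMFInt, so it exposes get_feature_importance()
    model = init_instance_by_config(CSI300_GBDT_TASK["model"])
    dataset = init_instance_by_config(CSI300_GBDT_TASK["dataset"])
    model.fit(dataset)

    # split-based importance (LightGBM default) and gain-based importance,
    # both returned as a pandas Series sorted in descending order
    print(model.get_feature_importance().head(10))
    print(model.get_feature_importance(importance_type="gain").head(10))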