In [1]:
import sys
import copy
from pathlib import Path

import qlib
import numpy as np
import pandas as pd
from qlib.config import REG_CN
from qlib.contrib.model.gbdt import LGBModel
from qlib.contrib.data.handler import Alpha158
from qlib.contrib.strategy.strategy import TopkDropoutStrategy
from qlib.contrib.evaluate import (
    backtest as normal_backtest,
    risk_analysis,
)
from qlib.utils import exists_qlib_data, init_instance_by_config
from qlib.workflow import R
from qlib.workflow.record_temp import SignalRecord, PortAnaRecord
from qlib.utils import flatten_dict

In [2]:
# use default data
# NOTE: need to download data from remote: python scripts/get_data.py qlib_data_cn --target_dir ~/.qlib/qlib_data/cn_data
# Purpose of this cell: make sure the qlib dataset exists locally (fetching it
# when it does not) and then initialise qlib against it.
provider_uri = "~/.qlib/qlib_data/cn_data"  # target_dir
if not exists_qlib_data(provider_uri):
    print(f"Qlib data is not found in {provider_uri}")
    # Make `get_data` importable from the `scripts` directory that sits next to
    # the current working directory (i.e. <cwd>/../scripts).
    # NOTE(review): this only works when the notebook is launched from a
    # directory whose parent contains `scripts/get_data.py` - confirm.
    sys.path.append(str(Path.cwd().parent.joinpath("scripts")))
    # Imported lazily because the module is only reachable after the
    # sys.path tweak above.
    from get_data import GetData
    GetData().qlib_data(target_dir=provider_uri, region=REG_CN)
# Initialise qlib with the same data location and region used for the download.
qlib.init(provider_uri=provider_uri, region=REG_CN)

[36502:MainThread](2020-11-27 16:26:57,240) INFO - qlib.Initialization - [__init__.py:41] - default_conf: client.
[36502:MainThread](2020-11-27 16:26:57,243) INFO - qlib.Initialization - [__init__.py:76] - qlib successfully initialized based on client settings.
[36502:MainThread](2020-11-27 16:26:57,244) INFO - qlib.Initialization - [__init__.py:79] - data_path=/home/dongzho/.qlib/qlib_data/cn_data


In [3]:
# Shared settings for the rest of the notebook:
# `market` is passed as the "instruments" of the data handler config, and
# `benchmark` is passed as the "benchmark" of the backtest config.
market = "csi300"
benchmark = "SH000300"

## Model Training

In [4]:
###################################
# train model
###################################
# Keyword arguments forwarded to the Alpha158 handler (see task["dataset"]).
# The fit window below is the same date range as the "train" segment.
data_handler_config = {
    "start_time": "2008-01-01",
    "end_time": "2020-08-01",
    "fit_start_time": "2008-01-01",
    "fit_end_time": "2014-12-31",
    "instruments": market,
}

# Declarative description of the model and the dataset. Each entry names a
# class, the module it lives in and its constructor kwargs, and is turned into
# an object by `init_instance_by_config` below.
task = {
    "model": {
        "class": "LGBModel",
        "module_path": "qlib.contrib.model.gbdt",
        "kwargs": {
            "loss": "mse",
            "colsample_bytree": 0.8879,
            "learning_rate": 0.0421,
            "subsample": 0.8789,
            "lambda_l1": 205.6999,
            "lambda_l2": 580.9768,
            "max_depth": 8,
            "num_leaves": 210,
            "num_threads": 20,
        },
    },
    "dataset": {
        "class": "DatasetH",
        "module_path": "qlib.data.dataset",
        "kwargs": {
            "handler": {
                "class": "Alpha158",
                "module_path": "qlib.contrib.data.handler",
                "kwargs": data_handler_config,
            },
            "segments": {
                "train": ("2008-01-01", "2014-12-31"),
                "valid": ("2015-01-01", "2016-12-31"),
                "test": ("2017-01-01", "2017-12-31"),  # NOTE: use a shorter time range
            },
        },
    },
}

# model initialization
model = init_instance_by_config(task["model"])
dataset = init_instance_by_config(task["dataset"])

# start exp to train model
with R.start(experiment_name="train_model"):
    # Record the flattened task config as the parameters of this run.
    R.log_params(**flatten_dict(task))
    model.fit(dataset)
    # Persist the fitted model under the key "trained_model"; the backtest cell
    # loads it back by that key.
    R.save_objects(trained_model=model)
    # Keep the recorder id so the backtest cell can look this run up again.
    rid = R.get_recorder().id


[36502:MainThread](2020-11-27 16:27:17,338) INFO - qlib.timer - [log.py:81] - Time cost: 19.994s | Loading data Done
[36502:MainThread](2020-11-27 16:27:18,164) INFO - qlib.timer - [log.py:81] - Time cost: 0.245s | DropnaLabel Done
[36502:MainThread](2020-11-27 16:27:26,086) INFO - qlib.timer - [log.py:81] - Time cost: 7.921s | CSZScoreNorm Done
[36502:MainThread](2020-11-27 16:27:26,087) INFO - qlib.timer - [log.py:81] - Time cost: 8.747s | fit & process data Done
[36502:MainThread](2020-11-27 16:27:26,088) INFO - qlib.timer - [log.py:81] - Time cost: 28.744s | Init data Done
[36502:MainThread](2020-11-27 16:27:26,097) INFO - qlib.workflow - [exp.py:180] - Experiment 2 starts running ...
[36502:MainThread](2020-11-27 16:27:26,221) INFO - qlib.workflow - [recorder.py:234] - Recorder 3fa4def1f6694119a3d336a7a06c88cb starts running under Experiment 2 ...
[36502:MainThread](2020-11-27 16:27:26,223) INFO - qlib.workflow - [expm.py:251] - No tracking URI is provided. The default tracking UR

## Optimization Based Strategy

In [5]:
from qlib.contrib.strategy.strategy import BaseStrategy


class OptBasedStrategy(BaseStrategy):
    """Optimization Based Strategy

    Converts prediction scores into target holdings: a covariance matrix is
    estimated from the price history returned by `data_handler`, and the
    `optimizer` turns that covariance together with the scores into portfolio
    weights, which are then translated into share amounts.

    Parameters
    ----------
    data_handler
        Object exposing `get_range_selector(date, n)` and
        `fetch(selector, level=None, squeeze=True)`; supplies the price
        history used for covariance estimation.
    cov_estimator : callable
        Called as `cov_estimator(price)`; the result must support
        `.reindex(index=..., columns=...)`.
    optimizer : callable
        Called as `optimizer(cov, scores)` or
        `optimizer(cov, scores, init_weight)`; the result must support
        boolean-mask indexing and `.items()` (e.g. a pd.Series of weights).
    """

    def __init__(self, data_handler, cov_estimator, optimizer):
        self.data_handler = data_handler
        self.cov_estimator = cov_estimator
        self.optimizer = optimizer

    def generate_order_list(self, score_series, current, trade_exchange, pred_date, trade_date):
        """Build the order list that moves the account to the optimized portfolio.

        Parameters
        -----------
        score_series : pd.Series
            stock_id , score.
        current : Position()
            current of account.
        trade_exchange : Exchange()
            exchange.
        pred_date : pd.Timestamp
            date the scores refer to; it anchors the price-history window and
            the close price used to turn weights into share amounts.
            (type presumed to match `trade_date` - confirm against caller)
        trade_date : pd.Timestamp
            date.

        Returns
        -------
        The value returned by
        `trade_exchange.generate_order_for_target_amount_position`.
        """
        score_series = score_series.dropna()

        # check stock holdings, if
        # 1. doesn't have score: target amount = 0 (force sell)
        # 2. stock not tradable: target amount = current amount
        current_position = current.get_stock_amount_dict()
        target_position = {}
        for stock_id in current_position:
            if not trade_exchange.is_stock_tradable(stock_id=stock_id, trade_date=trade_date):
                target_position[stock_id] = current_position[stock_id]
            elif stock_id not in score_series.index:
                target_position[stock_id] = 0
            # else: tradable holding with a score -> decided by the optimizer

        # filter scores, if
        # 1. kept in `target_position` by previous rules
        # 2. not tradable
        skipped = [
            stock_id
            for stock_id in score_series.index
            if stock_id in target_position
            or not trade_exchange.is_stock_tradable(stock_id=stock_id, trade_date=trade_date)
        ]
        score_series = score_series[~score_series.index.isin(skipped)]

        if len(score_series) > 0:
            target_position.update(
                self._solve_candidate_amounts(
                    score_series, current, current_position, target_position, trade_exchange, pred_date, trade_date
                )
            )
        # else: nothing left to optimize; only the forced holds/sells above apply.

        # generate order list
        order_list = trade_exchange.generate_order_for_target_amount_position(
            target_position=target_position,
            current_position=current_position,
            trade_date=trade_date,
        )

        return order_list

    def _solve_candidate_amounts(
        self, score_series, current, current_position, fixed_position, trade_exchange, pred_date, trade_date
    ):
        """Return {stock_id: target amount} for the candidates in `score_series`.

        `fixed_position` holds the targets already decided by the hold/sell
        rules; its value is excluded from the budget handed to the optimizer.
        If optimization or pricing fails, the candidates that are currently
        held keep their current amount and no other candidate is bought.
        """
        # calc remaining value
        current_value = pd.Series({
            stock_id: current.get_stock_price(stock_id) * amount
            for stock_id, amount in current_position.items()
        })
        risk_total_value = self.get_risk_degree(trade_date) * current.calculate_value()
        traded_value = risk_total_value - current_value.loc[list(fixed_position)].sum()

        # portfolio init weight (normalised only when something is held)
        init_weight = current_value.reindex(score_series.index, fill_value=0)
        init_weight_sum = init_weight.sum()
        has_init_weight = init_weight_sum > 0
        if has_init_weight:
            init_weight /= init_weight_sum

        # covariance estimation
        selector = (self.data_handler.get_range_selector(pred_date, 252), score_series.index)
        price = self.data_handler.fetch(selector, level=None, squeeze=True)
        cov = self.cov_estimator(price)
        cov = cov.reindex(
            index=score_series.index,
            columns=score_series.index,
        )

        # optimize target portfolio
        try:
            if has_init_weight:
                target_weight = self.optimizer(cov, score_series, init_weight)
            else:
                target_weight = self.optimizer(cov, score_series)
            target_weight = target_weight[target_weight > 1e-6]
            # Built in a scratch dict so a failure on any single stock (e.g. a
            # missing close price) cannot leave a half-applied allocation.
            return {
                stock_id: int(traded_value * weight / trade_exchange.get_close(stock_id, pred_date))
                for stock_id, weight in target_weight.items()
            }
        except Exception as e:
            # Deliberate best effort: report the problem and keep holdings as is.
            print('Unknown exception:', trade_date, e)
            return {
                stock_id: current_position[stock_id]
                for stock_id in score_series.index
                if stock_id in current_position
            }

In [6]:
from qlib.data.dataset.loader import QlibDataLoader
from qlib.data.dataset.handler import DataHandler
from qlib.model.riskmodel import ShrinkCovEstimator
from qlib.portfolio.optimizer import PortfolioOptimizer

In [7]:
# Wire up the three collaborators of OptBasedStrategy.
# Price source: a loader restricted to the "$close" field, wrapped in a handler.
data_loader = QlibDataLoader(["$close"])
# NOTE(review): the positional arguments look like (instruments, start_time,
# end_time, data_loader) - confirm against the DataHandler signature.
data_handler = DataHandler("all", "2015-01-01", "2020-08-01", data_loader)
# Covariance estimator applied to the fetched price history by the strategy.
cov_estimator = ShrinkCovEstimator(nan_option="mask")
# NOTE(review): "mvo" presumably selects mean-variance optimization; see the
# PortfolioOptimizer documentation for the meaning of lamb / delta / tol.
optimizer = PortfolioOptimizer("mvo", lamb=2, delta=0.2, tol=1e-5)
strategy = OptBasedStrategy(data_handler, cov_estimator, optimizer)

[36502:MainThread](2020-11-27 16:27:43,722) INFO - qlib.timer - [log.py:81] - Time cost: 6.369s | Loading data Done
[36502:MainThread](2020-11-27 16:27:43,724) INFO - qlib.timer - [log.py:81] - Time cost: 6.371s | Init data Done


In [8]:
###################################
# prediction, backtest & analysis
###################################
# Backtest settings: the strategy instance built above plus the keyword
# arguments handed to the backtest through PortAnaRecord.
port_analysis_config = {
    "strategy": strategy,
    "backtest": {
        "verbose": False,
        "limit_threshold": 0.095,
        "account": 100000000,
        "benchmark": benchmark,
        "deal_price": "close",
        "open_cost": 0.0005,
        "close_cost": 0.0015,
        "min_cost": 5,
    },
}


# backtest and analysis
with R.start(experiment_name="backtest_analysis"):
    # Reload the model stored by the "train_model" experiment, looked up by the
    # recorder id (`rid`) captured in the training cell.
    recorder = R.get_recorder(rid, experiment_name="train_model")
    model = recorder.load_object("trained_model")

    # prediction
    # `recorder` is rebound here: with no arguments R.get_recorder() is expected
    # to return the recorder of the run started by this `with` block, so the
    # records below are attached to "backtest_analysis", not to "train_model".
    recorder = R.get_recorder()
    ba_rid = recorder.id
    sr = SignalRecord(model, dataset, recorder)
    sr.generate()

    # backtest & analysis
    par = PortAnaRecord(recorder, port_analysis_config)
    par.generate()

[36502:MainThread](2020-11-27 16:27:43,761) INFO - qlib.workflow - [exp.py:180] - Experiment 3 starts running ...
[36502:MainThread](2020-11-27 16:27:43,779) INFO - qlib.workflow - [recorder.py:234] - Recorder 67d105113f424259889fc0b6b0b94973 starts running under Experiment 3 ...
[36502:MainThread](2020-11-27 16:27:43,780) INFO - qlib.workflow - [expm.py:251] - No tracking URI is provided. The default tracking URI is set as `mlruns` under the working directory.
[36502:MainThread](2020-11-27 16:27:43,991) INFO - qlib.workflow - [record_temp.py:127] - Signal record 'pred.pkl' has been saved as the artifact of the Experiment 3
[36502:MainThread](2020-11-27 16:27:44,050) INFO - qlib.Evaluate - [evaluate.py:161] - Create new exchange
'The following are prediction results of the LGBModel model.'
                          score
datetime   instrument          
2017-01-03 SH600000   -0.053414
           SH600008    0.001820
           SH600009    0.023472
           SH600010   -0.005625
       