In [None]:
import sys
import json
import yaml
import pickle
from pathlib import Path

import qlib
import pandas as pd
from qlib.config import REG_CN
from qlib.utils import exists_qlib_data

In [None]:
# Notebook-wide settings.
# MARKET: name of the instrument universe passed to D.instruments() further down.
MARKET = "csi300"
# CUR_DIR: directory the kernel was started in; the estimator config and the
# estimator output folders are resolved relative to it.
CUR_DIR = Path.cwd()

In [None]:
# Use the default Qlib CN dataset.
# It can be downloaded up front with:
#   python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data
provider_uri = "~/.qlib/qlib_data/cn_data"  # target_dir

if not exists_qlib_data(provider_uri):
    print(f"Qlib data is not found in {provider_uri}")
    # get_data.py is looked up in the "scripts" folder two levels above the
    # current working directory, so that folder is made importable first.
    sys.path.append(str(CUR_DIR.parent.parent / "scripts"))
    from get_data import GetData

    GetData().qlib_data_cn(provider_uri)

# Initialise Qlib against the dataset with the CN region settings.
qlib.init(provider_uri=provider_uri, region=REG_CN)

In [None]:
# Locate the output folder of the latest estimator run:
#   1. the experiment name comes from estimator_config.yaml in CUR_DIR;
#   2. the run id comes from <experiment name>/exp_info.json;
#   3. the results live in <experiment name>/sacred/<run id>.
# NOTE(review): yaml.FullLoader accepts more than plain data types; that is fine
# for a locally authored config, but yaml.safe_load is the more conservative
# choice if this file could ever come from an untrusted source.
with (CUR_DIR / 'estimator_config.yaml').open() as fp:
    estimator_name = yaml.load(fp, Loader=yaml.FullLoader)['experiment']['name']
with (CUR_DIR / estimator_name / 'exp_info.json').open() as fp:
    latest_id = json.load(fp)['id']

estimator_dir = CUR_DIR / estimator_name / 'sacred' / latest_id

# read estimator result

In [None]:
# Load the artifacts stored under ``estimator_dir``.
# NOTE: these are pickle files, and unpickling can execute arbitrary code, so
# only load results produced by a run you trust.
pred_df = pd.read_pickle(estimator_dir.joinpath('pred.pkl'))
report_normal_df = pd.read_pickle(estimator_dir.joinpath('report_normal.pkl'))
# Name the index level 'index' (presumably what the report plotting helpers
# expect -- TODO confirm against qlib.contrib.report).
report_normal_df.index.names = ['index']

analysis_df = pd.read_pickle(estimator_dir.joinpath('analysis.pkl'))
# Open the file in a ``with`` block so the handle is closed as soon as the
# object has been unpickled, rather than being left open until garbage collection.
with estimator_dir.joinpath('positions.pkl').open('rb') as fp:
    positions = pickle.load(fp)

# analyze graphs

In [None]:
# Qlib plotting helpers and the data-access entry point used by the analysis cells.
from qlib.contrib.report import analysis_model, analysis_position
from qlib.data import D

# Values of the 'datetime' index level of the predictions; their min/max are
# used as the time bounds of the feature queries in later cells.
pred_df_dates = pred_df.index.get_level_values('datetime')

## analysis position

In [None]:
# Query one expression for every instrument in MARKET between the first and
# last prediction date, then name the single resulting column 'label'.
# NOTE(review): Ref($close, -1)/$close - 1 presumably is the next-period close
# over the current close, minus one -- confirm against Qlib's Ref semantics.
stock_ret = D.features(
    D.instruments(MARKET),
    ['Ref($close, -1)/$close - 1'],
    pred_df_dates.min(),
    pred_df_dates.max(),
)
stock_ret.columns = ['label']

### report

In [None]:
# Render the position report graphs from the loaded report_normal_df.
analysis_position.report_graph(report_normal_df)

### risk analysis

In [None]:
# Render the risk-analysis graphs from analysis_df together with report_normal_df.
analysis_position.risk_analysis_graph(analysis_df, report_normal_df)

## analysis model

In [None]:
# Build the label used to evaluate the model: one expression queried for every
# instrument in MARKET over the prediction date range, exposed as column 'label'.
# NOTE(review): Ref($close, -2)/Ref($close, -1) - 1 presumably is the return
# between the next two periods' closes -- confirm against Qlib's Ref semantics.
label_df = D.features(
    D.instruments(MARKET),
    ['Ref($close, -2)/Ref($close, -1) - 1'],
    pred_df_dates.min(),
    pred_df_dates.max(),
)
label_df.columns = ['label']

### score IC

In [None]:
# Put labels and predictions side by side (column-wise concat on a sorted
# index), then keep exactly the rows of label_df, in label_df's order.
pred_label = (
    pd.concat([label_df, pred_df], axis=1, sort=True)
    .reindex(label_df.index)
)
# Render the score IC graph for the combined frame.
analysis_position.score_ic_graph(pred_label)

### model performance

In [None]:
# Render the model-performance graphs from pred_label (labels and predictions
# aligned on the label index).
analysis_model.model_performance_graph(pred_label)