Update CI & add black formatter
Derek-Wds authored and you-n-g committed Oct 10, 2020
1 parent 143f3f9 commit 7c6e5e8
Showing 26 changed files with 194 additions and 255 deletions.
10 changes: 4 additions & 6 deletions .github/workflows/test.yml
@@ -38,14 +38,12 @@ jobs:
- name: Install test dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 pytest
pip install black pytest
- name: Lint with flake8
- name: Lint with Black
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
cd ..
python -m black qlib -l 120
- name: Unit tests with Pytest
run: |
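Note that `python -m black qlib -l 120` rewrites files in place and exits 0 even when it reformats something, so on its own this step will not fail the build on formatting drift; Black's `--check` flag is the usual way to turn drift into a CI failure. A minimal local sketch of that stricter variant (the subprocess wrapper is illustrative, not part of this commit):

```python
import subprocess
import sys

# Run the same Black invocation as the new CI step, but with --check so it
# exits non-zero (and lists offending files) instead of rewriting them.
result = subprocess.run(
    [sys.executable, "-m", "black", "qlib", "-l", "120", "--check"],
    capture_output=True,
    text=True,
)
if result.returncode != 0:
    print(result.stderr)  # Black reports "would reformat ..." lines on stderr
```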
1 change: 1 addition & 0 deletions README.md
@@ -2,6 +2,7 @@
[![Platform](https://img.shields.io/badge/platform-linux%20%7C%20windows%20%7C%20macos-lightgrey)](https://pypi.org/project/pyqlib/#files)
[![PypI Versions](https://img.shields.io/pypi/v/pyqlib)](https://pypi.org/project/pyqlib/#history)
[![Upload Python Package](https://github.com/microsoft/qlib/workflows/Upload%20Python%20Package/badge.svg)](https://pypi.org/project/pyqlib/)
[![Github Actions Test Status](https://github.com/microsoft/qlib/workflows/Test/badge.svg?branch=main)](https://github.com/microsoft/qlib/actions)
[![Documentation Status](https://readthedocs.org/projects/qlib/badge/?version=latest)](https://qlib.readthedocs.io/en/latest/?badge=latest)
[![License](https://img.shields.io/pypi/l/pyqlib)](LICENSE)
[![Join the chat at https://gitter.im/Microsoft/qlib](https://badges.gitter.im/Microsoft/qlib.svg)](https://gitter.im/Microsoft/qlib?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
7 changes: 1 addition & 6 deletions docs/conf.py
@@ -53,7 +53,6 @@
master_doc = "index"



# General information about the project.
project = u"QLib"
copyright = u"Microsoft"
@@ -104,8 +103,7 @@
#
html_theme = "sphinx_rtd_theme"

html_logo = '_static/img/logo/1.png'

html_logo = "_static/img/logo/1.png"


# Theme options are theme-specific and customize the look and feel of a theme
@@ -161,15 +159,12 @@
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',

# The font size ('10pt', '11pt' or '12pt').
#
# 'pointsize': '10pt',

# Additional stuff for the LaTeX preamble.
#
# 'preamble': '',

# Latex figure (float) alignment
#
# 'figure_align': 'htbp',
10 changes: 6 additions & 4 deletions examples/train_and_backtest.py
@@ -54,9 +54,9 @@

# use default DataHandler
# custom DataHandler, refer to: TODO: DataHandler API url
x_train, y_train, x_validate, y_validate, x_test, y_test = Alpha158(
**DATA_HANDLER_CONFIG
).get_split_data(**TRAINER_CONFIG)
x_train, y_train, x_validate, y_validate, x_test, y_test = Alpha158(**DATA_HANDLER_CONFIG).get_split_data(
**TRAINER_CONFIG
)

MODEL_CONFIG = {
"loss": "mse",
@@ -114,6 +114,8 @@
###################################
analysis = dict()
analysis["excess_return_without_cost"] = risk_analysis(report_normal["return"] - report_normal["bench"])
analysis["excess_return_with_cost"] = risk_analysis(report_normal["return"] - report_normal["bench"] - report_normal["cost"])
analysis["excess_return_with_cost"] = risk_analysis(
report_normal["return"] - report_normal["bench"] - report_normal["cost"]
)
analysis_df = pd.concat(analysis) # type: pd.DataFrame
print(analysis_df)
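The reflowed analysis block is plain column arithmetic on the backtest report; a self-contained sketch with toy numbers (the column names follow the diff, the values are made up):

```python
import pandas as pd

# Toy stand-in for the backtest report produced above; values are illustrative.
report_normal = pd.DataFrame(
    {
        "return": [0.010, -0.004, 0.007],  # strategy daily return
        "bench": [0.006, -0.002, 0.005],   # benchmark daily return
        "cost": [0.001, 0.001, 0.001],     # daily transaction cost
    }
)

# Excess return over the benchmark, before and after transaction costs.
excess_without_cost = report_normal["return"] - report_normal["bench"]
excess_with_cost = report_normal["return"] - report_normal["bench"] - report_normal["cost"]
print(excess_with_cost)
```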
7 changes: 3 additions & 4 deletions qlib/__init__.py
@@ -44,7 +44,7 @@ def init(default_conf="client", **kwargs):
if k not in C:
LOG.warning("Unrecognized config %s" % k)

C.set_region(kwargs.get('region', C['region'] if 'region' in C else REG_CN ))
C.set_region(kwargs.get("region", C["region"] if "region" in C else REG_CN))
C.resolve_path()

if not (C["expression_cache"] is None and C["dataset_cache"] is None):
@@ -83,6 +83,7 @@ def init(default_conf="client", **kwargs):

def _mount_nfs_uri(C):
from .log import get_module_logger

LOG = get_module_logger("mount nfs", level=logging.INFO)

# FIXME: the C["provider_uri"] is modified in this function
@@ -161,9 +162,7 @@ def _mount_nfs_uri(C):
command_res = os.popen("dpkg -l | grep nfs-common")
command_res = command_res.readlines()
if not command_res:
raise OSError(
"nfs-common is not found, please install it by execute: sudo apt install nfs-common"
)
raise OSError("nfs-common is not found, please install it by execute: sudo apt install nfs-common")
# manually mount
command_status = os.system(mount_command)
if command_status == 256:
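In the mount logic above, `os.system` returns the raw POSIX wait status, so the `command_status == 256` check corresponds to the mount command exiting with code 1 (the high byte of the status holds the exit code). A tiny sketch of the decoding (the shell command is illustrative; `os.waitstatus_to_exitcode` needs Python 3.9+):

```python
import os

# On POSIX, os.system returns a wait status whose high byte is the exit
# code, so a status of 256 means the command exited with code 1.
status = os.system("exit 1")  # illustrative shell command
print(status)                             # 256 on POSIX
print(os.waitstatus_to_exitcode(status))  # 1 (Python 3.9+)
```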
16 changes: 7 additions & 9 deletions qlib/config.py
@@ -17,7 +17,6 @@


class Config:

def __init__(self, default_conf):
self.__dict__["_default_config"] = default_conf # avoiding conflictions with __getattr__
self.reset()
@@ -128,7 +127,7 @@ def update(self, *args, **kwargs):
}

MODE_CONF = {
'server': {
"server": {
# data provider config
"calendar_provider": "LocalCalendarProvider",
"instrument_provider": "LocalInstrumentProvider",
@@ -147,8 +146,7 @@ def update(self, *args, **kwargs):
"expression_cache": "DiskExpressionCache",
"dataset_cache": "DiskDatasetCache",
},

'client': {
"client": {
# data provider config
"calendar_provider": "LocalCalendarProvider",
"instrument_provider": "LocalInstrumentProvider",
@@ -172,7 +170,7 @@ def update(self, *args, **kwargs):
"timeout": 100,
"logging_level": "INFO",
"region": REG_CN,
}
},
}


@@ -192,8 +190,8 @@ def update(self, *args, **kwargs):

class QlibConfig(Config):
# URI_TYPE
LOCAL_URI = 'local'
NFS_URI = 'nfs'
LOCAL_URI = "local"
NFS_URI = "nfs"

def set_mode(self, mode):
# raise KeyError
@@ -222,9 +220,9 @@ def get_uri_type(self):

def get_data_path(self):
if self.get_uri_type() == QlibConfig.LOCAL_URI:
return self['provider_uri']
return self["provider_uri"]
elif self.get_uri_type() == QlibConfig.NFS_URI:
return self['mount_path']
return self["mount_path"]
else:
raise NotImplementedError(f"This type of uri is not supported")

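The get_data_path branch above dispatches on how the data is reachable: a plain filesystem path for local data versus the mount point for an NFS share. A minimal standalone sketch of the same dispatch (the free function and config dict are illustrative, not qlib's API):

```python
LOCAL_URI = "local"
NFS_URI = "nfs"

def get_data_path(config: dict, uri_type: str) -> str:
    # Local data is read straight from provider_uri; NFS data is read
    # from wherever the share has been mounted.
    if uri_type == LOCAL_URI:
        return config["provider_uri"]
    elif uri_type == NFS_URI:
        return config["mount_path"]
    raise NotImplementedError(f"URI type {uri_type!r} is not supported")

print(get_data_path({"provider_uri": "~/.qlib/qlib_data/cn_data"}, LOCAL_URI))
```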
4 changes: 3 additions & 1 deletion qlib/contrib/estimator/estimator.py
@@ -186,7 +186,9 @@ def _analyze(report_normal):
# analysis["pred_short"] = risk_analysis(long_short_reports["short"])
# analysis["pred_long_short"] = risk_analysis(long_short_reports["long_short"])
analysis["excess_return_without_cost"] = risk_analysis(report_normal["return"] - report_normal["bench"])
analysis["excess_return_with_cost"] = risk_analysis(report_normal["return"] - report_normal["bench"] - report_normal["cost"])
analysis["excess_return_with_cost"] = risk_analysis(
report_normal["return"] - report_normal["bench"] - report_normal["cost"]
)
analysis_df = pd.concat(analysis) # type: pd.DataFrame
TimeInspector.log_cost_time(
"Finished generating analysis," " average turnover is: {0:.4f}.".format(report_normal["turnover"].mean())
12 changes: 6 additions & 6 deletions qlib/contrib/estimator/handler.py
@@ -558,16 +558,16 @@ def setup_label(self):

class Alpha158(QLibDataHandlerV1):
config_template = {
'kbar': {},
'price': {
'windows': [0],
'feature': ['OPEN', 'HIGH', 'LOW', 'CLOSE'],
"kbar": {},
"price": {
"windows": [0],
"feature": ["OPEN", "HIGH", "LOW", "CLOSE"],
},
'rolling': {}
"rolling": {},
}

def _init_kwargs(self, **kwargs):
kwargs['labels'] = ["Ref($close, -2)/Ref($close, -1) - 1"]
kwargs["labels"] = ["Ref($close, -2)/Ref($close, -1) - 1"]
super(Alpha158, self)._init_kwargs(**kwargs)


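The label set in `_init_kwargs`, `Ref($close, -2)/Ref($close, -1) - 1`, is the one-bar return realised between the next bar and the one after it; in qlib's expression language a negative `Ref` offset references future values. A minimal pandas sketch of the same arithmetic (the toy price series is illustrative):

```python
import pandas as pd

# Toy close-price series; values are illustrative.
close = pd.Series([10.0, 10.2, 10.1, 10.5, 10.4])

# Ref($close, -k) references the value k bars in the future, which maps to
# pandas shift(-k). The label is the one-bar return starting at t+1.
label = close.shift(-2) / close.shift(-1) - 1
print(label)
```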
11 changes: 8 additions & 3 deletions qlib/contrib/evaluate.py
@@ -34,8 +34,13 @@ def risk_analysis(r, N=252):
annualized_return = mean * N
information_ratio = mean / std * np.sqrt(N)
max_drawdown = (r.cumsum() - r.cumsum().cummax()).min()
data = {"mean": mean, "std": std, "annualized_return": annualized_return,
"information_ratio": information_ratio, "max_drawdown": max_drawdown}
data = {
"mean": mean,
"std": std,
"annualized_return": annualized_return,
"information_ratio": information_ratio,
"max_drawdown": max_drawdown,
}
res = pd.Series(data, index=data.keys()).to_frame("risk")
return res

@@ -230,7 +235,7 @@ def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **k
limit move 0.1 (10%) for example, long and short with same limit
extract_codes: bool
will we pass the codes extracted from the pred to the exchange.
.. note:: This will be faster with offline qlib.
"""
# check strategy:
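The reformatted dict doubles as a summary of what risk_analysis reports; a self-contained sketch of the same statistics on a toy daily-return series (N=252 as in the function's default, values illustrative):

```python
import numpy as np
import pandas as pd

# Toy daily excess-return series; values are illustrative.
r = pd.Series([0.002, -0.001, 0.003, -0.002, 0.001])
N = 252  # trading periods per year, matching risk_analysis's default

mean = r.mean()
std = r.std(ddof=1)
annualized_return = mean * N
information_ratio = mean / std * np.sqrt(N)
# Worst gap between the running peak of cumulative return and its current level.
max_drawdown = (r.cumsum() - r.cumsum().cummax()).min()

risk = pd.Series(
    {
        "mean": mean,
        "std": std,
        "annualized_return": annualized_return,
        "information_ratio": information_ratio,
        "max_drawdown": max_drawdown,
    }
).to_frame("risk")
print(risk)
```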
6 changes: 3 additions & 3 deletions qlib/contrib/model/pytorch_nn.py
@@ -167,7 +167,7 @@ def fit(
# train
self.logger.info("training...")
self._fitted = True
#return
# return
# prepare training data
x_train_values = torch.from_numpy(x_train.values).float()
y_train_values = torch.from_numpy(y_train.values).float()
@@ -210,7 +210,7 @@ def fit(

# validation
train_loss += loss.val
#print(loss.val)
# print(loss.val)
if step and step % self.eval_steps == 0:
stop_steps += 1
train_loss /= self.eval_steps
@@ -263,7 +263,7 @@ def predict(self, x_test):
raise ValueError("model is not fitted yet!")
x_test = torch.from_numpy(x_test.values).float().cuda()
self.dnn_model.eval()

with torch.no_grad():
preds = self.dnn_model(x_test).detach().cpu().numpy()
return preds
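The predict path touched above follows the standard PyTorch inference pattern: switch to eval mode, then run the forward pass under no_grad. A minimal generic sketch (the stand-in model is illustrative, not qlib's DNN class, and it runs on CPU rather than calling `.cuda()`):

```python
import torch
import torch.nn as nn

model = nn.Linear(8, 1)     # stand-in for the fitted network
x_test = torch.randn(4, 8)  # stand-in for the feature matrix

model.eval()                # freeze dropout / batch-norm behaviour
with torch.no_grad():       # skip autograd bookkeeping during inference
    preds = model(x_test).cpu().numpy()
print(preds.shape)          # (4, 1)
```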
55 changes: 21 additions & 34 deletions qlib/contrib/report/analysis_model/analysis_model_performance.py
@@ -14,9 +14,7 @@
from ..graph import ScatterGraph, SubplotsGraph, BarGraph, HeatmapGraph


def _group_return(
pred_label: pd.DataFrame = None, reverse: bool = False, N: int = 5, **kwargs
) -> tuple:
def _group_return(pred_label: pd.DataFrame = None, reverse: bool = False, N: int = 5, **kwargs) -> tuple:
"""
:param pred_label:
@@ -48,19 +46,15 @@ def _group_return(
t_df["long-short"] = t_df["Group1"] - t_df["Group%d" % N]

# Long-Average
t_df["long-average"] = (
t_df["Group1"] - pred_label.groupby(level="datetime")["label"].mean()
)
t_df["long-average"] = t_df["Group1"] - pred_label.groupby(level="datetime")["label"].mean()

t_df = t_df.dropna(how="all") # for days which does not contain label
# FIXME: support HIGH-FREQ
t_df.index = t_df.index.strftime("%Y-%m-%d")
# Cumulative Return By Group
group_scatter_figure = ScatterGraph(
t_df.cumsum(),
layout=dict(
title="Cumulative Return", xaxis=dict(type="category", tickangle=45)
),
layout=dict(title="Cumulative Return", xaxis=dict(type="category", tickangle=45)),
).figure

t_df = t_df.loc[:, ["long-short", "long-average"]]
@@ -103,13 +97,9 @@ def _pred_ic(pred_label: pd.DataFrame = None, rank: bool = False, **kwargs) -> t
lambda x: x["label"].rank(pct=True).corr(x["score"].rank(pct=True))
)
else:
ic = pred_label.groupby(level="datetime").apply(
lambda x: x["label"].corr(x["score"])
)
ic = pred_label.groupby(level="datetime").apply(lambda x: x["label"].corr(x["score"]))

_index = (
ic.index.get_level_values(0).astype("str").str.replace("-", "").str.slice(0, 6)
)
_index = ic.index.get_level_values(0).astype("str").str.replace("-", "").str.slice(0, 6)
_monthly_ic = ic.groupby(_index).mean()
_monthly_ic.index = pd.MultiIndex.from_arrays(
[_monthly_ic.index.str.slice(0, 4), _monthly_ic.index.str.slice(4, 6)],
@@ -186,17 +176,13 @@ def _pred_ic(pred_label: pd.DataFrame = None, rank: bool = False, **kwargs) -> t
def _pred_autocorr(pred_label: pd.DataFrame, lag=1, **kwargs) -> tuple:
pred = pred_label.copy()
pred["score_last"] = pred.groupby(level="instrument")["score"].shift(lag)
ac = pred.groupby(level="datetime").apply(
lambda x: x["score"].rank(pct=True).corr(x["score_last"].rank(pct=True))
)
ac = pred.groupby(level="datetime").apply(lambda x: x["score"].rank(pct=True).corr(x["score_last"].rank(pct=True)))
# FIXME: support HIGH-FREQ
_df = ac.to_frame("value")
_df.index = _df.index.strftime("%Y-%m-%d")
ac_figure = ScatterGraph(
_df,
layout=dict(
title="Auto Correlation", xaxis=dict(type="category", tickangle=45)
),
layout=dict(title="Auto Correlation", xaxis=dict(type="category", tickangle=45)),
).figure
return (ac_figure,)

@@ -206,9 +192,7 @@ def _pred_turnover(pred_label: pd.DataFrame, N=5, lag=1, **kwargs) -> tuple:
pred["score_last"] = pred.groupby(level="instrument")["score"].shift(lag)
top = pred.groupby(level="datetime").apply(
lambda x: 1
- x.nlargest(len(x) // N, columns="score")
.index.isin(x.nlargest(len(x) // N, columns="score_last").index)
.sum()
- x.nlargest(len(x) // N, columns="score").index.isin(x.nlargest(len(x) // N, columns="score_last").index).sum()
/ (len(x) // N)
)
bottom = pred.groupby(level="datetime").apply(
@@ -218,14 +202,17 @@
.sum()
/ (len(x) // N)
)
r_df = pd.DataFrame({"Top": top, "Bottom": bottom,})
r_df = pd.DataFrame(
{
"Top": top,
"Bottom": bottom,
}
)
# FIXME: support HIGH-FREQ
r_df.index = r_df.index.strftime("%Y-%m-%d")
turnover_figure = ScatterGraph(
r_df,
layout=dict(
title="Top-Bottom Turnover", xaxis=dict(type="category", tickangle=45)
),
layout=dict(title="Top-Bottom Turnover", xaxis=dict(type="category", tickangle=45)),
).figure
return (turnover_figure,)

@@ -270,12 +257,12 @@ def model_performance_graph(
.. code-block:: python
instrument datetime score label
SH600004 2017-12-11 -0.013502 -0.013502
2017-12-12 -0.072367 -0.072367
2017-12-13 -0.068605 -0.068605
2017-12-14 0.012440 0.012440
2017-12-15 -0.102778 -0.102778
instrument datetime score label
SH600004 2017-12-11 -0.013502 -0.013502
2017-12-12 -0.072367 -0.072367
2017-12-13 -0.068605 -0.068605
2017-12-14 0.012440 0.012440
2017-12-15 -0.102778 -0.102778
:param lag: `pred.groupby(level='instrument')['score'].shift(lag)`. It will be only used in the auto-correlation computing.
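The rank-IC branch reformatted above is a daily cross-sectional Spearman correlation between scores and labels; a minimal sketch over the (instrument, datetime) index shown in the docstring (toy values, three instruments over two days):

```python
import pandas as pd

# Toy pred_label frame with the (instrument, datetime) MultiIndex from the
# docstring above; numbers are illustrative.
idx = pd.MultiIndex.from_product(
    [["SH600004", "SH600005", "SH600006"], pd.to_datetime(["2017-12-11", "2017-12-12"])],
    names=["instrument", "datetime"],
)
pred_label = pd.DataFrame(
    {
        "score": [0.10, -0.20, 0.30, 0.00, -0.05, 0.15],
        "label": [0.05, -0.10, 0.20, 0.01, -0.02, 0.08],
    },
    index=idx,
)

# Daily rank IC: correlate percentile ranks of score and label within each date.
rank_ic = pred_label.groupby(level="datetime").apply(
    lambda x: x["label"].rank(pct=True).corr(x["score"].rank(pct=True))
)
print(rank_ic)
```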
