In [1]:
# TODO(Grisha): does this notebook belong in `dataflow/system` or in `dataflow_amp/system`?

In [2]:
# Enable the `autoreload` extension; mode 2 reloads imported modules before
# each cell is executed, so edits to library code are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [3]:
import logging
import os

import pandas as pd

import core.config as cconfig
import core.finance as cofinanc
import dataflow.system as dtfsys
import dataflow.universe as dtfuniver
# TODO(Grisha): import as package.
import dataflow_amp.system.mock1.mock1_forecast_system as dtfasmmfosy
import helpers.hdbg as hdbg
import helpers.henv as henv
import helpers.hprint as hprint
import im_v2.common.data.client as icdc
import im_v2.common.universe as ivcu

  from tqdm.autonotebook import tqdm


In [4]:
# Set up logging at INFO verbosity and create the notebook-level logger.
hdbg.init_logger(verbosity=logging.INFO)
_LOG = logging.getLogger(__name__)
# Record the environment the notebook runs in: only the first element of the
# system signature is logged.
_system_signature = henv.get_system_signature()
_LOG.info("%s", _system_signature[0])
# Apply the notebook configuration provided by `hprint`.
hprint.config_notebook()

[0m[36mINFO[0m: > cmd='/venv/lib/python3.8/site-packages/ipykernel_launcher.py -f /home/.local/share/jupyter/runtime/kernel-06519e4c-df97-4c32-959c-14eb34f226f0.json'
[31m-----------------------------------------------------------------------------
This code is not in sync with the container:
code_version='1.9.0' != container_version='1.6.0'
-----------------------------------------------------------------------------
You need to:
- merge origin/master into your branch with `invoke git_merge_master`
- pull the latest container with `invoke docker_pull`[0m
INFO  # Git
  branch_name='CmampTask5648_Document_SystemConfig'
  hash='576e00674'
  # Last commits:
    * 576e00674 GP Saggese Checkpoint                                                        (  17 hours ago) Wed Oct 11 19:41:02 2023  (HEAD -> CmampTask5648_Document_SystemConfig, origin/CmampTask5648_Document_SystemConfig)
    * 9f7eb86d8 GP Saggese Checkpoint                                                        (  17 hours a

# Initialize the System

At this stage, only the DAG config is built.

In [5]:
# Instantiate the System; no parameters are passed, so the config printed
# below is whatever the constructor populated by itself.
system = dtfasmmfosy.Mock1_NonTime_ForecastSystem()
# Show the config right after construction, before any section is filled in
# by the notebook.
print(system.config)

dag_config: 
  filter_ath: 
    col_mode: replace_all
    transformer_kwargs: 
      start_time: 09:30:00
      end_time: 16:00:00
  resample: 
    in_col_groups: [('close',), ('volume',), ('feature1',)]
    out_col_group: ()
    transformer_kwargs: 
      rule: 5T
      resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', 'feature1': 'feature1'}, 'mean', {})]
      vwap_groups: [('close', 'volume', 'vwap')]
    reindex_like_input: False
    join_output_with_input: False
  compute_ret_0: 
    in_col_groups: [('close',), ('vwap',), ('twap',)]
    out_col_group: ()
    transformer_kwargs: 
      mode: log_rets
    col_mapping: 
      close: close.ret_0
      vwap: vwap.ret_0
      twap: twap.ret_0
  compute_vol: 
    in_col_group: ('vwap.ret_0',)
    out_col_group: ('vwap.ret_0.vol',)
    drop_nans: True
    permitted_exceptions: (<class 'ValueError'>,)
  adjust_rets: 
    in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)]
    out_col_group: ()
    transformer_kwar

# Fill the SystemConfig

The parameters set below are required to instantiate the System's components (e.g., MarketData, DAG).

In [6]:
# Decode the backtest config string into its three components: universe,
# trading period and time interval.
backtest_config = "mock1_v1-top2.5T.Jan2000"
universe_str, trading_period_str, time_interval_str = (
    cconfig.parse_backtest_config(backtest_config)
)
# Only these resampling frequencies are accepted by this notebook.
supported_trading_periods = ("1T", "5T", "15T")
hdbg.dassert_in(trading_period_str, supported_trading_periods)
# The DAG resampling rule is overridden with the trading period coming from
# the backtest config.
resample_rule_key = ("dag_config", "resample", "transformer_kwargs", "rule")
system.config[resample_rule_key] = trading_period_str
# Backtest parameters, listed in the order in which they are written to the
# `backtest_config` section.
# TODO(Grisha): the timestamps below should be a function of `time_interval_str`.
backtest_params = {
    "universe_str": universe_str,
    "trading_period_str": trading_period_str,
    "time_interval_str": time_interval_str,
    "freq_as_pd_str": "M",
    "lookback_as_pd_str": "10D",
    "start_timestamp_with_lookback": pd.Timestamp(
        "2000-01-01 00:00:00+0000", tz="UTC"
    ),
    "end_timestamp": pd.Timestamp("2000-01-31 00:00:00+0000", tz="UTC"),
}
for param_name, param_value in backtest_params.items():
    system.config["backtest_config", param_name] = param_value
print(system.config)

INFO  backtest_config='mock1_v1-top2.5T.Jan2000'
INFO  universe_str='mock1_v1-top2', trading_period_str='5T', time_interval_str='Jan2000'
dag_config: 
  filter_ath: 
    col_mode: replace_all
    transformer_kwargs: 
      start_time: 09:30:00
      end_time: 16:00:00
  resample: 
    in_col_groups: [('close',), ('volume',), ('feature1',)]
    out_col_group: ()
    transformer_kwargs: 
      rule: 5T
      resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', 'feature1': 'feature1'}, 'mean', {})]
      vwap_groups: [('close', 'volume', 'vwap')]
    reindex_like_input: False
    join_output_with_input: False
  compute_ret_0: 
    in_col_groups: [('close',), ('vwap',), ('twap',)]
    out_col_group: ()
    transformer_kwargs: 
      mode: log_rets
    col_mapping: 
      close: close.ret_0
      vwap: vwap.ret_0
      twap: twap.ret_0
  compute_vol: 
    in_col_group: ('vwap.ret_0',)
    out_col_group: ('vwap.ret_0.vol',)
    drop_nans: True
    permitted_exceptions: (<

In [7]:
# Build the input market data for the `mock1` vendor universe.
vendor = "mock1"
mode = "trade"
universe = ivcu.get_vendor_universe(
    vendor, mode, version="v1", as_full_symbol=True
)
df = cofinanc.get_MarketData_df6(universe)
# Tell the System how to construct the ImClient: the constructor function and
# the config (wrapping the dataframe) that goes with it.
im_client_config = cconfig.Config().from_dict({"df": df})
market_data_section = "market_data_config"
system.config[
    market_data_section, "im_client_ctor"
] = icdc.get_DataFrameImClient_example1
system.config[market_data_section, "im_client_config"] = im_client_config
# Build the ImClient from the System and resolve the asset ids of the
# universe stored in the backtest config.
im_client = dtfsys.build_ImClient_from_System(system)
universe_str = system.config["backtest_config", "universe_str"]
full_symbols = dtfuniver.get_universe(universe_str)
asset_ids = im_client.get_asset_ids_from_full_symbols(full_symbols)
# Write the built client and the asset information back to the config.
market_data_params = (
    ("im_client", im_client),
    ("asset_ids", asset_ids),
    ("asset_id_col_name", "asset_id"),
)
for param_name, param_value in market_data_params:
    system.config[market_data_section, param_name] = param_value
print(system.config)

dag_config: 
  filter_ath: 
    col_mode: replace_all
    transformer_kwargs: 
      start_time: 09:30:00
      end_time: 16:00:00
  resample: 
    in_col_groups: [('close',), ('volume',), ('feature1',)]
    out_col_group: ()
    transformer_kwargs: 
      rule: 5T
      resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', 'feature1': 'feature1'}, 'mean', {})]
      vwap_groups: [('close', 'volume', 'vwap')]
    reindex_like_input: False
    join_output_with_input: False
  compute_ret_0: 
    in_col_groups: [('close',), ('vwap',), ('twap',)]
    out_col_group: ()
    transformer_kwargs: 
      mode: log_rets
    col_mapping: 
      close: close.ret_0
      vwap: vwap.ret_0
      twap: twap.ret_0
  compute_vol: 
    in_col_group: ('vwap.ret_0',)
    out_col_group: ('vwap.ret_0.vol',)
    drop_nans: True
    permitted_exceptions: (<class 'ValueError'>,)
  adjust_rets: 
    in_col_groups: [('vwap.ret_0',), ('vwap.ret_0.vol',)]
    out_col_group: ()
    transformer_kwar

# Build all the components and run the System

In [8]:
# Accessing `system.dag_runner` builds the DagRunner and, with it, all the
# other components of the System.
dag_runner = system.dag_runner
# Report whether the System considers itself fully built after this step.
print(system.is_fully_built())

INFO  
################################################################################
# Before building dag_runner, config=
################################################################################
dag_config: 
  filter_ath: 
    col_mode: replace_all
    transformer_kwargs: 
      start_time: 09:30:00
      end_time: 16:00:00
  resample: 
    in_col_groups: [('close',), ('volume',), ('feature1',)]
    out_col_group: ()
    transformer_kwargs: 
      rule: 5T
      resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', 'feature1': 'feature1'}, 'mean', {})]
      vwap_groups: [('close', 'volume', 'vwap')]
    reindex_like_input: False
    join_output_with_input: False
  compute_ret_0: 
    in_col_groups: [('close',), ('vwap',), ('twap',)]
    out_col_group: ()
    transformer_kwargs: 
      mode: log_rets
    col_mapping: 
      close: close.ret_0
      vwap: vwap.ret_0
      twap: twap.ret_0
  compute_vol: 
    in_col_group: ('vwap.ret_0',)
    out_col_group:

ERROR exception="head_key='dag_property_config' not in ['dag_config', 'dag_builder_object', 'dag_builder_class', 'system_class', 'system_log_dir', 'backtest_config', 'market_data_config', 'market_object', 'object.builder_function'] at level 0"
key='('dag_property_config', 'force_free_nodes')'
config=
  dag_config: 
    filter_ath: 
      col_mode: replace_all
      transformer_kwargs: 
        start_time: 09:30:00
        end_time: 16:00:00
    resample: 
      in_col_groups: [('close',), ('volume',), ('feature1',)]
      out_col_group: ()
      transformer_kwargs: 
        rule: 5T
        resampling_groups: [({'close': 'close'}, 'last', {}), ({'close': 'twap', 'feature1': 'feature1'}, 'mean', {})]
        vwap_groups: [('close', 'volume', 'vwap')]
      reindex_like_input: False
      join_output_with_input: False
    compute_ret_0: 
      in_col_groups: [('close',), ('vwap',), ('twap',)]
      out_col_group: ()
      transformer_kwargs: 
        mode: log_rets
      col_mapping: 
  

In [9]:
# Read the prediction window boundaries back from the SystemConfig.
backtest_section = "backtest_config"
start_datetime = system.config[
    backtest_section, "start_timestamp_with_lookback"
]
end_datetime = system.config[backtest_section, "end_timestamp"]
# Run the DAG in predict mode over a single interval covering the window.
predict_intervals = [(start_datetime, end_datetime)]
dag_runner.set_predict_intervals(predict_intervals)
result_bundle = dag_runner.predict()
# Display the last rows of the resulting dataframe.
result_df = result_bundle.result_df
result_df.tail(3)

run_leq_node:   0%|          | 0/9 [00:00<?, ?it/s]

  cond = value in valid_values
  cond = value in valid_values
  cond = value in valid_values
  cond = value in valid_values


Unnamed: 0_level_0,prediction,prediction,vwap.ret_0.vol_adj.c.lag0,vwap.ret_0.vol_adj.c.lag0,vwap.ret_0.vol_adj.c.lag1,vwap.ret_0.vol_adj.c.lag1,vwap.ret_0.vol_adj.c.lag2,vwap.ret_0.vol_adj.c.lag2,vwap.ret_0.vol_adj.c.lag3,vwap.ret_0.vol_adj.c.lag3,vwap.ret_0.vol_adj.c,vwap.ret_0.vol_adj.c,vwap.ret_0.vol_adj,vwap.ret_0.vol_adj,vwap.ret_0.vol,vwap.ret_0.vol,close.ret_0,close.ret_0,twap.ret_0,twap.ret_0,vwap.ret_0,vwap.ret_0,close,close,feature1,feature1,twap,twap,vwap,vwap
Unnamed: 0_level_1,1467591036,3303714233,1467591036,3303714233,1467591036,3303714233,1467591036,3303714233,1467591036,3303714233,1467591036,3303714233,1467591036,3303714233,1467591036,3303714233,1467591036,3303714233,1467591036,3303714233,1467591036,3303714233,1467591036,3303714233,1467591036,3303714233,1467591036,3303714233,1467591036,3303714233
end_ts,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2
2000-01-01 12:00:00-05:00,-0.570171,-0.570171,-0.979675,-0.979675,0.979675,0.979675,-0.979675,-0.979675,0.979675,0.979675,-0.979675,-0.979675,-1.0,-1.0,0.00995,0.00995,-0.00995,-0.00995,-0.00995,-0.00995,-0.00995,-0.00995,100.0,100.0,-1.0,-1.0,100.0,100.0,100.0,100.0
2000-01-01 12:05:00-05:00,0.570171,0.570171,0.979675,0.979675,-0.979675,-0.979675,0.979675,0.979675,-0.979675,-0.979675,0.979675,0.979675,1.0,1.0,0.00995,0.00995,0.00995,0.00995,0.00995,0.00995,0.00995,0.00995,101.0,101.0,1.0,1.0,101.0,101.0,101.0,101.0
2000-01-01 12:10:00-05:00,-0.570171,-0.570171,-0.979675,-0.979675,0.979675,0.979675,-0.979675,-0.979675,0.979675,0.979675,-0.979675,-0.979675,-1.0,-1.0,0.00995,0.00995,-0.00995,-0.00995,-0.00995,-0.00995,-0.00995,-0.00995,100.0,100.0,-1.0,-1.0,100.0,100.0,100.0,100.0
