# Description

Adapted from docs/dataflow/ck.run_batch_computation_dag.tutorial.ipynb

Build and run the Mock2 pipeline.

# Imports

In [None]:
import logging

import dataflow.core as dtfcore
import dataflow.system as dtfsys
import helpers.hdbg as hdbg
import helpers.hprint as hprint

In [None]:
# Initialize logging for this notebook at INFO verbosity.
hdbg.init_logger(verbosity=logging.INFO)

# Logger for messages emitted from this notebook.
_LOG = logging.getLogger(__name__)

# Apply the notebook-wide settings provided by `hprint`
# (presumably display / formatting options -- see `hprint.config_notebook()`).
hprint.config_notebook()

# Build DAG

Build a `DagBuilder` object that defines a model's configuration:
- `get_config_template()`: creates a configuration for each DAG Node
- `_get_dag()`: specifies all the DAG Nodes and builds a DAG using these Nodes

In [None]:
import dataflow_amp.pipelines.mock2.mock2_pipeline as dapmmopi

# Instantiate the Mock2 `DagBuilder` and get its configuration template.
dag_builder = dapmmopi.Mock2_DagBuilder()
dag_config = dag_builder.get_config_template()
# Print the template to inspect it.
print(dag_config)

In [None]:
# Plot the model: build the DAG from the configuration template and draw it.
dag = dag_builder.get_dag(dag_config)
dtfcore.draw(dag)

# Add a node with data

## Build im_client

In [None]:
import im_v2.common.data.client.historical_pq_clients as imvcdchpcl

# S3 location passed to the client as `root_dir`.
root_dir = "s3://cryptokaizen-data-test/v3/bulk"

# Keyword arguments for the `ImClient` constructor; the dict is unpacked into
# `HistoricalPqByCurrencyPairTileClient` below.
# NOTE(review): `data_snapshot` and `downloading_entity` are empty strings --
# presumably meaning "not applicable" for this dataset; confirm.
im_client_config_dict = {
    "vendor": "bloomberg",
    "universe_version": "v1",
    "root_dir": root_dir,
    "partition_mode": "by_year_month",
    "dataset": "ohlcv",
    "contract_type": "spot",
    "data_snapshot": "",
    "download_mode": "manual",
    "downloading_entity": "",
    "aws_profile": "ck",
    "resample_1min": False,
    "version": "v1_0_0",
    "download_universe_version": "v1",
    "tag": "resampled_1min",
}

# Build the client from the config above.
im_client = imvcdchpcl.HistoricalPqByCurrencyPairTileClient(
    **im_client_config_dict
)

In [None]:
# Read the raw data for a single symbol directly through the `ImClient`.
full_symbols = ["us_market::MSFT"]
# No explicit time bounds or column selection are passed (`None` presumably
# means "everything available" -- confirm against `read_data()`).
start_ts = None
end_ts = None
columns = None
filter_data_mode = "assert"
datapull_data = im_client.read_data(
    full_symbols,
    start_ts,
    end_ts,
    columns,
    filter_data_mode,
)
display(datapull_data)

## Read universe

In [None]:
import dataflow.universe as dtfuniver

# Resolve the universe string into full symbols, then map the full symbols to
# asset ids through the `ImClient`.
# Note: this rebinds `full_symbols`, replacing the single-symbol list used in
# the raw-data read cell.
universe_str = "bloomberg_v1-top1"
full_symbols = dtfuniver.get_universe(universe_str)
asset_ids = im_client.get_asset_ids_from_full_symbols(full_symbols)

print(asset_ids)

## Build market_data

In [None]:
import market_data as mdata

# Build the market data object on top of `im_client` for the selected assets.
# Neither a column selection nor a column remapping is passed.
columns = columns_remap = None
market_data = mdata.get_HistoricalImClientMarketData_example1(
    im_client,
    asset_ids,
    columns,
    columns_remap,
)

In [None]:
# Print data in market data format.
# `start_ts` / `end_ts` are reused from the raw-data read cell, where both are
# set to `None`.
timestamp_column_name = "end_ts"
tmp_data = market_data.get_data_for_interval(
    start_ts, end_ts, timestamp_column_name, asset_ids
)
display(tmp_data)

## Build a HistoricalDataSource

In [None]:
# Create the data source node that reads from `market_data`.
# The data is keyed on the "end_ts" column and "start_ts" is passed as a
# column to remove.
timestamp_column_name = "end_ts"
col_names_to_remove = ["start_ts"]
multiindex_output = True
# Passed as the first positional argument (presumably the node id -- confirm).
stage = "read_data"
node = dtfsys.HistoricalDataSource(
    stage,
    market_data,
    timestamp_column_name,
    multiindex_output,
    col_names_to_remove=col_names_to_remove,
)

In [None]:
# Data in dataflow format: run the node's `fit()` and show its "df_out" entry.
node.fit()["df_out"]

In [None]:
# Insert the data source node at the head of the DAG and draw the DAG again.
dag.insert_at_head(node)
dtfcore.draw(dag)

In [None]:
# Run the DAG: fit it on one interval spanning the whole index of `tmp_data`.
dag_runner = dtfcore.FitPredictDagRunner(dag)
fit_interval = (tmp_data.index.min(), tmp_data.index.max())
dag_runner.set_fit_intervals([fit_interval])
fit_result_bundle = dag_runner.fit()
# Extract the resulting dataframe and show its first rows.
result_df = fit_result_bundle.result_df
result_df.head()

In [None]:
# Show the result with `dropna()` applied. The return value is not assigned, so
# `result_df` itself is left as is (assuming a pandas DataFrame and the default
# non-inplace `dropna()`).
result_df.dropna()