# Description

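Calculate a longitudinal sweep of slippage in basis points (bps): for each experiment run, compute the mean slippage per asset and collect the results into one table indexed by time.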

# Imports

In [18]:
%load_ext autoreload
%autoreload 2
import logging

import pandas as pd

import core.config as cconfig
import core.finance.target_position_df_processing as cftpdp
import dataflow_amp.system.Cx as dtfamsysc
import helpers.hdbg as hdbg
import helpers.henv as henv
import helpers.hprint as hprint
import im_v2.common.universe as ivcu
import oms.broker.ccxt.ccxt_aggregation_functions as obccagfu
import oms.broker.ccxt.ccxt_execution_quality as obccexqu
import oms.broker.ccxt.ccxt_logger as obcccclo
import oms.order.order_converter as oororcon

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
# Set up logging for the notebook at INFO verbosity.
hdbg.init_logger(verbosity=logging.INFO)
_LOG = logging.getLogger(__name__)

# Record the environment the notebook ran in: only the first element of the
# system signature is logged.
system_signature = henv.get_system_signature()[0]
_LOG.info("%s", system_signature)

# Configure the notebook through the project helper.
hprint.config_notebook()

INFO  # Git
  branch_name='CmTask7376_Longitudina_sweep_of_slippage_in_bps'
  hash='96e6da278'
  # Last commits:
    *   96e6da278 Danya Tikhomirov Merge branch 'master' into CmTask7376_Longitudina_sweep_of_slippage_in_bps (   5 hours ago) Wed Mar 6 12:24:45 2024  (HEAD -> CmTask7376_Longitudina_sweep_of_slippage_in_bps, origin/CmTask7376_Longitudina_sweep_of_slippage_in_bps)
    |\  
    | * 0bfc20d14 Sameep Pote CmTask7435_Kill_get_Cx_NonTime_ForecastSystem_for_unit_tests_example1 (#7449) (   6 hours ago) Wed Mar 6 10:48:18 2024  (origin/CmampTask7456_Allow_downloading_parts_of_universe)
    | * 497e81b9d Sameep Pote CmTask7334 create new universe 1 (#7458)                          (  20 hours ago) Tue Mar 5 20:37:30 2024           
# Machine info
  system=Linux
  node name=5de9a1b8ea9d
  release=5.15.0-1053-aws
  version=#58~20.04.1-Ubuntu SMP Mon Jan 22 17:15:01 UTC 2024
  machine=x86_64
  processor=x86_64
  cpu count=8
  cpu freq=scpufreq(current=2499.992, min=0.0, max=0.0)
  memo

# Config

In [26]:
# TODO(Toma): turn this into a master notebook.

In [20]:
# A config supplied through environment variables takes precedence; when
# there is none, fall back to the defaults hard-coded below.
config = cconfig.get_config_from_env()
if not config:
    # Default parameters.
    id_col = "asset_id"
    universe_version = "v7.5"
    vendor = "CCXT"
    mode = "trade"
    bar_duration = "3T"
    # Assemble the nested sections one by one, keeping the key order of the
    # resulting config unchanged.
    meta_section = {"id_col": id_col}
    universe_section = {"universe_version": universe_version}
    ohlcv_market_data_section = {
        "vendor": vendor,
        "mode": mode,
        "universe": universe_section,
    }
    execution_parameters_section = {"bar_duration": bar_duration}
    config_dict = {
        "meta": meta_section,
        "ohlcv_market_data": ohlcv_market_data_section,
        "execution_parameters": execution_parameters_section,
    }
    config = cconfig.Config.from_dict(config_dict)
else:
    _LOG.info("Using config from env vars")
print(config)

meta: 
  id_col: asset_id
ohlcv_market_data: 
  vendor: CCXT
  mode: trade
  universe: 
    universe_version: v7.5
execution_parameters: 
  bar_duration: 3T


# Specify the paths to experiments

In [21]:
# Provide full system_log_dir paths with `process_forecasts` from Algo execution doc.
# Each entry is one experiment run; every path is handed to
# `obcccclo.CcxtLogger` in the processing loop further down.
# The entries do not need to be in chronological order: the resulting
# DataFrame is sorted by its timestamp index after concatenation.
paths_to_process = [
    "/shared_data/ecs_tokyo/test/system_reconciliation/C12a/prod/20240221_115700.20240221_125400/system_log_dir.manual/process_forecasts",
    "/shared_data/ecs_tokyo/test/system_reconciliation/C12a/prod/20240221_144500.20240221_154200/system_log_dir.manual/process_forecasts",
    "/shared_data/ecs_tokyo/test/system_reconciliation/C12a/prod/20240219_150900.20240219_160600/system_log_dir.manual/process_forecasts",
    "/shared_data/ecs_tokyo/test/system_reconciliation/C12a/prod/20240223_160900.20240223_170600/system_log_dir.manual/process_forecasts",
    "/shared_data/ecs_tokyo/test/system_reconciliation/C12a/prod/20240219_165700.20240219_175400/system_log_dir.manual/process_forecasts",
]

# Process the experiments

In [22]:
# Read the run parameters back from the config with `get_and_mark_as_used`,
# so the values are the same whether the config came from the environment or
# from the notebook defaults.
ohlcv_section = "ohlcv_market_data"
id_col = config.get_and_mark_as_used(("meta", "id_col"))
universe_version = config.get_and_mark_as_used(
    (ohlcv_section, "universe", "universe_version")
)
vendor = config.get_and_mark_as_used((ohlcv_section, "vendor"))
mode = config.get_and_mark_as_used((ohlcv_section, "mode"))
bar_duration = config.get_and_mark_as_used(
    ("execution_parameters", "bar_duration")
)

In [23]:
# One single-row DataFrame per experiment; concatenated after the loop.
# Re-created on every run of the cell so that re-running does not duplicate rows.
slippage_in_bps_rows = []

# The universe and the market data source depend only on the config values,
# not on the experiment, so they are built once here instead of being rebuilt
# inside the loop for every log dir.
# Get asset ids.
asset_ids = ivcu.get_vendor_universe_as_asset_ids(
    universe_version, vendor, mode
)
# Get the `MarketData` instance.
# NOTE(review): the helper name says "prod" while the DB stage passed in is
# "preprod" -- confirm this is intended.
db_stage = "preprod"
market_data = dtfamsysc.get_Cx_RealTimeMarketData_prod_instance1(
    asset_ids, db_stage
)

# TODO(Toma): move to a lib.
for log_dir in paths_to_process:
    _LOG.info("Processing `%s`", log_dir)
    # Init the log reader.
    ccxt_log_reader = obcccclo.CcxtLogger(log_dir)
    #
    # Load and aggregate data.
    #
    # Load OMS parent orders.
    parent_order_df = ccxt_log_reader.load_oms_parent_order(
        convert_to_dataframe=True, abort_on_missing_data=False
    )
    # Load CCXT fills (trades).
    fills_df = ccxt_log_reader.load_ccxt_trades(
        convert_to_dataframe=True, abort_on_missing_data=False
    )
    # Aggregate CCXT fills by bar, grouped by `id_col`.
    bar_fills = obccagfu.aggregate_fills_by_bar(
        fills_df, bar_duration, groupby_id_col=id_col
    )
    # Load OHLCV data for the interval covered by the fills, padded by one
    # bar on each side.
    start_timestamp = bar_fills["first_datetime"].min() - pd.Timedelta(
        bar_duration
    )
    end_timestamp = bar_fills["last_datetime"].max() + pd.Timedelta(bar_duration)
    # Load and resample OHLCV data.
    ohlcv_bars = dtfamsysc.load_and_resample_ohlcv_data(
        market_data,
        start_timestamp,
        end_timestamp,
        bar_duration,
    )
    #
    # Execution quality.
    #
    # Compute `target_position_df` and `portfolio_df`.
    price_df = ohlcv_bars["close"]
    target_position_df = oororcon.convert_order_df_to_target_position_df(
        parent_order_df,
        price_df,
    )
    portfolio_df = obccexqu.convert_bar_fills_to_portfolio_df(
        bar_fills,
        price_df,
    )
    (
        execution_quality_df,
        execution_quality_stats_df,
    ) = cftpdp.compute_execution_quality_df(
        portfolio_df,
        target_position_df,
    )
    # Calculate slippage in BPS as a plain (unweighted) mean over the rows of
    # the `slippage_in_bps` columns.
    # TODO(Toma): improve the way mean is calculated.
    slippage_in_bps = execution_quality_df["slippage_in_bps"].mean()
    # Store the result as a single row labelled with `start_timestamp`, i.e.
    # the first fill time minus one bar.
    # TODO(Toma): add caching previous calculations using decorator, PP with GP.
    row = pd.DataFrame(slippage_in_bps.to_dict(), index=[start_timestamp])
    slippage_in_bps_rows.append(row)

INFO  Processing `/shared_data/ecs_tokyo/test/system_reconciliation/C12a/prod/20240221_115700.20240221_125400/system_log_dir.manual/process_forecasts`


Loading files from '/shared_data/ecs_tokyo/test/system_reconciliation/C12a/prod/20240221_115700.20240221_12540…

Loading `/shared_data/ecs_tokyo/test/system_reconciliation/C12a/prod/20240221_115700.20240221_125400/system_lo…

  df = pd.read_sql_query(query, connection)


INFO  fit_intervals=[(Timestamp('2024-02-21 11:54:24.714000+0000', tz='UTC'), Timestamp('2024-02-21 12:57:10.734000+0000', tz='UTC'))]
INFO  Processing `/shared_data/ecs_tokyo/test/system_reconciliation/C12a/prod/20240221_144500.20240221_154200/system_log_dir.manual/process_forecasts`


  1e4 * side * holdings_price_per_share.pct_change().shift(-1)




Loading files from '/shared_data/ecs_tokyo/test/system_reconciliation/C12a/prod/20240221_144500.20240221_15420…

Loading `/shared_data/ecs_tokyo/test/system_reconciliation/C12a/prod/20240221_144500.20240221_154200/system_lo…

  df = pd.read_sql_query(query, connection)


INFO  fit_intervals=[(Timestamp('2024-02-21 14:42:20.631000+0000', tz='UTC'), Timestamp('2024-02-21 15:45:50.153000+0000', tz='UTC'))]
INFO  Processing `/shared_data/ecs_tokyo/test/system_reconciliation/C12a/prod/20240219_150900.20240219_160600/system_log_dir.manual/process_forecasts`


Loading files from '/shared_data/ecs_tokyo/test/system_reconciliation/C12a/prod/20240219_150900.20240219_16060…

Loading `/shared_data/ecs_tokyo/test/system_reconciliation/C12a/prod/20240219_150900.20240219_160600/system_lo…

  df = pd.read_sql_query(query, connection)


INFO  fit_intervals=[(Timestamp('2024-02-19 15:06:26.149000+0000', tz='UTC'), Timestamp('2024-02-19 16:10:00.644000+0000', tz='UTC'))]
INFO  Processing `/shared_data/ecs_tokyo/test/system_reconciliation/C12a/prod/20240223_160900.20240223_170600/system_log_dir.manual/process_forecasts`


Loading files from '/shared_data/ecs_tokyo/test/system_reconciliation/C12a/prod/20240223_160900.20240223_17060…

Loading `/shared_data/ecs_tokyo/test/system_reconciliation/C12a/prod/20240223_160900.20240223_170600/system_lo…

  df = pd.read_sql_query(query, connection)


INFO  fit_intervals=[(Timestamp('2024-02-23 16:06:12.247000+0000', tz='UTC'), Timestamp('2024-02-23 16:36:09.035000+0000', tz='UTC'))]
INFO  Processing `/shared_data/ecs_tokyo/test/system_reconciliation/C12a/prod/20240219_165700.20240219_175400/system_log_dir.manual/process_forecasts`


Loading files from '/shared_data/ecs_tokyo/test/system_reconciliation/C12a/prod/20240219_165700.20240219_17540…

Loading `/shared_data/ecs_tokyo/test/system_reconciliation/C12a/prod/20240219_165700.20240219_175400/system_lo…

  df = pd.read_sql_query(query, connection)


INFO  fit_intervals=[(Timestamp('2024-02-19 16:54:17.936000+0000', tz='UTC'), Timestamp('2024-02-19 17:57:13.856000+0000', tz='UTC'))]


# Build the longitudinal slippage sweep DataFrame

In [24]:
# Stack the per-experiment rows into one frame, then order the rows by their
# timestamp index.
stacked_slippage_rows = pd.concat(slippage_in_bps_rows)
slippage_in_bps_df = stacked_slippage_rows.sort_index()

In [25]:
# Display the sweep: one row per processed experiment (labelled with its
# `start_timestamp`), one column per key of the mean-slippage Series
# (presumably asset ids -- confirm against `compute_execution_quality_df`).
slippage_in_bps_df

Unnamed: 0,1464553467,1467591036
2024-02-19 15:06:26.149000+00:00,0.787916,2.80595
2024-02-19 16:54:17.936000+00:00,3.503681,8.694168
2024-02-21 11:54:24.714000+00:00,2.812249,-7.244264
2024-02-21 14:42:20.631000+00:00,0.017361,-1.806226
2024-02-23 16:06:12.247000+00:00,5.074236,4.071793
