# Description

- Initialize with returns, alpha, and spread
- Evaluate portfolios generated from the alpha

# Imports

In [None]:
%load_ext autoreload
%autoreload 2

import logging

import core.config as cconfig
import core.dataflow_model.model_evaluator as modeval
import core.dataflow_model.utils as cdmu
import helpers.dbg as dbg
import helpers.printing as hprint

In [None]:
# Initialize logging at INFO verbosity; swap in the commented line below to get
# DEBUG output instead.
dbg.init_logger(verbosity=logging.INFO)
# dbg.init_logger(verbosity=logging.DEBUG)

# Logger for this notebook.
_LOG = logging.getLogger(__name__)

# NOTE(review): `env` is not among the imports visible in this notebook, so the
# line below needs an import before it can be re-enabled.
# _LOG.info("%s", env.get_system_signature()[0])

# Apply the notebook configuration provided by `helpers.printing`.
hprint.config_notebook()

# Notebook config

In [None]:
# The config can alternatively be read from the environment:
# config = cconfig.Config.from_env_var("AM_CONFIG_CODE")
config = None

if config is None:
    # No external config: use the hard-coded experiment below.
    experiment_dir = "/cache/experiments/oos_experiment.RH2Eg.v2_0-all.5T.run2.hacked"
    aws_profile = None
    # Alternative value used for quicker runs: `range(200)`.
    selected_idxs = None

    # Arguments later unpacked into `cdmu.load_experiment_artifacts()`.
    load_experiment_kwargs = {
        "src_dir": experiment_dir,
        "file_name": "result_bundle.v2_0.pkl",
        "experiment_type": "ins_oos",
        "selected_idxs": selected_idxs,
        "aws_profile": aws_profile,
    }
    # Column names later unpacked into
    # `StrategyEvaluator.from_result_bundle_dict()`.
    strategy_evaluator_kwargs = {
        "returns_col": "mid_ret_0",
        "position_intent_col": "position_intent_1",
        "spread_col": "spread",
    }
    eval_config = cconfig.get_config_from_nested_dict(
        {
            "load_experiment_kwargs": load_experiment_kwargs,
            "strategy_evaluator_kwargs": strategy_evaluator_kwargs,
            "bh_adj_threshold": 0.1,
            "resample_rule": "W",
        }
    )

# NOTE(review): `eval_config` is only assigned when `config is None`.
print(str(eval_config))

In [None]:
# Display the entry stored under key 0 of `result_bundle_dict`.
# NOTE(review): `result_bundle_dict` is only assigned in the following cell, so
# running the notebook top-to-bottom on a fresh kernel raises `NameError` here.
result_bundle_dict[0]

In [None]:
# Start from the loader arguments stored in the notebook config.
load_config = eval_config["load_experiment_kwargs"].to_dict()

# Restrict loading to the three columns that are passed to the
# StrategyEvaluator below.
strategy_evaluator_config = eval_config["strategy_evaluator_kwargs"]
load_config["load_rb_kwargs"] = {
    "columns": [
        strategy_evaluator_config[col_key]
        for col_key in ("returns_col", "position_intent_col", "spread_col")
    ]
}
result_bundle_dict = cdmu.load_experiment_artifacts(**load_config)

# Create the StrategyEvaluator from the loaded result bundles.
# `abort_on_error=False` is the alternative setting.
evaluator_kwargs = strategy_evaluator_config.to_dict()
evaluator = modeval.StrategyEvaluator.from_result_bundle_dict(
    result_bundle_dict,
    abort_on_error=True,
    **evaluator_kwargs,
)

In [None]:
# Disabled by default: change the condition to `True` to write `evaluator` to
# `evaluator.pkl` in the current directory.
if False:
    import helpers.pickle_ as hpickle

    hpickle.to_pickle(evaluator, "evaluator.pkl")

In [None]:
# Always raises `AssertionError`; presumably a deliberate stop so that running
# all cells does not continue past this point -- TODO confirm.
assert 0

# Restart from pickle

In [None]:
# Print the on-disk size of `evaluator.pkl` (IPython shell escape).
# NOTE(review): in this notebook the file is only written by the cell guarded
# with `if False:`, so it may not exist.
!du -h evaluator.pkl

In [None]:
# Compute the PnL keyed by instrument, with the spread fraction paid set to 0.
spread_fraction_paid = 0
#keys = range(3)
# `keys=None` presumably selects all the available keys -- TODO confirm against
# `StrategyEvaluator.compute_pnl()`.
keys = None
#result = evaluator.compute_pnl(key_type="attribute", keys=keys)
pnl_dict = evaluator.compute_pnl(spread_fraction_paid, keys=keys, key_type="instrument")

#pnl_dict[0]

In [None]:
#spread_fraction_paid = 0
#evaluator.calculate_stats(spread_fraction_paid)

In [None]:
import pandas as pd

import numpy as np

In [None]:
import gc

# Memory usage before forcing a garbage collection.
print(dbg.get_memory_usage_as_str(None))

# Optionally release the PnL dict before collecting:
#del pnl_dict

gc.collect()

# Memory usage after the collection, for comparison with the value above.
print(dbg.get_memory_usage_as_str(None))

In [None]:
def _compute_pnl_dict(spread_fraction_paid):
    """
    Compute the PnL with the notebook-level `evaluator`, keyed by instrument.

    :param spread_fraction_paid: value forwarded as the first positional
        argument of `evaluator.compute_pnl()`
    :return: the result of `evaluator.compute_pnl()` called with `keys=None`
        and `key_type="instrument"`
    """
    return evaluator.compute_pnl(
        spread_fraction_paid, keys=None, key_type="instrument"
    )
    
    
def _get_pnl_df(pnl_dict):
    """
    Build a dataframe of PnL net of spread cost, one column per key.

    :param pnl_dict: mapping from key to an object exposing the "pnl_0" and
        "spread_cost_0" columns (presumably a dataframe per instrument, as
        returned by `evaluator.compute_pnl()` -- TODO confirm)
    :return: dataframe whose columns are the keys of `pnl_dict`, in iteration
        order, each holding `pnl_0 - spread_cost_0` for that key
    """
    net_pnl_columns = []
    for instrument, instrument_pnl in pnl_dict.items():
        net_pnl = instrument_pnl["pnl_0"] - instrument_pnl["spread_cost_0"]
        # The series name becomes the column label after the concat.
        net_pnl.name = instrument
        net_pnl_columns.append(net_pnl)
    return pd.concat(net_pnl_columns, axis=1)


def _aggregate_pnl(df, excluded_keys=(224, 554, 311, 384, 589, 404)):
    """
    Aggregate per-key PnL into a single cumulative business-day PnL series.

    :param df: dataframe with a datetime index and one PnL column per key
    :param excluded_keys: column labels to leave out of the aggregate; labels
        that are not columns of `df` are skipped. The default is the list of
        keys that was previously hard-coded in this function
    :return: series with the cumulative sum, over business days, of the PnL
        summed across the remaining columns
    """
    daily_pnl = df.resample("1B").sum()
    # Use `errors="ignore"` so that a run on a subset of the keys, which may
    # not contain every excluded label, does not fail with `KeyError`.
    daily_pnl = daily_pnl.drop(list(excluded_keys), axis=1, errors="ignore")
    return daily_pnl.sum(axis=1).cumsum()


# Sweep over the spread fraction paid and collect one aggregate cumulative PnL
# series per value.
# NOTE: `pnl_dict`, `df` and `aggr_df` keep the values from the last iteration;
# `df` is reused by later cells.
final_df = []
spread_fractions_paid = (-0.05, -0.03, -0.01, 0.0, 0.01, 0.02, 0.03)
for sfp in spread_fractions_paid:
    pnl_dict = _compute_pnl_dict(sfp)
    df = _get_pnl_df(pnl_dict)
    aggr_df = _aggregate_pnl(df)
    # Label the series with the swept value so that it becomes the column name
    # when the series are concatenated.
    aggr_df.name = sfp
    final_df.append(aggr_df)
    # Report memory usage after each iteration.
    print(dbg.get_memory_usage_as_str(None))

In [None]:
# Combine the aggregate PnL series collected in `final_df` into one dataframe,
# one column per series, and plot all the columns together.
final_df2 = pd.concat(final_df, axis=1)

final_df2.plot()

In [None]:
def sr(srs, periods_per_year=252):
    """
    Compute the mean-to-standard-deviation ratio of `srs`, annualized.

    :param srs: series (or dataframe, in which case the result is per column)
        of per-period values
    :param periods_per_year: number of periods in a year used for the
        annualization; the default 252 is the value that was previously
        hard-coded
    :return: `mean / std * sqrt(periods_per_year)`
    """
    return srs.mean() / srs.std() * np.sqrt(periods_per_year)
    
# Report the ratio separately for the data up to and from 2017-01-01 (labelled
# "ins" and "oos"). `final_df2` holds cumulative PnL, so `diff()` converts it
# back to per-period PnL before the ratio is computed.
print("ins", sr(final_df2[:"2017-01-01"].diff()))
print("oos", sr(final_df2["2017-01-01":].diff()))

# Compare to event-based

In [None]:
# Spread fractions in the convention centered at 0.5, and the same values
# mapped through `(x - 0.5) * 2` so that 0.5 becomes 0.
sfp_gp = [0.45, 0.5, 0.51, 0.52, 0.53]
sfp_paul = [(gp_value - 0.5) * 2 for gp_value in sfp_gp]
print(sfp_paul)

# Collect the PnL net of spread cost for key 0, one series per spread fraction.
final_df = []
for sfp in sfp_paul:
    keys = [0]
    pnl_dict = evaluator.compute_pnl(sfp, keys=keys, key_type="instrument")
    key = keys[0]
    instrument_pnl = pnl_dict[key]
    srs = instrument_pnl["pnl_0"] - instrument_pnl["spread_cost_0"]
    # Label the series with the spread fraction so that it becomes the column
    # name after the concat below.
    srs.name = sfp
    final_df.append(srs)

# From here on `final_df` is a dataframe with one column per spread fraction.
final_df = pd.concat(final_df, axis=1)

# Plot the cumulative PnL aggregated by business day.
daily_pnl = final_df.resample("1B").sum()
daily_pnl.cumsum().plot()

In [None]:
# Plot the cumulative sum of `srs`, i.e., the series left over from the last
# iteration of the loop in the previous cell.
srs.cumsum().plot()

# Remove outlier instruments

In [None]:
# Day-over-day change of the business-day PnL, per column of `df`.
# The result was previously bound to `pnlf_` while every use below reads
# `pnl_`, which raised `NameError` on a fresh kernel.
pnl_ = df.resample("1B").sum().diff()

# Largest absolute day-over-day change for each column.
pos = abs(pnl_).max()
pos
#mask = pnl_.tail(1) < 0
#pnl_.tail(1)[mask]

In [None]:
#pos.iloc[0].sort_values()
# Show the 10 largest values of `pos`, in ascending order.
pos.sort_values().tail(10)

In [None]:
#df.resample("1B").sum().sum(axis=0).argmin()

In [None]:
#dbg.get_memory_usage_as_str(None)

In [None]:
# #df.sum(axis=1).resample("1B").sum().cumsum().plot(color="k")
# df.resample("1B").sum().sum(axis=1).cumsum().plot(color="k")

In [None]:
# Aggregate `df` into a cumulative business-day PnL series with the helper
# defined earlier in the notebook, which applies the same resample / drop /
# sum / cumsum chain, and plot it in black.
aggr_pnl = _aggregate_pnl(df)

aggr_pnl.plot(color="k")

In [None]:
import numpy as np

def sr(srs, periods_per_year=252):
    """
    Compute the mean-to-standard-deviation ratio of `srs`, annualized.

    :param srs: series (or dataframe, in which case the result is per column)
        of per-period values
    :param periods_per_year: number of periods in a year used for the
        annualization; the default 252 is the value that was previously
        hard-coded
    :return: `mean / std * sqrt(periods_per_year)`
    """
    return srs.mean() / srs.std() * np.sqrt(periods_per_year)
    
# Report the ratio separately for the data up to and from 2017-01-01 (labelled
# "ins" and "oos"). `aggr_pnl` is a cumulative PnL, so `diff()` converts it
# back to per-period PnL before the ratio is computed.
print("ins", sr(aggr_pnl[:"2017-01-01"].diff()))
print("oos", sr(aggr_pnl["2017-01-01":].diff()))

In [None]:
# Plot only the part of the cumulative PnL from 2018-06-06 onwards.
aggr_pnl["2018-06-06":].plot()