# Evaluate model for US Stock Env

In [None]:
import torch as th
import vectorbt as vbt
import gymnasium as gym
from copy import deepcopy
from stock_env.common.common_utils import create_performance, plot_trade_log_v2
from stock_env.envs import *
from stock_env.common.evaluation import evaluate_agent, play_an_episode
from stock_env.algos.agent import MetaAgent
from stock_env.common.common_utils import open_config
import seaborn as sns

sns.set()
import plotly.graph_objects as go

In [None]:
# Environment id, checkpoint location and environment configuration.
env_id = "SP500-v0"
state_dict_path = "../model/maml_sp500_20221217_141044.pth"
num_tasks = num_envs = 5
env_config = open_config("../configs/envs.yaml", env_id=env_id)

# A single-environment vector env, used here to construct the agent.
_env = MetaVectorEnv([lambda: gym.make(env_id) for _ in range(1)])
meta_agent = MetaAgent(_env)

# random baseline: snapshot the freshly constructed (untrained) agent BEFORE
# the checkpoint is loaded. Copying after `load_state_dict` would only
# duplicate the trained weights, so "Random" would be the same policy as MAML.
random_agent = deepcopy(meta_agent)
random_agent.eval()

# meta agent: restore the trained weights and switch to evaluation mode
meta_agent.load_state_dict(th.load(state_dict_path))
meta_agent.eval()

## Evaluation on random period

In [None]:
# Evaluate the meta agent on several sampled tasks at once:
# one sub-environment per task.
num_envs = num_tasks = 5

env_fns = [lambda: gym.make(env_id) for _ in range(num_envs)]
eval_envs = MetaVectorEnv(env_fns)
# NOTE(review): train(False) presumably switches the envs to their
# evaluation setting - confirm against MetaVectorEnv.
eval_envs.train(False)

# Give every sub-environment its own sampled task.
tasks = eval_envs.sample_task(num_tasks)
for env, task in zip(eval_envs.envs, tasks):
    env.reset_task(task)

# Reward statistics over 100 evaluation episodes.
mean, std = evaluate_agent(meta_agent, eval_envs, n_eval_episodes=100)
print(f"Mean reward: {mean:.2f} +/- {std: .2f}")

# Evaluation on specific stocks

In [None]:
# `pd` is used below but is not imported by the import cell of this notebook.
import pandas as pd

# INPUTS
# prepare tables
metrics = [
    "annual_return",
    "cum_returns_final",
    "sharpe_ratio",
    "max_drawdown",
    "annual_volatility",
    "value_at_risk",
]
agents = {
    "MAML": meta_agent,
    "Random": random_agent,
}
N_TASKS = 10
# Agent whose episode history is kept in `df` after the loop. The plotting
# cells further down read `df` and label its curve "MAML", so `df` must not
# simply be whatever agent happened to run last.
PLOT_AGENT = "MAML"

# Main process
single_eval_envs = MetaVectorEnv([lambda: gym.make(env_id) for _ in range(1)])
single_eval_envs.train(False)
tasks = single_eval_envs.sample_task(N_TASKS)

# One row per (task, metric) pair; one column per agent plus the buy-and-hold
# baseline, declared up front instead of being created by enlargement.
perf_df = pd.DataFrame(
    index=pd.MultiIndex.from_product([tasks, metrics]),
    columns=[*agents, "Buy-n-Hold"],
)
for task in tasks:
    single_eval_envs.reset_task(task)

    for agent_key, agent in agents.items():
        # run model to get detailed information in the environment
        info = play_an_episode(agent, single_eval_envs)
        history = info["final_info"][0]["final_history"]
        returns = history.set_index("time")["portfolio_value"].pct_change()
        perf = create_performance(returns, plot=False)

        # values are stored multiplied by 100 and rounded to 2 decimals
        for metric, value in perf.items():
            perf_df.loc[(task, metric), agent_key] = round(value * 100, 2)

        if agent_key == PLOT_AGENT:
            df = history

    # buy and hold performance, built from the close prices recorded in the
    # most recent episode history of this task
    holding_returns = (
        vbt.Portfolio.from_holding(history.close, init_cash=env_config.init_cash)
        .value()
        .pct_change()
    )
    perf = create_performance(holding_returns, plot=False)
    for metric, value in perf.items():
        perf_df.loc[(task, metric), "Buy-n-Hold"] = round(value * 100, 2)
perf_df

In [None]:
# Flag every (task, metric) row where the MAML value is strictly greater
# than the buy-and-hold value.
maml_scores = perf_df["MAML"]
baseline_scores = perf_df["Buy-n-Hold"]
perf_df["is_better"] = maml_scores > baseline_scores

# Show only the Sharpe-ratio and max-drawdown rows, largest MAML value first.
selected_metrics = ("sharpe_ratio", "max_drawdown")
perf_df.loc[(tasks, selected_metrics), :].sort_values("MAML", ascending=False)

In [None]:
# Plot the trade log of the episode history still held in `df` from the
# evaluation loop (it belongs to the last task that was evaluated).
plot_trade_log_v2(df)

# Compare with buy-and-hold strategy and feature strategy

In [None]:
# portfolio value from holding: simulate buying at the start and holding,
# using the close prices and the configured initial cash
holding_portfolio = vbt.Portfolio.from_holding(
    df.close, init_cash=env_config.init_cash
)
df["pv_from_holding"] = holding_portfolio.value()

# Re-index by time for plotting; the ticker is read from the first unique
# value of the "ticker_x" column.
df = df.set_index("time")
ticker = df["ticker_x"].unique()[0]

In [None]:
# Figure carrying size, title and axis labels; the traces are drawn into it
# by vectorbt below.
fig = go.Figure()
fig.update_layout(
    width=800,
    height=500,
    title=f"Buy-n-Hold Comparision, Ticker: {ticker}",
    title_x=0.5,
)
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="Portfolio Value ($)")

# Styling for the agent curve and for the buy-and-hold reference curve.
agent_trace = {
    "mode": "lines+markers",
    "line": {"width": 3, "color": "lightslategray"},
    "name": "MAML",
}
holding_trace = {
    "line": {"width": 2, "color": "dimgray"},
    "mode": "lines",
    "name": "Buy-n-Hold",
}

# Plot the agent's portfolio value against the holding value, with separate
# fill colours for the positive and negative traces.
df["portfolio_value"].vbt.plot_against(
    other=df["pv_from_holding"],
    other_trace_kwargs=holding_trace,
    trace_kwargs=agent_trace,
    pos_trace_kwargs={"fillcolor": "palegreen"},
    neg_trace_kwargs={"fillcolor": "salmon"},
    fig=fig,
)
fig.show()

# After refactor

In [None]:
from stock_env.algos.agent import Agent
from stock_env.envs import *
from stock_env.common.common_utils import open_config
from stock_env.common.env_utils import make_vec_env
import torch as th
from stock_env.common.common_utils import create_performance, plot_trade_log_v2
from stock_env.common.evaluation import play_an_episode
from stock_env.common.common_utils import open_config
import seaborn as sns
sns.set()
import plotly.graph_objects as go

def buy_and_hold_plot(df, algo_name, init_cash=1000000):
    """Plot an agent's portfolio value against a buy-and-hold baseline.

    The baseline is the value of a vectorbt holding portfolio built from
    ``df.close`` with ``init_cash`` as starting cash. The figure is shown
    with ``fig.show()``; nothing is returned.

    Args:
        df: Episode history providing the "time", "close", "portfolio_value"
            and "ticker_x" columns. The caller's frame is left unmodified.
        algo_name: Legend label for the agent's portfolio-value curve.
        init_cash: Initial cash of the buy-and-hold portfolio.
    """
    import vectorbt as vbt

    # portfolio value from holding
    holding_value = vbt.Portfolio.from_holding(
        df.close, init_cash=init_cash
    ).value()
    # `assign` returns a new frame, so the helper column does not leak into
    # the DataFrame owned by the caller.
    plot_df = df.assign(pv_from_holding=holding_value).set_index("time")
    ticker = plot_df["ticker_x"].unique()[0]

    fig = go.Figure()
    fig.update_layout(width=800, height=500)
    fig.update_layout(title=f"Buy-n-Hold Comparision, Ticker: {ticker}", title_x=0.5)
    fig.update_xaxes(title_text="Date")
    fig.update_yaxes(title_text="Portfolio Value ($)")

    plot_df["portfolio_value"].vbt.plot_against(
        other=plot_df["pv_from_holding"],
        other_trace_kwargs=dict(
            line=dict(width=2, color="dimgray"), mode="lines", name="Buy-n-Hold"
        ),
        trace_kwargs=dict(
            mode="lines+markers",
            line=dict(width=3, color="lightslategray"),
            # label the curve with the caller-supplied algorithm name instead
            # of a hard-coded one
            name=algo_name,
        ),
        pos_trace_kwargs=dict(fillcolor="palegreen"),
        neg_trace_kwargs=dict(fillcolor="salmon"),
        fig=fig,
    )
    fig.show()

In [None]:
# Environment id, PPO checkpoint and PPO configuration file.
env_id = "SP500-v0"
model_path = "../model/ppo_sp500_20221230_000928.pth"
config_path = "../configs/ppo.yaml"

# setting up: one environment fixed to the AAPL task
envs = make_vec_env(env_id, num_envs=1, task="AAPL")
# read the config through `config_path` rather than repeating the literal
args = open_config(config_path, env_id=env_id)
agent = Agent(envs, hiddens=args.hiddens)
agent.load_state_dict(th.load(model_path))
# NOTE(review): the MAML setup cell calls `.eval()` after loading weights;
# confirm whether `agent.eval()` is wanted here as well.

In [None]:
# evaluate: play one episode and take its recorded history
envs.train(False)
info = play_an_episode(agent, envs)
df = info["final_info"][0]["final_history"]

# performance report from the per-step change of the portfolio value
returns = df.set_index("time")["portfolio_value"].pct_change()
results = create_performance(returns)
plot_trade_log_v2(df)

# The checkpoint loaded for this agent is a PPO model, so pass "PPO" as the
# algorithm name rather than the copy-pasted "MAML".
buy_and_hold_plot(df, "PPO", init_cash=envs.envs[0].init_cash)