In [1]:
from stock_env.algos.agent import Agent
from stock_env.envs import *
from stock_env.common.common_utils import open_config
from stock_env.common.env_utils import make_vec_env
import torch as th
from stock_env.common.common_utils import create_performance
from stock_env.common.evaluation import play_an_episode
from stock_env.common.common_utils import open_config
import plotly.graph_objects as go

# Reward function analysis

In [2]:
def buy_and_hold_plot(df, algo_name, sharpe, init_cash=1000000):
    """Plot the agent's portfolio value against a buy-and-hold baseline.

    The baseline is the value of a vectorbt holding portfolio built from
    ``df.close`` with the given starting cash. The caller's DataFrame is left
    untouched; the baseline column is added to an internal copy.

    Args:
        df: DataFrame providing the columns ``close``, ``time``,
            ``ticker_x`` and ``portfolio_value``.
        algo_name: Legend label for the agent's portfolio-value trace.
        sharpe: Sharpe ratio shown in the title, formatted to two decimals.
        init_cash: Starting cash handed to ``vbt.Portfolio.from_holding``.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    import vectorbt as vbt

    # Portfolio value obtained by simply holding the asset.
    holding_value = vbt.Portfolio.from_holding(
        df.close, init_cash=init_cash
    ).value()

    # `assign` works on a copy, so the frame passed in by the caller does not
    # gain a "pv_from_holding" column as a side effect of plotting.
    plot_df = df.assign(pv_from_holding=holding_value).set_index("time")
    ticker = plot_df["ticker_x"].unique()[0]

    # NOTE(review): the title and y-axis strings contain typos
    # ("Comparision", "thounsand"); left unchanged here, consider fixing.
    fig = go.Figure()
    fig.update_layout(width=800, height=500)
    fig.update_layout(
        title=f"""Buy-n-Hold Comparision, Ticker: {ticker}, Sharpe = {sharpe:.2f}""", title_x=0.5)
    fig.update_xaxes(title_text="Date")
    fig.update_yaxes(title_text="Portfolio Value (thounsand VND)")

    # Agent trace drawn against the baseline; the pos/neg trace kwargs set the
    # fill colours used on either side of the baseline.
    plot_df["portfolio_value"].vbt.plot_against(
        other=plot_df["pv_from_holding"],
        other_trace_kwargs=dict(
            line=dict(width=2, color="dimgray"), mode="lines", name="Buy-n-Hold"
        ),
        trace_kwargs=dict(
            mode="lines+markers",
            line=dict(width=3, color="lightslategray"),
            name=algo_name,
        ),
        pos_trace_kwargs=dict(fillcolor="palegreen"),
        neg_trace_kwargs=dict(fillcolor="salmon"),
        fig=fig,
    )
    fig.show()

In [3]:
# Evaluation target, algorithm and environment id; the config file path is
# derived from the algorithm name.
task = "HPG"
algo = "ppo_adapt"
env_id = "VNALL-v0"
config_path = f"../configs/{algo}.yaml"

# Folder containing the reward-variant checkpoints listed below.
# NOTE(review): these checkpoints are named for MBB while `task` is "HPG" —
# confirm the mismatch is intentional.
ppo_model_folder = "../model/evaluation_MBB/"

# One checkpoint per reward variant (score, diff, log, sharpe), all built from
# `ppo_model_folder` so the folder is defined in exactly one place.
path_score = f"{ppo_model_folder}ppo_MBB_score_reward.pth"
path_diff = f"{ppo_model_folder}ppo_MBB_diff_reward.pth"
path_log = f"{ppo_model_folder}ppo_MBB_log_reward.pth"
path_sharpe = f"{ppo_model_folder}ppo_MBB_sharpe_reward.pth"

In [4]:
# Checkpoint to evaluate. To compare reward variants instead, point
# `_model_path` at one of: path_score, path_diff, path_log, path_sharpe.
_model_path = "../model/adapt_evaluation_M/ppo_adapt_HPG_20230302_124226.pth"

# Build the environment and two agents with the hidden sizes from the config.
args = open_config(config_path, env_id=env_id)
envs = make_vec_env(env_id, num_envs=1, task=task)
agent = Agent(envs, hiddens=args.hiddens)
# Constructed the same way as `agent` but never loaded from a checkpoint.
random_agent = Agent(envs, hiddens=args.hiddens)

# map_location="cpu" lets a checkpoint saved from GPU tensors deserialize on a
# CPU-only machine; load_state_dict copies the values into the agent's
# existing parameters, so the agent's own device is unaffected.
agent.load_state_dict(th.load(_model_path, map_location="cpu"))

<All keys matched successfully>

In [18]:
# Display the repr of the loaded agent to inspect its layer structure.
agent

Agent(
  (shared_net): Sequential(
    (0): Linear(in_features=420, out_features=1024, bias=True)
    (1): ReLU()
    (2): Linear(in_features=1024, out_features=512, bias=True)
    (3): ReLU()
  )
  (critic): Sequential(
    (0): Linear(in_features=512, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=256, bias=True)
    (3): ReLU()
    (4): Linear(in_features=256, out_features=1, bias=True)
  )
  (actor_mean): Sequential(
    (0): Linear(in_features=512, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=256, bias=True)
    (3): ReLU()
    (4): Linear(in_features=256, out_features=1, bias=True)
  )
)

In [17]:
# Agent to evaluate in this cell; rebind to another agent to evaluate it instead.
_agent = agent

# Evaluate: presumably train(False) switches the environments out of training
# mode — TODO confirm against the env implementation.
envs.train(False)
# Run one episode; the history frame is read from the first entry of
# info["final_info"] (a single environment was created with num_envs=1).
info = play_an_episode(_agent, envs)
df = info["final_info"][0]["final_history"]
# Step-over-step fractional change of portfolio value, indexed by time.
# The first element is NaN because pct_change has no previous value there.
returns = df.set_index("time")["portfolio_value"].pct_change()
# Performance metrics with plotting disabled; the result is indexed by
# 'sharpe_ratio' below.
results = create_performance(returns, plot=False)
# Plot against buy-and-hold using the first environment's starting cash.
buy_and_hold_plot(df, "HPG (PPO-adapt)", sharpe=results['sharpe_ratio'], init_cash=envs.envs[0].init_cash)

Ticker: HPG, from date 2020-11-10 00:00:00 to date 2021-11-10 00:00:00

    Annual return     :  20.93%
    Cumulative return :  20.75%
    Sharpe ratio      :  2.26
    Maximum Drawdown  : -5.47%
    Annual Volatility :  8.63%
    Value-At-Risk     :  nan%
    
