In [1]:
from stable_baselines3.common.utils import set_random_seed
import os
import numpy as np
import quantstats as qs
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Single seed shared across the notebook: it is applied to the global RNGs here
# and is also passed to `test_env.reset(seed=...)` inside `evaluate`, so changing
# this one value changes every seeded component together.
random_seed = 12346
set_random_seed(random_seed)  # Seed for Stable Baselines3
np.random.seed(random_seed)   # Seed for numpy which might be used by the environment

In [4]:
# /scripts/evaluate_model.py

def evaluate(model, test_df, test_env, window_size=30):
    """Run one evaluation episode of ``model`` on ``test_env`` and summarise it.

    The environment is reset with the notebook-level ``random_seed``, stepped
    with ``model.predict`` until it reports ``terminated`` or ``truncated``,
    and the recorded profit history is then turned into a return series from
    which the Sharpe ratio and maximum drawdown are computed via quantstats.
    The results are printed and the episode is rendered with matplotlib.

    Parameters
    ----------
    model
        Object exposing ``predict(obs)`` that returns ``(action, state)``.
    test_df : pandas.DataFrame
        Frame whose index labels the environment steps. The index must be
        convertible with ``pd.to_datetime``.
    test_env
        Gymnasium-style environment: ``reset(seed=..., options=...)`` returns
        ``(obs, info)`` and ``step(action)`` returns a 5-tuple. Its unwrapped
        form must expose ``history["total_profit"]`` and ``render_all()``, and
        the final ``info`` must contain ``"total_reward"`` and
        ``"total_profit"``.
    window_size : int, default 30
        Number of leading rows of ``test_df`` consumed as the observation
        window. The profit history is aligned with
        ``test_df.index[window_size + 1:]``.
        NOTE(review): this presumes the environment was built over the whole
        of ``test_df`` with this window size -- confirm against the cell that
        creates ``test_env``.

    Returns
    -------
    dict
        Keys ``"sharpe_ratios"``, ``"max_drawdowns"``, ``"total_rewards"`` and
        ``"total_profits"``, each mapping to a one-element list for the single
        episode that was run.

    Raises
    ------
    ValueError
        If the length of the recorded profit history does not match the
        length of ``test_df.index[window_size + 1:]``.
    """
    reward_results = {"sharpe_ratios": [], "max_drawdowns": [], "total_rewards": [], "total_profits": []}

    obs, info = test_env.reset(seed=random_seed, options=None)
    done = False

    # Roll the policy forward until the environment ends the episode.
    # NOTE(review): predict() is called with its default arguments, as before;
    # if the model samples actions stochastically by default, results depend on
    # RNG state -- consider whether deterministic prediction is wanted here.
    while not done:
        action, _states = model.predict(obs)
        obs, _reward, terminated, truncated, info = test_env.step(action)
        done = terminated or truncated

    # Registers quantstats helpers (.sharpe(), .max_drawdown()) on pandas objects.
    qs.extend_pandas()

    profit_history = test_env.unwrapped.history["total_profit"]
    episode_index = test_df.index[window_size + 1:]
    if len(profit_history) != len(episode_index):
        # pandas would raise ValueError here as well; fail with a message that
        # points at the likely cause instead.
        raise ValueError(
            f"Profit history has {len(profit_history)} entries but "
            f"test_df.index[{window_size + 1}:] has {len(episode_index)}; "
            "check that window_size matches the environment and that the "
            "environment spans all of test_df."
        )
    net_worth = pd.Series(profit_history, index=episode_index)

    # The first pct_change() value is always NaN, so it is dropped.
    returns = net_worth.pct_change().iloc[1:]
    returns.index = pd.to_datetime(returns.index)
    sharpe_ratio = returns.sharpe()
    max_drawdown = returns.max_drawdown()

    # Append results for this run
    reward_results["sharpe_ratios"].append(sharpe_ratio)
    reward_results["max_drawdowns"].append(max_drawdown)
    reward_results["total_rewards"].append(info["total_reward"])
    reward_results["total_profits"].append(info["total_profit"])

    print(reward_results)
    plt.cla()
    test_env.unwrapped.render_all()
    plt.show()
    return reward_results