In [None]:
!pip install alpha_vantage

Collecting alpha_vantage
  Downloading alpha_vantage-3.0.0-py3-none-any.whl.metadata (12 kB)
Downloading alpha_vantage-3.0.0-py3-none-any.whl (35 kB)
Installing collected packages: alpha_vantage
Successfully installed alpha_vantage-3.0.0


In [None]:
import os
import random
from collections import deque
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from alpha_vantage.timeseries import TimeSeries
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from datetime import datetime
import argparse

# Ensure model save directory
# MODEL_DIR = 'saved_models'
# os.makedirs(MODEL_DIR, exist_ok=True)

# --- Environment ---
class TradingEnv:
    """Single-asset trading simulator that trades one share per step.

    The environment walks through ``states`` and ``prices`` in lock-step.
    Actions are encoded as ``0 = hold``, ``1 = buy one share``,
    ``2 = sell one share``; a buy that cannot be afforded or a sell with
    no shares held is silently treated as a hold.

    Args:
        states: Indexable sequence of observations, one per time step.
        prices: Indexable sequence of trade prices aligned with ``states``.
        initial_balance: Cash available at the start of every episode.
    """

    def __init__(self, states, prices, initial_balance=10000):
        self.states = states
        self.prices = prices
        self.initial_balance = initial_balance
        self.reset()

    def reset(self):
        """Restore the starting portfolio and return the first observation."""
        self.idx = 0
        self.balance = self.initial_balance
        self.shares = 0
        self.net_worth = self.initial_balance
        self.done = False
        return self.states[self.idx]

    def step(self, action):
        """Apply ``action`` at the current price and advance one time step.

        Returns:
            ``(next_state, reward, done, info)`` where ``reward`` is the
            change in mark-to-market net worth (cash plus held shares valued
            at the new step's price) and ``info`` is an empty dict.
        """
        price = self.prices[self.idx]
        # 0 = hold, 1 = buy, 2 = sell
        if action == 1 and self.balance >= price:
            self.shares += 1
            self.balance -= price
        elif action == 2 and self.shares > 0:
            self.shares -= 1
            self.balance += price

        self.idx += 1
        if self.idx >= len(self.states):
            # Past the last sample: flag the episode as finished and keep
            # pointing at the final valid index.
            self.done = True
            self.idx = len(self.states) - 1
        next_state = self.states[self.idx]

        # Value the portfolio the same way on every step.  Counting only cash
        # on intermediate steps would score a purchase as an immediate loss
        # and produce an artificial jump on the terminal step.
        new_worth = self.balance + self.shares * self.prices[self.idx]

        reward = new_worth - self.net_worth
        self.net_worth = new_worth
        return next_state, reward, self.done, {}

# --- Data Loading ---
def load_data(symbol, api_key='None', window=10, split_date='2023-03-01',
              csv_path='/content/MSFT_daily_data.csv'):
    """Load daily OHLCV data and build windowed train/test sets.

    Args:
        symbol: Ticker requested from Alpha Vantage (unused when reading CSV).
        api_key: Alpha Vantage key.  The string ``'None'`` (or ``None``)
            selects the local CSV at ``csv_path`` instead of the API.
        window: Number of consecutive rows in each state sequence.
        split_date: Samples dated strictly before this go to the training
            set; the remainder go to the test set.
        csv_path: CSV file used when no API key is supplied.

    Returns:
        ``(train_states, train_prices, test_states, test_prices)``.  Each
        state is a ``(window, 7)`` array of standardised features and the
        matching price is the close of the row right after that window.
    """
    if api_key is None or api_key == 'None':
        df = pd.read_csv(csv_path, index_col=0, parse_dates=True)
    else:
        # Only build the API client when it is actually going to be used.
        ts = TimeSeries(key=api_key, output_format='pandas')
        df, _ = ts.get_daily(symbol=symbol, outputsize='full')

    df = df.rename(columns={'1. open':'open','2. high':'high','3. low':'low','4. close':'close','5. volume':'volume'})
    df = df.sort_index()

    df['MA5'] = df['close'].rolling(window=5).mean()
    df['MA10'] = df['close'].rolling(window=10).mean()
    # The first nine rows have no 10-day average yet.
    df = df.dropna()

    feats = ['open','high','low','close','volume','MA5','MA10']
    scaler = StandardScaler()
    # Fit the scaler on the training period only so that test-period
    # statistics do not leak into the features the agents are trained on.
    # Fall back to all rows when nothing precedes the split date.
    train_rows = df.loc[df.index < split_date, feats]
    scaler.fit(train_rows if len(train_rows) else df[feats])
    scaled = scaler.transform(df[feats])

    seq = np.array([scaled[i:i+window] for i in range(len(scaled)-window)])
    prices = df['close'].values[window:]
    dates = df.index[window:]
    mask = dates < split_date

    return seq[mask], prices[mask], seq[~mask], prices[~mask]

# --- Base LSTM + FC Policy ---
class LSTMPolicy(nn.Module):
    """LSTM encoder followed by a two-layer fully connected head.

    The head maps the LSTM output at the last time step to one value per
    action.
    """

    def __init__(self, input_size, hidden_size, num_layers, action_size):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc1 = nn.Linear(in_features=hidden_size, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=action_size)

    def forward(self, x):
        """Return per-action outputs for a batch-first sequence batch ``x``."""
        sequence_out, _ = self.lstm(x)
        # Only the final time step feeds the head.
        last_step = sequence_out[:, -1]
        hidden = F.relu(self.fc1(last_step))
        scores = self.fc2(hidden)
        return scores

# --- DQN Agent ---
class DQNAgent:
    """DQN agent with an LSTM Q-network, a replay buffer and a target network.

    Args:
        state_dim: Number of features per time step of a state sequence.
        hidden_size: Hidden size of the LSTM in the Q-network.
        action_size: Number of discrete actions.
        lr: Adam learning rate.
        buffer_capacity: Maximum number of stored transitions.
        target_update: Number of optimisation steps between target-network
            synchronisations.
    """

    def __init__(self, state_dim, hidden_size, action_size, lr=1e-3,
                 buffer_capacity=10000, target_update=10):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # Kept explicitly so action sampling does not depend on the layer
        # layout of the Q-network.
        self.action_size = action_size
        self.policy_net = LSTMPolicy(state_dim, hidden_size, num_layers=1, action_size=action_size).to(self.device)
        self.target_net = LSTMPolicy(state_dim, hidden_size, num_layers=1, action_size=action_size).to(self.device)
        self.target_net.load_state_dict(self.policy_net.state_dict())
        self.target_net.eval()

        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=lr)
        self.criterion = nn.MSELoss()
        self.buffer = deque(maxlen=buffer_capacity)
        self.gamma = 0.99
        self.target_update = target_update
        self.steps_done = 0

    def select_action(self, state, epsilon):
        """Return a random action with probability ``epsilon``, else the greedy one."""
        if random.random() < epsilon:
            return random.randrange(self.action_size)
        state = torch.FloatTensor(state).unsqueeze(0).to(self.device)
        with torch.no_grad():
            q_vals = self.policy_net(state)
        return q_vals.argmax().item()

    def store(self, transition):
        """Append a ``(state, action, reward, next_state, done)`` tuple to the buffer."""
        self.buffer.append(transition)

    def optimize(self, batch_size):
        """Run one TD-learning step on a random mini-batch.

        Does nothing until the buffer holds at least ``batch_size``
        transitions.  Every ``target_update`` calls that perform a step, the
        target network is re-synchronised with the policy network.
        """
        if len(self.buffer) < batch_size:
            return
        batch = random.sample(self.buffer, batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)

        # Convert through NumPy with explicit dtypes so that mixed Python /
        # NumPy scalars and boolean flags are handled uniformly.
        s = torch.as_tensor(np.asarray(states, dtype=np.float32), device=self.device)
        a = torch.as_tensor(np.asarray(actions, dtype=np.int64), device=self.device).unsqueeze(1)
        r = torch.as_tensor(np.asarray(rewards, dtype=np.float32), device=self.device)
        ns = torch.as_tensor(np.asarray(next_states, dtype=np.float32), device=self.device)
        d = torch.as_tensor(np.asarray(dones, dtype=np.float32), device=self.device)

        # squeeze(1) removes only the gathered action axis; a bare squeeze()
        # would also drop the batch axis when batch_size == 1.
        q_values = self.policy_net(s).gather(1, a).squeeze(1)
        with torch.no_grad():
            next_q = self.target_net(ns).max(1)[0]
        # Terminal transitions (d == 1) do not bootstrap from the next state.
        expected_q = r + self.gamma * next_q * (1 - d)

        loss = self.criterion(q_values, expected_q)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        self.steps_done += 1
        if self.steps_done % self.target_update == 0:
            self.target_net.load_state_dict(self.policy_net.state_dict())

# --- PPO & A2C Actor-Critic ---
class ActorCritic(nn.Module):
    """Shared single-layer LSTM trunk with separate actor and critic heads."""

    def __init__(self, state_dim, hidden_size, action_size):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=state_dim,
            hidden_size=hidden_size,
            num_layers=1,
            batch_first=True,
        )
        self.actor = nn.Linear(in_features=hidden_size, out_features=action_size)
        self.critic = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        """Return ``(action_probabilities, state_value)`` for a batch-first input."""
        sequence_out, _ = self.lstm(x)
        # Both heads read the LSTM output of the last time step.
        last_step = sequence_out[:, -1]
        action_probs = F.softmax(self.actor(last_step), dim=-1)
        state_value = self.critic(last_step)
        return action_probs, state_value

class PPOAgent:
    """Clipped-surrogate PPO agent built on the ``ActorCritic`` network.

    Args:
        state_dim: Number of features per time step of a state sequence.
        hidden_size: Hidden size of the shared LSTM.
        action_size: Number of discrete actions.
        lr: Adam learning rate.
        clip_eps: Half-width of the probability-ratio clipping interval.
    """

    def __init__(self, state_dim, hidden_size, action_size, lr=3e-4, clip_eps=0.2):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model = ActorCritic(state_dim, hidden_size, action_size).to(self.device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=lr)
        self.clip_eps = clip_eps

    def select_action(self, state):
        """Sample an action for a single state.

        Returns:
            ``(action, log_prob)`` where ``action`` is a Python int and
            ``log_prob`` is a tensor of shape ``(1,)``.
        """
        state = torch.FloatTensor(state).unsqueeze(0).to(self.device)
        # update() recomputes log-probabilities from the stored states, so
        # no autograd graph is needed at sampling time.
        with torch.no_grad():
            probs, _ = self.model(state)
        dist = torch.distributions.Categorical(probs)
        action = dist.sample()
        return action.item(), dist.log_prob(action)

    def update(self, batches, epochs=4, batch_size=64, gamma=0.99):
        """Run several epochs of mini-batch PPO updates.

        Args:
            batches: ``(states, actions, old_log_probs, returns, advantages)``
                tensors that share their first (sample) dimension.
            epochs: Number of passes over the collected samples.
            batch_size: Mini-batch size.
            gamma: Unused; accepted for backward compatibility.
        """
        states, actions, old_logps, returns, advs = batches
        states = states.detach()
        # Flatten the per-sample vectors to shape (N,).  Log-probabilities
        # stacked from select_action arrive as (N, 1); subtracting those from
        # an (N,) tensor would broadcast to an (N, N) ratio matrix.
        actions = actions.detach().reshape(-1)
        old_logps = old_logps.detach().reshape(-1)
        returns = returns.detach().reshape(-1)
        advs = advs.detach().reshape(-1)

        for _ in range(epochs):
            for start in range(0, len(states), batch_size):
                rows = slice(start, start + batch_size)

                probs, vals = self.model(states[rows])
                new_logps = torch.distributions.Categorical(probs).log_prob(actions[rows])
                ratio = (new_logps - old_logps[rows]).exp()

                surr1 = ratio * advs[rows]
                surr2 = torch.clamp(ratio, 1-self.clip_eps, 1+self.clip_eps) * advs[rows]
                actor_loss = -torch.min(surr1, surr2).mean()
                # squeeze(-1) keeps the batch axis even for a one-sample batch.
                critic_loss = F.mse_loss(vals.squeeze(-1), returns[rows])
                loss = actor_loss + 0.5 * critic_loss

                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

class A2CAgent(PPOAgent):
    """Advantage actor-critic agent reusing PPOAgent's network and sampling."""

    def update(self, batches):
        """Apply one policy-gradient step over the whole trajectory.

        Args:
            batches: ``(states, actions, log_probs, returns, advantages)``
                tensors sharing their first dimension.  The stored
                log-probabilities are ignored; they are recomputed from the
                current policy.
        """
        states, actions, _, returns, advs = batches
        # Flatten and detach the targets so they are treated as constants and
        # line up with the (N,) model outputs even when N == 1.
        actions = actions.reshape(-1)
        returns = returns.detach().reshape(-1)
        advs = advs.detach().reshape(-1)

        probs, vals = self.model(states)
        dist = torch.distributions.Categorical(probs)
        new_logp = dist.log_prob(actions)

        actor_loss = -(new_logp * advs).mean()
        # squeeze(-1) drops only the value axis; a bare squeeze() would also
        # drop the batch axis for a single-step trajectory.
        critic_loss = F.mse_loss(vals.squeeze(-1), returns)
        loss = actor_loss + 0.5 * critic_loss

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

# --- Helpers ---
def collect_trajectories(env, agent, steps, gamma=0.99):
    """Roll out ``agent`` in ``env`` and package the data for an update.

    The environment is reset, then stepped for at most ``steps`` steps or
    until it reports ``done``.

    Args:
        env: Object exposing ``reset()`` and ``step(action)``.
        agent: Object exposing ``select_action(state)``, ``model`` and
            ``device``.
        steps: Maximum number of environment steps to collect (>= 1).
        gamma: Discount factor for the return computation.

    Returns:
        ``(states, actions, log_probs, returns, advantages)`` tensors on
        ``agent.device``.  All but ``states`` are 1-D with one entry per
        collected step.  Advantages are discounted returns minus the
        critic's value estimates.

    Raises:
        ValueError: If ``steps`` is smaller than 1.
    """
    if steps < 1:
        raise ValueError("steps must be at least 1")

    state = env.reset()
    states, actions, logps, rewards = [], [], [], []
    for _ in range(steps):
        action, logp = agent.select_action(state)
        next_state, reward, done, _ = env.step(action)
        states.append(state)
        actions.append(action)
        logps.append(logp)
        rewards.append(reward)
        state = next_state
        if done:
            break

    # Discounted returns, filled back-to-front in a single O(n) pass.
    returns = [0.0] * len(rewards)
    running = 0.0
    for t in range(len(rewards) - 1, -1, -1):
        running = float(rewards[t]) + gamma * running
        returns[t] = running
    returns = torch.tensor(returns, dtype=torch.float32, device=agent.device)

    state_batch = torch.FloatTensor(np.array(states)).to(agent.device)
    # The value estimates only serve as a baseline here, so no graph is kept.
    with torch.no_grad():
        _, vals = agent.model(state_batch)
    advs = returns - vals.reshape(-1)

    return (
        state_batch,
        torch.LongTensor(actions).to(agent.device),
        # Flatten so the log-probabilities align element-wise with the other
        # per-step vectors instead of broadcasting against them.
        torch.stack(logps).detach().reshape(-1).to(agent.device),
        returns,
        advs
    )

def test_agent(agent, states, prices, initial_balance=10000):
    """Run ``agent`` once through a fresh ``TradingEnv`` and record the outcome.

    DQN agents act greedily (``epsilon=0``); other agents use their own
    ``select_action``.

    Args:
        agent: A ``DQNAgent`` or an agent whose ``select_action(state)``
            returns ``(action, extra)``.
        states: Observation sequence for the evaluation period.
        prices: Prices aligned with ``states``.
        initial_balance: Starting cash of the evaluation environment.

    Returns:
        ``(net_vals, actions)``: the environment's net worth after every step
        and the action taken at every step.  Both are empty when ``states``
        is empty.
    """
    if len(states) == 0:
        # Nothing to replay (e.g. the split date lies past the last sample).
        return [], []

    env = TradingEnv(states, prices, initial_balance=initial_balance)
    state = env.reset()
    done = False
    net_vals, actions = [], []
    greedy_dqn = isinstance(agent, DQNAgent)

    # Evaluation never back-propagates, so skip building autograd graphs.
    with torch.no_grad():
        while not done:
            if greedy_dqn:
                a = agent.select_action(state, epsilon=0)
            else:
                a, _ = agent.select_action(state)
            state, _, done, _ = env.step(a)
            net_vals.append(env.net_worth)
            actions.append(a)

    return net_vals, actions



In [None]:
import os, time, torch
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def compute_truth_regions(price):
    """
    Given a flat list of prices, return a list of dict-shapes for Plotly
    coloring each [t, t+1] interval green if price rises (or stays flat),
    red if it falls.

    Every rectangle spans the full price range vertically.  Fewer than two
    prices define no interval, so an empty list is returned.
    """
    if len(price) < 2:
        # Also guards min()/max() against an empty sequence.
        return []

    ymin, ymax = min(price), max(price)
    rising = 'rgba(0,200,0,0.1)'
    falling = 'rgba(200,0,0,0.1)'
    return [
        {
            'type': 'rect',
            'x0': t, 'x1': t + 1,
            'y0': ymin, 'y1': ymax,
            'fillcolor': rising if nxt >= cur else falling,
            'line': {'width': 0},
            'layer': 'below'
        }
        for t, (cur, nxt) in enumerate(zip(price[:-1], price[1:]))
    ]

def plot_ground_truth(price, shapes):
    """Show the price series as a black line over the given background shapes."""
    price_trace = go.Scatter(
        y=price,
        mode='lines',
        name='Price',
        line=dict(color='black', width=2),
    )

    fig = go.Figure()
    fig.update_layout(shapes=shapes)
    fig.add_trace(price_trace)
    fig.update_layout(
        title="Ground Truth: Test‐Price with Buy (green) / Sell (red) Regions",
        xaxis_title="Time Step",
        yaxis_title="Price",
        template="plotly_white",
    )
    fig.show()

def plot_model_on_truth(price, acts, pv, shapes, algo):
    """
    Show a two-row interactive figure for one algorithm:
     - Top: background shapes, the price line and HOLD/BUY/SELL markers
       placed at the steps where ``acts`` is 0, 1 or 2 respectively
     - Bottom: the portfolio-value curve ``pv``
    """
    label = algo.upper()
    fig = make_subplots(
        rows=2, cols=1, shared_xaxes=True,
        row_heights=[0.6, 0.4],
        vertical_spacing=0.05,
        subplot_titles=[f"{label} Actions on Price", f"{label} Portfolio Value (PnL)"]
    )

    # The shapes carry no explicit axis references.
    fig.update_layout(shapes=shapes)

    # Top row: price line
    fig.add_trace(
        go.Scatter(y=price, mode='lines', name='Price', line=dict(color='black', width=2)),
        row=1, col=1,
    )

    # Top row: one marker trace per action code, skipped when the action
    # never occurs.
    marker_specs = (
        (0, 'Model HOLD', 'circle-dot', 'black'),
        (1, 'Model BUY', 'triangle-up', 'green'),
        (2, 'Model SELL', 'triangle-down', 'red'),
    )
    for code, trace_name, symbol, colour in marker_specs:
        steps = [i for i, a in enumerate(acts) if a == code]
        if not steps:
            continue
        fig.add_trace(
            go.Scatter(
                x=steps, y=[price[i] for i in steps],
                mode='markers', name=trace_name,
                marker=dict(symbol=symbol, size=10, color=colour)
            ),
            row=1, col=1,
        )

    # Bottom row: portfolio value
    fig.add_trace(
        go.Scatter(y=pv, mode='lines', name='PnL', line=dict(dash='dash', width=2)),
        row=2, col=1,
    )

    # Axis titles and overall layout
    fig.update_xaxes(title_text="Time Step", row=2, col=1)
    fig.update_yaxes(title_text="Price", row=1, col=1)
    fig.update_yaxes(title_text="Portfolio Value", row=2, col=1)
    fig.update_layout(template="plotly_white", legend=dict(orientation='h'))
    fig.show()

'''
def main():
    # —— Hyperparams & Setup —— #
    symbol     = 'MSFT'
    #key        = 'CI2NW9Y32PYR9VRI'
    key ='None'
    episodes   = 1
    window     = 10
    batch_size = 32
    MODEL_DIR  = "./models"
    os.makedirs(MODEL_DIR, exist_ok=True)
    split_date='2023-03-01'
    # —— Load data —— #
    train_s, train_p, test_s, test_p = load_data(symbol, api_key=key, window=window,split_date=split_date)
    state_dim, action_size = train_s.shape[2], 3


    Flag=input('Do you want to train the model or ')
    # —— Train all models —— #
    algorithms = ['dqn','ppo', 'a2c']
    for algo in algorithms:
        env = TradingEnv(train_s, train_p)
        print(f"Training {algo.upper()}...")
        t0 = time.time()
        if algo=='dqn':
            agent = DQNAgent(state_dim, 64, action_size)
            eps, eps_end, decay = 1.0, 0.01, 0.995
            for ep in range(episodes):
                s, done = env.reset(), False
                while not done:
                    a = agent.select_action(s, eps)
                    ns, r, done, _ = env.step(a)
                    agent.store((s,a,r,ns,done))
                    agent.optimize(batch_size)
                    s = ns
                eps = max(eps_end, eps*decay)
                print(f" Ep {ep+1}/{episodes} — {time.time()-t0:.1f}s")
            torch.save(agent.policy_net.state_dict(), os.path.join(MODEL_DIR, f"{algo}.pth"))
        else:
            AgentCls = PPOAgent if algo=='ppo' else A2CAgent
            agent = AgentCls(state_dim, 64, action_size)
            for ep in range(episodes):
                batches = collect_trajectories(env, agent, steps=1000)
                agent.update(batches)
                print(f" Ep {ep+1}/{episodes} — {time.time()-t0:.1f}s")
            torch.save(agent.model.state_dict(), os.path.join(MODEL_DIR, f"{algo}.pth"))

    # —— Test & collect actions + PnL —— #
    price = test_p.flatten().tolist()
    acts_dict, pv_dict = {}, {}
    for algo in algorithms:
        env = TradingEnv(test_s, test_p)
        if algo=='dqn':
            agent = DQNAgent(state_dim, 64, action_size)
            agent.policy_net.load_state_dict(torch.load(os.path.join(MODEL_DIR, f"{algo}.pth")))
        else:
            AgentCls = PPOAgent if algo=='ppo' else A2CAgent
            agent = AgentCls(state_dim, 64, action_size)
            agent.model.load_state_dict(torch.load(os.path.join(MODEL_DIR, f"{algo}.pth")))
        pv, acts = test_agent(agent, test_s, test_p)
        pv_dict[algo]   = pv
        acts_dict[algo] = acts
    # —— Compute & plot ground truth —— #
    shapes = compute_truth_regions(price)
    plot_ground_truth(price, shapes)

    # —— Plot each model on the same ground truth —— #
    for algo in algorithms:
        plot_model_on_truth(price, acts_dict[algo], pv_dict[algo], shapes, algo)


if __name__ == '__main__':
    main()

'''









import os, time, torch

def _train_dqn(env, agent, episodes, batch_size, t0):
    """Train a DQN agent with epsilon-greedy exploration for ``episodes`` passes over ``env``."""
    eps, eps_end, decay = 1.0, 0.01, 0.995
    for ep in range(episodes):
        s, done = env.reset(), False
        while not done:
            a = agent.select_action(s, eps)
            ns, r, done, _ = env.step(a)
            agent.store((s, a, r, ns, done))
            agent.optimize(batch_size)
            s = ns
        # Exploration decays once per episode, not per step.
        eps = max(eps_end, eps * decay)
        print(f" Ep {ep+1}/{episodes} — {time.time()-t0:.1f}s")


def _train_actor_critic(env, agent, episodes, t0):
    """Train a PPO/A2C agent: one trajectory collection and one update per episode."""
    for ep in range(episodes):
        batches = collect_trajectories(env, agent, steps=1000)
        agent.update(batches)
        print(f" Ep {ep+1}/{episodes} — {time.time()-t0:.1f}s")


def main():
    """Train every algorithm listed in ``algorithms`` and save its weights.

    Each agent's state dict is written to ``MODEL_DIR/<algo>.pth``.
    """
    # —— Hyperparams & Setup —— #
    symbol     = 'MSFT'
    # NOTE(security): an API key committed to source should be rotated.  The
    # ALPHAVANTAGE_API_KEY environment variable takes precedence; the literal
    # remains only as a fallback so existing runs keep working.
    key        = os.environ.get('ALPHAVANTAGE_API_KEY', 'CI2NW9Y32PYR9VRI')
    episodes   = 10
    split_date = '2023-05-01'
    window     = 10
    batch_size = 64
    MODEL_DIR  = "/content"      # <-- Colab's working directory
    os.makedirs(MODEL_DIR, exist_ok=True)

    # —— Load data —— #
    train_s, train_p, test_s, test_p = load_data(
        symbol, api_key=key, window=window, split_date=split_date
    )
    state_dim, action_size = train_s.shape[2], 3

    # 'dqn' may be added to this list; it is handled below.
    algorithms = ['ppo', 'a2c']

    # —— Training —— #
    for algo in algorithms:
        env = TradingEnv(train_s, train_p)
        print(f"Training {algo.upper()}...")
        t0 = time.time()

        if algo == 'dqn':
            agent = DQNAgent(state_dim, 64, action_size)
            _train_dqn(env, agent, episodes, batch_size, t0)
            weights = agent.policy_net.state_dict()
        else:
            AgentCls = PPOAgent if algo == 'ppo' else A2CAgent
            agent = AgentCls(state_dim, 64, action_size)
            _train_actor_critic(env, agent, episodes, t0)
            weights = agent.model.state_dict()

        torch.save(weights, os.path.join(MODEL_DIR, f"{algo}.pth"))

    print("Training complete—all models saved to", MODEL_DIR)


if __name__ == '__main__':
    main()


Training PPO...
 Ep 1/10 — 2.4s
 Ep 2/10 — 4.4s
 Ep 3/10 — 6.0s
 Ep 4/10 — 7.6s
 Ep 5/10 — 9.2s
 Ep 6/10 — 10.8s
 Ep 7/10 — 12.4s
 Ep 8/10 — 14.2s
 Ep 9/10 — 16.3s
 Ep 10/10 — 18.0s
Training A2C...
 Ep 1/10 — 1.5s
 Ep 2/10 — 2.7s
 Ep 3/10 — 4.0s
 Ep 4/10 — 5.2s
 Ep 5/10 — 6.4s
 Ep 6/10 — 7.7s
 Ep 7/10 — 9.0s
 Ep 8/10 — 10.6s
 Ep 9/10 — 12.0s
 Ep 10/10 — 13.3s
Training complete—all models saved to /content


In [None]:
    # —— TESTING & PLOTTING —— #
    # Load one trained checkpoint from MODEL_DIR (just-trained or previously
    # uploaded), replay it on the test split and plot the result.

    algo='ppo'
    symbol     = 'MSFT'
    # NOTE(security): an API key committed to source should be rotated.  The
    # ALPHAVANTAGE_API_KEY environment variable takes precedence.
    key        = os.environ.get('ALPHAVANTAGE_API_KEY', 'CI2NW9Y32PYR9VRI')
    episodes   = 2
    split_date = '2023-05-01'
    window     = 10
    batch_size = 64
    MODEL_DIR  = "/content/sample_data"   # directory holding <algo>.pth
    os.makedirs(MODEL_DIR, exist_ok=True)
    train_s, train_p, test_s, test_p = load_data(
      symbol, api_key=key, window=window, split_date=split_date
    )
    state_dim, action_size = train_s.shape[2], 3

    # NOTE: test_agent builds its own TradingEnv with the default starting
    # balance, so this value is informational only.
    balance=10000
    price = test_p.flatten().tolist()
    acts_dict, pv_dict = {}, {}

    # Fail loudly on a typo instead of silently producing no output.
    if algo not in ('dqn', 'ppo', 'a2c'):
        raise ValueError(f"Unknown algorithm: {algo!r}")

    model_path = os.path.join(MODEL_DIR, f"{algo}.pth")
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Expected model file not found: {model_path}")

    print(f"Loading & testing {algo.upper()} from {model_path}...")

    # map_location lets a checkpoint saved on a GPU runtime load on CPU.
    if algo == 'dqn':
        agent = DQNAgent(state_dim, 64, action_size)
        agent.policy_net.load_state_dict(torch.load(model_path, map_location=agent.device))
    else:
        AgentCls = PPOAgent if algo=='ppo' else A2CAgent
        agent = AgentCls(state_dim, 64, action_size)
        agent.model.load_state_dict(torch.load(model_path, map_location=agent.device))

    pv, acts = test_agent(agent, test_s, test_p)
    pv_dict[algo], acts_dict[algo] = pv, acts
    print(f"  {algo.upper()} test complete.")
    print(pv)
    print(acts)

    # —— Now plot —— #
    shapes = compute_truth_regions(price)
    plot_ground_truth(price, shapes)
    plot_model_on_truth(price, acts_dict[algo], pv_dict[algo], shapes, algo)

Loading & testing PPO from /content/sample_data/ppo.pth...
  PPO test complete.
[np.float64(9830.41), np.float64(9830.41), np.float64(9997.86), np.float64(9997.86), np.float64(9997.86), np.float64(9824.36), np.float64(9824.36), np.float64(9997.915), np.float64(9824.165), np.float64(9996.735), np.float64(9824.665), np.float64(9824.665), np.float64(9997.355000000001), np.float64(9997.355000000001), np.float64(9997.355000000001), np.float64(9997.355000000001), np.float64(9997.355000000001), np.float64(9997.355000000001), np.float64(9997.355000000001), np.float64(9997.355000000001), np.float64(9997.355000000001), np.float64(9997.355000000001), np.float64(9817.265000000001), np.float64(9998.215000000002), np.float64(9818.635000000002), np.float64(9639.425000000003), np.float64(9461.605000000003), np.float64(9281.035000000003), np.float64(9100.075000000004), np.float64(9100.075000000004), np.float64(8916.765000000005), np.float64(8732.815000000004), np.float64(8918.825000000004), np.float64(

In [None]:
    # —— TESTING & PLOTTING —— #
    # Load one trained checkpoint from MODEL_DIR (just-trained or previously
    # uploaded), replay it on the test split and plot the result.

    algo='a2c'
    symbol     = 'MSFT'
    # NOTE(security): an API key committed to source should be rotated.  The
    # ALPHAVANTAGE_API_KEY environment variable takes precedence.
    key        = os.environ.get('ALPHAVANTAGE_API_KEY', 'CI2NW9Y32PYR9VRI')
    episodes   = 2
    split_date = '2023-05-01'
    window     = 10
    batch_size = 64
    MODEL_DIR  = "/content/sample_data"   # directory holding <algo>.pth
    os.makedirs(MODEL_DIR, exist_ok=True)
    train_s, train_p, test_s, test_p = load_data(
      symbol, api_key=key, window=window, split_date=split_date
    )
    state_dim, action_size = train_s.shape[2], 3

    # NOTE: test_agent builds its own TradingEnv with the default starting
    # balance, so this value is informational only.
    balance=10000
    price = test_p.flatten().tolist()
    acts_dict, pv_dict = {}, {}

    # Fail loudly on a typo instead of silently producing no output.
    if algo not in ('dqn', 'ppo', 'a2c'):
        raise ValueError(f"Unknown algorithm: {algo!r}")

    model_path = os.path.join(MODEL_DIR, f"{algo}.pth")
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Expected model file not found: {model_path}")

    print(f"Loading & testing {algo.upper()} from {model_path}...")

    # map_location lets a checkpoint saved on a GPU runtime load on CPU.
    if algo == 'dqn':
        agent = DQNAgent(state_dim, 64, action_size)
        agent.policy_net.load_state_dict(torch.load(model_path, map_location=agent.device))
    else:
        AgentCls = PPOAgent if algo=='ppo' else A2CAgent
        agent = AgentCls(state_dim, 64, action_size)
        agent.model.load_state_dict(torch.load(model_path, map_location=agent.device))

    pv, acts = test_agent(agent, test_s, test_p)
    pv_dict[algo], acts_dict[algo] = pv, acts
    print(f"  {algo.upper()} test complete.")
    print(pv)
    print(acts)

    # —— Now plot —— #
    shapes = compute_truth_regions(price)
    plot_ground_truth(price, shapes)
    plot_model_on_truth(price, acts_dict[algo], pv_dict[algo], shapes, algo)

Loading & testing A2C from /content/sample_data/a2c.pth...
  A2C test complete.
[10000, np.float64(9831.46), np.float64(9831.46), np.float64(9997.25), np.float64(9823.68), np.float64(9650.18), np.float64(9478.41), np.float64(9478.41), np.float64(9304.66), np.float64(9477.23), np.float64(9649.3), np.float64(9477.23), np.float64(9649.92), np.float64(9649.92), np.float64(9825.08), np.float64(9650.88), np.float64(9479.32), np.float64(9479.32), np.float64(9652.31), np.float64(9652.31), np.float64(9829.609999999999), np.float64(10006.859999999999), np.float64(10006.859999999999), np.float64(10006.859999999999), np.float64(10006.859999999999), np.float64(9827.65), np.float64(9827.65), np.float64(9827.65), np.float64(10008.609999999999), np.float64(9824.819999999998), np.float64(9641.509999999998), np.float64(9825.46), np.float64(9825.46), np.float64(9825.46), np.float64(10010.47), np.float64(10010.47), np.float64(10010.47), np.float64(10010.47), np.float64(9825.199999999999), np.float64(9825.