# In [None]:
from datetime import datetime

import numpy as np
import pandas as pd
from meteostat import Point, Daily

# Download daily weather for every configured location, split into a training
# window and a validation window, then normalise the result for merging with
# the price data.
locations = {'New York': Point(40.7128, -74.0060)}

weather_train_start, weather_train_end = datetime(2013, 3, 3), datetime(2023, 3, 3)
weather_val_start, weather_val_end = datetime(2023, 3, 3), datetime(2025, 3, 3)
train_frames, val_frames = [], []

for city, point in locations.items():
    try:
        for start, end, frames in ((weather_train_start, weather_train_end, train_frames),
                                   (weather_val_start, weather_val_end, val_frames)):
            data = Daily(point, start, end).fetch().reset_index()
            data['city'] = city
            frames.append(data)
    except Exception as e:
        # Best effort per city: report the failure and continue with the others.
        print(f"Error fetching weather data for {city}: {e}")

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with a message that names the real cause instead.
if not train_frames or not val_frames:
    raise ValueError("No weather data could be fetched; cannot build the weather tables.")

weather_train_all = pd.concat(train_frames).reset_index(drop=True)
weather_val_all = pd.concat(val_frames).reset_index(drop=True)
for df in (weather_train_all, weather_val_all):
    if 'time' in df.columns:
        df.rename(columns={'time': 'date'}, inplace=True)
    df['date'] = pd.to_datetime(df['date'])

# .copy() so the column assignments below write to independent frames instead
# of to slices of the *_all frames (avoids SettingWithCopyWarning).
weather_train_ny = weather_train_all[weather_train_all['city'] == 'New York'].copy()
weather_val_ny = weather_val_all[weather_val_all['city'] == 'New York'].copy()

for df in (weather_train_ny, weather_val_ny):
    if 'tavg' not in df.columns:
        if 'tmin' in df.columns and 'tmax' in df.columns:
            df['tavg'] = (df['tmin'] + df['tmax']) / 2
        else:
            # Float NaN (not pd.NA) keeps the column numeric so it can later be
            # packed into a float64 observation array.
            df['tavg'] = float('nan')
    if 'tsun' not in df.columns:
        df['tsun'] = float('nan')

from pybroker import YFinance

# Download S&P 500 prices for the training and validation windows and attach
# the New York weather rows to each trading day (left join on 'date').
yfinance_instance = YFinance()
train_start_date = '3/3/2013'
train_end_date = '3/3/2023'
val_start_date = '3/3/2023'
val_end_date = '3/3/2025'

df_train = yfinance_instance.query(
    ["^GSPC"], start_date=train_start_date, end_date=train_end_date
)
df_val = yfinance_instance.query(
    ["^GSPC"], start_date=val_start_date, end_date=val_end_date
)
for price_frame in (df_train, df_val):
    # Make sure the join key is a datetime column on the price side.
    price_frame['date'] = pd.to_datetime(price_frame['date'])

merged_train = df_train.merge(weather_train_ny, how='left', on='date')
merged_val = df_val.merge(weather_val_ny, how='left', on='date')

# Ensure the data is sorted by date so that the daily return is calculated properly.
merged_train = merged_train.sort_values("date").reset_index(drop=True)

# Calculate the daily return using the formula:
# daily_return = (adjusted_close_today - adjusted_close_yesterday) / adjusted_close_yesterday
merged_train["daily_return"] = merged_train["adj_close"].pct_change()

# Correlate the four weather variables (average temperature, precipitation,
# snowfall, wind speed) with the daily return.
# Named corr_columns rather than `vars` so the builtin vars() is not shadowed.
corr_columns = ["tavg", "prcp", "snow", "wspd", "daily_return"]
corr_matrix = merged_train[corr_columns].corr()

# Plot the correlation matrix using seaborn.
import seaborn as sns
import matplotlib.pyplot as plt

# Draw the annotated correlation heatmap on an explicit 8x6 inch axes.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
ax.set_title("Correlation Matrix of Weather Variables")
plt.show()

import os
import pickle
import random
import pandas as pd
import gym
from gym import spaces
import torch
from stable_baselines3 import PPO

def set_random_seed(seed: int) -> None:
    """
    Seed every random number generator this script relies on.

    Seeds Python's ``random``, NumPy's global generator and PyTorch's CPU
    generator; when CUDA is available, all CUDA devices are seeded as well.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

def _get_val(series, idx):
    """
    Return the value at idx using .iloc if it's a pandas Series,
    otherwise index directly (if it's a list or numpy array).
    """
    return series.iloc[idx] if hasattr(series, 'iloc') else series[idx]

class Environment(gym.Env):
    """
    A stock trading environment for OpenAI Gym with custom logging.

    Expects a DataFrame with at least the columns: 'date', 'open', and 'adj_close'.

    Observation: ``[open, adj_close, balance, stock_owned]``. The two market
    features are read from the row *before* ``current_step``; trades execute
    at the ``adj_close`` of the ``current_step`` row.

    Action: two floats. ``action[0]`` in [-1, 1] is the buy/sell signal (a
    trade is triggered only when it exceeds ``trade_threshold`` in magnitude),
    ``action[1]`` in [0, 1] is the fraction of affordable shares (buy) or of
    held shares (sell) to trade.

    Reward: percentage change of the portfolio value between the previous and
    the current row.
    """
    metadata = {'render.modes': ['human']}

    def __init__(self, data: pd.DataFrame, start_balance: float = 10000000,
                 commission: float = 0.001, slippage: float = 0.001,
                 verbose: bool = False, render_mode: str = None):
        super().__init__()
        self.data = data
        self.start_balance = start_balance
        self.commission = commission
        self.slippage = slippage
        self.verbose = verbose
        self.render_mode = render_mode

        # Expected columns.
        self.date = data['date']
        self.open_prices = data['open']
        self.adj_close_prices = data['adj_close']

        # Portfolio / episode state. Start at row 1 when possible so that a
        # "previous" row exists for the first observation.
        self.balance = start_balance
        self.current_step = 1 if len(data) > 1 else 0
        self.stock_owned = 0
        self.done = False
        self.current_portfolio_value = start_balance
        # One dict per executed step; turned into a DataFrame on demand so a
        # step costs O(1) instead of re-concatenating the whole log.
        self._trade_log = []

        # Observation space includes: [open, adj_close, balance, stock_owned]
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float64)
        # Action space: first element in [-1, 1] (buy/sell signal) and second in [0, 1] (amount)
        self.action_space = spaces.Box(low=np.array([-1, 0]), high=np.array([1, 1]), dtype=np.float64)
        # Define a threshold for triggering a trade (adjustable if needed)
        self.trade_threshold = 0.1

    @property
    def render_df(self) -> pd.DataFrame:
        """Trade log of the current episode, one row per step taken."""
        return pd.DataFrame(self._trade_log)

    def _previous_index(self) -> int:
        """Row index observed by the agent: the row before ``current_step``, floored at 0."""
        return max(self.current_step - 1, 0)

    def _signal_to_action(self, signal: float) -> str:
        """Map the buy/sell signal to 'buy', 'sell' or 'hold' using ``trade_threshold``."""
        if signal > self.trade_threshold:
            return 'buy'
        if signal < -self.trade_threshold:
            return 'sell'
        return 'hold'

    def reset(self, seed: int = None, options: dict = None):
        """Restore the initial portfolio, clear the trade log and return ``(observation, info)``."""
        # Seed through the base class; newer gym releases dropped Env.seed().
        super().reset(seed=seed)
        self.balance = self.start_balance
        self.current_step = 1 if len(self.adj_close_prices) > 1 else 0
        self.stock_owned = 0
        self.done = False
        self.current_portfolio_value = self.start_balance
        self._trade_log = []  # Reset trade log
        observation = self._get_observation()
        return observation, {}

    def _get_observation(self) -> np.ndarray:
        """Returns an observation consisting of the current market features and portfolio state."""
        idx = self._previous_index()
        open_val = _get_val(self.open_prices, idx)
        adj_close_val = _get_val(self.adj_close_prices, idx)
        return np.array([open_val, adj_close_val, self.balance, self.stock_owned], dtype=np.float64)

    def _log_trade(self, action: np.ndarray, amount: int, current_portfolio_value: float):
        """Append one record describing the step at ``current_step`` to the trade log."""
        # Past the end of the data, fall back to the last available row.
        idx = self.current_step if self.current_step < len(self.date) else -1
        self._trade_log.append({
            'Date': _get_val(self.date, idx),
            'market_value': current_portfolio_value,
            'balance': self.balance,
            'stock_owned': self.stock_owned,
            'price': _get_val(self.adj_close_prices, idx),
            'action': self._signal_to_action(action[0]),
            'amount': amount
        })

    def render(self, **kwargs):
        return self.render_all()

    def render_all(self):
        """Print the trade log in 'human' mode (returning None); otherwise return it as a DataFrame."""
        if self.render_mode == 'human':
            print(self.render_df)
        else:
            return self.render_df

    def step(self, action: np.ndarray):
        """
        Execute one trading decision at the ``current_step`` row.

        Returns ``(observation, reward, terminated, truncated, info)``.
        Raises ValueError when ``action`` is outside the action space.
        """
        if not self.action_space.contains(action):
            raise ValueError(f"Invalid action: {action}")

        # Stepping after the data is exhausted is a no-op terminal transition.
        if self.current_step >= len(self.adj_close_prices):
            self.done = True
            return self._get_observation(), 0.0, True, False, {}

        prev_price = _get_val(self.adj_close_prices, self._previous_index())
        prev_portfolio_value = self.balance + self.stock_owned * prev_price
        current_price = _get_val(self.adj_close_prices, self.current_step)
        executed_amount = 0

        trade = self._signal_to_action(action[0])
        if trade == 'buy':
            # Price per share including commission and slippage.
            unit_cost = current_price * (1 + self.commission + self.slippage)
            amount = int((self.balance / current_price) * action[1])
            if amount * unit_cost > self.balance:
                # Fees pushed the order over budget: buy the most we can afford.
                amount = int(self.balance / unit_cost)
            executed_amount = amount
            self.stock_owned += executed_amount
            self.balance -= executed_amount * unit_cost
        elif trade == 'sell':
            executed_amount = min(int(self.stock_owned * action[1]), self.stock_owned)
            self.stock_owned -= executed_amount
            self.balance += executed_amount * current_price * (1 - self.commission - self.slippage)
        # Otherwise the signal is within [-trade_threshold, trade_threshold]: hold.

        current_portfolio_value = self.balance + self.stock_owned * current_price
        self.current_portfolio_value = current_portfolio_value
        if prev_portfolio_value:
            reward = float(100 * ((current_portfolio_value - prev_portfolio_value) / prev_portfolio_value))
        else:
            # A worthless portfolio has no defined percentage change.
            reward = 0.0

        self._log_trade(action, executed_amount, current_portfolio_value)
        self.current_step += 1
        self.done = self.current_step >= len(self.adj_close_prices)
        info = {}
        return self._get_observation(), reward, self.done, False, info

class WeatherTrader(Environment):
    """
    Trading environment whose observation is extended with weather features.

    Observation layout:
    [open, adj_close, tavg, prcp, snow, wspd, balance, stock_owned].
    Any weather column absent from ``data`` is replaced by an all-NaN series
    of the same length.
    """
    def __init__(self, data: pd.DataFrame, start_balance: float = 10000000,
                 commission: float = 0.001, slippage: float = 0.001,
                 verbose: bool = False, render_mode: str = None):
        super().__init__(data, start_balance, commission, slippage, verbose, render_mode)
        # Store each weather column as an attribute of the same name.
        for feature in ('tavg', 'prcp', 'snow', 'wspd'):
            if feature in data.columns:
                column = data[feature]
            else:
                column = pd.Series([np.nan] * len(data))
            setattr(self, feature, column)
        # Eight features instead of the base environment's four.
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(8,), dtype=np.float64)

    def _get_observation(self) -> np.ndarray:
        """Return market, weather and portfolio features for the row before ``current_step``."""
        row = max(self.current_step - 1, 0)

        def value_at(source):
            # Positional lookup for pandas objects, plain indexing otherwise.
            if hasattr(source, 'iloc'):
                return source.iloc[row]
            return source[row]

        market_and_weather = [
            value_at(source)
            for source in (self.open_prices, self.adj_close_prices,
                           self.tavg, self.prcp, self.snow, self.wspd)
        ]
        portfolio = [self.balance, self.stock_owned]
        return np.array(market_and_weather + portfolio, dtype=np.float64)

# Training configuration.
markets = [
    {"name": "S&P 500", "ticker": "^GSPC"}
]

train_steps = 200000  # Adjust total timesteps as needed.
total_runs = 20

checkpoint_file = "checkpoint.pkl"
results_df = pd.DataFrame(columns=["Market", "Agent", "Phase", "Run", "Date", "market_value"])

# Load checkpoint if it exists, so an interrupted experiment resumes at the
# first run that has not been completed yet.
start_run = 0
if os.path.exists(checkpoint_file):
    # NOTE: pickle.load executes arbitrary code from the file. Only resume from
    # a checkpoint written by this script, never from an untrusted source.
    with open(checkpoint_file, 'rb') as f:
        checkpoint = pickle.load(f)
    start_run = checkpoint.get('run', 0)
    results_df = checkpoint.get('results_df', results_df)
    print(f"Resuming from run {start_run+1}")
else:
    print("Starting fresh runs...")

def _run_agent(env_cls, agent_label, model_path, train_data, val_data,
               seed, total_timesteps, market_name, run_number):
    """
    Train one PPO agent and return its labelled validation trade log.

    env_cls is the environment class to instantiate (WeatherTrader or
    Environment). The model is trained on train_data for total_timesteps,
    saved to model_path, reloaded, and rolled out once over val_data.
    Returns a DataFrame with the columns
    ["Market", "Agent", "Phase", "Run", "Date", "market_value"].
    """
    # Keyword names must match Environment.__init__.
    env_kwargs = {
        "start_balance": 10000000,
        "commission": 0.001,
        "slippage": 0.001,
        "verbose": False,
        "render_mode": '',  # anything but 'human' makes render_all() return the log
    }

    # Training phase.
    train_env = env_cls(data=train_data, **env_kwargs)
    train_env.reset(seed=seed)
    model = PPO("MlpPolicy", train_env, seed=seed, verbose=0)
    model.learn(total_timesteps=total_timesteps, progress_bar=False)
    model.save(model_path)

    # Validation phase: one full pass over the validation data.
    val_env = env_cls(data=val_data, **env_kwargs)
    model = PPO.load(model_path, env=val_env)
    obs, _ = val_env.reset(seed=seed)
    done = truncated = False
    while not (done or truncated):
        action, _ = model.predict(obs)
        obs, reward, done, truncated, info = val_env.step(action)

    val_df = val_env.render_all().copy()
    val_df["Market"] = market_name
    val_df["Agent"] = agent_label
    val_df["Phase"] = "Validation"
    val_df["Run"] = run_number
    if "date" in val_df.columns:
        val_df.rename(columns={"date": "Date"}, inplace=True)
    return val_df[["Market", "Agent", "Phase", "Run", "Date", "market_value"]]


for market in markets:
    market_name = market["name"]
    ticker = market["ticker"]
    print(f"Processing market: {market_name} ({ticker})")

    for run in range(start_run, total_runs):
        print(f"  Run {run+1}")
        seed = int(np.random.randint(0, 10000))
        print(f"    Using seed: {seed}")
        set_random_seed(seed)

        # Same seed and data for both agents so that the only difference
        # between them is whether weather features are observed.
        for env_cls, agent_label, model_path in (
            (WeatherTrader, "WeatherTrader", "temp_model_weather"),
            (Environment, "Trading", "temp_model_trading"),
        ):
            agent_val_df = _run_agent(
                env_cls, agent_label, model_path,
                merged_train.copy(), merged_val.copy(),
                seed, train_steps, market_name, run + 1,
            )
            results_df = pd.concat([results_df, agent_val_df], ignore_index=True)

        # Save cumulative results and update checkpoint.
        csv_filename = f"results_{market_name.replace(' ', '_')}.csv"
        results_df.to_csv(csv_filename, index=False)
        try:
            # For Google Colab.
            from google.colab import files
            files.download(csv_filename)
        except ImportError:
            pass

        with open(checkpoint_file, 'wb') as f:
            pickle.dump({'run': run + 1, 'results_df': results_df}, f)
        print(f"Checkpoint saved after run {run+1}.")

    # Only the first market processed after a resume skips completed runs.
    start_run = 0

print("All runs complete!")

import re
from google.colab import files
from IPython.display import display

# One figure per market: every validation run as a thin line, plus the
# per-agent mean across runs as a thicker labelled line.
unique_markets = results_df["Market"].unique()
for market_name in unique_markets:
    fig, ax = plt.subplots(figsize=(12, 6))

    # Rows belonging to this market only.
    market_data = results_df[results_df["Market"] == market_name]

    # Colour and legend label per agent (contrasting academic colours).
    agent_info = {
        "WeatherTrader": {"color": "firebrick", "label": "With Weather Data"},
        "Trading": {"color": "royalblue", "label": "Without Weather Data"}
    }

    # Per-agent list of portfolio curves, used to compute the mean curve.
    mean_curves = {"WeatherTrader": [], "Trading": []}

    for agent, look in agent_info.items():
        is_agent = market_data["Agent"] == agent
        is_validation = market_data["Phase"] == "Validation"
        agent_data = market_data[is_agent & is_validation]

        # Thin, slightly transparent line for each individual run.
        for run, group in agent_data.groupby("Run"):
            group_sorted = group.sort_values("Date")
            x, y = group_sorted["Date"], group_sorted["market_value"]
            mean_curves[agent].append(np.array(y))
            ax.plot(x, y, color=look["color"], linewidth=0.25, alpha=0.7)

        # Mean across runs; relies on every run sharing the same dates, so
        # the x-axis of the last run is reused.
        if mean_curves[agent]:
            mean_y = np.vstack(mean_curves[agent]).mean(axis=0)
            ax.plot(x, mean_y, color=look["color"], linewidth=1, label=look["label"])

    ax.grid(True, linestyle='--', alpha=0.5)
    ax.set_xlabel("Date", fontsize=12)
    ax.set_ylabel("Portfolio Value of RL Agents", fontsize=12)
    ax.set_title(
        f"Validation Portfolio Value of RL Agent Over Time for {market_name}\n(With vs. Without Weather Data)",
        fontsize=14
    )
    ax.legend(fontsize=12)
    fig.tight_layout()

    # Show inline, then write a PNG named after the sanitised market name.
    display(fig)
    safe_market_name = re.sub(r'\W+', '_', market_name)
    filename = f"plot_{safe_market_name}.png"
    fig.savefig(filename, dpi=300, format='png')
    plt.close(fig)  # Free the figure's memory.

    # Trigger a file download.
    files.download(filename)

# One figure per market: variance of the validation portfolio value across
# runs at each date, one line per agent.
unique_markets = results_df["Market"].unique()

for market_name in unique_markets:
    # Create a new figure and axis.
    fig, ax = plt.subplots(figsize=(12, 6))

    # Filter the data for this market.
    market_data = results_df[results_df["Market"] == market_name]

    # Define colors and labels.
    agent_info = {
        "WeatherTrader": {"color": "firebrick", "label": "Variance With Weather Data"},
        "Trading": {"color": "royalblue", "label": "Variance Without Weather Data"}
    }

    # Dictionary to accumulate the portfolio value curves for each run.
    run_curves = {"WeatherTrader": [], "Trading": []}

    # Process each agent type.
    for agent in ["WeatherTrader", "Trading"]:
        # Filter data for the specific agent and the Validation phase.
        agent_data = market_data[(market_data["Agent"] == agent) & (market_data["Phase"] == "Validation")]

        # Collect the portfolio curves from each run.
        for run, group in agent_data.groupby("Run"):
            group_sorted = group.sort_values("Date")
            x = group_sorted["Date"]         # Assumed consistent across runs.
            y = group_sorted["market_value"]
            # Force a float array: the column can be object-typed after being
            # concatenated onto the initially empty results frame.
            run_curves[agent].append(np.asarray(y, dtype=float))

        # If we have data for at least one run, compute and plot the variance curve.
        if run_curves[agent]:
            # Stack the curves vertically (each row is one run).
            y_stack = np.vstack(run_curves[agent])
            # Compute the variance across runs at each time point.
            variance_y = np.var(y_stack, axis=0)
            # Plot the variance against the dates of the last run.
            ax.plot(
                x,
                variance_y,
                color=agent_info[agent]["color"],
                linewidth=1,
                label=agent_info[agent]["label"]
            )

    # Customize the plot.
    ax.grid(True, linestyle="--", alpha=0.5)
    ax.set_xlabel("Date", fontsize=12)
    ax.set_ylabel("Portfolio Value Variance", fontsize=12)
    ax.set_title(
        f"Validation Portfolio Value Variance Over Time for {market_name}\n(With vs. Without Weather Data)",
        fontsize=14
    )
    ax.legend(fontsize=12)
    fig.tight_layout()

    # Display the figure inline.
    display(fig)
    # Close it afterwards, like the mean-value plot does, so figures do not
    # accumulate across markets or get rendered again by the notebook backend.
    plt.close(fig)