In [1]:
import sys
import os

# Ensure Python finds config.py, which is expected one level above the current
# working directory. The membership check keeps this cell idempotent: re-running it
# no longer stacks duplicate entries onto sys.path.
parent_dir = os.path.abspath("..")
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [2]:
import sys
import os

# Add the src directory (resolved against the current working directory) to Python's
# search path. The append is skipped when the entry is already present, so re-running
# the cell does not grow sys.path.
# NOTE(review): the `central_bank_env` import in the following cell still raised
# ModuleNotFoundError, so the module may not live under <cwd>/src — confirm which
# directory the kernel is started from.
src_dir = os.path.join(os.getcwd(), "src")
if src_dir not in sys.path:
    sys.path.append(src_dir)


In [3]:
from central_bank_env import CentralBankEnv


ModuleNotFoundError: No module named 'central_bank_env'

In [None]:
import os
import gym
import numpy as np
import pandas as pd
import pickle
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from config import MODELS_DIR, PROCESSED_DATA_DIR, LOGS_DIR  # Import directory setup
from central_bank_env import CentralBankEnv  # Import the environment

# ✅ Input artefact locations: the VARMAX model pickle under MODELS_DIR and the
# RL training CSV under PROCESSED_DATA_DIR.
varmax_model_path, rl_train_data_path = (
    os.path.join(MODELS_DIR, "varmax_model.pkl"),
    os.path.join(PROCESSED_DATA_DIR, "rl_train_data.csv"),
)

# ✅ Create the environment function (needed for DummyVecEnv)
def make_env(optimal_lag=10, episode_length=60):
    """Build a new ``CentralBankEnv`` from the notebook's model and data paths.

    Both parameters default to the values that were previously hard-coded, so
    the zero-argument call made by ``DummyVecEnv`` behaves exactly as before
    while other settings can be tried without editing this function.

    Args:
        optimal_lag: Forwarded to ``CentralBankEnv`` as ``optimal_lag``.
        episode_length: Forwarded to ``CentralBankEnv`` as ``episode_length``.

    Returns:
        The newly constructed ``CentralBankEnv`` instance.
    """
    return CentralBankEnv(
        varmax_model_path,
        rl_train_data_path,
        optimal_lag=optimal_lag,
        episode_length=episode_length,
    )

# ✅ Wrap the environment with DummyVecEnv (a single environment built by make_env)
env = DummyVecEnv([make_env])

# ✅ Define RL model (using PPO)
# An explicit seed is passed so that training can be repeated after a kernel
# restart; change PPO_SEED to sample a different run.
# NOTE(review): bit-for-bit reproducibility may still depend on hardware/backend.
PPO_SEED = 42
tensorboard_dir = os.path.join(LOGS_DIR, "ppo_central_bank/")
model = PPO(
    "MlpPolicy",
    env,
    verbose=1,
    tensorboard_log=tensorboard_dir,
    seed=PPO_SEED,
)

# ✅ Train the model
num_timesteps = 100_000  # Adjust based on computing power
model.learn(total_timesteps=num_timesteps)

# ✅ Save trained model
model_path = os.path.join(MODELS_DIR, "ppo_central_bank")
os.makedirs(os.path.dirname(model_path), exist_ok=True)
model.save(model_path)
# Stable-Baselines3 appends ".zip" when the save path has no extension, so report
# the file that was actually written. `model_path` itself is left without the suffix.
saved_model_file = f"{model_path}.zip"
print(f"RL Model saved at: {saved_model_file}")

# ✅ Write the reward history to CSV
# `get_attr` returns one value per wrapped environment; index 0 takes the first one.
# NOTE(review): `episode_rewards` is presumably one total per finished episode —
# confirm against CentralBankEnv.
env_rewards = env.get_attr("episode_rewards")[0]
reward_log_path = os.path.join(LOGS_DIR, "rl_rewards.csv")
rewards_frame = pd.DataFrame(env_rewards, columns=["Total Reward"])
rewards_frame.to_csv(reward_log_path, index=False)
print(f"Episode rewards logged at: {reward_log_path}")
