## 0. Libraries

In [None]:
!pip install stable_baselines3==2.3.2
!pip install gymnasium==0.29.1
!pip install shimmy>=0.2.1

In [None]:
# NOTE(review): this upgrades stable-baselines3 and gymnasium after the previous
# cell pinned specific versions of them, and also installs the legacy `gym`
# package -- confirm whether this cell is still wanted.
pip install --upgrade stable-baselines3 gym gymnasium

In [None]:
# Mount Google Drive at /content/drive; the project path added to sys.path and
# the data/model paths used later in this notebook all live under that mount.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import sys
sys.path.append('/content/drive/My Drive/Codis_TFM/')
import os
#import gym
import numpy as np
import pandas as pd
import time
import matplotlib.pyplot as plt

from stable_baselines3 import PPO, A2C, DDPG
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise
from trading_environment import StockTradingEnv

In [None]:
import warnings
# Suppress every UserWarning from here on.
warnings.filterwarnings("ignore", category=UserWarning)

## 1. Reading data

In [None]:
# Load the preprocessed dataset from Drive and show its shape, column names
# and first rows. `data` is the frame later handed to ensemble_strategy.
data = pd.read_csv('/content/drive/My Drive/Codis_TFM/data/preprocessed_data_1d.csv')
print(data.shape)
print(data.columns)
data.head()

In [None]:
# Overall date range of the experiment; passed to create_timeline further down.
start_date = '2014-01-01'
end_date = '2023-12-31'

## 3. Training functions

In [None]:
def data_filtering(df, start, end):
    """Return the rows of ``df`` dated in ``[start, end)``, indexed by day.

    The ``datadate`` conversion is applied to the frame returned by
    ``reset_index``, not to ``df`` itself.

    Args:
        df: Frame with a ``datadate`` column (anything ``pd.to_datetime``
            accepts) and a ``tic`` column.
        start: Lower bound on ``datadate``, inclusive.
        end: Upper bound on ``datadate``, exclusive -- a row dated exactly
            ``end`` is not returned.

    Returns:
        The selected rows sorted by ``datadate`` and then ``tic``. The index
        is the 0-based ordinal of each distinct date, so every row of the
        same day carries the same index label.
    """
    frame = df.reset_index(drop=True)
    frame["datadate"] = pd.to_datetime(frame["datadate"])

    dates = frame["datadate"]
    window = frame[(dates >= start) & (dates < end)]
    window = window.sort_values(["datadate", "tic"], ignore_index=True)

    # factorize() numbers the distinct dates in order of appearance; after the
    # sort above that is chronological order.
    day_codes, _ = window["datadate"].factorize()
    window.index = day_codes
    return window

def create_timeline(start_date, end_date, initial_training_months):
    date_range = pd.date_range(start=start_date, end=end_date, freq="ME")

    # Initialize lists to store the periods
    training_periods = []
    validation_periods = []
    trading_periods = []

    # Iterate through quarters starting from the end of the initial training window
    for i in range(initial_training_months, len(date_range) - 6, 3):  # Ensure 3 months for validation and trading remain
        # Define training period
        training_start = pd.Timestamp("2014-01-01")  # Always starts on 1st January 2014
        training_end = date_range[i - 1]  # Up to the last month before validation
        training_periods.append((training_start, training_end))

        # Define validation period (next 3 months)
        validation_start = date_range[i].replace(day=1)
        validation_end = date_range[i + 2].replace(day=date_range[i + 2].days_in_month)
        validation_periods.append((validation_start, validation_end))

        # Define trading period (next 3 months after validation)
        trading_start = date_range[i + 3].replace(day=1)
        trading_end = date_range[i + 5].replace(day=date_range[i + 5].days_in_month)
        trading_periods.append((trading_start, trading_end))

    # Handle the last period if it exists
    if len(date_range) >= initial_training_months + 6:
        last_training_end = date_range[-7]  # Correct Training End for the last period
        if last_training_end < date_range[-1]:
            validation_start = date_range[-6].replace(day=1)
            validation_end = date_range[-4].replace(day=date_range[-4].days_in_month)
            trading_start = date_range[-3].replace(day=1)
            trading_end = date_range[-1].replace(day=date_range[-1].days_in_month)

            training_periods.append((pd.Timestamp("2014-01-01"), last_training_end))
            validation_periods.append((validation_start, validation_end))
            trading_periods.append((trading_start, trading_end))

    # Create a DataFrame to organize the results
    timeline_df = pd.DataFrame({
        "Training_Start": [p[0] for p in training_periods],
        "Training_End": [p[1] for p in training_periods],
        "Validation_Start": [p[0] for p in validation_periods],
        "Validation_End": [p[1] for p in validation_periods],
        "Trading_Start": [p[0] for p in trading_periods],
        "Trading_End": [p[1] for p in trading_periods],
    })

    return timeline_df

In [None]:
# Environment setup
def make_env(df, stock_dim, state_space, action_space, tech_indicators, macro_variables, initial_amount=1000000):
    """Return a ``DummyVecEnv`` holding one ``StockTradingEnv``.

    Args:
        df: Market data handed to the environment as ``df``.
        stock_dim: Passed through as ``stock_dim``.
        state_space: Passed through as ``state_space``.
        action_space: Passed through as ``action_space``.
        tech_indicators: Passed through as ``tech_indicator_list``.
        macro_variables: Passed through as ``macro_variable_list``.
        initial_amount: Passed through as ``initial_amount``.

    Returns:
        A ``DummyVecEnv`` built from a single environment factory.
    """
    env_kwargs = dict(
        df=df,
        stock_dim=stock_dim,
        state_space=state_space,
        action_space=action_space,
        tech_indicator_list=tech_indicators,
        macro_variable_list=macro_variables,
        initial_amount=initial_amount,
    )

    def _build():
        # DummyVecEnv takes callables and instantiates the environments itself.
        return StockTradingEnv(**env_kwargs)

    return DummyVecEnv([_build])

# Train an A2C agent, or reload it when it has already been trained
def train_load_a2c(env_train, model_name, timesteps=25000):
    """Load the A2C model stored as ``model_name`` or train and save a new one.

    Files live in the module-level ``MODEL_DIR``: ``<model_name>.zip`` for the
    model and ``<model_name>_asset_memory.csv`` for the ``asset_memory`` of
    ``env_train.envs[0]`` captured right after training.

    Args:
        env_train: Vectorised training environment.
        model_name: Base name of the files on disk.
        timesteps: ``total_timesteps`` used when a new model is trained.

    Returns:
        ``(model, df_asset_memory)``. ``df_asset_memory`` has a single
        ``"Portfolio Value"`` column; it is empty when a saved model is found
        without its CSV.
    """
    model_path = os.path.join(MODEL_DIR, f"{model_name}.zip")
    csv_path = os.path.join(MODEL_DIR, f"{model_name}_asset_memory.csv")

    if os.path.exists(model_path):
        print(f"[INFO] Cargando modelo existente: {model_path}")
        model = A2C.load(model_path, env=env_train)
        if os.path.exists(csv_path):
            df_asset_memory = pd.read_csv(csv_path)
        else:
            # The CSV is written after the model, so an interrupted run can
            # leave the zip on disk without it; do not crash on reload.
            print(f"[WARN] No se encontró {csv_path}; se devuelve un historial vacío.")
            df_asset_memory = pd.DataFrame(columns=["Portfolio Value"])
        return model, df_asset_memory

    # The message no longer appends the global start_date/end_date: those hold
    # the whole experiment range, not the window this model is trained on.
    print(f"[INFO] Entrenando nuevo modelo: {model_name}")
    model = A2C(
        "MlpPolicy",
        env_train,
        learning_rate=0.0007,
        n_steps=20,
        gamma=0.99,
        verbose=0
    )
    model.learn(total_timesteps=timesteps)
    model.save(model_path)

    original_env = env_train.envs[0]
    df_asset_memory = pd.DataFrame(original_env.asset_memory, columns=["Portfolio Value"])
    df_asset_memory.to_csv(csv_path, index=False)
    return model, df_asset_memory

# Train a PPO agent, or reload it when it has already been trained
def train_load_ppo(env_train, model_name, timesteps=50000):
    """Load the PPO model stored as ``model_name`` or train and save a new one.

    Files live in the module-level ``MODEL_DIR``: ``<model_name>.zip`` for the
    model and ``<model_name>_asset_memory.csv`` for the ``asset_memory`` of
    ``env_train.envs[0]`` captured right after training.

    Args:
        env_train: Vectorised training environment.
        model_name: Base name of the files on disk.
        timesteps: ``total_timesteps`` used when a new model is trained.

    Returns:
        ``(model, df_asset_memory)``. ``df_asset_memory`` has a single
        ``"Portfolio Value"`` column; it is empty when a saved model is found
        without its CSV.
    """
    model_path = os.path.join(MODEL_DIR, f"{model_name}.zip")
    csv_path = os.path.join(MODEL_DIR, f"{model_name}_asset_memory.csv")

    if os.path.exists(model_path):
        print(f"[INFO] Cargando modelo existente: {model_path}")
        model = PPO.load(model_path, env=env_train)
        if os.path.exists(csv_path):
            df_asset_memory = pd.read_csv(csv_path)
        else:
            # The CSV is written after the model, so an interrupted run can
            # leave the zip on disk without it; do not crash on reload.
            print(f"[WARN] No se encontró {csv_path}; se devuelve un historial vacío.")
            df_asset_memory = pd.DataFrame(columns=["Portfolio Value"])
        return model, df_asset_memory

    # The message no longer appends the global start_date/end_date: those hold
    # the whole experiment range, not the window this model is trained on.
    print(f"[INFO] Entrenando nuevo modelo: {model_name}")
    model = PPO(
        "MlpPolicy",
        env_train,
        learning_rate=0.0003,
        ent_coef=0.01,
        n_steps=128,
        clip_range=0.2,
        verbose=0
    )
    model.learn(total_timesteps=timesteps)
    model.save(model_path)

    original_env = env_train.envs[0]
    df_asset_memory = pd.DataFrame(original_env.asset_memory, columns=["Portfolio Value"])
    df_asset_memory.to_csv(csv_path, index=False)
    return model, df_asset_memory

# Train a DDPG agent, or reload it when it has already been trained
def train_load_ddpg(env_train, model_name, timesteps=10000):
    """Load the DDPG model stored as ``model_name`` or train and save a new one.

    Files live in the module-level ``MODEL_DIR``: ``<model_name>.zip`` for the
    model and ``<model_name>_asset_memory.csv`` for the ``asset_memory`` of
    ``env_train.envs[0]`` captured right after training.

    Args:
        env_train: Vectorised training environment.
        model_name: Base name of the files on disk.
        timesteps: ``total_timesteps`` used when a new model is trained.

    Returns:
        ``(model, df_asset_memory)``. ``df_asset_memory`` has a single
        ``"Portfolio Value"`` column; it is empty when a saved model is found
        without its CSV.
    """
    model_path = os.path.join(MODEL_DIR, f"{model_name}.zip")
    csv_path = os.path.join(MODEL_DIR, f"{model_name}_asset_memory.csv")

    if os.path.exists(model_path):
        print(f"[INFO] Cargando modelo existente: {model_path}")
        model = DDPG.load(model_path, env=env_train)
        if os.path.exists(csv_path):
            df_asset_memory = pd.read_csv(csv_path)
        else:
            # The CSV is written after the model, so an interrupted run can
            # leave the zip on disk without it; do not crash on reload.
            print(f"[WARN] No se encontró {csv_path}; se devuelve un historial vacío.")
            df_asset_memory = pd.DataFrame(columns=["Portfolio Value"])
        return model, df_asset_memory

    # The message no longer appends the global start_date/end_date: those hold
    # the whole experiment range, not the window this model is trained on.
    print(f"[INFO] Entrenando nuevo modelo: {model_name}")

    # Exploration noise: one Ornstein-Uhlenbeck component per action dimension.
    n_actions = env_train.action_space.shape[-1]
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=0.2 * np.ones(n_actions))
    model = DDPG(
        "MlpPolicy",
        env_train,
        learning_rate=0.001,
        batch_size=64,
        action_noise=action_noise,
        verbose=0
    )
    model.learn(total_timesteps=timesteps)
    model.save(model_path)

    original_env = env_train.envs[0]
    df_asset_memory = pd.DataFrame(original_env.asset_memory, columns=["Portfolio Value"])
    df_asset_memory.to_csv(csv_path, index=False)
    return model, df_asset_memory

# Score a model by the annualised Sharpe ratio of its per-step rewards
def evaluate_model(model, env):
    """Run ``model`` through ``env`` once and return a Sharpe ratio.

    The environment is reset and stepped at most once per distinct index value
    of its ``df`` attribute, stopping early when ``done`` is raised. The score
    is ``mean(rewards) / std(rewards) * sqrt(252)``.

    Args:
        model: Object exposing ``predict(obs)`` returning ``(action, state)``.
        env: Vectorised environment exposing ``reset()``, ``step(action)`` and
            ``get_attr('df')``.

    Returns:
        The Sharpe ratio as a float. When it is undefined (no rewards, a
        single reward, or zero variance) ``0.0`` is returned instead of NaN:
        a NaN score would never compare as smaller than the others, so the
        caller's ``max()`` could pick it regardless of the other models.
    """
    obs = env.reset()
    step_rewards = []

    n_steps = len(env.get_attr('df')[0].index.unique())
    for _ in range(n_steps):
        # NOTE(review): predict() runs with its default settings; confirm
        # whether deterministic actions are wanted for evaluation.
        action, _states = model.predict(obs)
        obs, rewards, done, info = env.step(action)

        # Vectorised envs return arrays; reduce to a plain scalar.
        if isinstance(rewards, np.ndarray):
            rewards = rewards.item()

        step_rewards.append(rewards)

        if done:
            break

    returns = pd.Series(step_rewards)
    volatility = returns.std()
    if pd.isna(volatility) or volatility == 0:
        return 0.0
    return returns.mean() / volatility * (252**0.5)

In [None]:
def predict_with_model(model, model_name, env):
    """Roll ``model`` through ``env`` and record the portfolio value per step.

    After each step the last entry of ``env.envs[0].asset_memory`` is stored
    (or ``0`` when the wrapped environment has no such attribute). At most one
    step is taken per distinct index value of ``env.envs[0].df``; the rollout
    stops early when ``done`` is raised.

    Args:
        model: Object exposing ``predict(obs)`` returning ``(action, state)``.
        model_name: Suffix for the output column name.
        env: Vectorised environment whose first wrapped env is inspected.

    Returns:
        DataFrame with one column, ``Portfolio_value_<model_name>``, holding
        one row per executed step.
    """
    obs = env.reset()
    wrapped_env = env.envs[0]
    max_steps = len(wrapped_env.df.index.unique())

    portfolio_values = []
    for _step in range(max_steps):
        action, _ = model.predict(obs)
        obs, rewards, done, info = env.step(action)

        # Record the portfolio value reported after this step.
        if hasattr(wrapped_env, 'asset_memory'):
            portfolio_values.append(wrapped_env.asset_memory[-1])
        else:
            portfolio_values.append(0)

        if done:
            break

    return pd.DataFrame(portfolio_values, columns=[f"Portfolio_value_{model_name}"])

In [None]:
# Run the rolling-window ensemble strategy
def ensemble_strategy(MODEL_DIR, timeline_df, df, stock_dim, state_space, action_space, tech_indicators, macro_variables):
    """Train, validate and trade A2C / PPO / DDPG over every timeline row.

    For each row of ``timeline_df`` the three agents are trained (or loaded)
    on the training window, scored with ``evaluate_model`` on the validation
    window, and the best-scoring one is used as the "ensemble" for the trading
    window. Every agent is also rolled through the trading window on its own.
    All trading-window portfolio values are concatenated and written to
    ``<MODEL_DIR>/summary_test_data.csv``.

    NOTE: the ``train_load_*`` helpers read the module-level ``MODEL_DIR``,
    not this argument; here it only determines where the summary CSV goes.

    NOTE(review): the single ``env_trade`` is seeded with the ensemble's last
    portfolio value and reused for the individual agents as well, so their
    curves restart from the ensemble's capital in every period -- confirm
    that this is intended.

    Args:
        MODEL_DIR: Directory receiving ``summary_test_data.csv``.
        timeline_df: Schedule with ``Training_*``, ``Validation_*`` and
            ``Trading_*`` start/end timestamp columns.
        df: Full dataset, sliced per window with ``data_filtering``.
        stock_dim, state_space, action_space, tech_indicators,
        macro_variables: Forwarded unchanged to ``make_env``.

    Returns:
        The summary DataFrame that was written to disk (columns
        ``Portfolio_value_ensemble``, ``Portfolio_value_A2C``,
        ``Portfolio_value_PPO``, ``Portfolio_value_DDPG``).

    Raises:
        ValueError: If ``timeline_df`` has no rows.
    """
    # Fail early with a clear message; otherwise the run would end in an
    # opaque "No objects to concatenate" error from pd.concat.
    if len(timeline_df) == 0:
        raise ValueError("timeline_df no contiene ningún período; no hay nada que ejecutar.")

    print("[INFO] Iniciando estrategia de ensamble con hiperparámetros generales...")

    # Per-strategy accumulators of trading-window predictions
    test_data = []
    a2c_data = []
    ppo_data = []
    ddpg_data = []

    for row in timeline_df.itertuples(index=False):
        # Window boundaries as ISO date strings
        start_train = row.Training_Start.strftime("%Y-%m-%d")
        end_train = row.Training_End.strftime("%Y-%m-%d")
        start_val = row.Validation_Start.strftime("%Y-%m-%d")
        end_val = row.Validation_End.strftime("%Y-%m-%d")
        start_trade = row.Trading_Start.strftime("%Y-%m-%d")
        end_trade = row.Trading_End.strftime("%Y-%m-%d")

        print(f"[INFO] Procesando período {start_train} a {end_val}...")

        # Slice the data for each window
        df_train = data_filtering(df, start_train, end_train)
        df_val = data_filtering(df, start_val, end_val)
        df_trade = data_filtering(df, start_trade, end_trade)

        # Build the environments
        print('Creación de entornos')
        env_train = make_env(df_train, stock_dim, state_space, action_space, tech_indicators, macro_variables)
        env_val = make_env(df_val, stock_dim, state_space, action_space, tech_indicators, macro_variables)

        if test_data:
            # Carry the ensemble's capital over from the previous period.
            # The value is read before it is printed so the log shows the
            # amount actually used for this period.
            previous_final_value = test_data[-1]["Portfolio_value_ensemble"].iloc[-1]
            print('dentro del if')
            print(previous_final_value)
        else:
            previous_final_value = 1000000  # default starting capital

        # Trading environment seeded with the carried-over capital
        env_trade = make_env(
            df_trade, stock_dim, state_space, action_space, tech_indicators, macro_variables,
            initial_amount=previous_final_value
        )

        # Train (or load) the three agents
        print("[INFO] Entrenando A2C...")
        a2c_model, _ = train_load_a2c(env_train, f"A2C_{start_train}_{end_train}")

        print("[INFO] Entrenando PPO...")
        ppo_model, _ = train_load_ppo(env_train, f"PPO_{start_train}_{end_train}")

        print("[INFO] Entrenando DDPG...")
        ddpg_model, _ = train_load_ddpg(env_train, f"DDPG_{start_train}_{end_train}")

        # Score them on the validation window
        print("[INFO] Evaluando modelos...")
        a2c_sharpe = evaluate_model(a2c_model, env_val)
        ppo_sharpe = evaluate_model(ppo_model, env_val)
        ddpg_sharpe = evaluate_model(ddpg_model, env_val)

        # Pick the best-scoring agent
        models = {"A2C": a2c_model, "PPO": ppo_model, "DDPG": ddpg_model}
        sharpe_ratios = {"A2C": a2c_sharpe, "PPO": ppo_sharpe, "DDPG": ddpg_sharpe}
        best_model_name = max(sharpe_ratios, key=sharpe_ratios.get)
        best_model = models[best_model_name]
        print(f"[INFO] Mejor modelo para período {start_train} a {end_val}: {best_model_name} con Sharpe Ratio {sharpe_ratios[best_model_name]:.2f}")

        # Roll every agent through the trading window
        print(f"[INFO] Predicción con modelos individuales para período {start_trade} a {end_trade}...")
        accumulators = {"A2C": a2c_data, "PPO": ppo_data, "DDPG": ddpg_data}
        for model_name, model in models.items():
            df_predicted_asset_memory = predict_with_model(model, model_name, env_trade)
            # The last recorded value is dropped -- presumably because the
            # vectorised env has already reset by then; TODO confirm.
            df_predicted_asset_memory = df_predicted_asset_memory.iloc[:-1]
            accumulators[model_name].append(df_predicted_asset_memory)

        # Roll the selected agent through the trading window as the ensemble
        print(f"[INFO] Predicción con el mejor modelo ({best_model_name}) para período {start_trade} a {end_trade}...")
        df_predicted_asset_memory = predict_with_model(best_model, 'ensemble', env_trade)

        df_predicted_asset_memory = df_predicted_asset_memory.iloc[:-1]
        test_data.append(df_predicted_asset_memory)

        print(f"[INFO] Completado período {start_train} a {end_trade}.")
        print("===============================================")

    # Stitch the periods together per strategy, then put strategies side by side
    test_data = pd.concat(test_data, ignore_index=True)
    a2c_data = pd.concat(a2c_data, ignore_index=True)
    ppo_data = pd.concat(ppo_data, ignore_index=True)
    ddpg_data = pd.concat(ddpg_data, ignore_index=True)

    summary_test_data = pd.concat([test_data, a2c_data, ppo_data, ddpg_data], axis=1)
    summary_test_data.to_csv(os.path.join(MODEL_DIR, "summary_test_data.csv"), index=False)

    print("[INFO] CSV combinados guardados con éxito.")
    return summary_test_data

In [None]:
##################################################################################
# Schedule: 60 month-ends are reserved for the first training window.
timeline_df = create_timeline(start_date, end_date, 60)

# Directory where trained models and result CSVs are stored
MODEL_DIR = '/content/drive/My Drive/Codis_TFM/saved_models_v1'
os.makedirs(MODEL_DIR, exist_ok=True)

# Environment configuration
stock_dim = 10
tech_indicator = ['macd', 'rsi', 'bandwidth', 'ichimoku', 'stoch_k', 'roc', 'vr', 'atr_14']
macro_variable = ['GDP_growth_developed', 'GDP_growth_emerging', 'GDP_growth_us', 'inflation_developed', 'inflation_emerging', 'inflation_us']
# One scalar, plus (2 + number of indicators) values per stock, plus the macro
# variables -- presumably cash, then price/holdings/indicators per stock;
# verify against StockTradingEnv.
state_space = 1 + stock_dim * (1 + 1 + len(tech_indicator)) + len(macro_variable)
action_space = stock_dim

init_time = time.time()
# Run the strategy over the whole timeline
ensemble_strategy(MODEL_DIR, timeline_df, data, stock_dim, state_space, action_space, tech_indicator, macro_variable)
finish_time = time.time()
# The original message glued the number to the label and printed it unrounded.
print(f"Tiempo ejecución: {(finish_time - init_time) / 60:.2f} minutos.")