In [1]:
import pandas as pd
import numpy as np
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer
from finrl import config_tickers

# ==============================================================================
# 1. Get Data (Fixed for WBA error)
# ==============================================================================
# WBA was delisted/taken private, so we remove it to prevent 404 errors
ticker_list = [ticker for ticker in config_tickers.DOW_30_TICKER if ticker != 'WBA']

print(f"Downloading data for {len(ticker_list)} stocks...")
df = YahooDownloader(start_date='2020-01-01', 
                     end_date='2023-01-01', 
                     ticker_list=ticker_list).fetch_data()

# ==============================================================================
# 2. Add Technical Indicators & Covariance Matrix
# ==============================================================================
fe = FeatureEngineer(use_technical_indicator=True,
                     tech_indicator_list=['macd', 'rsi_30', 'cci_30', 'dx_30'],
                     use_turbulence=True,
                     user_defined_feature=False)

df = fe.preprocess_data(df)

# Sort to ensure alignment
df = df.sort_values(['date','tic'], ignore_index=True)
df.index = df.date.factorize()[0]

cov_list = []
return_list = []
dates_with_cov = [] # We must track dates to map them back later
lookback = 252 

# Custom loop to generate covariance matrix for each day
unique_dates = df.date.unique()

print("Calculating Covariance Matrices...")
for i in range(lookback, len(unique_dates)):
    current_date = unique_dates[i]
    
    # Slice lookback window
    data_lookback = df.loc[i-lookback:i, :]
    price_lookback = data_lookback.pivot_table(index='date', columns='tic', values='close')
    
    return_lookback = price_lookback.pct_change().dropna()
    covs = return_lookback.cov().values 
    
    cov_list.append(covs)
    return_list.append(return_lookback)
    dates_with_cov.append(current_date)

Downloading data for 29 stocks...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Shape of DataFrame:  (21924, 8)
Successfully added technical indicators
Successfully added turbulence index
Calculating Covariance Matrices...


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import gymnasium as gym
from gymnasium import spaces

# ==============================================================================
# MODIFIED ENVIRONMENT: With FED Interest Rate + Turnover Penalty
# ==============================================================================
class StockPortfolioEnv(gym.Env):
    """
    A portfolio allocation environment for FinRL.
    CUSTOMIZED: 
    1. Action Space = N Stocks + 1 Cash.
    2. Reward = Log Returns - Transaction Costs (Turnover).
    3. Cash earns Risk-Free Rate (e.g., 3.75%).
    """
    metadata = {"render.modes": ["human"]}

    def __init__(self, df, stock_dim, hmax, initial_amount, transaction_cost_pct, 
                 reward_scaling, state_space, action_space, tech_indicator_list, 
                 turbulence_threshold=None, lookback=252, day=0, 
                 risk_free_rate=0.0375):
        
        self.day = day
        self.lookback = lookback
        self.df = df
        self.stock_dim = stock_dim
        self.hmax = hmax
        self.initial_amount = initial_amount
        self.transaction_cost_pct = transaction_cost_pct
        self.reward_scaling = reward_scaling
        self.state_space = state_space
        self.tech_indicator_list = tech_indicator_list
        self.risk_free_rate = risk_free_rate
        
        # Calculate Daily Risk-Free Return
        self.daily_rf_rate = (1 + self.risk_free_rate) ** (1 / 252) - 1

        # N+1 Action Space (Stocks + Cash)
        self.action_space = spaces.Box(low=0, high=1, shape=(self.stock_dim + 1,))
        
        self.observation_space = spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(self.state_space + len(self.tech_indicator_list), self.state_space),
        )

        self.data = self.df.loc[self.day, :]
        self.covs = self.data["cov_list"].values[0]
        self.state = np.append(
            np.array(self.covs),
            [self.data[tech].values.tolist() for tech in self.tech_indicator_list],
            axis=0,
        )
        self.terminal = False
        self.portfolio_value = self.initial_amount
        self.asset_memory = [self.initial_amount]
        self.portfolio_return_memory = [0]
        
        # Inicializamos la memoria de acciones con pesos iguales
        self.actions_memory = [[1 / (self.stock_dim + 1)] * (self.stock_dim + 1)]
        self.date_memory = [self.data.date.unique()[0]]

    def step(self, actions):
        self.terminal = self.day >= len(self.df.index.unique()) - 1

        if self.terminal:
            df = pd.DataFrame(self.portfolio_return_memory)
            df.columns = ["daily_return"]
            if df["daily_return"].std() != 0:
                sharpe = (252**0.5) * df["daily_return"].mean() / df["daily_return"].std()
                print(f"Episode Finished. Sharpe: {sharpe:.2f}")
            return self.state, self.reward, self.terminal, False, {}

        else:
            # 1. Normalizar Pesos
            weights = self.softmax_normalization(actions)
            self.actions_memory.append(weights) # Guardamos los pesos actuales
            
            stock_weights = weights[:-1]
            cash_weight = weights[-1]

            last_day_memory = self.data

            # 2. Avanzar el Día
            self.day += 1
            self.data = self.df.loc[self.day, :]
            
            self.covs = self.data["cov_list"].values[0]
            self.state = np.append(
                np.array(self.covs),
                [self.data[tech].values.tolist() for tech in self.tech_indicator_list],
                axis=0,
            )

            # 3. Calcular Retorno Financiero (Stocks + Cash Interest)
            stock_returns = ((self.data.close.values / last_day_memory.close.values) - 1)
            
            weighted_stock_return = np.sum(stock_returns * stock_weights)
            weighted_cash_return = cash_weight * self.daily_rf_rate
            
            portfolio_return = weighted_stock_return + weighted_cash_return
            
            # Actualizar Valor del Portafolio
            new_portfolio_value = self.portfolio_value * (1 + portfolio_return)
            self.portfolio_value = new_portfolio_value

            self.portfolio_return_memory.append(portfolio_return)
            self.date_memory.append(self.data.date.unique()[0])
            self.asset_memory.append(new_portfolio_value)

            # ==================================================================
            # ### NUEVO: CÁLCULO DE COSTOS DE TRANSACCIÓN (TURNOVER)
            # ==================================================================
            
            # Recuperamos los pesos de ayer (índice -2) y hoy (índice -1 o 'weights')
            # Nota: Como hicimos .append(weights) al inicio del 'else', 
            # self.actions_memory[-1] son los actuales y [-2] los anteriores.
            current_weights = weights
            prev_weights = self.actions_memory[-2]
            
            # Turnover: Suma del valor absoluto de los cambios en cada activo
            turnover = np.sum(np.abs(current_weights - prev_weights))
            
            # Costo: El turnover multiplicado por tu % de comisión
            cost_penalty = turnover * self.transaction_cost_pct

            # ==================================================================
            # 4. CÁLCULO DE RECOMPENSA FINAL
            # ==================================================================
            
            # Retorno Logarítmico Puro
            log_return = np.log(new_portfolio_value / self.asset_memory[-2])
            
            # Recompensa = (Retorno - Costo) * Escala
            # Restamos el costo ANTES de escalar para mantener la proporción
            self.reward = (log_return - cost_penalty) * 100

        return self.state, self.reward, self.terminal, False, {}

    def reset(self, seed=None, options=None):
        self.day = 0
        self.data = self.df.loc[self.day, :]
        self.covs = self.data["cov_list"].values[0]
        self.state = np.append(
            np.array(self.covs),
            [self.data[tech].values.tolist() for tech in self.tech_indicator_list],
            axis=0,
        )
        self.portfolio_value = self.initial_amount
        self.terminal = False
        self.portfolio_return_memory = [0]
        self.actions_memory = [[1 / (self.stock_dim + 1)] * (self.stock_dim + 1)]
        self.date_memory = [self.data.date.unique()[0]]
        self.asset_memory = [self.initial_amount]
        return self.state, {}

    def render(self, mode="human"):
        return self.state

    def softmax_normalization(self, actions):
        numerator = np.exp(actions)
        denominator = np.sum(np.exp(actions))
        return numerator / denominator

In [3]:
# ==============================================================================
# 3. THE FIX: Correct Indexing for Portfolio Env
# ==============================================================================

# Filter to valid dates
df_portfolio = df[df.date.isin(dates_with_cov)].copy()

# Map covariance
cov_dict = dict(zip(dates_with_cov, cov_list))
df_portfolio['cov_list'] = df_portfolio['date'].map(cov_dict)

# CRITICAL FIX: Sort by date/ticker, THEN set index to factorized date
# This ensures df.loc[0] returns ALL stocks for the first day
df_portfolio = df_portfolio.sort_values(['date', 'tic'], ignore_index=True)
df_portfolio.index = df_portfolio.date.factorize()[0]

print("Dataframe ready. Index represents Day ID (0 to N).")

# ==============================================================================
# 4. Instantiate & Train
# ==============================================================================
stock_dimension = len(ticker_list)

env_kwargs = {
    "hmax": 100, 
    "initial_amount": 1000000, 
    "transaction_cost_pct": 0.001, 
    "state_space": stock_dimension, 
    "stock_dim": stock_dimension, 
    "tech_indicator_list": ['macd', 'rsi_30', 'cci_30', 'dx_30'], 
    "action_space": stock_dimension, 
    "reward_scaling": 1e-4,
    "risk_free_rate": 0.0375, # 3.75%
}

e_train_gym = StockPortfolioEnv(df=df_portfolio, **env_kwargs)

Dataframe ready. Index represents Day ID (0 to N).


In [4]:
from finrl.agents.stablebaselines3.models import DRLAgent

# We assume 'e_train_gym' is already created from your previous steps

print("Starting PPO Training...")

# 1. Initialize the Agent using the SAME environment
agent = DRLAgent(env=e_train_gym)

# 2. Get the PPO Model (instead of A2C)
# ent_coef=0.01 encourages exploration (prevents getting stuck)
model_ppo = agent.get_model("ppo", model_kwargs = {"ent_coef": 0.01}) 

# 3. Train
# PPO often benefits from slightly longer training than A2C
trained_ppo = agent.train_model(model=model_ppo, 
                                tb_log_name="ppo_portfolio",
                                total_timesteps=500000)

print("PPO Training Finished!")

# 4. Save
model_ppo.save("trained_portfolio_agent_ppo")

Starting PPO Training...
{'ent_coef': 0.01}
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Episode Finished. Sharpe: 0.45
Episode Finished. Sharpe: 0.27
Episode Finished. Sharpe: 0.34
Episode Finished. Sharpe: 0.45
-------------------------------------
| rollout/           |              |
|    ep_len_mean     | 504          |
|    ep_rew_mean     | -13.5        |
| time/              |              |
|    fps             | 958          |
|    iterations      | 1            |
|    time_elapsed    | 2            |
|    total_timesteps | 2048         |
| train/             |              |
|    reward          | -0.41568267  |
|    reward_max      | 4.4054117    |
|    reward_mean     | -0.024526345 |
|    reward_min      | -4.225025    |
-------------------------------------
Episode Finished. Sharpe: 0.54
Episode Finished. Sharpe: 0.46
Episode Finished. Sharpe: 0.43
Episode Finished. Sharpe: 0.42
------------------------------------------
|

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer
from finrl import config_tickers

# ==============================================================================
# 1. CONFIGURACIÓN DE FECHAS (Q1 2023)
# ==============================================================================
TEST_START_DATE = '2024-01-01'
TEST_END_DATE = '2025-01-01' # Hasta el 1 de Abril para incluir todo Marzo

print(f"1. Preparando datos para Q1 2023 ({TEST_START_DATE} a {TEST_END_DATE})...")

# Descargamos desde 2022 para tener el contexto (lookback) necesario
ticker_list = [ticker for ticker in config_tickers.DOW_30_TICKER if ticker != 'WBA']
df_q1 = YahooDownloader(start_date='2022-01-01', 
                        end_date=TEST_END_DATE, 
                        ticker_list=ticker_list).fetch_data()

# Indicadores
fe = FeatureEngineer(use_technical_indicator=True,
                     tech_indicator_list=['macd', 'rsi_30', 'cci_30', 'dx_30'],
                     use_turbulence=True,
                     user_defined_feature=False)
df_q1 = fe.preprocess_data(df_q1)

# Ordenar
df_q1 = df_q1.sort_values(['date','tic'], ignore_index=True)
df_q1.index = df_q1.date.factorize()[0]

# Covarianzas
print("2. Calculando Covarianzas...")
cov_list = []
dates_with_cov = []
lookback = 252 
unique_dates = df_q1.date.unique()

for i in range(lookback, len(unique_dates)):
    current_date = unique_dates[i]
    data_lookback = df_q1.loc[i-lookback:i, :]
    price_lookback = data_lookback.pivot_table(index='date', columns='tic', values='close')
    return_lookback = price_lookback.pct_change().dropna()
    covs = return_lookback.cov().values 
    cov_list.append(covs)
    dates_with_cov.append(current_date)

# Filtrar para el periodo de prueba exacto
df_test_q1 = df_q1[df_q1.date.isin(dates_with_cov)].copy()
# Aseguramos que solo tomamos fechas >= 2023-01-01 para la simulación
df_test_q1 = df_test_q1[df_test_q1.date >= TEST_START_DATE]

cov_dict = dict(zip(dates_with_cov, cov_list))
df_test_q1['cov_list'] = df_test_q1['date'].map(cov_dict)

df_test_q1 = df_test_q1.sort_values(['date', 'tic'], ignore_index=True)
df_test_q1.index = df_test_q1.date.factorize()[0]

print(f"Días de Trading en Q1: {len(df_test_q1.index.unique())}")

# ==============================================================================
# 2. EJECUTAR AGENTE EN Q1
# ==============================================================================
print("3. Ejecutando simulación...")

env_kwargs = {
    "hmax": 100, 
    "initial_amount": 1000000, 
    "transaction_cost_pct": 0.001, 
    "state_space": len(ticker_list), 
    "stock_dim": len(ticker_list), 
    "tech_indicator_list": ['macd', 'rsi_30', 'cci_30', 'dx_30'], 
    "action_space": len(ticker_list), 
    "reward_scaling": 1e-4,
    "risk_free_rate": 0.0375 # Tu tasa FED
}

env_q1 = StockPortfolioEnv(df=df_test_q1, **env_kwargs)
obs, _ = env_q1.reset()
done = False

# Asegúrate de tener tu modelo cargado (ej: model = PPO.load("..."))
# Si ya lo tienes en memoria, usa 'model' o 'model_a2c' / 'model_ppo'
while not done:
    action, _states = model_ppo.predict(obs, deterministic=True) # Cambia 'model_ppo' si usas otro nombre
    obs, rewards, done, truncated, info = env_q1.step(action)

# ==============================================================================
# 3. COMPARACIÓN CONTRA S&P 500
# ==============================================================================
print("4. Generando comparación contra S&P 500...")

# Resultados del Agente
df_result_q1 = pd.DataFrame(env_q1.asset_memory)
df_result_q1.columns = ['account_value']
df_result_q1['date'] = env_q1.date_memory
df_result_q1['daily_return'] = df_result_q1['account_value'].pct_change()
df_result_q1['cumulative_return'] = (1 + df_result_q1['daily_return']).cumprod()

# Descargar Benchmark S&P 500 para Q1
baseline_df = YahooDownloader(start_date=TEST_START_DATE, 
                              end_date=TEST_END_DATE, 
                              ticker_list=['^GSPC']).fetch_data()

baseline_df = baseline_df.sort_values('date')
# Alinear fechas
baseline_df = baseline_df[baseline_df.date.isin(df_result_q1.date.unique())]
baseline_df['daily_return'] = baseline_df['close'].pct_change().fillna(0)
baseline_df['cumulative_return'] = (1 + baseline_df['daily_return']).cumprod()

# ==============================================================================
# 4. GRÁFICO Y ESTADÍSTICAS
# ==============================================================================
plt.figure(figsize=(14, 7))

# Agente
plt.plot(df_result_q1['date'], df_result_q1['cumulative_return'], 
         label='Tu Agente (IA)', color='green', linewidth=2)

# S&P 500
limit = min(len(df_result_q1), len(baseline_df))
plt.plot(df_result_q1['date'][:limit], baseline_df['cumulative_return'][:limit], 
         label='S&P 500 (Benchmark)', color='grey', linestyle='--')

plt.title('Primer Trimestre 2023: IA vs S&P 500', fontsize=16)
plt.xlabel('Fecha')
plt.ylabel('Retorno Acumulado')
plt.legend(fontsize=12)
plt.grid(True, alpha=0.3)
plt.xticks(rotation=45)
plt.show()

# Métricas Finales Q1
agent_ret_q1 = (df_result_q1['cumulative_return'].iloc[-1] - 1) * 100
sp500_ret_q1 = (baseline_df['cumulative_return'].iloc[-1] - 1) * 100

print(f"--- RESULTADOS Q1 2023 ---")
print(f"Rendimiento Agente:   {agent_ret_q1:.2f}%")
print(f"Rendimiento S&P 500:  {sp500_ret_q1:.2f}%")
delta = agent_ret_q1 - sp500_ret_q1
if delta > 0:
    print(f"✅ Superaste al mercado por: {delta:.2f}%")
else:
    print(f"⚠️ Por debajo del mercado por: {delta:.2f}%")

1. Preparando datos para Q1 2023 (2023-01-01 a 2024-01-01)...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Shape of DataFrame:  (14529, 8)
Successfully added technical indicators
Successfully added turbulence index
2. Calculando Covarianzas...
Días de Trading en Q1: 249
3. Ejecutando simulación...
Episode Finished. Sharpe: 1.12
4. Generando comparación contra S&P 500...


[*********************100%***********************]  1 of 1 completed


Shape of DataFrame:  (250, 8)
--- RESULTADOS Q1 2023 ---
Rendimiento Agente:   12.48%
Rendimiento S&P 500:  23.80%
⚠️ Por debajo del mercado por: -11.32%


  plt.show()


In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer
from finrl.agents.stablebaselines3.models import DRLAgent
from stable_baselines3 import PPO

# ==============================================================================
# 1. TU SELECCIÓN DE ACTIVOS
# ==============================================================================
my_tickers = [
    "MSFT", "JNJ", "KO", "JPM", "CVX", 
    "UNH", "PG", "DE", "WMT", "O"
]

print(f"Entrenando con {len(my_tickers)} acciones seleccionadas.")

# ==============================================================================
# 2. PREPARACIÓN DE DATOS PARA ENTRENAMIENTO (2015 - 2025)
# ==============================================================================
print("\n1. Descargando Datos de Entrenamiento (Hasta 2025)...")
# CAMBIO: end_date extendido hasta 2025-01-01
df_train = YahooDownloader(start_date='2015-01-01', 
                           end_date='2025-01-01', 
                           ticker_list=my_tickers).fetch_data()

fe = FeatureEngineer(use_technical_indicator=True,
                     tech_indicator_list=['macd', 'rsi_30', 'cci_30', 'dx_30'],
                     use_turbulence=True,
                     user_defined_feature=False)
df_train = fe.preprocess_data(df_train)

df_train = df_train.sort_values(['date','tic'], ignore_index=True)
df_train.index = df_train.date.factorize()[0]

# Covarianzas
cov_list = []
dates_with_cov = []
lookback = 252 
unique_dates = df_train.date.unique()

print("Calculando Covarianzas (Entrenamiento)...")
for i in range(lookback, len(unique_dates)):
    current_date = unique_dates[i]
    data_lookback = df_train.loc[i-lookback:i, :]
    price_lookback = data_lookback.pivot_table(index='date', columns='tic', values='close')
    return_lookback = price_lookback.pct_change().dropna()
    covs = return_lookback.cov().values 
    cov_list.append(covs)
    dates_with_cov.append(current_date)

df_train_gym = df_train[df_train.date.isin(dates_with_cov)].copy()
cov_dict = dict(zip(dates_with_cov, cov_list))
df_train_gym['cov_list'] = df_train_gym['date'].map(cov_dict)
df_train_gym = df_train_gym.sort_values(['date', 'tic'], ignore_index=True)
df_train_gym.index = df_train_gym.date.factorize()[0]

# ==============================================================================
# 3. ENTRENAR EL AGENTE
# ==============================================================================
print("\n2. Entrenando Nuevo Modelo...")

env_kwargs = {
    "hmax": 100, 
    "initial_amount": 1000000, 
    "transaction_cost_pct": 0.001, 
    "state_space": len(my_tickers), 
    "stock_dim": len(my_tickers), 
    "tech_indicator_list": ['macd', 'rsi_30', 'cci_30', 'dx_30'], 
    "action_space": len(my_tickers), 
    "reward_scaling": 1e-4,
    "risk_free_rate": 0.0375 
}

e_train_gym = StockPortfolioEnv(df=df_train_gym, **env_kwargs)
agent = DRLAgent(env=e_train_gym)

model_custom = agent.get_model("ppo", model_kwargs = {"ent_coef": 0.01, "n_steps": 2048})
# Puedes aumentar los timesteps si tienes más datos ahora
trained_custom = agent.train_model(model=model_custom, 
                                   tb_log_name="ppo_custom_2025",
                                   total_timesteps=50000)

# ==============================================================================
# 4. PREPARAR DATOS DE PRUEBA (Q1 2025)
# ==============================================================================
print("\n3. Preparando Test Q1 2025...")

# CAMBIO: Descargamos desde 2024 para tener los 252 días previos al 1 de Enero de 2025
# Si descargáramos desde 2025-01-01, el código fallaría por falta de histórico para covarianza.
df_test = YahooDownloader(start_date='2024-01-01', 
                          end_date='2025-04-05', # Un poco más allá de Abril para asegurar datos
                          ticker_list=my_tickers).fetch_data()

df_test = fe.preprocess_data(df_test)
df_test = df_test.sort_values(['date','tic'], ignore_index=True)
df_test.index = df_test.date.factorize()[0]

# Covarianzas Test
cov_list_test = []
dates_test = []
unique_dates_test = df_test.date.unique()

for i in range(lookback, len(unique_dates_test)):
    current_date = unique_dates_test[i]
    data_lookback = df_test.loc[i-lookback:i, :]
    price_lookback = data_lookback.pivot_table(index='date', columns='tic', values='close')
    return_lookback = price_lookback.pct_change().dropna()
    covs = return_lookback.cov().values 
    cov_list_test.append(covs)
    dates_test.append(current_date)

df_test_gym = df_test[df_test.date.isin(dates_test)].copy()

# CAMBIO: FILTRO EXACTO PARA Q1 2025
df_test_gym = df_test_gym[(df_test_gym.date >= '2025-01-01') & (df_test_gym.date <= '2025-04-01')]

cov_dict_test = dict(zip(dates_test, cov_list_test))
df_test_gym['cov_list'] = df_test_gym['date'].map(cov_dict_test)
df_test_gym = df_test_gym.sort_values(['date', 'tic'], ignore_index=True)
df_test_gym.index = df_test_gym.date.factorize()[0]

# ==============================================================================
# 5. EJECUTAR BACKTEST
# ==============================================================================
print(f"Corriendo simulación sobre {len(df_test_gym.index.unique())} días...")

env_test = StockPortfolioEnv(df=df_test_gym, **env_kwargs)
obs, _ = env_test.reset()
done = False

while not done:
    action, _states = trained_custom.predict(obs, deterministic=True)
    obs, rewards, done, truncated, info = env_test.step(action)

# ==============================================================================
# 6. COMPARACIÓN Y GRÁFICO
# ==============================================================================
df_res = pd.DataFrame(env_test.asset_memory)
df_res.columns = ['account_value']
df_res['date'] = env_test.date_memory
df_res['daily_return'] = df_res['account_value'].pct_change()
df_res['cum_return'] = (1 + df_res['daily_return']).cumprod()

# CAMBIO: Benchmark para 2025
bench = YahooDownloader(start_date='2025-01-01', end_date='2025-04-01', ticker_list=['^GSPC']).fetch_data()
bench = bench.sort_values('date')
bench = bench[bench.date.isin(df_res.date.unique())]
bench['cum_return'] = (1 + bench['close'].pct_change().fillna(0)).cumprod()

# Plot
plt.figure(figsize=(12,6))
plt.plot(df_res['date'], df_res['cum_return'], label='Mi Portafolio (10 Stocks)', color='blue', linewidth=2)
plt.plot(df_res['date'][:len(bench)], bench['cum_return'], label='S&P 500', color='gray', linestyle='--')
plt.title('Estrategia Personalizada vs S&P 500 (Q1 2025)', fontsize=16)
plt.legend()
plt.grid(True, alpha=0.3)
plt.xticks(rotation=45)
plt.show()

final_perf = (df_res['cum_return'].iloc[-1] - 1) * 100
bench_perf = (bench['cum_return'].iloc[-1] - 1) * 100

print(f"Tus 10 Acciones: {final_perf:.2f}%")
print(f"S&P 500:         {bench_perf:.2f}%")

Entrenando con 10 acciones seleccionadas.

1. Descargando Datos de Entrenamiento (Hasta 2025)...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Shape of DataFrame:  (25160, 8)
Successfully added technical indicators
Successfully added turbulence index
Calculando Covarianzas (Entrenamiento)...

2. Entrenando Nuevo Modelo...
{'ent_coef': 0.01, 'n_steps': 2048}
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
------------------------------------
| time/              |             |
|    fps             | 941         |
|    iterations      | 1           |
|    time_elapsed    | 2           |
|    total_timesteps | 2048        |
| train/             |             |
|    reward          | 0.19170164  |
|    reward_max      | 9.346372    |
|    reward_mean     | 0.022023974 |
|    reward_min      | -11.082478  |
------------------------------------
Episode Finished. Sharpe: 1.17
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.26e+03    |
|    ep_rew_mean          | 46.6        |
| time/                   |             |
|   

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Shape of DataFrame:  (3160, 8)
Successfully added technical indicators
Successfully added turbulence index
Corriendo simulación sobre 61 días...
Episode Finished. Sharpe: 2.06


[*********************100%***********************]  1 of 1 completed

Shape of DataFrame:  (60, 8)
Tus 10 Acciones: 5.50%
S&P 500:         -4.37%



  plt.show()
