In [1]:
import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Adjust path to find 'src'
sys.path.append(os.path.abspath(".."))

from src.data.loader import load_prices, compute_returns
from src.models.lr import expected_return_from_predictions, evaluate_prediction_series
from outputs.charts.markowitz_plot import compare_time_series, compare_frontiers

import pandas as pd
import numpy as np
from src.data.loader import load_prices
from src.models.lr import predict_daily_series_lr
from src.models.rnn import predict_daily_series_rnn

In [2]:


# 1. Setup
tickers = ["PETR4.SA", "VALE3.SA", "ITUB4.SA", "BBDC4.SA", "BBAS3.SA", "ABEV3.SA", "WEGE3.SA", "B3SA3.SA", "GGBR4.SA"]
prices = load_prices(tickers, start="2017-01-01", end="2025-06-01")

# Create the output folders before any model is fitted: DataFrame.to_csv does
# not create missing parent directories, so on a fresh checkout the save step
# would fail only after the (slow) fitting had already finished.
lr_csv_path = "../outputs/models/lr/pred_daily_series_lr.csv"
rnn_csv_path = "../outputs/models/rnn/pred_daily_series_rnn.csv"
for csv_path in (lr_csv_path, rnn_csv_path):
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)

# 2. Run Linear Regression
# The predictions are written to disk so the analysis cell can reload them
# without refitting.
print("Running LR...")
lr_preds = predict_daily_series_lr(prices, window_features=[5, 21], training_window=252)
lr_preds.to_csv(lr_csv_path)
print("LR Saved.")

# 3. Run RNN/LSTM
# NOTE(review): this step is slow on CPU (the recorded run was interrupted
# during training), so the RNN CSV may be absent even when the LR one exists.
print("Running RNN...")
rnn_preds = predict_daily_series_rnn(prices, seq_len=20, epochs=30)
rnn_preds.to_csv(rnn_csv_path)
print("RNN Saved.")

  df = yf.download(tickers, start=start, end=end)['Close']
[*********************100%***********************]  9 of 9 completed


Running LR...
Starting LR Walk-Forward (Train Window: 252, Refit: 21)...
LR Saved.
Running RNN...
Training LSTM on cpu...




KeyboardInterrupt: 

In [None]:


# 1. Setup Data
# Ticker universe handed to load_prices, one symbol per line for easy editing.
tickers = [
    "PETR4.SA",
    "VALE3.SA",
    "ITUB4.SA",
    "BBDC4.SA",
    "BBAS3.SA",
    "ABEV3.SA",
    "WEGE3.SA",
    "B3SA3.SA",
    "GGBR4.SA",
]
prices = load_prices(
    tickers,
    start="2017-01-01",
    end="2025-06-01",
)
returns_daily = compute_returns(prices, freq="daily")

# Historical Stats (for Benchmarks)
# Mean and covariance of the daily returns over the whole loaded sample.
hist_mean_daily, hist_cov_daily = returns_daily.mean(), returns_daily.cov()

# 2. Load Predictions
# Ensure you ran the generation notebook first!
def read_prediction_csv(path):
    """Read one prediction CSV, or return an empty frame if the file is missing.

    Parameters
    ----------
    path : str
        Location of the CSV. Its first column is used as the index and is
        parsed as dates.

    Returns
    -------
    pandas.DataFrame
        The parsed predictions, or an empty DataFrame when ``path`` does not
        exist (a message naming the missing file is printed in that case).
    """
    try:
        return pd.read_csv(path, index_col=0, parse_dates=True)
    except FileNotFoundError:
        print(f"Error: Prediction CSV not found: {path}. Run '1_generate_models.ipynb' first.")
        return pd.DataFrame()


# Each file is loaded on its own so that one missing CSV does not discard the
# predictions that were read successfully from the other file.
pred_lr = read_prediction_csv("../outputs/models/lr/pred_daily_series_lr.csv")
pred_rnn = read_prediction_csv("../outputs/models/rnn/pred_daily_series_rnn.csv")

# 3. Calculate "Average" Expectations for Frontier Plot
# (We assume the average prediction represents the asset's 'true' nature for the static plot)
# Only the first element of each returned pair is kept.
mu_lr_daily, _ = expected_return_from_predictions(pred_lr)
mu_rnn_daily, _ = expected_return_from_predictions(pred_rnn)

# 4. Define Models Configuration
# One dict per model. Every entry shares the historical covariance (the
# original author notes this as standard practice) and is flagged as daily
# data; entries differ in name, expected returns (used for the frontier),
# plot colour and prediction series (used for the backtest). The historical
# benchmark is static and therefore has no prediction series.
models_config = [
    {
        "name": label,
        "mean_returns": expected,
        "cov": hist_cov_daily,
        "is_monthly": False,
        "color": colour,
        "pred_series": series,
    }
    for label, expected, colour, series in (
        ("Historical Benchmark", hist_mean_daily, "black", None),
        ("Linear Regression", mu_lr_daily, "blue", pred_lr),
        ("RNN / LSTM", mu_rnn_daily, "orange", pred_rnn),
    )
]

# 5. Plot Efficient Frontiers (Static Comparison)
print("--- Efficient Frontiers (Average Predicted Return vs Risk) ---")
compare_frontiers(models_config)

# 6. Plot Time Series (Dynamic Backtest)
# Per the original note, this uses each model's 'pred_series' to rebalance monthly.
print("--- Dynamic Backtest (Monthly Rebalancing) ---")
compare_time_series(
    returns_daily,
    models_config,
    target_risk_annual=0.20,
)

# 7. Metrics Table
# Score each model's predictions against the realised daily returns, tag the
# rows with the model label, then average the accuracy columns per model.
print("--- Model Accuracy Metrics ---")
metrics_lr = evaluate_prediction_series(returns_daily, pred_lr)
metrics_rnn = evaluate_prediction_series(returns_daily, pred_rnn)
metrics_lr['Model'], metrics_rnn['Model'] = 'LR', 'RNN'

all_metrics = pd.concat([metrics_lr, metrics_rnn])
metrics_by_model = all_metrics.groupby("Model")
display(metrics_by_model[['MSE', 'R2', 'Corr']].mean())

# Optional: Histogram of Predictions
# Overlay density-normalised histograms of every predicted value from each
# model with the realised daily returns; the realised returns are restricted
# to the (-0.05, 0.05) range.
fig, ax = plt.subplots(figsize=(10, 5))
ax.hist(
    pred_lr.to_numpy().ravel(),
    bins=50,
    alpha=0.5,
    label='LR Preds',
    density=True,
)
ax.hist(
    pred_rnn.to_numpy().ravel(),
    bins=50,
    alpha=0.5,
    label='RNN Preds',
    density=True,
)
ax.hist(
    returns_daily.to_numpy().ravel(),
    bins=100,
    alpha=0.3,
    color='gray',
    label='Real Returns',
    density=True,
    range=(-0.05, 0.05),
)
ax.legend()
ax.set_title("Distribution of Predicted vs Real Returns")
plt.show()