In [1]:
# =========================================================
# 1) IMPORTS
# =========================================================
import os
import pandas as pd, numpy as np
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# =========================================================
# 2) CAMINHOS / LEITURA
# =========================================================
INPUT_PATH = r"C:\Users\kinagaki\OneDrive - Digicorner\Desktop\Trabalho de Conclusão de Curso - UFRJ.2025\Correção\versão atualizada\TESTE BITCOIN\bitcoin_ate_2023.csv"
BASE_DIR = os.path.dirname(INPUT_PATH)
OUTPUT_PATH = os.path.join(BASE_DIR, "bitcoin_pred_2024_forest.xlsx")
df = pd.read_csv(INPUT_PATH, sep=";", decimal=",", thousands=".")
# =========================================================
# 3) PADRONIZAÇÃO DE COLUNAS / TIPOS
# =========================================================
df.columns = (df.columns
  .str.strip()
  .str.replace(r"\s+", "_", regex=True)
  .str.lower()
)
rename_map = {
  'data': 'date',
  'ultimo': 'close', 'último': 'close',
  'retorno_mensal_(%)': 'ret_m', 'retorno_mensal': 'ret_m',
  'média_móvel_3m': 'mm3', 'media_movel_3m': 'mm3',
  'média_móvel_6m': 'mm6', 'media_movel_6m': 'mm6',
  'média_móvel_12m': 'mm12','media_movel_12m': 'mm12',
  'volatilidade_3m': 'vol3', 'vol3': 'vol3',
  'volatilidade_6m': 'vol6', 'vol6': 'vol6',
  'momentum': 'momentum3', 'momentum3': 'momentum3',
  'volume_relativo': 'volrel', 'volrel': 'volrel',
  'close_plus1': 'close_plus1', 'close_+1': 'close_plus1'
}
df = df.rename(columns={k: v for k, v in rename_map.items() if k in df.columns})
df['date'] = pd.to_datetime(df['date'], dayfirst=True, errors='coerce')
df = df.sort_values('date').reset_index(drop=True)
# Conversão numérica das colunas
num_cols = [
   'close','ret_m','mm3','mm6','mm12','vol3','vol6',
   'momentum3','volrel','close_plus1','selic_m','ipca_m'
]
for c in num_cols:
  if c in df.columns:
      df[c] = (df[c].astype(str)
                      .str.replace('\u00A0','',regex=False)
                      .str.replace('%','',regex=False)
                      .str.replace(' ','',regex=False))
      df[c] = pd.to_numeric(df[c], errors='coerce')
# =========================================================
# 4) TRATAMENTO CASO close_plus1 NÃO EXISTA
# =========================================================
if 'close_plus1' not in df.columns:
  df['close_plus1'] = df['close'].shift(-1)
# =========================================================
# 5) SEPARAR HISTÓRICO (até 2023-12) e REAIS DE 2024 (se houver)
# =========================================================
limite_hist = pd.Timestamp('2023-12-01')
df_hist = df[df['date'] <= limite_hist].copy()
df_2024 = df[(df['date'] >= pd.Timestamp('2024-01-01')) &
            (df['date'] <= pd.Timestamp('2024-12-31'))][['date','close']].copy()
df_2024.rename(columns={'close':'close_real'}, inplace=True)
if 'close_plus1' in df_hist.columns:
  assert pd.isna(df_hist.loc[len(df_hist)-1, 'close_plus1']), \
      "A última linha de 2023 deve estar vazia em close_plus1 (alvo desconhecido)."
print("Linhas histórico (<=2023-12):", len(df_hist))
print("Linhas 2024 (se houver no CSV):", len(df_2024))
# =========================================================
# 6) TREINO (Random Forest) COM VALIDAÇÃO TEMPORAL
# =========================================================
FEATS = ['ret_m','mm3','mm6','mm12','vol3','vol6',
        'momentum3','volrel','selic_m','ipca_m']
TARGET = 'close_plus1'
train = df_hist.dropna(subset=FEATS + [TARGET]).copy()
print("Linhas após dropna (treino):", len(train))
assert len(train) >= 12, "Poucos dados após limpeza (verifique NaNs)."
X = train[FEATS].values
y = train[TARGET].values
# Define o modelo e o grid de parâmetros
forest = RandomForestRegressor(random_state=42)
grid = {
   'n_estimators': [100, 200],
   'max_depth': [3, 5, 7, 9],
   'min_samples_split': [2, 3, 4],
   'min_samples_leaf': [1, 2]
}
tscv = TimeSeriesSplit(n_splits=5)
g = GridSearchCV(forest, grid, cv=tscv, scoring='neg_mean_absolute_error', n_jobs=-1)
g.fit(X, y)
best_forest = g.best_estimator_
# =========================================================
# 7) OUT-OF-FOLD MÉTRICAS (HISTÓRICO)
# =========================================================
y_oof = np.full_like(y, np.nan, dtype=float)
for tr, te in tscv.split(X):
  best_forest.fit(X[tr], y[tr])
  y_oof[te] = best_forest.predict(X[te])
mask = ~np.isnan(y_oof)
mae  = mean_absolute_error(y[mask], y_oof[mask])
mse  = mean_squared_error(y[mask], y_oof[mask])
rmse = np.sqrt(mse)
r2   = r2_score(y[mask], y_oof[mask])
print(f"\nRandomForest | MAE={mae:.3f} RMSE={rmse:.3f} R²={r2:.3f} best={g.best_params_}")
# =========================================================
# 8) REFIT NO HISTÓRICO COMPLETO
# =========================================================
best_forest.fit(X, y)
# =========================================================
# 9) PROJEÇÃO RECURSIVA DE 12 MESES (2024)
# =========================================================
def roll_mean(a,n): return np.mean(a[-n:]) if len(a)>=n else np.nan
def roll_std(a,n):  return np.std(a[-n:], ddof=1) if len(a)>=n else np.nan
selic_last = df_hist['selic_m'].dropna().iloc[-1] if 'selic_m' in df_hist.columns else np.nan
ipca_last  = df_hist['ipca_m'].dropna().iloc[-1]  if 'ipca_m'  in df_hist.columns else np.nan
work = df_hist[['date','close','ret_m','mm3','mm6','mm12','vol3','vol6',
               'momentum3','volrel','selic_m','ipca_m']].copy()
volrel_last3 = work['volrel'].tail(3).mean()
last_close   = work['close'].iloc[-1]
rows = []
alvos_2024 = pd.date_range('2024-01-01', '2024-12-01', freq='MS')
for next_date in alvos_2024:
  feats_now = work.iloc[-1][FEATS].to_dict()
  feats_now['selic_m'] = selic_last
  feats_now['ipca_m']  = ipca_last
  feats_now['volrel']  = volrel_last3
  x = np.array([feats_now[f] for f in FEATS], dtype=float).reshape(1, -1)
  pred_close_next = float(best_forest.predict(x)[0])
  ret_next = (pred_close_next / last_close) - 1.0
  work = pd.concat([work, pd.DataFrame([{
      'date': next_date,
      'close': pred_close_next,
      'ret_m': ret_next,
      'volrel': volrel_last3,
      'selic_m': selic_last,
      'ipca_m': ipca_last
  }])], ignore_index=True)
  closes = work['close'].values
  rets   = work['ret_m'].values
  work.loc[work.index[-1], 'mm3']  = roll_mean(closes, 3)
  work.loc[work.index[-1], 'mm6']  = roll_mean(closes, 6)
  work.loc[work.index[-1], 'mm12'] = roll_mean(closes, 12)
  work.loc[work.index[-1], 'vol3'] = roll_std(rets, 3)
  work.loc[work.index[-1], 'vol6'] = roll_std(rets, 6)
  if len(closes) >= 4:
      work.loc[work.index[-1], 'momentum3'] = (closes[-1]/closes[-4]) - 1
  last_close = pred_close_next
  rows.append({'date': next_date, 'close_pred': pred_close_next})
pred_2024 = pd.DataFrame(rows).sort_values('date').reset_index(drop=True)
# =========================================================
# 10) AVALIAÇÃO CONTRA REAIS DE 2024 (SE EXISTIREM)
# =========================================================
avaliacao_2024 = None
if len(df_2024) > 0:
  avaliacao_2024 = pred_2024.merge(df_2024, on='date', how='left')
  if 'close_real' in avaliacao_2024.columns and avaliacao_2024['close_real'].notna().any():
      avaliacao_2024['erro_abs'] = (avaliacao_2024['close_pred'] - avaliacao_2024['close_real']).abs()
      avaliacao_2024['erro_%']   = (avaliacao_2024['close_pred'] / avaliacao_2024['close_real'] - 1.0) * 100
      mae_24  = avaliacao_2024['erro_abs'].mean(skipna=True)
      rmse_24 = np.sqrt(((avaliacao_2024['close_pred'] - avaliacao_2024['close_real'])**2).mean(skipna=True))
      print(f"\n(Comparação holdout 2024) MAE={mae_24:.3f} RMSE={rmse_24:.3f}")
  else:
      print("\nSem closes reais de 2024 no CSV; apenas previsões foram geradas.")
# =========================================================
# 11) SALVAR RESULTADOS NO EXCEL
# =========================================================
try:
  with pd.ExcelWriter(OUTPUT_PATH, engine='xlsxwriter') as w:
      pred_2024.to_excel(w, index=False, sheet_name='predicoes_2024')
      pd.DataFrame([{'MAE_hist': mae, 'RMSE_hist': rmse, 'R2_hist': r2, **g.best_params_}]).to_excel(
          w, index=False, sheet_name='metricas_hist')
      if avaliacao_2024 is not None:
          avaliacao_2024.to_excel(w, index=False, sheet_name='avaliacao_2024')
except Exception:
  with pd.ExcelWriter(OUTPUT_PATH, engine='openpyxl') as w:
      pred_2024.to_excel(w, index=False, sheet_name='predicoes_2024')
      pd.DataFrame([{'MAE_hist': mae, 'RMSE_hist': rmse, 'R2_hist': r2, **g.best_params_}]).to_excel(
          w, index=False, sheet_name='metricas_hist')
      if avaliacao_2024 is not None:
          avaliacao_2024.to_excel(w, index=False, sheet_name='avaliacao_2024')
print(f"\n✅ Arquivo gerado com sucesso em:\n{OUTPUT_PATH}")

Linhas histórico (<=2023-12): 48
Linhas 2024 (se houver no CSV): 0


KeyError: ['selic_m', 'ipca_m']

In [2]:
# ==============================
# RANDOM FOREST – BITCOIN 2024
# ==============================
import os
import pandas as pd, numpy as np
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
INPUT_PATH = r"C:\Users\kinagaki\OneDrive - Digicorner\Desktop\Trabalho de Conclusão de Curso - UFRJ.2025\Correção\versão atualizada\TESTE BITCOIN\bitcoin_ate_2023.csv"
BASE_DIR   = os.path.dirname(INPUT_PATH)
OUTPUT_PATH = os.path.join(BASE_DIR, "bitcoin_pred_2024_forest.xlsx")
df = pd.read_csv(INPUT_PATH, sep=";", decimal=",", thousands=".")
df.columns = (df.columns.str.strip()
                       .str.replace(r"\s+","_", regex=True)
                       .str.lower())
rename_map = {
   'data':'date','ultimo':'close','último':'close',
   'retorno_mensal_(%)':'ret_m','retorno_mensal':'ret_m',
   'média_móvel_3m':'mm3','media_movel_3m':'mm3',
   'média_móvel_6m':'mm6','media_movel_6m':'mm6',
   'média_móvel_12m':'mm12','media_movel_12m':'mm12',
   'volatilidade_3m':'vol3','vol3':'vol3',
   'volatilidade_6m':'vol6','vol6':'vol6',
   'momentum':'momentum3','momentum3':'momentum3',
   'volume_relativo':'volrel','volrel':'volrel',
   'close_plus1':'close_plus1','close_+1':'close_plus1'
}
df = df.rename(columns={k:v for k,v in rename_map.items() if k in df.columns})
df['date'] = pd.to_datetime(df['date'], dayfirst=True, errors='coerce')
df = df.sort_values('date').reset_index(drop=True)
num_cols = ['close','ret_m','mm3','mm6','mm12','vol3','vol6','momentum3','volrel','close_plus1']
for c in num_cols:
   if c in df.columns:
       df[c] = (df[c].astype(str).str.replace('\u00A0','',regex=False)
                             .str.replace('%','',regex=False)
                             .str.replace(' ','',regex=False))
       df[c] = pd.to_numeric(df[c], errors='coerce')
if 'close_plus1' not in df.columns:
   df['close_plus1'] = df['close'].shift(-1)
limite_hist = pd.Timestamp('2023-12-01')
df_hist = df[df['date'] <= limite_hist].copy()
df_2024 = df[(df['date'] >= '2024-01-01') & (df['date'] <= '2024-12-31')][['date','close']].copy().rename(columns={'close':'close_real'})
if 'close_plus1' in df_hist.columns:
   assert pd.isna(df_hist.loc[len(df_hist)-1,'close_plus1'])
FEATS  = ['ret_m','mm3','mm6','mm12','vol3','vol6','momentum3','volrel']
TARGET = 'close_plus1'
train = df_hist.dropna(subset=FEATS+[TARGET]).copy()
X, y = train[FEATS].values, train[TARGET].values
forest = RandomForestRegressor(random_state=42)
grid = {
   'n_estimators':[100,200],
   'max_depth':[3,5,7,9],
   'min_samples_split':[2,3,4],
   'min_samples_leaf':[1,2]
}
tscv = TimeSeriesSplit(n_splits=5)
g = GridSearchCV(forest, grid, cv=tscv, scoring='neg_mean_absolute_error', n_jobs=-1)
g.fit(X,y)
best_forest = g.best_estimator_
# OOF
y_oof = np.full_like(y, np.nan, dtype=float)
for tr,te in tscv.split(X):
   best_forest.fit(X[tr], y[tr])
   y_oof[te] = best_forest.predict(X[te])
mask = ~np.isnan(y_oof)
mae  = mean_absolute_error(y[mask], y_oof[mask])
rmse = mean_squared_error(y[mask], y_oof[mask], squared=False)
r2   = r2_score(y[mask], y_oof[mask])
print(f"Forest HIST | MAE={mae:.3f} RMSE={rmse:.3f} R²={r2:.3f} best={g.best_params_}")
best_forest.fit(X,y)
# Projeção recursiva
def roll_mean(a,n): return np.mean(a[-n:]) if len(a)>=n else np.nan
def roll_std(a,n):  return np.std(a[-n:], ddof=1) if len(a)>=n else np.nan
work = df_hist[['date','close','ret_m','mm3','mm6','mm12','vol3','vol6','momentum3','volrel']].copy()
volrel_last3 = work['volrel'].tail(3).mean()
last_close   = work['close'].iloc[-1]
rows = []
for next_date in pd.date_range('2024-01-01','2024-12-01',freq='MS'):
   feats_now = work.iloc[-1][FEATS].to_dict()
   feats_now['volrel'] = volrel_last3
   x = np.array([feats_now[f] for f in FEATS], dtype=float).reshape(1,-1)
   pred_close_next = float(best_forest.predict(x)[0])
   ret_next = (pred_close_next/last_close) - 1.0
   work = pd.concat([work, pd.DataFrame([{
       'date': next_date, 'close': pred_close_next, 'ret_m': ret_next, 'volrel': volrel_last3
   }])], ignore_index=True)
   closes = work['close'].values
   rets   = work['ret_m'].values
   work.loc[work.index[-1],'mm3']  = roll_mean(closes,3)
   work.loc[work.index[-1],'mm6']  = roll_mean(closes,6)
   work.loc[work.index[-1],'mm12'] = roll_mean(closes,12)
   work.loc[work.index[-1],'vol3'] = roll_std(rets,3)
   work.loc[work.index[-1],'vol6'] = roll_std(rets,6)
   if len(closes)>=4:
       work.loc[work.index[-1],'momentum3'] = (closes[-1]/closes[-4]) - 1
   last_close = pred_close_next
   rows.append({'date': next_date, 'close_pred': pred_close_next})
pred_2024 = pd.DataFrame(rows).sort_values('date').reset_index(drop=True)
avaliacao_2024 = None
if len(df_2024)>0:
   avaliacao_2024 = pred_2024.merge(df_2024, on='date', how='left')
   if avaliacao_2024['close_real'].notna().any():
       avaliacao_2024['erro_abs'] = (avaliacao_2024['close_pred']-avaliacao_2024['close_real']).abs()
       avaliacao_2024['erro_%']   = (avaliacao_2024['close_pred']/avaliacao_2024['close_real']-1)*100
       mae_24  = avaliacao_2024['erro_abs'].mean(skipna=True)
       rmse_24 = np.sqrt(((avaliacao_2024['close_pred']-avaliacao_2024['close_real'])**2).mean(skipna=True))
       print(f"(Holdout 2024) MAE={mae_24:.3f} RMSE={rmse_24:.3f}")
try:
   with pd.ExcelWriter(OUTPUT_PATH, engine='xlsxwriter') as w:
       pred_2024.to_excel(w, index=False, sheet_name='predicoes_2024')
       pd.DataFrame([{'MAE_hist':mae,'RMSE_hist':rmse,'R2_hist':r2, **g.best_params_}]).to_excel(
           w, index=False, sheet_name='metricas_hist')
       if avaliacao_2024 is not None:
           avaliacao_2024.to_excel(w, index=False, sheet_name='avaliacao_2024')
except Exception:
   with pd.ExcelWriter(OUTPUT_PATH, engine='openpyxl') as w:
       pred_2024.to_excel(w, index=False, sheet_name='predicoes_2024')
       pd.DataFrame([{'MAE_hist':mae,'RMSE_hist':rmse,'R2_hist':r2, **g.best_params_}]).to_excel(
           w, index=False, sheet_name='metricas_hist')
       if avaliacao_2024 is not None:
           avaliacao_2024.to_excel(w, index=False, sheet_name='avaliacao_2024')
print(f"✅ Arquivo gerado em:\n{OUTPUT_PATH}")

TypeError: got an unexpected keyword argument 'squared'

In [3]:
# =========================================================
# 1) IMPORTS
# =========================================================
import os
import pandas as pd, numpy as np
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# =========================================================
# 2) CAMINHOS / LEITURA
# =========================================================
INPUT_PATH = r"C:\Users\kinagaki\OneDrive - Digicorner\Desktop\Trabalho de Conclusão de Curso - UFRJ.2025\Correção\versão atualizada\TESTE BITCOIN\bitcoin_ate_2023.csv"
BASE_DIR = os.path.dirname(INPUT_PATH)
OUTPUT_PATH = os.path.join(BASE_DIR, "bitcoin_pred_2024_forest.xlsx")
df = pd.read_csv(INPUT_PATH, sep=";", decimal=",", thousands=".")
# =========================================================
# 3) PADRONIZAÇÃO DE COLUNAS / TIPOS
# =========================================================
df.columns = (df.columns
 .str.strip()
 .str.replace(r"\s+", "_", regex=True)
 .str.lower()
)
rename_map = {
 'data': 'date',
 'ultimo': 'close', 'último': 'close',
 'retorno_mensal_(%)': 'ret_m', 'retorno_mensal': 'ret_m',
 'média_móvel_3m': 'mm3', 'media_movel_3m': 'mm3',
 'média_móvel_6m': 'mm6', 'media_movel_6m': 'mm6',
 'média_móvel_12m': 'mm12','media_movel_12m': 'mm12',
 'volatilidade_3m': 'vol3', 'vol3': 'vol3',
 'volatilidade_6m': 'vol6', 'vol6': 'vol6',
 'momentum': 'momentum3', 'momentum3': 'momentum3',
 'volume_relativo': 'volrel', 'volrel': 'volrel',
 'close_plus1': 'close_plus1', 'close_+1': 'close_plus1'
}
df = df.rename(columns={k: v for k, v in rename_map.items() if k in df.columns})
df['date'] = pd.to_datetime(df['date'], dayfirst=True, errors='coerce')
df = df.sort_values('date').reset_index(drop=True)
# Conversão numérica das colunas
num_cols = ['close','ret_m','mm3','mm6','mm12','vol3','vol6',
           'momentum3','volrel','close_plus1']
for c in num_cols:
 if c in df.columns:
     df[c] = (df[c].astype(str)
                     .str.replace('\u00A0','',regex=False)
                     .str.replace('%','',regex=False)
                     .str.replace(' ','',regex=False))
     df[c] = pd.to_numeric(df[c], errors='coerce')
# =========================================================
# 4) TRATAMENTO CASO close_plus1 NÃO EXISTA
# =========================================================
if 'close_plus1' not in df.columns:
 df['close_plus1'] = df['close'].shift(-1)
# =========================================================
# 5) SEPARAR HISTÓRICO (até 2023-12) e REAIS DE 2024 (se houver)
# =========================================================
limite_hist = pd.Timestamp('2023-12-01')
df_hist = df[df['date'] <= limite_hist].copy()
df_2024 = df[(df['date'] >= pd.Timestamp('2024-01-01')) &
           (df['date'] <= pd.Timestamp('2024-12-31'))][['date','close']].copy()
df_2024.rename(columns={'close':'close_real'}, inplace=True)
if 'close_plus1' in df_hist.columns:
 assert pd.isna(df_hist.loc[len(df_hist)-1, 'close_plus1']), \
     "A última linha de 2023 deve estar vazia em close_plus1 (alvo desconhecido)."
print("Linhas histórico (<=2023-12):", len(df_hist))
print("Linhas 2024 (se houver no CSV):", len(df_2024))
# =========================================================
# 6) TREINO (Random Forest) COM VALIDAÇÃO TEMPORAL
# =========================================================
FEATS = ['ret_m','mm3','mm6','mm12','vol3','vol6','momentum3','volrel']
TARGET = 'close_plus1'
train = df_hist.dropna(subset=FEATS + [TARGET]).copy()
print("Linhas após dropna (treino):", len(train))
assert len(train) >= 12, "Poucos dados após limpeza (verifique NaNs)."
X = train[FEATS].values
y = train[TARGET].values
forest = RandomForestRegressor(random_state=42)
grid = {
  'n_estimators': [100, 200],
  'max_depth': [3, 5, 7, 9],
  'min_samples_split': [2, 3, 4],
  'min_samples_leaf': [1, 2]
}
tscv = TimeSeriesSplit(n_splits=5)
g = GridSearchCV(forest, grid, cv=tscv, scoring='neg_mean_absolute_error', n_jobs=-1)
g.fit(X, y)
best_forest = g.best_estimator_
# =========================================================
# 7) OUT-OF-FOLD MÉTRICAS (HISTÓRICO)
# =========================================================
y_oof = np.full_like(y, np.nan, dtype=float)
for tr, te in tscv.split(X):
 best_forest.fit(X[tr], y[tr])
 y_oof[te] = best_forest.predict(X[te])
mask = ~np.isnan(y_oof)
mae  = mean_absolute_error(y[mask], y_oof[mask])
mse  = mean_squared_error(y[mask], y_oof[mask])
rmse = np.sqrt(mse)
r2   = r2_score(y[mask], y_oof[mask])
print(f"\nRandomForest | MAE={mae:.3f} RMSE={rmse:.3f} R²={r2:.3f} best={g.best_params_}")
# =========================================================
# 8) REFIT NO HISTÓRICO COMPLETO
# =========================================================
best_forest.fit(X, y)
# =========================================================
# 9) PROJEÇÃO RECURSIVA DE 12 MESES (2024)
# =========================================================
def roll_mean(a,n): return np.mean(a[-n:]) if len(a)>=n else np.nan
def roll_std(a,n):  return np.std(a[-n:], ddof=1) if len(a)>=n else np.nan
work = df_hist[['date','close','ret_m','mm3','mm6','mm12',
              'vol3','vol6','momentum3','volrel']].copy()
volrel_last3 = work['volrel'].tail(3).mean()
last_close   = work['close'].iloc[-1]
rows = []
alvos_2024 = pd.date_range('2024-01-01', '2024-12-01', freq='MS')
for next_date in alvos_2024:
 feats_now = work.iloc[-1][FEATS].to_dict()
 feats_now['volrel'] = volrel_last3
 x = np.array([feats_now[f] for f in FEATS], dtype=float).reshape(1, -1)
 pred_close_next = float(best_forest.predict(x)[0])
 ret_next = (pred_close_next / last_close) - 1.0
 work = pd.concat([work, pd.DataFrame([{
     'date': next_date,
     'close': pred_close_next,
     'ret_m': ret_next,
     'volrel': volrel_last3
 }])], ignore_index=True)
 closes = work['close'].values
 rets   = work['ret_m'].values
 work.loc[work.index[-1], 'mm3']  = roll_mean(closes, 3)
 work.loc[work.index[-1], 'mm6']  = roll_mean(closes, 6)
 work.loc[work.index[-1], 'mm12'] = roll_mean(closes, 12)
 work.loc[work.index[-1], 'vol3'] = roll_std(rets, 3)
 work.loc[work.index[-1], 'vol6'] = roll_std(rets, 6)
 if len(closes) >= 4:
     work.loc[work.index[-1], 'momentum3'] = (closes[-1]/closes[-4]) - 1
 last_close = pred_close_next
 rows.append({'date': next_date, 'close_pred': pred_close_next})
pred_2024 = pd.DataFrame(rows).sort_values('date').reset_index(drop=True)
# =========================================================
# 10) AVALIAÇÃO CONTRA REAIS DE 2024 (SE EXISTIREM)
# =========================================================
avaliacao_2024 = None
if len(df_2024) > 0:
 avaliacao_2024 = pred_2024.merge(df_2024, on='date', how='left')
 if 'close_real' in avaliacao_2024.columns and avaliacao_2024['close_real'].notna().any():
     avaliacao_2024['erro_abs'] = (avaliacao_2024['close_pred'] - avaliacao_2024['close_real']).abs()
     avaliacao_2024['erro_%']   = (avaliacao_2024['close_pred'] / avaliacao_2024['close_real'] - 1.0) * 100
     mae_24  = avaliacao_2024['erro_abs'].mean(skipna=True)
     rmse_24 = np.sqrt(((avaliacao_2024['close_pred'] - avaliacao_2024['close_real'])**2).mean(skipna=True))
     print(f"\n(Comparação holdout 2024) MAE={mae_24:.3f} RMSE={rmse_24:.3f}")
 else:
     print("\nSem closes reais de 2024 no CSV; apenas previsões foram geradas.")
# =========================================================
# 11) SALVAR RESULTADOS NO EXCEL
# =========================================================
try:
 with pd.ExcelWriter(OUTPUT_PATH, engine='xlsxwriter') as w:
     pred_2024.to_excel(w, index=False, sheet_name='predicoes_2024')
     pd.DataFrame([{'MAE_hist': mae, 'RMSE_hist': rmse, 'R2_hist': r2, **g.best_params_}]).to_excel(
         w, index=False, sheet_name='metricas_hist')
     if avaliacao_2024 is not None:
         avaliacao_2024.to_excel(w, index=False, sheet_name='avaliacao_2024')
except Exception:
 with pd.ExcelWriter(OUTPUT_PATH, engine='openpyxl') as w:
     pred_2024.to_excel(w, index=False, sheet_name='predicoes_2024')
     pd.DataFrame([{'MAE_hist': mae, 'RMSE_hist': rmse, 'R2_hist': r2, **g.best_params_}]).to_excel(
         w, index=False, sheet_name='metricas_hist')
     if avaliacao_2024 is not None:
         avaliacao_2024.to_excel(w, index=False, sheet_name='avaliacao_2024')
print(f"\n✅ Arquivo gerado com sucesso em:\n{OUTPUT_PATH}")

Linhas histórico (<=2023-12): 48
Linhas 2024 (se houver no CSV): 0
Linhas após dropna (treino): 47

RandomForest | MAE=10971.123 RMSE=15376.446 R²=-0.565 best={'max_depth': 9, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}

✅ Arquivo gerado com sucesso em:
C:\Users\kinagaki\OneDrive - Digicorner\Desktop\Trabalho de Conclusão de Curso - UFRJ.2025\Correção\versão atualizada\TESTE BITCOIN\bitcoin_pred_2024_forest.xlsx
