<a href="https://colab.research.google.com/github/laribar/TechChallenge2/blob/main/TechChallenge_2_Rev01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [343]:
# Importações de bibliotecas
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates

from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit, train_test_split
from sklearn.preprocessing import StandardScaler

from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller
from pmdarima import auto_arima

from datetime import datetime
from pandas.tseries.offsets import BDay

import warnings
import logging
from tqdm import tqdm

# Configurações de logging e warnings
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
warnings.filterwarnings('ignore')

# Baixando dados
print("Baixando dados...")
df = yf.download('^BVSP', start='1994-07-01', end=datetime.now().strftime('%Y-%m-%d'))

# Preenchendo valores ausentes
df = df.ffill()


[*********************100%***********************]  1 of 1 completed

Baixando dados...





In [330]:
df.head()

Price,Close,High,Low,Open,Volume
Ticker,^BVSP,^BVSP,^BVSP,^BVSP,^BVSP
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
1994-07-04,3580.899902,3712.100098,3579.699951,3623.199951,0
1994-07-05,3564.300049,3599.600098,3486.0,3580.899902,0
1994-07-06,3753.5,3781.600098,3564.300049,3564.300049,0
1994-07-07,3905.0,3923.300049,3753.5,3753.5,0
1994-07-08,4051.899902,4051.899902,3860.699951,3905.0,0


In [331]:
# Criar Features
for i in range(1, 6):
    df[f'Lag{i}'] = df['Close'].shift(i)

df['Daily_Return'] = df['Close'].pct_change()
df['MA_7'] = df['Close'].rolling(window=7).mean()
df['MA_30'] = df['Close'].rolling(window=30).mean()
df['Volatility'] = df['Close'].pct_change().rolling(window=7).std()
df['Log_Return'] = np.log(df['Close'] / df['Close'].shift(1))

df['BB_Mid'] = df['Close'].rolling(window=20).mean()
std_dev = df['Close'].rolling(window=20).std().squeeze()
df['BB_Upper'] = df['BB_Mid'] + (std_dev * 2)
df['BB_Lower'] = df['BB_Mid'] - (std_dev * 2)

delta = df['Close'].diff(1)
gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
rs = gain / loss
df['RSI'] = 100 - (100 / (1 + rs))

df.dropna(inplace=True)

X = df.drop(columns=['Close'])
y = df['Close']

train_size = int(len(df) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]


In [332]:
# Teste de estacionariedade
if len(y_train) > 0:
    result = adfuller(y_train)
    print(f'ADF Statistic: {result[0]}')
    print(f'p-value: {result[1]}')
    if result[1] > 0.05:
        y_train = y_train.diff().dropna()
        y_test = y_test.diff().dropna()
        X_train = X_train.iloc[len(X_train) - len(y_train):, :]
        X_test = X_test.iloc[len(X_test) - len(y_test):, :]
else:
    print("Erro: y_train está vazio.")

ADF Statistic: -0.20446113752621162
p-value: 0.9379877684088995


In [342]:
df.head()


Price,Close,High,Low,Open,Volume,Lag1,Lag2,Lag3,Lag4,Lag5,Daily_Return,MA_7,MA_30,Volatility,Log_Return,BB_Mid,BB_Upper,BB_Lower,RSI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1994-11-08,4782.100098,4952.200195,4778.0,4808.799805,0,4808.799805,4666.700195,4508.0,4700.899902,4797.899902,-0.005552,4741.714286,4914.103369,0.034475,-0.005568,4746.275024,5083.736839,4408.81321,46.637835
1994-11-09,4885.0,4891.700195,4735.799805,4782.100098,0,4782.100098,4808.799805,4666.700195,4508.0,4700.899902,0.021518,4735.628557,4895.163363,0.030113,0.021289,4738.92002,5055.945334,4421.894705,53.417018
1994-11-10,4992.600098,5040.600098,4885.0,4885.0,0,4885.0,4782.100098,4808.799805,4666.700195,4508.0,0.022027,4763.442871,4886.080029,0.028814,0.021788,4742.250024,5068.805209,4415.69484,56.976325
1994-11-11,4952.0,5064.100098,4936.5,4992.600098,0,4992.600098,4885.0,4782.100098,4808.799805,4666.700195,-0.008132,4799.314314,4877.230029,0.027299,-0.008165,4742.62002,5070.155838,4415.084201,60.839128
1994-11-14,4948.700195,4983.299805,4887.299805,4952.0,0,4952.0,4992.600098,4885.0,4782.100098,4808.799805,-0.000666,4862.271484,4866.283366,0.017921,-0.000667,4748.365039,5086.4942,4410.235879,69.072564


In [333]:
# Normalização para modelos lineares
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Random Forest com GridSearch
param_grid = {'n_estimators': [50, 100, 200], 'max_depth': [None, 10, 20]}
grid_search = GridSearchCV(RandomForestRegressor(), param_grid, cv=3, n_jobs=-1)
grid_search.fit(X_train, y_train)
print("Melhores parâmetros do Random Forest:", grid_search.best_params_)

# Auto ARIMA
auto_model = auto_arima(y_train, seasonal=True, m=12, trace=True, suppress_warnings=True)
print("Melhores parâmetros do ARIMA:", auto_model.order)


Melhores parâmetros do Random Forest: {'max_depth': None, 'n_estimators': 200}
Performing stepwise search to minimize aic
 ARIMA(2,0,2)(1,0,1)[12] intercept   : AIC=inf, Time=17.93 sec
 ARIMA(0,0,0)(0,0,0)[12] intercept   : AIC=96403.061, Time=0.13 sec
 ARIMA(1,0,0)(1,0,0)[12] intercept   : AIC=96405.251, Time=0.87 sec
 ARIMA(0,0,1)(0,0,1)[12] intercept   : AIC=96405.174, Time=0.91 sec
 ARIMA(0,0,0)(0,0,0)[12]             : AIC=96403.845, Time=0.06 sec
 ARIMA(0,0,0)(1,0,0)[12] intercept   : AIC=96404.701, Time=0.49 sec
 ARIMA(0,0,0)(0,0,1)[12] intercept   : AIC=96404.702, Time=0.76 sec
 ARIMA(0,0,0)(1,0,1)[12] intercept   : AIC=96406.963, Time=1.10 sec
 ARIMA(1,0,0)(0,0,0)[12] intercept   : AIC=96403.613, Time=0.14 sec
 ARIMA(0,0,1)(0,0,0)[12] intercept   : AIC=96403.536, Time=0.28 sec
 ARIMA(1,0,1)(0,0,0)[12] intercept   : AIC=96403.099, Time=0.97 sec

Best model:  ARIMA(0,0,0)(0,0,0)[12] intercept
Total fit time: 23.682 seconds
Melhores parâmetros do ARIMA: (0, 0, 0)


In [334]:



# Verifique o DataFrame original
print("Dados originais:")
print(df.head())

# Feature Engineering: executar passo a passo e analisar
try:
    # Calculando Lag Features
    for i in range(1, 6):
        df[f'Lag{i}'] = df['Close'].shift(i)

    print("Após Lags:", df.drop(columns='Close').head())

    # Calculando Retornos Diários
    df['Daily_Return'] = df['Close'].pct_change()

    # Médias Móveis
    df['MA_7'] = df['Close'].rolling(window=7).mean()
    df['MA_30'] = df['Close'].rolling(window=30).mean()

    # Volatilidade
    df['Volatility'] = df['Close'].pct_change().rolling(window=7).std()

    # Retorno Logarítmico
    df['Log_Return'] = np.log(df['Close'] / df['Close'].shift(1))

    # Bandas de Bollinger
    df['BB_Mid'] = df['Close'].rolling(window=20).mean()
    std_dev = df['Close'].rolling(window=20).std()
    df['BB_Upper'] = df['BB_Mid'] + (std_dev * 2)
    df['BB_Lower'] = df['BB_Mid'] - (std_dev * 2)

    # Índice de Força Relativa (RSI)
    delta = df['Close'].diff(1)
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    df['RSI'] = 100 - (100 / (1 + rs))

    # Mostra uma descrição do depth antes do NaN removal
    print("Antes de dropna():\n", df.head(20))
    print(df.isna().sum())

    # Drop de NaNs depois de verificar todas as mudanças
    df.dropna(inplace=True)

    # Verifique o resultado
    print("Dados após dropna():")
    print(df.head())
except Exception as e:
    print("Erro durante a engineering de features:", str(e))

Dados originais:
Price             Close         High          Low         Open Volume  \
Ticker            ^BVSP        ^BVSP        ^BVSP        ^BVSP  ^BVSP   
Date                                                                    
1994-08-12  4547.100098  4559.299805  4463.799805  4463.799805      0   
1994-08-15  4651.000000  4651.000000  4540.600098  4547.100098      0   
1994-08-16  4744.100098  4748.600098  4651.000000  4651.000000      0   
1994-08-17  4968.899902  4968.899902  4733.799805  4744.100098      0   
1994-08-18  5026.600098  5090.600098  4968.899902  4968.899902      0   

Price              Lag1         Lag2         Lag3         Lag4         Lag5  \
Ticker                                                                        
Date                                                                          
1994-08-12  4463.799805  4513.200195  4558.299805  4580.899902  4469.500000   
1994-08-15  4547.100098  4463.799805  4513.200195  4558.299805  4580.899902   
199

In [335]:
# Exemplo: Código para transformar MultiIndex em uma estrutura de colunas única
df.columns = df.columns.get_level_values(0)

# Verificando após ajustes
#> print("Dados ajustados:")
#> print(df.head())

# Feature Engineering: Executando de forma mais robusta para DataFrame ajustado
try:
    # Calculando Lag Features
    for i in range(1, 6):
        df[f'Lag{i}'] = df['Close'].shift(i)

    # Calculando Retornos Diários
    df['Daily_Return'] = df['Close'].pct_change()

    # Médias Móveis
    df['MA_7'] = df['Close'].rolling(window=7).mean()
    df['MA_30'] = df['Close'].rolling(window=30).mean()

    # Volatilidade
    df['Volatility'] = df['Close'].pct_change().rolling(window=7).std()

    # Retorno Logarítmico
    df['Log_Return'] = np.log(df['Close'] / df['Close'].shift(1))

    # Bandas de Bollinger
    df['BB_Mid'] = df['Close'].rolling(window=20).mean()
    std_dev = df['Close'].rolling(window=20).std()

    # Calcular individualmente para evitar erros de atribuição incorreta
    df['BB_Upper'] = df['BB_Mid'] + (std_dev * 2)
    df['BB_Lower'] = df['BB_Mid'] - (std_dev * 2)

    # Índice de Força Relativa (RSI)
    delta = df['Close'].diff(1)
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    df['RSI'] = 100 - (100 / (1 + rs))

    # Mostra uma descrição do depth antes do NaN removal
   #> print("Antes de dropna():\n", df.head(20))
   #> print(df.isna().sum())

    # Drop de NaNs depois de verificar todas as mudanças
    df.dropna(inplace=True)

    # Verifique o resultado
   #> print("Dados após dropna():")
   #> print(df.head())
except Exception as e:
    print("Erro durante a engenharia de features:", str(e))

In [336]:
# Feature Engineering
df['Lag1'] = df['Close'].shift(1)
df['Lag2'] = df['Close'].shift(2)
df['Lag3'] = df['Close'].shift(3)
df['Lag4'] = df['Close'].shift(4)
df['Lag5'] = df['Close'].shift(5)
df['Daily_Return'] = df['Close'].pct_change()

# Criar features
df['MA_7'] = df['Close'].rolling(window=7).mean()
df['MA_30'] = df['Close'].rolling(window=30).mean()
df['Volatility'] = df['Close'].pct_change().rolling(window=7).std()
df['Daily_Return'] = df['Close'].pct_change()
df['Log_Return'] = np.log(df['Close'] / df['Close'].shift(1))

# Bandas de Bollinger
df['BB_Mid'] = df['Close'].rolling(window=20).mean()
std_dev = df['Close'].rolling(window=20).std().squeeze()
df['BB_Upper'] = df['BB_Mid'] + (std_dev * 2)
df['BB_Lower'] = df['BB_Mid'] - (std_dev * 2)

# Índice de Força Relativa (RSI)
delta = df['Close'].diff(1)
gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
rs = gain / loss
df['RSI'] = 100 - (100 / (1 + rs))

# Remover valores nulos remanescentes
df.dropna(inplace=True)
print("Dados após dropna:", len(df))

# Definir variáveis preditoras e alvo
X = df.drop(columns=['Close'])
y = df['Close']

Dados após dropna: 7502


In [337]:
df.head(
)

Price,Close,High,Low,Open,Volume,Lag1,Lag2,Lag3,Lag4,Lag5,Daily_Return,MA_7,MA_30,Volatility,Log_Return,BB_Mid,BB_Upper,BB_Lower,RSI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1994-11-08,4782.100098,4952.200195,4778.0,4808.799805,0,4808.799805,4666.700195,4508.0,4700.899902,4797.899902,-0.005552,4741.714286,4914.103369,0.034475,-0.005568,4746.275024,5083.736839,4408.81321,46.637835
1994-11-09,4885.0,4891.700195,4735.799805,4782.100098,0,4782.100098,4808.799805,4666.700195,4508.0,4700.899902,0.021518,4735.628557,4895.163363,0.030113,0.021289,4738.92002,5055.945334,4421.894705,53.417018
1994-11-10,4992.600098,5040.600098,4885.0,4885.0,0,4885.0,4782.100098,4808.799805,4666.700195,4508.0,0.022027,4763.442871,4886.080029,0.028814,0.021788,4742.250024,5068.805209,4415.69484,56.976325
1994-11-11,4952.0,5064.100098,4936.5,4992.600098,0,4992.600098,4885.0,4782.100098,4808.799805,4666.700195,-0.008132,4799.314314,4877.230029,0.027299,-0.008165,4742.62002,5070.155838,4415.084201,60.839128
1994-11-14,4948.700195,4983.299805,4887.299805,4952.0,0,4952.0,4992.600098,4885.0,4782.100098,4808.799805,-0.000666,4862.271484,4866.283366,0.017921,-0.000667,4748.365039,5086.4942,4410.235879,69.072564


In [338]:
# Definir variáveis preditoras e alvo
X = df.drop(columns=['Close'])
y = df['Close']

# Dividir o conjunto de dados para treino e previsão
train_size = int(len(df) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

In [339]:
# Teste de estacionariedade
result = adfuller(y_train)
print(f'ADF Statistic: {result[0]}')
print(f'p-value: {result[1]}')
if result[1] > 0.05:
    y_train = y_train.diff().dropna()
    y_test = y_test.diff().dropna()

ADF Statistic: -0.25925701550428193
p-value: 0.9310798192729816


In [340]:
# Normalização para modelos lineares
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [341]:
# Random Forest com GridSearch
param_grid = {'n_estimators': [50, 100, 200], 'max_depth': [None, 10, 20]}
grid_search = GridSearchCV(RandomForestRegressor(), param_grid, cv=3, n_jobs=-1)
grid_search.fit(X_train, y_train)
print("Melhores parâmetros do Random Forest:", grid_search.best_params_)

ValueError: Found input variables with inconsistent numbers of samples: [6001, 6000]

In [None]:
# Auto ARIMA
auto_model = auto_arima(y_train, seasonal=True, m=12, trace=True, suppress_warnings=True)
print("Melhores parâmetros do ARIMA:", auto_model.order)

In [None]:
#REGRESSÃO LINEAR
# Escalonar os dados
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Treinar o modelo de Regressão Linear
linear_model = LinearRegression()
linear_model.fit(X_train_scaled, y_train)

# Previsões
linear_predictions = linear_model.predict(X_test_scaled)

In [None]:
# Criar o modelo Random Forest usando os melhores parâmetros
best_rf_model = RandomForestRegressor(
    n_estimators=100,
    max_depth=None,
    max_features='sqrt',
    min_samples_leaf=1,
    min_samples_split=2,
    random_state=42  # Defina uma semente para reprodutibilidade, se desejado
)

# Treinar o modelo com os dados de treinamento
best_rf_model.fit(X_train, y_train)



# Avaliar performance com o seu conjunto de métricas
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100

print(f"Random Forest Ajustado - MAE: {mae}, MSE: {mse}, RMSE: {rmse}, MAPE: {mape}%")





In [None]:
#SARIMA

# Treinar o modelo SARIMA
sarima_order = (1, 1, 1)
seasonal_order = (1, 1, 1, 12)
sarima_model = SARIMAX(y_train, order=sarima_order, seasonal_order=seasonal_order)
sarima_model_fit = sarima_model.fit()

# Previsões
sarima_predictions = sarima_model_fit.forecast(steps=len(y_test))

In [None]:
# Previsão para os próximos 5 dias com modelos treinados:

# Regressão Linear (note que o escalonamento é necessário)
future_features_scaled = scaler.transform(X_test.tail(5))  # usando últimos disponíveis para simular continuações
future_linear_predictions = linear_model.predict(future_features_scaled)

# Random Forest
# 5. Fazer previsões
rf_predictions = best_rf_model.predict(X_test)

# ARIMA (baseia-se diretamente em valores passados)
future_arima_predictions = arima_model_fit.forecast(steps=5)

# SARIMA
future_sarima_predictions = sarima_model_fit.forecast(steps=5)

print("Previsões Futuros 5 Dias:")
print("Regressão Linear:", future_linear_predictions)
print("Random Forest:", future_rf_predictions)
print("ARIMA:", future_arima_predictions)
print("SARIMA:", future_sarima_predictions)

In [None]:


# Suponha que você já tenha seu DataFrame 'df' e as previsões prontas

# Obter os últimos 15 dias de seu conjunto de dados
last_15_days = df['Close'].tail(15)

# Criar as datas para os próximos 5 dias
future_dates = pd.date_range(start=last_15_days.index[-1] + pd.Timedelta(days=1), periods=5, freq='B')
extended_index_real = last_15_days.index.union(future_dates)

# Criar DataFrame combinando dados reais e previsões
combined_df = pd.DataFrame(index=extended_index_real)
combined_df['Dados Reais'] = pd.concat([last_15_days, pd.Series([None] * 5, index=future_dates)], ignore_index=False)

# Preenchendo previsões apenas para os próximos 5 dias
prediction_index = future_dates

combined_df['Regressão Linear'] = pd.concat([pd.Series([None]*15, index=last_15_days.index), pd.Series(future_linear_predictions, index=prediction_index)])
combined_df['Random Forest'] = pd.concat([pd.Series([None]*15, index=last_15_days.index), pd.Series(future_rf_predictions, index=prediction_index)])
combined_df['ARIMA'] = pd.concat([pd.Series([None]*15, index=last_15_days.index), pd.Series(future_arima_predictions, index=prediction_index)])
combined_df['SARIMA'] = pd.concat([pd.Series([None]*15, index=last_15_days.index), pd.Series(future_sarima_predictions, index=prediction_index)])

# Plotagem
plt.figure(figsize=(14, 8))
markers = {'Dados Reais': 'o', 'Regressão Linear': 's', 'Random Forest': 's', 'ARIMA': 's', 'SARIMA': 's'}
colors = {'Dados Reais': 'black', 'Regressão Linear': 'blue', 'Random Forest': 'green', 'ARIMA': 'red', 'SARIMA': 'purple'}

for column, marker in markers.items():
    plt.plot(combined_df.index, combined_df[column], label=column, color=colors[column], linestyle='--', marker=marker)

    # Adicionar rótulos aos dados
    for x, y in zip(combined_df.index, combined_df[column]):
        if y is not None:  # Apenas rotular se não for None
            plt.annotate(f'{y:.0f}', (x, y), textcoords="offset points", xytext=(0,10), ha='center', fontsize=8)

# Ajuste dos limites do eixo y para que tudo seja visível
plt.ylim(combined_df.min().min() * 0.9, combined_df.max().max() * 1.1)

# Formatação do gráfico
plt.title('Previsão do Preço de Fechamento - Últimos 15 Dias e Próximos 5 Dias')
plt.xlabel('Data')
plt.ylabel('Preço de Fechamento')
plt.xticks(rotation=45)
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
plt.gca().xaxis.set_major_locator(mdates.DayLocator(interval=1))
plt.legend()
plt.grid(True)

# Mostrar o gráfico
plt.tight_layout()
plt.show()

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Suponha que 'y_test' contenha os valores reais dos dados futuros
# Pegue os últimos 5 valores de y_test para representar y_true
y_true = y_test[:5].tolist()  # Ajustar conforme necessário dependendo de como y_test está ordenado

# Função auxiliar para calcular todas as métricas
def calculate_errors(true_values, predicted_values, model_name="Modelo"):
    mae = mean_absolute_error(true_values, predicted_values)
    mse = mean_squared_error(true_values, predicted_values)
    rmse = np.sqrt(mse)
    mape = np.mean(np.abs((true_values - predicted_values) / true_values)) * 100

    print(f"{model_name} - MAE: {mae:.2f}, MSE: {mse:.2f}, RMSE: {rmse:.2f}, MAPE: {mape:.2f}%")

# Avaliação de cada modelo
calculate_errors(y_true, future_linear_predictions, "Regressão Linear")
calculate_errors(y_true, future_rf_predictions, "Random Forest")
calculate_errors(y_true, future_arima_predictions, "ARIMA")
calculate_errors(y_true, future_sarima_predictions, "SARIMA")