In [51]:
import pandas as pd
import time
from pmdarima import auto_arima
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

# Read the price/sentiment CSV, parse the 'date' column, index the frame by it,
# and keep only the two columns used for modelling.
data = (
    pd.read_csv('bitcoin_price_sentiment_addmean.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
    .loc[:, ['Close', 'sentiment_scores']]
)

# Convert the timestamps to daily periods so the index carries an explicit
# frequency (the original intent: avoid the missing-frequency warning).
data.index = pd.DatetimeIndex(data.index).to_period('D')

# Hold-out split: shuffle=False keeps the rows in file order, so the first 80%
# become the training set and the final 20% the test set.
train_data, test_data = train_test_split(
    data,
    test_size=0.2,
    shuffle=False,
)

In [52]:
# Stepwise search for the best non-seasonal ARIMA order on the training closes,
# using sentiment_scores as an exogenous regressor.
#
# pmdarima's documented keyword for exogenous regressors is `X`; the older
# `exogenous` spelling was deprecated and later removed, so passing it on
# current releases risks the regressor not reaching the model.
# NOTE(review): after this change, confirm that `sentiment_scores` shows up as
# a coefficient in the printed summary.
#
# perf_counter() is a monotonic clock intended for measuring intervals; the
# timed region is only the auto_arima search, not prediction or scoring.
start_time = time.perf_counter()
model = auto_arima(
    train_data['Close'],
    X=train_data[['sentiment_scores']],
    seasonal=False,
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
    d=1,          # differencing order is fixed rather than estimated
    start_p=0,
    start_q=0,
    max_p=5,
    max_q=5,
)
end_time = time.perf_counter()

# Summary of the best model chosen
print(model.summary())

# Forecast one step per test row, supplying the test-period sentiment values
# under the same keyword (`X`) that was used for fitting.
predictions = model.predict(
    n_periods=len(test_data),
    X=test_data[['sentiment_scores']],
)

# Calculate metrics against the held-out closing prices.
# scikit-learn documents MAPE as a fraction (not scaled to 0-100).
mae = mean_absolute_error(test_data['Close'], predictions)
mape = mean_absolute_percentage_error(test_data['Close'], predictions)
runtime = end_time - start_time

# Print metrics
print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Absolute Percentage Error (MAPE): {mape}")
print(f"Runtime (seconds): {runtime}")

Performing stepwise search to minimize aic
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=59329.291, Time=0.07 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=59320.167, Time=0.12 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=59319.980, Time=0.14 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=59328.228, Time=0.06 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=59321.583, Time=0.38 sec
 ARIMA(0,1,2)(0,0,0)[0] intercept   : AIC=59321.728, Time=0.22 sec
 ARIMA(1,1,2)(0,0,0)[0] intercept   : AIC=59323.575, Time=1.37 sec
 ARIMA(0,1,1)(0,0,0)[0]             : AIC=59319.026, Time=0.07 sec
 ARIMA(1,1,1)(0,0,0)[0]             : AIC=59320.564, Time=0.09 sec
 ARIMA(0,1,2)(0,0,0)[0]             : AIC=59320.789, Time=0.11 sec
 ARIMA(1,1,0)(0,0,0)[0]             : AIC=59319.208, Time=0.06 sec
 ARIMA(1,1,2)(0,0,0)[0]             : AIC=59322.570, Time=0.52 sec

Best model:  ARIMA(0,1,1)(0,0,0)[0]          
Total fit time: 3.217 seconds
                               SARIMAX Results                              