In [1]:
import pandas as pd
import time
from pmdarima import auto_arima
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

# Read the price/sentiment CSV, parse the 'date' column into timestamps,
# index the frame by it, and keep only the two columns used for modelling.
data = (
    pd.read_csv('bitcoin_price_sentiment_addmean.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
    .loc[:, ['Close', 'sentiment_scores']]
)

# Convert the index to a daily PeriodIndex so it carries an explicit
# frequency (per the original note, this avoids a frequency warning).
data.index = pd.DatetimeIndex(data.index).to_period('D')

# Hold out the final 20% of rows as the test set. shuffle=False keeps the
# rows in file order, so the test rows are the last ones in the frame.
train_data, test_data = train_test_split(data, test_size=0.2, shuffle=False)

In [4]:
import pickle
from statsmodels.tsa.arima.model import ARIMA

# Order search with auto_arima (sentiment_scores as exogenous variable),
# kept for reference; it is disabled and the order below is hard-coded.
# NOTE(review): this search pinned d=1 while the fitted order below uses
# d=2 -- confirm which differencing order is intended.
# NOTE(review): newer pmdarima releases appear to take the exogenous
# regressors as `X=` rather than `exogenous=` -- verify before re-enabling.
# model = auto_arima(train_data['Close'], exogenous=train_data[['sentiment_scores']], seasonal=False, trace=True, error_action='ignore', suppress_warnings=True, d=1, start_p=0, start_q=0, max_p=5, max_q=5)
# print(model.summary())

# ARIMA order (p, d, q) used for the fit.
p, d, q = 3, 2, 3

# Fit ARIMA on the training closes with sentiment_scores as the exogenous
# regressor. `model` is bound to the fitted results from here on.
arima_spec = ARIMA(train_data['Close'], order=(p, d, q), exog=train_data[['sentiment_scores']])
model = arima_spec.fit()

# Predict over the test window: positions len(train_data) through
# len(train_data) + len(test_data) - 1, supplying the test-period exog.
# The legacy `typ="levels"` keyword is intentionally not passed: it belongs
# to the old ARIMA API, and results from statsmodels.tsa.arima.model.ARIMA
# do not define it (newer statsmodels warns about unknown keywords).
predictions = model.predict(
    start=len(train_data),
    end=len(train_data) + len(test_data) - 1,
    exog=test_data[['sentiment_scores']],
)

# Calculate error metrics against the held-out closes.
mae = mean_absolute_error(test_data['Close'], predictions)
mape = mean_absolute_percentage_error(test_data['Close'], predictions)

# Round the metrics for reporting; MAPE is scaled by 100 to a percentage.
formatted_mae = round(float(mae), 3)
formatted_mape = round(float(mape) * 100, 3)

# Optional persistence of the fitted model and its metrics. Disabled by
# default, matching the previously commented-out code; set SAVE_MODEL = True
# to write arima_model.pkl into the working directory.
SAVE_MODEL = False

if SAVE_MODEL:
    model_data = {
        'model': model,
        'mae': formatted_mae,
        'mape': formatted_mape,
    }

    with open('arima_model.pkl', 'wb') as file:
        pickle.dump(model_data, file)

    print("arima_model.pkl saved")

Future Predictions from 2024-10-04 to 2024-11-04:
Date: 2024-10-04, Predicted Price: 39449.329
Date: 2024-10-05, Predicted Price: 39534.497
Date: 2024-10-06, Predicted Price: 39545.711
Date: 2024-10-07, Predicted Price: 39516.027
Date: 2024-10-08, Predicted Price: 39578.944
Date: 2024-10-09, Predicted Price: 39616.513
Date: 2024-10-10, Predicted Price: 39582.735
Date: 2024-10-11, Predicted Price: 39623.694
Date: 2024-10-12, Predicted Price: 39680.653
Date: 2024-10-13, Predicted Price: 39654.348
Date: 2024-10-14, Predicted Price: 39671.363
Date: 2024-10-15, Predicted Price: 39738.013
Date: 2024-10-16, Predicted Price: 39727.933
Date: 2024-10-17, Predicted Price: 39724.171
Date: 2024-10-18, Predicted Price: 39789.849
Date: 2024-10-19, Predicted Price: 39800.465
Date: 2024-10-20, Predicted Price: 39783.042
Date: 2024-10-21, Predicted Price: 39838.424
Date: 2024-10-22, Predicted Price: 39869.459
Date: 2024-10-23, Predicted Price: 39847.549
Date: 2024-10-24, Predicted Price: 39886.431
Date:

