# BTC price prediction with AUTO ARIMA - 1 day period #
by Tanut Apiwong

In [None]:
!pip install pmdarima

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pmdarima as pm
from datetime import date
from pmdarima.arima import ndiffs
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

In [None]:
# Location of the 1-minute Bitstamp BTC/USD export inside the Kaggle input directory.
filename = '/kaggle/input/bitcoin-historical-data/bitstampUSD_1-min_data_2012-01-01_to_2020-09-14.csv'

# Load the whole CSV into memory and preview the first rows in the notebook.
df = pd.read_csv(filepath_or_buffer=filename)
df.head(5)

## Convert timestamp to Datetime ##

In [None]:
# Interpret the raw Timestamp values as a count of seconds (unit='s') and
# replace the column with the resulting datetimes.
df['Timestamp'] = pd.to_datetime(df['Timestamp'], unit='s')

## Set Timestamp as an index of the dataframe ##

In [None]:
# Index the rows by time; the Timestamp column itself stays in the frame.
df.index = df['Timestamp']
df.head(5)

## Resample to daily period ##
## Then replace the missing days with the average of the values around them ##

In [None]:
# Collapse the time-indexed frame to one row per calendar day by averaging.
df_day = df.resample('D').mean()

# Column-wise mean of the raw rows from 2015-01-05 through 2015-01-09; it is
# written over the three days in between (presumably the days with no data).
missing_replacement = df.loc['2015-01-05':'2015-01-09'].mean(numeric_only=True)
for gap_day in ('2015-01-06', '2015-01-07', '2015-01-08'):
    df_day.loc[gap_day] = missing_replacement

# Per-column count of values that are still null after the fill.
df_day.isnull().sum()

In [None]:
# Sanity-check the daily frame: its dimensions, then its first and last rows.
# The tail is taken from df_day (not the raw per-minute df) so that every
# line of this report describes the same resampled data.
print(df_day.shape)
print(df_day.head(), '\n\n', df_day.tail())

## Train / Test Split and visualize the data ##

In [None]:
# Chronological split: everything up to and including 2020-05-10 is used for
# training, everything from 2020-05-11 onwards is held out for testing.
df_train = df_day.loc['2011-12-31':'2020-05-10']
df_test = df_day.loc['2020-05-11':]

# Plot both segments of the daily weighted price on a single figure.
plt.figure(figsize=(22, 10))
for segment, colour, caption in (
    (df_train, 'blue', 'Training Data'),
    (df_test, 'green', 'Testing Data'),
):
    plt.plot(segment.Weighted_Price, colour, label=caption)

# Black vertical line (from 0 to 20000) at the last training day.
plt.plot([date(2020, 5, 10)] * 2, [0, 20000], 'black')

plt.title('Bitcoin Prices')
plt.xlabel('Dates')
plt.ylabel('Prices')
plt.legend()

# Plain arrays of the target series for the model-fitting cells below.
y_train, y_test = df_train.Weighted_Price.values, df_test.Weighted_Price.values

## Find ARIMA's d parameter and let AUTO ARIMA find suitable model parameters (p, d, q) ##
(That is, the parameters that minimize the AIC)

In [None]:
# Estimate the differencing order with two tests (KPSS and ADF, both at
# alpha=0.05 and capped at 6) and keep the larger of the two estimates.
kpss_diffs, adf_diffs = (
    ndiffs(y_train, alpha=0.05, test=unit_root_test, max_d=6)
    for unit_root_test in ('kpss', 'adf')
)
n_diffs = max(adf_diffs, kpss_diffs)

print('Estimated differencing term:', n_diffs)

# Stepwise model search with d fixed to the estimate above; warnings are
# suppressed, failing fits are ignored, and tracing is switched on.
auto = pm.auto_arima(
    y_train,
    d=n_diffs,
    seasonal=True,
    stepwise=True,
    suppress_warnings=True,
    error_action="ignore",
    max_p=6,
    max_order=None,
    trace=True,
)

## Make a prediction day-by-day, updating the model with the actual value after each prediction ##
127 days in total

In [None]:
# The fitted search result doubles as the model that is rolled forward below.
model = auto


def forecast_one_step():
    """Ask the global ``model`` for a single-period forecast.

    Returns:
        A ``(value, interval)`` tuple: the first (and only) forecast value
        and the first row of the accompanying confidence interval, both
        converted to plain Python objects via ``tolist()``.
    """
    point, interval = model.predict(n_periods=1, return_conf_int=True)
    next_value = point.tolist()[0]
    next_interval = np.asarray(interval).tolist()[0]
    return next_value, next_interval

# Walk-forward evaluation: forecast one step, record it, then feed the true
# observation back into the model before forecasting the next step.
y_predict = []
y_confidence = []

for i, new_ob in enumerate(y_test):
    fc, conf = forecast_one_step()
    y_predict.append(fc)
    y_confidence.append(conf)

    # Step index, forecast, actual value, absolute error.
    print(i, fc, new_ob, abs(fc - new_ob))

    # Update the model with the actual value only after it has been predicted.
    model.update(new_ob)

# Compute MSE once and derive RMSE from it, so each label matches its value
# and the deprecated ``squared=False`` keyword is not needed.
mse = mean_squared_error(y_test, y_predict)
rmse = np.sqrt(mse)

print('Mean squared error:', mse)
print('Root Mean squared error:', rmse)
print('R-Squared', r2_score(y_test, y_predict))

## RMSE: 173.96661339601658 ##
## MSE: 30264.3825764791 ##
## R2: 0.9712224080205929 ##

## Visualize the prediction values ##

In [None]:
# Three stacked panels showing the same two series: the full history, the
# whole test window, and the final month of the test window.
fig, axes = plt.subplots(3, 1, figsize=(22, 30))

split_marker_x = [date(2020, 5, 10), date(2020, 5, 10)]
marked_line = {'ls': '-', 'marker': '+'}

# (axis, extra line styling, x-range to zoom into or None for the full view)
panels = (
    (axes[0], {}, None),
    (axes[1], marked_line, (date(2020, 5, 11), date(2020, 9, 14))),
    (axes[2], marked_line, (date(2020, 8, 15), date(2020, 9, 14))),
)

for ax, line_style, window in panels:
    ax.plot(df_day.index, df_day.Weighted_Price,
            color='blue', label='Actual Price', **line_style)
    ax.plot(df_test.index, y_predict,
            color='green', label='Predicted Price', **line_style)
    # Black vertical line (from 0 to 20000) at the last training day.
    ax.plot(split_marker_x, [0, 20000], 'black')

    if window is None:
        # Only the overview panel carries the title.
        ax.set_title('Bitcoin Prices Prediction')

    ax.set_xlabel('Dates')
    ax.set_ylabel('Prices')

    if window is not None:
        # Zoomed panels share the same fixed price range.
        ax.set_xlim(*window)
        ax.set_ylim(8400, 12500)

    ax.legend()

plt.show()