In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere beneath the Kaggle input directory.
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the 1-minute Bitstamp USD price history from the attached Kaggle dataset.
bitcoin = pd.read_csv(
    '../input/bitcoin-historical-data/bitstampUSD_1-min_data_2012-01-01_to_2020-09-14.csv'
)

In [None]:
import matplotlib as mpl
from scipy import stats
import statsmodels.api as sm
import warnings
from itertools import product
from datetime import datetime
# Parse the Timestamp column (epoch seconds) into datetimes, index the rows by
# it, and collapse them to one mean row per calendar day.
bitcoin = (
    bitcoin
    .assign(Timestamp=pd.to_datetime(bitcoin.Timestamp, unit='s'))
    .set_index('Timestamp')
    .resample('D')
    .mean()
)

# Coarser views, each the mean of the daily rows:
# month-end, December-anchored quarter-end, and December year-end.
bitcoin_month, bitcoin_Q, bitcoin_year = (
    bitcoin.resample(rule).mean() for rule in ('M', 'Q-DEC', 'A-DEC')
)

bitcoin.tail()

In [None]:
# Show the last five rows of the daily-resampled frame.
bitcoin.tail(n=5)

In [None]:
import matplotlib.pyplot as plt
# Plot the mean weighted price at four sampling frequencies on a 2x2 grid.
# Uses explicit figure/axes handles instead of the pyplot state machine; the
# original bound the figure to a misspelled, unused name (`ig`).
fig, axes = plt.subplots(2, 2, figsize=(15, 7))
fig.suptitle('Bitcoin exchanges, mean USD', fontsize=22)

# (frame, legend label) per panel, in row-major order: top-left, top-right,
# bottom-left, bottom-right.
panels = [
    (bitcoin, 'By Days'),
    (bitcoin_month, 'By Months'),
    (bitcoin_Q, 'By Quarters'),
    (bitcoin_year, 'By Years'),
]
for ax, (frame, label) in zip(axes.flat, panels):
    ax.plot(frame.Weighted_Price, '-', label=label)
    ax.legend()

plt.show()

In [None]:
# Seasonal decomposition of the monthly mean price, plus a Dickey–Fuller
# stationarity test on the same series.
# DecomposeResult.plot() creates its own figure, so calling plt.figure(figsize=...)
# beforehand only leaves an empty figure behind; resize the returned figure instead.
decomposition = sm.tsa.seasonal_decompose(bitcoin_month.Weighted_Price)
fig = decomposition.plot()
fig.set_size_inches(15, 7)
print("Dickey–Fuller test: p=%f" % sm.tsa.stattools.adfuller(bitcoin_month.Weighted_Price)[1])
plt.show()

In [None]:
# Box-Cox Transformations
# stats.boxcox returns the transformed values and the fitted lambda; lambda is
# kept so the forecast can be mapped back to the original scale later.
bitcoin_month['Weighted_Price_box'], lmbda = stats.boxcox(bitcoin_month.Weighted_Price)
# Test the *transformed* series: the original re-tested the raw Weighted_Price
# column, which just repeated the previous cell's p-value.
print("Dickey–Fuller test: p=%f" % sm.tsa.stattools.adfuller(bitcoin_month.Weighted_Price_box)[1])

In [None]:
# Seasonal differencing: subtract the value from 12 rows (months) earlier from
# each Box-Cox-transformed monthly value.
box_prices = bitcoin_month['Weighted_Price_box']
bitcoin_month['prices_box_diff'] = box_prices - box_prices.shift(12)

# The 12-row shift leaves the first 12 differences undefined, so test from row 12 on.
seasonal_diff = bitcoin_month['prices_box_diff'].iloc[12:]
print("Dickey–Fuller test: p=%f" % sm.tsa.stattools.adfuller(seasonal_diff)[1])

In [None]:
# Regular (first-order) differencing applied on top of the seasonal difference.
bitcoin_month['prices_box_diff2'] = bitcoin_month.prices_box_diff - bitcoin_month.prices_box_diff.shift(1)

# The 12-row seasonal shift plus this 1-row shift leave the first 13 rows
# undefined, so analyse from row 13 onward.
diff2 = bitcoin_month.prices_box_diff2[13:]

# Seasonal decomposition of the twice-differenced series. This is
# sm.tsa.seasonal_decompose, not STL as the earlier comment claimed.
# DecomposeResult.plot() creates its own figure, so size the figure it returns
# rather than opening an empty one with plt.figure() first.
fig = sm.tsa.seasonal_decompose(diff2).plot()
fig.set_size_inches(15, 7)
print("Dickey–Fuller test: p=%f" % sm.tsa.stattools.adfuller(diff2)[1])

plt.show()

In [None]:
# Initial approximation of parameters using Autocorrelation and Partial Autocorrelation Plots
diff2_values = bitcoin_month.prices_box_diff2[13:].values.squeeze()

# Newer statsmodels releases reject PACF lags at or beyond half the sample
# size, so cap the 48 requested lags by the available data. The same value is
# used for both plots so they share an x-range.
n_lags = max(1, min(48, len(diff2_values) // 2 - 1))

fig, (ax_acf, ax_pacf) = plt.subplots(2, 1, figsize=(15, 7))
sm.graphics.tsa.plot_acf(diff2_values, lags=n_lags, ax=ax_acf)
sm.graphics.tsa.plot_pacf(diff2_values, lags=n_lags, ax=ax_pacf)
plt.tight_layout()
plt.show()

In [None]:
# Initial approximation of parameters
# Candidate orders for SARIMA(p, d, q) x (P, D, Q, 12); d and D are fixed at 1.
Qs = range(0, 2)
qs = range(0, 3)
Ps = range(0, 3)
ps = range(0, 3)
D = 1
d = 1
parameters_list = list(product(ps, qs, Ps, Qs))
# The original had a bare `len(parameters_list)` here, which displays nothing
# in the middle of a cell; print the count instead.
print('candidate models:', len(parameters_list))

# Model Selection
# Fit every candidate and keep the one with the lowest AIC. `results` collects
# [param, aic] for every successful fit so the ranking can be inspected later.
results = []
best_model = None
best_param = None
best_aic = float("inf")
# NOTE: this silences warnings for the rest of the session, not just this loop.
warnings.filterwarnings('ignore')
for param in parameters_list:
    try:
        model = sm.tsa.statespace.SARIMAX(
            bitcoin_month.Weighted_Price_box,
            order=(param[0], d, param[1]),
            seasonal_order=(param[2], D, param[3], 12),
        ).fit(disp=-1)
    except (ValueError, np.linalg.LinAlgError):
        # Some order combinations cannot be fitted; report and skip them.
        print('wrong parameters:', param)
        continue
    aic = model.aic
    if aic < best_aic:
        best_model = model
        best_aic = aic
        best_param = param
    results.append([param, aic])

# Fail here with a clear message rather than with a NameError in a later cell.
if best_model is None:
    raise RuntimeError('No SARIMAX candidate could be fitted; check the input series.')

In [None]:
# Residual diagnostics for the selected model: residuals over time (top),
# their autocorrelation (bottom), and a Dickey–Fuller test on the residuals.
# The first 13 residuals are skipped.
residuals = best_model.resid[13:]

fig, (ax_resid, ax_acf) = plt.subplots(2, 1, figsize=(15, 7))
residuals.plot(ax=ax_resid)
ax_resid.set_ylabel(u'Residuals')
sm.graphics.tsa.plot_acf(residuals.values.squeeze(), lags=48, ax=ax_acf)

print("Dickey–Fuller test:: p=%f" % sm.tsa.stattools.adfuller(residuals)[1])

plt.tight_layout()
plt.show()

In [None]:
# Inverse Box-Cox Transformation Function
def invboxcox(y, lmbda):
    """Map Box-Cox-transformed values back to the original scale.

    Parameters
    ----------
    y : scalar or array-like
        Values on the Box-Cox scale.
    lmbda : scalar
        The Box-Cox lambda that was used for the forward transform.

    Returns
    -------
    ``exp(y)`` when ``lmbda`` is 0, otherwise ``exp(log(lmbda * y + 1) / lmbda)``.
    """
    if lmbda != 0:
        return np.exp(np.log(lmbda * y + 1) / lmbda)
    # lambda == 0 is the pure log transform, so its inverse is exp.
    return np.exp(y)

In [None]:
# Prediction
# Number of months to forecast beyond the last observed month (the original
# hard-coded list also held nine dates).
N_FUTURE_MONTHS = 9

bitcoin_month2 = bitcoin_month[['Weighted_Price']]

# Derive the future index from the data instead of a hard-coded date list.
# The old list overlapped months already present in the data and ended with an
# out-of-order, non-month-end date (2021-01-28), which the forecast could never
# align with.
month_step = bitcoin_month.index.freq or pd.offsets.MonthEnd()
future_index = pd.date_range(
    start=bitcoin_month.index[-1] + month_step,
    periods=N_FUTURE_MONTHS,
    freq=month_step,
)
future = pd.DataFrame(index=future_index, columns=bitcoin_month.columns)
bitcoin_month2 = pd.concat([bitcoin_month2, future])

# Predict every in-sample month plus the forecast horizon. The previous fixed
# `end=75` did not depend on the length of the series. The prediction is on the
# Box-Cox scale, so invert it before storing; assignment aligns on the date index.
last_step = len(bitcoin_month) + N_FUTURE_MONTHS - 1
bitcoin_month2['forecast'] = invboxcox(best_model.predict(start=0, end=last_step), lmbda)

plt.figure(figsize=(15,7))
bitcoin_month2.Weighted_Price.plot()
bitcoin_month2.forecast.plot(color='r', ls='--', label='Predicted Weighted_Price')
plt.legend()
plt.title('Bitcoin exchanges, by months')
plt.ylabel('mean USD')
plt.show()

In [None]:
# Only the last expression of a cell is rendered, so the original
# `future.tail()` line was silently discarded. `display` (available in
# IPython/Jupyter kernels) renders both frames.
display(future.tail(), bitcoin_month2.tail())