In [None]:
!pip install pmdarima

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import statsmodels.api as sm
import pmdarima as pm
warnings.filterwarnings('ignore')
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for folder, _subfolders, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Path of the 1-minute Bitstamp BTC/USD CSV inside the Kaggle input mount.
filepath = '/kaggle/input/bitcoin-historical-data/bitstampUSD_1-min_data_2012-01-01_to_2021-03-31.csv'

# Read the raw file and preview the first five rows.
df = pd.read_csv(filepath_or_buffer=filepath)
df.head(n=5)

In [None]:
# Print a concise summary of the raw frame (index, columns, dtypes, memory usage).
df.info()

In [None]:
# Interpret the Timestamp column as seconds since the Unix epoch and convert it to datetimes.
df['Timestamp'] = pd.to_datetime(df.Timestamp, unit='s')

# Re-check the dtypes after the conversion, then preview the last five rows.
df.info()
df.tail(n=5)

In [None]:
# Visualise where missing values occur: each cell of the boolean mask is True where the frame is NaN.
sns.heatmap(data=df.isna())

In [None]:
# Number of missing values in each column.
df.isna().sum(axis=0)

The count of missing values is the same for all columns.

In [None]:
# Use the timestamps as the index so the frame can be resampled by time in the following cells.
df.index = df['Timestamp']

In [None]:
# Average every column over calendar-day bins.
df_daily = df.resample(rule='D').mean()
df_daily.head(n=3)

In [None]:
# Average every column over month-end bins.
df_monthly = df.resample(rule='M').mean()
df_monthly.head(n=3)

In [None]:
# Average every column over year bins ending in December.
df_yearly = df.resample(rule='A-DEC').mean()
df_yearly.head(n=3)

In [None]:
# Average every column over quarter bins of a year ending in December.
df_quarterly = df.resample(rule='Q-DEC').mean()
df_quarterly.head(n=3)

In [None]:
# Resampled frames in the order they are drawn (row by row on the 2x2 grid).
df_list = [df_daily, df_monthly, df_yearly, df_quarterly]
panel_titles = ['Daily', 'Monthly', 'Yearly', 'Quarterly']
plt.style.use('ggplot')

# One panel per resampling frequency; axs.flat walks the grid as
# [0,0], [0,1], [1,0], [1,1], which matches the order of df_list.
fig, axs = plt.subplots(2, 2, figsize=(15, 15))
for ax, frame, panel_title in zip(axs.flat, df_list, panel_titles):
    ax.plot(frame['Weighted_Price'])
    ax.set_title(panel_title)
    ax.set_ylabel('Weighted_Price')
    
    


Box-Cox Transformation

In [None]:
# Box-Cox transform the monthly mean price; keep the fitted lambda (lmbda) so the
# transform can be inverted later.
boxcox_values, lmbda = stats.boxcox(df_monthly['Weighted_Price'])
df_monthly['Weighted_Price_Box'] = boxcox_values

# Augmented Dickey-Fuller test on the transformed series; element [1] of the result is the p-value.
boxcox_adf = sm.tsa.stattools.adfuller(df_monthly['Weighted_Price_Box'])
print('Adfuller P Value is = ' + str(boxcox_adf[1]))

The p-value is > 0.05, so H0 (the series has a unit root) cannot be rejected: the data is not stationary.

Seasonal Transformation

In [None]:
# Seasonal difference: subtract the Box-Cox value from 12 months earlier.
box_series = df_monthly['Weighted_Price_Box']
df_monthly['Weighted_Price_Seasonal'] = box_series - box_series.shift(12)

# The first 12 rows are NaN after the shift, so leave them out of the ADF test.
seasonal_adf = sm.tsa.stattools.adfuller(df_monthly['Weighted_Price_Seasonal'].iloc[12:])
print('Adfuller P Value is = ' + str(seasonal_adf[1]))

The p-value is > 0.05, so H0 (the series has a unit root) cannot be rejected: the data is still not stationary.

In [None]:
# Regular first difference of the seasonally differenced series.
seasonal_series = df_monthly['Weighted_Price_Seasonal']
df_monthly['Weighted_Price_Diff'] = seasonal_series - seasonal_series.shift(1)

# 12 NaN rows from the seasonal shift plus 1 from this shift: skip the first 13 rows.
diff_adf = sm.tsa.stattools.adfuller(df_monthly['Weighted_Price_Diff'].iloc[13:])
print('Adfuller P Value is = ' + str(diff_adf[1]))

The p-value is < 0.05, so H0 (the series has a unit root) is rejected: the data is now stationary.

In [None]:
# Decompose the differenced series (NaN warm-up rows skipped) into trend,
# seasonal and residual components.
decomposition = sm.tsa.seasonal_decompose(df_monthly.Weighted_Price_Diff[13:])

# DecomposeResult.plot() draws on a figure of its own, so a preceding
# plt.figure(figsize=...) would only leave an empty extra figure behind.
# Resize the figure that plot() returns instead.
fig = decomposition.plot()
fig.set_size_inches(15, 15)

plt.show()

In [None]:
# Values of the differenced series without the 13 NaN warm-up rows, as a plain array.
diff_values = df_monthly['Weighted_Price_Diff'].iloc[13:].values.squeeze()

# Autocorrelation on the left, partial autocorrelation on the right; lag 0 is omitted.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(15, 15))
acf_ax, pacf_ax = axs
sm.graphics.tsa.plot_acf(diff_values, lags=48, ax=acf_ax, zero=False)
sm.graphics.tsa.plot_pacf(diff_values, lags=48, ax=pacf_ax, zero=False)

plt.show()

In [None]:


# Series handed to auto_arima / SARIMAX in the following cells.
# Use the Box-Cox series rather than the already-differenced one: the models
# are configured with d=1 and D=1 and therefore difference internally (passing
# Weighted_Price_Diff would difference the data twice), and their predictions
# are later passed through invboxcox, which is only meaningful for values on
# the Box-Cox scale.
# The hard-coded SARIMAX order further down should be re-checked against the
# auto_arima trace produced from this series.
# NOTE: this rebinds `df` from the raw DataFrame to a Series, so the cells above
# cannot be re-run after this one without reloading the data.
df = df_monthly['Weighted_Price_Box']
df

In [None]:
# Search for a seasonal ARIMA model (season length 12) with the differencing
# orders fixed at d=1 and D=1; trace=True reports the candidate fits.
model = pm.auto_arima(
    df,
    start_p=1, max_p=4,
    start_q=1, max_q=4,
    max_P=4, max_Q=4,
    d=1, D=1,
    seasonal=True, m=12,
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
)

In [None]:
# Show the summary table of the model selected by auto_arima.
model.summary()

In [None]:
# Fit a SARIMA(1,1,1)x(3,1,1,12) model with statsmodels.
# Note: `model` is rebound here from the pmdarima result to the statsmodels model.
model = sm.tsa.statespace.SARIMAX(
    endog=df,
    order=(1, 1, 1),
    seasonal_order=(3, 1, 1, 12),
)
results = model.fit()


In [None]:


# Draw the residual diagnostic plots of the fitted SARIMAX results.
results.plot_diagnostics(figsize = (15,15))
plt.show()

In the histogram panel of the diagnostics, the KDE and N(0,1) curves should look similar. This is my first time-series notebook; next time I will tune my model.

Additionally, the Q-Q plot shows a slight problem.


The Prob(Q) and Prob(JB) values in `model.summary()` give the same information.

In [None]:
#Prediction

In [None]:
# Inverse Box-Cox Transformation Function
def invboxcox(y,lmbda):
    """Invert a Box-Cox transform.

    Parameters
    ----------
    y : scalar or array-like
        Value(s) on the Box-Cox scale.
    lmbda : float
        Lambda that was used for the forward transform.

    Returns
    -------
    ``exp(y)`` when ``lmbda`` is 0, otherwise ``exp(log(lmbda * y + 1) / lmbda)``.
    """
    if lmbda != 0:
        # General case: x = (lmbda * y + 1) ** (1 / lmbda), evaluated via exp/log.
        shifted = lmbda * y + 1
        return np.exp(np.log(shifted) / lmbda)
    # lmbda == 0 corresponds to the log transform, whose inverse is exp.
    return np.exp(y)

In [None]:
# In-sample predictions up to the last observation (end=-1), mapped back
# through the inverse Box-Cox transform and stored next to the monthly data.
df_monthly['Forecastll'] = invboxcox(y=results.predict(end=-1), lmbda=lmbda)

In [None]:
# Compare the observed monthly mean price with the model's prediction.
# Forecastll has been passed through the inverse Box-Cox transform, so it is on
# the price scale; plot it against Weighted_Price (not the differenced series)
# so that both lines match the legend label and the 'mean USD' axis label.
df_monthly.Weighted_Price.plot()
df_monthly.Forecastll.plot(color='b', ls='--', label='Predicted Weighted_Price')
plt.legend()
plt.title('Bitcoin exchanges, by months')
plt.ylabel('mean USD')
plt.show()

**Conclusion**

This was my first time-series notebook. Next time I will pay closer attention to model validation, in particular Prob(Q) and Prob(JB).

In [None]:
# Closing prompt inviting readers to leave feedback.
print('Please add your observation as a comment :) ')