In [None]:
!pip install pmdarima

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import statsmodels.api as sm
import pmdarima as pm
from scipy import stats
from datetime import datetime
from scipy.special import boxcox, inv_boxcox
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the 1-minute Bitstamp BTC/USD CSV and preview its first three rows.
path = '/kaggle/input/bitcoin-historical-data/bitstampUSD_1-min_data_2012-01-01_to_2021-03-31.csv'
df = pd.read_csv(filepath_or_buffer=path)
df.head(n=3)


In [None]:
# Number of missing values in each column of the raw frame.
df.isnull().sum()

In [None]:
# Print the column dtypes, non-null counts and memory usage of the raw frame.
df.info()

In [None]:
# Convert the `Timestamp` column from epoch seconds to pandas datetimes,
# then preview the last rows to check the result.
df['Timestamp'] = pd.to_datetime(df.Timestamp, unit='s')
df.tail(5)

In [None]:
# Index the frame by timestamp (the column itself is kept) so it can be resampled by time.
df.index = pd.Index(df['Timestamp'])


In [None]:
# Daily averages of every column.
df_daily = df.resample(rule='D').mean()
df_daily.head(5)

In [None]:
# Monthly averages of every column, labelled by month end.
# NOTE(review): newer pandas releases deprecate the 'M' alias in favour of 'ME' —
# confirm against the installed version before changing it.
df_monthly = df.resample(rule='M').mean()
df_monthly.head(5)

In [None]:
# Yearly averages of every column, labelled by year end.
# NOTE(review): newer pandas releases deprecate the 'A' alias in favour of 'YE' —
# confirm against the installed version before changing it.
df_yearly = df.resample(rule='A').mean()
df_yearly.head(5)

In [None]:
# Number of missing values in each column of the monthly averages.
df_monthly.isnull().sum()

In [None]:
# Cumulative return chart: compound the month-over-month growth factors
# (1 + percentage change) of the monthly mean weighted price.
monthly_growth = df_monthly['Weighted_Price'].pct_change() + 1
monthly_growth.cumprod().plot()
plt.show()

In [None]:
plots = [df_daily, df_monthly, df_yearly]

# Plot the weighted price at each resampling frequency, one figure per frame.
# The loop variable is deliberately NOT named `df`: reusing that name would
# rebind the raw minute-level frame to `df_yearly` once the loop finishes.
for frame in plots:
    frame['Weighted_Price'].plot()

    plt.show()

In [None]:
# Discard every day that has a missing value in any column.
df_daily = df_daily.dropna(how='any')


In [None]:
# Augmented Dickey-Fuller test on the daily mean price; element [1] of the result is the p-value.
adf_daily = sm.tsa.stattools.adfuller(df_daily.Weighted_Price)
print(f'Adfuller P Value is for daily  = {adf_daily[1]!s}')

In [None]:
# Augmented Dickey-Fuller test on the monthly mean price; element [1] of the result is the p-value.
adf_monthly = sm.tsa.stattools.adfuller(df_monthly.Weighted_Price)
print(f'Adfuller P Value is for monthly  = {adf_monthly[1]!s}')

In [None]:
# Augmented Dickey-Fuller test on the yearly mean price; element [1] of the result is the p-value.
adf_yearly = sm.tsa.stattools.adfuller(df_yearly.Weighted_Price)
print(f'Adfuller P Value is for yearly  = {adf_yearly[1]!s}')

In [None]:
# Box-Cox transform the monthly price. `lmbda` is the fitted transform
# parameter and is needed later to invert the transform.
boxcox_values, lmbda = stats.boxcox(df_monthly['Weighted_Price'])
df_monthly['Boxcox'] = boxcox_values

# Stationarity check (ADF p-value) on the transformed series.
adf_boxcox = sm.tsa.stattools.adfuller(df_monthly['Boxcox'])
print(f'Adfuller P Value is for monthly boxcox  = {adf_boxcox[1]!s}')

In [None]:
# Seasonal differencing: each value minus the value 12 months earlier.
df_monthly['Seasonal_boxcox'] = df_monthly['Boxcox'].diff(12)

# The first 12 rows have no value 12 months earlier, so skip them for the ADF test.
seasonal_part = df_monthly['Seasonal_boxcox'].iloc[12:]
sm.tsa.stattools.adfuller(seasonal_part)[1]

In [None]:
# First-order difference of the seasonally differenced series.
df_monthly['diff_seasonal_boxcox'] = df_monthly['Seasonal_boxcox'].diff(1)


In [None]:
# ADF p-value of the twice-differenced series. The first 13 rows are skipped
# because the two differencing steps (12 + 1) leave them undefined.
# A p-value below 0.05 means the series can now be treated as stationary.
differenced_part = df_monthly['diff_seasonal_boxcox'].iloc[13:]
sm.tsa.stattools.adfuller(differenced_part)[1]

In [None]:

    # Decompose the differenced series (undefined first 13 rows skipped) and plot the components.
    decomposition = sm.tsa.seasonal_decompose(df_monthly['diff_seasonal_boxcox'].iloc[13:])
    decomposition.plot()
    plt.show()

In [None]:
# Re-check the raw monthly series: the ADF p-value has to drop below 0.05
# before the series can be treated as stationary.
adf_monthly_raw = sm.tsa.stattools.adfuller(df_monthly.Weighted_Price)
print(f'Adfuller P Value is for monthly  = {adf_monthly_raw[1]!s}')

In [None]:
# Chronological train/test split of the monthly frame.
# Label-based slices include BOTH endpoints, so the two slices must use
# different boundary labels; otherwise the 2020-11-30 row ends up in both sets.
df_monthly_train = df_monthly.loc[:'2020-11-30']
df_monthly_test = df_monthly.loc['2020-12-01':]

In [None]:
# ADF test on the log-transformed training prices.
# NOTE(review): the original author observed that the log transform alone
# did not make the series stationary.
log_train_price = np.log(df_monthly_train.Weighted_Price)
adf_log_train = sm.tsa.stattools.adfuller(log_train_price)
print(f'Adfuller P Value is for monthly  = {adf_log_train[1]!s}')


In [None]:
# Seasonal (12-month) difference of the raw training prices.
diff = df_monthly_train.Weighted_Price.diff(12)

# ADF test on the rows that have a value 12 months earlier.
adf_seasonal_train = sm.tsa.stattools.adfuller(diff.iloc[12:])
print(f'Adfuller P Value is for monthly  = {adf_seasonal_train[1]!s}')



In [None]:
# Our seasonal chart shows that the series is both seasonal and stationary.

In [None]:
# Side-by-side ACF (left) and PACF (right) of the differenced training series,
# up to lag 13. The first 13 rows are undefined after differencing and are skipped.
stationary_train = df_monthly_train['diff_seasonal_boxcox'].iloc[13:]

fig, axs = plt.subplots(1, 2, figsize=(15, 15))
acf_ax, pacf_ax = axs
sm.graphics.tsa.plot_acf(stationary_train, lags=13, ax=acf_ax)
sm.graphics.tsa.plot_pacf(stationary_train, lags=13, ax=pacf_ax)
plt.show()

In [None]:
# Search settings for the seasonal ARIMA order selection:
# 12-month season, one regular and one seasonal difference,
# and AR/MA orders searched from 1 up to 5 (regular and seasonal).
search_config = dict(
    seasonal=True,
    m=12,
    d=1,
    D=1,
    start_p=1,
    start_q=1,
    max_p=5,
    max_q=5,
    start_P=1,
    start_Q=1,
    max_P=5,
    max_Q=5,
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
)

# NOTE(review): the search runs on the full Box-Cox series, not on the training split.
model = pm.auto_arima(df_monthly['Boxcox'], **search_config)

In [None]:
# Display the summary of the model returned by the auto_arima search.
model.summary()

In [None]:
# Fit a SARIMAX(1,1,0)x(3,1,0,12) model on the Box-Cox series and show its
# residual diagnostics.
# NOTE(review): these orders are presumably the ones selected by the
# auto_arima search — confirm against its trace output.
model = sm.tsa.statespace.SARIMAX(
    endog=df_monthly['Boxcox'],
    order=(1, 1, 0),
    seasonal_order=(3, 1, 0, 12),
)
result = model.fit()
result.plot_diagnostics(figsize=(15, 15))
plt.show()

In [None]:
# Inverse Box-Cox Transformation Function
def invboxcox(y,lmbda):
    """Undo a Box-Cox transform with parameter ``lmbda``.

    Returns ``exp(y)`` when ``lmbda`` is 0, and
    ``exp(log(lmbda * y + 1) / lmbda)`` otherwise.
    """
    if lmbda != 0:
        shifted = lmbda * y + 1
        return np.exp(np.log(shifted) / lmbda)
    return np.exp(y)

In [None]:
def invert_difference(orig_data, diff_data, interval):
    """Add ``diff_data`` back onto ``orig_data`` position by position.

    Returns a list of ``diff_data[k] + orig_data[k]`` for ``k`` from 0 up to
    (but not including) ``len(orig_data) - interval``.
    """
    restored = []
    for pos in range(len(orig_data) - interval):
        restored.append(diff_data[pos] + orig_data[pos])
    return restored

In [None]:
# Print the number of rows in the full, training and test Box-Cox series, in that order.
lists = [
    df_monthly['Boxcox'],
    df_monthly_train['Boxcox'],
    df_monthly_test['Boxcox'],
]

for series in lists:
    print(len(series))

In [None]:
# Prediction
# Forecast the monthly weighted price past the end of the data and plot it
# against the observed series.
N_FORECAST_MONTHS = 9   # number of months to forecast beyond the last observation
FORECAST_START = 100    # position of the first (in-sample) month to predict

df_month2 = df_monthly[['Weighted_Price']]

# Month-end dates that follow the last observed month.
date_list = pd.date_range(
    start=df_monthly.index[-1] + pd.offsets.MonthEnd(1),
    periods=N_FORECAST_MONTHS,
    freq=pd.offsets.MonthEnd(),
)

# Empty placeholder rows for the future months. Only the columns of `df_month2`
# are used (not all of `df_monthly`), with a float dtype, so the concat neither
# adds unrelated all-NaN columns nor changes the dtype of the price column.
future = pd.DataFrame(index=date_list, columns=df_month2.columns, dtype=float)
df_month2 = pd.concat([df_month2, future])

# The last predicted position is derived from the data length instead of a
# hard-coded number, so it always lands on the final future month.
forecast_end = len(df_monthly) - 1 + N_FORECAST_MONTHS
boxcox_forecast = result.predict(start=FORECAST_START, end=forecast_end)

# The model was fitted on Box-Cox values, so map predictions back to prices.
df_month2['forecast'] = invboxcox(boxcox_forecast, lmbda)

plt.figure(figsize=(15,7))
df_month2.Weighted_Price.plot()
df_month2.forecast.plot(color='r', ls='--', label='Predicted Weighted_Price')

plt.legend()
plt.title('Bitcoin exchanges, by months')
plt.ylabel('mean USD')
plt.show()