In [None]:
# Import libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
from scipy import stats
import statsmodels.api as sm
import warnings
from itertools import product
from datetime import datetime
import warnings
# Silence library warnings so the notebook output stays readable.
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')
# The seaborn style sheets were renamed with a 'seaborn-v0_8-' prefix in
# matplotlib 3.6 and the old names were later removed, so use whichever
# spelling of the "poster" style this installation provides.
_poster_styles = [
    name for name in ('seaborn-v0_8-poster', 'seaborn-poster')
    if name in plt.style.available
]
if _poster_styles:
    plt.style.use(_poster_styles[0])
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.seasonal import seasonal_decompose
import statsmodels.api as sm

In [None]:
# Import datetime
from datetime import datetime

In [None]:
# Read the CSV at DATA_PATH into `df` and preview the first rows
DATA_PATH = '../input/bitcoin-historical-data/bitstampUSD_1-min_data_2012-01-01_to_2021-03-31.csv'
df = pd.read_csv(DATA_PATH)
df.head()

# Preprocessing

* We first want to check whether the data contain any outliers.

In [None]:
# Exponentially weighted moving average (span=90) of the traded BTC volume
volume = df['Volume_(BTC)']
ewm_mean = volume.ewm(span=90).mean()

# Show the smoothed series as text
print(ewm_mean)

# Overlay the raw volume and its exponentially weighted moving average
fig, ax = plt.subplots()
for series, caption in ((volume, 'original'), (ewm_mean, 'ewma')):
    ax.plot(series, label=caption)
ax.legend()
plt.show()

* The exponentially weighted moving standard deviation is also calculated, and data points that deviate from the exponentially weighted moving average by more than three standard deviations are plotted as outliers.

In [None]:
def plot_outlier(ts, ewm_span=90, threshold=3.0):
    """Plot a series with its EWMA band and highlight points outside the band.

    Parameters
    ----------
    ts : pandas.Series
        Series to inspect.
    ewm_span : int, default 90
        Span passed to ``ts.ewm`` for both the moving mean and moving std.
    threshold : float, default 3.0
        Half-width of the band, expressed in moving standard deviations.

    Returns
    -------
    tuple
        ``(fig, outlier)`` where ``fig`` is the created figure and ``outlier``
        is the subset of ``ts`` whose absolute distance from the moving mean
        exceeds ``threshold`` times the moving standard deviation.
    """
    fig, ax = plt.subplots()

    # Exponentially weighted moving mean and standard deviation of the series
    weighted = ts.ewm(span=ewm_span)
    ewm_mean = weighted.mean()
    ewm_std = weighted.std()

    # Half-width of the tolerance band around the moving mean
    band = ewm_std * threshold
    lower_bound = ewm_mean - band
    upper_bound = ewm_mean + band

    ax.plot(ts, label='original')
    ax.plot(ewm_mean, label='ewma')

    # Shade the tolerance band, then mark every point that falls outside it
    ax.fill_between(ts.index, lower_bound, upper_bound, alpha=0.2)
    is_outlier = (ts - ewm_mean).abs() > band
    outlier = ts[is_outlier]
    ax.scatter(outlier.index, outlier, label='outlier')

    ax.legend()
    plt.show()
    return fig, outlier

# Flag outliers in the traded BTC volume (span of 90, band of 3 standard deviations)
fig, out_fil = plot_outlier(
    df['Volume_(BTC)'],
    ewm_span=90,
    threshold=3.0,
)

In [None]:
# Show the full rows of df whose index labels were flagged as outliers
flagged = df.index.isin(out_fil.index)
df[flagged]

* Now that we have identified the outliers, we can use their index to remove them from the data.

In [None]:
# Keep only the records that were not flagged as outliers.
# .copy() yields an independent frame, so the column assignments performed
# on df afterwards do not go through pandas' SettingWithCopy path.
df = df[~df.index.isin(out_fil.index)].copy()

* Next, the missing values are identified, the exponentially weighted moving average is calculated, and the missing values are then filled in with that average.

In [None]:
# Exponentially weighted moving average (span=90) of the traded BTC volume,
# kept both as a standalone series and as a helper column on df
volume_series = df['Volume_(BTC)']
ewm_mean = volume_series.ewm(span=90).mean()
df['ewm_mean'] = ewm_mean

* Assign the value of `ewm_mean` to the records with missing values.

In [None]:
# Completing missing values with exponentially weighted moving averages.
# Each column is filled from its OWN moving average: the 'ewm_mean' column
# holds the average of Volume_(BTC), so it is only valid for the volume column.
volume_missing = df['Volume_(BTC)'].isnull()
df.loc[volume_missing, 'Volume_(BTC)'] = df.loc[volume_missing, 'ewm_mean']

# Weighted_Price is in USD, so it gets a moving average computed from the
# price series itself (same span as the volume average).
price_ewm_mean = df['Weighted_Price'].ewm(span=90).mean()
price_missing = df['Weighted_Price'].isnull()
df.loc[price_missing, 'Weighted_Price'] = price_ewm_mean[price_missing]

In [None]:
# Unix time (seconds) to datetime
df['Timestamp'] = pd.to_datetime(df['Timestamp'], unit='s')

# Move Timestamp into the index (rather than duplicating it as index AND
# column) so that later references to "Timestamp" are unambiguous and the
# timestamps themselves are not averaged during resampling.
df = df.set_index('Timestamp')

# Resampling to daily frequency
df = df.resample('D').mean()

# Resampling to monthly frequency
df_month = df.resample('M').mean()

# Resampling to annual frequency
df_year = df.resample('A-DEC').mean()

# Resampling to quarterly frequency
df_Q = df.resample('Q-DEC').mean()

In [None]:
# Keep the monthly resample (instead of replacing it with the daily frame) and
# make sure it is in chronological order; the datetime index is retained so the
# monthly series can be plotted against dates.
df_month = df_month.sort_index()

In [None]:
# Weighted price at four sampling frequencies, one panel each in a 2x2 grid
fig = plt.figure(figsize=[15, 7])
plt.suptitle('Bitcoin exchanges, mean USD', fontsize=22)

panels = (
    (221, df, 'By Days'),
    (222, df_month, 'By Months'),
    (223, df_Q, 'By Quarters'),
    (224, df_year, 'By Years'),
)
for position, frame, caption in panels:
    plt.subplot(position)
    plt.plot(frame.Weighted_Price, '-', label=caption)
    plt.legend()

plt.show()

In [None]:
# Keep the OHLC price columns, plus a one-column frame holding only the close
ohlc_columns = ['Open', 'High', 'Low', 'Close']
df_new = df[ohlc_columns]
df_close = df[['Close']]

In [None]:
# Close series drawn in the first cell of a 2x2 subplot grid
plt.subplot(2, 2, 1)
plt.plot(df_new['Close'], '-', label='By Days')
plt.legend()

In [None]:
# 'High' column over time
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(data=df_new, x=df_new.index, y="High", ax=ax)
plt.show()

In [None]:
# 'Low' column over time
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(data=df_new, x=df_new.index, y="Low", ax=ax)
plt.show()

In [None]:
# 'Close' column over time
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(data=df_new, x=df_new.index, y="Close", ax=ax)
plt.show()

In [None]:
# 'Open' column over time. 'Close' already has its own plot, and 'Open' is the
# only extracted OHLC column that would otherwise not be visualised.
plt.figure(figsize=(10,6))
sns.lineplot(x=df_new.index, y="Open", data=df_new)
plt.show()

# Building a Predictive Model

In [None]:
# Print the extracted OHLC columns as a quick check before modelling
print(df_new)

In [None]:
# Chronological split: rows before SPLIT_DATE train the model, the rest test it.
# The mask is built from df's own index so the split does not depend on another
# frame happening to share the same index.
SPLIT_DATE = '2019-01-01'

# Training data
train = df[df.index < SPLIT_DATE]

# Testing data
test = df[df.index >= SPLIT_DATE]

In [None]:
# Creating a SARIMA model.
# Fit on the training window only: the model output is compared against the
# `test` rows afterwards, so those rows must not be seen during fitting
# (fitting on the full series would make that comparison in-sample).
# NOTE(review): forward-looking projections far beyond the data are usually
# better served by a model refitted on the full series - confirm intent.
sarima = sm.tsa.statespace.SARIMAX(
    train['Volume_(BTC)'],
    order=(2, 1, 3),
    seasonal_order=(0, 2, 2, 12),
).fit()

# Model summary display
print(sarima.summary())

In [None]:
# Model output for the test window, printed as text
test_forecast = sarima.predict(start='2019-01-01', end='2021-03-31')
print(test_forecast)

In [None]:
# Overlay the observed test values and the model output for the same window
predicted = sarima.predict('2019-01-01', '2021-03-31')
plt.plot(test['Volume_(BTC)'], label='original')
plt.plot(predicted, label='predicted', color='red')
plt.legend(loc='best')
plt.show()

* The forecasting model is not able to reproduce the spikes in the modelled series (`Volume_(BTC)`) in 2019-04, 2019-07, 2019-10, 2020-04, and 2021-01, but it is able to reproduce the upward and downward trends.

* Let's project beyond April 2021.

In [None]:
# Forecast from April 2021 through March 2030.
# The date strings use plain ASCII digits so they parse reliably.
print(sarima.predict('2021-04-01','2030-03-31'))

In [None]:
# Plot the forecast from April 2021 through March 2030.
# The date strings use plain ASCII digits so they parse reliably.
plt.plot(sarima.predict('2021-04-01','2030-03-31'), label='predicted', color='black')
plt.legend(loc='best')
plt.show()

* It looks like it will rise until 2024, but after that it will trend downward. I guess I'd better sell my bitcoin before 2024.

# Acknowledgements
* [Bitcoin Price. Prediction by ARIMA](https://www.kaggle.com/myonin/bitcoin-price-prediction-by-arima)