In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the raw Bitstamp BTC/USD file (per its name: 1-minute data, 2012-01-01 to 2020-09-14).
df_train = pd.read_csv('../input/bitcoin-historical-data/bitstampUSD_1-min_data_2012-01-01_to_2020-09-14.csv')

In [None]:
# Show the last rows of the freshly loaded frame.
df_train.tail()

In [None]:
# Parse the raw 'Timestamp' values (epoch seconds) into datetimes.
time_col = pd.to_datetime(df_train['Timestamp'], unit='s')

# Overwrite the column with its parsed version, then move it into the index;
# the remaining columns keep their original order.
df_train['Timestamp'] = time_col
df_train.set_index('Timestamp', inplace=True)

df_train.head()

In [None]:
# Group the rows into calendar-day bins once, then reduce each column with
# the aggregation that suits it.
per_day = df_train.resample('D')

daily_Open = per_day['Open'].first()    # first value of the day
daily_High = per_day['High'].max()      # highest value of the day
daily_Low = per_day['Low'].min()        # lowest value of the day
daily_Close = per_day['Close'].last()   # last value of the day
daily_Volume_BTC = per_day['Volume_(BTC)'].sum()
daily_Volume_Currency = per_day['Volume_(Currency)'].sum()

In [None]:
# header=1: column names come from the second line of the file; the line above it is skipped.
actual_df = pd.read_csv('../input/bitcoin-forecast-actual/Bitstamp_BTCUSD_d.csv', header=1)

In [None]:
# Assemble the per-day series into a single frame, one column per series.
daily_train_df = pd.DataFrame(
    {
        'Open': daily_Open,
        'High': daily_High,
        'Low': daily_Low,
        'Close': daily_Close,
        'Volume_(BTC)': daily_Volume_BTC,
        'Volume_(Currency)': daily_Volume_Currency,
    }
)
daily_train_df.tail()

In [None]:
# Inspect rows at positions 30 through 39 of actual_df.
actual_df.iloc[30:40]

In [None]:
# Count missing values per column.
actual_df.isnull().sum()

In [None]:
# Give the volume columns the same names used for the training frame, derive
# the weighted price as currency volume divided by BTC volume, and index the
# frame by its 'Date' column.
actual_df = (
    actual_df
    .rename(columns={'Volume BTC': 'Volume_(BTC)', 'Volume USD': 'Volume_(Currency)'})
    .assign(Weighted_Price=lambda frame: frame['Volume_(Currency)'] / frame['Volume_(BTC)'])
    .set_index('Date')
)
actual_df.head()

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the saved OHLCWp forecasts; the unnamed first column is renamed to
# 'Date' and becomes the index.
OHLCWp = (
    pd.read_csv('../input/bitcoin-forecast-actual/OHLCWp_mutistep_forecasts.csv')
    .rename(columns={'Unnamed: 0': 'Date'})
    .set_index('Date')
)
print(OHLCWp.shape)
OHLCWp.head()

In [None]:
# Load the saved volume forecasts, rename the columns to the names used by the
# other frames, and index by 'Date'.
BTC_CUR = (
    pd.read_csv('../input/bitcoin-forecast-actual/BTC_CUR_mutistep_forecast.csv')
    .rename(
        columns={
            'Unnamed: 0': 'Date',
            'Volume(Currency)': 'Volume_(Currency)',
            'Volume(BTC)': 'Volume_(BTC)',
        }
    )
    .set_index('Date')
)
print(BTC_CUR.shape)
BTC_CUR.head()

In [None]:
# Take the actual rows from the first forecast date back to the first row of
# actual_df, walking the frame in reverse (step -1), and keep only the columns
# that were forecast.
actual_OHLCWp_df = actual_df.loc[
    OHLCWp.index[0]:actual_df.index[0]:-1,
    list(OHLCWp.columns),
].copy()
print(actual_OHLCWp_df.shape)
actual_OHLCWp_df.tail()

In [None]:
# Keep only as many forecast rows as there are actual rows to compare against.
forecasted_OHLWp_df = OHLCWp.head(len(actual_OHLCWp_df)).copy()
print(forecasted_OHLWp_df.shape)
forecasted_OHLWp_df.head()

In [None]:
from matplotlib.dates import DateFormatter, DayLocator, MonthLocator, DateLocator

In [None]:
# Actual vs. forecast curves, one panel per forecast column (at most five).
fig = plt.figure(figsize=(18, 12))
# NOTE(review): `dates` and `date_form` are not used by any visible cell.
dates = DateLocator()
date_form = DateFormatter("%d-%m")

for i, col in enumerate(list(forecasted_OHLWp_df)[:5], start=1):
    # Panels 1-4 fill a 3x2 grid; the fifth spans the entire bottom row.
    a = fig.add_subplot(3, 1, 3) if i == 5 else fig.add_subplot(3, 2, i)

    a.plot(actual_OHLCWp_df[col])
    a.plot(forecasted_OHLWp_df[col])
    a.legend(['actual', 'forecast'])
    a.set_xlabel('days')
    a.set_ylabel(col)
    # One tick every fifth row keeps the axis readable.
    a.set_xticks(range(0, actual_OHLCWp_df.shape[0], 5))

plt.savefig('OHLCWp_forecast_vs_real.png', bbox_inches='tight')

In [None]:
from sklearn.metrics import mean_squared_error as mse

In [None]:
# defining mean absolute percentage error metric
def mape(df_true, df_pred, epsilon=1e-4):
    """Return the mean absolute percentage error, in percent, along axis 0.

    Parameters
    ----------
    df_true : array-like, Series or DataFrame
        Ground-truth values. The caller's object is never modified.
    df_pred : array-like, Series or DataFrame
        Predicted values, compatible with ``df_true`` for subtraction.
    epsilon : float, default 1e-4
        Replacement for exact zeros in ``df_true`` so the division is defined.

    Returns
    -------
    ``100 * mean(|true - pred| / |true|)`` reduced over axis 0: a scalar for
    1-D input, one value per column for 2-D input.
    """
    # Work on a float copy: the caller's data stays untouched, and `epsilon`
    # is not truncated to 0 when the input has an integer dtype.
    if hasattr(df_true, 'astype'):
        true_vals = df_true.astype(float)
    else:
        true_vals = np.array(df_true, dtype=float)

    true_vals[true_vals == 0] = epsilon

    error = true_vals - df_pred

    return 100 * np.mean(np.abs(error / true_vals), axis=0)

In [None]:
# defining a function to print out both rmse and mape scores
def print_rmse_and_mape(df_true, df_pred):
    """Print the per-column RMSE and MAPE of ``df_pred`` against ``df_true``.

    Parameters
    ----------
    df_true : DataFrame
        Ground-truth values; its column names label the printed scores.
    df_pred : DataFrame or array-like
        Predictions with the same shape as ``df_true``.

    Notes
    -----
    Both inputs are converted to plain arrays, so rows and columns are matched
    by position, not by label.
    """
    true_vals = np.array(df_true)
    pred_vals = np.array(df_pred)

    # RMSE is the square root of the per-column MSE. The `squared=False`
    # shortcut is avoided because newer scikit-learn releases dropped it.
    rmse_per_col = np.round(np.sqrt(mse(true_vals, pred_vals, multioutput='raw_values')), 3)
    for col, rmse in zip(df_true.columns, rmse_per_col):
        print('RMSE of {} is {}'.format(col, rmse))

    print()

    mape_per_col = np.round(mape(true_vals, pred_vals), 3)
    for col, col_mape in zip(df_true.columns, mape_per_col):
        print('MAPE of {} is {}%'.format(col, col_mape))

In [None]:
print_rmse_and_mape(actual_OHLCWp_df, forecasted_OHLWp_df) # prints RMSE and MAPE for each forecast column of OHLCWp

In [None]:
# Add a datetime version of the index as a 'Dates' column so the volume data
# can be resampled.
actual_df['Dates'] = pd.DatetimeIndex(actual_df.index)

# Volume columns, indexed by date, restricted to the forecast window (walked in
# reverse, step -1), summed into weekly bins, keeping the first four bins.
actual_BTC_CUR = (
    actual_df.set_index('Dates')[['Volume_(Currency)', 'Volume_(BTC)']]
    .loc[OHLCWp.index[0]:actual_df.index[0]:-1]
    .resample('W')
    .sum()
    .iloc[:4]
    .copy()
)
print(actual_BTC_CUR.shape)
actual_BTC_CUR

In [None]:
# Keep only as many forecast rows as there are actual weekly rows.
forecasted_BTC_CUR = BTC_CUR.head(len(actual_BTC_CUR))
forecasted_BTC_CUR

In [None]:
# Actual vs. forecast weekly volumes, one side-by-side panel for each of the
# first two forecast columns.
fig = plt.figure(figsize=(18, 6))
for i, col in enumerate(forecasted_BTC_CUR.columns[:2], start=1):
    a = fig.add_subplot(1, 2, i)
    a.set_xlabel('weeks')
    a.set_ylabel(col)
    a.plot(actual_BTC_CUR[col], '-')
    a.plot(forecasted_BTC_CUR[col], '-')
    a.legend(['actual', 'forecast'])

In [None]:
print_rmse_and_mape(actual_BTC_CUR, forecasted_BTC_CUR) # prints RMSE and MAPE for each volume column of BTC_CUR