# ARIMAX

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
import seaborn as sns
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller
from sklearn.metrics import mean_squared_error
import helper_functions as hf

import itertools
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline

### Data

In [3]:
# Read the blockchain metrics export (semicolon-separated CSV) and preview it.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs as-is on a machine where this exact location exists.
csv_path = "/Users/francoisporcher/Desktop/EI/st4-mds/data/df_blockchain.csv"
df_blockchain = pd.read_csv(csv_path, sep=";")
df_blockchain.head()

Unnamed: 0,Date,market-price,n-transactions-per-block,median-confirmation-time,hash-rate,difficulty,miners-revenue,trade-volume,blocks-size,avg-block-size,...,cost-per-transaction,n-unique-addresses,n-transactions,n-transactions-total,n-transactions-excluding-popular,output-volume,estimated-transaction-volume,estimated-transaction-volume-usd,total-bitcoins,market-cap
0,2011-09-13,6.88,45.908451,0.0,12.018816,1777774.0,52318.011503,0.0,592.190091,0.019009,...,7.666766,12622.0,6519.0,1497195.0,6519.0,358543.612114,58615.64132,403275.612279,7257416.0,50220140.0
1,2011-09-14,6.19,42.465753,0.0,13.263925,1755425.0,48306.468911,0.0,594.907367,0.018007,...,7.369408,12408.0,6200.0,1503780.0,6200.0,302619.024544,74521.484625,461287.98983,7264662.0,45409300.0
2,2011-09-15,5.92,41.5,0.0,12.914875,1755425.0,60431.444952,0.0,597.554226,0.01824,...,7.333913,12988.0,6474.0,1509972.0,6474.0,299226.130646,79422.402932,470180.625359,7272284.0,43222280.0
3,2011-09-16,5.58,52.176471,0.0,10.995096,1755425.0,34345.021913,0.0,600.362512,0.022136,...,5.466341,12059.0,6209.0,1516381.0,6209.0,674606.861338,82696.853247,461448.441118,7279040.0,40881360.0
4,2011-09-17,5.18,40.701493,0.0,10.733308,1755425.0,36805.913687,0.0,602.99551,0.017116,...,6.489054,10988.0,5454.0,1522600.0,5454.0,354198.945778,68238.166521,353473.702578,7285375.0,38018330.0


In [4]:
# Split the full frame into a training part and a test part with the project helper.
# NOTE(review): later cells slice df_blockchain positionally to line up with these
# parts, which presumably means hf.data_split keeps row order and returns the
# trailing rows as the test part -- TODO confirm in helper_functions.
X_train, X_test = hf.data_split(df_blockchain)
# Disabled experiment: mean-imputation of the training frame (the result was never assigned).
# X_train.fillna(X_train.mean())

### Choix du meilleur modèle

In [9]:
# Search space for the ARIMA order: p, d and q each range over 0, 1, 2,
# and pdq enumerates every (p, d, q) combination.
p, d, q = (range(3),) * 3
pdq = [order for order in itertools.product(p, d, q)]

# Target series (market price), split with the project helper.
y_train, y_test = hf.data_split(df_blockchain["market-price"])

# Exogenous variables passed to the model, split with the same helper.
selected_columns = ["miners-revenue", "market-cap", "cost-per-transaction"]
exog_train, exog_test = hf.data_split(df_blockchain[selected_columns])

In [10]:
# Grid search over every (p, d, q) order in pdq, scored by walk-forward RMSE on
# the test window. The order with the lowest RMSE ends up in best_cfg.
best_score, best_cfg = float("inf"), None

for param in pdq:
    try:
        # Start each candidate from the training data only.
        history = list(y_train)
        exog_ = exog_train.copy()
        predictions = []

        for t in range(len(y_test)):
            # Refit on everything observed so far, then keep only the first
            # value of the forecast (the prediction for test step t).
            model = sm.tsa.statespace.SARIMAX(history, order=param, exog=exog_, enforce_stationarity=False, enforce_invertibility=False)
            model_fit = model.fit()
            yhat = model_fit.predict(start=X_test.index[t], end=X_test.index[-1], exog=exog_test[t:])
            predictions.append(yhat.values[0])

            # Reveal the true value for step t and extend the regressors to the
            # same number of rows. Slicing by len(history) keeps exog aligned
            # with endog without hard-coding the size of the test window.
            history.append(y_test.values[t])
            exog_ = df_blockchain[selected_columns].iloc[:len(history)]

        # Out-of-sample error for this order.
        error = hf.rmse(y_test, predictions)
        print('ARIMAX{} - RMSE:{}'.format(param, error))
        if error < best_score:
            best_score, best_cfg = error, param
    except Exception as exc:
        # Some orders fail to fit; report which and why instead of hiding it.
        # Catching Exception (not a bare except) lets Ctrl-C interrupt the search.
        print('ARIMAX{} - skipped ({}: {})'.format(param, type(exc).__name__, exc))
        continue

# Report the best order; RMSE is the selection criterion.
print("Best ARIMAX%s - RMSE=%.3f" % (best_cfg, best_score))

ARIMAX(0, 0, 0) - RMSE:344.6744571430455
ARIMAX(0, 0, 1) - RMSE:308.42909021282367
ARIMAX(0, 0, 2) - RMSE:274.8497966885849
ARIMAX(0, 1, 0) - RMSE:256.913281212085
ARIMAX(0, 1, 1) - RMSE:213.47346615471565
ARIMAX(0, 1, 2) - RMSE:213.9326825792465
ARIMAX(0, 2, 0) - RMSE:430.29037869650443
ARIMAX(0, 2, 1) - RMSE:262.9579478495177
ARIMAX(0, 2, 2) - RMSE:249.92045974576448
ARIMAX(1, 0, 0) - RMSE:271.32620458669203
ARIMAX(1, 0, 1) - RMSE:200.45653693576818
ARIMAX(1, 0, 2) - RMSE:200.84817840422323
ARIMAX(1, 1, 0) - RMSE:228.1801656570357
ARIMAX(1, 1, 1) - RMSE:216.6672034696147
ARIMAX(1, 1, 2) - RMSE:212.86030477659307
ARIMAX(1, 2, 0) - RMSE:306.61164375020616
ARIMAX(1, 2, 1) - RMSE:263.2517956078189
ARIMAX(1, 2, 2) - RMSE:243.29503080703253
ARIMAX(2, 0, 0) - RMSE:226.11773107925293
ARIMAX(2, 0, 1) - RMSE:201.76447804448026
ARIMAX(2, 0, 2) - RMSE:228.26392644255668
ARIMAX(2, 1, 0) - RMSE:232.48736452403125
ARIMAX(2, 1, 1) - RMSE:217.82161507534138
ARIMAX(2, 1, 2) - RMSE:213.08007244749317
A

In [None]:
# Conclusion: the best model here is ARIMAX(1, 0, 1), with an RMSE of 200.457, using the
# exogenous variables "miners-revenue", "market-cap" and "cost-per-transaction".

### Predictions for the best model

In [None]:
# Walk-forward predictions over the test window using the selected order best_cfg.
history = list(y_train)
exog_ = exog_train.copy()
predictions = []

for t in range(len(y_test)):
    # Refit on everything observed so far, then keep only the first value of
    # the forecast (the prediction for test step t).
    model = sm.tsa.statespace.SARIMAX(history, order=best_cfg, exog=exog_, enforce_stationarity=False, enforce_invertibility=False)
    model_fit = model.fit()
    yhat = model_fit.predict(start=X_test.index[t], end=X_test.index[-1], exog=exog_test[t:])
    predictions.append(yhat.values[0])

    # Reveal the true value for step t and extend the regressors to the same
    # number of rows. Slicing by len(history) keeps exog aligned with endog
    # without hard-coding the size of the test window.
    history.append(y_test.values[t])
    exog_ = df_blockchain[selected_columns].iloc[:len(history)]

# Out-of-sample error of the selected model.
error = hf.rmse(y_test, predictions)
print('ARIMAX{} - RMSE:{}'.format(best_cfg, error))

In [None]:
# prepare the dataset for plotting
predict_date = df_blockchain.iloc[-30::]["Date"]
df_predict = pd.DataFrame(zip(predict_date,
                              predictions, X_test["market-price"].values),
                          columns=["date", "predict", "true"])
df_predict

In [None]:
# Plotting
fig, ax = plt.subplots(figsize=(8,4))

sns.lineplot(x="date", y="true", data=df_predict,
             ax=ax, label="y_true", markers=True )
plot_ = sns.lineplot(x="date",
             y="predict",
             data=df_predict,
             color="red",ax=ax, label="y_predict")
plot_.lines[1].set_linestyle("--")
new_ticks = plot_.get_xticklabels()
freq = 4
ax.set_xticks(np.arange(0, len(new_ticks), freq))
labels = predict_date[::freq]
ax.set_xticklabels(labels)
ax.set_ylabel("Bitcoin Market Price [USD]")
ax.set_xlabel("Test date")
ax.set_title("Model ARIMAX{}".format(best_cfg))
sns.despine()
plt.legend()
plt.tight_layout()