# ARIMA Model

In [32]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

from sklearn.preprocessing import MinMaxScaler

from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose

from scipy import stats
import statsmodels.api as sm
from itertools import product

In [33]:
df = pd.read_csv("Data/BTC-USD.csv")
print(df.shape)
df.head()

(1827, 7)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2016-12-21,800.643982,834.281006,799.405029,834.281006,834.281006,155576000
1,2016-12-22,834.179993,875.781982,834.148987,864.539978,864.539978,200027008
2,2016-12-23,864.888,925.117004,864.677002,921.984009,921.984009,275564000
3,2016-12-24,922.179993,923.479004,886.335022,898.822021,898.822021,137727008
4,2016-12-25,899.651978,899.651978,862.424011,896.182983,896.182983,143664992


In [34]:
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2016-12-21,800.643982,834.281006,799.405029,834.281006,834.281006,155576000
1,2016-12-22,834.179993,875.781982,834.148987,864.539978,864.539978,200027008
2,2016-12-23,864.888000,925.117004,864.677002,921.984009,921.984009,275564000
3,2016-12-24,922.179993,923.479004,886.335022,898.822021,898.822021,137727008
4,2016-12-25,899.651978,899.651978,862.424011,896.182983,896.182983,143664992
...,...,...,...,...,...,...,...
1822,2021-12-17,47653.730469,48004.894531,45618.214844,46202.144531,46202.144531,32902725329
1823,2021-12-18,46219.253906,47313.828125,45598.441406,46848.777344,46848.777344,26098292690
1824,2021-12-19,46853.867188,48089.664063,46502.953125,46707.015625,46707.015625,25154053861
1825,2021-12-20,46707.062500,47401.718750,45579.808594,46880.277344,46880.277344,30961902129


In [50]:
df.dtypes

Date         datetime64[ns]
Open                float64
High                float64
Low                 float64
Close               float64
Adj Close           float64
Volume                int64
dtype: object

In [53]:
df_month = df.resample('M').mean()

TypeError: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'RangeIndex'

In [35]:
scaled_data = df[['Open', 'High', 'Low', 'Close', 'Volume']]
scaler = MinMaxScaler(copy=False)
scaled_data[['Open', 'High', 'Low', 'Close', 'Volume']] = scaler.fit_transform(scaled_data[['Open', 'High', 'Low', 'Close', 'Volume']])
scaled_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[col] = igetitem(value, i)


Unnamed: 0,Open,High,Low,Close,Volume
0,0.000381,0.000161,0.000665,0.000846,0.000270
1,0.000884,0.000772,0.001195,0.001299,0.000397
2,0.001343,0.001498,0.001660,0.002159,0.000612
3,0.002201,0.001474,0.001990,0.001813,0.000219
4,0.001864,0.001123,0.001625,0.001773,0.000236
...,...,...,...,...,...
1822,0.702042,0.694191,0.683605,0.680117,0.093591
1823,0.680560,0.684023,0.683304,0.689799,0.074200
1824,0.690064,0.695438,0.697086,0.687676,0.071510
1825,0.687865,0.685316,0.683020,0.690270,0.088060


In [36]:
df2 = pd.DataFrame(df, columns = ['Date', 'Close'])
df2

Unnamed: 0,Date,Close
0,2016-12-21,834.281006
1,2016-12-22,864.539978
2,2016-12-23,921.984009
3,2016-12-24,898.822021
4,2016-12-25,896.182983
...,...,...
1822,2021-12-17,46202.144531
1823,2021-12-18,46848.777344
1824,2021-12-19,46707.015625
1825,2021-12-20,46880.277344


In [37]:
# Initial approximation of parameters
qs = range(0, 3)
ps = range(0, 3)
d=1
parameters = product(ps, qs)
parameters_list = list(parameters)
len(parameters_list)

9

In [38]:
# Model Selection
results = []
best_aic = float("inf")
for param in parameters_list:
    try:
        model = SARIMAX(scaled_data, order=(param[0], d, param[1])).fit(disp=-1)
    except ValueError:
        print('bad parameter combination:', param)
        continue
    aic = model.aic
    if aic < best_aic:
        best_model = model
        best_aic = aic
        best_param = param
    results.append([param, model.aic])

bad parameter combination: (0, 0)
bad parameter combination: (0, 1)
bad parameter combination: (0, 2)
bad parameter combination: (1, 0)
bad parameter combination: (1, 1)
bad parameter combination: (1, 2)
bad parameter combination: (2, 0)
bad parameter combination: (2, 1)
bad parameter combination: (2, 2)


In [45]:
btc_month2 = scaled_data[['Close']]
date_list = [datetime(2016,12,21),datetime(2021,12,21)]
future = pd.DataFrame(index=date_list, columns= scaled_data.columns)
btc_month2 = pd.concat([btc_month2, future])



In [46]:
btc_month2['forecast'] = invboxcox(best_model.predict(start=0, end=75), lmbda)

NameError: name 'invboxcox' is not defined

In [49]:
plt.figure(figsize=(15,7))
btc_month2.close.plot()
btc_month2.future.plot(color='r', ls='--', label='forecast')
plt.legend()
plt.title('Bitcoin Monthly Close Forecast')
plt.ylabel('USD')

plt.show()

AttributeError: 'DataFrame' object has no attribute 'close'

<Figure size 1080x504 with 0 Axes>