In [1]:
import warnings
from math import sqrt
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
warnings.filterwarnings("ignore")

In [3]:
def evaluate_arima_model(X, arima_order, train_fraction=0.80):
    """Return the walk-forward RMSE of one ARIMA order on a univariate series.

    The leading ``train_fraction`` of ``X`` seeds the history. For each
    remaining observation a new ARIMA model is fitted on the history, its
    one-step-ahead forecast is recorded, and the true observation is appended
    to the history before the next step.

    Args:
        X: One-dimensional sequence of observations in time order (list,
            NumPy array or pandas Series).
        arima_order: ``(p, d, q)`` tuple forwarded to ``ARIMA(order=...)``.
        train_fraction: Share of ``X`` used as the initial training window;
            must lie strictly between 0 and 1. Defaults to 0.80.

    Returns:
        float: Root mean squared error of the one-step forecasts over the
        held-out tail of ``X``.

    Raises:
        ValueError: If ``train_fraction`` is outside (0, 1) or the split
            leaves the training or the test part empty.
    """
    if not 0 < train_fraction < 1:
        raise ValueError(
            "train_fraction must be strictly between 0 and 1, got %r" % (train_fraction,)
        )

    # Materialise as a plain list so every lookup below is positional; indexing
    # a pandas Series with an integer would be a label lookup instead.
    observations = list(X)
    train_size = int(len(observations) * train_fraction)
    history = observations[:train_size]
    test = observations[train_size:]
    if not history or not test:
        raise ValueError(
            "need a non-empty train and test split, got %d train / %d test observations"
            % (len(history), len(test))
        )

    # Walk forward: refit on everything seen so far, forecast one step, then
    # reveal the true value to the next iteration.
    predictions = []
    for actual in test:
        model_fit = ARIMA(history, order=arima_order).fit()
        predictions.append(model_fit.forecast()[0])
        history.append(actual)

    # Out-of-sample error
    return sqrt(mean_squared_error(test, predictions))

In [4]:
# evaluate combinations of p, d and q values for an ARIMA model
def evaluate_models(dataset, p_values, d_values, q_values):
    """Grid-search ARIMA orders and report the one with the lowest RMSE.

    Every ``(p, d, q)`` combination is scored with ``evaluate_arima_model``.
    The RMSE of each successful order is printed as it is computed; orders
    that raise are reported and skipped so one bad configuration does not
    abort the search.

    Args:
        dataset: Observations in time order; must provide ``astype`` (for
            example a NumPy array or pandas Series). It is cast to float32
            before evaluation.
        p_values: Iterable of autoregressive orders to try.
        d_values: Iterable of differencing orders to try.
        q_values: Iterable of moving-average orders to try.

    Returns:
        tuple: ``(best_cfg, best_score)`` where ``best_cfg`` is the winning
        ``(p, d, q)`` tuple and ``best_score`` its RMSE. If no order could be
        evaluated the result is ``(None, float("inf"))``.
    """
    from itertools import product

    dataset = dataset.astype('float32')
    best_score, best_cfg = float("inf"), None
    # product() iterates p outermost and q innermost, matching nested loops.
    for order in product(p_values, d_values, q_values):
        try:
            rmse = evaluate_arima_model(dataset, order)
        except Exception as exc:
            # Catch Exception only: KeyboardInterrupt/SystemExit must still be
            # able to stop a search that can run for a long time.
            print('ARIMA%s failed: %s' % (order, exc))
            continue
        if rmse < best_score:
            best_score, best_cfg = rmse, order
        print('ARIMA%s RMSE=%.3f' % (order, rmse))

    if best_cfg is None:
        print('No ARIMA configuration could be evaluated')
    else:
        print('Best ARIMA%s RMSE=%.3f' % (best_cfg, best_score))
    return best_cfg, best_score

In [2]:
# Load the raw price table. `parse_dates=True` only asks pandas to parse the
# index, which would leave the `date` column as plain strings, so the column
# is named explicitly to have it read as datetimes.
df = pd.read_csv('all_stocks_5yr.csv', parse_dates=['date'])

In [3]:
# Keep only the rows whose ticker symbol is AAL
df = df.loc[df['Name'].eq('AAL')]

In [4]:
# Show the first five rows of the selected stock as a quick sanity check
df.head(n=5)

Unnamed: 0,date,open,high,low,close,volume,Name
0,2013-02-08,15.07,15.12,14.63,14.75,8407500,AAL
1,2013-02-11,14.89,15.01,14.26,14.46,8882000,AAL
2,2013-02-12,14.45,14.51,14.1,14.27,8126000,AAL
3,2013-02-13,14.3,14.94,14.25,14.66,10259500,AAL
4,2013-02-14,14.94,14.96,13.16,13.99,31879900,AAL


In [16]:
# Candidate orders for the grid search: p in 0..9, d in 0..2, q in 0..2
p_values = range(10)
d_values = range(3)
q_values = range(3)
# Ignore every warning raised from here on (applies to the whole search)
warnings.filterwarnings("ignore")
# Score each (p, d, q) combination on the closing prices
evaluate_models(df['close'].values, p_values, d_values, q_values)

ARIMA(0, 0, 0) RMSE=11.695
ARIMA(0, 0, 1) RMSE=6.136
ARIMA(0, 0, 2) RMSE=3.817
ARIMA(0, 1, 0) RMSE=0.892
ARIMA(0, 1, 1) RMSE=0.891
ARIMA(0, 1, 2) RMSE=0.892
ARIMA(0, 2, 0) RMSE=1.224
ARIMA(0, 2, 1) RMSE=0.892
ARIMA(0, 2, 2) RMSE=0.891
ARIMA(1, 0, 0) RMSE=0.892
ARIMA(1, 0, 1) RMSE=0.892
ARIMA(1, 0, 2) RMSE=0.893
ARIMA(1, 1, 0) RMSE=0.891
ARIMA(1, 1, 1) RMSE=0.891
ARIMA(1, 1, 2) RMSE=0.892
ARIMA(1, 2, 0) RMSE=1.081
ARIMA(1, 2, 1) RMSE=0.891
ARIMA(1, 2, 2) RMSE=0.892
ARIMA(2, 0, 0) RMSE=0.892
ARIMA(2, 0, 1) RMSE=0.892
ARIMA(2, 0, 2) RMSE=0.893
ARIMA(2, 1, 0) RMSE=0.892
ARIMA(2, 1, 1) RMSE=0.893
ARIMA(2, 1, 2) RMSE=0.892
ARIMA(2, 2, 0) RMSE=1.027
ARIMA(2, 2, 1) RMSE=0.892
ARIMA(2, 2, 2) RMSE=0.893
ARIMA(3, 0, 0) RMSE=0.892
ARIMA(3, 0, 1) RMSE=0.894
ARIMA(3, 0, 2) RMSE=0.893
ARIMA(3, 1, 0) RMSE=0.892
ARIMA(3, 1, 1) RMSE=3.365
ARIMA(3, 1, 2) RMSE=0.897
ARIMA(3, 2, 0) RMSE=1.000
ARIMA(3, 2, 1) RMSE=0.892
ARIMA(3, 2, 2) RMSE=0.893
ARIMA(4, 0, 0) RMSE=0.893
ARIMA(4, 0, 1) RMSE=0.894
ARIMA(4, 0,