In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
from pandas.plotting import register_matplotlib_converters
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
register_matplotlib_converters()
from time import time

In [2]:
def parser(s):
    """Convert an ISO-style ``YYYY-MM-DD`` string into a ``datetime``.

    Raises ``ValueError`` (from ``datetime.strptime``) when ``s`` does not
    match that layout exactly.
    """
    iso_day_format = '%Y-%m-%d'
    parsed = datetime.strptime(s, iso_day_format)
    return parsed

In [3]:
# read data
# Column 1 of the CSV holds the dates and becomes the index.
# `squeeze=True` was dropped: it is rejected by pandas >= 2.0 and had no
# effect here because two data columns remain after indexing.
df = pd.read_csv("steps_p01.csv", index_col=1)
# Parse the index with an explicit format (instead of the deprecated
# `date_parser=` argument) so a malformed date string raises rather than
# being guessed at.
df.index = pd.to_datetime(df.index, format='%Y-%m-%d')


In [4]:
# Preview the first rows to confirm the date index and the two data columns
# loaded as expected. NOTE(review): the recorded preview skips calendar days
# (e.g. 2019-11-02 -> 2019-11-04), so the series does not look like a
# complete daily calendar -- confirm before assuming regular spacing.
df.head()

Unnamed: 0_level_0,Participant_ID,Steps
Dates,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-11-01,p01,4664
2019-11-02,p01,3035
2019-11-04,p01,1284
2019-11-05,p01,4966
2019-11-08,p01,2094


In [5]:
# Check column types: the step counts must be numeric for the models below.
df.dtypes

Participant_ID    object
Steps              int64
dtype: object

In [6]:
# Keep only the step counts; the participant id is not a model input.
df_copy = df.drop(columns=["Participant_ID"])

In [7]:
# Preview the frame after dropping Participant_ID: only Steps should remain.
df_copy.head()


Unnamed: 0_level_0,Steps
Dates,Unnamed: 1_level_1
2019-11-01,4664
2019-11-02,3035
2019-11-04,1284
2019-11-05,4966
2019-11-08,2094


In [8]:
# Confirm the remaining column is still numeric after the drop.
df_copy.dtypes

Steps    int64
dtype: object

In [9]:
# Number of non-null observations per column; this is the series length the
# 66/34 train/test split in the evaluation functions operates on.
df_copy.count()

Steps    117
dtype: int64

In [10]:

# grid search ARIMA parameters for time series
import warnings
from math import sqrt
from pandas import read_csv
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
import statsmodels.api as sm

In [11]:
# evaluate an ARIMA model for a given order (p,d,q)
def evaluate_arima_model(X, arima_order):
    """Walk-forward validate an ARIMA(p,d,q) model and return its MAPE.

    The first 66% of ``X`` seeds the training history. For every remaining
    observation the model is refit on the history so far, a one-step-ahead
    forecast is recorded, and the true observation is appended to the
    history before the next step.

    Parameters
    ----------
    X : array-like
        A single univariate series: either 1-D, or a single-column 2-D
        array such as ``DataFrame.values`` of a one-column frame.
    arima_order : tuple
        The ``(p, d, q)`` order handed to ``ARIMA``.

    Returns
    -------
    float
        Mean absolute percentage error as a fraction (1.0 means 100%).
        The error is divided by the actual value, so the result is
        ``inf``/``nan`` when a held-out observation equals zero.

    Raises
    ------
    ValueError
        If ``X`` cannot be reduced to one 1-D series.
    """
    # Flatten to 1-D first. With an (n, 1) input, subtracting the (n, 1)
    # actuals from the length-n predictions would broadcast to an (n, n)
    # matrix and silently produce a wrong error value.
    series = np.atleast_1d(np.squeeze(np.asarray(X)))
    if series.ndim != 1:
        raise ValueError(
            "X must be a single univariate series, got shape %s" % (np.shape(X),)
        )

    # prepare training dataset
    train_size = int(len(series) * 0.66)
    train, test = series[:train_size], series[train_size:]
    history = list(train)

    # make one-step-ahead predictions, refitting on the growing history
    predictions = []
    for actual in test:
        model_fit = ARIMA(history, order=arima_order).fit()
        predictions.append(model_fit.forecast()[0])
        history.append(actual)

    # calculate out of sample error (element-wise on aligned 1-D arrays)
    predictions = np.asarray(predictions, dtype=float)
    mape = np.mean(np.abs(predictions - test) / np.abs(test))
    return mape

In [12]:
# evaluate an SARIMA model for a given order (p,d,q,ps,ds,qs)
def evaluate_sarima_model(X, arima_order, sarima_order):
    """Walk-forward validate a SARIMA model and return its MAPE.

    The first 66% of ``X`` seeds the training history. For every remaining
    observation a ``SARIMAX`` model is refit on the history so far, a
    one-step-ahead forecast is recorded, and the true observation is
    appended to the history before the next step.

    Parameters
    ----------
    X : array-like
        A single univariate series: either 1-D, or a single-column 2-D
        array such as ``DataFrame.values`` of a one-column frame.
    arima_order : tuple
        The non-seasonal ``(p, d, q)`` order.
    sarima_order : tuple
        The seasonal ``(P, D, Q, s)`` order passed as ``seasonal_order``.

    Returns
    -------
    float
        Mean absolute percentage error as a fraction (1.0 means 100%).
        The error is divided by the actual value, so the result is
        ``inf``/``nan`` when a held-out observation equals zero.

    Raises
    ------
    ValueError
        If ``X`` cannot be reduced to one 1-D series.
    """
    print("Call SARIMA MODEL")

    # Flatten to 1-D first. With an (n, 1) input, subtracting the (n, 1)
    # actuals from the length-n predictions would broadcast to an (n, n)
    # matrix and silently produce a wrong error value.
    series = np.atleast_1d(np.squeeze(np.asarray(X)))
    if series.ndim != 1:
        raise ValueError(
            "X must be a single univariate series, got shape %s" % (np.shape(X),)
        )

    # prepare training dataset
    train_size = int(len(series) * 0.66)
    train, test = series[:train_size], series[train_size:]
    history = list(train)

    # make one-step-ahead predictions, refitting on the growing history
    predictions = []
    for actual in test:
        model = sm.tsa.statespace.SARIMAX(history, order=arima_order, seasonal_order=sarima_order)
        model_fit = model.fit()
        predictions.append(model_fit.forecast()[0])
        history.append(actual)

    # calculate out of sample error (element-wise on aligned 1-D arrays)
    predictions = np.asarray(predictions, dtype=float)
    mape = np.mean(np.abs(predictions - test) / np.abs(test))
    return mape




In [13]:
# evaluate combinations of (p,d,q) and seasonal (P,D,Q) values for a SARIMA model
def evaluate_models(dataset, p_values, d_values, q_values, PS_values, DS_values, QS_values,
                    seasonal_period=365):
    """Grid-search SARIMA orders and report the one with the lowest MAPE.

    Every combination of non-seasonal ``(p, d, q)`` and seasonal
    ``(P, D, Q)`` candidates is scored with ``evaluate_sarima_model``. Each
    score is printed as it is produced and the best one is printed at the
    end. Errors raised while fitting a configuration are not caught, so a
    failing configuration aborts the search.

    Parameters
    ----------
    dataset : numpy.ndarray
        The series to model; it is cast to ``float32`` before scoring.
    p_values, d_values, q_values : iterable of int
        Candidate non-seasonal orders.
    PS_values, DS_values, QS_values : iterable of int
        Candidate seasonal orders.
    seasonal_period : int, optional
        Number of observations per seasonal cycle (the ``s`` in
        ``(P, D, Q, s)``). Defaults to 365, the value that was previously
        hard-coded. NOTE(review): a period longer than the training series
        leaves no full cycle to learn from -- consider a shorter period
        (e.g. 7 for weekly patterns in daily data) and confirm against the
        data.

    Returns
    -------
    tuple
        ``(best_cfg, best_score)`` where ``best_cfg`` is the 7-tuple
        ``(p, d, q, P, D, Q, s)`` of the winning configuration, or ``None``
        if no configuration scored below infinity.
    """
    dataset = dataset.astype('float32')
    best_score, best_cfg = float("inf"), None

    for p in p_values:
        # progress trace: which non-seasonal AR order is being searched
        print(p_values,p)
        for d in d_values:
            for q in q_values:
                for PS in PS_values:
                    for DS in DS_values:
                        for QS in QS_values:
                            order = (p,d,q)
                            sorder = (PS,DS,QS,seasonal_period)
                            mape = evaluate_sarima_model(dataset, order, sorder)
                            if mape < best_score:
                                # Record the seasonal part too; the
                                # non-seasonal order alone does not
                                # identify the winning model.
                                best_score, best_cfg = mape, order + sorder
                            print('SARIMA%s MAPE=%.3f' % (order + sorder, mape))

    print('Best (S)ARIMA%s mape=%.3f' % (best_cfg, best_score))
    return best_cfg, best_score
 

In [None]:
# Search space for the grid search.
# The non-seasonal AR order is pinned to 0 for now; the wider candidate list
# is kept below for reference.
#p_values = [0, 1, 2, 4, 6, 8, 10]
p_values = [0]
d_values = range(3)
q_values = range(3)
# Seasonal candidates: 1 * 3 * 3 * 7 * 3 * 3 = 567 configurations in total,
# each refit once per held-out observation, so this cell is slow.
PS_values = [0, 1, 2, 4, 6, 8, 10]
DS_values = range(3)
QS_values = range(3)

# Suppress every warning from here on so the per-configuration prints stay
# readable during the search.
warnings.filterwarnings("ignore")
evaluate_models(df_copy.values, p_values, d_values, q_values, PS_values, DS_values, QS_values)

[0] 0
Call SARIMA MODEL
SARIMA(0, 0, 0, 0, 0, 0, 365) MAPE=1.000
Call SARIMA MODEL


In [None]:
# https://machinelearningmastery.com/grid-search-arima-hyperparameters-with-python/
# https://github.com/ritvikmath/Time-Series-Analysis/blob/master/SARIMA%20Model.ipynb
# https://www.youtube.com/watch?v=Al8m6K_stfA
# https://towardsdatascience.com/most-useful-python-functions-for-time-series-analysis-ed1a9cb3aa8b
# https://www.statsmodels.org/dev/examples/notebooks/generated/stationarity_detrending_adf_kpss.html
# https://www.machinelearningplus.com/time-series/arima-model-time-series-forecasting-python/