In [37]:
import pandas as pd
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
import numpy as np
from fbprophet import Prophet
from functools import partial
import time
from multiprocessing import cpu_count
from joblib import Parallel, delayed, parallel_backend
from statsmodels.tsa.statespace.sarimax import SARIMAX
import warnings
warnings.filterwarnings("ignore")
from hyperopt import space_eval

In [30]:
class HelperFunctions():
    """Stateless helpers for partitioning the sales data."""

    def getStores(self, data):
        """Split ``data`` into one DataFrame per store.

        Args:
            data: DataFrame that contains a ``store_id`` column.

        Returns:
            dict mapping each distinct ``store_id`` value to the sub-frame of
            rows carrying that value (empty dict for an empty frame).
        """
        # Iterating a groupby already yields (key, group) pairs, so there is
        # no need for a second get_group() lookup per key.
        return {store_id: group for store_id, group in data.groupby('store_id')}

    def train_test_split(self, df, year=2016):
        """Split ``df`` chronologically on its ``year`` column.

        Args:
            df: DataFrame that contains a ``year`` column.
            year: first year that belongs to the test partition.

        Returns:
            ``(train, test)`` where ``train`` holds the rows with
            ``df['year'] < year`` and ``test`` the rows with
            ``df['year'] >= year``.
        """
        train = df[df['year'] < year]
        test = df[df['year'] >= year]
        return train, test

In [31]:
class Evaluator():
    """Scores predictions against the true values stored in the same table."""

    def __init__(self, labelCol, predictionCol):
        """Remember which columns hold the true and the predicted values."""
        self.labelCol, self.predictionCol = labelCol, predictionCol

    def Mape(self, data):
        """Return the mean absolute percentage error, as a fraction.

        Args:
            data: table indexable by ``labelCol`` and ``predictionCol``.

        Returns:
            Mean of ``|label - prediction| / |label|`` over the rows.
        """
        actual = data[self.labelCol]
        predicted = data[self.predictionCol]
        relative_error = (actual - predicted) / actual
        return np.mean(abs(relative_error))

In [66]:
class ProphetModel():
    """Tunes Prophet hyper-parameters with hyperopt and fits the best model."""

    # Hyperopt search space: every dimension is a categorical choice among
    # the listed options.
    space = {
        'seasonality_mode':hp.choice('seasonality_mode',['multiplicative','additive']),
        'changepoint_prior_scale':hp.choice('changepoint_prior_scale',np.arange(.1,2,.1)),
        'holidays_prior_scale': hp.choice('holidays_prior_scale',np.arange(.1,2,.1)),
        'n_changepoints' : hp.choice('n_changepoints',np.arange(20,200,20)),
        'weekly_seasonality' : hp.choice('weekly_seasonality', [True, False]),
        'daily_seasonality' : hp.choice('daily_seasonality', [True, False]),
        'yearly_seasonality' : hp.choice('yearly_seasonality', [True, False])
    }

    def train(self, train, validation, params):
        """Hyperopt objective: fit Prophet on ``train`` and score it on ``validation``.

        Args:
            train: DataFrame with the ``ds`` and ``y`` columns Prophet is fitted on.
            validation: DataFrame with ``store_id``, ``year``, ``month``, ``y``
                and ``ds`` columns.
            params: keyword arguments for ``Prophet``; ``None`` uses the
                library defaults.

        Returns:
            dict with the validation MAPE under ``'loss'`` and ``STATUS_OK``
            under ``'status'``.
        """
        prophet = Prophet() if params is None else Prophet(**params)

        # ``iter`` is an extra keyword handed through to Prophet.fit.
        prophetModel = prophet.fit(train, iter=3000)

        validation_result = validation[["store_id", "year", "month", "y"]].reset_index(drop=True)
        forecast = prophetModel.predict(validation[['ds']])
        # Attach predictions by position; both frames carry a fresh 0..n-1
        # index here, so this matches the index-aligned assignment it replaces.
        validation_result["yhat"] = forecast["yhat"].values

        evaluator = Evaluator(labelCol="y", predictionCol="yhat")
        score = evaluator.Mape(validation_result)

        print('score: {0} model: {1}'.format(score, 'Prophet'))
        return {'loss': score, 'status': STATUS_OK}

    def fit(self, data, labelCol, max_evals=5, validation_year=2015):
        """Search the hyper-parameter space, then refit on all of ``data``.

        Args:
            data: DataFrame with ``ds``, ``year``, ``store_id``, ``month`` and
                the target column; it is not modified.
            labelCol: name of the target column (renamed to ``y`` internally).
            max_evals: number of hyperopt trials to run.
            validation_year: first year held out for validation during the
                search.

        Returns:
            ``(bestLoss, bestParams, prophetModel)``: the lowest validation
            MAPE seen, the parameters that produced it, and a Prophet model
            fitted with those parameters on the complete series.
        """
        # rename() returns a new frame, so the caller's data stays untouched.
        df = data.rename(columns={labelCol: 'y'})
        helper = HelperFunctions()
        train, validation = helper.train_test_split(df, validation_year)
        train = train[['ds', 'y']]

        trials = Trials()
        best = fmin(partial(self.train, train, validation),
                    space=self.space,
                    algo=tpe.suggest,
                    max_evals=max_evals,
                    trials=trials)

        # fmin reports hp.choice picks as indices; space_eval maps them back
        # to the actual option values.
        bestParams = space_eval(self.space, best)
        bestLoss = trials.best_trial['result']['loss']

        prophetModel = Prophet(**bestParams).fit(df[['ds', 'y']])

        return bestLoss, bestParams, prophetModel

In [67]:
class SarimaxModel():
    """Grid-searches SARIMAX orders on a validation split and fits the winner."""

    def __init__(self):
        """Set the candidate values tried for every SARIMAX order component."""
        # Non-seasonal (p, d, q) candidates.
        self.p_values = np.arange(0, 2)
        self.d_values = np.arange(1, 2)
        self.q_values = np.arange(1, 4)
        # Seasonal (P, D, Q, m) candidates.
        self.P_values = np.arange(0, 2)
        self.D_values = np.arange(1, 2)
        self.Q_values = np.arange(0, 3)
        # NOTE(review): 7 is the only seasonal period tried, while the ``ds``
        # column built in this notebook is one date per year/month — confirm
        # that 7 is the intended period.
        self.m_values = np.arange(7, 8)

    def train(self, train, validation, arima_order, seasonalOrder, labelCol='sales'):
        """Fit one SARIMAX configuration and score it on ``validation``.

        Args:
            train: date-indexed DataFrame holding the ``labelCol`` series.
            validation: date-indexed DataFrame holding the ``labelCol`` series.
            arima_order: ``(p, d, q)`` tuple.
            seasonalOrder: ``(P, D, Q, m)`` tuple.
            labelCol: name of the target column.

        Returns:
            ``(error, arima_order, seasonalOrder)`` where ``error`` is the
            validation MAPE, or ``-1`` when fitting or scoring raised.
        """
        try:
            y_hat = validation.copy()
            model = SARIMAX(train[labelCol], order=arima_order, seasonal_order=seasonalOrder)
            model_fit = model.fit()
            # Forecast the span the validation frame actually covers instead
            # of a fixed calendar window.
            predict = model_fit.predict(validation.index.min(), validation.index.max(),
                                        dynamic=True)
            y_hat['model_prediction'] = predict

            evaluator = Evaluator(labelCol=labelCol, predictionCol="model_prediction")
            error = evaluator.Mape(y_hat)

            print('score: {0} model: {1}'.format(error, 'Sarimax'))
            return error, arima_order, seasonalOrder

        except Exception as e:
            # Best effort: a configuration that cannot be fitted is reported
            # with the -1 sentinel and skipped later by _selectBest.
            print(f"##### Skipped modelling with: {arima_order}, {seasonalOrder}\n")
            print(e)
            return -1, arima_order, seasonalOrder

    def evaluate(self, train, validation, p_values, d_values, q_values, P_values,
                 D_values, Q_values, m_values, parallel=True, labelCol='sales'):
        """Score every combination of the given order candidates.

        Args:
            train, validation: frames passed unchanged to ``train``.
            p_values, d_values, q_values: candidates for the ``(p, d, q)`` order.
            P_values, D_values, Q_values, m_values: candidates for the
                ``(P, D, Q, m)`` seasonal order.
            parallel: run the configurations through joblib when true,
                otherwise in a plain loop.
            labelCol: name of the target column.

        Returns:
            Flat list of ``(error, arima_order, seasonalOrder)`` tuples, one
            per combination, in the same order for both execution modes. The
            list is empty if the parallel run itself fails.
        """
        grid = [((p, d, q), (P, D, Q, m))
                for p in p_values
                for d in d_values
                for q in q_values
                for P in P_values
                for D in D_values
                for Q in Q_values
                for m in m_values]

        if not parallel:
            return [self.train(train, validation, order, seasonal, labelCol=labelCol)
                    for order, seasonal in grid]

        score = []
        try:
            # Build the executor only when it is actually used.
            executor = Parallel(n_jobs=cpu_count())
            tasks = (delayed(self.train)(train, validation, order, seasonal, labelCol=labelCol)
                     for order, seasonal in grid)
            # extend (not append) so the result has the same flat shape as
            # the serial branch.
            score.extend(executor(tasks))
        except Exception as e:
            print('Fatal Error....')
            print(e)

        return score

    @staticmethod
    def _indexByDate(frame, labelCol):
        """Return a new frame holding ``labelCol`` indexed by the ``ds`` dates."""
        indexed = frame[['ds', labelCol]].set_index('ds')
        # Rebuild the index with an explicit frequency inferred from the dates.
        indexed.index = pd.DatetimeIndex(indexed.index.values,
                                         freq=indexed.index.inferred_freq)
        return indexed

    @staticmethod
    def _selectBest(scores):
        """Return the entry with the lowest valid error.

        Entries whose error is negative (the failure sentinel) or not finite
        are ignored; ties keep the earliest entry.

        Raises:
            RuntimeError: when no entry carries a valid error.
        """
        valid = [entry for entry in scores
                 if np.isfinite(entry[0]) and entry[0] >= 0]
        if not valid:
            raise RuntimeError("No SARIMAX configuration produced a valid validation score")
        return min(valid, key=lambda entry: entry[0])

    def fit(self, data, labelCol, validation_year=2015):
        """Grid-search on a validation split, then refit the best order on all data.

        Args:
            data: DataFrame with ``ds``, ``year`` and the target column; it is
                not modified.
            labelCol: name of the target column.
            validation_year: first year held out for validation.

        Returns:
            ``(error, (arima_order, seasonalOrder), sarimaxModel)`` for the
            configuration with the lowest validation MAPE; the model is
            fitted on the complete series.

        Raises:
            RuntimeError: when every configuration failed.
        """
        helper = HelperFunctions()
        train, validation = helper.train_test_split(data, validation_year)
        train = self._indexByDate(train, labelCol)
        validation = self._indexByDate(validation, labelCol)

        scores = self.evaluate(train, validation, self.p_values, self.d_values, self.q_values,
                               self.P_values, self.D_values, self.Q_values, self.m_values,
                               False, labelCol)

        bestError, bestOrder, bestSeasonalOrder = self._selectBest(scores)

        full = self._indexByDate(data, labelCol)
        sarimaxModel = SARIMAX(full[labelCol], order=bestOrder,
                               seasonal_order=bestSeasonalOrder)
        sarimaxModel = sarimaxModel.fit()

        return bestError, (bestOrder, bestSeasonalOrder), sarimaxModel

In [68]:
class ModelSelector():
    """Trains a Prophet and a SARIMAX candidate per store and keeps the better one."""

    def getModel(self, data):
        """Pick a model for every store.

        Args:
            data: dict mapping a store key to that store's DataFrame.

        Returns:
            dict mapping each store key to ``[name, fitted_model]`` where
            ``name`` is ``'Prophet'`` or ``'Sarimax'``.
        """
        models = {}
        for key, value in data.items():
            # Train both candidates concurrently; results come back in the
            # order of the model-name list.
            result = Parallel(n_jobs=cpu_count(), prefer="threads")(
                delayed(self.parallelTraining)(model, value)
                for model in ['prophet', 'sarimax'])

            bestLossProphet, bestParamsProphet, modelProphet = result[0]
            bestLossSarimax, bestParamsSarimax, modelSarimax = result[1]

            print(bestLossProphet, bestParamsProphet, modelProphet)
            print(bestLossSarimax, bestParamsSarimax, modelSarimax)

            print("Best Loss Prophet: {0}".format(bestLossProphet))
            print("Best Loss Sarimax: {0}".format(bestLossSarimax))

            if self._pickWinner(bestLossProphet, bestLossSarimax) == 'Prophet':
                models[key] = ['Prophet', modelProphet]
            else:
                models[key] = ['Sarimax', modelSarimax]

        return models

    @staticmethod
    def _isValidLoss(loss):
        """Return True when ``loss`` is a finite, non-negative number.

        The losses compared here are MAPE values, and SarimaxModel.train
        reports a failed fit as ``-1``; neither a negative nor a NaN loss
        may win the comparison.
        """
        return bool(np.isfinite(loss) and loss >= 0)

    @staticmethod
    def _pickWinner(lossProphet, lossSarimax):
        """Return ``'Prophet'`` or ``'Sarimax'`` for the given validation losses.

        Prophet wins only with a valid loss that is strictly lower than the
        SARIMAX loss, or when the SARIMAX loss is invalid; every other case
        (including ties and two invalid losses) selects SARIMAX.
        """
        prophetValid = ModelSelector._isValidLoss(lossProphet)
        sarimaxValid = ModelSelector._isValidLoss(lossSarimax)
        if prophetValid and (not sarimaxValid or lossProphet < lossSarimax):
            return 'Prophet'
        return 'Sarimax'

    def parallelTraining(self, model, data):
        """Fit the requested model family on one store's data.

        Args:
            model: ``"prophet"`` or ``"sarimax"``.
            data: the store's DataFrame; the target column is ``"sales"``.

        Returns:
            ``(bestLoss, bestParams, fittedModel)`` as returned by the chosen
            model's ``fit``.

        Raises:
            ValueError: for any other ``model`` name.
        """
        if model == "prophet":
            trainer = ProphetModel()
        elif model == "sarimax":
            trainer = SarimaxModel()
        else:
            # Fail loudly instead of returning None, which would only surface
            # later as an unpacking error in getModel.
            raise ValueError("Unknown model type: {0!r}".format(model))

        bestLoss, bestParams, fittedModel = trainer.fit(data, "sales")
        print(bestLoss, bestParams, fittedModel)
        return bestLoss, bestParams, fittedModel

In [69]:
class Driver():
    """Entry point: loads the training data and selects a model per store."""

    def main(self, train_path='train.csv'):
        """Load the training CSV and return the selected model for each store.

        Args:
            train_path: CSV file with ``year``, ``month`` and ``store_id``
                columns (plus the columns the models read); its first column
                is used as the index.

        Returns:
            The per-store mapping produced by ``ModelSelector.getModel``.
        """
        # Only the training data is needed for model selection, so the test
        # file that used to be read and then discarded is no longer loaded.
        train = self._withDateColumn(pd.read_csv(train_path, index_col=0))

        trainStores = HelperFunctions().getStores(train)
        print("number of stores: {0}".format(len(trainStores)))

        modelSelector = ModelSelector()
        return modelSelector.getModel(trainStores)

    @staticmethod
    def _withDateColumn(frame):
        """Return a copy of ``frame`` with a ``ds`` column set to the first day of each year/month."""
        first_of_month = pd.to_datetime(frame[['year', 'month']].assign(day=1))
        return frame.assign(ds=first_of_month)

In [None]:
# Run the whole pipeline when this cell/module is executed directly. The
# Driver instance and the per-store model mapping it returns stay bound to
# ``driver`` and ``models`` for later inspection.
if __name__ == "__main__":
    driver = Driver()
    models = driver.main()

number of stores: 10
  0%|                                                                          | 0/5 [00:00<?, ?trial/s, best loss=?]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.015621 seconds
INFO:hyperopt.tpe:TPE using 0 trials
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score: 0.021063141003058453 model: Sarimax                                                                           
score: 0.0471886800938189 model: Prophet                                                                             
 20%|█████████▌                                      | 1/5 [01:11<04:47, 71.85s/trial, best loss: 0.0471886800938189]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.004982 seconds
INFO:hyperopt.tpe:TPE using 1/1 trials with best loss 0.047189
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score: 0.017846001294108318 model: Sarimax                                                                           
score: 0.01762183960137016 model: Prophet                                                                            
 40%|██████████████████▊                            | 2/5 [01:15<01:34, 31.57s/trial, best loss: 0.01762183960137016]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.003988 seconds
INFO:hyperopt.tpe:TPE using 2/2 trials with best loss 0.017622
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score: 0.1050549779649454 model: Prophet                                                                             
 60%|████████████████████████████▏                  | 3/5 [02:24<01:37, 48.58s/trial, best loss: 0.01762183960137016]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.003988 seconds
INFO:hyperopt.tpe:TPE using 3/3 trials with best loss 0.017622
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score: 0.01693326699881369 model: Sarimax                                                                            
score: 0.02085094440858457 model: Prophet                                                                            
score: 0.017106446850248588 model: Sarimax                                                                           
 80%|█████████████████████████████████████▌         | 4/5 [03:14<00:49, 49.28s/trial, best loss: 0.01762183960137016]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.003987 seconds
INFO:hyperopt.tpe:TPE using 4/4 trials with best loss 0.017622
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score: 0.021153640500120915 model: Prophet                                                                           
100%|███████████████████████████████████████████████| 5/5 [03:17<00:00, 39.41s/trial, best loss: 0.01762183960137016]

INFO:fbprophet:n_changepoints greater than number of observations. Using 47.



0.01762183960137016 {'changepoint_prior_scale': 0.1, 'daily_seasonality': False, 'holidays_prior_scale': 0.5, 'n_changepoints': 100, 'seasonality_mode': 'multiplicative', 'weekly_seasonality': True, 'yearly_seasonality': False} <fbprophet.forecaster.Prophet object at 0x0000026F8B367948>
score: 0.017152597669482805 model: Sarimax
score: 0.01736799063541656 model: Sarimax
score: 0.022126131915158175 model: Sarimax
score: 0.018010692656682783 model: Sarimax
score: 0.017097676392160158 model: Sarimax
score: 0.017257514558625446 model: Sarimax
score: 0.020237665848885162 model: Sarimax
score: 0.017248455572712606 model: Sarimax
score: 0.023052568318454956 model: Sarimax
score: 0.018244945712127047 model: Sarimax
score: 0.016976716649639057 model: Sarimax
score: 0.017025263780505073 model: Sarimax
score: 0.020192452647799574 model: Sarimax
score: 0.017094092569126657 model: Sarimax
score: 0.02248717665328924 model: Sarimax
score: 0.018038730969357278 model: Sarimax
score: 0.0171030159852625

INFO:hyperopt.tpe:build_posterior_wrapper took 0.007023 seconds
INFO:hyperopt.tpe:TPE using 0 trials
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score: 0.03518457603307092 model: Sarimax                                                                            
score: 0.031763362173485445 model: Prophet                                                                           
 20%|█████████▏                                    | 1/5 [00:03<00:13,  3.30s/trial, best loss: 0.031763362173485445]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.004987 seconds
INFO:hyperopt.tpe:TPE using 1/1 trials with best loss 0.031763
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score: 0.035072567231430006 model: Sarimax                                                                           
score: 0.04119019274738852 model: Prophet                                                                            
 40%|██████████████████▍                           | 2/5 [01:12<02:05, 41.79s/trial, best loss: 0.031763362173485445]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.004988 seconds
INFO:hyperopt.tpe:TPE using 2/2 trials with best loss 0.031763
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score: 0.03665015701512721 model: Sarimax                                                                            
score: 0.035089847816089335 model: Sarimax                                                                           
score: 0.04014161119831025 model: Sarimax                                                                            
score: 0.031121071048429122 model: Prophet                                                                           
 60%|███████████████████████████▌                  | 3/5 [02:43<02:08, 64.43s/trial, best loss: 0.031121071048429122]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.015622 seconds
INFO:hyperopt.tpe:TPE using 3/3 trials with best loss 0.031121
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score: 0.04418592276085412 model: Sarimax                                                                            
score: 0.04872149607515419 model: Prophet                                                                            
 80%|████████████████████████████████████         | 4/5 [05:35<01:46, 106.91s/trial, best loss: 0.031121071048429122]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.002716 seconds
INFO:hyperopt.tpe:TPE using 4/4 trials with best loss 0.031121
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score: 0.03550092510969939 model: Sarimax                                                                            
score: 0.044633524143131764 model: Prophet                                                                           
100%|██████████████████████████████████████████████| 5/5 [05:37<00:00, 67.52s/trial, best loss: 0.031121071048429122]

INFO:fbprophet:n_changepoints greater than number of observations. Using 47.



0.031121071048429122 {'changepoint_prior_scale': 1.3000000000000003, 'daily_seasonality': True, 'holidays_prior_scale': 0.4, 'n_changepoints': 60, 'seasonality_mode': 'multiplicative', 'weekly_seasonality': True, 'yearly_seasonality': True} <fbprophet.forecaster.Prophet object at 0x0000026F8DF6FF48>
score: 0.035568539859573194 model: Sarimax
score: 0.036868856939641426 model: Sarimax
score: 0.03570099204895122 model: Sarimax
score: 0.04129153081965375 model: Sarimax
score: 0.04908882883921423 model: Sarimax
score: 0.03276969106635298 model: Sarimax
score: 0.03489607260978599 model: Sarimax
score: 0.03620965311498783 model: Sarimax
score: 0.034909609058872006 model: Sarimax
score: 0.03908350719806172 model: Sarimax
score: 0.04313063450262606 model: Sarimax
score: 0.03895092733874417 model: Sarimax
score: 0.03642408409202506 model: Sarimax
score: 0.03718979253502021 model: Sarimax
score: 0.0366228438990097 model: Sarimax
score: 0.043926388278108956 model: Sarimax
score: 0.05206582765628

INFO:hyperopt.tpe:build_posterior_wrapper took 0.003992 seconds
INFO:hyperopt.tpe:TPE using 0 trials
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score: 0.015811897456003487 model: Sarimax                                                                           
score: 0.024787485953728313 model: Prophet                                                                           
 20%|█████████▏                                    | 1/5 [00:56<03:46, 56.50s/trial, best loss: 0.024787485953728313]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.005425 seconds
INFO:hyperopt.tpe:TPE using 1/1 trials with best loss 0.024787
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score: 0.015098346210285503 model: Sarimax                                                                           
score: 0.023694463647563387 model: Prophet                                                                           
 40%|██████████████████▍                           | 2/5 [00:59<01:15, 25.03s/trial, best loss: 0.023694463647563387]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.001151 seconds
INFO:hyperopt.tpe:TPE using 2/2 trials with best loss 0.023694
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score: 0.028016546421507322 model: Prophet                                                                           
 60%|███████████████████████████                  | 3/5 [04:23<03:33, 106.61s/trial, best loss: 0.023694463647563387]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.004002 seconds
INFO:hyperopt.tpe:TPE using 3/3 trials with best loss 0.023694
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score: 0.03333061989139991 model: Prophet                                                                            
 80%|████████████████████████████████████▊         | 4/5 [04:25<01:05, 65.30s/trial, best loss: 0.023694463647563387]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.002990 seconds
INFO:hyperopt.tpe:TPE using 4/4 trials with best loss 0.023694
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score: 0.015460622874716176 model: Sarimax                                                                           
score: 0.01582559246023521 model: Prophet                                                                            
100%|███████████████████████████████████████████████| 5/5 [04:31<00:00, 54.32s/trial, best loss: 0.01582559246023521]
0.01582559246023521 {'changepoint_prior_scale': 0.5, 'daily_seasonality': True, 'holidays_prior_scale': 0.5, 'n_changepoints': 40, 'seasonality_mode': 'multiplicative', 'weekly_seasonality': False, 'yearly_seasonality': False} <fbprophet.forecaster.Prophet object at 0x0000026F8C099308>
score: 0.01537575013255855 model: Sarimax
score: 0.015315885578899036 model: Sarimax
score: 0.018802246714453907 model: Sarimax
score: 0.016539843816684104 model: Sarimax
score: 0.015545238268170836 model: Sarimax
score: 0.015865654737745774 model: Sarimax
score: 0.015769896491005634 model: Sarimax
score: 0.015730383231516185 model: Sarimax
score: 0.0200852

INFO:hyperopt.tpe:build_posterior_wrapper took 0.005457 seconds
INFO:hyperopt.tpe:TPE using 0 trials
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score: 0.011476949635423277 model: Sarimax                                                                           
score: 0.026495087531214462 model: Prophet                                                                           
 20%|█████████▏                                    | 1/5 [01:11<04:45, 71.37s/trial, best loss: 0.026495087531214462]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.001554 seconds
INFO:hyperopt.tpe:TPE using 1/1 trials with best loss 0.026495
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.
