In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from pyspark.ml.feature import VectorAssembler, StringIndexer
from pyspark.sql import SparkSession
from pyspark.ml import Pipeline

from DataManipulation import DataManipulation
from Estimators.XGBoost import XGBoost
from Logging import Logging
from Transformers.FilterDepartment import FilterDepartment
from Transformers.ImputePrice import ImputePrice
from Transformers.LagFeature import LagFeature
from Transformers.LogTransformation import LogTransformation
from Transformers.MonthlyAggregate import MonthlyAggregate
from Transformers.NegativeSales import NegativeSales
from Estimators.RandomForest import RandomForest
from Estimators.ProphetEstimator import ProphetEstimator
from pyspark.ml.feature import MinMaxScaler
from pyspark.sql.functions import mean, stddev
from Transformers.Scaling import Scaling
from Evaluator.MAPE import MAPE
import pyspark.sql.functions as F
import pandas as pd
import findspark
import sys
import warnings

# Silence all Python warnings for this session, unless the interpreter was
# started with explicit -W options (in which case sys.warnoptions is non-empty
# and the user's own choice is respected).
if not sys.warnoptions:
    warnings.simplefilter("ignore")

Importing plotly failed. Interactive plots will not work.


In [3]:
def initialize_session(name):
    """Create (or reuse) a local SparkSession registered under ``name``.

    The session runs on all local cores (``local[*]``), binds the driver to
    ``localhost`` and serves the Spark UI on port 4050.

    Args:
        name: Application name passed to ``appName``.

    Returns:
        The SparkSession returned by ``getOrCreate()``.
    """
    builder = SparkSession.builder.master("local[*]").appName(name)
    builder = builder.config("spark.driver.bindAddress", "localhost")
    builder = builder.config("spark.ui.port", "4050")
    return builder.getOrCreate()

In [4]:
# Locate the local Spark installation so that pyspark can be used from this kernel.
findspark.init()
spark = initialize_session("Assignment")
# Turn on Arrow-based transfer between Spark and pandas (used by toPandas()).
# NOTE(review): on Spark >= 3.0 this key is deprecated in favour of
# "spark.sql.execution.arrow.pyspark.enabled" — confirm the Spark version in use.
spark.conf.set("spark.sql.execution.arrow.enabled", "true")
log = Logging.getLogger()
log.info("Initializing session")

# Project helper that supplies the raw DataFrame and, later, the train/test split.
data = DataManipulation()
df = data.get_data()

In [5]:
# df = data.filter_store(df, "WI_1")
# First pipeline stage, configured with inputCol="FOODS_1" and filterCol="dept_id".
# NOTE(review): presumably keeps only rows whose dept_id equals FOODS_1 —
# confirm in Transformers.FilterDepartment.
filterDepartment = FilterDepartment(inputCol="FOODS_1", filterCol="dept_id")

In [6]:
# Project-specific pipeline stages; each class lives under Transformers/.
imputePrice = ImputePrice()
negativeSales = NegativeSales(column="sales")

# Aggregation keyed on store / department / year / month, with one aggregate
# function per value column.
aggregate = MonthlyAggregate(
    columns=["store_id", "dept_id", "year", "month"],
    expressions={
        "sales": "sum",
        "sell_price": "avg",
        "event_name_1": "count",
        "event_name_2": "count",
        "snap_WI": "sum",
    },
)

logTransformation = LogTransformation(inputCols=["sales"])

# Lags 1..12 of the target; the feature list built in the next cell names the
# matching lag_1..lag_12 columns.
lagFeatures = LagFeature(
    partitionBy=["store_id", "dept_id"],
    orderBy=["year", "month"],
    lags=list(range(1, 13)),
    target="sales",
)

# Numeric indices for the categorical columns fed to the assembler.
storeIndexer = StringIndexer(inputCol="store_id", outputCol="store_id_index")
yearIndexer = StringIndexer(inputCol="year", outputCol="year_index")
#scaling = Scaling(inputCols=["sell_price"])
#scaling1 = Scaling(inputCols=["event_name_1"])
#scaling2 = Scaling(inputCols=["event_name_2"])

In [7]:
# Model inputs: indexed categoricals, calendar / event / price columns,
# followed by the twelve lag columns lag_1 .. lag_12.
inputColumns = [
    "store_id_index",
    "month",
    "year_index",
    "event_name_1",
    "event_name_2",
    "sell_price",
] + ["lag_{}".format(i) for i in range(1, 13)]
vector = VectorAssembler(inputCols=inputColumns, outputCol="features")

In [8]:
log.info("Initiating pipeline")
# Stages execute in list order: department filter, price imputation,
# negative-sales handling, monthly aggregation, log transform, lag features,
# string indexing and finally vector assembly into "features".
# The pipeline is fitted and applied on the same full DataFrame, i.e. before
# the train/test split performed in the next cell.
transformed = Pipeline(stages=[filterDepartment, imputePrice, negativeSales, aggregate,
                               logTransformation, lagFeatures, storeIndexer,
                               yearIndexer, vector]).fit(df).transform(df)

In [9]:
# Split the transformed data into train and test sets; the split rule itself
# lives in DataManipulation.train_test_split.
train, test = data.train_test_split(transformed)

In [8]:
#train = spark.read.option("inferSchema", "true").option("header", "true").csv("train.csv")
#test = spark.read.option("inferSchema", "true").option("header", "true").csv("test.csv")

# XGBoost

In [15]:
# Fit the project's XGBoost wrapper. It is given the individual input column
# names (not the assembled "features" vector) and "sales" as the label.
# The "score:" lines in the cell output are printed during fitting.
xgbModel = XGBoost(inputCols=inputColumns, labelCol="sales").fit(train)

Training XGBoost
score: 0.042476453781381526
score:                                                                                                               
0.032620789686800794                                                                                                 
score:                                                                                                               
0.028908502516641784                                                                                                 
score:                                                                                                               
0.027340361032599115                                                                                                 
score:                                                                                                               
0.030438058954456934                                                                                                 
score:     

In [16]:
# Score the held-out set with the fitted XGBoost model and preview the result.
pred = xgbModel.transform(test)
# DataFrame.show() prints the table itself and returns None, so wrapping it in
# print() would append a stray "None" line to the output.
pred.show()

+-----+----+-----+----------+------------------+
|store|year|month|prediction|            actual|
+-----+----+-----+----------+------------------+
|  0.0| 5.0|    1| 3.8563993|3.7662640906519957|
|  0.0| 5.0|    2| 3.8563993| 3.991447598003803|
|  0.0| 5.0|    3| 3.8563993|3.8987251815894934|
|  0.0| 5.0|    4| 3.8563993| 4.073571728304925|
|  0.0| 5.0|    5| 3.8563993| 3.770557474850995|
|  1.0| 5.0|    1| 3.8563993| 3.922154325231059|
|  1.0| 5.0|    2| 3.8563993|   3.9703933720796|
|  1.0| 5.0|    3| 3.8563993| 4.006380458549693|
|  1.0| 5.0|    4| 3.8563993|  4.14674801363064|
|  1.0| 5.0|    5| 3.8563993|4.1285285037974395|
|  2.0| 5.0|    1| 3.8563993|3.8274338954007794|
|  2.0| 5.0|    2| 3.8563993|3.8232785569516707|
|  2.0| 5.0|    3| 3.8563993|3.9811387826406603|
|  2.0| 5.0|    4| 3.8563993| 3.773932647467645|
|  2.0| 5.0|    5| 3.8563993| 4.146686055647526|
|  3.0| 5.0|    1| 3.8563993| 3.850829959848531|
|  3.0| 5.0|    2| 3.8563993| 3.932879457823799|
|  3.0| 5.0|    3| 3

In [18]:
# Disabled Spark-native alternative (note: its target name says "RFresult",
# although this cell exports the XGBoost predictions):
#pred.repartition(1).write.csv("RFresult", header="true")
# Collect the predictions to the driver as a pandas DataFrame and write them
# to a single CSV file with a header row and no index column.
pred.toPandas().to_csv("XGBresult.csv", header=True, index=False)

In [17]:
# Evaluate the XGBoost predictions: "actual" is the label column and
# "prediction" the model output in `pred`.
# NOTE(review): MAPE presumably stands for mean absolute percentage error —
# confirm in Evaluator.MAPE.
mape = MAPE(labelCol="actual", predictionCol="prediction")
score = mape.evaluate(pred)
print("Score: ", score)

Score:  0.028539248254612275


In [19]:
# Persist the fitted XGBoost model under the name/path "XGBoostBestModel".
xgbModel.save("XGBoostBestModel")

# Random Forest

In [26]:
# Fit the project's RandomForest wrapper on the assembled "features" vector
# (unlike the XGBoost wrapper, which takes the individual column names).
# The "score:" lines in the cell output are printed during fitting.
rfModel = RandomForest(featuresCol="features", labelCol="sales").fit(train)

Training Random Forest
score: 0.010931220998669416
score:                                                                                                               
0.030190120390308016                                                                                                 
score:                                                                                                               
0.030396800602553765                                                                                                 
score:                                                                                                               
0.011024392406822415                                                                                                 
score:                                                                                                               
0.03021033623642931                                                                                                  
score

In [27]:
# Score the held-out set with the fitted random forest and preview the
# identifying columns next to the label ("sales") and the prediction.
predRf = rfModel.transform(test)
predRf.select("store_id", "year", "month", "sales", "prediction").show()

+--------+----+-----+------------------+------------------+
|store_id|year|month|             sales|        prediction|
+--------+----+-----+------------------+------------------+
|    CA_1|2016|    1| 3.959089114367392|3.9705157543934466|
|    CA_1|2016|    2|3.9631264410819047|3.9676277267533813|
|    CA_1|2016|    3| 4.006380458549693|3.9667071590597844|
|    CA_1|2016|    4|  3.95525468282018| 3.982620703180685|
|    CA_1|2016|    5|  3.88058495606498|3.9863100982005246|
|    CA_2|2016|    1|  4.14674801363064| 4.074364171863007|
|    CA_2|2016|    2| 4.104282207094438| 4.075550791931293|
|    CA_2|2016|    3| 4.121428518367963| 4.057472623526878|
|    CA_2|2016|    4| 4.146686055647526| 4.062845940921025|
|    CA_2|2016|    5| 4.073571728304925| 4.070152647635027|
|    CA_3|2016|    1| 4.172369376763842| 4.065421247590753|
|    CA_3|2016|    2|4.1285285037974395| 4.073434137599054|
|    CA_3|2016|    3|4.1170059966359664| 4.061250096710981|
|    CA_3|2016|    4| 4.066102196766773|

In [28]:
# Keep only the columns needed for the exported result.
RFresult = predRf.select("store_id", "year", "month", "sales", "prediction")
#RFresult.repartition(1).write.csv("RFresult", header="true")
# Collect to the driver as a pandas DataFrame (written to CSV in the next cell).
result = RFresult.toPandas()

In [29]:
# Write the random-forest predictions to a single CSV file with a header row
# and no index column.
result.to_csv("RFresult.csv",header=True, index=False)

In [30]:
# Evaluate the random-forest predictions; here the label column is "sales".
# This rebinds the notebook-global `mape` and `score` set in the XGBoost section.
mape = MAPE(labelCol="sales", predictionCol="prediction")
score = mape.evaluate(predRf)
print("Score: ", score)

Score:  0.010473321067061045


# Prophet

In [11]:
# Add a date column "ds" built from the year and month columns joined as
# "<year>-<month>" and parsed with to_date.
# The column is referenced as "year" (lower case), matching every other cell;
# the mixed-case spelling resolves only while spark.sql.caseSensitive is false.
# NOTE(review): "ds" is presumably the date column ProphetEstimator expects — confirm.
transformedProphet = transformed.withColumn(
    "ds", F.to_date(F.concat_ws("-", "year", "month"))
)
trainProphet, testProphet = data.train_test_split(transformedProphet)

In [12]:
def getStores(data):
    """Split a DataFrame into one filtered DataFrame per distinct store.

    Args:
        data: Spark DataFrame that has a ``store_id`` column.

    Returns:
        dict mapping each distinct ``store_id`` value to ``data`` filtered
        down to the rows of that store.
    """
    # Only the distinct store ids are collected to the driver.
    store_rows = data.select("store_id").distinct().collect()
    # Reference the column through ``data`` itself rather than any
    # notebook-global frame, so the function works on whatever DataFrame is
    # passed in and does not depend on hidden kernel state.
    return {
        row.store_id: data.filter(data["store_id"] == row.store_id)
        for row in store_rows
    }

In [13]:
# Per-store views of the Prophet train and test sets, keyed by store_id.
trainStores = getStores(trainProphet)
testStores = getStores(testProphet)

In [14]:
#prophetModel = ProphetEstimator(labelCol="sales").fit(trainStores["WI_2"])
#pred = prophetModel.transform(testStores["WI_2"])
#pred.show()

In [15]:
# Fit one ProphetEstimator per store and keep the fitted models keyed by store_id.
# The cell output shows a 5-trial hyperopt (TPE) search for every fit, so this
# cell takes a few minutes per store.
models = {}
for key, value in trainStores.items():
    prophetModel = ProphetEstimator(labelCol="sales").fit(value)
    models[key] = prophetModel  

Training Prophet


INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score: 0.018402331270018394
  0%|                                                                            | 0/5 [00:00<?, ?trial/s, best loss=?]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000000 seconds
INFO:hyperopt.tpe:TPE using 0 trials
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.015897449170829973                                                                                                   
 20%|█████████▌                                      | 1/5 [00:02<00:08,  2.24s/trial, best loss: 0.015897449170829973]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000000 seconds
INFO:hyperopt.tpe:TPE using 1/1 trials with best loss 0.015897
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score:                                                                                                                 
0.03746208071161486                                                                                                    
 40%|███████████████████▏                            | 2/5 [00:46<01:21, 27.09s/trial, best loss: 0.015897449170829973]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000000 seconds
INFO:hyperopt.tpe:TPE using 2/2 trials with best loss 0.015897
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.01240285428835003                                                                                                    
 60%|█████████████████████████████▍                   | 3/5 [00:50<00:32, 16.30s/trial, best loss: 0.01240285428835003]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000000 seconds
INFO:hyperopt.tpe:TPE using 3/3 trials with best loss 0.012403
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.0154774406696688                                                                                                     
 80%|███████████████████████████████████████▏         | 4/5 [00:52<00:10, 10.64s/trial, best loss: 0.01240285428835003]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000000 seconds
INFO:hyperopt.tpe:TPE using 4/4 trials with best loss 0.012403
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.01617318487791192                                                                                                    
100%|█████████████████████████████████████████████████| 5/5 [00:55<00:00, 11.08s/trial, best loss: 0.01240285428835003]
{'changepoint_prior_scale': 0.30000000000000004, 'holidays_prior_scale': 0.2, 'n_changepoints': 25}


INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Training Prophet


INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score: 0.0585131485655536
  0%|                                                                            | 0/5 [00:00<?, ?trial/s, best loss=?]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000000 seconds
INFO:hyperopt.tpe:TPE using 0 trials
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.02723238927569095                                                                                                    
 20%|█████████▊                                       | 1/5 [00:47<03:11, 47.91s/trial, best loss: 0.02723238927569095]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.015605 seconds
INFO:hyperopt.tpe:TPE using 1/1 trials with best loss 0.027232
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.03481743945066771                                                                                                    
 40%|███████████████████▌                             | 2/5 [00:49<01:02, 20.77s/trial, best loss: 0.02723238927569095]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000000 seconds
INFO:hyperopt.tpe:TPE using 2/2 trials with best loss 0.027232
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.03039688957811736                                                                                                    
 60%|█████████████████████████████▍                   | 3/5 [00:52<00:25, 12.53s/trial, best loss: 0.02723238927569095]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000000 seconds
INFO:hyperopt.tpe:TPE using 3/3 trials with best loss 0.027232
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.02553126660941374                                                                                                    
 80%|███████████████████████████████████████▏         | 4/5 [01:39<00:26, 26.31s/trial, best loss: 0.02553126660941374]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000000 seconds
INFO:hyperopt.tpe:TPE using 4/4 trials with best loss 0.025531
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.05373016650665643                                                                                                    
100%|█████████████████████████████████████████████████| 5/5 [02:22<00:00, 28.47s/trial, best loss: 0.02553126660941374]
{'changepoint_prior_scale': 0.30000000000000004, 'holidays_prior_scale': 0.30000000000000004, 'n_changepoints': 35}


INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Training Prophet


INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score: 0.026729855997590205
  0%|                                                                            | 0/5 [00:00<?, ?trial/s, best loss=?]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000000 seconds
INFO:hyperopt.tpe:TPE using 0 trials
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.02052824214315092                                                                                                    
 20%|█████████▊                                       | 1/5 [00:01<00:07,  1.76s/trial, best loss: 0.02052824214315092]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000000 seconds
INFO:hyperopt.tpe:TPE using 1/1 trials with best loss 0.020528
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.017881947026712395                                                                                                   
 40%|███████████████████▏                            | 2/5 [00:05<00:09,  3.07s/trial, best loss: 0.017881947026712395]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000000 seconds
INFO:hyperopt.tpe:TPE using 2/2 trials with best loss 0.017882
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score:                                                                                                                 
0.09821415767074011                                                                                                    
 60%|████████████████████████████▊                   | 3/5 [00:57<00:50, 25.27s/trial, best loss: 0.017881947026712395]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000000 seconds
INFO:hyperopt.tpe:TPE using 3/3 trials with best loss 0.017882
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score:                                                                                                                 
0.03897143328234495                                                                                                    
 80%|██████████████████████████████████████▍         | 4/5 [01:48<00:35, 35.29s/trial, best loss: 0.017881947026712395]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000000 seconds
INFO:hyperopt.tpe:TPE using 4/4 trials with best loss 0.017882
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score:                                                                                                                 
0.017916345893343153                                                                                                   
100%|████████████████████████████████████████████████| 5/5 [01:53<00:00, 22.60s/trial, best loss: 0.017881947026712395]
{'changepoint_prior_scale': 0.2, 'holidays_prior_scale': 0.2, 'n_changepoints': 30}


INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Training Prophet


INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score: 0.016392109137133935
  0%|                                                                            | 0/5 [00:00<?, ?trial/s, best loss=?]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000000 seconds
INFO:hyperopt.tpe:TPE using 0 trials
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.009639574191335598                                                                                                   
 20%|█████████▌                                      | 1/5 [00:48<03:15, 48.80s/trial, best loss: 0.009639574191335598]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000000 seconds
INFO:hyperopt.tpe:TPE using 1/1 trials with best loss 0.009640
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score:                                                                                                                 
0.027084453329055475                                                                                                   
 40%|███████████████████▏                            | 2/5 [01:38<02:28, 49.38s/trial, best loss: 0.009639574191335598]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000000 seconds
INFO:hyperopt.tpe:TPE using 2/2 trials with best loss 0.009640
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.13510338305792968                                                                                                    
 60%|████████████████████████████▊                   | 3/5 [01:40<00:55, 27.80s/trial, best loss: 0.009639574191335598]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000000 seconds
INFO:hyperopt.tpe:TPE using 3/3 trials with best loss 0.009640
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.009639574191335598                                                                                                   
 80%|██████████████████████████████████████▍         | 4/5 [02:28<00:35, 35.57s/trial, best loss: 0.009639574191335598]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000000 seconds
INFO:hyperopt.tpe:TPE using 4/4 trials with best loss 0.009640
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.02123780547825793                                                                                                    
100%|████████████████████████████████████████████████| 5/5 [02:31<00:00, 30.30s/trial, best loss: 0.009639574191335598]
{'changepoint_prior_scale': 0.30000000000000004, 'holidays_prior_scale': 0.6, 'n_changepoints': 35}


INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Training Prophet


INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score: 0.025418223187866296
  0%|                                                                            | 0/5 [00:00<?, ?trial/s, best loss=?]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000000 seconds
INFO:hyperopt.tpe:TPE using 0 trials
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.022877780534841376                                                                                                   
 20%|█████████▌                                      | 1/5 [00:01<00:06,  1.60s/trial, best loss: 0.022877780534841376]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000000 seconds
INFO:hyperopt.tpe:TPE using 1/1 trials with best loss 0.022878
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.020667266383034413                                                                                                   
 40%|███████████████████▏                            | 2/5 [00:04<00:07,  2.44s/trial, best loss: 0.020667266383034413]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000000 seconds
INFO:hyperopt.tpe:TPE using 2/2 trials with best loss 0.020667
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score:                                                                                                                 
0.059526444770449015                                                                                                   
 60%|████████████████████████████▊                   | 3/5 [01:02<00:55, 27.93s/trial, best loss: 0.020667266383034413]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000000 seconds
INFO:hyperopt.tpe:TPE using 3/3 trials with best loss 0.020667
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score:                                                                                                                 
0.016827351564676806                                                                                                   
 80%|██████████████████████████████████████▍         | 4/5 [01:52<00:36, 36.54s/trial, best loss: 0.016827351564676806]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000000 seconds
INFO:hyperopt.tpe:TPE using 4/4 trials with best loss 0.016827
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.13841305465481804                                                                                                    
100%|████████████████████████████████████████████████| 5/5 [01:55<00:00, 23.07s/trial, best loss: 0.016827351564676806]
{'changepoint_prior_scale': 0.5000000000000001, 'holidays_prior_scale': 0.2, 'n_changepoints': 45}


INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Training Prophet


INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score: 0.021003632912945356
  0%|                                                                            | 0/5 [00:00<?, ?trial/s, best loss=?]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000999 seconds
INFO:hyperopt.tpe:TPE using 0 trials
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.01085824250191221                                                                                                    
 20%|█████████▊                                       | 1/5 [00:42<02:48, 42.16s/trial, best loss: 0.01085824250191221]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.001994 seconds
INFO:hyperopt.tpe:TPE using 1/1 trials with best loss 0.010858
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.010173914117431461                                                                                                   
 40%|███████████████████▏                            | 2/5 [00:45<00:57, 19.32s/trial, best loss: 0.010173914117431461]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.001999 seconds
INFO:hyperopt.tpe:TPE using 2/2 trials with best loss 0.010174
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.05822431020442232                                                                                                    
 60%|████████████████████████████▊                   | 3/5 [01:28<01:00, 30.17s/trial, best loss: 0.010173914117431461]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.001997 seconds
INFO:hyperopt.tpe:TPE using 3/3 trials with best loss 0.010174
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.010173914117431461                                                                                                   
 80%|██████████████████████████████████████▍         | 4/5 [01:31<00:19, 19.56s/trial, best loss: 0.010173914117431461]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.002990 seconds
INFO:hyperopt.tpe:TPE using 4/4 trials with best loss 0.010174
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.017334615075615345                                                                                                   
100%|████████████████████████████████████████████████| 5/5 [01:33<00:00, 18.77s/trial, best loss: 0.010173914117431461]
{'changepoint_prior_scale': 0.5000000000000001, 'holidays_prior_scale': 0.2, 'n_changepoints': 25}


INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Training Prophet


INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score: 0.014234230879097393
  0%|                                                                            | 0/5 [00:00<?, ?trial/s, best loss=?]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000999 seconds
INFO:hyperopt.tpe:TPE using 0 trials
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.009961897035304779                                                                                                   
 20%|█████████▌                                      | 1/5 [00:02<00:10,  2.51s/trial, best loss: 0.009961897035304779]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.001993 seconds
INFO:hyperopt.tpe:TPE using 1/1 trials with best loss 0.009962
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.017088406753338847                                                                                                   
 40%|███████████████████▏                            | 2/5 [00:03<00:05,  1.87s/trial, best loss: 0.009961897035304779]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.001997 seconds
INFO:hyperopt.tpe:TPE using 2/2 trials with best loss 0.009962
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score:                                                                                                                 
0.028903080111887653                                                                                                   
 60%|████████████████████████████▊                   | 3/5 [00:52<00:46, 23.22s/trial, best loss: 0.009961897035304779]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.001995 seconds
INFO:hyperopt.tpe:TPE using 3/3 trials with best loss 0.009962
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score:                                                                                                                 
0.0200625066130001                                                                                                     
 80%|██████████████████████████████████████▍         | 4/5 [01:36<00:31, 31.57s/trial, best loss: 0.009961897035304779]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.001995 seconds
INFO:hyperopt.tpe:TPE using 4/4 trials with best loss 0.009962
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.023536329735691447                                                                                                   
100%|████████████████████████████████████████████████| 5/5 [02:12<00:00, 26.53s/trial, best loss: 0.009961897035304779]
{'changepoint_prior_scale': 0.6000000000000001, 'holidays_prior_scale': 0.2, 'n_changepoints': 20}


INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Training Prophet


INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score: 0.03113582390786192
  0%|                                                                            | 0/5 [00:00<?, ?trial/s, best loss=?]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.001027 seconds
INFO:hyperopt.tpe:TPE using 0 trials
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.027792108570999466                                                                                                   
 20%|█████████▌                                      | 1/5 [00:39<02:39, 39.83s/trial, best loss: 0.027792108570999466]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.001995 seconds
INFO:hyperopt.tpe:TPE using 1/1 trials with best loss 0.027792
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.026247681198214027                                                                                                   
 40%|███████████████████▏                            | 2/5 [01:22<02:04, 41.43s/trial, best loss: 0.026247681198214027]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000996 seconds
INFO:hyperopt.tpe:TPE using 2/2 trials with best loss 0.026248
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.027161897521405826                                                                                                   
 60%|████████████████████████████▊                   | 3/5 [01:26<00:49, 24.57s/trial, best loss: 0.026247681198214027]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.001994 seconds
INFO:hyperopt.tpe:TPE using 3/3 trials with best loss 0.026248
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.03867692654972735                                                                                                    
 80%|██████████████████████████████████████▍         | 4/5 [01:28<00:15, 15.35s/trial, best loss: 0.026247681198214027]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.002991 seconds
INFO:hyperopt.tpe:TPE using 4/4 trials with best loss 0.026248
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score:                                                                                                                 
0.03552450533046369                                                                                                    
100%|████████████████████████████████████████████████| 5/5 [01:31<00:00, 18.36s/trial, best loss: 0.026247681198214027]
{'changepoint_prior_scale': 0.5000000000000001, 'holidays_prior_scale': 0.4, 'n_changepoints': 35}


INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Training Prophet


INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score: 0.022624462791673298
  0%|                                                                            | 0/5 [00:00<?, ?trial/s, best loss=?]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.001022 seconds
INFO:hyperopt.tpe:TPE using 0 trials
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.026297193103226476                                                                                                   
 20%|█████████▌                                      | 1/5 [00:03<00:15,  3.94s/trial, best loss: 0.026297193103226476]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.002993 seconds
INFO:hyperopt.tpe:TPE using 1/1 trials with best loss 0.026297
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score:                                                                                                                 
0.03309722547200989                                                                                                    
 40%|███████████████████▏                            | 2/5 [00:51<01:29, 29.72s/trial, best loss: 0.026297193103226476]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.001995 seconds
INFO:hyperopt.tpe:TPE using 2/2 trials with best loss 0.026297
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score:                                                                                                                 
0.03309722547200989                                                                                                    
 60%|████████████████████████████▊                   | 3/5 [01:39<01:15, 37.97s/trial, best loss: 0.026297193103226476]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.001993 seconds
INFO:hyperopt.tpe:TPE using 3/3 trials with best loss 0.026297
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score:                                                                                                                 
0.021006618653542395                                                                                                   
 80%|██████████████████████████████████████▍         | 4/5 [01:44<00:24, 24.82s/trial, best loss: 0.021006618653542395]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000997 seconds
INFO:hyperopt.tpe:TPE using 4/4 trials with best loss 0.021007
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.03875631657509051                                                                                                    
100%|████████████████████████████████████████████████| 5/5 [01:45<00:00, 21.11s/trial, best loss: 0.021006618653542395]
{'changepoint_prior_scale': 0.2, 'holidays_prior_scale': 0.6, 'n_changepoints': 40}


INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Training Prophet


INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score: 0.017588040183716507
  0%|                                                                            | 0/5 [00:00<?, ?trial/s, best loss=?]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.001972 seconds
INFO:hyperopt.tpe:TPE using 0 trials
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.0107045219642729                                                                                                     
 20%|██████████                                        | 1/5 [00:42<02:51, 42.99s/trial, best loss: 0.0107045219642729]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.003024 seconds
INFO:hyperopt.tpe:TPE using 1/1 trials with best loss 0.010705
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score:                                                                                                                 
0.008912998554966595                                                                                                   
 40%|███████████████████▏                            | 2/5 [01:32<02:19, 46.63s/trial, best loss: 0.008912998554966595]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.001994 seconds
INFO:hyperopt.tpe:TPE using 2/2 trials with best loss 0.008913
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.016921804672799527                                                                                                   
 60%|████████████████████████████▊                   | 3/5 [01:34<00:52, 26.18s/trial, best loss: 0.008912998554966595]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.001994 seconds
INFO:hyperopt.tpe:TPE using 3/3 trials with best loss 0.008913
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


score:                                                                                                                 
0.018989121389867365                                                                                                   
 80%|██████████████████████████████████████▍         | 4/5 [01:36<00:16, 16.68s/trial, best loss: 0.008912998554966595]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.000997 seconds
INFO:hyperopt.tpe:TPE using 4/4 trials with best loss 0.008913
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 37.


score:                                                                                                                 
0.012510491547732547                                                                                                   
100%|████████████████████████████████████████████████| 5/5 [02:24<00:00, 28.92s/trial, best loss: 0.008912998554966595]
{'changepoint_prior_scale': 0.4000000000000001, 'holidays_prior_scale': 0.4, 'n_changepoints': 45}


INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


In [17]:
# Run each fitted model in `models` on the test frame stored under the same key
# in `testStores` (keys are presumably store ids, e.g. "WI_2" — confirm).
# A comprehension keeps the loop variables local to the expression, so the
# notebook-level `data` (the DataManipulation helper created earlier in the
# notebook) is no longer overwritten with a Spark DataFrame when this cell runs.
predictions = {
    store: model.transform(testStores[store])
    for store, model in models.items()
}

In [18]:
from functools import reduce
from pyspark.sql import DataFrame

def union_all(*dfs: "DataFrame") -> "DataFrame":
    """Stack any number of Spark DataFrames into a single DataFrame.

    The inputs are folded left to right with ``DataFrame.union``. Per the
    Spark documentation that method resolves columns by position (not by
    name) and does not de-duplicate rows, so every input is expected to share
    the same column order.

    Args:
        *dfs: One or more DataFrames to combine.

    Returns:
        The union of all inputs. A single input is returned unchanged.

    Raises:
        TypeError: If called without any DataFrame.
    """
    if not dfs:
        # reduce() over an empty sequence raises an opaque
        # "reduce() of empty ..." TypeError; fail with a message that names
        # the real problem while keeping the same exception type.
        raise TypeError("union_all() requires at least one DataFrame")
    return reduce(DataFrame.union, dfs)

In [19]:
# Combine the per-key prediction DataFrames held in `predictions` into one
# DataFrame. union_all folds them with DataFrame.union, which matches columns
# by position, so all frames are assumed to share one column order — TODO confirm.
predProphet = union_all(*predictions.values())

In [20]:
# Preview the combined predictions; with no arguments show() prints at most
# the first 20 rows.
predProphet.show()

+-----+----+-----+------------------+------------------+
|store|year|month|             sales|        prediction|
+-----+----+-----+------------------+------------------+
| WI_2|2016|    1| 4.025018972282759|3.9069961338498205|
| WI_2|2016|    2|   3.9703933720796| 4.113730260156093|
| WI_2|2016|    3| 4.064120905829622| 4.020392316214241|
| WI_2|2016|    4| 4.080806804334363| 4.032772698458722|
| WI_2|2016|    5| 4.027145665774341| 3.981828016762505|
| WI_3|2016|    1|3.9421569284674907| 3.977281947338469|
| WI_3|2016|    2|3.9200189160289147|  4.00005461053498|
| WI_3|2016|    3|3.8987251815894934| 4.025028742333013|
| WI_3|2016|    4|3.8765642139838454| 4.006011367037505|
| WI_3|2016|    5|   3.8750612633917| 4.035203957707031|
| TX_2|2016|    1|3.8605176774617465| 3.744595002150639|
| TX_2|2016|    2| 3.890532791927745|  4.04413416078616|
| TX_2|2016|    3|3.9088601730172763|3.9528864544498346|
| TX_2|2016|    4| 3.850829959848531|3.8772934498686977|
| TX_2|2016|    5|3.82327855695

In [21]:
# Score the combined predictions with the project's MAPE evaluator, using the
# `sales` column as the label and `prediction` as the model output.
# NOTE(review): the sales/prediction values printed by predProphet.show() are
# all around 3.8-4.1, which looks like a transformed (possibly log) scale; if
# so, this score is measured on that scale rather than on raw sales — confirm.
mape = MAPE(labelCol="sales", predictionCol="prediction")
score = mape.evaluate(predProphet)
print("Score: ", score)

Score:  0.018951990399497586


In [22]:
# Collect the combined predictions onto the driver as a pandas DataFrame, then
# write them to a CSV file in the working directory with a header row and
# without the pandas index column.
result = predProphet.toPandas()
result.to_csv(
    "Prophetresult.csv",
    index=False,
    header=True,
)