# WACS prophet model

In [100]:
%matplotlib inline

2022-08-04 07:39:44,099 - [DEBUG] - matplotlib.pyplot - (pyplot.py).switch_backend(301) - Loaded backend module://matplotlib_inline.backend_inline version unknown.


In [101]:
import json
import pandas as pd
from prophet import Prophet, serialize
from prophet.diagnostics import cross_validation, performance_metrics
import mlflow
import sys

import logging
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG,
                    format=f"%(asctime)s - [%(levelname)s] - %(name)s - (%(filename)s).%(funcName)s(%(lineno)d) - %(message)s")
logger = logging.getLogger(__name__)

ARTIFACT_PATH = "model"
tracking_server_uri = "http://138.68.70.41:5000"  # set to your server URI


In [102]:
try:
    mlflow.set_tracking_uri(tracking_server_uri)
except:
    logger.debbug("""Couldn't connect to remote MLFLOW tracking server""")

In [103]:
def extract_params(pr_model):
    return {attr: getattr(pr_model, attr) for attr in serialize.SIMPLE_ATTRIBUTES}

In [130]:
def train(df, settings):

    # Init prophet model

    m = Prophet(
        growth=settings["growth"],
        seasonality_mode=settings["seasonality_mode"],
        changepoint_prior_scale=settings['changepoint_prior_scale'],
        seasonality_prior_scale=settings['seasonality_prior_scale'],
        daily_seasonality=settings['daily_seasonality'],
        weekly_seasonality=settings['weekly_seasonality'],
        yearly_seasonality=settings['yearly_seasonality']
    )

    for season in settings['seasonality']:
        m.add_seasonality(
            name=season['name'],
            period=season['period'],
            fourier_order=season['fourier_order']
        )

    with mlflow.start_run():

        model = m.fit(df)  
        params = extract_params(model)

        metric_keys = ["mse", "rmse", "mae", "mape", "mdape", "smape", "coverage"]
        
        cross_validation_params = settings.get('cross_validation')
        cross_validation_enable = settings.get('cross_validation_enabled')

#       if cross_validation_params and cross_validation_enable:
        metrics_raw = cross_validation(
                model=model,
                horizon=cross_validation_params.get('horizon'),  # "365",
                period=cross_validation_params.get('period'),  # "180",
                initial=cross_validation_params.get('initial'),  # "710",
                parallel=cross_validation_params.get(
                    'parallel'),  # "threads",
                disable_tqdm=cross_validation_params.get(
                    'disable_tqdm')
        )

        cv_metrics = performance_metrics(metrics_raw)
        metrics = {k: cv_metrics[k].mean() for k in metric_keys}

        logger.debug(
            f"Logged Metrics: \n{json.dumps(metrics, indent=2)}")
        logger.debug(
            f"Logged Params: \n{json.dumps(params, indent=2)}")

        mlflow.prophet.log_model(model, artifact_path=ARTIFACT_PATH)
        mlflow.log_metrics(metrics)
        mlflow.log_params(params)

        model_uri = mlflow.get_artifact_uri(ARTIFACT_PATH)

        logger.debug(f"Model artifact logged to: {model_uri}")


In [131]:
# model settings
settings = {
        "growth": "linear", 
        "seasonality_mode": "multiplicative", 
        "changepoint_prior_scale": 30,
        "seasonality_prior_scale": 35,
        "interval_width": 0.98,
        "daily_seasonality": "auto",
        "weekly_seasonality": "auto",
        "yearly_seasonality": False, 
        "seasonality": [{"name": "hour","period": 0.417, "fourier_order": 5}], 
        "cross_validation":{
            "horizon":"12 hours", 
            "period":"2 hours", 
            "initial":"24 hours",
            "parallel":"threads",
            "disable_tqdm":True
        }
    }

In [132]:
SOURCE_DATA = (
    "https://raw.githubusercontent.com/rkrikbaev/model-training/master/jupyter/project/fp_archives.csv"
)
# SOURCE_DATA = (
#     "https://raw.githubusercontent.com/facebook/prophet/master/examples/example_retail_sales.csv"
# )

df = pd.read_csv(SOURCE_DATA, sep=';')
df.tail(2)

Unnamed: 0,ds,y
2878,2022-02-10 12:25,78.472451
2879,2022-02-10 12:26,78.555164


In [133]:
train(df, settings)

2022-08-04 07:43:15,513 - [DEBUG] - urllib3.connectionpool - (connectionpool.py)._new_conn(228) - Starting new HTTP connection (1): 138.68.70.41:5000
2022-08-04 07:43:15,539 - [DEBUG] - urllib3.connectionpool - (connectionpool.py)._make_request(456) - http://138.68.70.41:5000 "POST /api/2.0/mlflow/runs/create HTTP/1.1" 200 750
2022-08-04 07:43:15,561 - [INFO] - prophet - (forecaster.py).parse_seasonality_args(921) - Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
2022-08-04 07:43:15,566 - [INFO] - prophet - (forecaster.py).parse_seasonality_args(921) - Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
2022-08-04 07:43:15,611 - [DEBUG] - cmdstanpy - (utils.py).__init__(1448) - input tempfile: /tmp/tmp0qlloh2o/pm7r1huu.json
2022-08-04 07:43:15,640 - [DEBUG] - cmdstanpy - (utils.py).__init__(1448) - input tempfile: /tmp/tmp0qlloh2o/j8xekz0h.json
2022-08-04 07:43:15,643 - [DEBUG] - cmdstanpy - (model.py)._run_cmd

07:43:15 - cmdstanpy - INFO - Chain [1] start processing


2022-08-04 07:43:15,651 - [INFO] - cmdstanpy - (model.py)._run_cmdstan(1568) - Chain [1] start processing


07:43:23 - cmdstanpy - INFO - Chain [1] done processing


2022-08-04 07:43:23,187 - [INFO] - cmdstanpy - (model.py)._run_cmdstan(1608) - Chain [1] done processing
2022-08-04 07:43:23,228 - [INFO] - prophet - (diagnostics.py).generate_cutoffs(55) - Making 6 forecasts with cutoffs between 2022-02-09 14:26:00 and 2022-02-10 00:26:00
2022-08-04 07:43:23,229 - [INFO] - prophet - (diagnostics.py).cross_validation(192) - Applying in parallel with <concurrent.futures.thread.ThreadPoolExecutor object at 0x7f68735d1250>
2022-08-04 07:43:23,380 - [DEBUG] - cmdstanpy - (utils.py).__init__(1448) - input tempfile: /tmp/tmp0qlloh2o/fkfucc3p.json
2022-08-04 07:43:23,405 - [DEBUG] - cmdstanpy - (utils.py).__init__(1448) - input tempfile: /tmp/tmp0qlloh2o/l76gpvxt.json
2022-08-04 07:43:23,424 - [DEBUG] - cmdstanpy - (model.py)._run_cmdstan(1552) - idx 0
2022-08-04 07:43:23,460 - [DEBUG] - cmdstanpy - (model.py)._run_cmdstan(1553) - running CmdStan, num_threads: None
2022-08-04 07:43:23,463 - [DEBUG] - cmdstanpy - (model.py)._run_cmdstan(1565) - CmdStan args: [

07:43:23 - cmdstanpy - INFO - Chain [1] start processing


2022-08-04 07:43:23,445 - [DEBUG] - cmdstanpy - (utils.py).__init__(1448) - input tempfile: /tmp/tmp0qlloh2o/lfnhd3g6.json
2022-08-04 07:43:23,468 - [INFO] - cmdstanpy - (model.py)._run_cmdstan(1568) - Chain [1] start processing
2022-08-04 07:43:23,527 - [DEBUG] - cmdstanpy - (utils.py).__init__(1448) - input tempfile: /tmp/tmp0qlloh2o/zpgw_b8w.json
2022-08-04 07:43:23,543 - [DEBUG] - cmdstanpy - (utils.py).__init__(1448) - input tempfile: /tmp/tmp0qlloh2o/vzymoj2o.json
2022-08-04 07:43:23,543 - [DEBUG] - cmdstanpy - (utils.py).__init__(1448) - input tempfile: /tmp/tmp0qlloh2o/sbqusy5z.json
2022-08-04 07:43:23,566 - [DEBUG] - cmdstanpy - (utils.py).__init__(1448) - input tempfile: /tmp/tmp0qlloh2o/k5f4f4vm.json
2022-08-04 07:43:23,588 - [DEBUG] - cmdstanpy - (model.py)._run_cmdstan(1552) - idx 0
2022-08-04 07:43:23,605 - [DEBUG] - cmdstanpy - (model.py)._run_cmdstan(1553) - running CmdStan, num_threads: None
2022-08-04 07:43:23,609 - [DEBUG] - cmdstanpy - (model.py)._run_cmdstan(1565) 

07:43:23 - cmdstanpy - INFO - Chain [1] start processing


2022-08-04 07:43:23,569 - [DEBUG] - cmdstanpy - (utils.py).__init__(1448) - input tempfile: /tmp/tmp0qlloh2o/23ycb6o2.json
2022-08-04 07:43:23,610 - [INFO] - cmdstanpy - (model.py)._run_cmdstan(1568) - Chain [1] start processing
2022-08-04 07:43:23,647 - [DEBUG] - cmdstanpy - (utils.py).__init__(1448) - input tempfile: /tmp/tmp0qlloh2o/l6kjqqp0.json
2022-08-04 07:43:23,670 - [DEBUG] - cmdstanpy - (model.py)._run_cmdstan(1552) - idx 0
2022-08-04 07:43:23,670 - [DEBUG] - cmdstanpy - (utils.py).__init__(1448) - input tempfile: /tmp/tmp0qlloh2o/bi6yqcqj.json
2022-08-04 07:43:23,674 - [DEBUG] - cmdstanpy - (model.py)._run_cmdstan(1552) - idx 0
2022-08-04 07:43:23,675 - [DEBUG] - cmdstanpy - (model.py)._run_cmdstan(1553) - running CmdStan, num_threads: None
2022-08-04 07:43:23,676 - [DEBUG] - cmdstanpy - (model.py)._run_cmdstan(1565) - CmdStan args: ['/usr/local/lib/python3.8/site-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=96946', 'data', 'file=/tmp/tmp0qlloh2o/zpgw_b8w.

07:43:23 - cmdstanpy - INFO - Chain [1] start processing


2022-08-04 07:43:23,672 - [DEBUG] - cmdstanpy - (model.py)._run_cmdstan(1553) - running CmdStan, num_threads: None
2022-08-04 07:43:23,678 - [DEBUG] - cmdstanpy - (model.py)._run_cmdstan(1565) - CmdStan args: ['/usr/local/lib/python3.8/site-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=59090', 'data', 'file=/tmp/tmp0qlloh2o/sbqusy5z.json', 'init=/tmp/tmp0qlloh2o/l6kjqqp0.json', 'output', 'file=/tmp/tmp_agaka9s/prophet_model-20220804074323.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']


07:43:23 - cmdstanpy - INFO - Chain [1] start processing


2022-08-04 07:43:23,670 - [DEBUG] - cmdstanpy - (utils.py).__init__(1448) - input tempfile: /tmp/tmp0qlloh2o/0u4cp2on.json
2022-08-04 07:43:23,680 - [DEBUG] - cmdstanpy - (model.py)._run_cmdstan(1552) - idx 0
2022-08-04 07:43:23,681 - [DEBUG] - cmdstanpy - (model.py)._run_cmdstan(1553) - running CmdStan, num_threads: None
2022-08-04 07:43:23,682 - [DEBUG] - cmdstanpy - (model.py)._run_cmdstan(1565) - CmdStan args: ['/usr/local/lib/python3.8/site-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=42365', 'data', 'file=/tmp/tmp0qlloh2o/vzymoj2o.json', 'init=/tmp/tmp0qlloh2o/0u4cp2on.json', 'output', 'file=/tmp/tmpn2d_dgwr/prophet_model-20220804074323.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']


07:43:23 - cmdstanpy - INFO - Chain [1] start processing


2022-08-04 07:43:23,678 - [INFO] - cmdstanpy - (model.py)._run_cmdstan(1568) - Chain [1] start processing
2022-08-04 07:43:23,676 - [INFO] - cmdstanpy - (model.py)._run_cmdstan(1568) - Chain [1] start processing
2022-08-04 07:43:23,685 - [DEBUG] - cmdstanpy - (utils.py).__init__(1448) - input tempfile: /tmp/tmp0qlloh2o/ptgdtau7.json
2022-08-04 07:43:23,683 - [INFO] - cmdstanpy - (model.py)._run_cmdstan(1568) - Chain [1] start processing
2022-08-04 07:43:23,721 - [DEBUG] - cmdstanpy - (model.py)._run_cmdstan(1552) - idx 0
2022-08-04 07:43:23,736 - [DEBUG] - cmdstanpy - (model.py)._run_cmdstan(1553) - running CmdStan, num_threads: None
2022-08-04 07:43:23,751 - [DEBUG] - cmdstanpy - (model.py)._run_cmdstan(1565) - CmdStan args: ['/usr/local/lib/python3.8/site-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=96324', 'data', 'file=/tmp/tmp0qlloh2o/23ycb6o2.json', 'init=/tmp/tmp0qlloh2o/ptgdtau7.json', 'output', 'file=/tmp/tmpwv63uj7y/prophet_model-20220804074323.csv', 'metho

07:43:23 - cmdstanpy - INFO - Chain [1] start processing


2022-08-04 07:43:23,752 - [INFO] - cmdstanpy - (model.py)._run_cmdstan(1568) - Chain [1] start processing


07:43:33 - cmdstanpy - INFO - Chain [1] done processing


2022-08-04 07:43:33,468 - [INFO] - cmdstanpy - (model.py)._run_cmdstan(1608) - Chain [1] done processing


07:43:36 - cmdstanpy - INFO - Chain [1] done processing


2022-08-04 07:43:36,736 - [INFO] - cmdstanpy - (model.py)._run_cmdstan(1608) - Chain [1] done processing


07:43:38 - cmdstanpy - INFO - Chain [1] done processing


2022-08-04 07:43:38,952 - [INFO] - cmdstanpy - (model.py)._run_cmdstan(1608) - Chain [1] done processing


07:43:40 - cmdstanpy - INFO - Chain [1] done processing


2022-08-04 07:43:40,335 - [INFO] - cmdstanpy - (model.py)._run_cmdstan(1608) - Chain [1] done processing


07:43:41 - cmdstanpy - INFO - Chain [1] done processing


2022-08-04 07:43:41,317 - [INFO] - cmdstanpy - (model.py)._run_cmdstan(1608) - Chain [1] done processing


07:43:42 - cmdstanpy - INFO - Chain [1] done processing


2022-08-04 07:43:42,187 - [INFO] - cmdstanpy - (model.py)._run_cmdstan(1608) - Chain [1] done processing
2022-08-04 07:44:11,035 - [DEBUG] - __main__ - (4248716220.py).train(48) - Logged Metrics: 
{
  "mse": 9351.185930939262,
  "rmse": 90.25531279098,
  "mae": 65.86038899234282,
  "mape": 4.481336152927343,
  "mdape": 3.495724374685615,
  "smape": 1.3049227672608839,
  "coverage": 0.8897306397306398
}
2022-08-04 07:44:11,037 - [DEBUG] - __main__ - (4248716220.py).train(50) - Logged Params: 
{
  "growth": "linear",
  "n_changepoints": 25,
  "specified_changepoints": false,
  "changepoint_range": 0.8,
  "yearly_seasonality": false,
  "weekly_seasonality": "auto",
  "daily_seasonality": "auto",
  "seasonality_mode": "multiplicative",
  "seasonality_prior_scale": 35.0,
  "changepoint_prior_scale": 30.0,
  "holidays_prior_scale": 10.0,
  "mcmc_samples": 0,
  "interval_width": 0.8,
  "uncertainty_samples": 1000,
  "y_scale": 85.88370658790792,
  "logistic_floor": false,
  "country_holidays"