From c299c08924e9f62841f4670763a2bfbf87e396d4 Mon Sep 17 00:00:00 2001 From: Prashant Sankhla Date: Tue, 19 Sep 2023 13:57:18 +0530 Subject: [PATCH 1/5] fixed bugs that is causing errors when series are missing in test dataset --- .../lowcode/forecast/model/base_model.py | 21 +++++++++++-------- ads/opctl/operator/lowcode/forecast/utils.py | 2 +- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/ads/opctl/operator/lowcode/forecast/model/base_model.py b/ads/opctl/operator/lowcode/forecast/model/base_model.py index 87bb42f9a..220963ace 100644 --- a/ads/opctl/operator/lowcode/forecast/model/base_model.py +++ b/ads/opctl/operator/lowcode/forecast/model/base_model.py @@ -291,13 +291,15 @@ def _test_evaluate_metrics( ) for idx, col in enumerate(target_columns): - y_true = np.asarray(data[col]) - y_pred = np.asarray(outputs[idx][target_col][-len(y_true) :]) + # Only columns present in test file will be used to generate holdout error + if col in data: + y_true = np.asarray(data[col]) + y_pred = np.asarray(outputs[idx][target_col][-len(y_true) :]) - metrics_df = utils._build_metrics_df( - y_true=y_true, y_pred=y_pred, column_name=col - ) - total_metrics = pd.concat([total_metrics, metrics_df], axis=1) + metrics_df = utils._build_metrics_df( + y_true=y_true, y_pred=y_pred, column_name=col + ) + total_metrics = pd.concat([total_metrics, metrics_df], axis=1) summary_metrics = pd.DataFrame( { @@ -338,7 +340,8 @@ def _test_evaluate_metrics( """Calculates Mean sMAPE, Median sMAPE, Mean MAPE, Median MAPE, Mean wMAPE, Median wMAPE values for each horizon if horizon <= 10.""" - if len(data["ds"]) <= 10: + target_columns_in_output = set(target_columns).intersection(data.columns) + if len(data["ds"]) <= 10 and len(outputs) == len(target_columns_in_output): metrics_per_horizon = utils._build_metrics_per_horizon( data=data, outputs=outputs, @@ -404,11 +407,11 @@ def _save_report( # metrics csv report utils._write_data( - data=metrics_df, + 
data=metrics_df.rename_axis('metrics').reset_index(), filename=os.path.join(output_dir, self.spec.metrics_filename), format="csv", storage_options=default_signer(), - index=True, + index=False, ) logger.warn( diff --git a/ads/opctl/operator/lowcode/forecast/utils.py b/ads/opctl/operator/lowcode/forecast/utils.py index a5e24270d..d88499c4c 100644 --- a/ads/opctl/operator/lowcode/forecast/utils.py +++ b/ads/opctl/operator/lowcode/forecast/utils.py @@ -374,7 +374,7 @@ def plot_forecast_plotly(idx, col): ), ] ) - if test_data is not None: + if test_data is not None and col in test_data: fig.add_trace( go.Scatter( x=test_data["ds"], From 484cc248e6a853003f4deb6d84f2f7e3a1c7f53c Mon Sep 17 00:00:00 2001 From: Prashant Sankhla Date: Tue, 19 Sep 2023 16:47:31 +0530 Subject: [PATCH 2/5] initial benchmarks --- .../forecast/benchmarks/benchmark_datasets.py | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100755 tests/unitary/with_extras/operator/forecast/benchmarks/benchmark_datasets.py diff --git a/tests/unitary/with_extras/operator/forecast/benchmarks/benchmark_datasets.py b/tests/unitary/with_extras/operator/forecast/benchmarks/benchmark_datasets.py new file mode 100755 index 000000000..ca9d46190 --- /dev/null +++ b/tests/unitary/with_extras/operator/forecast/benchmarks/benchmark_datasets.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python +# -*- coding: utf-8; -*- +import copy + +# Copyright (c) 2023 Oracle and/or its affiliates. 
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ + +from ads.opctl.operator.lowcode.forecast.operator_config import * +from ads.opctl.operator.lowcode.forecast.model.factory import ForecastOperatorModelFactory +import pandas as pd +from ads.opctl import logger + +if __name__ == '__main__': + """Benchmarks for datasets.""" + + data_dir = "oci://ads_preview_sdk@ociodscdev/Forecasting/data/" + smape = SupportedMetrics.SMAPE + mape = SupportedMetrics.MAPE + rmse = SupportedMetrics.RMSE + + prophet = 'prophet' + arima = 'arima' + automlx = 'automlx' + neuralprophet = 'neuralprophet' + + benchmark_metrics = [smape, mape, rmse] + + # Expected values + ppg_sales_benchmark_numbers = { + prophet: {smape: 30, mape: 10, rmse: 1780}, + arima: {smape: 20, mape: 2, rmse: 1500}, + automlx: {smape: 25, mape: 6, rmse: 1530}, + # neuralprophet: {smape: 29, mape: 9.5, rmse: 1760}, + } + + ttx_small_benchmark_numbers = { + prophet: {smape: 18, mape: 0.5, rmse: 75}, + arima: {smape: 21, mape: 0.45, rmse: 75}, + automlx: {smape: 15, mape: 0.3, rmse: 74}, + # neuralprophet: {smape: 30, mape: 10, rmse: 1780}, + } + + datasets = { + 'EPM-PPG-CODE-SALES': ppg_sales_benchmark_numbers, + 'TTX-small': ttx_small_benchmark_numbers + } + metrics = [SupportedMetrics.SMAPE, SupportedMetrics.MAPE, SupportedMetrics.RMSE] + + for dataset in datasets: + for model in datasets[dataset]: + operator_config: ForecastOperatorConfig = ForecastOperatorConfig.from_yaml( + uri='{}/{}/forecast.yaml'.format(data_dir, dataset) + ) + operator_config.spec.model = model + operator_config.spec.output_directory = OutputDirectory( + url="{}/{}".format(operator_config.spec.output_directory.url, model)) + + # Training and generating the model outputs + ForecastOperatorModelFactory.get_model(operator_config).generate_report() + + # Reading holdout erros. 
+ metrics_df = pd.read_csv('{}/{}/output/{}/metrics.csv'.format(data_dir, dataset, model)).set_index( + 'metrics') + metrics_dict = metrics_df.mean(axis=1).to_dict() + logger.info("{} | {} | {}".format(dataset, model, metrics_dict)) + # Actual values should be less than or equal to expected values + for metric in benchmark_metrics: + assert metrics_dict[metric] <= datasets[dataset][model][metric] + logger.info("Test completed for {} and {} model".format(dataset, model)) From b028fb6eb89741c1c8b2f61f8fb8102b23f36d02 Mon Sep 17 00:00:00 2001 From: Allen Hosler Date: Wed, 20 Sep 2023 11:01:49 +0100 Subject: [PATCH 3/5] Update benchmark_datasets.py --- .../forecast/benchmarks/benchmark_datasets.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/unitary/with_extras/operator/forecast/benchmarks/benchmark_datasets.py b/tests/unitary/with_extras/operator/forecast/benchmarks/benchmark_datasets.py index ca9d46190..dcabb802b 100755 --- a/tests/unitary/with_extras/operator/forecast/benchmarks/benchmark_datasets.py +++ b/tests/unitary/with_extras/operator/forecast/benchmarks/benchmark_datasets.py @@ -13,7 +13,11 @@ if __name__ == '__main__': """Benchmarks for datasets.""" - data_dir = "oci://ads_preview_sdk@ociodscdev/Forecasting/data/" + try: + data_dir = os.environ["OCI__FORECASTING_DATA_DIR"] + except: + raise ValueError("Please set the environment variable `OCI__FORECASTING_DATA_DIR` to the location of the forecasting datasets") + smape = SupportedMetrics.SMAPE mape = SupportedMetrics.MAPE rmse = SupportedMetrics.RMSE @@ -26,14 +30,14 @@ benchmark_metrics = [smape, mape, rmse] # Expected values - ppg_sales_benchmark_numbers = { + cust1_numbers = { prophet: {smape: 30, mape: 10, rmse: 1780}, arima: {smape: 20, mape: 2, rmse: 1500}, automlx: {smape: 25, mape: 6, rmse: 1530}, # neuralprophet: {smape: 29, mape: 9.5, rmse: 1760}, } - ttx_small_benchmark_numbers = { + cust2_numbers = { prophet: {smape: 18, mape: 0.5, rmse: 75}, arima: {smape: 21, mape: 
0.45, rmse: 75}, automlx: {smape: 15, mape: 0.3, rmse: 74}, @@ -41,8 +45,8 @@ } datasets = { - 'EPM-PPG-CODE-SALES': ppg_sales_benchmark_numbers, - 'TTX-small': ttx_small_benchmark_numbers + 'cust1': cust1_numbers, + 'cust2': cust2_numbers, } metrics = [SupportedMetrics.SMAPE, SupportedMetrics.MAPE, SupportedMetrics.RMSE] From b1b836b88ab6dad266ff2db618dc271fafbdb2e8 Mon Sep 17 00:00:00 2001 From: Allen Hosler Date: Wed, 20 Sep 2023 11:14:38 +0100 Subject: [PATCH 4/5] Update benchmark_datasets.py --- .../operator/forecast/benchmarks/benchmark_datasets.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/unitary/with_extras/operator/forecast/benchmarks/benchmark_datasets.py b/tests/unitary/with_extras/operator/forecast/benchmarks/benchmark_datasets.py index dcabb802b..411887f6c 100755 --- a/tests/unitary/with_extras/operator/forecast/benchmarks/benchmark_datasets.py +++ b/tests/unitary/with_extras/operator/forecast/benchmarks/benchmark_datasets.py @@ -9,6 +9,7 @@ from ads.opctl.operator.lowcode.forecast.model.factory import ForecastOperatorModelFactory import pandas as pd from ads.opctl import logger +import os if __name__ == '__main__': """Benchmarks for datasets.""" @@ -53,17 +54,18 @@ for dataset in datasets: for model in datasets[dataset]: operator_config: ForecastOperatorConfig = ForecastOperatorConfig.from_yaml( - uri='{}/{}/forecast.yaml'.format(data_dir, dataset) + uri=os.path.join(data_dir, dataset, 'forecast.yaml') ) operator_config.spec.model = model operator_config.spec.output_directory = OutputDirectory( - url="{}/{}".format(operator_config.spec.output_directory.url, model)) + url=os.path.join(operator_config.spec.output_directory.url, model) + ) # Training and generating the model outputs ForecastOperatorModelFactory.get_model(operator_config).generate_report() # Reading holdout erros. 
- metrics_df = pd.read_csv('{}/{}/output/{}/metrics.csv'.format(data_dir, dataset, model)).set_index( + metrics_df = pd.read_csv(os.path.join(data_dir, dataset, 'output', model, 'metrics.csv')).set_index( 'metrics') metrics_dict = metrics_df.mean(axis=1).to_dict() logger.info("{} | {} | {}".format(dataset, model, metrics_dict)) From a513d7bbcd45122e87159ba7fbb4eb186828e096 Mon Sep 17 00:00:00 2001 From: Allen Hosler Date: Wed, 20 Sep 2023 12:04:25 +0100 Subject: [PATCH 5/5] adding 15% margin to automlx numbers --- .../operator/forecast/benchmarks/benchmark_datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unitary/with_extras/operator/forecast/benchmarks/benchmark_datasets.py b/tests/unitary/with_extras/operator/forecast/benchmarks/benchmark_datasets.py index 411887f6c..14912d4dc 100755 --- a/tests/unitary/with_extras/operator/forecast/benchmarks/benchmark_datasets.py +++ b/tests/unitary/with_extras/operator/forecast/benchmarks/benchmark_datasets.py @@ -34,7 +34,7 @@ cust1_numbers = { prophet: {smape: 30, mape: 10, rmse: 1780}, arima: {smape: 20, mape: 2, rmse: 1500}, - automlx: {smape: 25, mape: 6, rmse: 1530}, + automlx: {smape: 30, mape: 7, rmse: 1750}, # neuralprophet: {smape: 29, mape: 9.5, rmse: 1760}, }