21 changes: 12 additions & 9 deletions ads/opctl/operator/lowcode/forecast/model/base_model.py
@@ -291,13 +291,15 @@ def _test_evaluate_metrics(
        )

        for idx, col in enumerate(target_columns):
-            y_true = np.asarray(data[col])
-            y_pred = np.asarray(outputs[idx][target_col][-len(y_true) :])
+            # Only columns present in test file will be used to generate holdout error
+            if col in data:
+                y_true = np.asarray(data[col])
+                y_pred = np.asarray(outputs[idx][target_col][-len(y_true) :])

-            metrics_df = utils._build_metrics_df(
-                y_true=y_true, y_pred=y_pred, column_name=col
-            )
-            total_metrics = pd.concat([total_metrics, metrics_df], axis=1)
+                metrics_df = utils._build_metrics_df(
+                    y_true=y_true, y_pred=y_pred, column_name=col
+                )
+                total_metrics = pd.concat([total_metrics, metrics_df], axis=1)

        summary_metrics = pd.DataFrame(
            {
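The `col in data` guard means holdout metrics are now built only for target columns that actually appear in the test file; series missing from it are skipped instead of raising a KeyError. A minimal sketch of the effect, using a toy test frame and a simplified sMAPE in place of `utils._build_metrics_df` (the column names and the `forecasts` list are illustrative, not part of the operator):

import numpy as np
import pandas as pd

# Toy test file: only two of the three forecasted series are present.
data = pd.DataFrame({
    "ds": pd.date_range("2023-01-01", periods=3, freq="D"),
    "sales_A": [10.0, 11.0, 12.0],
    "sales_B": [5.0, 6.0, 7.0],
})
target_columns = ["sales_A", "sales_B", "sales_C"]   # sales_C has no holdout data
forecasts = [np.array([10.5, 10.9, 12.2]),           # per-series predictions, same order
             np.array([5.1, 5.8, 7.3]),
             np.array([4.0, 4.2, 4.1])]

total_metrics = pd.DataFrame()
for idx, col in enumerate(target_columns):
    if col in data:                                  # same guard as the patched loop
        y_true = np.asarray(data[col])
        y_pred = forecasts[idx][-len(y_true):]
        smape = np.mean(2 * np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)))
        total_metrics = pd.concat(
            [total_metrics, pd.DataFrame({col: {"sMAPE": smape}})], axis=1
        )

print(total_metrics.columns.tolist())                # ['sales_A', 'sales_B'] -- sales_C is skipped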
@@ -338,7 +340,8 @@ def _test_evaluate_metrics(

"""Calculates Mean sMAPE, Median sMAPE, Mean MAPE, Median MAPE, Mean wMAPE, Median wMAPE values for each horizon
if horizon <= 10."""
if len(data["ds"]) <= 10:
target_columns_in_output = set(target_columns).intersection(data.columns)
if len(data["ds"]) <= 10 and len(outputs) == len(target_columns_in_output):
metrics_per_horizon = utils._build_metrics_per_horizon(
data=data,
outputs=outputs,
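The extra condition keeps the per-horizon summary honest: it is generated only when the horizon is at most 10 and the number of forecast outputs matches the number of target columns that made it into the test frame, so the horizon-wise pairing of actuals and predictions cannot go out of sync. The sketch below only illustrates the kind of per-horizon aggregation this produces; it is not the actual `utils._build_metrics_per_horizon` implementation, and the column names are made up:

import pandas as pd

# Illustrative only: mean/median sMAPE across series at each horizon step.
# The real _build_metrics_per_horizon also covers MAPE and wMAPE.
def per_horizon_smape(y_true: pd.DataFrame, y_pred: pd.DataFrame) -> pd.DataFrame:
    smape = 2 * (y_pred - y_true).abs() / (y_true.abs() + y_pred.abs())  # per series, per step
    return pd.DataFrame({
        "Mean sMAPE": smape.mean(axis=1),       # averaged across series, one row per horizon step
        "Median sMAPE": smape.median(axis=1),
    })

horizon = pd.date_range("2023-02-01", periods=3, freq="D")
actuals = pd.DataFrame({"sales_A": [10, 11, 12], "sales_B": [5, 6, 7]}, index=horizon, dtype=float)
preds = pd.DataFrame({"sales_A": [10.5, 10.9, 12.2], "sales_B": [5.1, 5.8, 7.3]}, index=horizon)

# One row per date in the horizon; produced only when horizon <= 10 and the
# forecast outputs align with the target columns present in the test file.
print(per_horizon_smape(actuals, preds))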
@@ -404,11 +407,11 @@ def _save_report(

        # metrics csv report
        utils._write_data(
-            data=metrics_df,
+            data=metrics_df.rename_axis('metrics').reset_index(),
            filename=os.path.join(output_dir, self.spec.metrics_filename),
            format="csv",
            storage_options=default_signer(),
-            index=True,
+            index=False,
        )

        logger.warn(
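Renaming the index to `metrics` and writing with `index=False` turns the metric names from an anonymous CSV index into an explicit `metrics` column, which is exactly the layout the benchmark script below relies on when it calls `set_index('metrics')`. A small before/after sketch of the written CSV (values are illustrative):

import pandas as pd

# metrics_df as built earlier: one column per series, metric names on the index.
metrics_df = pd.DataFrame(
    {"sales_A": [12.3, 4.1], "sales_B": [9.8, 3.2]},
    index=["sMAPE", "MAPE"],
)

# Before this change: the index is written anonymously, so the first CSV column has no header.
print(metrics_df.to_csv(index=True))
# ,sales_A,sales_B
# sMAPE,12.3,9.8
# MAPE,4.1,3.2

# After: metric names become a named 'metrics' column and no index is written.
out = metrics_df.rename_axis("metrics").reset_index()
print(out.to_csv(index=False))
# metrics,sales_A,sales_B
# sMAPE,12.3,9.8
# MAPE,4.1,3.2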
2 changes: 1 addition & 1 deletion ads/opctl/operator/lowcode/forecast/utils.py
@@ -374,7 +374,7 @@ def plot_forecast_plotly(idx, col):
                ),
            ]
        )
-        if test_data is not None:
+        if test_data is not None and col in test_data:
            fig.add_trace(
                go.Scatter(
                    x=test_data["ds"],
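The plotting fix applies the same rule to the report figures: a test-data trace is added only for series that actually exist in the test frame. A minimal sketch of the guarded trace, assuming the plotly `graph_objects` API already used in `utils.py` (column names and trace styling are illustrative):

import pandas as pd
import plotly.graph_objects as go

test_data = pd.DataFrame({
    "ds": pd.date_range("2023-02-01", periods=3, freq="D"),
    "sales_A": [10.0, 11.0, 12.0],                    # present -> plotted
})                                                     # sales_B intentionally absent

fig = go.Figure()
for col in ["sales_A", "sales_B"]:
    if test_data is not None and col in test_data:    # same guard as the patch
        fig.add_trace(
            go.Scatter(x=test_data["ds"], y=test_data[col], mode="markers", name=f"Test: {col}")
        )

print(len(fig.data))   # 1 -- only the series present in the test file gets a trace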
@@ -0,0 +1,75 @@
#!/usr/bin/env python
# -*- coding: utf-8; -*-

# Copyright (c) 2023 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

from ads.opctl.operator.lowcode.forecast.operator_config import *
from ads.opctl.operator.lowcode.forecast.model.factory import ForecastOperatorModelFactory
import pandas as pd
from ads.opctl import logger
import os

if __name__ == '__main__':
    """Benchmarks for datasets."""

    try:
        data_dir = os.environ["OCI__FORECASTING_DATA_DIR"]
    except KeyError:
        raise ValueError(
            "Please set the environment variable `OCI__FORECASTING_DATA_DIR` to the location of the forecasting datasets"
        )

    smape = SupportedMetrics.SMAPE
    mape = SupportedMetrics.MAPE
    rmse = SupportedMetrics.RMSE

    prophet = 'prophet'
    arima = 'arima'
    automlx = 'automlx'
    neuralprophet = 'neuralprophet'

    benchmark_metrics = [smape, mape, rmse]

    # Expected values
    cust1_numbers = {
        prophet: {smape: 30, mape: 10, rmse: 1780},
        arima: {smape: 20, mape: 2, rmse: 1500},
        automlx: {smape: 30, mape: 7, rmse: 1750},
        # neuralprophet: {smape: 29, mape: 9.5, rmse: 1760},
    }

    cust2_numbers = {
        prophet: {smape: 18, mape: 0.5, rmse: 75},
        arima: {smape: 21, mape: 0.45, rmse: 75},
        automlx: {smape: 15, mape: 0.3, rmse: 74},
        # neuralprophet: {smape: 30, mape: 10, rmse: 1780},
    }

    datasets = {
        'cust1': cust1_numbers,
        'cust2': cust2_numbers,
    }
    metrics = [SupportedMetrics.SMAPE, SupportedMetrics.MAPE, SupportedMetrics.RMSE]

    for dataset in datasets:
        for model in datasets[dataset]:
            operator_config: ForecastOperatorConfig = ForecastOperatorConfig.from_yaml(
                uri=os.path.join(data_dir, dataset, 'forecast.yaml')
            )
            operator_config.spec.model = model
            operator_config.spec.output_directory = OutputDirectory(
                url=os.path.join(operator_config.spec.output_directory.url, model)
            )

            # Training and generating the model outputs
            ForecastOperatorModelFactory.get_model(operator_config).generate_report()

            # Reading holdout errors.
            metrics_df = pd.read_csv(
                os.path.join(data_dir, dataset, 'output', model, 'metrics.csv')
            ).set_index('metrics')
            metrics_dict = metrics_df.mean(axis=1).to_dict()
            logger.info("{} | {} | {}".format(dataset, model, metrics_dict))
            # Computed metrics must not exceed the expected values defined above
            for metric in benchmark_metrics:
                assert metrics_dict[metric] <= datasets[dataset][model][metric]
            logger.info("Test completed for {} and {} model".format(dataset, model))
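For each dataset/model pair the script averages every metric across the forecasted series and asserts that the mean stays at or below the hard-coded ceiling in `cust1_numbers`/`cust2_numbers`. A small sketch of that aggregation on a toy `metrics.csv`, assuming the column layout produced by the `_write_data` change above (metric labels and values are illustrative):

import io
import pandas as pd

# Toy metrics.csv in the layout written by _save_report after this change.
csv_text = """metrics,sales_A,sales_B
SMAPE,18.0,22.0
MAPE,6.0,8.0
RMSE,1500.0,1700.0
"""

metrics_df = pd.read_csv(io.StringIO(csv_text)).set_index("metrics")
metrics_dict = metrics_df.mean(axis=1).to_dict()    # average each metric across series
print(metrics_dict)                                 # {'SMAPE': 20.0, 'MAPE': 7.0, 'RMSE': 1600.0}

expected = {"SMAPE": 30, "MAPE": 10, "RMSE": 1780}  # ceilings, as in cust1_numbers[prophet]
for metric, ceiling in expected.items():
    assert metrics_dict[metric] <= ceiling          # benchmark passes while every mean stays under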