#!/usr/bin/env python
# -*- coding: utf-8; -*-

# Copyright (c) 2023 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

import os

import pandas as pd

from ads.opctl import logger
from ads.opctl.operator.lowcode.forecast.model.factory import ForecastOperatorModelFactory
# Star import used below for ForecastOperatorConfig, OutputDirectory, and SupportedMetrics.
from ads.opctl.operator.lowcode.forecast.operator_config import *

if __name__ == '__main__':
    # Benchmark each supported model against expected metric ceilings per dataset.

    try:
        data_dir = os.environ["OCI__FORECASTING_DATA_DIR"]
    except KeyError as e:
        raise ValueError(
            "Please set the environment variable `OCI__FORECASTING_DATA_DIR` "
            "to the location of the forecasting datasets"
        ) from e
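
    # Usage sketch (the path below is hypothetical; point the variable at the
    # directory holding one sub-directory per dataset, e.g. cust1/ and cust2/):
    #   export OCI__FORECASTING_DATA_DIR=/data/forecasting-datasets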

    smape = SupportedMetrics.SMAPE
    mape = SupportedMetrics.MAPE
    rmse = SupportedMetrics.RMSE

    prophet = 'prophet'
    arima = 'arima'
    automlx = 'automlx'
    neuralprophet = 'neuralprophet'

    benchmark_metrics = [smape, mape, rmse]

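    # For context: SMAPE and MAPE are percentage-style errors, while RMSE is in
    # the units of the target series, hence the very different magnitudes below.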
    # Expected metric ceilings per dataset and model; a benchmark run passes
    # only if every observed metric is at or below these values.
    cust1_numbers = {
        prophet: {smape: 30, mape: 10, rmse: 1780},
        arima: {smape: 20, mape: 2, rmse: 1500},
        automlx: {smape: 30, mape: 7, rmse: 1750},
        # neuralprophet: {smape: 29, mape: 9.5, rmse: 1760},
    }

    cust2_numbers = {
        prophet: {smape: 18, mape: 0.5, rmse: 75},
        arima: {smape: 21, mape: 0.45, rmse: 75},
        automlx: {smape: 15, mape: 0.3, rmse: 74},
        # neuralprophet: {smape: 30, mape: 10, rmse: 1780},
    }

    datasets = {
        'cust1': cust1_numbers,
        'cust2': cust2_numbers,
    }

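    # For every (dataset, model) pair: train, write the report and outputs, then
    # compare the holdout metrics (averaged across series) against the ceilings.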
    for dataset in datasets:
        for model in datasets[dataset]:
            # Load the dataset's operator config and redirect output into a
            # model-specific sub-directory so runs do not overwrite each other.
            operator_config: ForecastOperatorConfig = ForecastOperatorConfig.from_yaml(
                uri=os.path.join(data_dir, dataset, 'forecast.yaml')
            )
            operator_config.spec.model = model
            operator_config.spec.output_directory = OutputDirectory(
                url=os.path.join(operator_config.spec.output_directory.url, model)
            )

            # Train the model and generate the report and forecast outputs.
            ForecastOperatorModelFactory.get_model(operator_config).generate_report()
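
            # Note: the read below assumes the YAML's output directory resolves
            # to <dataset>/output under OCI__FORECASTING_DATA_DIR, so per-model
            # metrics land at <dataset>/output/<model>/metrics.csv.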

            # Read the holdout errors and average them across all series.
            metrics_df = pd.read_csv(
                os.path.join(data_dir, dataset, 'output', model, 'metrics.csv')
            ).set_index('metrics')
            metrics_dict = metrics_df.mean(axis=1).to_dict()
            logger.info("{} | {} | {}".format(dataset, model, metrics_dict))
            # Observed metrics must come in at or below the expected ceilings.
            for metric in benchmark_metrics:
                assert metrics_dict[metric] <= datasets[dataset][model][metric], \
                    "{} {} exceeded the {} ceiling".format(dataset, model, metric)
            logger.info("Test completed for dataset {} and model {}".format(dataset, model))