diff --git a/ads/opctl/operator/lowcode/forecast/model/ml_forecast.py b/ads/opctl/operator/lowcode/forecast/model/ml_forecast.py
index 05c0dd606..29a832a7f 100644
--- a/ads/opctl/operator/lowcode/forecast/model/ml_forecast.py
+++ b/ads/opctl/operator/lowcode/forecast/model/ml_forecast.py
@@ -41,6 +41,7 @@ def set_kwargs(self):
         model_kwargs["uppper_quantile"] = uppper_quantile
         return model_kwargs
 
+
     def preprocess(self, df, series_id):
         pass
 
@@ -53,54 +54,73 @@ def preprocess(self, df, series_id):
         err_msg="lightgbm is not installed, please install it with 'pip install lightgbm'",
     )
     def _train_model(self, data_train, data_test, model_kwargs):
+        import lightgbm as lgb
+        from mlforecast import MLForecast
+        from mlforecast.lag_transforms import ExpandingMean, RollingMean
+        from mlforecast.target_transforms import Differences
+
+        def set_model_config(freq):
+            # Map the pandas frequency alias to a seasonal period, then derive
+            # lag/rolling/differencing defaults that fit the shortest series.
+            seasonal_map = {
+                "H": 24,
+                "D": 7,
+                "W": 52,
+                "M": 12,
+                "Q": 4,
+            }
+            sp = seasonal_map.get((freq or "").upper(), 7)  # infer_freq may return None
+            series_lengths = data_train.groupby(ForecastOutputColumns.SERIES).size()
+            min_len = series_lengths.min()
+            max_allowed = min_len - sp
+
+            default_lags = [lag for lag in [1, sp, 2 * sp] if lag <= max_allowed]
+            lags = model_kwargs.get("lags", default_lags)
+
+            default_roll = 2 * sp
+            roll = model_kwargs.get("RollingMean", default_roll)
+
+            default_diff = sp if sp <= max_allowed else None
+            diff = model_kwargs.get("Differences", default_diff)
+
+            return {
+                # Differences([None]) would fail, so skip the transform for short series.
+                "target_transforms": [Differences([diff])] if diff is not None else [],
+                "lags": lags,
+                "lag_transforms": {
+                    1: [ExpandingMean()],
+                    sp: [RollingMean(window_size=roll, min_samples=1)],
+                },
+            }
+
         try:
-            import lightgbm as lgb
-            from mlforecast import MLForecast
-            from mlforecast.lag_transforms import ExpandingMean, RollingMean
-            from mlforecast.target_transforms import Differences
 
             lgb_params = {
                 "verbosity": model_kwargs.get("verbosity", -1),
                 "num_leaves": model_kwargs.get("num_leaves", 512),
            }
-            additional_data_params = {}
-            if len(self.datasets.get_additional_data_column_names()) > 0:
-                additional_data_params = {
-                    "target_transforms": [
-                        Differences([model_kwargs.get("Differences", 12)])
-                    ],
-                    "lags": model_kwargs.get("lags", [1, 6, 12]),
-                    "lag_transforms": (
-                        {
-                            1: [ExpandingMean()],
-                            12: [
-                                RollingMean(
-                                    window_size=model_kwargs.get("RollingMean", 24),
-                                    min_samples=1,
-                                )
-                            ],
-                        }
-                    ),
-                }
+
+            data_freq = pd.infer_freq(data_train[self.date_col].drop_duplicates()) \
+                or pd.infer_freq(data_train[self.date_col].drop_duplicates()[-5:])
+
+            additional_data_params = set_model_config(data_freq)
 
             fcst = MLForecast(
                 models={
                     "forecast": lgb.LGBMRegressor(**lgb_params),
-                    # "p" + str(int(model_kwargs["uppper_quantile"] * 100))
                     "upper": lgb.LGBMRegressor(
                         **lgb_params,
                         objective="quantile",
                         alpha=model_kwargs["uppper_quantile"],
                     ),
-                    # "p" + str(int(model_kwargs["lower_quantile"] * 100))
                     "lower": lgb.LGBMRegressor(
                         **lgb_params,
                         objective="quantile",
                         alpha=model_kwargs["lower_quantile"],
                     ),
                 },
-                freq=pd.infer_freq(data_train[self.date_col].drop_duplicates())
-                or pd.infer_freq(data_train[self.date_col].drop_duplicates()[-5:]),
+                freq=data_freq,
+                date_features=["year", "month", "day", "dayofweek", "dayofyear"],
                 **additional_data_params,
             )
 
@@ -158,6 +175,7 @@ def _train_model(self, data_train, data_test, model_kwargs):
                 self.model_parameters[s_id] = {
                     "framework": SupportedModels.LGBForecast,
                     **lgb_params,
+                    **fcst.models_["forecast"].get_params(),
                 }
 
             logger.debug("===========Done===========")
@@ -191,48 +209,22 @@ def _generate_report(self):
         Generates the report for the model
         """
         import report_creator as rc
-        from utilsforecast.plotting import plot_series
 
         logging.getLogger("report_creator").setLevel(logging.WARNING)
 
-        # Section 1: Forecast Overview
-        sec1_text = rc.Block(
-            rc.Heading("Forecast Overview", level=2),
-            rc.Text(
-                "These plots show your forecast in the context of historical data."
-            ),
-        )
-        sec_1 = _select_plot_list(
-            lambda s_id: plot_series(
-                self.datasets.get_all_data_long(include_horizon=False),
-                pd.concat(
-                    [self.fitted_values, self.outputs], axis=0, ignore_index=True
-                ),
-                id_col=ForecastOutputColumns.SERIES,
-                time_col=self.spec.datetime_column.name,
-                target_col=self.original_target_column,
-                seed=42,
-                ids=[s_id],
-            ),
-            self.datasets.list_series_ids(),
-        )
-
         # Section 2: LGBForecast Model Parameters
         sec2_text = rc.Block(
             rc.Heading("LGBForecast Model Parameters", level=2),
             rc.Text("These are the parameters used for the LGBForecast model."),
         )
-        blocks = [
-            rc.Html(
-                str(s_id[1]),
-                label=s_id[0],
-            )
-            for _, s_id in enumerate(self.model_parameters.items())
-        ]
-        sec_2 = rc.Select(blocks=blocks)
+        # All series share one parameter set, so report the first entry only.
+        _, params = next(iter(self.model_parameters.items()))
+        sec_2 = rc.Html(
+            pd.DataFrame(list(params.items())).to_html(index=False, header=False),
+        )
 
-        all_sections = [sec1_text, sec_1, sec2_text, sec_2]
+        all_sections = [sec2_text, sec_2]
 
         model_description = rc.Text(
             "LGBForecast uses mlforecast framework to perform time series forecasting using machine learning models"
             "with the option to scale to massive amounts of data using remote clusters."
diff --git a/ads/opctl/operator/lowcode/forecast/schema.yaml b/ads/opctl/operator/lowcode/forecast/schema.yaml
index fe7c90df5..45690aa57 100644
--- a/ads/opctl/operator/lowcode/forecast/schema.yaml
+++ b/ads/opctl/operator/lowcode/forecast/schema.yaml
@@ -455,7 +455,7 @@ spec:
       - prophet
       - arima
       - neuralprophet
-      # - lgbforecast
+      - lgbforecast
       - automlx
       - autots
       - auto-select
diff --git a/pyproject.toml b/pyproject.toml
index 9dc811e1d..bf8e3e674 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -177,6 +177,7 @@ forecast = [
     "autots",
     "mlforecast",
     "neuralprophet>=0.7.0",
+    "pytorch-lightning==2.5.5",
     "numpy<2.0.0",
     "oci-cli",
     "optuna",
diff --git a/tests/operators/forecast/test_datasets.py b/tests/operators/forecast/test_datasets.py
index 8460bbea7..aeb5daa66 100644
--- a/tests/operators/forecast/test_datasets.py
+++ b/tests/operators/forecast/test_datasets.py
@@ -32,7 +32,7 @@
     "prophet",
     "neuralprophet",
     "autots",
-    # "lgbforecast",
+    "lgbforecast",
     "auto-select",
     "auto-select-series",
 ]
@@ -177,7 +177,7 @@ def test_load_datasets(model, data_details):
     subprocess.run(f"ls -a {output_data_path}", shell=True)
     if yaml_i["spec"]["generate_explanations"] and model not in [
         "automlx",
-        # "lgbforecast",
+        "lgbforecast",
         "auto-select",
     ]:
         verify_explanations(
diff --git a/tests/operators/forecast/test_errors.py b/tests/operators/forecast/test_errors.py
index b102d7127..2d69dce9e 100644
--- a/tests/operators/forecast/test_errors.py
+++ b/tests/operators/forecast/test_errors.py
@@ -143,7 +143,7 @@
     "prophet",
     "neuralprophet",
     "autots",
-    # "lgbforecast",
+    "lgbforecast",
 ]
 
 TEMPLATE_YAML = {
@@ -415,8 +415,8 @@ def test_0_series(operator_setup, model):
         "local_explanation.csv",
         "global_explanation.csv",
     ]
-    if model == "autots":
-        # explanations are not supported for autots
+    if model in ["autots", "lgbforecast"]:
+        # explanations are not supported for autots or lgbforecast
         output_files.remove("local_explanation.csv")
         output_files.remove("global_explanation.csv")
     for file in output_files:
@@ -709,7 +709,7 @@ def test_arima_automlx_errors(operator_setup, model):
             in error_content["13"]["model_fitting"]["error"]
         ), f"Error message mismatch: {error_content}"
 
-    if model not in ["autots", "automlx"]:  # , "lgbforecast"
+    if model not in ["autots", "automlx", "lgbforecast"]:
         if yaml_i["spec"].get("explanations_accuracy_mode") != "AUTOMLX":
             global_fn = f"{tmpdirname}/results/global_explanation.csv"
             assert os.path.exists(
@@ -816,7 +816,7 @@ def test_date_format(operator_setup, model):
 @pytest.mark.parametrize("model", MODELS)
 def test_what_if_analysis(operator_setup, model):
     os.environ["TEST_MODE"] = "True"
-    if model == "auto-select":
-        pytest.skip("Skipping what-if scenario for auto-select")
+    if model in ["auto-select", "lgbforecast"]:
+        pytest.skip("Skipping what-if scenario for auto-select and lgbforecast")
     tmpdirname = operator_setup
     historical_data_path, additional_data_path = setup_small_rossman()
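
Reviewer note: the defaults chosen by the new set_model_config can be traced by
hand. The sketch below re-implements the derivation outside the class purely
for illustration; the frequency alias and the series length are made-up inputs,
not values taken from this PR.

    # Standalone sketch of the default derivation in set_model_config
    # (hypothetical inputs: daily data whose shortest series has 30 rows).
    seasonal_map = {"H": 24, "D": 7, "W": 52, "M": 12, "Q": 4}

    freq, min_len = "D", 30
    sp = seasonal_map.get(freq.upper(), 7)    # seasonal period -> 7
    max_allowed = min_len - sp                # longest usable lag -> 23

    lags = [lag for lag in [1, sp, 2 * sp] if lag <= max_allowed]  # -> [1, 7, 14]
    roll = 2 * sp                             # rolling-mean window -> 14
    diff = sp if sp <= max_allowed else None  # differencing period -> 7

    print(lags, roll, diff)  # [1, 7, 14] 14 7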
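Usage sketch (assumptions flagged): with lgbforecast re-enabled in schema.yaml,
a forecast operator spec along these lines should select the model. The file
path and column names below are placeholders; the model_kwargs keys are the
ones the new code reads (lags, RollingMean, Differences) plus the LightGBM
passthroughs (verbosity, num_leaves).

    kind: operator
    type: forecast
    version: v1
    spec:
      historical_data:
        url: historical.csv        # placeholder input path
      datetime_column:
        name: ds                   # placeholder column name
      target_column: y             # placeholder column name
      horizon: 14
      model: lgbforecast
      model_kwargs:
        lags: [1, 7, 14]           # overrides the derived defaults
        RollingMean: 14            # rolling-mean window for the seasonal lag
        Differences: 7             # seasonal differencing period
        num_leaves: 256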