diff --git a/ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py b/ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py index c63a2fe23..5a2a7c611 100644 --- a/ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +++ b/ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py @@ -86,7 +86,10 @@ def create_horizon(self, spec, historical_data): pd.date_range( start=historical_data.get_max_time(), periods=spec.horizon + 1, - freq=historical_data.freq, + freq=historical_data.freq + or pd.infer_freq( + historical_data.data.reset_index()[spec.datetime_column.name][-5:] + ), ), name=spec.datetime_column.name, ) diff --git a/ads/opctl/operator/lowcode/forecast/model/ml_forecast.py b/ads/opctl/operator/lowcode/forecast/model/ml_forecast.py index f36d2e23f..6ae69b285 100644 --- a/ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +++ b/ads/opctl/operator/lowcode/forecast/model/ml_forecast.py @@ -73,20 +73,35 @@ def _train_model(self, data_train, data_test, model_kwargs): alpha=model_kwargs["lower_quantile"], ), }, - freq=pd.infer_freq(data_train.Date.drop_duplicates()), + freq=pd.infer_freq(data_train["Date"].drop_duplicates()) + or pd.infer_freq(data_train["Date"].drop_duplicates()[-5:]), target_transforms=[Differences([12])], - lags=model_kwargs.get("lags", [1, 6, 12]), - lag_transforms={ - 1: [ExpandingMean()], - 12: [RollingMean(window_size=24)], - }, + lags=model_kwargs.get( + "lags", + ( + [1, 6, 12] + if len(self.datasets.get_additional_data_column_names()) > 0 + else [] + ), + ), + lag_transforms=( + { + 1: [ExpandingMean()], + 12: [RollingMean(window_size=24)], + } + if len(self.datasets.get_additional_data_column_names()) > 0 + else {} + ), # date_features=[hour_index], ) num_models = model_kwargs.get("recursive_models", False) + self.model_columns = [ + ForecastOutputColumns.SERIES + ] + data_train.select_dtypes(exclude=["object"]).columns.to_list() fcst.fit( - data_train, + data_train[self.model_columns], static_features=model_kwargs.get("static_features", []), id_col=ForecastOutputColumns.SERIES, time_col=self.spec.datetime_column.name, @@ -99,8 +114,10 @@ def _train_model(self, data_train, data_test, model_kwargs): h=self.spec.horizon, X_df=pd.concat( [ - data_test, - fcst.get_missing_future(h=self.spec.horizon, X_df=data_test), + data_test[self.model_columns], + fcst.get_missing_future( + h=self.spec.horizon, X_df=data_test[self.model_columns] + ), ], axis=0, ignore_index=True, @@ -166,12 +183,16 @@ def _generate_report(self): # Section 1: Forecast Overview sec1_text = rc.Block( rc.Heading("Forecast Overview", level=2), - rc.Text("These plots show your forecast in the context of historical data.") + rc.Text( + "These plots show your forecast in the context of historical data." + ), ) sec_1 = _select_plot_list( lambda s_id: plot_series( self.datasets.get_all_data_long(include_horizon=False), - pd.concat([self.fitted_values,self.outputs], axis=0, ignore_index=True), + pd.concat( + [self.fitted_values, self.outputs], axis=0, ignore_index=True + ), id_col=ForecastOutputColumns.SERIES, time_col=self.spec.datetime_column.name, target_col=self.original_target_column, @@ -184,7 +205,7 @@ def _generate_report(self): # Section 2: MlForecast Model Parameters sec2_text = rc.Block( rc.Heading("MlForecast Model Parameters", level=2), - rc.Text("These are the parameters used for the MlForecast model.") + rc.Text("These are the parameters used for the MlForecast model."), ) blocks = [ @@ -197,9 +218,11 @@ def _generate_report(self): sec_2 = rc.Select(blocks=blocks) all_sections = [sec1_text, sec_1, sec2_text, sec_2] - model_description = rc.Text("mlforecast is a framework to perform time series forecasting using machine learning models" - "with the option to scale to massive amounts of data using remote clusters." - "Fastest implementations of feature engineering for time series forecasting in Python." - "Support for exogenous variables and static covariates.") + model_description = rc.Text( + "mlforecast is a framework to perform time series forecasting using machine learning models" + "with the option to scale to massive amounts of data using remote clusters." + "Fastest implementations of feature engineering for time series forecasting in Python." + "Support for exogenous variables and static covariates." + ) - return model_description, all_sections \ No newline at end of file + return model_description, all_sections diff --git a/tests/operators/forecast/test_datasets.py b/tests/operators/forecast/test_datasets.py index 9eadaa70f..4ed9cf72c 100644 --- a/tests/operators/forecast/test_datasets.py +++ b/tests/operators/forecast/test_datasets.py @@ -137,7 +137,10 @@ def test_load_datasets(model, data_details): run(yaml_i, backend="operator.local", debug=False) subprocess.run(f"ls -a {output_data_path}", shell=True) - if yaml_i["spec"]["generate_explanations"] and model != "automlx": + if yaml_i["spec"]["generate_explanations"] and model not in [ + "automlx", + "mlforecast", + ]: verify_explanations( tmpdirname=tmpdirname, additional_cols=additional_cols, diff --git a/tests/operators/forecast/test_errors.py b/tests/operators/forecast/test_errors.py index 7faaa9e1f..4edae9f2b 100644 --- a/tests/operators/forecast/test_errors.py +++ b/tests/operators/forecast/test_errors.py @@ -687,7 +687,7 @@ def test_arima_automlx_errors(operator_setup, model): in error_content["13"]["error"] ), "Error message mismatch" - if model not in ["autots", "automlx"]: + if model not in ["autots", "automlx", "mlforecast"]: global_fn = f"{tmpdirname}/results/global_explanation.csv" assert os.path.exists( global_fn