109 changes: 50 additions & 59 deletions ads/opctl/operator/lowcode/forecast/model/ml_forecast.py
@@ -41,6 +41,7 @@ def set_kwargs(self):
model_kwargs["uppper_quantile"] = uppper_quantile
return model_kwargs


def preprocess(self, df, series_id):
pass

@@ -53,54 +54,70 @@ def preprocess(self, df, series_id):
err_msg="lightgbm is not installed, please install it with 'pip install lightgbm'",
)
def _train_model(self, data_train, data_test, model_kwargs):
import lightgbm as lgb
from mlforecast import MLForecast
from mlforecast.lag_transforms import ExpandingMean, RollingMean
from mlforecast.target_transforms import Differences

def set_model_config(freq):
seasonal_map = {
"H": 24,
"D": 7,
"W": 52,
"M": 12,
"Q": 4,
}
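            # frequencies not in the map (e.g. "W-SUN"-style offsets) fall
            # back to a weekly period of 7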
sp = seasonal_map.get(freq.upper(), 7)
series_lengths = data_train.groupby(ForecastOutputColumns.SERIES).size()
min_len = series_lengths.min()
max_allowed = min_len - sp

default_lags = [lag for lag in [1, sp, 2 * sp] if lag <= max_allowed]
lags = model_kwargs.get("lags", default_lags)

default_roll = 2 * sp
roll = model_kwargs.get("RollingMean", default_roll)

default_diff = sp if sp <= max_allowed else None
diff = model_kwargs.get("Differences", default_diff)

return {
"target_transforms": [Differences([diff])],
"lags": lags,
"lag_transforms": {
1: [ExpandingMean()],
sp: [RollingMean(window_size=roll, min_samples=1)]
}
}

try:
import lightgbm as lgb
from mlforecast import MLForecast
from mlforecast.lag_transforms import ExpandingMean, RollingMean
from mlforecast.target_transforms import Differences

lgb_params = {
"verbosity": model_kwargs.get("verbosity", -1),
"num_leaves": model_kwargs.get("num_leaves", 512),
}
additional_data_params = {}
if len(self.datasets.get_additional_data_column_names()) > 0:
additional_data_params = {
"target_transforms": [
Differences([model_kwargs.get("Differences", 12)])
],
"lags": model_kwargs.get("lags", [1, 6, 12]),
"lag_transforms": (
{
1: [ExpandingMean()],
12: [
RollingMean(
window_size=model_kwargs.get("RollingMean", 24),
min_samples=1,
)
],
}
),
}

data_freq = pd.infer_freq(data_train[self.date_col].drop_duplicates()) \
or pd.infer_freq(data_train[self.date_col].drop_duplicates()[-5:])
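            # pd.infer_freq returns None when the datetime column has gaps;
            # retrying on the last few observations is a best-effort fallback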

additional_data_params = set_model_config(data_freq)

fcst = MLForecast(
models={
"forecast": lgb.LGBMRegressor(**lgb_params),
# "p" + str(int(model_kwargs["uppper_quantile"] * 100))
"upper": lgb.LGBMRegressor(
**lgb_params,
objective="quantile",
alpha=model_kwargs["uppper_quantile"],
),
# "p" + str(int(model_kwargs["lower_quantile"] * 100))
"lower": lgb.LGBMRegressor(
**lgb_params,
objective="quantile",
alpha=model_kwargs["lower_quantile"],
),
},
freq=pd.infer_freq(data_train[self.date_col].drop_duplicates())
or pd.infer_freq(data_train[self.date_col].drop_duplicates()[-5:]),
freq=data_freq,
date_features=['year', 'month', 'day', 'dayofweek', 'dayofyear'],
**additional_data_params,
)
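The new `set_model_config` helper derives a seasonal period from the pandas frequency string and caps candidate lags by the shortest series in the panel, so short series never request lags they cannot support. A minimal sketch of that rule with illustrative numbers (`min_len` is an assumption for the example, not a value from this PR):

```python
# Sketch of set_model_config's lag-capping rule for daily data.
seasonal_map = {"H": 24, "D": 7, "W": 52, "M": 12, "Q": 4}

sp = seasonal_map.get("D", 7)  # daily data -> seasonal period 7
min_len = 30                   # assumed length of the shortest series
max_allowed = min_len - sp     # longest usable lag -> 23

lags = [lag for lag in [1, sp, 2 * sp] if lag <= max_allowed]
print(lags)  # [1, 7, 14]: every candidate lag fits the shortest series
```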

@@ -158,6 +175,7 @@ def _train_model(self, data_train, data_test, model_kwargs):
self.model_parameters[s_id] = {
"framework": SupportedModels.LGBForecast,
**lgb_params,
**fcst.models_['forecast'].get_params(),
}

logger.debug("===========Done===========")
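Each series' stored parameters now include everything the fitted LightGBM model reports, not just the two operator-level overrides. A small sketch of the merge, using an unfitted regressor for brevity (`get_params()` is the standard scikit-learn accessor, so it is available on `LGBMRegressor`):

```python
import lightgbm as lgb

lgb_params = {"verbosity": -1, "num_leaves": 512}

# The fitted model's full configuration wins over the operator defaults,
# so the report later shows the complete effective parameter set.
merged = {**lgb_params, **lgb.LGBMRegressor(**lgb_params).get_params()}
print(merged["num_leaves"])  # 512
```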
@@ -191,48 +209,21 @@ def _generate_report(self):
Generates the report for the model
"""
import report_creator as rc
from utilsforecast.plotting import plot_series

logging.getLogger("report_creator").setLevel(logging.WARNING)

# Section 1: Forecast Overview
sec1_text = rc.Block(
rc.Heading("Forecast Overview", level=2),
rc.Text(
"These plots show your forecast in the context of historical data."
),
)
sec_1 = _select_plot_list(
lambda s_id: plot_series(
self.datasets.get_all_data_long(include_horizon=False),
pd.concat(
[self.fitted_values, self.outputs], axis=0, ignore_index=True
),
id_col=ForecastOutputColumns.SERIES,
time_col=self.spec.datetime_column.name,
target_col=self.original_target_column,
seed=42,
ids=[s_id],
),
self.datasets.list_series_ids(),
)

# Section 2: LGBForecast Model Parameters
sec2_text = rc.Block(
rc.Heading("LGBForecast Model Parameters", level=2),
rc.Text("These are the parameters used for the LGBForecast model."),
)

blocks = [
rc.Html(
str(s_id[1]),
label=s_id[0],
)
for _, s_id in enumerate(self.model_parameters.items())
]
sec_2 = rc.Select(blocks=blocks)
k, v = next(iter(self.model_parameters.items()))
sec_2 = rc.Html(
pd.DataFrame(list(v.items())).to_html(index=False, header=False),
)

all_sections = [sec1_text, sec_1, sec2_text, sec_2]
all_sections = [sec2_text, sec_2]
model_description = rc.Text(
"LGBForecast uses mlforecast framework to perform time series forecasting using machine learning models"
"with the option to scale to massive amounts of data using remote clusters."
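The rewritten `_generate_report` drops the per-series forecast plots and renders one parameter table via pandas. A minimal reproduction with a stand-in dict (the report_creator wiring is omitted); note that `next(iter(...))` shows only the first series' parameters:

```python
import pandas as pd

# Stand-in for self.model_parameters; real keys come from the training loop.
model_parameters = {"series_a": {"framework": "LGBForecast", "num_leaves": 512}}

_, params = next(iter(model_parameters.items()))
html = pd.DataFrame(list(params.items())).to_html(index=False, header=False)
print(html.splitlines()[0])  # <table border="1" class="dataframe">
```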
2 changes: 1 addition & 1 deletion ads/opctl/operator/lowcode/forecast/schema.yaml
@@ -455,7 +455,7 @@ spec:
- prophet
- arima
- neuralprophet
# - lgbforecast
- lgbforecast
- automlx
- autots
- auto-select
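With `lgbforecast` restored to the `model` enum, a spec naming it validates again. A hypothetical minimal spec, shown here as a Python dict (the keys follow the forecast operator's schema; the path, column names, and horizon are placeholders, not values from this PR):

```python
# Hypothetical forecast operator spec selecting the re-enabled model.
spec = {
    "historical_data": {"url": "data.csv"},   # placeholder path
    "datetime_column": {"name": "ds"},        # placeholder column
    "target_column": "y",                     # placeholder column
    "horizon": 7,
    "model": "lgbforecast",                   # newly re-enabled enum value
}
```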
1 change: 1 addition & 0 deletions pyproject.toml
@@ -177,6 +177,7 @@ forecast = [
"autots",
"mlforecast",
"neuralprophet>=0.7.0",
"pytorch-lightning==2.5.5",
"numpy<2.0.0",
"oci-cli",
"optuna",
4 changes: 2 additions & 2 deletions tests/operators/forecast/test_datasets.py
@@ -32,7 +32,7 @@
"prophet",
"neuralprophet",
"autots",
# "lgbforecast",
"lgbforecast",
"auto-select",
"auto-select-series",
]
@@ -177,7 +177,7 @@ def test_load_datasets(model, data_details):
subprocess.run(f"ls -a {output_data_path}", shell=True)
if yaml_i["spec"]["generate_explanations"] and model not in [
"automlx",
# "lgbforecast",
"lgbforecast",
"auto-select",
]:
verify_explanations(
10 changes: 5 additions & 5 deletions tests/operators/forecast/test_errors.py
@@ -143,7 +143,7 @@
"prophet",
"neuralprophet",
"autots",
# "lgbforecast",
"lgbforecast",
]

TEMPLATE_YAML = {
@@ -415,8 +415,8 @@ def test_0_series(operator_setup, model):
"local_explanation.csv",
"global_explanation.csv",
]
if model == "autots":
# explanations are not supported for autots
if model in ["autots", "lgbforecast"]:
# explanations are not supported for autots or lgbforecast
output_files.remove("local_explanation.csv")
output_files.remove("global_explanation.csv")
for file in output_files:
@@ -709,7 +709,7 @@ def test_arima_automlx_errors(operator_setup, model):
in error_content["13"]["model_fitting"]["error"]
), f"Error message mismatch: {error_content}"

if model not in ["autots", "automlx"]: # , "lgbforecast"
if model not in ["autots", "automlx", "lgbforecast"]:
if yaml_i["spec"].get("explanations_accuracy_mode") != "AUTOMLX":
global_fn = f"{tmpdirname}/results/global_explanation.csv"
assert os.path.exists(
@@ -816,7 +816,7 @@ def test_date_format(operator_setup, model):
@pytest.mark.parametrize("model", MODELS)
def test_what_if_analysis(operator_setup, model):
os.environ["TEST_MODE"] = "True"
if model == "auto-select":
if model in ["auto-select", "lgbforecast"]:
pytest.skip("Skipping what-if scenario for auto-select and lgbforecast")
tmpdirname = operator_setup
historical_data_path, additional_data_path = setup_small_rossman()