feat: compare time series models #1174

Merged: 12 commits, Apr 18, 2021
68 changes: 58 additions & 10 deletions pycaret/internal/PycaretExperiment.py
@@ -21,7 +21,7 @@
     get_columns_to_stratify_by,
     get_model_name,
 )
-from pycaret.internal.utils import SeasonalParameter
+from pycaret.internal.utils import SeasonalParameter, id_or_display_name
 import pycaret.internal.patches.sklearn
 import pycaret.internal.patches.yellowbrick
 from pycaret.internal.logging import get_logger, create_logger
@@ -5136,8 +5136,11 @@ def compare_models(
         display.display_monitor()
         display.display_master_display()
 
+        input_ml_usecase = self._ml_usecase
+        target_ml_usecase = MLUsecase.TIME_SERIES
+
         greater_is_worse_columns = {
-            v.display_name
+            id_or_display_name(v, input_ml_usecase, target_ml_usecase)
             for k, v in self._all_metrics.items()
             if not v.greater_is_better
         }
@@ -5151,7 +5154,7 @@
 
         if not (isinstance(sort, str) and (sort == "TT" or sort == "TT (Sec)")):
             sort_ascending = not sort.greater_is_better
-            sort = sort.display_name
+            sort = id_or_display_name(sort, input_ml_usecase, target_ml_usecase)
         else:
             sort_ascending = True
             sort = "TT (Sec)"
@@ -5171,13 +5174,19 @@
             model_library = include
         else:
             if turbo:
-                model_library = self._all_models
+                model_library = [k for k, v in self._all_models.items() if v.is_turbo]
             else:
                 model_library = list(self._all_models.keys())
             if exclude:
                 model_library = [x for x in model_library if x not in exclude]
 
+        if self._ml_usecase == MLUsecase.TIME_SERIES:
+            if 'ensemble_forecaster' in model_library:
+                warnings.warn(
+                    'Unsupported estimator `ensemble_forecaster` for method `compare_models()`, removing from model_library'
+                )
+                model_library.remove('ensemble_forecaster')
+
         display.move_progress()
 
         # create URI (before loop)
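
The turbo branch previously assigned the whole model-container dict; it now keeps only the ids of models flagged as turbo. A minimal sketch of that filter (model ids and containers here are illustrative):

```python
# Illustrative stand-in for pycaret's model containers, which carry an
# is_turbo flag marking fast-to-train models.
class ModelContainer:
    def __init__(self, is_turbo):
        self.is_turbo = is_turbo

all_models = {"naive": ModelContainer(True), "auto_arima": ModelContainer(False)}
model_library = [k for k, v in all_models.items() if v.is_turbo]
assert model_library == ["naive"]
```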
@@ -16213,7 +16222,7 @@ def compare_models(
         fold: Optional[Union[int, Any]] = None,
         round: int = 4,
         cross_validation: bool = True,
-        sort: str = "R2",
+        sort: str = "smape",
         n_select: int = 1,
         budget_time: Optional[float] = None,
         turbo: bool = True,
@@ -16234,10 +16243,12 @@
         Example
         --------
         >>> from pycaret.datasets import get_data
-        >>> boston = get_data('boston')
-        >>> from pycaret.regression import *
-        >>> exp_name = setup(data = boston, target = 'medv')
-        >>> best_model = compare_models()
+        >>> from pycaret.internal.PycaretExperiment import TimeSeriesExperiment
+        >>> airline = get_data('airline', verbose=False)
+        >>> fh, fold = np.arange(1,13), 3
+        >>> exp = TimeSeriesExperiment()
+        >>> exp.setup(data=airline, fh=fh, fold=fold)
+        >>> master_display_exp = exp.compare_models(fold=fold, sort='mape')
 
 
         include: list of str or scikit-learn compatible object, default = None
@@ -16268,7 +16279,7 @@
             is ignored when cross_validation is set to False.
 
 
-        sort: str, default = 'R2'
+        sort: str, default = 'smape'
             The sort order of the score grid. It also accepts custom metrics that are
             added through the ``add_metric`` function.
@@ -16436,6 +16447,43 @@ def create_model(
             **kwargs,
         )
 
+
+    def _create_model_without_cv(
+        self, model, data_X, data_y, fit_kwargs, predict, system, display
+    ):
+        with estimator_pipeline(self._internal_pipeline, model) as pipeline_with_model:
+
+            self.logger.info("Support for Exogenous variables not yet supported. Switching X, y order")
+            data_X, data_y = data_y, data_X
+
+            fit_kwargs = get_pipeline_fit_kwargs(pipeline_with_model, fit_kwargs)
+            self.logger.info("Cross validation set to False")
+
+            self.logger.info("Fitting Model")
+            model_fit_start = time.time()
+            with io.capture_output():
+                pipeline_with_model.fit(data_X, data_y, **fit_kwargs)
+            model_fit_end = time.time()
+
+            model_fit_time = np.array(model_fit_end - model_fit_start).round(2)
+
+            display.move_progress()
+
+            if predict:
+                self.predict_model(pipeline_with_model, verbose=False)
+                model_results = self.pull(pop=True).drop("Model", axis=1)
+
+                self.display_container.append(model_results)
+
+                display.display(
+                    model_results, clear=system, override=False if not system else None,
+                )
+
+                self.logger.info(f"display_container: {len(self.display_container)}")
+
+        return model, model_fit_time
+
+
     def _create_model_with_cv(
         self,
         model,
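
`_create_model_without_cv` above is internal. A hedged sketch of how this no-CV path would be reached through the public API, mirroring the docstring example (the model id 'naive' and the routing via the cross_validation flag are assumptions for illustration):

```python
import numpy as np
from pycaret.datasets import get_data
from pycaret.internal.PycaretExperiment import TimeSeriesExperiment

airline = get_data("airline", verbose=False)
exp = TimeSeriesExperiment()
exp.setup(data=airline, fh=np.arange(1, 13), fold=3)

# cross_validation=False skips the CV loop: a single fit on the training
# split, scored once on the forecast horizon.
model = exp.create_model("naive", cross_validation=False)
```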
16 changes: 15 additions & 1 deletion pycaret/internal/utils.py
@@ -30,7 +30,21 @@ class SeasonalParameter(IntEnum):
     A = 1 #year
     Y = 1 #year
 
+
+def id_or_display_name(metric, input_ml_usecase, target_ml_usecase):
+    """
+    Get the id or display_name attribute from a metric. In a time series experiment
+    the pull() method retrieves the metric's id to name the columns of the results.
+    """
+
+    if input_ml_usecase == target_ml_usecase:
+        output = metric.id
+    else:
+        output = metric.display_name
+
+    return output
+
+
 def get_config(variable: str, globals_d: dict):
 
     """
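
A minimal sketch of the helper's behavior, using the function added above (the Metric tuple and the local enum are simplified stand-ins for pycaret's metric containers and MLUsecase):

```python
from collections import namedtuple
from enum import Enum, auto

from pycaret.internal.utils import id_or_display_name

class MLUsecase(Enum):  # simplified stand-in for pycaret's MLUsecase enum
    TIME_SERIES = auto()
    REGRESSION = auto()

Metric = namedtuple("Metric", ["id", "display_name"])
smape = Metric(id="smape", display_name="SMAPE")

# Inside the time series experiment the input and target use cases match,
# so pull() columns are keyed by the metric id:
assert id_or_display_name(smape, MLUsecase.TIME_SERIES, MLUsecase.TIME_SERIES) == "smape"

# For any other use case the human-readable display name is kept:
assert id_or_display_name(smape, MLUsecase.REGRESSION, MLUsecase.TIME_SERIES) == "SMAPE"
```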
14 changes: 8 additions & 6 deletions pycaret/time_series.py
@@ -257,7 +257,7 @@ def compare_models(
     fold: Optional[Union[int, Any]] = None,
     round: int = 4,
     cross_validation: bool = True,
-    sort: str = "R2",
+    sort: str = "smape",
     n_select: int = 1,
     budget_time: Optional[float] = None,
     turbo: bool = True,
@@ -278,10 +278,12 @@
     Example
     --------
     >>> from pycaret.datasets import get_data
-    >>> boston = get_data('boston')
-    >>> from pycaret.regression import *
-    >>> exp_name = setup(data = boston, target = 'medv')
-    >>> best_model = compare_models()
+    >>> from pycaret.internal.PycaretExperiment import TimeSeriesExperiment
+    >>> airline = get_data('airline', verbose=False)
+    >>> fh, fold = np.arange(1,13), 3
+    >>> exp = TimeSeriesExperiment()
+    >>> exp.setup(data=airline, fh=fh, fold=fold)
+    >>> master_display_exp = exp.compare_models(fold=fold, sort='mape')
 
 
     include: list of str or scikit-learn compatible object, default = None
@@ -312,7 +314,7 @@
         is ignored when cross_validation is set to False.
 
 
-    sort: str, default = 'R2'
+    sort: str, default = 'smape'
         The sort order of the score grid. It also accepts custom metrics that are
         added through the ``add_metric`` function.
2 changes: 1 addition & 1 deletion requirements-ts.txt
@@ -1,2 +1,2 @@
 sktime==0.5.3
-pmdarima>=1.8.0
+pmdarima==1.8.0
2,066 changes: 1,872 additions & 194 deletions test_time_series.ipynb

Large diffs are not rendered by default.