Convert fp32 datasets to fp64 in ARIMA and AutoARIMA + update notebook to avoid deprecation warnings with positional parameters #4195

Merged: 5 commits, Sep 8, 2022
Changes from 1 commit
9 changes: 5 additions & 4 deletions notebooks/arima_demo.ipynb
@@ -175,7 +175,7 @@
"metadata": {},
"outputs": [],
"source": [
"model_mig = ARIMA(df_mig, (0,0,2), fit_intercept=True)\n",
"model_mig = ARIMA(df_mig, order=(0,0,2), fit_intercept=True)\n",
"model_mig.fit()"
]
},
@@ -268,7 +268,7 @@
"df_pop = load_dataset(\"population_estimate\")\n",
"\n",
"# Fit an ARIMA(1,2,1) model\n",
"model_pop = ARIMA(df_pop, (1,2,1), fit_intercept=True)\n",
"model_pop = ARIMA(df_pop, order=(1,2,1), fit_intercept=True)\n",
"model_pop.fit()\n",
"\n",
"# Predict in-sample and forecast out-of-sample\n",
@@ -321,7 +321,8 @@
"df_guests = load_dataset(\"guest_nights_by_region\", 4)\n",
"\n",
"# Create and fit an ARIMA(1,1,1)(1,1,1)12 model:\n",
"model_guests = ARIMA(df_guests, (1,1,1), (1,1,1,12), fit_intercept=False)\n",
"model_guests = ARIMA(df_guests, order=(1,1,1), seasonal_order=(1,1,1,12),\n",
" fit_intercept=False)\n",
"model_guests.fit()"
]
},
@@ -356,7 +357,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.8"
"version": "3.8.10"
},
"mimetype": "text/x-python",
"name": "python",
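For reference, the keyword-argument style the notebook now uses, end to end. A minimal sketch: load_dataset is the notebook's own helper, and the rest assumes the cuml.tsa.arima API exactly as shown in the diff above.

# Sketch of the updated call style; load_dataset is the notebook's helper.
from cuml.tsa.arima import ARIMA

df_guests = load_dataset("guest_nights_by_region", 4)

# Positional orders, e.g. ARIMA(df_guests, (1,1,1), (1,1,1,12), ...),
# now emit a deprecation warning; passing them by keyword avoids it:
model_guests = ARIMA(df_guests,
                     order=(1, 1, 1),
                     seasonal_order=(1, 1, 1, 12),
                     fit_intercept=False)
model_guests.fit()
fc = model_guests.forecast(12)  # 12-step out-of-sample forecast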
10 changes: 9 additions & 1 deletion python/cuml/tsa/arima.pyx
@@ -34,9 +34,11 @@ from cuml.raft.common.handle cimport handle_t
from cuml.tsa.batched_lbfgs import batched_fmin_lbfgs_b
import cuml.common.logger as logger
from cuml.common import has_scipy
+from cuml.common.input_utils import determine_array_dtype
from cuml.common.input_utils import input_to_cuml_array
from cuml.common.input_utils import input_to_host_array
from cuml.internals import _deprecate_pos_args
+import warnings


cdef extern from "cuml/tsa/arima_common.h" namespace "ML":
@@ -335,9 +337,15 @@ class ARIMA(Base):
raise ValueError("ERROR: Invalid order. "
"Required: max(p+s*P, q+s*Q) <= 1024")

+original_dtype = determine_array_dtype(endog)
+if original_dtype != np.float64:
+    warnings.warn("Only float64 is currently supported. `endog` is"
+                  " converted to float64. This behavior might change"
+                  " in the future.")

# Get device array. Float64 only for now.
self.d_y, self.n_obs, self.batch_size, self.dtype \
-    = input_to_cuml_array(endog, check_dtype=np.float64)
+    = input_to_cuml_array(endog, convert_to_dtype=np.float64)
Member:
Suggested change:
-= input_to_cuml_array(endog, convert_to_dtype=np.float64)
+= input_to_cuml_array(
+    endog,
+    convert_to_dtype=(np.float64 if convert_dtype else None)
+)

We can add a convert_dtype parameter in the __init__ (set to True by default) so that the user can disable the auto-conversion if they want, similar to fit/predict in other models:

    def fit(self, X, y, convert_dtype=True) -> "LinearRegression":

What do you think?

Contributor Author @Nyrio (Sep 8, 2021):
I have added convert_dtype and removed the warning: unless False were the default for this option, a warning doesn't make sense, since conversion is the intended behavior of convert_dtype=True.

I guess we can implement a warning when we add float32 support.
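A sketch of the pattern this thread settles on (the helper name here is hypothetical; the actual change lands in a later commit of this PR):

# Hypothetical helper illustrating the convert_dtype pattern discussed above.
# With convert_dtype=True the input is converted to float64; with False,
# input_to_cuml_array checks the dtype and raises on a mismatch instead.
import numpy as np
from cuml.common.input_utils import input_to_cuml_array

def ingest_endog(endog, convert_dtype=True):
    return input_to_cuml_array(
        endog,
        check_dtype=np.float64,
        convert_to_dtype=(np.float64 if convert_dtype else None))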


if self.n_obs < d + s * D + 1:
raise ValueError("ERROR: Number of observations too small for the"
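The user-visible effect of the arima.pyx change, sketched under the assumption of a cuml build that includes this PR: float32 input is now converted to float64 instead of being rejected by the dtype check.

# Sketch (assumes a build with this PR): fp32 endog is accepted
# and converted internally rather than raising a dtype error.
import numpy as np
from cuml.tsa.arima import ARIMA

rng = np.random.default_rng(0)
y32 = rng.standard_normal((100, 4)).astype(np.float32)  # batch of 4 fp32 series

model = ARIMA(y32, order=(1, 1, 1), fit_intercept=True)
model.fit()
print(model.dtype)  # float64: set from the converted device array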
10 changes: 9 additions & 1 deletion python/cuml/tsa/auto_arima.pyx
@@ -37,9 +37,11 @@ from cuml.raft.common.handle cimport handle_t
from cuml.raft.common.handle import Handle
from cuml.common import input_to_cuml_array
from cuml.common import using_output_type
+from cuml.common.input_utils import determine_array_dtype
from cuml.tsa.arima import ARIMA
from cuml.tsa.seasonality import seas_test
from cuml.tsa.stationarity import kpss_test
+import warnings


# TODO:
@@ -189,9 +191,15 @@ class AutoARIMA(Base):
output_type=output_type)
self._set_base_attributes(output_type=endog)

+original_dtype = determine_array_dtype(endog)
+if original_dtype != np.float64:
+    warnings.warn("Only float64 is currently supported. `endog` is"
+                  " converted to float64. This behavior might change"
+                  " in the future.")

# Get device array. Float64 only for now.
self.d_y, self.n_obs, self.batch_size, self.dtype \
-    = input_to_cuml_array(endog, check_dtype=np.float64)
+    = input_to_cuml_array(endog, convert_to_dtype=np.float64)

self.simple_differencing = simple_differencing

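The same conversion applies to AutoARIMA; a sketch under the same post-merge assumption (the search/fit/forecast flow follows cuml's AutoARIMA docs):

# Sketch (assumes a build with this PR): AutoARIMA likewise converts
# fp32 batches to float64 before model selection and fitting.
import numpy as np
from cuml.tsa.auto_arima import AutoARIMA

rng = np.random.default_rng(1)
y32 = rng.standard_normal((120, 8)).astype(np.float32)

model = AutoARIMA(y32)
model.search()           # per-series order selection on the converted data
model.fit()              # fit the selected models
fc = model.forecast(12)  # 12-step forecast for each series in the batch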