Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Support and enable convert_dtype in estimator predict #2723

Merged
merged 13 commits into from
Aug 26, 2020
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@

## Improvements
- PR #2735: Update seed to random_state in random forest and associated tests

- PR #2739: Use cusparse_wrappers.h from RAFT
- PR #2729: Replace `cupy.sparse` with `cupyx.scipy.sparse`
- PR #2749: Correct docs for python version used in cuml_dev conda environment
- PR #2723: Support and enable convert_dtype in estimator predict

## Bug Fixes

Expand Down
2 changes: 1 addition & 1 deletion python/cuml/common/base.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ class RegressorMixin:
else:
handle = None

preds = self.predict(X)
preds = self.predict(X, **kwargs)
return r2_score(y, preds, handle=handle)


Expand Down
4 changes: 2 additions & 2 deletions python/cuml/linear_model/elastic_net.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -219,13 +219,13 @@ class ElasticNet(Base, RegressorMixin):
'type': 'dense',
'description': 'Predicted values',
'shape': '(n_samples, 1)'})
def predict(self, X, convert_dtype=False):
def predict(self, X, convert_dtype=True):
beckernick marked this conversation as resolved.
Show resolved Hide resolved
"""
Predicts `y` values for `X`.

"""

return self.solver_model.predict(X)
return self.solver_model.predict(X, convert_dtype=convert_dtype)

def get_params(self, deep=True):
"""
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/linear_model/lasso.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ class Lasso(Base, RegressorMixin):
'type': 'dense',
'description': 'Predicted values',
'shape': '(n_samples, 1)'})
def predict(self, X, convert_dtype=False):
def predict(self, X, convert_dtype=True):
"""
Predicts the y for X.

Expand Down
2 changes: 1 addition & 1 deletion python/cuml/linear_model/linear_regression.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ class LinearRegression(Base, RegressorMixin):
'type': 'dense',
'description': 'Predicted values',
'shape': '(n_samples, 1)'})
def predict(self, X, convert_dtype=False):
def predict(self, X, convert_dtype=True):
"""
Predicts `y` values for `X`.

Expand Down
6 changes: 3 additions & 3 deletions python/cuml/linear_model/logistic_regression.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ class LogisticRegression(Base, ClassifierMixin):
'type': 'dense',
'description': 'Predicted values',
'shape': '(n_samples, 1)'})
def predict(self, X, convert_dtype=False):
def predict(self, X, convert_dtype=True):
"""
Predicts the y for X.

Expand All @@ -329,7 +329,7 @@ class LogisticRegression(Base, ClassifierMixin):
probabilities',
'shape': '(n_samples, n_classes)'})
@with_cupy_rmm
def predict_proba(self, X, convert_dtype=False):
def predict_proba(self, X, convert_dtype=True):
"""
Predicts the class probabilities for each class in X

Expand All @@ -345,7 +345,7 @@ class LogisticRegression(Base, ClassifierMixin):
'description': 'Logarithm of predicted \
class probabilities',
'shape': '(n_samples, n_classes)'})
def predict_log_proba(self, X, convert_dtype=False):
def predict_log_proba(self, X, convert_dtype=True):
"""
Predicts the log class probabilities for each class in X

Expand Down
2 changes: 1 addition & 1 deletion python/cuml/linear_model/ridge.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ class Ridge(Base, RegressorMixin):
'type': 'dense',
'description': 'Predicted values',
'shape': '(n_samples, 1)'})
def predict(self, X, convert_dtype=False):
def predict(self, X, convert_dtype=True):
"""
Predicts the y for X.

Expand Down
2 changes: 1 addition & 1 deletion python/cuml/metrics/regression.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ from cuml.common.input_utils import input_to_cuml_array
from cuml.common.memory_utils import with_cupy_rmm


def r2_score(y, y_hat, convert_dtype=False, handle=None):
def r2_score(y, y_hat, convert_dtype=True, handle=None):
"""
Calculates r2 score between y and y_hat

Expand Down
4 changes: 2 additions & 2 deletions python/cuml/svm/svc.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ class SVC(SVMBase, ClassifierMixin):
'type': 'dense',
'description': 'Predicted values',
'shape': '(n_samples, 1)'})
def predict(self, X):
def predict(self, X, convert_dtype=True):
"""
Predicts the class labels for X. The returned y values are the class
labels associated to sign(decision_function(X)).
Expand All @@ -416,7 +416,7 @@ class SVC(SVMBase, ClassifierMixin):
# prob_svc has numpy output type, change it if it is necessary:
return _to_output(preds, out_type)
else:
return super(SVC, self).predict(X, True)
return super(SVC, self).predict(X, True, convert_dtype)

@generate_docstring(skip_parameters_heading=True,
return_values={'name': 'preds',
Expand Down
8 changes: 6 additions & 2 deletions python/cuml/svm/svm_base.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,7 @@ class SVMBase(Base):
else:
self._unique_labels = None

def predict(self, X, predict_class):
def predict(self, X, predict_class, convert_dtype=True):
"""
Predicts the y for X, where y is either the decision function value
(if predict_class == False), or the label associated with X.
Expand Down Expand Up @@ -483,7 +483,11 @@ class SVMBase(Base):
self._check_is_fitted('_model')

X_m, n_rows, n_cols, pred_dtype = \
input_to_cuml_array(X, check_dtype=self.dtype)
input_to_cuml_array(
X,
check_dtype=self.dtype,
convert_to_dtype=(self.dtype if convert_dtype else None))

cdef uintptr_t X_ptr = X_m.ptr

preds = CumlArray.zeros(n_rows, dtype=self.dtype)
Expand Down
4 changes: 2 additions & 2 deletions python/cuml/svm/svr.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -292,10 +292,10 @@ class SVR(SVMBase, RegressorMixin):
'type': 'dense',
'description': 'Predicted values',
'shape': '(n_samples, 1)'})
def predict(self, X):
def predict(self, X, convert_dtype=True):
"""
Predicts the values for X.

"""

return super(SVR, self).predict(X, False)
return super(SVR, self).predict(X, False, convert_dtype)
30 changes: 30 additions & 0 deletions python/cuml/test/test_coordinate_descent.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,33 @@ def test_elastic_net_default(datatype, nrows, column_info):
sk_predict = elastic_sk.predict(X_test)
sk_r2 = r2_score(y_test, sk_predict)
assert cu_r2 >= sk_r2 - 0.07


@pytest.mark.parametrize('train_dtype', [np.float32, np.float64])
@pytest.mark.parametrize('test_dtype', [np.float64, np.float32])
def test_elastic_net_predict_convert_dtype(train_dtype, test_dtype):
    """ElasticNet.predict must accept test data whose dtype differs from
    the dtype the model was trained on (convert_dtype=True default)."""
    X, y = make_regression(n_samples=50, n_features=10,
                           n_informative=5, random_state=0)
    X = X.astype(train_dtype)
    y = y.astype(train_dtype)
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8,
                                                        random_state=0)

    clf = cuElasticNet()
    clf.fit(X_train, y_train)
    # Not just "does not raise": also verify one prediction per test row.
    preds = clf.predict(X_test.astype(test_dtype))
    assert len(preds) == len(X_test)


@pytest.mark.parametrize('train_dtype', [np.float32, np.float64])
@pytest.mark.parametrize('test_dtype', [np.float64, np.float32])
def test_lasso_predict_convert_dtype(train_dtype, test_dtype):
    """Lasso.predict must accept test data whose dtype differs from
    the dtype the model was trained on (convert_dtype=True default)."""
    X, y = make_regression(n_samples=50, n_features=10,
                           n_informative=5, random_state=0)
    X = X.astype(train_dtype)
    y = y.astype(train_dtype)
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8,
                                                        random_state=0)

    clf = cuLasso()
    clf.fit(X_train, y_train)
    # Not just "does not raise": also verify one prediction per test row.
    preds = clf.predict(X_test.astype(test_dtype))
    assert len(preds) == len(X_test)
43 changes: 43 additions & 0 deletions python/cuml/test/test_linear_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,3 +400,46 @@ def test_logistic_regression_input_type_consistency(constructor, dtype):

assert isinstance(clf.predict_proba(X), original_type)
assert isinstance(clf.predict(X), original_type)


@pytest.mark.parametrize('train_dtype', [np.float32, np.float64])
@pytest.mark.parametrize('test_dtype', [np.float64, np.float32])
def test_linreg_predict_convert_dtype(train_dtype, test_dtype):
    """LinearRegression.predict must accept test data whose dtype differs
    from the dtype the model was trained on (convert_dtype=True default)."""
    X, y = make_regression(n_samples=50, n_features=10,
                           n_informative=5, random_state=0)
    X = X.astype(train_dtype)
    y = y.astype(train_dtype)
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8,
                                                        random_state=0)

    clf = cuLinearRegression()
    clf.fit(X_train, y_train)
    # Not just "does not raise": also verify one prediction per test row.
    preds = clf.predict(X_test.astype(test_dtype))
    assert len(preds) == len(X_test)


@pytest.mark.parametrize('train_dtype', [np.float32, np.float64])
@pytest.mark.parametrize('test_dtype', [np.float64, np.float32])
def test_ridge_predict_convert_dtype(train_dtype, test_dtype):
    """Ridge.predict must accept test data whose dtype differs from
    the dtype the model was trained on (convert_dtype=True default)."""
    X, y = make_regression(n_samples=50, n_features=10,
                           n_informative=5, random_state=0)
    X = X.astype(train_dtype)
    y = y.astype(train_dtype)
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8,
                                                        random_state=0)

    clf = cuRidge()
    clf.fit(X_train, y_train)
    # Not just "does not raise": also verify one prediction per test row.
    preds = clf.predict(X_test.astype(test_dtype))
    assert len(preds) == len(X_test)


@pytest.mark.parametrize('train_dtype', [np.float32, np.float64])
@pytest.mark.parametrize('test_dtype', [np.float64, np.float32])
def test_logistic_predict_convert_dtype(train_dtype, test_dtype):
    """LogisticRegression.predict must accept test data whose dtype differs
    from the dtype the model was trained on (convert_dtype=True default).

    Note: labels y are left as returned by make_classification — only the
    feature matrix dtype is exercised here.
    """
    X, y = make_classification(n_samples=50, n_features=10, random_state=0)
    X = X.astype(train_dtype)
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8,
                                                        random_state=0)

    clf = cuLog()
    clf.fit(X_train, y_train)
    # Not just "does not raise": also verify one prediction per test row.
    preds = clf.predict(X_test.astype(test_dtype))
    assert len(preds) == len(X_test)
19 changes: 19 additions & 0 deletions python/cuml/test/test_svm.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,3 +623,22 @@ def test_svr_skl_cmp_weighted():
sklSVR.fit(X, y, sample_weights)

compare_svr(cuSVR, sklSVR, X, y)


@pytest.mark.parametrize('classifier', [True, False])
@pytest.mark.parametrize('train_dtype', [np.float32, np.float64])
@pytest.mark.parametrize('test_dtype', [np.float64, np.float32])
def test_svm_predict_convert_dtype(train_dtype, test_dtype, classifier):
    """SVC/SVR predict must accept test data whose dtype differs from
    the dtype the model was trained on (convert_dtype=True default)."""
    X, y = make_classification(n_samples=50, random_state=0)

    X = X.astype(train_dtype)
    y = y.astype(train_dtype)
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8,
                                                        random_state=0)

    # Exercise both code paths: SVC routes through the classifier predict,
    # SVR through the base-class regression predict.
    if classifier:
        clf = cu_svm.SVC()
    else:
        clf = cu_svm.SVR()
    clf.fit(X_train, y_train)
    # Not just "does not raise": also verify one prediction per test row.
    preds = clf.predict(X_test.astype(test_dtype))
    assert len(preds) == len(X_test)