Skip to content

Commit

Permalink
bump to last fix in wip-engines
Browse files Browse the repository at this point in the history
  • Loading branch information
fcharras committed Dec 14, 2022
1 parent 6d87e2d commit e886c29
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 13 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/run_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
# Development tracker: https://github.com/scikit-learn/scikit-learn/pull/24497/
# TODO: Remove this step when the plugin API is officially released
- name: Install pytest, @ogrisel sklearn branch "wip-engines", and sklearn-numba-dpex
run: pip install pytest git+https://github.com/ogrisel/scikit-learn.git@ab1e34d81a5c56bf980fa56f5a5cb08d2491e4f2#egg=scikit-learn -e .
run: pip install pytest git+https://github.com/ogrisel/scikit-learn.git@ff191e296fa87d57ade7e2a3fb573870eded2f26#egg=scikit-learn -e .

- name: Check device
run: python -c "import dpctl; dpctl.select_default_device().print_device_info()"
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ conda create --yes --name sklearn-dev \
conda activate sklearn-dev
git clone https://github.com/ogrisel/scikit-learn -b wip-engines
cd scikit-learn
git checkout ab1e34d81a5c56bf980fa56f5a5cb08d2491e4f2
git checkout ff191e296fa87d57ade7e2a3fb573870eded2f26
python setup.py bdist_wheel
conda activate $CONDA_DPEX_ENV_NAME
cd dist/
Expand Down Expand Up @@ -223,7 +223,7 @@ Once you have loaded into the container, follow those instructions to install th
```bash
git clone https://github.com/ogrisel/scikit-learn -b wip-engines
cd scikit-learn
git checkout ab1e34d81a5c56bf980fa56f5a5cb08d2491e4f2
git checkout ff191e296fa87d57ade7e2a3fb573870eded2f26
pip install -e .
cd ..
```
Expand Down
32 changes: 28 additions & 4 deletions sklearn_numba_dpex/kmeans/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,15 +105,20 @@ def __init__(self, estimator):
)
self._is_in_testing_mode = _is_in_testing_mode == "1"

def accepts(self, X, y):
def accepts(self, X, y, sample_weight):
try:
if (algorithm := self.estimator.algorithm) not in ("lloyd", "auto", "full"):
raise NotSupportedByEngineError(
"The sklearn_nunmba_dpex engine for KMeans only support the Lloyd"
f" algorithm, {algorithm} is not supported."
)

self._X_validated = self._validate_data(X)
self._X_accepted = X

self._sample_weight_accepted = sample_weight
if sample_weight is not None:
self._sample_weight_validated = self._check_sample_weight(sample_weight)
return True
except Exception:
if self._is_in_testing_mode:
Expand All @@ -124,11 +129,12 @@ def prepare_fit(self, X, y=None, sample_weight=None):
estimator = self.estimator

self._check_is_accepted_X(X)
self._check_is_accepted_sample_weight(sample_weight)
X = self._X_validated
estimator._check_n_features(X, reset=True)
estimator._check_params_vs_input(X)

self.sample_weight = self._check_sample_weight(sample_weight, X)
self.sample_weight = self._sample_weight_validated

init = self.estimator.init
init_is_array_like = _is_arraylike_not_scalar(init)
Expand Down Expand Up @@ -237,9 +243,10 @@ def get_nb_distinct_clusters(self, best_labels):

def prepare_prediction(self, X, sample_weight):
self._check_is_accepted_X(X)
self._check_is_accepted_sample_weight(sample_weight)
X = self._X_validated
self.estimator._check_n_features(X, reset=False)
sample_weight = self._check_sample_weight(sample_weight, X)
sample_weight = self._sample_weight_validated
return X, sample_weight

def get_labels(self, X, sample_weight):
Expand Down Expand Up @@ -332,9 +339,26 @@ def _check_is_accepted_X(self, X):
"is different from the object that was given in downstream methods."
)

def _check_sample_weight(self, sample_weight, X):
def _check_is_accepted_sample_weight(self, sample_weight):
if sample_weight is not self._sample_weight_accepted:
raise RuntimeError(
"The object that was passed to the engine to query its compatibility "
"is different from the object that was given in downstream methods."
)

# When sample_weight is None, the call to `_check_sample_weight` is delayed
# until now, because the array of `ones` that is created is only
# necessary for engine methods that actually make use of `sample_weight` and
# call `_check_is_accepted_sample_weight`.
# Methods that don't use `sample_weight` still pass `sample_weight=None` to
# `accepts` but don't need to create the corresponding array.
if sample_weight is None:
self._sample_weight_validated = self._check_sample_weight(sample_weight)

def _check_sample_weight(self, sample_weight):
"""Adapted from sklearn.utils.validation._check_sample_weight to be compatible
with Array API dispatch"""
X = self._X_validated
n_samples = X.shape[0]
dtype = X.dtype
device = X.device.sycl_device
Expand Down
12 changes: 6 additions & 6 deletions sklearn_numba_dpex/kmeans/tests/test_kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ def test_euclidean_distance(dtype):
estimator = KMeans(n_clusters=len(b))
estimator.cluster_centers_ = b
engine = KMeansEngine(estimator)
assert engine.accepts(a, y=None)
assert engine.accepts(a, y=None, sample_weight=None)

result = engine.get_euclidean_distances(a)

Expand All @@ -177,7 +177,7 @@ def test_inertia(dtype):
estimator = KMeans(n_clusters=len(centers))
estimator.cluster_centers_ = dpt.asarray(centers)
engine = KMeansEngine(estimator)
assert engine.accepts(X, y=None)
assert engine.accepts(X, y=None, sample_weight=sample_weight)
X_prepared, sample_weight_prepared = engine.prepare_prediction(X, sample_weight)
labels = engine.get_labels(X_prepared, sample_weight_prepared)

Expand Down Expand Up @@ -352,7 +352,7 @@ def _get_score_with_centers(centers):

kmeans.set_params(random_state=random_state)
engine = KMeansEngine(kmeans)
assert engine.accepts(X, y=None)
assert engine.accepts(X, y=None, sample_weight=None)
X_prepared, *_ = engine.prepare_fit(X)
engine_kmeans_plusplus_centers_t = engine.init_centroids(X_prepared)
engine_kmeans_plusplus_centers = engine_kmeans_plusplus_centers_t.T
Expand Down Expand Up @@ -407,7 +407,7 @@ def test_kmeans_plusplus_output(array_constr, dtype):
init="k-means++", n_clusters=n_clusters_sklearn_test, random_state=random_state
)
engine = KMeansEngine(estimator)
assert engine.accepts(X, y=None)
assert engine.accepts(X, y=None, sample_weight=sample_weight)
X_prepared, *_ = engine.prepare_fit(X, sample_weight=sample_weight)

centers_t, indices = engine._kmeans_plusplus(X_prepared)
Expand Down Expand Up @@ -442,15 +442,15 @@ def test_kmeans_plusplus_dataorder():
init="k-means++", n_clusters=n_clusters_sklearn_test, random_state=random_state
)
engine = KMeansEngine(estimator)
assert engine.accepts(X_sklearn_test, y=None)
assert engine.accepts(X_sklearn_test, y=None, sample_weight=None)
X_sklearn_test_prepared, *_ = engine.prepare_fit(X_sklearn_test)
centers_c = engine.init_centroids(X_sklearn_test_prepared)
centers_c = asnumpy(centers_c.T)

X_fortran = np.asfortranarray(X_sklearn_test)
# The engine is re-created to reset random state
engine = KMeansEngine(estimator)
assert engine.accepts(X_fortran, y=None)
assert engine.accepts(X_fortran, y=None, sample_weight=None)
X_fortran_prepared, *_ = engine.prepare_fit(X_fortran)
centers_fortran = engine.init_centroids(X_fortran_prepared)
centers_fortran = asnumpy(centers_fortran.T)
Expand Down

0 comments on commit e886c29

Please sign in to comment.