Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MNT Compatibility with sklearn 1.0 #864

Merged
merged 17 commits into from Sep 29, 2021
10 changes: 0 additions & 10 deletions .pep8speaks.yml

This file was deleted.

13 changes: 5 additions & 8 deletions .pre-commit-config.yaml
@@ -1,19 +1,16 @@
repos:
- repo: https://github.com/python/black
rev: stable
hooks:
- id: black
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.3.0
hooks:
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/psf/black
rev: 21.6b0
hooks:
- id: black
- repo: https://gitlab.com/pycqa/flake8
rev: 3.7.8
rev: 3.9.2
hooks:
- id: flake8
types: [file, python]
# only check for unused imports for now, as long as
# the code is not fully PEP8 compatible
args: [--select=F401]
31 changes: 19 additions & 12 deletions azure-pipelines.yml
Expand Up @@ -3,23 +3,26 @@ jobs:
- job: linting
displayName: Linting
pool:
vmImage: ubuntu-18.04
vmImage: ubuntu-20.04
steps:
- bash: echo "##vso[task.prependpath]$CONDA/bin"
displayName: Add conda to PATH
- bash: sudo chown -R $USER $CONDA
displayName: Take ownership of conda installation
- bash: conda create --name flake8_env --yes flake8
displayName: Install flake8
- task: UsePythonVersion@0
inputs:
versionSpec: '3.9'
- bash: |
# Include pytest compatibility with mypy
pip install pytest flake8 mypy==0.782 black==21.6b0
displayName: Install linters
- bash: |
black --check .
displayName: Run black
- bash: |
source activate flake8_env
./build_tools/circle/linting.sh
displayName: Run linting

- template: build_tools/azure/posix.yml
parameters:
name: Linux_Runs
vmImage: ubuntu-18.04
vmImage: ubuntu-20.04
matrix:
pylatest_pip_openblas_pandas:
DISTRIB: 'conda-pip-latest'
Expand All @@ -33,15 +36,14 @@ jobs:
- template: build_tools/azure/posix.yml
parameters:
name: Linux
vmImage: ubuntu-18.04
vmImage: ubuntu-20.04
dependsOn: [linting]
matrix:
# Linux environment to test that scikit-learn can be built against
# versions of numpy, scipy with ATLAS that comes with Ubuntu Bionic 18.04
# i.e. numpy 1.13.3 and scipy 0.19
py36_ubuntu_atlas:
DISTRIB: 'ubuntu'
PYTHON_VERSION: '3.6'
JOBLIB_VERSION: '*'
pylatest_conda_pandas_keras:
DISTRIB: 'conda'
Expand All @@ -61,11 +63,16 @@ jobs:
TENSORFLOW_VERSION: '*'
COVERAGE: 'true'
TEST_DOCSTRINGS: 'true'
sklearn_0_24_conda:
DISTRIB: 'conda'
PYTHON_VERSION: '3.7'
SKLEARN_VERSION: '0.24.2'
INSTALL_MKL: 'true'

- template: build_tools/azure/posix-32.yml
parameters:
name: Linux32
vmImage: ubuntu-18.04
vmImage: ubuntu-20.04
dependsOn: [linting]
matrix:
py36_ubuntu_atlas_32bit:
Expand Down
7 changes: 6 additions & 1 deletion build_tools/azure/install.sh
Expand Up @@ -32,10 +32,15 @@ if [[ "$DISTRIB" == "conda" ]]; then
fi

make_conda $TO_INSTALL
python -m pip install scikit-learn

TO_INSTALL=""

if [[ -n "$SKLEARN_VERSION" ]]; then
TO_INSTALL="$TO_INSTALL scikit-learn=$SKLEARN_VERSION"
else
TO_INSTALL="$TO_INSTALL scikit-learn"
fi

if [[ -n "$PANDAS_VERSION" ]]; then
TO_INSTALL="$TO_INSTALL pandas=$PANDAS_VERSION"
fi
Expand Down
2 changes: 1 addition & 1 deletion doc/install.rst
Expand Up @@ -12,7 +12,7 @@ The imbalanced-learn package requires the following dependencies:
* python (>=3.6)
* numpy (>=1.13.3)
* scipy (>=0.19.1)
* scikit-learn (>=0.23)
* scikit-learn (>=0.24)
* keras 2 (optional)
* tensorflow (optional)

Expand Down
10 changes: 8 additions & 2 deletions doc/sphinxext/sphinx_issues.py
Expand Up @@ -80,7 +80,11 @@ class IssueRole(object):
EXTERNAL_REPO_REGEX = re.compile(r"^(\w+)/(.+)([#@])([\w]+)$")

def __init__(
self, uri_config_option, format_kwarg, github_uri_template, format_text=None,
self,
uri_config_option,
format_kwarg,
github_uri_template,
format_text=None,
):
self.uri_config_option = uri_config_option
self.format_kwarg = format_kwarg
Expand All @@ -103,7 +107,9 @@ def make_node(self, name, issue_no, config, options=None):
)
path = name_map.get(name)
ref = "https://github.com/{issues_github_path}/{path}/{n}".format(
issues_github_path="{}/{}".format(username, repo), path=path, n=issue,
issues_github_path="{}/{}".format(username, repo),
path=path,
n=issue,
)
formatted_issue = self.format_text(issue).lstrip("#")
text = "{username}/{repo}{symbol}{formatted_issue}".format(**locals())
Expand Down
13 changes: 13 additions & 0 deletions doc/whats_new/v0.8.rst
@@ -1,5 +1,18 @@
.. _changes_0_8:

Version 0.8.1
=============

**In development**

Changelog

Maintenance
...........

- Make `imbalanced-learn` compatible with `scikit-learn` 1.0.
:pr:`864` by :user:`Guillaume Lemaitre <glemaitre>`.

Version 0.8.0
=============

Expand Down
10 changes: 8 additions & 2 deletions imblearn/base.py
Expand Up @@ -82,7 +82,9 @@ def fit_resample(self, X, y):

output = self._fit_resample(X, y)

y_ = label_binarize(output[1], np.unique(y)) if binarize_y else output[1]
y_ = (
label_binarize(output[1], classes=np.unique(y)) if binarize_y else output[1]
)

X_, y_ = arrays_transformer.transform(output[0], y_)
return (X_, y_) if len(output) == 2 else (X_, y_, output[2])
Expand Down Expand Up @@ -284,7 +286,11 @@ def fit_resample(self, X, y):

if self.validate:

y_ = label_binarize(output[1], np.unique(y)) if binarize_y else output[1]
y_ = (
label_binarize(output[1], classes=np.unique(y))
if binarize_y
else output[1]
)
X_, y_ = arrays_transformer.transform(output[0], y_)
return (X_, y_) if len(output) == 2 else (X_, y_, output[2])

Expand Down
14 changes: 10 additions & 4 deletions imblearn/ensemble/_forest.py
Expand Up @@ -422,15 +422,13 @@ def fit(self, X, y, sample_weight=None):
)
if sample_weight is not None:
sample_weight = _check_sample_weight(sample_weight, X)
self._n_features = X.shape[1]

if issparse(X):
# Pre-sort indices to avoid that each individual tree of the
# ensemble sorts the indices.
X.sort_indices()

# Remap output
_, self.n_features_ = X.shape

y = np.atleast_1d(y)
if y.ndim == 2 and y.shape[1] == 1:
warn(
Expand Down Expand Up @@ -627,5 +625,13 @@ def _set_oob_score(self, X, y):

self.oob_score_ = oob_score / self.n_outputs_

@property
def n_features_(self):
    """Number of features when fitting the estimator.

    Returns
    -------
    int
        The value of ``n_features_in_`` when that attribute is set on the
        estimator, otherwise the ``_n_features`` value recorded by ``fit``
        from ``X.shape[1]``.
    """
    # The previous ``getattr(self.n_features_in_, self._n_features)`` used
    # the attribute *value* as the object and an integer as the attribute
    # name, which raises ``TypeError`` instead of falling back.
    try:
        return self.n_features_in_
    except AttributeError:
        # NOTE(review): presumably ``n_features_in_`` is missing only with
        # scikit-learn versions whose input validation does not set it.
        return self._n_features

def _more_tags(self):
return {"multioutput": False}
return {
"multioutput": False,
"multilabel": False,
}
38 changes: 30 additions & 8 deletions imblearn/ensemble/tests/test_easy_ensemble.py
Expand Up @@ -48,7 +48,10 @@
def test_easy_ensemble_classifier(n_estimators, base_estimator):
# Check classification for various parameter settings.
X, y = make_imbalance(
iris.data, iris.target, sampling_strategy={0: 20, 1: 25, 2: 50}, random_state=0,
iris.data,
iris.target,
sampling_strategy={0: 20, 1: 25, 2: 50},
random_state=0,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

Expand All @@ -72,7 +75,10 @@ def test_easy_ensemble_classifier(n_estimators, base_estimator):
def test_base_estimator():
# Check base_estimator and its default values.
X, y = make_imbalance(
iris.data, iris.target, sampling_strategy={0: 20, 1: 25, 2: 50}, random_state=0,
iris.data,
iris.target,
sampling_strategy={0: 20, 1: 25, 2: 50},
random_state=0,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

Expand All @@ -91,7 +97,10 @@ def test_base_estimator():

def test_bagging_with_pipeline():
X, y = make_imbalance(
iris.data, iris.target, sampling_strategy={0: 20, 1: 25, 2: 50}, random_state=0,
iris.data,
iris.target,
sampling_strategy={0: 20, 1: 25, 2: 50},
random_state=0,
)
estimator = EasyEnsembleClassifier(
n_estimators=2,
Expand All @@ -109,7 +118,9 @@ def test_warm_start(random_state=42):
for n_estimators in [5, 10]:
if clf_ws is None:
clf_ws = EasyEnsembleClassifier(
n_estimators=n_estimators, random_state=random_state, warm_start=True,
n_estimators=n_estimators,
random_state=random_state,
warm_start=True,
)
else:
clf_ws.set_params(n_estimators=n_estimators)
Expand Down Expand Up @@ -182,7 +193,10 @@ def test_warm_start_equivalence():
)
def test_easy_ensemble_classifier_error(n_estimators, msg_error):
X, y = make_imbalance(
iris.data, iris.target, sampling_strategy={0: 20, 1: 25, 2: 50}, random_state=0,
iris.data,
iris.target,
sampling_strategy={0: 20, 1: 25, 2: 50},
random_state=0,
)
with pytest.raises(ValueError, match=msg_error):
eec = EasyEnsembleClassifier(n_estimators=n_estimators)
Expand All @@ -191,7 +205,10 @@ def test_easy_ensemble_classifier_error(n_estimators, msg_error):

def test_easy_ensemble_classifier_single_estimator():
X, y = make_imbalance(
iris.data, iris.target, sampling_strategy={0: 20, 1: 25, 2: 50}, random_state=0,
iris.data,
iris.target,
sampling_strategy={0: 20, 1: 25, 2: 50},
random_state=0,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

Expand All @@ -205,14 +222,19 @@ def test_easy_ensemble_classifier_single_estimator():

def test_easy_ensemble_classifier_grid_search():
X, y = make_imbalance(
iris.data, iris.target, sampling_strategy={0: 20, 1: 25, 2: 50}, random_state=0,
iris.data,
iris.target,
sampling_strategy={0: 20, 1: 25, 2: 50},
random_state=0,
)

parameters = {
"n_estimators": [1, 2],
"base_estimator__n_estimators": [3, 4],
}
grid_search = GridSearchCV(
EasyEnsembleClassifier(base_estimator=AdaBoostClassifier()), parameters, cv=5,
EasyEnsembleClassifier(base_estimator=AdaBoostClassifier()),
parameters,
cv=5,
)
grid_search.fit(X, y)
18 changes: 14 additions & 4 deletions imblearn/ensemble/tests/test_forest.py
Expand Up @@ -32,7 +32,10 @@ def imbalanced_dataset():
[
({"n_estimators": "whatever"}, "n_estimators must be an integer"),
({"n_estimators": -100}, "n_estimators must be greater than zero"),
({"bootstrap": False, "oob_score": True}, "Out of bag estimation only",),
(
{"bootstrap": False, "oob_score": True},
"Out of bag estimation only",
),
],
)
def test_balanced_random_forest_error(imbalanced_dataset, forest_params, err_msg):
Expand Down Expand Up @@ -105,7 +108,10 @@ def test_balanced_random_forest_oob(imbalanced_dataset):
X, y, random_state=42, stratify=y
)
est = BalancedRandomForestClassifier(
oob_score=True, random_state=0, n_estimators=1000, min_samples_leaf=2,
oob_score=True,
random_state=0,
n_estimators=1000,
min_samples_leaf=2,
)

est.fit(X_train, y_train)
Expand Down Expand Up @@ -135,12 +141,16 @@ def test_little_tree_with_small_max_samples():

# First fit with no restriction on max samples
est1 = BalancedRandomForestClassifier(
n_estimators=1, random_state=rng, max_samples=None,
n_estimators=1,
random_state=rng,
max_samples=None,
)

# Second fit with max samples restricted to just 2
est2 = BalancedRandomForestClassifier(
n_estimators=1, random_state=rng, max_samples=2,
n_estimators=1,
random_state=rng,
max_samples=2,
)

est1.fit(X, y)
Expand Down