Merging main, but with two test failures

Signed-off-by: Adam Li <adam2392@gmail.com>
neurodata · Jun 23, 2023 · 1994f15 · 1994f15
2 parents 1c1ec8c + 9cbcc1f
commit 1994f15
Show file tree

Hide file tree

Showing 823 changed files with 7,737 additions and 6,460 deletions.
diff --git a/.binder/requirements.txt b/.binder/requirements.txt
@@ -1,4 +1,4 @@
---find-links https://pypi.anaconda.org/scipy-wheels-nightly/simple/scikit-learn
+--find-links https://pypi.anaconda.org/scientific-python-nightly-wheels/simple/scikit-learn
 --pre
 matplotlib
 scikit-image

diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -11,7 +11,7 @@ jobs:
           command: |
             source build_tools/shared.sh
             # Include pytest compatibility with mypy
-            pip install pytest flake8 $(get_dep mypy min) $(get_dep black min) cython-lint
+            pip install pytest ruff $(get_dep mypy min) $(get_dep black min) cython-lint
       - run:
           name: linting
           command: ./build_tools/linting.sh

diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
@@ -28,3 +28,6 @@ d4aad64b1eb2e42e76f49db2ccfbe4b4660d092b
 
 # PR 26110: Update black to 23.3.0
 893d5accaf9d16f447645e704f85a216187564f7
+
+# PR 26649: Add isort and ruff rules
+42173fdb34b5aded79664e045cada719dfbe39dc
diff --git a/.github/scripts/label_title_regex.py b/.github/scripts/label_title_regex.py
@@ -1,10 +1,11 @@
 """Labels PRs based on title. Must be run in a github action with the
 pull_request_target event."""
-from github import Github
-import os
 import json
+import os
 import re
 
+from github import Github
+
 context_dict = json.loads(os.getenv("CONTEXT_GITHUB"))
 
 repo = context_dict["repository"]

diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
@@ -0,0 +1,103 @@
+# This linter job on GH actions is used to trigger the commenter bot
+# in bot-lint-comment.yml file. It stores the output of the linter to be used
+# by the commenter bot.
+name: linter
+
+on:
+  - pull_request_target
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+
+    # setting any permission will set everything else to none for GITHUB_TOKEN
+    permissions:
+      pull-requests: none
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+        with:
+          ref: ${{ github.event.pull_request.head.sha }}
+
+      - name: Set up Python
+        uses: actions/setup-python@v3
+        with:
+          python-version: 3.11
+
+      - name: Install dependencies
+        run: |
+          source build_tools/shared.sh
+          # Include pytest compatibility with mypy
+          pip install pytest ruff $(get_dep mypy min) $(get_dep black min) cython-lint
+          # we save the versions of the linters to be used in the error message later.
+          python -c "from importlib.metadata import version; print(f\"ruff={version('ruff')}\")" >> /tmp/versions.txt
+          python -c "from importlib.metadata import version; print(f\"mypy={version('mypy')}\")" >> /tmp/versions.txt
+          python -c "from importlib.metadata import version; print(f\"black={version('black')}\")" >> /tmp/versions.txt
+          python -c "from importlib.metadata import version; print(f\"cython-lint={version('cython-lint')}\")" >> /tmp/versions.txt
+
+      - name: Run linting
+        id: lint-script
+        # We download the linting script from main, since this workflow is run
+        # from main itself.
+        run: |
+          curl https://raw.githubusercontent.com/${{ github.repository }}/main/build_tools/linting.sh --retry 5 -o ./build_tools/linting.sh
+          set +e
+          ./build_tools/linting.sh &> /tmp/linting_output.txt
+          cat /tmp/linting_output.txt
+
+      - name: Upload Artifact
+        if: always()
+        uses: actions/upload-artifact@v3
+        with:
+          name: lint-log
+          path: |
+            /tmp/linting_output.txt
+            /tmp/versions.txt
+          retention-days: 1
+
+  comment:
+    needs: lint
+    if: always()
+    runs-on: ubuntu-latest
+
+    # We need these permissions to be able to post / update comments
+    permissions:
+      pull-requests: write
+      issues: write
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v3
+        with:
+          python-version: 3.11
+
+      - name: Install dependencies
+        run: python -m pip install requests
+
+      - name: Download artifact
+        id: download-artifact
+        uses: actions/download-artifact@v3
+        with:
+          name: lint-log
+
+      - name: Print log
+        run: cat linting_output.txt
+
+      - name: Process Comments
+        id: process-comments
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          BRANCH_SHA: ${{ github.event.pull_request.head.sha }}
+          RUN_ID: ${{ github.run_id }}
+          LOG_FILE: linting_output.txt
+          VERSIONS_FILE: versions.txt
+        run: python ./build_tools/get_comment.py
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -9,13 +9,14 @@ repos:
     rev: 23.3.0
     hooks:
     -   id: black
--   repo: https://github.com/pycqa/flake8
-    rev: 4.0.1
+-   repo: https://github.com/astral-sh/ruff-pre-commit
+    # Ruff version.
+    rev: v0.0.272
     hooks:
-    -   id: flake8
-        types: [file, python]
+    -   id: ruff
+        args: ["--fix", "--show-source"]
 -   repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v0.961
+    rev: v1.3.0
     hooks:
      -  id: mypy
         files: sklearn/

diff --git a/Makefile b/Makefile
@@ -61,8 +61,7 @@ doc-noplot: inplace
 	$(MAKE) -C doc html-noplot
 
 code-analysis:
-	flake8 sklearn | grep -v __init__ | grep -v external
-	pylint -E -i y sklearn/ -d E1103,E0611,E1101
+	build_tools/linting.sh
 
 build-dev:
 	pip install --verbose --no-build-isolation --editable .
diff --git a/asv_benchmarks/benchmarks/cluster.py b/asv_benchmarks/benchmarks/cluster.py
@@ -1,7 +1,7 @@
 from sklearn.cluster import KMeans, MiniBatchKMeans
 
 from .common import Benchmark, Estimator, Predictor, Transformer
-from .datasets import _blobs_dataset, _20newsgroups_highdim_dataset
+from .datasets import _20newsgroups_highdim_dataset, _blobs_dataset
 from .utils import neg_mean_inertia
 
 

diff --git a/asv_benchmarks/benchmarks/common.py b/asv_benchmarks/benchmarks/common.py
@@ -1,11 +1,11 @@
-import os
+import itertools
 import json
-import timeit
+import os
 import pickle
-import itertools
+import timeit
 from abc import ABC, abstractmethod
-from pathlib import Path
 from multiprocessing import cpu_count
+from pathlib import Path
 
 import numpy as np
 

diff --git a/asv_benchmarks/benchmarks/datasets.py b/asv_benchmarks/benchmarks/datasets.py
@@ -1,21 +1,22 @@
+from pathlib import Path
+
 import numpy as np
 import scipy.sparse as sp
 from joblib import Memory
-from pathlib import Path
 
-from sklearn.decomposition import TruncatedSVD
 from sklearn.datasets import (
-    make_blobs,
     fetch_20newsgroups,
+    fetch_olivetti_faces,
     fetch_openml,
     load_digits,
-    make_regression,
+    make_blobs,
     make_classification,
-    fetch_olivetti_faces,
+    make_regression,
 )
-from sklearn.preprocessing import MaxAbsScaler, StandardScaler
+from sklearn.decomposition import TruncatedSVD
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import MaxAbsScaler, StandardScaler
 
 # memory location for caching datasets
 M = Memory(location=str(Path(__file__).resolve().parent / "cache"))

diff --git a/asv_benchmarks/benchmarks/decomposition.py b/asv_benchmarks/benchmarks/decomposition.py
@@ -1,8 +1,8 @@
 from sklearn.decomposition import PCA, DictionaryLearning, MiniBatchDictionaryLearning
 
 from .common import Benchmark, Estimator, Transformer
-from .datasets import _olivetti_faces_dataset, _mnist_dataset
-from .utils import make_pca_scorers, make_dict_learning_scorers
+from .datasets import _mnist_dataset, _olivetti_faces_dataset
+from .utils import make_dict_learning_scorers, make_pca_scorers
 
 
 class PCABenchmark(Transformer, Estimator, Benchmark):

diff --git a/asv_benchmarks/benchmarks/ensemble.py b/asv_benchmarks/benchmarks/ensemble.py
@@ -1,7 +1,7 @@
 from sklearn.ensemble import (
-    RandomForestClassifier,
     GradientBoostingClassifier,
     HistGradientBoostingClassifier,
+    RandomForestClassifier,
 )
 
 from .common import Benchmark, Estimator, Predictor

diff --git a/asv_benchmarks/benchmarks/linear_model.py b/asv_benchmarks/benchmarks/linear_model.py
@@ -1,9 +1,9 @@
 from sklearn.linear_model import (
-    LogisticRegression,
-    Ridge,
     ElasticNet,
     Lasso,
     LinearRegression,
+    LogisticRegression,
+    Ridge,
     SGDRegressor,
 )
 

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
@@ -35,7 +35,7 @@ jobs:
     - bash: |
         source build_tools/shared.sh
         # Include pytest compatibility with mypy
-        pip install pytest flake8 $(get_dep mypy min) $(get_dep black min) cython-lint
+        pip install pytest ruff $(get_dep mypy min) $(get_dep black min) cython-lint
       displayName: Install linters
     - bash: |
         ./build_tools/linting.sh

diff --git a/benchmarks/bench_20newsgroups.py b/benchmarks/bench_20newsgroups.py
@@ -1,18 +1,19 @@
-from time import time
 import argparse
-import numpy as np
+from time import time
 
-from sklearn.dummy import DummyClassifier
+import numpy as np
 
 from sklearn.datasets import fetch_20newsgroups_vectorized
-from sklearn.metrics import accuracy_score
-from sklearn.utils.validation import check_array
-
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.ensemble import ExtraTreesClassifier
-from sklearn.ensemble import AdaBoostClassifier
+from sklearn.dummy import DummyClassifier
+from sklearn.ensemble import (
+    AdaBoostClassifier,
+    ExtraTreesClassifier,
+    RandomForestClassifier,
+)
 from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import accuracy_score
 from sklearn.naive_bayes import MultinomialNB
+from sklearn.utils.validation import check_array
 
 ESTIMATORS = {
     "dummy": DummyClassifier(),

diff --git a/benchmarks/bench_covertype.py b/benchmarks/bench_covertype.py
@@ -45,20 +45,24 @@
 #         Arnaud Joly <arnaud.v.joly@gmail.com>
 # License: BSD 3 clause
 
+import argparse
 import os
 from time import time
-import argparse
+
 import numpy as np
 from joblib import Memory
 
 from sklearn.datasets import fetch_covtype, get_data_home
-from sklearn.svm import LinearSVC
-from sklearn.linear_model import SGDClassifier, LogisticRegression
+from sklearn.ensemble import (
+    ExtraTreesClassifier,
+    GradientBoostingClassifier,
+    RandomForestClassifier,
+)
+from sklearn.linear_model import LogisticRegression, SGDClassifier
+from sklearn.metrics import zero_one_loss
 from sklearn.naive_bayes import GaussianNB
+from sklearn.svm import LinearSVC
 from sklearn.tree import DecisionTreeClassifier
-from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
-from sklearn.ensemble import GradientBoostingClassifier
-from sklearn.metrics import zero_one_loss
 from sklearn.utils import check_array
 
 # Memoize the data extraction and memory map the resulting

diff --git a/benchmarks/bench_feature_expansions.py b/benchmarks/bench_feature_expansions.py
@@ -1,8 +1,10 @@
+from time import time
+
 import matplotlib.pyplot as plt
 import numpy as np
 import scipy.sparse as sparse
+
 from sklearn.preprocessing import PolynomialFeatures
-from time import time
 
 degree = 2
 trials = 3

diff --git a/benchmarks/bench_glm.py b/benchmarks/bench_glm.py
@@ -5,9 +5,10 @@
 
 """
 from datetime import datetime
+
 import numpy as np
-from sklearn import linear_model
 
+from sklearn import linear_model
 
 if __name__ == "__main__":
     import matplotlib.pyplot as plt

diff --git a/benchmarks/bench_glmnet.py b/benchmarks/bench_glmnet.py
@@ -16,9 +16,11 @@
 
 In both cases, only 10% of the features are informative.
 """
-import numpy as np
 import gc
 from time import time
+
+import numpy as np
+
 from sklearn.datasets import make_regression
 
 alpha = 0.1
@@ -45,11 +47,11 @@ def bench(factory, X, Y, X_test, Y_test, ref_coef):
 
 
 if __name__ == "__main__":
-    from glmnet.elastic_net import Lasso as GlmnetLasso
-    from sklearn.linear_model import Lasso as ScikitLasso
-
     # Delayed import of matplotlib.pyplot
     import matplotlib.pyplot as plt
+    from glmnet.elastic_net import Lasso as GlmnetLasso
+
+    from sklearn.linear_model import Lasso as ScikitLasso
 
     scikit_results = []
     glmnet_results = []