Developer tools (#8)

Add linting, formatting, and type-checking to the project via pre-commit, as well as CI testing and linting. Also, make necessary changes to get all checks passing.
lemma-osu · Apr 24, 2023 · 950b0bb · 950b0bb
1 parent ae2d557
commit 950b0bb
Show file tree

Hide file tree

Showing 15 changed files with 165 additions and 32 deletions.
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -0,0 +1,47 @@
+name: Test and lint
+on: [push, pull_request]
+
+permissions:
+  contents: read
+
+jobs:
+  test:
+    strategy:
+      matrix:
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
+
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e .[dev]
+
+      - name: Run tests
+        run: pytest
+  lint:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install .[dev]
+
+      - name: Sourcery login
+        run: sourcery login --token ${{ secrets.SOURCERY_TOKEN }}
+
+      - name: Set up pre-commit
+        uses: pre-commit/action@v3.0.0
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,5 @@
-__pycache__
-*.egg-info
-*.R
+__pycache__/
+*.egg-info/
+.mypy_cache/
+.pytest_cache/
+.coverage
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,32 @@
+repos:
+- repo: https://github.com/pre-commit/mirrors-mypy
+  rev: v1.2.0
+  hooks:
+  - id: mypy
+    exclude: ^tests/|^setup.py
+    args: [--ignore-missing-imports]
+
+- repo: https://github.com/pycqa/isort
+  rev: 5.12.0
+  hooks:
+  - id: isort
+    args: [--profile, black]
+
+- repo: https://github.com/psf/black
+  rev: 22.10.0
+  hooks:
+  - id: black
+
+- repo: https://github.com/pycqa/flake8
+  rev: 6.0.0
+  hooks:
+  - id: flake8
+    args: [--max-line-length, '88']
+    additional_dependencies:
+    - flake8-bugbear
+
+- repo: https://github.com/sourcery-ai/sourcery
+  rev: v1.2.0
+  hooks:
+  - id: sourcery
+    args: [--diff=git diff HEAD, --no-summary]
diff --git a/README.md b/README.md
@@ -0,0 +1,41 @@
+# scikit-learn-knn
+
+This package is in active development.
+
+## Developer Guide
+
+### Setup
+
+After cloning the repository, install the package in editable mode with the development dependencies using:
+
+```bash
+$ pip install -e .[dev]
+```
+
+### Pre-commit
+
+This project uses [pre-commit](https://pre-commit.com/) to run linting, type-checking, and formatting. You can run pre-commit manually with:
+
+```bash
+$ pre-commit run --all-files
+```
+
+...or install it to run automatically before every commit with:
+
+```bash
+$ pre-commit install
+```
+
+### Testing
+
+Unit tests are *not* run by `pre-commit`, but can be run manually with:
+
+```bash
+$ pytest
+```
+
+Measure test coverage with:
+
+```bash
+$ pytest --cov=sklearn_knn
+```
diff --git a/setup.py b/setup.py
@@ -1,6 +1,4 @@
-from setuptools import setup, find_packages
-
-VERSION = "0.1.0"
+from setuptools import find_packages, setup
 
 
 def setup_package():
@@ -10,19 +8,30 @@ def setup_package():
         author_email="matt.gregory@oregonstate.edu",
         description="Scikit-learn estimators for kNN methods",
         url="http://github.com/lemma-osu/scikit-learn-knn/",
-        version=VERSION,
+        version="0.1.0",
         package_dir={"": "src"},
         packages=find_packages(where="src"),
+        python_requires=">=3.8",
         install_requires=[
             "numpy",
             "scikit-learn",
         ],
         extras_require={
             "dev": [
+                "black",
+                "bumpversion",
+                "flake8",
+                "flake8-bugbear",
+                "isort",
                 "pytest",
+                "pytest-cov",
                 "pandas",
+                "pre-commit",
+                "mypy",
+                "twine",
+                "sourcery",
             ],
-        }
+        },
     )
     setup(**metadata)
 

diff --git a/src/sklearn_knn/__init__.py b/src/sklearn_knn/__init__.py
@@ -1,8 +1,8 @@
-from ._raw import Raw
 from ._euclidean import Euclidean
+from ._gnn import GNN
 from ._mahalanobis import Mahalanobis
 from ._msn import MSN
-from ._gnn import GNN
+from ._raw import Raw
 
 __all__ = [
     "Raw",

diff --git a/src/sklearn_knn/_base.py b/src/sklearn_knn/_base.py
@@ -1,5 +1,4 @@
 import numpy as np
-
 from sklearn.neighbors import KNeighborsClassifier
 from sklearn.preprocessing import StandardScaler
 from sklearn.utils.validation import check_is_fitted
@@ -10,24 +9,26 @@ class IDNeighborsClassifier(KNeighborsClassifier):
     Specialized KNeighborsClassifier where labels
     are IDs for samples and not classes
     """
+
     def kneighbor_ids(self, X=None, n_neighbors=None):
         neigh_ind = super().kneighbors(X, n_neighbors, False)
         return self.classes_[self._y[neigh_ind]]
 
 
 class TransformedKNeighborsMixin(KNeighborsClassifier):
     """
-    Mixin for KNeighbors classifiers that store a `transform_` during fitting 
+    Mixin for KNeighbors classifiers that store a `transform_` during fitting
     (e.g. GNN).
     """
+
     def kneighbors(self, X=None, n_neighbors=None, return_distance=True):
         if X is not None:
             check_is_fitted(self, "transform_")
             X = self.transform_.transform(X)
         return super().kneighbors(
             X=X, n_neighbors=n_neighbors, return_distance=return_distance
         )
-    
+
 
 class MyStandardScaler(StandardScaler):
     def fit(self, X, y=None, sample_weight=None):

diff --git a/src/sklearn_knn/_euclidean.py b/src/sklearn_knn/_euclidean.py
@@ -1,5 +1,6 @@
 from sklearn.utils.validation import check_is_fitted
-from ._base import MyStandardScaler, IDNeighborsClassifier, TransformedKNeighborsMixin
+
+from ._base import IDNeighborsClassifier, MyStandardScaler, TransformedKNeighborsMixin
 
 
 class Euclidean(IDNeighborsClassifier, TransformedKNeighborsMixin):

diff --git a/src/sklearn_knn/_gnn.py b/src/sklearn_knn/_gnn.py
@@ -1,4 +1,5 @@
 from sklearn.utils.validation import check_is_fitted
+
 from ._base import IDNeighborsClassifier, TransformedKNeighborsMixin
 from .original._cca_transformer import CCATransformer
 

diff --git a/src/sklearn_knn/_raw.py b/src/sklearn_knn/_raw.py
@@ -2,4 +2,4 @@
 
 
 class Raw(IDNeighborsClassifier):
-    pass
+    pass
diff --git a/src/sklearn_knn/original/_cca.py b/src/sklearn_knn/original/_cca.py
@@ -1,5 +1,5 @@
-from dataclasses import dataclass
 import math
+from dataclasses import dataclass
 
 import numpy as np
 
@@ -202,9 +202,7 @@ def site_wa_scores(self):
     def species_tolerances(self):
         xi = self.site_lc_scores
         uk = self.species_scores
-        xiuk = np.zeros(
-            (uk.shape[0], xi.shape[0], xi.shape[1]), dtype=np.float64
-        )
+        xiuk = np.zeros((uk.shape[0], xi.shape[0], xi.shape[1]), dtype=np.float64)
         for i, s in enumerate(uk):
             xiuk[i] = xi - s
         y = self.Y.T

diff --git a/src/sklearn_knn/original/_cca_transformer.py b/src/sklearn_knn/original/_cca_transformer.py
@@ -1,4 +1,3 @@
-import numpy as np
 from sklearn.base import BaseEstimator, TransformerMixin
 
 from ._cca import CCA

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -24,7 +24,7 @@ def __init__(self, project, method, k=5):
         self.ref_neighbors = ref_neighbors_df.loc[:, cols].values
         self.trg_distances = trg_distances_df.loc[:, cols].values
         self.trg_neighbors = trg_neighbors_df.loc[:, cols].values
-        
+
         self.X = env_df.iloc[:, 1:].values
         self.y = spp_df.iloc[:, 1:].values
         self.ids = env_df.iloc[:, 0].values
@@ -42,4 +42,4 @@ def moscow_euc():
 
 @pytest.fixture
 def moscow_gnn():
-    return Dataset(project="moscow", method="gnn", k=5)
+    return Dataset(project="moscow", method="gnn", k=5)
diff --git a/tests/test_estimators.py b/tests/test_estimators.py
@@ -1,25 +1,28 @@
+from typing import List
+
 import pytest
 
+# from sklearn.utils.estimator_checks import parametrize_with_checks
 from sklearn.utils.validation import NotFittedError
-from sklearn.utils.estimator_checks import parametrize_with_checks
-from sklearn_knn import Euclidean, Raw, GNN
+
+from sklearn_knn import GNN, Euclidean, Raw
 from sklearn_knn._base import IDNeighborsClassifier
-from typing import List
+
 
 def get_kneighbor_estimator_instances() -> List[IDNeighborsClassifier]:
     """
     Return instances of all supported IDNeighborsClassifier estimators.
     """
     return [
-        Raw(), 
+        Raw(),
         Euclidean(),
         GNN(),
     ]
 
 
-# Note: This will run all the sklearn estimator checks. It's going to take quite a bit of work
-# to get these all passing, and it's possible we just won't be able to do it while maintaining
-# all the features we need. 
+# Note: This will run all the sklearn estimator checks. It's going to take quite a bit
+# of work to get these all passing, and it's possible we just won't be able to do it
+# while maintaining all the features we need.
 # @parametrize_with_checks(get_kneighbor_estimator_instances())
 # def test_sklearn_compatibile_estimators(estimator, check):
 #     check(estimator)
@@ -44,4 +47,3 @@ def test_estimators_raise_notfitted_predict(estimator, moscow_euc):
     """Attempting to call predict on an unfitted estimator should raise."""
     with pytest.raises(NotFittedError):
         estimator.predict(moscow_euc.X)
-
diff --git a/tests/test_port.py b/tests/test_port.py
@@ -1,7 +1,7 @@
-from numpy.testing import assert_array_equal, assert_array_almost_equal
+from numpy.testing import assert_array_almost_equal, assert_array_equal
 from sklearn.model_selection import train_test_split
 
-from sklearn_knn import Raw, Euclidean, GNN
+from sklearn_knn import GNN, Euclidean, Raw
 
 
 def test_moscow_raw(moscow_raw):