Skip to content

Commit

Permalink
Developer tools (#8)
Browse files Browse the repository at this point in the history
Add linting, formatting, and type-checking to the project via pre-commit, as well as CI testing and linting. Also, make necessary changes to get all checks passing.
  • Loading branch information
aazuspan committed Apr 24, 2023
1 parent ae2d557 commit 950b0bb
Show file tree
Hide file tree
Showing 15 changed files with 165 additions and 32 deletions.
47 changes: 47 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
name: Test and lint
on: [push, pull_request]

permissions:
contents: read

jobs:
test:
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]

runs-on: ubuntu-latest

steps:
- name: Checkout code
uses: actions/checkout@v2

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -e .[dev]
- name: Run tests
run: pytest
lint:
runs-on: ubuntu-latest

steps:
- name: Checkout code
uses: actions/checkout@v2

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install .[dev]
- name: Sourcery login
run: sourcery login --token ${{ secrets.SOURCERY_TOKEN }}

- name: Set up pre-commit
uses: pre-commit/action@v3.0.0
8 changes: 5 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
__pycache__
*.egg-info
*.R
__pycache__/
*.egg-info/
.mypy_cache/
.pytest_cache/
.coverage
32 changes: 32 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
repos:
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.2.0
hooks:
- id: mypy
exclude: ^tests/|^setup.py
args: [--ignore-missing-imports]

- repo: https://github.com/pycqa/isort
rev: 5.12.0
hooks:
- id: isort
args: [--profile, black]

- repo: https://github.com/psf/black
rev: 22.10.0
hooks:
- id: black

- repo: https://github.com/pycqa/flake8
rev: 6.0.0
hooks:
- id: flake8
args: [--max-line-length, '88']
additional_dependencies:
- flake8-bugbear

- repo: https://github.com/sourcery-ai/sourcery
rev: v1.2.0
hooks:
- id: sourcery
args: [--diff=git diff HEAD, --no-summary]
41 changes: 41 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# scikit-learn-knn

This package is in active development.

## Developer Guide

### Setup

After cloning the repository, install the package in editable mode with the development dependencies using:

```bash
$ pip install -e .[dev]
```

### Pre-commit

This project uses [pre-commit](https://pre-commit.com/) to run linting, type-checking, and formatting. You can run pre-commit manually with:

```bash
$ pre-commit run --all-files
```

...or install it to run automatically before every commit with:

```bash
$ pre-commit install
```

### Testing

Unit tests are *not* run by `pre-commit`, but can be run manually with:

```bash
$ pytest
```

Measure test coverage with:

```bash
$ pytest --cov=sklearn_knn
```
19 changes: 14 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
from setuptools import setup, find_packages

VERSION = "0.1.0"
from setuptools import find_packages, setup


def setup_package():
Expand All @@ -10,19 +8,30 @@ def setup_package():
author_email="matt.gregory@oregonstate.edu",
description="Scikit-learn estimators for kNN methods",
url="http://github.com/lemma-osu/scikit-learn-knn/",
version=VERSION,
version="0.1.0",
package_dir={"": "src"},
packages=find_packages(where="src"),
python_requires=">=3.8",
install_requires=[
"numpy",
"scikit-learn",
],
extras_require={
"dev": [
"black",
"bumpversion",
"flake8",
"flake8-bugbear",
"isort",
"pytest",
"pytest-cov",
"pandas",
"pre-commit",
"mypy",
"twine",
"sourcery",
],
}
},
)
setup(**metadata)

Expand Down
4 changes: 2 additions & 2 deletions src/sklearn_knn/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from ._raw import Raw
from ._euclidean import Euclidean
from ._gnn import GNN
from ._mahalanobis import Mahalanobis
from ._msn import MSN
from ._gnn import GNN
from ._raw import Raw

__all__ = [
"Raw",
Expand Down
7 changes: 4 additions & 3 deletions src/sklearn_knn/_base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import numpy as np

from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.utils.validation import check_is_fitted
Expand All @@ -10,24 +9,26 @@ class IDNeighborsClassifier(KNeighborsClassifier):
Specialized KNeighborsClassifier where labels
are IDs for samples and not classes
"""

def kneighbor_ids(self, X=None, n_neighbors=None):
neigh_ind = super().kneighbors(X, n_neighbors, False)
return self.classes_[self._y[neigh_ind]]


class TransformedKNeighborsMixin(KNeighborsClassifier):
"""
Mixin for KNeighbors classifiers that store a `transform_` during fitting
Mixin for KNeighbors classifiers that store a `transform_` during fitting
(e.g. GNN).
"""

def kneighbors(self, X=None, n_neighbors=None, return_distance=True):
if X is not None:
check_is_fitted(self, "transform_")
X = self.transform_.transform(X)
return super().kneighbors(
X=X, n_neighbors=n_neighbors, return_distance=return_distance
)


class MyStandardScaler(StandardScaler):
def fit(self, X, y=None, sample_weight=None):
Expand Down
3 changes: 2 additions & 1 deletion src/sklearn_knn/_euclidean.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from sklearn.utils.validation import check_is_fitted
from ._base import MyStandardScaler, IDNeighborsClassifier, TransformedKNeighborsMixin

from ._base import IDNeighborsClassifier, MyStandardScaler, TransformedKNeighborsMixin


class Euclidean(IDNeighborsClassifier, TransformedKNeighborsMixin):
Expand Down
1 change: 1 addition & 0 deletions src/sklearn_knn/_gnn.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from sklearn.utils.validation import check_is_fitted

from ._base import IDNeighborsClassifier, TransformedKNeighborsMixin
from .original._cca_transformer import CCATransformer

Expand Down
2 changes: 1 addition & 1 deletion src/sklearn_knn/_raw.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@


class Raw(IDNeighborsClassifier):
pass
pass
6 changes: 2 additions & 4 deletions src/sklearn_knn/original/_cca.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from dataclasses import dataclass
import math
from dataclasses import dataclass

import numpy as np

Expand Down Expand Up @@ -202,9 +202,7 @@ def site_wa_scores(self):
def species_tolerances(self):
xi = self.site_lc_scores
uk = self.species_scores
xiuk = np.zeros(
(uk.shape[0], xi.shape[0], xi.shape[1]), dtype=np.float64
)
xiuk = np.zeros((uk.shape[0], xi.shape[0], xi.shape[1]), dtype=np.float64)
for i, s in enumerate(uk):
xiuk[i] = xi - s
y = self.Y.T
Expand Down
1 change: 0 additions & 1 deletion src/sklearn_knn/original/_cca_transformer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin

from ._cca import CCA
Expand Down
4 changes: 2 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def __init__(self, project, method, k=5):
self.ref_neighbors = ref_neighbors_df.loc[:, cols].values
self.trg_distances = trg_distances_df.loc[:, cols].values
self.trg_neighbors = trg_neighbors_df.loc[:, cols].values

self.X = env_df.iloc[:, 1:].values
self.y = spp_df.iloc[:, 1:].values
self.ids = env_df.iloc[:, 0].values
Expand All @@ -42,4 +42,4 @@ def moscow_euc():

@pytest.fixture
def moscow_gnn():
return Dataset(project="moscow", method="gnn", k=5)
return Dataset(project="moscow", method="gnn", k=5)
18 changes: 10 additions & 8 deletions tests/test_estimators.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,28 @@
from typing import List

import pytest

# from sklearn.utils.estimator_checks import parametrize_with_checks
from sklearn.utils.validation import NotFittedError
from sklearn.utils.estimator_checks import parametrize_with_checks
from sklearn_knn import Euclidean, Raw, GNN

from sklearn_knn import GNN, Euclidean, Raw
from sklearn_knn._base import IDNeighborsClassifier
from typing import List


def get_kneighbor_estimator_instances() -> List[IDNeighborsClassifier]:
"""
Return instances of all supported IDNeighborsClassifier estimators.
"""
return [
Raw(),
Raw(),
Euclidean(),
GNN(),
]


# Note: This will run all the sklearn estimator checks. It's going to take quite a bit of work
# to get these all passing, and it's possible we just won't be able to do it while maintaining
# all the features we need.
# Note: This will run all the sklearn estimator checks. It's going to take quite a bit
# of work to get these all passing, and it's possible we just won't be able to do it
# while maintaining all the features we need.
# @parametrize_with_checks(get_kneighbor_estimator_instances())
# def test_sklearn_compatibile_estimators(estimator, check):
# check(estimator)
Expand All @@ -44,4 +47,3 @@ def test_estimators_raise_notfitted_predict(estimator, moscow_euc):
"""Attempting to call predict on an unfitted estimator should raise."""
with pytest.raises(NotFittedError):
estimator.predict(moscow_euc.X)

4 changes: 2 additions & 2 deletions tests/test_port.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from numpy.testing import assert_array_equal, assert_array_almost_equal
from numpy.testing import assert_array_almost_equal, assert_array_equal
from sklearn.model_selection import train_test_split

from sklearn_knn import Raw, Euclidean, GNN
from sklearn_knn import GNN, Euclidean, Raw


def test_moscow_raw(moscow_raw):
Expand Down

0 comments on commit 950b0bb

Please sign in to comment.