Skip to content

Commit

Permalink
Merge branch 'main' into feature/ohe_refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
Chip2916 committed Jul 9, 2024
2 parents d2ffa71 + 9b901d7 commit 1c05bf2
Show file tree
Hide file tree
Showing 8 changed files with 268 additions and 311 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,17 @@ Subsections for each version can be one of the following;
- ``Security`` in case of vulnerabilities.

Each individual change should have a link to the pull request after the description of the change.

1.3.1 (unreleased)
------------------

Changed
^^^^^^^

- Refactored NominalToIntegerTransformer tests in new format `#261 <https://github.com/lvgig/tubular/pull/261>`_
- Refactored GroupRareLevelsTransformer tests in new format `#259 <https://github.com/lvgig/tubular/pull/259>`_
- DatetimeInfoExtractor.mappings_provided changed from a dict.keys() object to list so transformer is serialisable. `#258 <https://github.com/lvgig/tubular/pull/258>`_
- Created BaseNumericTransformer class to support test refactor of numeric file

1.3.0 (2024-06-13)
------------------
Expand Down
29 changes: 17 additions & 12 deletions tests/base_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
import sklearn.base as b
import test_aide as ta

import tests.test_data as d


class GenericInitTests:
"""
Expand Down Expand Up @@ -288,10 +286,11 @@ def test_X_non_df_error(
self,
initialized_transformers,
non_df,
minimal_dataframe_lookup,
):
"""Test an error is raised if X is not passed as a pd.DataFrame."""

df = d.create_numeric_df_1()
df = minimal_dataframe_lookup[self.transformer_name]

x = initialized_transformers[self.transformer_name]

Expand All @@ -306,10 +305,11 @@ def test_non_pd_type_error(
self,
non_series,
initialized_transformers,
minimal_dataframe_lookup,
):
"""Test an error is raised if y is not passed as a pd.Series."""

df = d.create_numeric_df_1()
df = minimal_dataframe_lookup[self.transformer_name]

x = initialized_transformers[self.transformer_name]

Expand Down Expand Up @@ -338,12 +338,13 @@ def test_X_no_rows_error(
def test_Y_no_rows_error(
self,
initialized_transformers,
minimal_dataframe_lookup,
):
"""Test an error is raised if Y has no rows."""

x = initialized_transformers[self.transformer_name]

df = pd.DataFrame({"a": 1, "b": "wow", "c": np.nan}, index=[0])
df = minimal_dataframe_lookup[self.transformer_name]

with pytest.raises(
ValueError,
Expand Down Expand Up @@ -549,10 +550,11 @@ def test_non_pd_type_error(
self,
non_df,
initialized_transformers,
minimal_dataframe_lookup,
):
"""Test that an error is raised in transform if X is not a pd.DataFrame."""

df = d.create_df_8()
df = minimal_dataframe_lookup[self.transformer_name]

x = initialized_transformers[self.transformer_name]

Expand All @@ -564,19 +566,20 @@ def test_non_pd_type_error(
):
x_fitted.transform(X=non_df)

def test_no_rows_error(self, initialized_transformers):
def test_no_rows_error(self, initialized_transformers, minimal_dataframe_lookup):
"""Test an error is raised if X has no rows."""
df = d.create_df_8()

df = minimal_dataframe_lookup[self.transformer_name]

x = initialized_transformers[self.transformer_name]

x = x.fit(df, df["c"])

df = pd.DataFrame(columns=["a", "b", "c"])
df = df.head(0)

with pytest.raises(
ValueError,
match=re.escape(f"{self.transformer_name}: X has no rows; (0, 3)"),
match=re.escape(f"{self.transformer_name}: X has no rows; {df.shape}"),
):
x.transform(df)

Expand Down Expand Up @@ -695,9 +698,10 @@ def test_columns_not_list_error(
self,
non_list,
initialized_transformers,
minimal_dataframe_lookup,
):
"""Test an error is raised if self.columns is not a list."""
df = d.create_df_1()
df = minimal_dataframe_lookup[self.transformer_name]

x = initialized_transformers[self.transformer_name]

Expand All @@ -712,9 +716,10 @@ def test_columns_not_list_error(
def test_columns_not_in_X_error(
self,
initialized_transformers,
minimal_dataframe_lookup,
):
"""Test an error is raised if self.columns contains a value not in X."""
df = d.create_df_1()
df = minimal_dataframe_lookup[self.transformer_name]

x = initialized_transformers[self.transformer_name]

Expand Down
5 changes: 4 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,9 @@ def minimal_attribute_dict():
"BaseNominalTransformer": {
"columns": ["b"],
},
"BaseNumericTransformer": {
"columns": ["a", "b"],
},
"BaseTransformer": {
"columns": ["a"],
},
Expand Down Expand Up @@ -233,7 +236,7 @@ def minimal_attribute_dict():
"separator": "-",
},
"OrdinalEncoderTransformer": {
"columns": ["b"],
"columns": ["b"],

Check failure on line 239 in tests/conftest.py

View workflow job for this annotation

GitHub Actions / lint (3.8)

Ruff (E241)

tests/conftest.py:239:30: E241 Multiple spaces after comma

Check failure on line 239 in tests/conftest.py

View workflow job for this annotation

GitHub Actions / lint (3.8)

Ruff (W291)

tests/conftest.py:239:30: W291 Trailing whitespace

Check failure on line 239 in tests/conftest.py

View workflow job for this annotation

GitHub Actions / lint (3.8)

Ruff (E241)

tests/conftest.py:239:30: E241 Multiple spaces after comma

Check failure on line 239 in tests/conftest.py

View workflow job for this annotation

GitHub Actions / lint (3.8)

Ruff (W291)

tests/conftest.py:239:30: W291 Trailing whitespace
},
"OutOfRangeNullTransformer": {
"capping_values": {"a": [0.1, 0.2]},
Expand Down
28 changes: 4 additions & 24 deletions tests/nominal/test_BaseNominalTransformer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import re

import pandas as pd
import pytest
from sklearn.exceptions import NotFittedError
Expand All @@ -16,7 +14,7 @@
# The first part of this file builds out the tests for BaseNominalTransformer so that they can be
# imported into other test files (by not starting the class name with Test)
# The second part actually calls these tests (along with all other required tests) for the BaseNominalTransformer
class GenericBaseNominalTransformerTests:
class GenericNominalTransformTests(GenericTransformTests):
"""
Tests for BaseNominalTransformer.transform().
Note this deliberately avoids starting with "Tests" so that the tests are not run on import.
Expand All @@ -29,7 +27,7 @@ def test_not_fitted_error_raised(self, initialized_transformers):
with pytest.raises(NotFittedError):
initialized_transformers[self.transformer_name].transform(df)

def test_exception_raised(self, initialized_transformers):
def test_non_mappable_rows_exception_raised(self, initialized_transformers):
"""Test an exception is raised if non-mappable rows are present in X."""
df = d.create_df_1()

Expand All @@ -44,7 +42,7 @@ def test_exception_raised(self, initialized_transformers):

with pytest.raises(
ValueError,
match="BaseNominalTransformer: nulls would be introduced into column b from levels not present in mapping",
match=f"{self.transformer_name}: nulls would be introduced into column b from levels not present in mapping",
):
x.transform(df)

Expand All @@ -63,24 +61,6 @@ def test_original_df_not_updated(self, initialized_transformers):

pd.testing.assert_frame_equal(df, d.create_df_1())

def test_no_rows_error(self, initialized_transformers):
"""Test an error is raised if X has no rows."""
df = d.create_df_1()

x = initialized_transformers[self.transformer_name]

x = x.fit(df)

x.mappings = {"b": {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6}}

df = pd.DataFrame(columns=["a", "b", "c"])

with pytest.raises(
ValueError,
match=re.escape(f"{self.transformer_name}: X has no rows; (0, 3)"),
):
x.transform(df)


class TestInit(ColumnStrListInitTests):
"""Generic tests for transformer.init()."""
Expand All @@ -98,7 +78,7 @@ def setup_class(cls):
cls.transformer_name = "BaseNominalTransformer"


class TestTransform(GenericBaseNominalTransformerTests, GenericTransformTests):
class TestTransform(GenericNominalTransformTests):
"""Tests for BaseNominalTransformer.transform."""

@classmethod
Expand Down
Loading

0 comments on commit 1c05bf2

Please sign in to comment.