Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/check for NA or Infinity when notears is used #54

Merged
merged 10 commits into from
Sep 8, 2020
4 changes: 4 additions & 0 deletions causalnex/contrib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,7 @@
#
# See the License for the specific language governing permissions and
# limitations under the License.

__all__ = ["assert_all_finite"]

from .utils.validation import assert_all_finite
46 changes: 46 additions & 0 deletions causalnex/contrib/utils/validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""Utilities for input validation"""

# Authors: Jean-Baptiste Oger

import numpy as np

def assert_all_finite(
    X: np.ndarray,
    allow_nan: bool = False
):
    r"""Raise a ValueError if X contains NaN or Infinity.

    Based on Sklearn method to handle NaN & Infinity.
        @inproceedings{sklearn_api,
        author    = {Lars Buitinck and Gilles Louppe and Mathieu Blondel and
                    Fabian Pedregosa and Andreas Mueller and Olivier Grisel and
                    Vlad Niculae and Peter Prettenhofer and Alexandre Gramfort
                    and Jaques Grobler and Robert Layton and Jake VanderPlas and
                    Arnaud Joly and Brian Holt and Ga{\"{e}}l Varoquaux},
        title     = {{API} design for machine learning software: experiences from the scikit-learn
                    project},
        booktitle = {ECML PKDD Workshop: Languages for Data Mining and Machine Learning},
        year      = {2013},
        pages = {108--122},
        }

    Args:
        X: array (or array-like, converted with ``np.asarray``) to validate.
            Only float and complex dtypes are inspected; any other dtype
            is accepted without checks.
        allow_nan: if True, NaN entries are tolerated and only infinite
            entries trigger the error. If False (default), both NaN and
            infinite entries trigger the error.

    Raises:
        ValueError: If X contains NaN or Infinity
    """
    # Accept plain sequences as well as ndarrays; a no-op for ndarrays.
    X = np.asarray(X)

    # Integer, boolean, object... arrays are not inspected.
    if X.dtype.kind not in "fc":
        return

    # Cheap first pass: a finite sum implies every element is finite.
    # The sum itself may overflow (or hit inf - inf) on data that needs the
    # element-wise check below, so silence those floating point warnings.
    with np.errstate(over="ignore", invalid="ignore"):
        if np.isfinite(np.sum(X)):
            return

    # Slow, exact pass: decide which kind of non-finite value is forbidden.
    if allow_nan:
        has_forbidden = np.isinf(X).any()
        type_err = "infinity"
    else:
        has_forbidden = not np.isfinite(X).all()
        type_err = "NaN, infinity"

    if has_forbidden:
        msg_err = "Input contains {} or a value too large for {!r}."
        raise ValueError(msg_err.format(type_err, X.dtype))
6 changes: 6 additions & 0 deletions causalnex/structure/notears.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@
import scipy.linalg as slin
import scipy.optimize as sopt

from causalnex.contrib.utils.validation import assert_all_finite
mzjp2 marked this conversation as resolved.
Show resolved Hide resolved

from causalnex.structure.structuremodel import StructureModel

__all__ = ["from_numpy", "from_pandas", "from_numpy_lasso", "from_pandas_lasso"]
Expand Down Expand Up @@ -101,6 +103,8 @@ def from_numpy(

# n examples, d properties
_, d = X.shape

assert_all_finite(X, allow_nan=False)
mzjp2 marked this conversation as resolved.
Show resolved Hide resolved

bnds = [
(0, 0)
Expand Down Expand Up @@ -162,6 +166,8 @@ def from_numpy_lasso(
# n examples, d properties
_, d = X.shape

assert_all_finite(X, allow_nan=False)

bnds = [
(0, 0)
if i == j
Expand Down
20 changes: 20 additions & 0 deletions tests/contrib/test_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import pytest
mzjp2 marked this conversation as resolved.
Show resolved Hide resolved

import numpy as np

from causalnex.contrib.utils.validation import (
assert_all_finite
)

class TestValidation:
    """Unit tests for ``assert_all_finite``."""

    def test_array_with_nan_raises_error(self):
        """A float array holding a NaN must be rejected with a ValueError."""
        arr = np.ones((1, 1))
        arr[0, 0] = np.nan

        # Only the call under test lives inside the context manager, and the
        # pattern is a plain prefix (``match`` is a regex search, so no
        # trailing wildcard is needed).
        with pytest.raises(
            ValueError,
            match="Input contains NaN, infinity or a value too large for dtype",
        ):
            assert_all_finite(arr)

    def test_array_with_inf_raises_error(self):
        """A float array holding an infinity must be rejected with a ValueError."""
        arr = np.ones((1, 1))
        arr[0, 0] = np.inf

        with pytest.raises(
            ValueError,
            match="Input contains NaN, infinity or a value too large for dtype",
        ):
            assert_all_finite(arr)
64 changes: 64 additions & 0 deletions tests/structure/test_notears.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,22 @@ def test_non_numeric_data_raises_error(self):
with pytest.raises(ValueError, match="All columns must have numeric data.*"):
from_pandas(pd.DataFrame(data=["x"], columns=["a"]))

def test_array_with_nan_raises_error(self):
    """
    Providing a data set including nan should result in a Value Error explaining that data contains nan.
    This error is useful to catch and handle gracefully, because otherwise the user would have empty structures.
    """
    data_with_nan = pd.DataFrame(data=[np.nan], columns=["a"])

    # ``match`` is applied as a regex search, so a plain prefix suffices;
    # a trailing ``*`` would only quantify the preceding character.
    with pytest.raises(
        ValueError,
        match="Input contains NaN, infinity or a value too large for dtype",
    ):
        from_pandas(data_with_nan)

def test_array_with_inf_raises_error(self):
    """
    A data frame holding an infinite value must make from_pandas raise a ValueError.
    Catching this lets callers handle it gracefully rather than ending up with empty structures.
    """
    data_with_inf = pd.DataFrame(data=[np.inf], columns=["a"])

    with pytest.raises(ValueError):
        from_pandas(data_with_inf)

def test_single_iter_gets_converged_fail_warnings(self, train_data_idx):
"""
With a single iteration on this dataset, learn_structure fails to converge and should give warnings.
Expand Down Expand Up @@ -206,6 +222,22 @@ def test_non_numeric_data_raises_error(self):
with pytest.raises(ValueError, match="All columns must have numeric data.*"):
from_pandas_lasso(pd.DataFrame(data=["x"], columns=["a"]), 0.1)

def test_array_with_nan_raises_error(self):
    """
    A data frame holding a nan value must make from_pandas_lasso raise a ValueError.
    Catching this lets callers handle it gracefully rather than ending up with empty structures.
    """
    data_with_nan = pd.DataFrame(data=[np.nan], columns=["a"])

    with pytest.raises(ValueError):
        from_pandas_lasso(data_with_nan, 0.1)

def test_array_with_inf_raises_error(self):
    """
    A data frame holding an infinite value must make from_pandas_lasso raise a ValueError.
    Catching this lets callers handle it gracefully rather than ending up with empty structures.
    """
    data_with_inf = pd.DataFrame(data=[np.inf], columns=["a"])

    with pytest.raises(ValueError):
        from_pandas_lasso(data_with_inf, 0.1)

def test_single_iter_gets_converged_fail_warnings(self, train_data_idx):
"""
With a single iteration on this dataset, learn_structure fails to converge and should give warnings.
Expand Down Expand Up @@ -369,6 +401,22 @@ def test_empty_data_raises_error(self):
with pytest.raises(ValueError):
from_numpy(np.empty([0, 5]))

def test_array_with_nan_raises_error(self):
    """
    An array filled with nan values must make from_numpy raise a ValueError.
    Catching this lets callers handle it gracefully rather than ending up with empty structures.
    """
    all_nan = np.ones([3, 5]) * np.nan

    with pytest.raises(ValueError):
        from_numpy(all_nan)

def test_array_with_inf_raises_error(self):
    """
    An array filled with infinite values must make from_numpy raise a ValueError.
    Catching this lets callers handle it gracefully rather than ending up with empty structures.
    """
    all_inf = np.ones([3, 5]) * np.inf

    with pytest.raises(ValueError):
        from_numpy(all_inf)

def test_single_iter_gets_converged_fail_warnings(self, train_data_idx):
"""
With a single iteration on this dataset, learn_structure fails to converge and should give warnings.
Expand Down Expand Up @@ -489,6 +537,22 @@ def test_empty_data_raises_error(self):
with pytest.raises(ValueError):
from_numpy_lasso(np.empty([0, 5]), 0.1)

def test_array_with_nan_raises_error(self):
    """
    An array filled with nan values must make from_numpy_lasso raise a ValueError.
    Catching this lets callers handle it gracefully rather than ending up with empty structures.
    """
    all_nan = np.ones([3, 5]) * np.nan

    with pytest.raises(ValueError):
        from_numpy_lasso(all_nan, 0.1)

def test_array_with_inf_raises_error(self):
    """
    An array filled with infinite values must make from_numpy_lasso raise a ValueError.
    Catching this lets callers handle it gracefully rather than ending up with empty structures.
    """
    all_inf = np.ones([3, 5]) * np.inf

    with pytest.raises(ValueError):
        from_numpy_lasso(all_inf, 0.1)

def test_single_iter_gets_converged_fail_warnings(self, train_data_idx):
"""
With a single iteration on this dataset, learn_structure fails to converge and should give warnings.
Expand Down