improve test coverage

keras-team · Aug 3, 2020 · 2d922b4 · 2d922b4
1 parent 9b891f0
commit 2d922b4
Show file tree

Hide file tree

Showing 9 changed files with 240 additions and 12 deletions.
diff --git a/autokeras/adapters/output_adapter.py b/autokeras/adapters/output_adapter.py
@@ -75,6 +75,14 @@ def from_config(cls, config):
         obj.label_encoder = encoders.deserialize(encoder)
         return obj
 
+    def _check_data_shape(self, shape):
+        if len(shape) != 2 or shape[1] != self.num_classes:  # must be 2-D
+            raise ValueError(
+                "Expect one hot encoded labels to have shape "
+                "(num_instances, {num_classes}), "
+                "but got {shape}.".format(num_classes=self.num_classes, shape=shape)
+            )
+
     def fit_before_convert(self, dataset):
         """Fit the encoder."""
         # If in tf.data.Dataset, must be encoded already.
@@ -91,7 +99,7 @@ def fit_before_convert(self, dataset):
         # TODO: support raw string labels for multi-label.
         if len(dataset.flatten()) != len(dataset):
             if self.num_classes:
-                self._check_data_shape(dataset.shape[1:])
+                self._check_data_shape(dataset.shape)
             return
 
         # Fit encoder.

diff --git a/autokeras/blocks/heads.py b/autokeras/blocks/heads.py
@@ -162,13 +162,6 @@ def get_config(self):
         return config
 
     def build(self, hp, inputs=None):
-        if self.output_dim and self.output_shape[-1] != self.output_dim:
-            raise ValueError(
-                "The data doesn't match the output_dim. "
-                "Expecting {} but got {}".format(
-                    self.output_dim, self.output_shape[-1]
-                )
-            )
         inputs = nest.flatten(inputs)
         utils.validate_num_inputs(inputs, 1)
         input_node = inputs[0]

diff --git a/setup.cfg b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = autokeras
-summary = Auto-Keras provides functions to automatically search for architecture and hyperparameters of deep learning models.
+summary = AutoKeras provides functions to automatically search for architecture and hyperparameters of deep learning models.
 license = MIT
 description-file = README.md
 classifier =

diff --git a/shell/format.sh b/shell/format.sh
@@ -6,7 +6,7 @@ do
   if ! grep -q Copyright $i
   then
     echo $i
-    cat copyright.txt $i >$i.new && mv $i.new $i
+    cat shell/copyright.txt $i >$i.new && mv $i.new $i
   fi
 done
 

diff --git a/tests/autokeras/adapters/output_adapter_test.py b/tests/autokeras/adapters/output_adapter_test.py
@@ -33,14 +33,35 @@ def test_clf_from_config_fit_transform_to_dataset():
     assert isinstance(adapter.transform(np.array(["a", "b", "a"])), tf.data.Dataset)
 
 
-def test_transform_pd_series_to_dataset():
+def test_clf_head_transform_pd_series_to_dataset():
     adapter = output_adapter.ClassificationHeadAdapter(name="a")
 
     y = adapter.fit_transform(pd.read_csv(utils.TEST_CSV_PATH).pop("survived"))
 
     assert isinstance(y, tf.data.Dataset)
 
 
+def test_clf_head_transform_df_to_dataset():
+    adapter = output_adapter.ClassificationHeadAdapter(name="a")
+
+    y = adapter.fit_transform(
+        pd.DataFrame(utils.generate_one_hot_labels(dtype="np", num_classes=10))
+    )
+
+    assert isinstance(y, tf.data.Dataset)
+
+
+def test_clf_head_one_hot_shape_error():
+    adapter = output_adapter.ClassificationHeadAdapter(name="a", num_classes=9)
+
+    with pytest.raises(ValueError) as info:
+        adapter.fit_transform(
+            utils.generate_one_hot_labels(dtype="np", num_classes=10)
+        )
+
+    assert "Expect one hot encoded labels to have shape" in str(info.value)
+
+
 def test_unsupported_types_error():
     adapter = output_adapter.ClassificationHeadAdapter(name="a")
 
@@ -61,7 +82,9 @@ def test_one_class_error():
 def test_infer_ten_classes():
     adapter = output_adapter.ClassificationHeadAdapter(name="a")
 
-    adapter.fit(utils.generate_one_hot_labels(dtype="dataset", num_classes=10))
+    adapter.fit_transform(
+        utils.generate_one_hot_labels(dtype="dataset", num_classes=10)
+    )
 
     assert adapter.num_classes == 10
 
@@ -117,3 +140,19 @@ def test_multi_label_postprocess_to_one_hot_labels():
     y = adapter.postprocess(y)
 
     assert set(y.flatten().tolist()) == set([1, 0])
+
+
+def test_reg_head_transform_pd_series():
+    adapter = output_adapter.RegressionHeadAdapter(name="a")
+
+    y = adapter.fit_transform(pd.read_csv(utils.TEST_CSV_PATH).pop("survived"))
+
+    assert isinstance(y, tf.data.Dataset)
+
+
+def test_reg_head_transform_1d_np():
+    adapter = output_adapter.RegressionHeadAdapter(name="a")
+
+    y = adapter.fit_transform(np.random.rand(10))
+
+    assert isinstance(y, tf.data.Dataset)
diff --git a/tests/autokeras/blocks/heads_test.py b/tests/autokeras/blocks/heads_test.py
@@ -15,6 +15,7 @@
 import kerastuner
 import numpy as np
 import tensorflow as tf
+from tensorflow.python.util import nest
 
 import autokeras as ak
 from autokeras import blocks
@@ -52,6 +53,18 @@ def test_multi_label_loss():
     assert head.loss.name == "binary_crossentropy"
 
 
+def test_clf_head_build_with_zero_dropout_return_tensor():
+    block = head_module.ClassificationHead(dropout=0)
+    block.output_shape = (8,)
+
+    outputs = block.build(
+        kerastuner.HyperParameters(), tf.keras.Input(shape=(5,), dtype=tf.float32),
+    )
+
+    assert len(nest.flatten(outputs)) == 1
+    assert isinstance(nest.flatten(outputs)[0], tf.Tensor)
+
+
 def test_segmentation():
     y = np.array(["a", "a", "c", "b"])
     head = head_module.SegmentationHead(name="a")

diff --git a/tests/autokeras/blocks/preprocessing_test.py b/tests/autokeras/blocks/preprocessing_test.py
@@ -32,6 +32,66 @@ def test_augment_build_return_tensor():
     assert isinstance(nest.flatten(outputs)[0], tf.Tensor)
 
 
+def test_augment_build_with_translation_factor_range_return_tensor():
+    block = blocks.ImageAugmentation(translation_factor=(0, 0.1))
+
+    outputs = block.build(
+        kerastuner.HyperParameters(),
+        tf.keras.Input(shape=(32, 32, 3), dtype=tf.float32),
+    )
+
+    assert len(nest.flatten(outputs)) == 1
+    assert isinstance(nest.flatten(outputs)[0], tf.Tensor)
+
+
+def test_augment_build_with_no_flip_return_tensor():
+    block = blocks.ImageAugmentation(vertical_flip=False, horizontal_flip=False)
+
+    outputs = block.build(
+        kerastuner.HyperParameters(),
+        tf.keras.Input(shape=(32, 32, 3), dtype=tf.float32),
+    )
+
+    assert len(nest.flatten(outputs)) == 1
+    assert isinstance(nest.flatten(outputs)[0], tf.Tensor)
+
+
+def test_augment_build_with_vflip_only_return_tensor():
+    block = blocks.ImageAugmentation(vertical_flip=True, horizontal_flip=False)
+
+    outputs = block.build(
+        kerastuner.HyperParameters(),
+        tf.keras.Input(shape=(32, 32, 3), dtype=tf.float32),
+    )
+
+    assert len(nest.flatten(outputs)) == 1
+    assert isinstance(nest.flatten(outputs)[0], tf.Tensor)
+
+
+def test_augment_build_with_zoom_factor_return_tensor():
+    block = blocks.ImageAugmentation(zoom_factor=0.1)
+
+    outputs = block.build(
+        kerastuner.HyperParameters(),
+        tf.keras.Input(shape=(32, 32, 3), dtype=tf.float32),
+    )
+
+    assert len(nest.flatten(outputs)) == 1
+    assert isinstance(nest.flatten(outputs)[0], tf.Tensor)
+
+
+def test_augment_build_with_contrast_factor_return_tensor():
+    block = blocks.ImageAugmentation(contrast_factor=0.1)
+
+    outputs = block.build(
+        kerastuner.HyperParameters(),
+        tf.keras.Input(shape=(32, 32, 3), dtype=tf.float32),
+    )
+
+    assert len(nest.flatten(outputs)) == 1
+    assert isinstance(nest.flatten(outputs)[0], tf.Tensor)
+
+
 def test_augment_deserialize_to_augment():
     serialized_block = blocks.serialize(blocks.ImageAugmentation())
 
@@ -61,6 +121,17 @@ def test_ngram_build_return_tensor():
     assert isinstance(nest.flatten(outputs)[0], tf.Tensor)
 
 
+def test_ngram_build_with_ngrams_return_tensor():
+    block = blocks.TextToNgramVector(ngrams=2)
+
+    outputs = block.build(
+        kerastuner.HyperParameters(), tf.keras.Input(shape=(1,), dtype=tf.string)
+    )
+
+    assert len(nest.flatten(outputs)) == 1
+    assert isinstance(nest.flatten(outputs)[0], tf.Tensor)
+
+
 def test_ngram_deserialize_to_ngram():
     serialized_block = blocks.serialize(blocks.TextToNgramVector())
 
@@ -90,6 +161,17 @@ def test_int_seq_build_return_tensor():
     assert isinstance(nest.flatten(outputs)[0], tf.Tensor)
 
 
+def test_int_seq_build_with_seq_len_return_tensor():
+    block = blocks.TextToIntSequence(output_sequence_length=50)
+
+    outputs = block.build(
+        kerastuner.HyperParameters(), tf.keras.Input(shape=(1,), dtype=tf.string)
+    )
+
+    assert len(nest.flatten(outputs)) == 1
+    assert isinstance(nest.flatten(outputs)[0], tf.Tensor)
+
+
 def test_int_seq_deserialize_to_int_seq():
     serialized_block = blocks.serialize(blocks.TextToIntSequence())
 

diff --git a/tests/autokeras/tasks/structured_data_test.py b/tests/autokeras/tasks/structured_data_test.py
@@ -33,6 +33,40 @@ def test_raise_error_unknown_str_in_col_type(tmp_path):
     assert 'Column_types should be either "categorical"' in str(info.value)
 
 
+def test_raise_error_unknown_name_in_col_type(tmp_path):
+    with pytest.raises(ValueError) as info:
+        ak.StructuredDataClassifier(
+            column_types={"age": "numerical", "parch": "categorical"},
+            column_names=["age", "fare"],
+            directory=tmp_path,
+            seed=utils.SEED,
+        )
+
+    assert "Column_names and column_types are mismatched" in str(info.value)
+
+
+@mock.patch("autokeras.AutoModel.fit")
+@mock.patch("autokeras.AutoModel.evaluate")
+def test_structured_clf_evaluate_call_automodel_evaluate(evaluate, fit, tmp_path):
+    auto_model = ak.StructuredDataClassifier(directory=tmp_path, seed=utils.SEED)
+
+    auto_model.fit(x=utils.TRAIN_CSV_PATH, y="survived")
+    auto_model.evaluate(x=utils.TRAIN_CSV_PATH, y="survived")
+
+    assert evaluate.called  # not is_called: auto-created Mock attrs are truthy
+
+
+@mock.patch("autokeras.AutoModel.fit")
+@mock.patch("autokeras.AutoModel.predict")
+def test_structured_clf_predict_csv_call_automodel_predict(predict, fit, tmp_path):
+    auto_model = ak.StructuredDataClassifier(directory=tmp_path, seed=utils.SEED)
+
+    auto_model.fit(x=utils.TRAIN_CSV_PATH, y="survived")
+    auto_model.predict(x=utils.TEST_CSV_PATH)
+
+    assert predict.called  # not is_called: auto-created Mock attrs are truthy
+
+
 @mock.patch("autokeras.AutoModel.fit")
 def test_structured_clf_fit_call_auto_model_fit(fit, tmp_path):
     auto_model = ak.StructuredDataClassifier(directory=tmp_path, seed=utils.SEED)

diff --git a/tests/autokeras/tasks/time_series_forecaster_test.py b/tests/autokeras/tasks/time_series_forecaster_test.py
@@ -0,0 +1,59 @@
+# Copyright 2020 The AutoKeras Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from unittest import mock
+
+import autokeras as ak
+from tests import utils
+
+
+@mock.patch("autokeras.AutoModel.fit")
+@mock.patch("autokeras.AutoModel.evaluate")
+def test_tsf_evaluate_call_automodel_evaluate(evaluate, fit, tmp_path):
+    auto_model = ak.TimeseriesForecaster(
+        lookback=10, directory=tmp_path, seed=utils.SEED
+    )
+
+    auto_model.fit(x=utils.TRAIN_CSV_PATH, y="survived")
+    auto_model.evaluate(x=utils.TRAIN_CSV_PATH, y="survived")
+
+    assert evaluate.called  # not is_called: auto-created Mock attrs are truthy
+
+
+@mock.patch("autokeras.AutoModel.fit")
+@mock.patch("autokeras.AutoModel.predict")
+def test_tsf_predict_call_automodel_predict(predict, fit, tmp_path):
+    auto_model = ak.TimeseriesForecaster(
+        lookback=10, directory=tmp_path, seed=utils.SEED
+    )
+
+    auto_model.fit(x=utils.TRAIN_CSV_PATH, y="survived")
+    auto_model.predict(x=utils.TRAIN_CSV_PATH, y="survived")
+
+    assert predict.called  # not is_called: auto-created Mock attrs are truthy
+
+
+@mock.patch("autokeras.AutoModel.fit")
+def test_tsf_fit_call_automodel_fit(fit, tmp_path):
+    auto_model = ak.TimeseriesForecaster(
+        lookback=10, directory=tmp_path, seed=utils.SEED
+    )
+
+    auto_model.fit(
+        x=utils.TRAIN_CSV_PATH,
+        y="survived",
+        validation_data=(utils.TRAIN_CSV_PATH, "survived"),
+    )
+
+    assert fit.called  # not is_called: auto-created Mock attrs are truthy