fix: Update example test files (#216)
Signed-off-by: Ahdra Merali <ahdra.merali@quantumblack.com>
AhdraMeraliQB committed Apr 17, 2024
1 parent 835c559 commit 580f7f0
Showing 10 changed files with 220 additions and 80 deletions.
@@ -4,8 +4,6 @@
Tests should be placed in ``src/tests``, in modules that mirror your
project's structure, and in files named test_*.py. They are simply functions
named ``test_*`` which test a unit of logic.
To run the tests, run ``kedro test`` from the project root directory.
"""
from pathlib import Path

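For illustration only (not part of this commit), a test following the convention described in the docstring above might look like the sketch below; the module path and function names are hypothetical. Such tests are run from the project root with pytest, e.g. pytest src/tests.

# src/tests/pipelines/test_example.py  (hypothetical; mirrors the project's package layout)
def strip_title(value: str) -> str:
    """Toy unit of logic, used only for this illustration."""
    return value.strip().title()


def test_strip_title():
    # A plain ``test_*`` function that pytest discovers and runs.
    assert strip_title("  hello kedro  ") == "Hello Kedro"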
@@ -1,29 +1,67 @@
import logging
import pandas as pd
import pytest
from kedro.io import DataCatalog
from kedro.runner import SequentialRunner
from {{ cookiecutter.python_package }}.pipelines.data_science import create_pipeline as create_ds_pipeline
from {{ cookiecutter.python_package }}.pipelines.data_science.nodes import split_data


@pytest.fixture
def dummy_data():
return pd.DataFrame(
{"engines": [1, 2, 3],
"crew": [4, 5, 6],
"passenger_capacity": [5, 6, 7],
"price": [120, 290, 30]})
{
"engines": [1, 2, 3],
"crew": [4, 5, 6],
"passenger_capacity": [5, 6, 7],
"price": [120, 290, 30],
}
)

@pytest.fixture
def dummy_parameters():
parameters = {"model_options":
{"test_size": 0.2,
"random_state": 3,
"features": ["engines", "passenger_capacity", "crew"]}
}
parameters = {
"model_options": {
"test_size": 0.2,
"random_state": 3,
"features": ["engines", "passenger_capacity", "crew"],
}
}
return parameters

class TestDataScienceNodes:
def test_split_data(self, dummy_data, dummy_parameters):
X_train, X_test, y_train, y_test = split_data(dummy_data, dummy_parameters["model_options"])
assert len(X_train) == 2 # noqa: PLR2004
assert len(y_train) == 2 # noqa: PLR2004
assert len(X_test) == 1
assert len(y_test) == 1

def test_split_data(dummy_data, dummy_parameters):
X_train, X_test, y_train, y_test = split_data(
dummy_data, dummy_parameters["model_options"]
)
assert len(X_train) == 2
assert len(y_train) == 2
assert len(X_test) == 1
assert len(y_test) == 1

def test_split_data_missing_price(dummy_data, dummy_parameters):
dummy_data_missing_price = dummy_data.drop(columns="price")
with pytest.raises(KeyError) as e_info:
X_train, X_test, y_train, y_test = split_data(dummy_data_missing_price, dummy_parameters["model_options"])

assert "price" in str(e_info.value)

def test_data_science_pipeline(caplog, dummy_data, dummy_parameters):
pipeline = (
create_ds_pipeline()
.from_nodes("split_data_node")
.to_nodes("evaluate_model_node")
)
catalog = DataCatalog()
catalog.add_feed_dict(
{
"model_input_table" : dummy_data,
"params:model_options": dummy_parameters["model_options"],
}
)

caplog.set_level(logging.DEBUG, logger="kedro")
successful_run_msg = "Pipeline execution completed successfully."

SequentialRunner().run(pipeline, catalog)

assert successful_run_msg in caplog.text
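For context, here is a minimal sketch of the split_data node these tests exercise, assuming a spaceflights-style implementation built on scikit-learn's train_test_split; the template's actual code may differ in details.

import pandas as pd
from sklearn.model_selection import train_test_split


def split_data(data: pd.DataFrame, parameters: dict):
    # Features come from params:model_options; "price" is assumed to be the target column,
    # which is why dropping it in test_split_data_missing_price raises a KeyError.
    X = data[parameters["features"]]
    y = data["price"]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=parameters["test_size"], random_state=parameters["random_state"]
    )
    return X_train, X_test, y_train, y_test

With the three-row dummy_data fixture and test_size=0.2, train_test_split leaves one row in the test split and two in the train split, which is what the length assertions above check.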
