add tests

made-mlops-2022 · Nov 2, 2022 · 873f0e1 · 873f0e1
1 parent 807767e
commit 873f0e1
Show file tree

Hide file tree

Showing 10 changed files with 155 additions and 48 deletions.
diff --git a/README.md b/README.md
@@ -1 +1,36 @@
-# bykov_vladimir
+├── LICENSE\
+├── Makefile           <- Makefile with commands like `make data` or `make train`\
+├── README.md          <- The top-level README for developers using this project.\
+├── data\
+│   ├── external       <- Data from third party sources.\
+│   ├── interim        <- Intermediate data that has been transformed.\
+│   ├── processed      <- The final, canonical data sets for modeling.\
+│   └── raw            <- The original, immutable data dump.\
+│\
+├── docs               <- A default Sphinx project; see sphinx-doc.org for details\
+│\
+├── models             <- Trained and serialized models, model predictions, or model summaries\
+│\
+├── notebooks          <- Jupyter notebooks. Naming convention is a number (for ordering),\
+│                         the creator's initials, and a short `-` delimited description, e.g.\
+│                         `1.0-jqp-initial-data-exploration`.\
+│\
+├── references         <- Data dictionaries, manuals, and all other explanatory materials.\
+│\
+├── reports            <- Generated analysis as HTML, PDF, LaTeX, etc.\
+│   └── figures        <- Generated graphics and figures to be used in reporting\
+│\
+├── requirements.txt   <- The requirements file for reproducing the analysis environment, e.g.\
+│                         generated with `pip freeze > requirements.txt`\
+│\
+├── setup.py           <- makes project pip installable (pip install -e .) so src can be imported\
+├── src                <- Source code for use in this project.\
+│   ├── __init__.py    <- Makes src a Python module\
+│   │\
+│   ├── data           <- code to download or generate data\
+│   │\
+│   ├── features       <- code to turn raw data into features for modeling\
+│   │\
+│   ├── models         <- code to train models and then use trained models to make predictions\
+│   │\
+└── tox.ini            <- tox file with settings for running tox; see tox.readthedocs.io\
diff --git a/configs/predict_config.yaml b/configs/predict_config.yaml
@@ -1,6 +1,6 @@
-input_data_path: "../../data/raw/heart_cleveland_upload.csv"
-input_model_path: "../../models/model.pkl"
-output_predictions_path: "../../data/predictions/predictions.csv"
+input_data_path: "./tests/test_data/synthetic_data.csv"
+input_model_path: "./models/model.pkl"
+output_predictions_path: "./data/predictions/predictions.csv"
 feature_params:
   categorical_features:
     - 'sex'

diff --git a/configs/train_config.yaml b/configs/train_config.yaml
diff --git a/src/model/predict_model_pipeline.py b/src/model/predict_model_pipeline.py
@@ -50,6 +50,8 @@ def run_predict_pipeline(predict_params: PredictParams):
     logger.info(f"Setting prediction to {predict_params.output_predictions_path}...")
     pd.DataFrame(predictions).to_csv(predict_params.output_predictions_path)
 
+    return predictions
+
 
 @hydra.main(version_base=None, config_path="../../configs/.", config_name="predict_config")
 def predict_pipeline(config_params: PredictParams) -> None:

diff --git a/src/model/train_model_pipeline.py b/src/model/train_model_pipeline.py
@@ -2,7 +2,9 @@
 
 import logging
 import sys
-import hydra
+from typing import Dict
+
+import hydra.core
 from hydra.core.config_store import ConfigStore
 
 
@@ -25,7 +27,7 @@
 logger.propagate = False
 
 
-def run_training_pipeline(training_params: TrainingParams) -> None:
+def run_training_pipeline(training_params: TrainingParams) -> dict[str, float]:
     """Main training pipeline"""
 
     # read data
@@ -67,9 +69,10 @@ def run_training_pipeline(training_params: TrainingParams) -> None:
     # serialize model
     logger.info(f"Serializing model to {training_params.output_model_path} ...")
     serialize_model(model, training_params.output_model_path)
+    return metrics
 
 
-@hydra.main(version_base=None, config_path="../../configs/.", config_name="train_config")
+@hydra.main(version_base=None, config_path="../../configs", config_name="train_config_rf")
 def train_pipeline(config_params: TrainingParams) -> None:
     """Function to read terminal arguments"""
     run_training_pipeline(config_params)

diff --git a/tests/features_test/build_features_test.py b/tests/features_test/build_features_test.py
@@ -1,9 +1,25 @@
 import unittest
 
+from src.features.build_features import Transformer
+from src.data.make_dataset import read_data
+from src.enities.feature_params import FeatureParams
 
-class MyTestCase(unittest.TestCase):
-    def test_something(self):
-        self.assertEqual(True, False)  # add assertion here
+
+class BuildFeaturesTest(unittest.TestCase):
+    def setUp(self) -> None:  # feature configuration shared by the tests below
+        self.params = FeatureParams(
+            target='condition',
+            features_to_drop=[],
+            numerical_features=['age', 'trestbps', 'chol', 'thalach', 'oldpeak'],
+            categorical_features=['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal'],
+        )
+
+    def test_transformer(self):  # fit on the synthetic CSV, then check the transformed shape
+        feature_builder = Transformer(self.params)
+        raw_frame = read_data("./tests/test_data/synthetic_data.csv")
+        feature_builder.fit(raw_frame)
+        transformed = feature_builder.transform()
+        self.assertEqual(transformed.shape, (50, 15))
 
 
 if __name__ == '__main__':

diff --git a/tests/make_dataset_test/make_dataset_test.py b/tests/make_dataset_test/make_dataset_test.py
@@ -1,10 +1,13 @@
 import unittest
 
 
-class MyTestCase(unittest.TestCase):
-    def test_something(self):
+class MakeDatasetTest(unittest.TestCase):  # placeholders for the make_dataset tests
+    def test_train_val_split(self):  # NOTE(review): body is still the template stub and always fails
         self.assertEqual(True, False)  # add assertion here
 
+    def test_read_data(self):  # skip explicitly so an unwritten test is not reported as passing
+        self.skipTest("not implemented yet")
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/model_test/predict_model_pipeline_test.py b/tests/model_test/predict_model_pipeline_test.py
@@ -1,9 +1,32 @@
 import unittest
+import os
 
+from src.model.predict_model_pipeline import run_predict_pipeline
+from src.enities.predict_params import PredictParams
+from src.enities.feature_params import FeatureParams
 
-class MyTestCase(unittest.TestCase):
-    def test_something(self):
-        self.assertEqual(True, False)  # add assertion here
+
+class PredictModelPipeline(unittest.TestCase):
+    """Exercise run_predict_pipeline with the paths configured in setUp."""
+    def setUp(self) -> None:
+        """Build the PredictParams instance used by the tests in this class."""
+        feature_params = FeatureParams(
+            categorical_features=['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal'],
+            numerical_features=['age', 'trestbps', 'chol', 'thalach', 'oldpeak'],
+            features_to_drop=[],
+            target='condition'
+        )
+        self.predict_params = PredictParams(
+            input_data_path="tests/test_data/data_for_predict.csv",
+            input_model_path="models/model_rf.pkl",
+            output_predictions_path="tests/test_data/predictions.csv",
+            feature_params=feature_params
+        )
+
+    def test_predict_pipeline(self):  # unittest only collects methods whose names start with "test"
+        predictions = run_predict_pipeline(self.predict_params)
+        self.assertEqual(predictions.shape, (50, 1))
+        self.assertTrue(os.path.exists(self.predict_params.output_predictions_path))
 
 
 if __name__ == '__main__':

diff --git a/tests/model_test/predict_model_tests.py b/tests/model_test/predict_model_tests.py
@@ -1,9 +1,21 @@
 import unittest
 
 
-class MyTestCase(unittest.TestCase):
-    def test_something(self):
-        self.assertEqual(True, False)  # add assertion here
+class PredictModelTests(unittest.TestCase):  # placeholders: skipped explicitly until they are written
+    def test_load_model(self):
+        self.skipTest("not implemented yet")
+
+    def test_serialize_model(self):
+        self.skipTest("not implemented yet")
+
+    def test_evaluate(self):
+        self.skipTest("not implemented yet")
+
+    def test_predict_model(self):
+        self.skipTest("not implemented yet")
+
+    def test_train_model(self):
+        self.skipTest("not implemented yet")
 
 
 if __name__ == '__main__':

diff --git a/tests/model_test/train_model_pipeline_test.py b/tests/model_test/train_model_pipeline_test.py
@@ -1,9 +1,49 @@
 import unittest
+import os
 
+from src.model.train_model_pipeline import run_training_pipeline
+from src.enities.training_params import TrainingParams
+from src.enities.splitting_params import SplittingParams
+from src.enities.model_params import ModelParams
+from src.enities.feature_params import FeatureParams
 
-class MyTestCase(unittest.TestCase):
-    def test_something(self):
-        self.assertEqual(True, False)  # add assertion here
+
+class TrainModelPipelineTests(unittest.TestCase):
+    """Run run_training_pipeline on the synthetic dataset; check its metrics and the saved model."""
+    def setUp(self) -> None:
+        split_params = SplittingParams(
+            val_size=0.2,
+            random_state=42
+        )
+        model_params = ModelParams(
+            model="RF",
+            random_state=42,
+            n_estimators=100
+        )
+        feature_params = FeatureParams(
+            categorical_features=['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal'],
+            numerical_features=['age', 'trestbps', 'chol', 'thalach', 'oldpeak'],
+            features_to_drop=[],
+            target='condition'
+        )
+        # output_model_path must be the file the test later checks for (assumes ./models exists).
+        self.train_params = TrainingParams(
+            input_data_path="tests/test_data/synthetic_data.csv",
+            output_model_path="./models/model.pkl",
+            splitting_params=split_params,
+            model_params=model_params,
+            feature_params=feature_params
+        )
+
+    def test_train_model(self):  # unittest only collects methods whose names start with "test"
+        metrics = run_training_pipeline(self.train_params)
+        # Each returned metric is expected to be strictly positive.
+        self.assertGreater(metrics["accuracy"], 0)
+        self.assertGreater(metrics["recall"], 0)
+        self.assertGreater(metrics["f1"], 0)
+        self.assertGreater(metrics["precision"], 0)
+        # The pipeline serializes the model to the configured output path.
+        self.assertTrue(os.path.exists(self.train_params.output_model_path))
 
 
 if __name__ == '__main__':