Skip to content

Commit

Permalink
add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
vBykoff committed Nov 2, 2022
1 parent 807767e commit 873f0e1
Show file tree
Hide file tree
Showing 10 changed files with 155 additions and 48 deletions.
37 changes: 36 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,36 @@
# bykov_vladimir
├── LICENSE\
├── Makefile <- Makefile with commands like `make data` or `make train`\
├── README.md <- The top-level README for developers using this project.\
├── data\
│   ├── external <- Data from third party sources.\
│   ├── interim <- Intermediate data that has been transformed.\
│   ├── processed <- The final, canonical data sets for modeling.\
│   └── raw <- The original, immutable data dump.\
\
├── docs <- A default Sphinx project; see sphinx-doc.org for details\
\
├── models <- Trained and serialized models, model predictions, or model summaries\
\
├── notebooks <- Jupyter notebooks. Naming convention is a number (for ordering),\
│ the creator's initials, and a short `-` delimited description, e.g.\
`1.0-jqp-initial-data-exploration`.\
\
├── references <- Data dictionaries, manuals, and all other explanatory materials.\
\
├── reports <- Generated analysis as HTML, PDF, LaTeX, etc.\
│   └── figures <- Generated graphics and figures to be used in reporting\
\
├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g.\
│ generated with `pip freeze > requirements.txt`\
\
├── setup.py <- makes project pip installable (pip install -e .) so src can be imported\
├── ml_example <- Source code for use in this project.\
│   ├── __init__.py <- Makes src a Python module\
│ │\
│   ├── data <- code to download or generate data\
│ │\
│   ├── features <- code to turn raw data into features for modeling\
│ │\
│   ├── models <- code to train models and then use trained models to make predictions\
│ │\
└── tox.ini <- tox file with settings for running tox; see tox.readthedocs.io\
6 changes: 3 additions & 3 deletions configs/predict_config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
input_data_path: "../../data/raw/heart_cleveland_upload.csv"
input_model_path: "../../models/model.pkl"
output_predictions_path: "../../data/predictions/predictions.csv"
input_data_path: "./tests/test_data/synthetic_data.csv"
input_model_path: "./models/model.pkl"
output_predictions_path: "./data/predictions/predictions.csv"
feature_params:
categorical_features:
- 'sex'
Expand Down
27 changes: 0 additions & 27 deletions configs/train_config.yaml

This file was deleted.

2 changes: 2 additions & 0 deletions src/model/predict_model_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ def run_predict_pipeline(predict_params: PredictParams):
logger.info(f"Setting prediction to {predict_params.output_predictions_path}...")
pd.DataFrame(predictions).to_csv(predict_params.output_predictions_path)

return predictions


@hydra.main(version_base=None, config_path="../../configs/.", config_name="predict_config")
def predict_pipeline(config_params: PredictParams) -> None:
Expand Down
9 changes: 6 additions & 3 deletions src/model/train_model_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

import logging
import sys
import hydra
from typing import Dict

import hydra.core
from hydra.core.config_store import ConfigStore


Expand All @@ -25,7 +27,7 @@
logger.propagate = False


def run_training_pipeline(training_params: TrainingParams) -> None:
def run_training_pipeline(training_params: TrainingParams) -> dict[str, float]:
"""Main training pipeline"""

# read data
Expand Down Expand Up @@ -67,9 +69,10 @@ def run_training_pipeline(training_params: TrainingParams) -> None:
# serialize model
logger.info(f"Serializing model to {training_params.output_model_path} ...")
serialize_model(model, training_params.output_model_path)
return metrics


@hydra.main(version_base=None, config_path="../../configs/.", config_name="train_config")
@hydra.main(version_base=None, config_path="../../configs", config_name="train_config_rf")
def train_pipeline(config_params: TrainingParams) -> None:
"""Function to read terminal arguments"""
run_training_pipeline(config_params)
Expand Down
22 changes: 19 additions & 3 deletions tests/features_test/build_features_test.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,25 @@
import unittest

from src.features.build_features import Transformer
from src.data.make_dataset import read_data
from src.enities.feature_params import FeatureParams

class MyTestCase(unittest.TestCase):
def test_something(self):
self.assertEqual(True, False) # add assertion here

class BuildFeaturesTest(unittest.TestCase):
    """Checks the feature ``Transformer`` against the synthetic CSV fixture."""

    def setUp(self) -> None:
        """Build the feature configuration shared by the tests in this class."""
        categorical = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal']
        numerical = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
        self.params = FeatureParams(
            categorical_features=categorical,
            numerical_features=numerical,
            features_to_drop=[],
            target='condition',
        )

    def test_transformer(self):
        """Fit the transformer on the fixture and check the output shape."""
        # Relative path: the test has to be started from the directory that
        # contains ``tests/``.
        raw_frame = read_data("./tests/test_data/synthetic_data.csv")

        feature_builder = Transformer(self.params)
        feature_builder.fit(raw_frame)
        # NOTE(review): transform() is called without arguments; presumably
        # the transformer keeps the frame passed to fit() -- confirm against
        # Transformer's signature.
        transformed = feature_builder.transform()

        self.assertEqual(transformed.shape, (50, 15))


if __name__ == '__main__':
Expand Down
7 changes: 5 additions & 2 deletions tests/make_dataset_test/make_dataset_test.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import unittest


class MyTestCase(unittest.TestCase):
def test_something(self):
class MakeDatasetTest(unittest.TestCase):
    """Placeholder tests for the dataset helpers.

    Neither test is implemented yet.  They are reported as *skipped* rather
    than left as an always-failing template assertion or a bare ``pass`` that
    would count as a passing test without checking anything.
    """

    def test_train_val_split(self):
        """Placeholder: no assertions have been written for this test yet."""
        self.skipTest("test_train_val_split is not implemented yet")

    def test_read_data(self):
        """Placeholder: no assertions have been written for this test yet."""
        self.skipTest("test_read_data is not implemented yet")


if __name__ == '__main__':
unittest.main()
29 changes: 26 additions & 3 deletions tests/model_test/predict_model_pipeline_test.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,32 @@
import unittest
import os

from src.model.predict_model_pipeline import run_predict_pipeline
from src.enities.predict_params import PredictParams
from src.enities.feature_params import FeatureParams

class MyTestCase(unittest.TestCase):
def test_something(self):
self.assertEqual(True, False) # add assertion here

class PredictModelPipeline(unittest.TestCase):
    """End-to-end check of ``run_predict_pipeline`` on the test fixture data."""

    def setUp(self) -> None:
        """Assemble the prediction parameters used by the test.

        All paths are relative, so the test has to be started from the
        directory that contains ``tests/`` and ``models/``.
        """
        feature_params = FeatureParams(
            categorical_features=['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal'],
            numerical_features=['age', 'trestbps', 'chol', 'thalach', 'oldpeak'],
            features_to_drop=[],
            target='condition'
        )
        self.predict_params = PredictParams(
            input_data_path="tests/test_data/data_for_predict.csv",
            input_model_path="models/model_rf.pkl",
            output_predictions_path="tests/test_data/predictions.csv",
            feature_params=feature_params
        )

    def test_predict_pipeline(self):
        """Run the prediction pipeline and check its output.

        Asserts the shape of the returned predictions and that a file exists
        at the configured output path afterwards.
        """
        # Previously named ``train_model_test``.  unittest (and pytest, for
        # TestCase subclasses) only collects methods whose names start with
        # ``test``, so the check was never executed.
        predictions = run_predict_pipeline(self.predict_params)
        self.assertEqual(predictions.shape, (50, 1))
        # Check the path that was actually handed to the pipeline instead of
        # repeating the literal, so the two cannot drift apart.
        self.assertTrue(os.path.exists(self.predict_params.output_predictions_path))


if __name__ == '__main__':
Expand Down
18 changes: 15 additions & 3 deletions tests/model_test/predict_model_tests.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,21 @@
import unittest


class MyTestCase(unittest.TestCase):
def test_something(self):
self.assertEqual(True, False) # add assertion here
class PredictModelTests(unittest.TestCase):
    """Placeholder tests for the model helper functions.

    None of these tests is implemented yet.  Each one is reported as
    *skipped* with an explicit reason, instead of one always-failing template
    assertion plus several bare ``pass`` bodies that would count as passing
    tests without checking anything.
    """

    def test_load_model(self):
        """Placeholder: no assertions have been written for this test yet."""
        self.skipTest("test_load_model is not implemented yet")

    def test_serialize_model(self):
        """Placeholder: no assertions have been written for this test yet."""
        self.skipTest("test_serialize_model is not implemented yet")

    def test_evaluate(self):
        """Placeholder: no assertions have been written for this test yet."""
        self.skipTest("test_evaluate is not implemented yet")

    def test_predict_model(self):
        """Placeholder: no assertions have been written for this test yet."""
        self.skipTest("test_predict_model is not implemented yet")

    def test_train_model(self):
        """Placeholder: no assertions have been written for this test yet."""
        self.skipTest("test_train_model is not implemented yet")


if __name__ == '__main__':
Expand Down
46 changes: 43 additions & 3 deletions tests/model_test/train_model_pipeline_test.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,49 @@
import unittest
import os

from src.model.train_model_pipeline import run_training_pipeline
from src.enities.training_params import TrainingParams
from src.enities.splitting_params import SplittingParams
from src.enities.model_params import ModelParams
from src.enities.feature_params import FeatureParams

class MyTestCase(unittest.TestCase):
def test_something(self):
self.assertEqual(True, False) # add assertion here

class TrainModelPipelineTests(unittest.TestCase):
    """End-to-end check of ``run_training_pipeline`` on the synthetic fixture."""

    # Single source of truth for where the trained model is written, so the
    # pipeline configuration and the existence assertion always agree.
    # Relative path: the test has to be started from the directory that
    # contains ``tests/`` and ``models/``.
    MODEL_PATH = "./models/model.pkl"

    def setUp(self) -> None:
        """Assemble the training parameters used by the test."""
        split_params = SplittingParams(
            val_size=0.2,
            random_state=42
        )
        model_params = ModelParams(
            model="RF",
            random_state=42,
            n_estimators=100
        )
        feature_params = FeatureParams(
            categorical_features=['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal'],
            numerical_features=['age', 'trestbps', 'chol', 'thalach', 'oldpeak'],
            features_to_drop=[],
            target='condition'
        )

        self.train_params = TrainingParams(
            input_data_path="tests/test_data/synthetic_data.csv",
            # Was an empty string, which contradicted the assertion that a
            # model file exists at ./models/model.pkl after training.
            output_model_path=self.MODEL_PATH,
            splitting_params=split_params,
            model_params=model_params,
            feature_params=feature_params
        )

    def test_train_model(self):
        """Run the training pipeline and check its metrics and model file.

        Asserts that every reported metric is strictly positive and that a
        file exists at the configured model path afterwards.
        """
        # Previously named ``train_model_test``.  unittest (and pytest, for
        # TestCase subclasses) only collects methods whose names start with
        # ``test``, so the check was never executed.
        metrics = run_training_pipeline(self.train_params)

        # assertGreater reports the offending value on failure, unlike
        # assertTrue(x > 0) which only says "False is not true".
        for metric_name in ("accuracy", "recall", "f1", "precision"):
            with self.subTest(metric=metric_name):
                self.assertGreater(metrics[metric_name], 0)

        self.assertTrue(os.path.exists(self.MODEL_PATH))


if __name__ == '__main__':
Expand Down

0 comments on commit 873f0e1

Please sign in to comment.