In [None]:
import os

In [None]:
%pwd

In [None]:
# Move up one level so the rest of the notebook runs from the main directory.
# NOTE: the path is relative to the current working directory, so re-running
# this cell moves up one more level each time; run it once per kernel session.
os.chdir("../")

In [None]:
# Load KEY=VALUE pairs from the project's .env file into os.environ.
with open('.env') as f:
    for raw_line in f:
        line = raw_line.strip()
        # Blank lines, comments and lines without '=' carry no assignment.
        if not line or line.startswith('#') or '=' not in line:
            continue
        # Split on the first '=' only, so values such as URIs with query
        # strings may themselves contain '='.
        key, _, value = line.partition('=')
        os.environ[key.strip()] = value.strip()

In [None]:
%pwd

### Unit Testing Config

This code will be applied in `src/MarketplaceReviews/entity/config_entity.py`.

In [None]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class UnitTestConfig:
    """Immutable settings used by the unit-testing step of this notebook."""
    # Directory created for the unit-test step; MLflow artifacts are downloaded into it.
    root_dir: Path
    # Tracking URI handed to MlflowClient.
    mlflow_tracking_uri: str
    # Registered model name used to look up the model version by alias.
    mlflow_model_name: str
    # Alias used to select the model version to test.
    mlflow_deploy_model_alias: str
    # Artifact path of the input example inside the selected model's run.
    mlflow_input_example_path: Path
    # URL that the test requests are POSTed to.
    app_endpoint: str
    

### Unit Testing Config Manager

This code will be applied in `src/MarketplaceReviews/config/configurations.py`.

In [None]:
from MarketplaceReviews.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH
from MarketplaceReviews.utils.common import read_yaml, create_directories

In [None]:
class ConfigurationManager:
    """Read the project's YAML files and build config entities from them."""

    def __init__(self,
                 config_filepath = CONFIG_FILE_PATH,
                 params_filepath = PARAMS_FILE_PATH):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: path of the main configuration YAML
            params_filepath: path of the parameters YAML
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_unit_test_config(self) -> UnitTestConfig:
        """Assemble the unit-test settings from the config file and the
        environment, and store them as a config entity.

        The tracking URI, deploy alias and app endpoint are read from
        environment variables; the rest comes from the `predict` and
        `unit_test` sections of the config file.

        Returns:
            config: UnitTestConfig type

        Raises:
            KeyError: when one of the required environment variables is unset
        """
        predict_section = self.config.predict
        unit_test_section = self.config.unit_test

        create_directories([unit_test_section.root_dir])

        # Entries are evaluated top to bottom, in the same order as the
        # dataclass fields.
        settings = {
            "root_dir": unit_test_section.root_dir,
            "mlflow_tracking_uri": os.environ["MLFLOW_TRACKING_URI"],
            "mlflow_model_name": predict_section.mlflow_model_name,
            "mlflow_deploy_model_alias": os.environ["MLFLOW_DEPLOY_MODEL_ALIAS"],
            "mlflow_input_example_path": unit_test_section.mlflow_input_example_path,
            "app_endpoint": os.environ["APP_ENDPOINT"],
        }
        return UnitTestConfig(**settings)

In [None]:
from mlflow.artifacts import download_artifacts
from mlflow import MlflowClient
from mlflow import pyfunc

---

**Debug**: Walk through the test preparation steps in the notebook with MLflow, such as loading the input example.

In [None]:
# Build the unit-test settings from the YAML config and the environment variables.
config = ConfigurationManager()
unit_test_config = config.get_unit_test_config()

Select the deployed model from MLflow.

In [None]:
# Look up the model version that the deploy alias refers to.
client = MlflowClient(tracking_uri=unit_test_config.mlflow_tracking_uri)
selected_model = client.get_model_version_by_alias(
    unit_test_config.mlflow_model_name, 
    unit_test_config.mlflow_deploy_model_alias
)

# Display the source of that model version; it is used as the model URI below.
selected_model.source

In [None]:
# Load the selected model version through the pyfunc interface, using its source as the URI.
loaded_model = pyfunc.load_model(model_uri=selected_model.source)
loaded_model

Get the model `run_id`.

In [None]:
# Keep the run id of the selected model version; it is needed to download that run's artifacts.
selected_run_id = selected_model.run_id
selected_run_id

In [None]:
# Download the run's input example artifact into the unit-test root directory.
download_artifacts(
    run_id=selected_run_id,
    artifact_path=unit_test_config.mlflow_input_example_path,
    dst_path=unit_test_config.root_dir
)

In [None]:
import json

# Use a context manager so the file handle is closed as soon as the JSON is parsed.
with open(f"{unit_test_config.root_dir}/{unit_test_config.mlflow_input_example_path}") as f:
    input_example = json.load(f)
input_example

We take the first column name and its first value from the MLflow input example and use them to build the request body.

In [None]:
# Request body: {first column name: first cell of the first data row}.
request_body = {
    input_example["columns"][0]: input_example['data'][0][0]
}

request_body

Test `app.py` by sending an HTTP request with the MLflow input example data.

In [None]:
import requests

# POST the request body as JSON to the app endpoint and decode the JSON reply.
result = requests.post(url=unit_test_config.app_endpoint, json=request_body)
y_predict = result.json()

In [None]:
len(y_predict)

In [None]:
y_predict

In [None]:
type(y_predict)

---

### Unit Testing

This code goes in `src/MarketplaceReviews/components/unit_testing.py`.

In [None]:
import json
import requests

from MarketplaceReviews import logger

class UnitTesting:
    """Check the app endpoint's output using the deployed model's MLflow input example.

    Call `set_request_body` first; the other methods read the request body
    that it stores on the instance.
    """

    def __init__(self, config: UnitTestConfig):
        """Keep the configuration; the request body is filled in later.

        Args:
            config: UnitTestConfig type
        """
        self.config = config
        self.req_body_key = None
        self.req_body = None

    def set_request_body(self) -> None:
        """build the request body from the MLflow input example of
        the deployed model

        Looks up the model version behind the deploy alias, downloads the
        input example artifact of its run, and stores the first column name
        and its first value as the request body.

        Raises:
            client_error: error when access mlflow to get deployed model
            download_error: error when download input example from mlflow artifact
        """
        try:
            logger.info("Set MLflow Client.")
            client = MlflowClient(tracking_uri=self.config.mlflow_tracking_uri)
            selected_model = client.get_model_version_by_alias(
                self.config.mlflow_model_name,
                self.config.mlflow_deploy_model_alias
            )

            logger.info("Get the deployed model run id.")
            selected_run_id = selected_model.run_id
        except Exception as client_error:
            logger.error(client_error)
            raise

        try:
            # The artifact fetched here is the input example, not a vectorizer.
            logger.info("Downloading input example from MLflow's artifacts.")
            download_artifacts(
                run_id=selected_run_id,
                artifact_path=self.config.mlflow_input_example_path,
                dst_path=self.config.root_dir
            )
        except Exception as download_error:
            logger.error(download_error)
            raise

        logger.info("Open MLflow input example.")
        input_example_path = f"{self.config.root_dir}/{self.config.mlflow_input_example_path}"
        # Context manager closes the file handle once the JSON is parsed.
        with open(input_example_path) as f:
            input_example = json.load(f)

        # handle mlflow input example data: first column name and the
        # first cell of the first data row
        data_key = input_example["columns"][0]
        data_val = input_example['data'][0][0]

        # request params
        self.req_body_key = data_key
        self.req_body = {
            data_key: data_val
        }

    def _post_request_body(self):
        """POST the stored request body to the app endpoint and return the decoded JSON reply."""
        response = requests.post(
            url=self.config.app_endpoint,
            json=self.req_body
        )
        return response.json()

    def get_request_body_value(self) -> list:
        """get the request body data

        Returns:
            req_body_value: the value stored under the request body key
        """
        logger.info("Get MLflow input example value.")
        return self.req_body[self.req_body_key]

    def get_output_length(self) -> int:
        """get the output length of the predict result

        Returns:
            len_result: int type
        """
        logger.info("Get predicted result length.")
        return len(self._post_request_body())

    def is_output_type_list(self) -> bool:
        """check if the predicted output is list data type

        Returns:
            is_list: bool type
        """
        logger.info("Check is the predicted output is list.")
        return type(self._post_request_body()) is list

    def is_output_type_consistent(self) -> bool:
        """check if every element of the predicted output is an integer

        Returns:
            bool type, True when all elements are exactly `int`
            (also True for an empty output)
        """
        logger.info("Check is each predicted output is integer")
        # Exact type check on purpose: bool is a subclass of int and must not pass.
        return all(type(item) is int for item in self._post_request_body())

### Run Testing

**Debug**: Simulate the unit tests without a testing library.

In [None]:
try:
    # Build the tester from the project configuration and prepare its request body.
    config = ConfigurationManager()
    unit_testing_config = config.get_unit_test_config()
    unit_test = UnitTesting(config=unit_testing_config)
    unit_test.set_request_body()

    print("Review Contents: ")
    for content in unit_test.get_request_body_value():
        print(content)

    print("\nBegin tests:")
    # The output length is requested before the expected length is computed.
    predicted_size = unit_test.get_output_length()
    expected_size = len(unit_test.get_request_body_value())
    print(f"Is same size: {predicted_size == expected_size}")
    # Both checks already return a bool, so they are printed as-is.
    print(f"Is the output is list: {unit_test.is_output_type_list()}")
    print(f"Is the output consistent: {unit_test.is_output_type_consistent()}")
except Exception as e:
    logger.error(e)
    raise