In [1]:
import os

In [2]:
%pwd

'/home/fachruzaini/lazada-id-reviews/notebooks'

In [3]:
# Run the rest of the notebook from the project root instead of notebooks/.
# The guard makes the cell idempotent: re-running it no longer climbs one
# more directory level each time.
if os.path.basename(os.getcwd()) == "notebooks":
    os.chdir("../")

In [4]:
# Load KEY=VALUE pairs from the project's .env file into the process environment.
with open('.env') as f:
    for raw_line in f:
        line = raw_line.strip()
        # Blank lines and comment lines carry no assignment.
        if not line or line.startswith('#'):
            continue
        # Split on the first '=' only, so values that themselves contain '='
        # (URLs with query strings, base64 tokens, ...) stay intact.
        key, sep, value = line.partition('=')
        key = key.strip()
        if not sep or not key:
            continue
        os.environ[key] = value.strip()

In [5]:
%pwd

'/home/fachruzaini/lazada-id-reviews'

### Unit Testing Config

This code will be applied in `src/LazadaIDReviews/entity/config_entity.py`.

In [6]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class UnitTestConfig:
    """Immutable settings for the endpoint tests in this notebook.

    Instances are built by ``ConfigurationManager.get_unit_test_config`` from
    the YAML configuration and from environment variables.
    """
    # Local directory for test artifacts; used as the download destination.
    # NOTE(review): annotated as Path, but the value is passed through from the
    # YAML config as-is -- confirm whether it is actually a str at runtime.
    root_dir: Path
    # Tracking server URI handed to MlflowClient.
    mlflow_tracking_uri: str
    # Registered model name used for the alias lookup.
    mlflow_model_name: str
    # Alias that identifies the deployed model version.
    mlflow_deploy_model_alias: str
    # Artifact path of the input example inside the model's run.
    mlflow_input_example_path: Path
    # URL of the prediction app that receives the POST requests.
    app_endpoint: str
    

### Unit Testing Config Manager

This code will be applied in `src/LazadaIDReviews/config/configurations.py`.

In [7]:
from LazadaIDReviews.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH
from LazadaIDReviews.utils.common import read_yaml, create_directories

In [8]:
class ConfigurationManager:
    """Loads the project's YAML files and builds typed config entities."""

    def __init__(self,
                 config_filepath=CONFIG_FILE_PATH,
                 params_filepath=PARAMS_FILE_PATH):
        """Read both YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: location of the main configuration YAML
            params_filepath: location of the parameters YAML
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_unit_test_config(self) -> UnitTestConfig:
        """Assemble the unit-test settings from the config file and environment.

        The unit-test root directory is created as a side effect.

        Returns:
            UnitTestConfig: settings used by the unit-test component

        Raises:
            KeyError: when MLFLOW_TRACKING_URI, MLFLOW_DEPLOY_MODEL_ALIAS or
                APP_ENDPOINT is missing from the environment
        """
        predict_section = self.config.predict
        test_section = self.config.unit_test

        create_directories([test_section.root_dir])

        return UnitTestConfig(
            root_dir=test_section.root_dir,
            mlflow_tracking_uri=os.environ["MLFLOW_TRACKING_URI"],
            mlflow_model_name=predict_section.mlflow_model_name,
            mlflow_deploy_model_alias=os.environ["MLFLOW_DEPLOY_MODEL_ALIAS"],
            mlflow_input_example_path=test_section.mlflow_input_example_path,
            app_endpoint=os.environ["APP_ENDPOINT"],
        )

In [9]:
from mlflow.artifacts import download_artifacts
from mlflow import MlflowClient
from mlflow import pyfunc

  import pkg_resources  # noqa: TID251


---

**Debug**: This section walks through the test preparation steps in the notebook with MLflow, such as selecting the deployed model and loading its input example.

In [10]:
# Build the unit-test settings from the YAML config files and environment variables.
config = ConfigurationManager()
unit_test_config = config.get_unit_test_config()

[2025-07-02 16:38:18,025: INFO: common: yaml file: config/config.yaml loaded successfully]
[2025-07-02 16:38:18,031: INFO: common: yaml file: metrics/params.yaml loaded successfully]
[2025-07-02 16:38:18,032: INFO: common: created directory at: artifacts]
[2025-07-02 16:38:18,035: INFO: common: created directory at: artifacts/test]


Select the deployed model from MLflow.

In [11]:
# Look up the registered model version that currently carries the deploy alias.
client = MlflowClient(tracking_uri=unit_test_config.mlflow_tracking_uri)
selected_model = client.get_model_version_by_alias(
    unit_test_config.mlflow_model_name, 
    unit_test_config.mlflow_deploy_model_alias
)

# Display the artifact URI recorded as this version's source.
selected_model.source

'mlflow-artifacts:/1/c9d5e14ac4384fc7a3fba18602467c69/artifacts/models'

In [12]:
# Load the selected version as a generic pyfunc model from its source URI.
loaded_model = pyfunc.load_model(model_uri=selected_model.source)
loaded_model

Downloading artifacts:   0%|          | 0/10 [00:00<?, ?it/s]

mlflow.pyfunc.loaded_model:
  artifact_path: models
  flavor: mlflow.sklearn
  run_id: c9d5e14ac4384fc7a3fba18602467c69

Get the model `run_id`.

In [13]:
# Run behind the selected model version; the input example is downloaded from its artifacts.
selected_run_id = selected_model.run_id
selected_run_id

'c9d5e14ac4384fc7a3fba18602467c69'

In [14]:
# Download the input example artifact of that run into the unit-test directory.
# The call returns the local path of the downloaded file.
download_artifacts(
    run_id=selected_run_id,
    artifact_path=unit_test_config.mlflow_input_example_path,
    dst_path=unit_test_config.root_dir
)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

'/home/fachruzaini/lazada-id-reviews/artifacts/test/models/input_example.json'

In [15]:
import json

# Read the downloaded input example. The context manager closes the file
# handle, and the explicit encoding keeps non-ASCII review text (emoji)
# readable regardless of the platform's default encoding.
with open(
    f"{unit_test_config.root_dir}/{unit_test_config.mlflow_input_example_path}",
    encoding="utf-8",
) as f:
    input_example = json.load(f)
input_example

{'columns': ['reviewContents'],
 'data': [[['barang sudah di terima dengan baik, dah di coba Oke.terima kasih Ladaza',
    'sangat puas trimakasih lazada',
    'D pasang ke SS J2 nggak bisa. Apa kartuny yg rusak?',
    'rekomendet bgt lahh barang bagus cpt bgt sampe y padahal tmpat aq desa plosok bgt tpi cuman 2hri dah sampe',
    'Film ato lagu2 saat d putar dri plashdisk error..smuanya filmku error gx bsa d putar😭😭😭',
    'bagus',
    'Toshiba 1 TB Hitam dengan banyak GRATIS Usb TOSHIBA 32GB + Pouch Harddisk & Usb OTG Reader Android NICE !!!!',
    'Produk original dan awet',
    'Barang bagus mulus wlau hnya paking buble warp, kulitas lumayan lh dgn harga sgitu.. Rekomended bgt ni brg..',
    'mantabb, barang bagus sesuai pesanan dan datangnya cepat 👍👍']]]}

We take the data from the MLflow input example and reshape it into the request body format expected by the app: a single key (the column name) mapped to the list of review texts.

In [16]:
# The request body maps the example's single column name to its list of review texts.
column_name = input_example["columns"][0]
review_texts = input_example["data"][0][0]
request_body = {column_name: review_texts}

request_body

{'reviewContents': ['barang sudah di terima dengan baik, dah di coba Oke.terima kasih Ladaza',
  'sangat puas trimakasih lazada',
  'D pasang ke SS J2 nggak bisa. Apa kartuny yg rusak?',
  'rekomendet bgt lahh barang bagus cpt bgt sampe y padahal tmpat aq desa plosok bgt tpi cuman 2hri dah sampe',
  'Film ato lagu2 saat d putar dri plashdisk error..smuanya filmku error gx bsa d putar😭😭😭',
  'bagus',
  'Toshiba 1 TB Hitam dengan banyak GRATIS Usb TOSHIBA 32GB + Pouch Harddisk & Usb OTG Reader Android NICE !!!!',
  'Produk original dan awet',
  'Barang bagus mulus wlau hnya paking buble warp, kulitas lumayan lh dgn harga sgitu.. Rekomended bgt ni brg..',
  'mantabb, barang bagus sesuai pesanan dan datangnya cepat 👍👍']}

Test `app.py` by sending an HTTP request that uses the MLflow input example as its data.

In [17]:
import requests

# Send the example reviews to the running app. The timeout keeps the cell from
# hanging when the service is unreachable, and raise_for_status() surfaces HTTP
# errors instead of parsing an error payload as if it were predictions.
result = requests.post(
    url=unit_test_config.app_endpoint,
    json=request_body,
    timeout=30,
)
result.raise_for_status()
y_predict = result.json()

In [18]:
# Number of predictions returned; compare with the number of reviews sent.
len(y_predict)

10

In [19]:
# Inspect the raw predictions returned by the app.
y_predict

[5, 5, 1, 5, 2, 5, 1, 5, 5, 5]

In [20]:
# Container type of the decoded JSON response.
type(y_predict)

list

---

### Unit Testing

This code will be applied in `src/LazadaIDReviews/components/unit_testing.py`.

In [21]:
import json
import requests

from LazadaIDReviews import logger

class UnitTesting:
    """Checks the deployed prediction endpoint with the model's MLflow input example.

    Call ``set_request_body()`` first; the other methods rely on the request
    body it prepares.
    """

    # Seconds to wait for the app before giving up on a request.
    REQUEST_TIMEOUT = 30

    def __init__(self, config: UnitTestConfig):
        self.config = config
        self.req_body_key = None
        self.req_body = None

    def set_request_body(self) -> None:
        """Download the deployed model's input example and build the request body.

        The example's first column name becomes the request key and the first
        data entry becomes its value.

        Raises:
            Exception: re-raised after logging when the deployed model version
                cannot be resolved through the MLflow client, or when the
                input example cannot be downloaded from the run's artifacts
        """
        try:
            logger.info("Set MLflow Client.")
            client = MlflowClient(tracking_uri=self.config.mlflow_tracking_uri)
            selected_model = client.get_model_version_by_alias(
                self.config.mlflow_model_name,
                self.config.mlflow_deploy_model_alias
            )

            logger.info("Get the deployed model run id.")
            selected_run_id = selected_model.run_id
        except Exception as client_error:
            logger.error(client_error)
            raise

        try:
            # The artifact fetched here is the input example, not a vectorizer.
            logger.info("Downloading input example from MLflow's artifacts.")
            download_artifacts(
                run_id=selected_run_id,
                artifact_path=self.config.mlflow_input_example_path,
                dst_path=self.config.root_dir
            )
        except Exception as download_error:
            logger.error(download_error)
            raise

        logger.info("Open MLflow input example.")
        example_path = f"{self.config.root_dir}/{self.config.mlflow_input_example_path}"
        # Context manager closes the handle; explicit encoding keeps non-ASCII
        # review text readable regardless of the platform default.
        with open(example_path, encoding="utf-8") as f:
            input_example = json.load(f)

        # handle mlflow input example data
        data_key = input_example["columns"][0]
        data_val = input_example['data'][0][0]

        # request params
        self.req_body_key = data_key
        self.req_body = {
            data_key: data_val
        }

    def _post_request(self):
        """POST the prepared request body to the app and return the decoded JSON.

        Raises:
            requests.HTTPError: when the app answers with an error status
        """
        response = requests.post(
            url=self.config.app_endpoint,
            json=self.req_body,
            timeout=self.REQUEST_TIMEOUT,
        )
        # Fail loudly rather than treating an error payload as predictions.
        response.raise_for_status()
        return response.json()

    def get_request_body_value(self) -> list:
        """get the request body data

        Returns:
            req_body_value: the value stored under the request key
        """
        logger.info("Get MLflow input example value.")
        req_body_value = self.req_body[self.req_body_key]
        return req_body_value

    def get_output_length(self) -> int:
        """get the output length of the predict result

        Returns:
            len_result: int type, number of items in the app's response
        """
        logger.info("Get predicted result length.")
        len_result = len(self._post_request())
        return len_result

    def is_output_type_list(self) -> bool:
        """check if the predicted output is a list

        Returns:
            is_list: bool type
        """
        logger.info("Check is the predicted output is list.")
        is_list = type(self._post_request()) is list
        return is_list

    def is_output_type_consistent(self) -> bool:
        """check if every item of the predicted output is an integer

        Returns:
            bool type, True when all items are plain ``int``
        """
        logger.info("Check is each predicted output is integer")
        predictions = self._post_request()
        # Exact type check (not isinstance) so that booleans, which subclass
        # int, are still rejected.
        return all(type(prediction) is int for prediction in predictions)

### Run Testing

**Debug**: Simulate the unit tests directly in the notebook, without a testing library.

In [22]:
try:
    config = ConfigurationManager()
    unit_testing_config = config.get_unit_test_config()
    unit_test = UnitTesting(config=unit_testing_config)
    unit_test.set_request_body()

    print("Review Contents: ")
    for content in unit_test.get_request_body_value():
        print(content)

    print("\nBegin tests:")
    # The endpoint is queried before the request value is read, so the log
    # lines appear in that order.
    is_same_size = unit_test.get_output_length() == len(unit_test.get_request_body_value())
    print(f"Is same size: {is_same_size}")
    print(f"Is the output is list: {unit_test.is_output_type_list()}")
    print(f"Is the output consistent: {unit_test.is_output_type_consistent()}")
except Exception as e:
    # Log the failure, then let it propagate so the cell visibly fails.
    logger.error(e)
    raise

[2025-07-02 16:38:24,162: INFO: common: yaml file: config/config.yaml loaded successfully]
[2025-07-02 16:38:24,165: INFO: common: yaml file: metrics/params.yaml loaded successfully]
[2025-07-02 16:38:24,167: INFO: common: created directory at: artifacts]
[2025-07-02 16:38:24,170: INFO: common: created directory at: artifacts/test]
[2025-07-02 16:38:24,173: INFO: 1595305458: Set MLflow Client.]
[2025-07-02 16:38:24,195: INFO: 1595305458: Get the deployed model run id.]
[2025-07-02 16:38:24,197: INFO: 1595305458: Downloading vectorizer from MLflow's artifacts.]


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

[2025-07-02 16:38:24,307: INFO: 1595305458: Open MLflow input example.]
Review Contents: 
[2025-07-02 16:38:24,310: INFO: 1595305458: Get MLflow input example value.]
barang sudah di terima dengan baik, dah di coba Oke.terima kasih Ladaza
sangat puas trimakasih lazada
D pasang ke SS J2 nggak bisa. Apa kartuny yg rusak?
rekomendet bgt lahh barang bagus cpt bgt sampe y padahal tmpat aq desa plosok bgt tpi cuman 2hri dah sampe
Film ato lagu2 saat d putar dri plashdisk error..smuanya filmku error gx bsa d putar😭😭😭
bagus
Toshiba 1 TB Hitam dengan banyak GRATIS Usb TOSHIBA 32GB + Pouch Harddisk & Usb OTG Reader Android NICE !!!!
Produk original dan awet
Barang bagus mulus wlau hnya paking buble warp, kulitas lumayan lh dgn harga sgitu.. Rekomended bgt ni brg..
mantabb, barang bagus sesuai pesanan dan datangnya cepat 👍👍

Begin tests:
[2025-07-02 16:38:24,314: INFO: 1595305458: Get predicted result length.]
[2025-07-02 16:38:24,870: INFO: 1595305458: Get MLflow input example value.]
Is same si