In [None]:
import os
import rootutils
from pathlib import Path
from dotenv import load_dotenv

# Register the project root before any project-local package is imported.
# os.path.abspath('') resolves to the current working directory, which is the
# path handed to rootutils; '.git' and 'pyproject.toml' are passed as the
# root indicators.
# NOTE(review): pythonpath=True presumably puts the detected root on sys.path so
# that the `helpers.*` and `src.*` imports below resolve - confirm against the
# rootutils documentation.
rootutils.setup_root(
    os.path.abspath(''), indicator=['.git', 'pyproject.toml'], pythonpath=True
)

from helpers.data.preprocessing.preproc_pipeline_manager import PreprocessingPipeline
from helpers.data.preprocessing.preproc_strategy_split import SplitStep
from helpers.data.preprocessing.preproc_strategy_tile import TilingStep
from src.data.components.utils import clear_directory

# NOTE(review): python-dotenv call; presumably loads variables from a local .env
# file into the process environment (e.g. 'pcb_data_path', which is read via
# os.environ further down) - confirm the .env location it picks up.
load_dotenv()

In [None]:
# --- Configuration ---------------------------------------------------------
# Location of the raw dataset, taken from the environment.
# os.environ.get returns None when 'pcb_data_path' is not defined.
data_dir = os.environ.get('pcb_data_path')
# When True, an already existing '<dataset>_processed' folder is cleared and
# the pipeline is executed again; when False the existing output is reused.
overwrite_data = True

# --- Pipeline definition ---------------------------------------------------
# Step 1: dataset split.
# NOTE(review): the three ratios presumably map to train/val/test - confirm
# against SplitStep.
split_step = SplitStep(split_ratio=[0.7, 0.2, 0.1], seed=42, merge_classes=True)

# Step 2: tiling of the images into 224x224 tiles.
# NOTE(review): units of `overlap` / `contour_iter_step_size` and the exact
# meaning of the two thresholds are defined by TilingStep - confirm there.
tiling_step = TilingStep(
    tile_size=(224, 224),
    min_defective_area_th=0.1,
    discard_background_th=0.0,
    overlap=10,
    contour_iter_step_size=10,
    iterate_over_defective_areas=True,
)

# Steps are passed in order: split first, then tiling.
preprocessing_pipeline = PreprocessingPipeline(steps=[split_step, tiling_step])

In [None]:
# Fail early with an actionable message: os.environ.get() yields None when the
# variable is missing, which would otherwise surface as an opaque TypeError
# from Path(None); an empty value would silently resolve to the current
# directory and target './_processed'.
if not data_dir:
    raise ValueError(
        "Environment variable 'pcb_data_path' is not set (or is empty); "
        'define it in the shell or in the .env file before running this cell.'
    )

# Derive the output folder as a sibling of the raw data folder:
#   <parent>/<name>  ->  <parent>/<name>_processed
data_path = Path(data_dir)
base_path = data_path.parent
last_subdir = data_path.name
output_path = base_path / f'{last_subdir}_processed'

# Input handed to the pipeline: the raw data location under the
# 'initial_data' key.
initial_data = {'initial_data': data_dir}

output_exists = output_path.exists()
if output_exists and not overwrite_data:
    # Reuse the existing output instead of running the pipeline again.
    preprocessed_data = preprocessing_pipeline.get_processed_data_path(initial_data)
else:
    if output_exists:
        # Stale output: remove it before regenerating.
        # NOTE(review): clear_directory presumably empties the folder but
        # leaves the folder itself in place - Path.rmdir() only succeeds on an
        # empty directory, so it is removed afterwards. Confirm.
        clear_directory(output_path)
        output_path.rmdir()
    preprocessed_data = preprocessing_pipeline.run(initial_data)