In [6]:
import os
import zipfile
from dataclasses import dataclass
from pathlib import Path
from urllib import request

In [7]:
# !ls

In [8]:
@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable settings for the data-ingestion stage.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Working directory of the ingestion stage.
    root_dir: Path
    # URL the dataset archive is downloaded from.
    source_URL: str
    # Local file the downloaded archive is written to and later opened as a zip.
    local_data_path: Path
    # Default directory the archive is extracted into.
    unzip_dir: Path

@dataclass(frozen=True)
class DataValidationConfig:
    """Immutable settings for the data-validation stage.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Working directory of the validation stage.
    root_dir: Path
    # NOTE(review): presumably the file the validation outcome is written to — TODO confirm.
    STATUS_FILE: Path
    # NOTE(review): presumably the file names that must be present for validation
    # to pass — TODO confirm against the validation component.
    ALL_REQUIRED_FILES: list

@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable settings for the data-transformation stage.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Working directory of the transformation stage.
    root_dir: Path
    # NOTE(review): presumably the location of the input dataset — TODO confirm.
    data_path: Path
    # NOTE(review): presumably the identifier of the tokenizer to load — TODO confirm.
    tokenizer_name: str

@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable settings for the model-training stage.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Working directory of the training stage.
    root_dir: Path
    # NOTE(review): presumably the location of the prepared training data — TODO confirm.
    data_path: Path
    # NOTE(review): presumably the name of the base model checkpoint — TODO confirm.
    model_ckpt: str

@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Immutable settings for the model-evaluation stage.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Working directory of the evaluation stage.
    root_dir: Path
    # NOTE(review): presumably the location of the evaluation data — TODO confirm.
    data_path: Path
    # NOTE(review): presumably where the trained model is stored — TODO confirm.
    model_path: Path
    # NOTE(review): presumably where the tokenizer is stored — TODO confirm.
    tokenizer_path: Path
    # NOTE(review): presumably the file the computed metrics are written to — TODO confirm.
    metric_file_name: Path

In [9]:
from src.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH
from src.utils import read_yaml, get_size
from src.logger import logging


In [10]:
# Load the main config file and show the parsed result as this cell's output.
# NOTE(review): the output recorded for this cell is a FileNotFoundError for
# 'config\\config.yaml', so the path is being resolved relative to the kernel's
# working directory; presumably the notebook has to be run from the project
# root for this to succeed — TODO confirm.
read_yaml(CONFIG_FILE_PATH)

FileNotFoundError: [Errno 2] No such file or directory: 'config\\config.yaml'

In [None]:
class ConfigurationManager:
    """Load the project YAML files and build one config object per pipeline stage.

    The constructor parses the main config file and the params file with
    ``read_yaml`` and creates the top-level artifacts directory. Each
    ``get_*_config`` method reads the matching section of the main config,
    creates that stage's ``root_dir`` and returns the corresponding frozen
    dataclass populated from that section.
    """

    def __init__(self, config_file_path=CONFIG_FILE_PATH,
                 param_file_path=PARAMS_FILE_PATH) -> None:
        """Parse both YAML files and ensure the artifacts root directory exists.

        Args:
            config_file_path: Path of the main config YAML file.
            param_file_path: Path of the params YAML file.
        """
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(param_file_path)

        os.makedirs(self.config.artifacts_root, exist_ok=True)

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Return the data-ingestion settings, creating the stage directory."""
        section = self.config.data_ingestion
        os.makedirs(section.root_dir, exist_ok=True)

        # Fields are read from the stage section, not from the top-level config.
        return DataIngestionConfig(
            root_dir=section.root_dir,
            source_URL=section.source_URL,
            local_data_path=section.local_data_path,
            unzip_dir=section.unzip_dir,
        )

    def get_data_validation_config(self) -> DataValidationConfig:
        """Return the data-validation settings, creating the stage directory."""
        section = self.config.data_validation
        os.makedirs(section.root_dir, exist_ok=True)

        return DataValidationConfig(
            root_dir=section.root_dir,
            STATUS_FILE=section.STATUS_FILE,
            ALL_REQUIRED_FILES=section.ALL_REQUIRED_FILES,
        )

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Return the data-transformation settings, creating the stage directory."""
        section = self.config.data_transformation
        os.makedirs(section.root_dir, exist_ok=True)

        return DataTransformationConfig(
            root_dir=section.root_dir,
            data_path=section.data_path,
            tokenizer_name=section.tokenizer_name,
        )

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Return the model-training settings, creating the stage directory."""
        section = self.config.model_trainer
        os.makedirs(section.root_dir, exist_ok=True)

        return ModelTrainerConfig(
            root_dir=section.root_dir,
            data_path=section.data_path,
            model_ckpt=section.model_ckpt,
        )

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Return the model-evaluation settings, creating the stage directory."""
        section = self.config.model_evaluation
        os.makedirs(section.root_dir, exist_ok=True)

        # The keyword arguments must match the fields ModelEvaluationConfig
        # declares; it has no `model_ckpt` field and requires all five below.
        return ModelEvaluationConfig(
            root_dir=section.root_dir,
            data_path=section.data_path,
            model_path=section.model_path,
            tokenizer_path=section.tokenizer_path,
            metric_file_name=section.metric_file_name,
        )

In [None]:
class DataIngestion:
    """Download the dataset archive and unpack it.

    The instance only stores the configuration object; all paths and the
    source URL are read from it when a method runs.
    """

    # The annotation is a string so this cell can be (re)defined on its own,
    # without the config dataclass having to exist at class-creation time.
    def __init__(self, config: "DataIngestionConfig"):
        """Store the ingestion settings.

        Args:
            config: Object exposing ``source_URL``, ``local_data_path`` and
                ``unzip_dir``.
        """
        self.config = config

    def download_dataset(self):
        """Download the archive to ``config.local_data_path`` unless it already exists.

        When the target file is already present nothing is downloaded and only
        its size is logged.
        """
        if not os.path.exists(self.config.local_data_path):
            file, header = request.urlretrieve(url=self.config.source_URL,
                                               filename=self.config.local_data_path)
            logging.info(f"{file} successfully downloaded! Info: \n{header}")
        else:
            logging.info(f"File already exists of size: {get_size(Path(self.config.local_data_path))}")

    def extract_zip(self, unzip_path=None):
        """Extract the downloaded zip archive.

        Args:
            unzip_path: str, optional
                Directory to extract into. Defaults to ``self.config.unzip_dir``
                when not provided. The directory is created if missing.
        """
        if unzip_path is None:
            unzip_path = self.config.unzip_dir

        os.makedirs(unzip_path, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_path, 'r') as zip_ref:
            zip_ref.extractall(unzip_path)


# Backward-compatible module-level name: `extract_zip` used to be defined
# outside the class, so `extract_zip(instance, ...)` keeps working.
extract_zip = DataIngestion.extract_zip


In [None]:
# Run the data-ingestion stage: build its config, download the archive, unpack it.
# No try/except wrapper: the previous `except Exception as e: raise e` only
# re-raised the same exception, so errors propagate exactly as before.
config = ConfigurationManager()
data_ingestion_config = config.get_data_ingestion_config()
data_ingestion = DataIngestion(config=data_ingestion_config)
data_ingestion.download_dataset()
# `extract_zip` is invoked through its module-level name, with the ingestion
# object passed explicitly as its first argument.
extract_zip(data_ingestion)