In [12]:
%reload_ext autoreload
%autoreload 2
import os
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Optional

from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())
sys.path.append(os.getenv("PROJECT_FOLDER"))
from src.utils.common import logger

@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable bundle of the paths used by the data-ingestion stage.

    Instances are frozen: assigning to a field after construction raises
    `dataclasses.FrozenInstanceError`.
    """

    # Working directory of the ingestion stage.
    root_dir: Path
    # Location of the `.zip` archive to be extracted.
    source_path: Path
    # Directory the archive contents are extracted into.
    unzip_dir: Path

In [13]:
# os.chdir("..")

In [14]:
from src.utils.common import read_yaml, create_directories

In [15]:
class ConfigurationManager:
    """Load the project's YAML settings and build per-stage config objects.

    The config, params and schema YAML files are read once, at construction
    time, through `read_yaml`. The directory named by the config file's
    `artifacts_root` entry is then created through `create_directories`.
    """

    def __init__(
        self,
        config_filepath: Optional[str] = None,
        params_filepath: Optional[str] = None,
        schema_filepath: Optional[str] = None,
    ):
        """
        Instantiate `ConfigurationManager` class

        Each path that is not passed explicitly is looked up in the
        environment when the instance is created (not when the class is
        defined), so values loaded or changed after the class definition
        are still picked up.

        Args:
            config_filepath (Optional[str]): path to the config YAML file.
                Defaults to the `CONFIG_FILE_PATH` environment variable.
            params_filepath (Optional[str]): path to the params YAML file.
                Defaults to the `PARAMS_FILE_PATH` environment variable.
            schema_filepath (Optional[str]): path to the schema YAML file.
                Defaults to the `SCHEMA_FILE_PATH` environment variable.

        Raises:
            TypeError: if a path is neither passed nor available from its
                environment variable.
        """
        self.config = read_yaml(self._resolve_path(config_filepath, "CONFIG_FILE_PATH"))
        self.params = read_yaml(self._resolve_path(params_filepath, "PARAMS_FILE_PATH"))
        self.schema = read_yaml(self._resolve_path(schema_filepath, "SCHEMA_FILE_PATH"))
        create_directories([self.config.artifacts_root])

    @staticmethod
    def _resolve_path(filepath: Optional[str], env_var: str) -> Path:
        """Return `filepath` as a `Path`, falling back to the `env_var` environment variable."""
        if filepath is None:
            filepath = os.getenv(env_var)
        if filepath is None:
            # TypeError is what `Path(None)` raised before this check existed;
            # the type is kept so existing handlers still match, but the
            # message now names the variable that is missing.
            raise TypeError(
                f"No file path given and environment variable {env_var!r} is not set"
            )
        return Path(filepath)

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """
        Build the configuration for the data-ingestion stage

        Reads the `data_ingestion` section of the loaded config, creates its
        `root_dir` through `create_directories`, and returns the section's
        paths as a `DataIngestionConfig`.

        Returns:
            DataIngestionConfig: `root_dir`, `source_path` and `unzip_dir`
                taken from the config file, converted to `Path` objects.
        """
        config = self.config.data_ingestion

        create_directories([config.root_dir])

        # The dataclass declares its fields as `Path`; convert here so the
        # returned object matches that declaration instead of carrying
        # whatever type the YAML loader produced.
        data_ingestion_config = DataIngestionConfig(
            root_dir=Path(config.root_dir),
            source_path=Path(config.source_path),
            unzip_dir=Path(config.unzip_dir),
        )
        return data_ingestion_config

In [18]:
import zipfile

class DataIngestion:
    """Data-ingestion stage: unpacks the configured `.zip` archive."""

    def __init__(self, config: DataIngestionConfig):
        """
        Instantiate `DataIngestion` class

        Args:
            config (DataIngestionConfig): paths used by the ingestion stage;
                `source_path` and `unzip_dir` are read by `extract_zip_file`
        """
        self.config = config

    def extract_zip_file(self):
        """
        Extract the archive at `config.source_path` into `config.unzip_dir`

        The target directory is created first if it does not exist yet.
        """
        target_dir = self.config.unzip_dir
        # Make sure the destination exists before the archive is opened.
        os.makedirs(target_dir, exist_ok=True)
        with zipfile.ZipFile(self.config.source_path, "r") as archive:
            archive.extractall(path=target_dir)

In [20]:
# Run the data-ingestion stage: load the configuration, build the stage
# object from it, and extract the configured archive.
try:
    logger.info("Ingesting data")
    configuration_manager = ConfigurationManager()
    data_ingestion = DataIngestion(config=configuration_manager.get_data_ingestion_config())
    data_ingestion.extract_zip_file()
except Exception as e:
    # Still best-effort (the cell does not re-raise), but log at ERROR level
    # *with* the traceback so the failing step can be located.
    # NOTE(review): assumes `logger` is a standard `logging.Logger` — confirm.
    logger.exception(e)


2024-03-06 17:01:57,464 - sentiment-classifier-logger - INFO - Ingesting data
2024-03-06 17:01:57,467 - sentiment-classifier-logger - INFO - yaml file: D:\Documents\GitHub\customer-product-reviews-sentiment-classifier\config\config.yaml loaded successfully
2024-03-06 17:01:57,471 - sentiment-classifier-logger - INFO - yaml file: D:\Documents\GitHub\customer-product-reviews-sentiment-classifier\params.yaml loaded successfully
2024-03-06 17:01:57,472 - sentiment-classifier-logger - INFO - yaml file: D:\Documents\GitHub\customer-product-reviews-sentiment-classifier\schema.yaml loaded successfully
2024-03-06 17:01:57,475 - sentiment-classifier-logger - INFO - Created directory at: artifacts
2024-03-06 17:01:57,476 - sentiment-classifier-logger - INFO - Created directory at: artifacts/data_ingestion
