In [1]:
import os
# Move the kernel's working directory up one level (presumably so the `src`
# package imported in later cells resolves from the project root — confirm).
# NOTE: the path is relative, so re-running this cell without restarting the
# kernel climbs one more directory each time.
os.chdir("..")
# IPython magic: show the current working directory to confirm where we are.
%pwd

'c:\\ML\\NLP-Text-Summarization'

In [2]:
from dataclasses import dataclass
from pathlib import Path

@dataclass
class DataIngestionEntity:
    """Settings consumed by the data-ingestion step.

    Attributes:
        root_dir: Working directory of the ingestion step.
        source_url: URL the source archive is downloaded from.
        local_raw_folder: Path of the downloaded archive *file* (it is used as
            the download target and later opened as a zip archive).
        unzip_folder: Directory the archive is extracted into.

    The three path fields are normalised to ``pathlib.Path`` on construction,
    so callers may pass either ``str`` or ``Path`` values and the declared
    annotations hold at runtime.
    """

    root_dir: Path
    source_url: str
    local_raw_folder: Path
    unzip_folder: Path

    def __post_init__(self):
        # Values read from a config file arrive as plain strings; coerce them
        # so the fields really are Path objects, as annotated.
        self.root_dir = Path(self.root_dir)
        self.local_raw_folder = Path(self.local_raw_folder)
        self.unzip_folder = Path(self.unzip_folder)


In [3]:
from src.constants import *
from src.utils.common import CommonUtils
from box import ConfigBox
from src.custom_exception import CustomException
import sys

class ConfigurationManager:
    """Reads the project's config and params YAML files and builds step configs.

    On construction both files are read through ``CommonUtils.read_yaml`` and
    ``CommonUtils.create_directories`` is called with the configured
    ``artifacts_root``.
    """

    def __init__(self, config_path=CONFIG_FILE_PATH, params_path=PARAMS_FILE_PATH):
        """Load both YAML files and prepare the artifacts root.

        Args:
            config_path: Location of the main configuration YAML.
            params_path: Location of the parameters YAML.
        """
        self.config: ConfigBox = CommonUtils.read_yaml(config_path)
        self.params: ConfigBox = CommonUtils.read_yaml(params_path)

        artifacts_root = self.config.artifacts_root
        CommonUtils.create_directories([artifacts_root])

    def DataIngestionConfiguration(self) -> DataIngestionEntity:
        """Build the data-ingestion settings from the ``data_ingestion`` config section.

        ``CommonUtils.create_directories`` is called with the section's
        ``root_dir`` before the entity is assembled.

        Returns:
            A ``DataIngestionEntity`` populated from the config section.

        Raises:
            CustomException: Wraps any exception raised while reading the
                section or building the entity.
        """
        try:
            section = self.config.data_ingestion

            CommonUtils.create_directories([section.root_dir])

            entity = DataIngestionEntity(
                root_dir=section.root_dir,
                source_url=section.source_url,
                local_raw_folder=section.local_raw_folder,
                unzip_folder=section.unzip_folder,
            )
        except Exception as exp:
            raise CustomException(exp, sys)
        return entity
        

In [14]:
import urllib.request as request
from src.logger import logging
import zipfile
class DataIngestion:
    """Downloads the source archive and extracts it, driven by a config entity.

    The config object must expose ``source_url``, ``local_raw_folder`` (the
    path of the archive file on disk) and ``unzip_folder`` (the extraction
    directory).
    """

    def __init__(self, config: DataIngestionEntity):
        self.config = config

    def download_source_file(self) -> bool:
        """Download the source archive unless it is already on disk.

        The archive is first written to ``<local_raw_folder>.part`` and only
        moved to its final name once the transfer finished, so an interrupted
        download never leaves a truncated file that a later run would mistake
        for a complete one.

        Returns:
            True if the file was downloaded by this call, False if it was
            already present.

        Raises:
            CustomException: Wraps any error raised while downloading.
        """
        try:
            target = os.fspath(self.config.local_raw_folder)
            if os.path.exists(target):
                logging.info("Source File Already present in the system.")
                return False

            # urlretrieve does not create missing directories for its target.
            parent_dir = os.path.dirname(target)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)

            partial = target + ".part"
            try:
                request.urlretrieve(url=self.config.source_url, filename=partial)
                # Publish the finished download under its final name.
                os.replace(partial, target)
            finally:
                # Only still present when the download or the rename failed.
                if os.path.exists(partial):
                    os.remove(partial)

            logging.info("Source file downloaded into the system.")
            return True
        except Exception as exp:
            raise CustomException(exp, sys) from exp

    def unzip_source_file(self):
        """Extract the downloaded archive into ``unzip_folder``.

        The destination directory is created when missing.

        Raises:
            CustomException: Wraps any error raised while creating the
                directory or reading/extracting the archive.
        """
        try:
            os.makedirs(self.config.unzip_folder, exist_ok=True)
            with zipfile.ZipFile(self.config.local_raw_folder, 'r') as zip_file:
                zip_file.extractall(self.config.unzip_folder)
            logging.info("Extracted all files into the system.")
        except Exception as exp:
            raise CustomException(exp, sys) from exp


In [16]:
# Run the data-ingestion step end to end: load the configuration, download
# the source archive, and extract it when it was freshly downloaded.
try:
    config_manager_obj = ConfigurationManager()
    data_ingestion_config: DataIngestionEntity = config_manager_obj.DataIngestionConfiguration()
    data_ingestion_obj = DataIngestion(data_ingestion_config)
    is_file_download = data_ingestion_obj.download_source_file()
    # download_source_file() returns False when the archive was already on
    # disk; extraction is skipped in that case.
    if is_file_download:
        data_ingestion_obj.unzip_source_file()
except CustomException:
    # Already wrapped by the pipeline classes — re-raise unchanged instead of
    # wrapping it a second time.
    raise
except Exception as exp:
    raise CustomException(exp, sys) from exp