In [1]:
import os
import yaml

from pathlib import Path

# Print the current working directory
print("Current working directory:", os.getcwd())


def _locate_project_root(start: Path, fallback: Path) -> Path:
    """Return the nearest directory at or above ``start`` that holds config/config.yaml.

    Args:
        start: Directory from which the upward search begins.
        fallback: Path returned unchanged when no ancestor contains the file.

    Returns:
        The first matching directory (``start`` itself is checked first),
        otherwise ``fallback``.
    """
    for candidate in (start, *start.parents):
        if (candidate / 'config' / 'config.yaml').is_file():
            return candidate
    return fallback


# Discover the project root from wherever the kernel was started so the
# notebook is not tied to a single machine; the original absolute path is kept
# only as a last-resort fallback.
project_root = _locate_project_root(Path.cwd(), Path('C:/Test-Summarizer'))
os.chdir(project_root)
print("Current working directory:", os.getcwd())


# Specify the paths to your configuration and parameters files
config_filepath = project_root / 'config' / 'config.yaml'
params_filepath = project_root / 'params.yaml'




Current working directory: c:\Test-Summarizer\research
Current working directory: C:\Test-Summarizer


In [2]:
from __future__ import annotations


import zipfile
import urllib.request as request
from textSummarizer.logging import logger
from textSummarizer.utils.common import get_size


from pathlib import Path
from dataclasses import dataclass


from textSummarizer.constants import *
# from textSummarizer.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH 
from textSummarizer.utils.common import read_yaml, create_directories

In [3]:
# Show the kernel's current working directory (the first cell changes it to the project root)
%pwd

'C:\\Test-Summarizer'

In [4]:
# Stay in the project root. A relative "../" here would step out of the root
# that the first cell already switched to, and would climb one more level on
# every re-run of this cell; an absolute target keeps the cell idempotent.
os.chdir(project_root)

In [5]:
# now check working dir
# %pwd

In [6]:
@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable bundle of settings consumed by the data-ingestion stage."""

    # Working directory of the ingestion stage.
    root_dir: Path
    # URL the archive is downloaded from.
    source_URL: str
    # Local path of the downloaded archive (opened later as a zip file).
    local_data_file: Path
    # Directory the archive is extracted into.
    unzip_dir: Path
    

In [7]:
# from constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH


# class ConfigurationManager:
#     def __init__(
#         self,
#         config_filepath: Path,
#         params_filepath: Path):
        
        
        
#         self.config = read_yaml(config_filepath)
#         self.params = read_yaml(params_filepath)
        
#         create_directories([self.config.artifacts_root])
        
        
#     def get_data_ingestion_config(self) -> DataIngestionConfig:
#         config = self.config.data_ingestion
        
#         create_directories([config.root_dir])
        
#         data_ingestion_config = DataIngestionConfig(
#             root_dir=config.root_dir,
#             source_URL=config.source_URL,
#             local_data_file=config.local_data_file,
#             unzip_dir=config.unzip_dir
#         )
        
#         return data_ingestion_config



# Relies on read_yaml, create_directories and DataIngestionConfig from the cells above

class ConfigurationManager:
    """Load the YAML configuration files and build per-stage config objects.

    The loaded documents are accessed with item lookup (``config['key']``).
    """

    def __init__(self, config_filepath: Path, params_filepath: Path):
        """Read both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config['artifacts_root']
        create_directories([artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the data-ingestion settings from the ``data_ingestion`` section.

        The stage's root directory is created before the settings object is
        assembled. Path-like entries are converted to ``Path``; the source URL
        is passed through unchanged.
        """
        section = self.config['data_ingestion']

        stage_root = Path(section['root_dir'])
        create_directories([stage_root])

        return DataIngestionConfig(
            root_dir=stage_root,
            source_URL=section['source_URL'],
            local_data_file=Path(section['local_data_file']),
            unzip_dir=Path(section['unzip_dir']),
        )

        
 

In [8]:
class DataIngestion:
    """Download the source archive and extract it, as described by the config."""

    def __init__(self, config: DataIngestionConfig):
        """Store the ingestion settings.

        Args:
            config: Object exposing ``source_URL``, ``local_data_file`` and
                ``unzip_dir``.
        """
        self.config = config

    def download_file(self):
        """Download ``config.source_URL`` to ``config.local_data_file`` if absent.

        When the destination already exists nothing is downloaded and its size
        is logged instead. Otherwise the payload is written to a sibling
        ``<name>.part`` file and renamed into place only after the transfer
        finished, so an interrupted download never leaves a truncated file
        that later runs would mistake for a complete one.

        Raises:
            Whatever ``urllib.request.urlretrieve`` raises on a failed
            transfer; the temporary file is removed before the error
            propagates.
        """
        target = Path(self.config.local_data_file)
        if os.path.exists(target):
            logger.info(f"file already exists of size: {get_size(target)}")
            return

        # The destination directory may not exist yet when this class is used
        # without the configuration manager having created it.
        target.parent.mkdir(parents=True, exist_ok=True)

        partial = target.with_name(target.name + ".part")
        try:
            _, headers = request.urlretrieve(
                url=self.config.source_URL,
                filename=partial,
            )
            # Atomic rename: the final name only ever refers to a complete file.
            os.replace(partial, target)
        finally:
            # Only present here if the download or the rename failed.
            if partial.exists():
                partial.unlink()

        logger.info(f"{self.config.local_data_file} downloaded with following info: \n{headers}")

    def unzip_data(self):
        """Extract the downloaded archive into ``config.unzip_dir``.

        The target directory is created when missing. Returns None.
        """
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_file, "r") as zip_ref:
            zip_ref.extractall(unzip_path)
        
        
        

In [9]:
# Run the data-ingestion stage end to end: load the configuration, then
# download and extract the archive. There is deliberately no try/except here:
# a handler that only re-raises adds nothing, and any failure should surface
# with its original traceback.
config = ConfigurationManager(config_filepath=config_filepath, params_filepath=params_filepath)
data_ingestion_config = config.get_data_ingestion_config()
data_ingestion = DataIngestion(config=data_ingestion_config)
data_ingestion.download_file()
data_ingestion.unzip_data()

[ 2024-03-25 23:51:22,766 - INFO - common -yaml file: C:\Test-Summarizer\config\config.yaml loaded successfully]
[ 2024-03-25 23:51:22,768 - INFO - common -yaml file: C:\Test-Summarizer\params.yaml loaded successfully]
[ 2024-03-25 23:51:22,773 - INFO - common -Created directory at: artifacts]
[ 2024-03-25 23:51:22,776 - INFO - common -Created directory at: artifacts\data_ingestion]
[ 2024-03-25 23:51:22,779 - INFO - 907021102 -file already exists of sie: ~ 7718 KB]
