In [10]:
import os

In [11]:
%pwd

'c:\\Users\\owais\\Desktop\\python\\company\\DEEP LEARNING\\Chicken-Disease-Classification--Project'

In [12]:
os.chdir("../")

In [13]:
%pwd

'c:\\Users\\owais\\Desktop\\python\\company\\DEEP LEARNING'

In [14]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    root_path: Path
    local_data_file: Path
    unzip_file: Path
    

In [15]:
from cnnClassifier.constants import  *
from cnnClassifier.UTILS.common import read_yaml ,create_directories

In [85]:
class ConfigurationManager:
    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        self.config = read_yaml(str(config_filepath))
        self.params = read_yaml(str(params_filepath))
        create_directories([self.config['artifacts_root']])
        print(f"Configuration loaded: {self.config}")

    def get_data_ingestion(self) -> DataIngestionConfig:
        # Debugging output
        print(f"Config loaded: {self.config}")

        # Access `data_ingestion` section
        config = self.config.get('data_ingestion', None)

        if config is None:
            raise ValueError("Data ingestion configuration is missing or not defined correctly.")
        
        # Check if required keys are present
        required_keys = ['root_dir', 'source_url', 'local_data_file', 'unzip_dir']
        for key in required_keys:
            if key not in config:
                raise ValueError(f"Missing required key '{key}' in data ingestion configuration.")
        
        create_directories([config['root_dir']])
        
        data_ingestion_config = DataIngestionConfig(
            root_dir=config['root_dir'],
            source_url=config['source_url'],
            local_data_file=config['local_data_file'],
            unzip_dir=config['unzip_dir']
        )
        
        return data_ingestion_config


In [87]:
import os
import zipfile
from urllib import request
from pathlib import Path
import logging

# Set up logging
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Function to get file size
def get_size(path: Path) -> int:
    return os.path.getsize(path)

class DataIngestionConfig:
    def __init__(self, root_dir: str, source_url: str, local_data_file: str, unzip_dir: str):
        self.root_dir = root_dir
        self.source_url = source_url
        self.local_data_file = local_data_file
        self.unzip_dir = unzip_dir

    def download_file(self):
        try:
            if not os.path.exists(self.local_data_file):
                # Download the file
                filename, headers = request.urlretrieve(
                    url=self.source_url,
                    filename=self.local_data_file
                )
                logger.info(f"{filename} downloaded with the following info:\n{headers}")
            else:
                # File already exists, log its size
                file_size = get_size(Path(self.local_data_file))
                logger.info(f"File already exists of size: {file_size}")
        except Exception as e:
            logger.error(f"Failed to download file: {e}")
            raise

    def extract_zip_file(self):
        """Extract zip file into the data directory."""
        try:
            # Ensure the directory exists
            os.makedirs(self.unzip_dir, exist_ok=True)

            # Extract the zip file
            with zipfile.ZipFile(self.local_data_file, 'r') as zip_ref:
                zip_ref.extractall(self.unzip_dir)
            logger.info(f"Extracted files to {self.unzip_dir}")
        except Exception as e:
            logger.error(f"Failed to extract zip file: {e}")
            raise


In [88]:
try:
    config = ConfigurationManager()
    
    # Use the method `get_data_ingestion` instead of `get_data_ingestion_config`
    data_ingestion_config = config.get_data_ingestion()
    
    data_ingestion = DataIngestion(config=data_ingestion_config)
    data_ingestion.download_file()
    data_ingestion.extract_zip_file()


except Exception as e:
    print(f"An error occurred: {e}")
    raise e


YAML content: {'artifacts_root': 'artifacts', 'data_ingestion': {'root_dir': 'artifacts/data_ingestion', 'source_url': 'https://github.com/syedowais57/DATASET-REPOSITORY/raw/main/archive%20(9).zip', 'local_data_file': 'artifacts/data_ingestion/data.zip', 'unzip_dir': 'artifacts/data_ingestion'}}
YAML content: {'param1': 'value1', 'param2': 'value2'}
[2024-09-02 18:09:52,677 : INFO : common : Created directory at: artifacts]
Configuration loaded: {'artifacts_root': 'artifacts', 'data_ingestion': {'root_dir': 'artifacts/data_ingestion', 'source_url': 'https://github.com/syedowais57/DATASET-REPOSITORY/raw/main/archive%20(9).zip', 'local_data_file': 'artifacts/data_ingestion/data.zip', 'unzip_dir': 'artifacts/data_ingestion'}}
Config loaded: {'artifacts_root': 'artifacts', 'data_ingestion': {'root_dir': 'artifacts/data_ingestion', 'source_url': 'https://github.com/syedowais57/DATASET-REPOSITORY/raw/main/archive%20(9).zip', 'local_data_file': 'artifacts/data_ingestion/data.zip', 'unzip_dir'