In [1]:
import os

In [2]:
%pwd

'd:\\Data Science\\project Series\\Chest_Cancer_Classification_DL\\research'

In [3]:
# Step up one directory from the notebook's folder; per the %pwd outputs around
# this cell that moves from .../research to the project root, which is where the
# relative paths used later (e.g. config\config.yaml in the log output) resolve.
# NOTE(review): this is a relative chdir, so re-running the cell without
# restarting the kernel moves up one more level each time.
os.chdir('../')

In [4]:
%pwd

'd:\\Data Science\\project Series\\Chest_Cancer_Classification_DL'

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable settings bundle for the data-ingestion step.

    Instances are frozen: fields are set once at construction and any later
    assignment raises an error.

    Attributes:
        root_dir: Working directory of the ingestion step (created before the
            download starts).
        source_url: Location the dataset archive is downloaded from.
        local_data_file: Path the downloaded archive is written to.
        unzip_data_dir: Directory the archive contents are extracted into.
    """
    # Field order is part of the constructor signature; keep it stable.
    root_dir: Path
    source_url: str
    local_data_file: Path
    unzip_data_dir: Path

In [6]:
from Cnnclassifier.utils.common import read_yaml, create_directories
from Cnnclassifier.constants import * 

In [7]:
class ConfigManager:
    """Loads the project's YAML settings and hands out typed config objects."""

    def __init__(self,
                 config_filepath: Path = CONFIG_FILE_PATH,
                 params_filepath: Path = PARAMS_FILE_PATH):
        """Read both YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # The artifacts root is created up front, as soon as the manager is built.
        create_directories([self.config.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build a DataIngestionConfig from the ``data_ingestion`` config section.

        Returns:
            A DataIngestionConfig whose path-like entries are wrapped in ``Path``;
            ``source_url`` is passed through as read from the YAML.
        """
        section = self.config.data_ingestion

        return DataIngestionConfig(
            root_dir=Path(section.root_dir),
            source_url=section.source_url,  # attribute name mirrors the YAML key
            local_data_file=Path(section.local_data_file),
            unzip_data_dir=Path(section.unzip_data_dir),
        )

In [9]:
# src/components/data_ingestion.py

import gdown
import zipfile
from pathlib import Path
#from Cnnclassificatier.config.configuration import ConfigManager
#from Cnnclassificatier.entitys.data_ingestion_entity import DataIngestionArtifact
from Cnnclassifier import logger
import os

class DataIngestion:
    """Downloads the dataset archive and extracts it, driven by a DataIngestionConfig.

    Annotations that refer to sibling notebook classes are quoted so they are not
    evaluated when this class is defined.
    """

    def __init__(self, config: "ConfigManager"):
        """Resolve the ingestion settings from the given configuration manager.

        Args:
            config: Object exposing ``get_data_ingestion_config()``; its return
                value is stored on ``self.config``.
        """
        self.config = config.get_data_ingestion_config()

    def download_data(self) -> Path:
        """Download the ZIP file from Google Drive.

        Returns:
            The configured ``local_data_file`` path the archive was written to.

        Raises:
            RuntimeError: If ``gdown.download`` reports failure by returning None.
            Exception: Any error raised while downloading is logged and re-raised.
        """
        try:
            os.makedirs(self.config.root_dir, exist_ok=True)
            # local_data_file is configured separately from root_dir, so make
            # sure its own parent folder exists as well.
            archive_parent = os.path.dirname(str(self.config.local_data_file))
            if archive_parent:
                os.makedirs(archive_parent, exist_ok=True)

            logger.info(f"Downloading data from: {self.config.source_url}")
            downloaded = gdown.download(
                self.config.source_url,
                str(self.config.local_data_file),
                quiet=False,
            )
            # NOTE(review): some gdown releases signal a failed download by
            # returning None instead of raising; surface that here rather than
            # letting the unzip step fail later on a missing file.
            if downloaded is None:
                raise RuntimeError(
                    f"gdown could not download data from: {self.config.source_url}"
                )
            logger.info(f"Data downloaded successfully at: {self.config.local_data_file}")
            return self.config.local_data_file
        except Exception as e:
            logger.exception(f"Error occurred while downloading data: {e}")
            raise  # bare raise keeps the original traceback intact

    def unzip_data(self, zip_file_path: Path) -> Path:
        """Unzip the downloaded file.

        Args:
            zip_file_path: Path of the ZIP archive to extract.

        Returns:
            The configured ``unzip_data_dir`` the archive was extracted into.

        Raises:
            Exception: Any error raised while extracting is logged and re-raised.
        """
        try:
            logger.info(f"Extracting ZIP file: {zip_file_path}")
            with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
                zip_ref.extractall(self.config.unzip_data_dir)
            logger.info(f"Data extracted successfully at: {self.config.unzip_data_dir}")
            return self.config.unzip_data_dir
        except Exception as e:
            logger.exception(f"Error occurred while extracting ZIP file: {e}")
            raise  # bare raise keeps the original traceback intact

    def initiate_data_ingestion(self) -> "DataIngestionConfig":
        """Main method to run download and unzip sequentially.

        Returns:
            A copy of the ingestion config whose ``local_data_file`` and
            ``unzip_data_dir`` are the paths returned by the two steps; the
            remaining fields are carried over unchanged.
        """
        # Local import: only this method needs it.
        from dataclasses import replace

        zip_file = self.download_data()
        unzip_dir = self.unzip_data(zip_file)
        # The config dataclass requires root_dir and source_url too; building it
        # from only the two result paths raises TypeError, so derive the result
        # from the existing config instead.
        return replace(self.config, local_data_file=zip_file, unzip_data_dir=unzip_dir)


In [None]:

# Build the configuration manager (reads the YAML files and creates the
# artifacts root), then run the download + extraction steps in sequence.
# NOTE(review): this cell needs network access to the configured source URL;
# the recorded output below shows a run failing on DNS resolution for
# drive.google.com.
config = ConfigManager()
data_ingestion = DataIngestion(config)
artifact = data_ingestion.initiate_data_ingestion()

# Report where the archive and the extracted data ended up.
print(f"ZIP file path: {artifact.local_data_file}")
print(f"Extracted data path: {artifact.unzip_data_dir}")


[2025-12-18 05:27:14,320 : INFO : cnnclassifierlogger : yaml file: config\config.yaml loaded successfully]
[2025-12-18 05:27:14,322 : INFO : cnnclassifierlogger : yaml file: params.yaml loaded successfully]
[2025-12-18 05:27:14,323 : INFO : cnnclassifierlogger : Created directory at: artifacts]
[2025-12-18 05:27:14,326 : INFO : cnnclassifierlogger : Downloading data from: https://drive.google.com/uc?export=download&id=1z0mreUtRmR-P-magILsDR3T7M6IkGXtY]
[2025-12-18 05:27:25,636 : ERROR : cnnclassifierlogger : Error occurred while downloading data: HTTPSConnectionPool(host='drive.google.com', port=443): Max retries exceeded with url: /uc?export=download&id=1z0mreUtRmR-P-magILsDR3T7M6IkGXtY (Caused by NameResolutionError("HTTPSConnection(host='drive.google.com', port=443): Failed to resolve 'drive.google.com' ([Errno 11001] getaddrinfo failed)"))]
Traceback (most recent call last):
  File "c:\Users\Qosain\anaconda3\envs\chest_cancer_dl\Lib\site-packages\urllib3\connection.py", line 204, i

ConnectionError: HTTPSConnectionPool(host='drive.google.com', port=443): Max retries exceeded with url: /uc?export=download&id=1z0mreUtRmR-P-magILsDR3T7M6IkGXtY (Caused by NameResolutionError("HTTPSConnection(host='drive.google.com', port=443): Failed to resolve 'drive.google.com' ([Errno 11001] getaddrinfo failed)"))