In [53]:
import os

In [54]:
%cd "C:\Users\dmsum\OneDrive\Desktop\End-to-End-Deep-Learning-Project-Chicken-Disease-Classification"

C:\Users\dmsum\OneDrive\Desktop\End-to-End-Deep-Learning-Project-Chicken-Disease-Classification


In [55]:
%pwd

'C:\\Users\\dmsum\\OneDrive\\Desktop\\End-to-End-Deep-Learning-Project-Chicken-Disease-Classification'

In [56]:
# os.chdir("../")

In [57]:
# %pwd

## Update the entity

In [58]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable settings for the data-ingestion stage.

    Instances are built by ``ConfigurationManager.get_data_ingestion_config``
    from the ``data_ingestion`` section of the loaded configuration and are
    consumed by ``DataIngestion``.
    """
    # Working directory of the stage; created before downloading.
    root_dir: Path
    # Path the downloaded file is written to and later opened as a zip archive.
    local_data_file: Path
    # Directory the archive contents are extracted into.
    unzip_dir: Path
    # Key file handed to ServiceAccountCredentials.from_json_keyfile_name.
    service_account_file: str
    # Google Drive folder id used when download_folder() is called without one.
    root_folder_id: str
    # Passed as ``scopes`` when building the service-account credentials.
    scopes: list
    # NOTE(review): not referenced by the code visible in this notebook — confirm intended use.
    local_dir: str

## Update the configuration manager in src config

In [59]:
from cnnClassifier.constants import *
from cnnClassifier.utils.common import read_yaml, create_directories

In [60]:
class ConfigurationManager:
    """Load the project's YAML settings and build typed stage configs from them."""

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Read both YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # Every stage writes below this directory, so create it up front.
        create_directories([self.config.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the data-ingestion settings from the ``data_ingestion`` section.

        The stage's root directory is created as a side effect.

        Returns:
            DataIngestionConfig populated field-for-field from the configuration.
        """
        section = self.config.data_ingestion
        create_directories([section.root_dir])

        return DataIngestionConfig(
            root_dir=section.root_dir,
            local_data_file=section.local_data_file,
            unzip_dir=section.unzip_dir,
            service_account_file=section.service_account_file,
            root_folder_id=section.root_folder_id,
            scopes=section.scopes,
            local_dir=section.local_dir,
        )


## Update the components

In [61]:
import os
import zipfile
from pydrive2.auth import GoogleAuth
from pydrive2.drive import GoogleDrive
from oauth2client.service_account import ServiceAccountCredentials
from cnnClassifier import logger
from cnnClassifier.utils.common import get_size
from dotenv import load_dotenv

In [68]:
class DataIngestion:
    """Fetch the dataset archive from Google Drive and unpack it locally."""

    def __init__(self, config: "DataIngestionConfig"):
        """Store the stage settings and open an authenticated Drive client.

        Args:
            config: Settings for this stage (credentials, Drive folder id,
                local paths).
        """
        self.config = config
        self.drive = self._authenticate()

    def _authenticate(self):
        """Authenticate Google Drive using service account credentials.

        Returns:
            A ``GoogleDrive`` client whose auth object carries the
            service-account credentials.
        """
        credentials = ServiceAccountCredentials.from_json_keyfile_name(
            self.config.service_account_file, scopes=self.config.scopes
        )
        gauth = GoogleAuth()
        gauth.credentials = credentials
        return GoogleDrive(gauth)

    @staticmethod
    def _looks_like_zip(entry):
        """Return True when a Drive file entry is recognisably a zip archive."""
        title = str(entry['title']).lower()
        return title.endswith('.zip') or entry['mimeType'] in (
            'application/zip',
            'application/x-zip-compressed',
        )

    def download_folder(self, folder_id=None):
        """Download the archive stored directly inside a Google Drive folder.

        Sub-folders are skipped (no recursion). Every selected file is written
        to ``config.local_data_file``, so if several are selected the last one
        listed wins. Entries that look like zip archives are preferred so an
        unrelated file in the same folder cannot overwrite the archive; only
        when no entry is recognisable as a zip are all regular files
        downloaded.

        Args:
            folder_id: Drive folder to read; defaults to
                ``config.root_folder_id`` when None.

        Returns:
            None
        """
        if folder_id is None:
            folder_id = self.config.root_folder_id

        os.makedirs(self.config.root_dir, exist_ok=True)

        # The download target may live outside root_dir; its parent directory
        # has to exist before the file can be written.
        target_path = self.config.local_data_file
        target_parent = os.path.dirname(os.fspath(target_path))
        if target_parent:
            os.makedirs(target_parent, exist_ok=True)

        file_list = self.drive.ListFile(
            {'q': f"'{folder_id}' in parents and trashed=false"}
        ).GetList()

        regular_files = [
            entry for entry in file_list
            if entry['mimeType'] != 'application/vnd.google-apps.folder'
        ]
        archives = [entry for entry in regular_files if self._looks_like_zip(entry)]
        # Fall back to every regular file when nothing is recognisably a zip,
        # which keeps folders holding an oddly named archive working.
        selected = archives or regular_files

        for file in selected:
            file_title = file['title']
            print(f"Downloading {file_title}...")
            file.GetContentFile(target_path)  # each download overwrites this path
            print(f"{file_title} downloaded at {target_path}!")

    def extract_zip_file(self):
        """Extract ``config.local_data_file`` into ``config.unzip_dir``.

        The target directory is created when missing.

        Returns:
            None
        """
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:
            zip_ref.extractall(unzip_path)



## Update the pipeline

In [69]:
# Run the data-ingestion stage end to end: load settings, download, extract.
try:
    config = ConfigurationManager()
    data_ingestion_config = config.get_data_ingestion_config()
    data_ingestion = DataIngestion(config=data_ingestion_config)

    # Fetch the archive from Drive, then unpack it into the configured directory.
    data_ingestion.download_folder()
    data_ingestion.extract_zip_file()

except Exception:
    # Record the failure in the project log, then re-raise with the original
    # traceback intact (a bare ``raise`` avoids adding an extra frame).
    logger.exception("Data ingestion stage failed")
    raise

[2025-10-04 19:26:42,803: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-10-04 19:26:42,807: INFO: common: yaml file: params.yaml loaded successfully]
[2025-10-04 19:26:42,809: INFO: common: created directory at: artifacts]
[2025-10-04 19:26:42,812: INFO: common: created directory at: artifacts/data_ingestion]
[2025-10-04 19:26:42,843: INFO: transport: Attempting refresh to obtain initial access_token]
[2025-10-04 19:26:42,847: INFO: client: Refreshing access_token]
Downloading Chicken-fecal-images.zip...
Chicken-fecal-images.zip downloaded at Downloaded_Folder/Chicken-fecal-images.zip!
