In [14]:
import os

In [15]:
os.chdir('../')
%pwd

'd:\\codes\\mlops\\Kidney-Disease-Classification'

In [16]:
from pathlib import Path
from dataclasses import dataclass


@dataclass(frozen= True)
class DataIngestionConfig:
    root_dir : Path
    source_url : str
    local_data_file : Path
    unzip_dir : Path

In [17]:
import os, sys
from src.cnnClassifier.constants import *
from src.cnnClassifier.utils.utils import read_yaml , create_direcories
from src.cnnClassifier.exception.exception import customexception

In [18]:
class ConfigurationManager():
    def __init__(self, CONFIG_FILE_PATH =CONFIG_FILE_PATH , PARAMS_FILE_PATH = CONFIG_FILE_PATH):
        self.config = read_yaml(CONFIG_FILE_PATH)
        self.params = read_yaml(PARAMS_FILE_PATH)
        create_direcories([self.config.artifacts_root])
        
    def get_data_ingestion_config(self) -> DataIngestionConfig:
        try:
            config  = self.config.data_ingestion
            
            create_direcories([config.root_dir])
            data_ingestion_config = DataIngestionConfig(
                root_dir = Path(config.root_dir),
                source_url = config.source_url,
                local_data_file = Path(config.local_data_file),
                unzip_dir = Path(config.unzip_dir)
            )
            return data_ingestion_config
        except Exception as e:
            raise customexception(e, sys)
            

In [19]:
import os , sys
import urllib.request as request
import zipfile
import gdown
from src.cnnClassifier import logger
from src.cnnClassifier.exception.exception import customexception

In [20]:
class DataIngestion():
    def __init__(self, config : DataIngestionConfig):
        self.config = config
    
    def download_data(self):
        try: 
            dataset_url = self.config.source_url
            zip_download_dir = self.config.local_data_file
            os.makedirs("artifacts/data_ingestion", exist_ok=True)
            logger.info(f"Downloading data from {dataset_url} into file {zip_download_dir}")

            file_id = dataset_url.split("/")[-2]
            prefix = 'https://drive.google.com/uc?/export=download&id='
            gdown.download(prefix + file_id, str(zip_download_dir))

            logger.info(f"Downloaded data from {dataset_url} into file {zip_download_dir}")

        except Exception as e:
            raise e
        
    def extract_zip_file(self):
      
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:
            zip_ref.extractall(unzip_path)

In [21]:
try:
    config = ConfigurationManager()
    data_ingestion_config = config.get_data_ingestion_config()
    data_ingestion = DataIngestion(data_ingestion_config)
    data_ingestion.download_data()
    data_ingestion.extract_zip_file()
    
except Exception as e:
    raise customexception(e, sys)

[2024-12-31 23:32:54,773: INFO: utils: yaml file : config\config.yaml read successfully]
[2024-12-31 23:32:54,777: INFO: utils: yaml file : config\config.yaml read successfully]
[2024-12-31 23:32:54,780: INFO: utils: directory : artifacts created successfully]
[2024-12-31 23:32:54,782: INFO: utils: directory : artifacts\data_ingestion created successfully]
[2024-12-31 23:32:54,783: INFO: 3625337964: Downloading data from https://drive.google.com/file/d/1aRKQLbdBbrSb0B--OI_LXUpLH2JpVfAU/view?usp=sharing into file artifacts\data_ingestion\data.zip]


Downloading...
From (original): https://drive.google.com/uc?/export=download&id=1aRKQLbdBbrSb0B--OI_LXUpLH2JpVfAU
From (redirected): https://drive.google.com/uc?%2Fexport=download&id=1aRKQLbdBbrSb0B--OI_LXUpLH2JpVfAU&confirm=t&uuid=0c53dbbf-1fde-45ee-9580-b8d9b2f13c6b
To: d:\codes\mlops\Kidney-Disease-Classification\artifacts\data_ingestion\data.zip
100%|██████████| 57.7M/57.7M [00:04<00:00, 12.2MB/s]


[2024-12-31 23:33:04,327: INFO: 3625337964: Downloaded data from https://drive.google.com/file/d/1aRKQLbdBbrSb0B--OI_LXUpLH2JpVfAU/view?usp=sharing into file artifacts\data_ingestion\data.zip]
