In [2]:
import os

In [3]:
%pwd

'c:\\Users\\supre\\PycharmProjects\\RenalHealth-AI\\research'

In [4]:
os.chdir("../")

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
  root_dir: Path
  source_URL: str
  local_data_file: Path
  unzip_dir: Path

In [6]:
from cnn_classifier.constants import *
from cnn_classifier.utils.common import read_yaml, create_directories 

In [7]:
class ConfigurationManager:
  def __init__(
      self,
      config_file_path: Path = CONFIG_FILE_PATH,
      params_file_path: Path = PARAMS_FILE_PATH,
  ):
    self.config = read_yaml(config_file_path)
    self.params = read_yaml(params_file_path)

    create_directories([self.config.artifacts_root])

  
  def get_data_ingestion_config(self) -> DataIngestionConfig:
    config = self.config.data_ingestion

    create_directories([config.root_dir])

    data_ingestion_config = DataIngestionConfig(
      root_dir=config.root_dir,
      source_URL=config.source_URL,
      local_data_file=config.local_data_file,
      unzip_dir=config.unzip_dir,
    )

    return data_ingestion_config

In [8]:
import os
import zipfile
import gdown
from cnn_classifier import logger
from cnn_classifier.utils.common import get_size

In [9]:
class DataIngestion:
  def __init__(self, config: DataIngestionConfig):
    self.config = config

  def download_data(self) -> str:
    """fetch data from the url

    :return: _description_
    :rtype: str
    """
    try:
      dataset_url = self.config.source_URL
      zip_download_dir = self.config.local_data_file
      root_dir = self.config.root_dir
      os.makedirs(root_dir, exist_ok=True)
      logger.info("Downloading data from {dataset_url} into file {zip_download_dir}")

      file_id = dataset_url.split("/")[-2]
      prefix = "https://drive.google.com/uc?/export=download&id="
      gdown.download(prefix + file_id, zip_download_dir)

      logger.info(f"Downloaded data from {dataset_url} into file {zip_download_dir}")
    
    except Exception as e:
      raise e
  
  def unzip_data(self):
    """extract the zip file into the data dictionary
    """
    unzip_path = self.config.unzip_dir
    os.makedirs(unzip_path,exist_ok=True)
    with zipfile.ZipFile(self.config.local_data_file, "r") as zip_ref:
      zip_ref.extractall(unzip_path)

    

In [11]:
try:
  config = ConfigurationManager()
  data_ingestion_config = config.get_data_ingestion_config()
  data_ingestion = DataIngestion(config=data_ingestion_config)
  data_ingestion.download_data()
  data_ingestion.unzip_data()
except Exception as e:
  raise e
  

[2024-04-01 10:07:18,548: INFO: common: yaml file: config\config.yml loaded successfully]
[2024-04-01 10:07:18,549: INFO: common: yaml file: params.yml loaded successfully]
[2024-04-01 10:07:18,550: INFO: common: created directory at: artifiacts]
[2024-04-01 10:07:18,551: INFO: common: created directory at: artifacts/data_ingestion]
[2024-04-01 10:07:18,552: INFO: 1118113068: Downloading data from {dataset_url} into file {zip_download_dir}]


Downloading...
From (original): https://drive.google.com/uc?/export=download&id=1MXrb5wLHJUfpj1-k-DB6glKmwPwealsO
From (redirected): https://drive.google.com/uc?%2Fexport=download&id=1MXrb5wLHJUfpj1-k-DB6glKmwPwealsO&confirm=t&uuid=31e1e92f-1a5a-4588-92a6-3c6f3f35e364
To: c:\Users\supre\PycharmProjects\RenalHealth-AI\artifacts\data_ingestion\data.zip
100%|██████████| 1.63G/1.63G [01:48<00:00, 15.1MB/s]

[2024-04-01 10:09:09,645: INFO: 1118113068: Downloaded data from https://drive.google.com/file/d/1MXrb5wLHJUfpj1-k-DB6glKmwPwealsO/view?usp=sharing into file artifacts/data_ingestion/data.zip]



