In [1]:
!python -V

Python 3.10.10


In [6]:
!pwd

/teamspace/studios/this_studio


In [7]:
import os
os.chdir('/teamspace/studios/this_studio/100-class-image-classifaction')

In [18]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    root_dir : Path
    source_url: str
    local_data_file: Path

In [19]:
from src.LCIC.constants import *
from src.LCIC.utils.common import read_yaml, create_directories
import opendatasets as od

In [33]:
class ConfigurationManager:
    def __init__(self, 
            config_path:Path = CONFIG_FILE_PATH, 
            params_path : Path = PARAMS_FILE_PATH):
        self.config = read_yaml(config_path)
        self.params = read_yaml(params_path)

        create_directories([self.config.artifacts_root])
    
    def get_data_ingestion_config(self)->DataIngestionConfig:
        config = self.config.data_ingestion
        create_directories([config.root_dir])

        data_ingestion_cfg = DataIngestionConfig(
            root_dir= config.root_dir,
            source_url= config.source_url,
            local_data_file= config.local_data_file
        )
        return data_ingestion_cfg

In [34]:
import os
import opendatasets as od
from src.LCIC import logger

In [37]:
class DataIngestion:

    def __init__(self, config: DataIngestionConfig):
        self.config = config

    def download_data(self):
        """
        Fetch the specified dataset from internet and store the dataset locally.
        return: None
        """

        try:
            download_url = self.config.source_url
            save_at = self.config.local_data_file
            logger.info(f"Downloading dataset form {download_url}")
            od.download(dataset_id_or_url=download_url, data_dir=save_at,dry_run=True)
            logger.info(f"Successfully saved at {save_at}")
        except Exception as e:
            logger.error(f"Error in downloading data. {e}")
            raise e


In [39]:
try:
    config = ConfigurationManager()
    data_ingestion_config = config.get_data_ingestion_config()
    data_ingestion_obj = DataIngestion(config=data_ingestion_config)
    data_ingestion_obj.download_data()
except Exception as e:
    raise e

[2024-01-09 13:34:40,389]: INFO: common: yaml file: config/config.yaml loaded successfully!
[2024-01-09 13:34:40,392]: INFO: common: yaml file: params.yaml loaded successfully!
[2024-01-09 13:34:40,394]: INFO: common: Created directory at : artifacts
[2024-01-09 13:34:40,395]: INFO: common: Created directory at : artifacts/data_ingestion
[2024-01-09 13:34:40,396]: INFO: 3969202021: Downloading dataset form https://www.kaggle.com/datasets/gpiosenka/sports-classification
Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username:Your Kaggle Key:This is a dry run, skipping..
[2024-01-09 13:34:52,960]: INFO: 3969202021: Successfully saved at artifacts/data
