## Test data ingestion in a notebook before building the components

In [1]:
import os

In [3]:
# Move from the notebook folder up to the project root so that `src.*` imports
# and the relative paths in config.yaml resolve. The guard makes the cell
# idempotent: once `src/` is visible we are already at the root, and re-running
# the cell must not climb one directory further.
if not os.path.isdir("src"):
    os.chdir("../")

In [4]:
%pwd

'c:\\Tom\\HKA\\7_Semester\\Domänenprojekt_2\\DoPro'

### Data Ingestion Config Class (entity)

In [5]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class DataIngestionConfig:
    """
    Immutable value object holding the data ingestion config params.

    :ivar root_dir: Directory into which data will be loaded.
    :ivar source_url: URL where the data is located.
    """

    root_dir: Path
    source_url: str

In [6]:
from src.dopro2_HEFTcom_challenge.constants import PARAMS_FILE_PATH, CONFIG_FILE_PATH
import yaml
from loguru import logger

In [7]:
class ConfigurationManager:
    """
    Class to manage all configurations.

    Parses config.yaml on construction, keeps the result in ``self.config``
    and creates the directories named in it.
    """

    def __init__(
        self,
        config_filepath: Path = CONFIG_FILE_PATH,
        params_filepath: Path = PARAMS_FILE_PATH
    ) -> None:
        """
        Constructor for ConfigurationManager Class.
        Loads config.yaml and creates the artifacts folder.

        :param config_filepath: Path to config.yaml file
        :param params_filepath: Path to params.yaml file. Currently not read.
        :raises ValueError: if config.yaml does not hold a mapping at top level
        """
        # Path(...) also tolerates plain string paths. The explicit encoding
        # keeps parsing independent of the platform's locale default
        # (e.g. cp1252 on Windows).
        with Path(config_filepath).open("r", encoding="utf-8") as f:
            loaded = yaml.safe_load(f)

        # An empty file parses to None; fail here with a clear message instead
        # of an opaque subscript error on the lookups below.
        if not isinstance(loaded, dict):
            raise ValueError(
                f"{config_filepath} must contain a YAML mapping, "
                f"got {type(loaded).__name__}"
            )
        self.config: dict = loaded

        artifacts_root = self.config["artifacts_root"]
        os.makedirs(artifacts_root, exist_ok=True)
        logger.info("created directory at: {}", artifacts_root)

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """
        Get the data ingestion config params and create the stage folder.

        Reads the ``data_ingestion`` section of the loaded config, makes sure
        its ``root_dir`` exists and wraps the values in a DataIngestionConfig.

        :return: values from config.yaml
        :rtype: DataIngestionConfig
        :raises KeyError: if the section or one of its keys is missing
        """
        section = self.config["data_ingestion"]

        os.makedirs(section["root_dir"], exist_ok=True)
        logger.info("created directory at: {}", section["root_dir"])

        # Values are handed over exactly as they appear in config.yaml.
        return DataIngestionConfig(
            root_dir=section["root_dir"],
            source_url=section["source_url"]
        )

In [8]:
import gdown

In [12]:
class DataIngestion:
    """Class to perform data ingestion."""

    def __init__(self, config: DataIngestionConfig) -> None:
        """
        Constructor for DataIngestion class.

        :param config: config values from config.yaml
        """

        self.config = config

    def download_files(self) -> None:
        """
        Fetch data from source url into the configured root directory.

        :raises RuntimeError: if gdown reports a failed download
        """
        data_url: str = self.config.source_url
        # Normalise to str so that a Path root_dir (as the config dataclass
        # declares it) works just like the raw string from config.yaml.
        download_dir: str = str(self.config.root_dir)
        logger.info("Downloading data from {} into folder {}", data_url, download_dir)

        downloaded = gdown.download_folder(data_url, output=download_dir)
        # gdown.download_folder signals failure by returning None (per its
        # documentation) rather than raising; do not log success in that case.
        if downloaded is None:
            raise RuntimeError(
                f"Download from {data_url} into folder {download_dir} failed"
            )
        logger.info("Downloaded data from {} into folder {}", data_url, download_dir)

In [13]:
# Run the ingestion stage end to end: load the configuration, build the stage
# and download the data. Exceptions propagate unchanged, so no try/except
# wrapper that merely re-raises is needed.
config = ConfigurationManager()
data_ingestion_config = config.get_data_ingestion_config()
data_ingestion = DataIngestion(config=data_ingestion_config)
data_ingestion.download_files()

[32m2024-10-07 20:13:43.809[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m24[0m - [1mcreated directory at: artifacts[0m
[32m2024-10-07 20:13:43.811[0m | [1mINFO    [0m | [36m__main__[0m:[36mget_data_ingestion_config[0m:[36m36[0m - [1mcreated directory at: artifacts/data_ingestion[0m
[32m2024-10-07 20:13:43.812[0m | [1mINFO    [0m | [36m__main__[0m:[36mdownload_files[0m:[36m19[0m - [1mDownloading data from https://drive.google.com/drive/folders/1bNRBY0G1ylOUsJEK5DirXKsBBZkNmmRP?usp=sharing into folder artifacts/data_ingestion[0m
Retrieving folder contents


Processing file 1ZaSlslhGaj0aSWkUMhvSmHhY1y_2qzS4 dwd_icon_eu_hornsea_1_20200920_20231027_ws.parquet
Processing file 1V8Ta3H6DcSK0O7o7w6vxJZbGAxK35nD9 dwd_icon_eu_pes10_20200920_20231027_sdr.parquet
Processing file 1Y8ryBYnb3CewCUJjUpseyju2xW3WusSU Energy_Data_20200920_20231027.csv
Processing file 1cpi-co6y6tL7E_8t98pBXUKRrumjHn_S Energy_Data_20200920_20240118.csv
Processing file 1SF1GU4LT2CPdNDiWWyAS5NgscVisKiz5 Energy_Data_20240119_20240519.csv


Retrieving folder contents completed
Building directory structure
Building directory structure completed
Downloading...
From: https://drive.google.com/uc?id=1ZaSlslhGaj0aSWkUMhvSmHhY1y_2qzS4
To: c:\Tom\HKA\7_Semester\Domänenprojekt_2\DoPro\artifacts\data_ingestion\dwd_icon_eu_hornsea_1_20200920_20231027_ws.parquet
100%|██████████| 3.18M/3.18M [00:00<00:00, 4.48MB/s]
Downloading...
From: https://drive.google.com/uc?id=1V8Ta3H6DcSK0O7o7w6vxJZbGAxK35nD9
To: c:\Tom\HKA\7_Semester\Domänenprojekt_2\DoPro\artifacts\data_ingestion\dwd_icon_eu_pes10_20200920_20231027_sdr.parquet
100%|██████████| 3.53M/3.53M [00:00<00:00, 4.38MB/s]
Downloading...
From: https://drive.google.com/uc?id=1Y8ryBYnb3CewCUJjUpseyju2xW3WusSU
To: c:\Tom\HKA\7_Semester\Domänenprojekt_2\DoPro\artifacts\data_ingestion\Energy_Data_20200920_20231027.csv
100%|██████████| 4.45M/4.45M [00:00<00:00, 4.81MB/s]
Downloading...
From: https://drive.google.com/uc?id=1cpi-co6y6tL7E_8t98pBXUKRrumjHn_S
To: c:\Tom\HKA\7_Semester\Domänenproj