In [1]:
%pwd

'/Users/koushal/vine_quanlity/research'

In [2]:
import os

In [3]:
# Step up from the notebook's folder to its parent directory (the captured
# %pwd outputs show .../research before and the parent after this cell).
# Relative paths used later, such as "config/config.yaml", resolve against
# this working directory.
# NOTE: the path is relative, so re-running this cell moves up one more level.
os.chdir("../")

In [4]:
%pwd

'/Users/koushal/vine_quanlity'

In [5]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable (frozen) record of the settings for the data-ingestion step.

    NOTE(review): the field descriptions below are inferred from how the
    same-named dictionary keys are used by ``DataIngestion`` in this file;
    this dataclass itself is not referenced by the code visible here, so
    confirm against the real call sites.
    """

    # Presumably the directory that is created before the download starts.
    root_dir: Path
    # Presumably the URL the archive is fetched from.
    source_URL: str
    # Presumably where the downloaded archive is written on disk.
    local_data_file: Path
    # Presumably the directory the archive is extracted into.
    unzip_dir: Path

In [21]:
import os
import yaml
import requests


class ConfigurationManager:
    """Load a YAML configuration file and hand out its sections.

    The file is parsed once, at construction time, and the parsed document
    is kept in ``self.config``.

    Args:
        config_file_path: Location of the YAML file. A relative path is
            resolved against the current working directory.

    Raises:
        Exception: If the file does not exist or is not valid YAML (see
            ``read_yaml``).
    """

    def __init__(self, config_file_path="config/config.yaml"):
        self.config_file_path = config_file_path
        self.config = self.read_yaml()

    def read_yaml(self):
        """Parse the configuration file and return the resulting document.

        Returns:
            Whatever ``yaml.safe_load`` produces for the file: normally a
            dict, but ``None`` for an empty file.

        Raises:
            Exception: If the file is missing or cannot be parsed. The
                underlying error is attached as ``__cause__``.
        """
        try:
            # YAML is UTF-8 by convention; do not depend on the locale default.
            with open(self.config_file_path, "r", encoding="utf-8") as file:
                return yaml.safe_load(file)
        except FileNotFoundError as e:
            raise Exception(
                f"Configuration file not found at {self.config_file_path}."
            ) from e
        except yaml.YAMLError as e:
            raise Exception(f"Error parsing the YAML file: {e}") from e

    def get_data_ingestion_config(self):
        """Return the ``data_ingestion`` section of the configuration.

        Raises:
            Exception: If the section is absent. This includes the case
                where the file was empty or its top level is not a mapping.
        """
        try:
            return self.config["data_ingestion"]
        except (KeyError, TypeError) as e:
            # TypeError covers a document that parsed to None (empty file)
            # or to a non-mapping such as a list or scalar.
            raise Exception(
                "Key 'data_ingestion' not found in the configuration file."
            ) from e


class DataIngestion:
    """Download a ZIP archive named in a config mapping and unpack it.

    Args:
        config: Mapping that supplies ``source_URL``, ``local_data_file``
            and ``root_dir`` (read by ``download_file``) and ``unzip_dir``
            (read by ``extract_zip_file``).
        timeout: Seconds passed to ``requests.get``. Without a timeout an
            unresponsive server would block the download forever.
    """

    # Bytes written per iteration while streaming the response body to disk.
    _CHUNK_SIZE = 64 * 1024

    def __init__(self, config, timeout=30):
        self.config = config
        self.timeout = timeout

    def download_file(self):
        """Stream the file at ``source_URL`` into ``local_data_file``.

        Creates ``root_dir`` and the parent directory of the target file
        if they do not exist yet.

        Raises:
            Exception: If the HTTP request fails or returns an error status.
                The underlying ``requests`` error is attached as ``__cause__``.
        """
        source_url = self.config["source_URL"]
        local_file_path = self.config["local_data_file"]
        root_dir = self.config["root_dir"]

        # Ensure the root directory exists
        os.makedirs(root_dir, exist_ok=True)

        # The target file is not required to live directly under root_dir,
        # so make sure its own parent exists as well.
        parent_dir = os.path.dirname(local_file_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        try:
            print(f"Downloading data from {source_url}...")
            # Using the response as a context manager releases the streamed
            # connection even when writing the file fails part-way.
            with requests.get(
                source_url, stream=True, timeout=self.timeout
            ) as response:
                response.raise_for_status()  # Raise an error for invalid status codes

                # Save the file locally
                with open(local_file_path, "wb") as file:
                    for chunk in response.iter_content(chunk_size=self._CHUNK_SIZE):
                        if chunk:
                            file.write(chunk)

            print(f"File downloaded successfully: {local_file_path}")

        except requests.exceptions.RequestException as e:
            raise Exception(f"Error downloading the file: {e}") from e

    def extract_zip_file(self):
        """Unpack ``local_data_file`` into ``unzip_dir``.

        Creates ``unzip_dir`` if it does not exist yet.

        Raises:
            Exception: If the file is not a valid ZIP archive. The underlying
                ``zipfile.BadZipFile`` is attached as ``__cause__``.
        """
        import zipfile

        local_file_path = self.config["local_data_file"]
        unzip_dir = self.config["unzip_dir"]

        # Ensure the unzip directory exists
        os.makedirs(unzip_dir, exist_ok=True)

        try:
            print(f"Extracting {local_file_path} to {unzip_dir}...")
            with zipfile.ZipFile(local_file_path, "r") as zip_ref:
                zip_ref.extractall(unzip_dir)
            print("Extraction completed successfully.")
        except zipfile.BadZipFile as e:
            raise Exception(f"Error extracting the ZIP file: {e}") from e


# Example Usage
if __name__ == "__main__":
    # End-to-end demo: read the config, download the archive, then unpack it.
    try:
        # Load configuration
        # (uses the default relative path "config/config.yaml", so the
        # current working directory must contain the config/ folder)
        config = ConfigurationManager()
        data_ingestion_config = config.get_data_ingestion_config()

        # Create and run data ingestion
        data_ingestion = DataIngestion(data_ingestion_config)
        data_ingestion.download_file()
        # Extraction reads the file written by download_file, so it must run second.
        data_ingestion.extract_zip_file()

    except Exception as e:
        # Top-level boundary: any failure is reported as a single printed
        # line; the exception is not re-raised.
        print(f"Error: {e}")


Downloading data from https://github.com/entbappy/Branching-tutorial/raw/master/winequality-data.zip...
File downloaded successfully: artifacts/data_ingestion/data.zip
Extracting artifacts/data_ingestion/data.zip to artifacts/data_ingestion...
Extraction completed successfully.
