In [1]:
import os
# Query the directory the kernel is currently in.
%pwd
# Step up one directory level; the later cells import `src.CHESS_PIECE_DETECTION`
# relative to the working directory.
# NOTE(review): the path is relative, so re-running this cell moves up one more
# level each time — restart the kernel before re-running. Presumably the notebook
# lives one level below the project root; confirm.
os.chdir("../")
# Display the new working directory to confirm the move.
%pwd

'e:\\ObjectDetection\\ChessPieceDetection'

In [2]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable settings for the data-ingestion stage.

    Instances are frozen, so fields cannot be reassigned after construction.

    NOTE(review): the path fields are annotated as ``Path`` but the
    configuration manager in this notebook passes the values read from the
    YAML config through unchanged — presumably plain strings; confirm before
    relying on ``Path`` methods.
    """
    # Directory for this stage's artifacts; created by the configuration manager.
    root_dir: Path
    # Share URL of the dataset archive; the downloader takes the second-to-last
    # "/"-separated segment of this URL as the Google Drive file id.
    source_url: str
    # Location the downloaded zip archive is written to and later read from.
    local_data_file: Path
    # Directory the archive contents are extracted into.
    unzip_dir: Path

In [3]:
from src.CHESS_PIECE_DETECTION.constants import *
from src.CHESS_PIECE_DETECTION.utils.common import read_yaml, create_directories

In [4]:
class ConfigureationManager:
    """Load the project's YAML settings and build per-stage config objects."""

    def __init__(self,
            config_filepath = CONFIG_FILE_PATH,
            params_filepath = PARAMS_FILE_PATH):
        """Read both YAML files and create the top-level artifacts directory.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # The artifacts root must exist before any stage writes below it.
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Return the data-ingestion settings, creating the stage directory.

        Returns:
            A ``DataIngestionConfig`` filled from the ``data_ingestion``
            section of the loaded configuration.
        """
        section = self.config.data_ingestion

        create_directories([section.root_dir])

        return DataIngestionConfig(
            root_dir=section.root_dir,
            source_url=section.source_url,
            local_data_file=section.local_data_file,
            unzip_dir=section.unzip_dir,
        )

In [5]:
import os
import threading
import time
import sys
import shutil
import random
import gdown
import zipfile
from src.CHESS_PIECE_DETECTION import logger

In [6]:
class DataIngestion:
    """Download the dataset archive from Google Drive and extract it locally.

    While extracting, a background thread redraws a single console line with
    the progress message and a rotating spinner character.
    """

    def __init__(self, config: DataIngestionConfig):
        """Store the stage configuration and set up the spinner state.

        Args:
            config: Settings holding the source URL, the archive location and
                the extraction directory.
        """
        self.config = config
        # Polled by the spinner thread; setting it to True makes the thread exit.
        self.stop_animation = False
        # Latest progress text. Written by the extraction loop and read by the
        # spinner thread, hence guarded by ``animation_lock``.
        self.current_message = ""
        self.animation_lock = threading.Lock()

    def animate(self):
        """Redraw the progress line with a spinner until ``stop_animation`` is set.

        Intended to run in its own thread; it rewrites the current console
        line (via a leading carriage return) roughly every 0.1 seconds.
        """
        symbols = ['-', '\\', '|', '/']
        i = 0
        while not self.stop_animation:
            # Copy the message under the lock, then do the slow I/O outside it.
            with self.animation_lock:
                message = self.current_message
            sys.stdout.write(f"\r{message} {symbols[i % len(symbols)]}")
            sys.stdout.flush()
            time.sleep(0.1)
            i += 1

    def download_data(self) -> None:
        """Download the archive at ``config.source_url`` to ``config.local_data_file``.

        The Google Drive file id is taken from the second-to-last
        "/"-separated segment of the source URL.

        Raises:
            RuntimeError: If ``gdown.download`` returns ``None``, i.e. reports
                that nothing was downloaded.
            Exception: Any error raised by ``gdown`` or the filesystem
                propagates unchanged.
        """
        data_url = self.config.source_url
        zip_download_dir = self.config.local_data_file

        # Create the directory the archive is actually written to, instead of
        # a hard-coded path that may differ from the configured location.
        os.makedirs(os.path.dirname(zip_download_dir) or ".", exist_ok=True)
        logger.info(f"Downloading data from {data_url} into file {zip_download_dir}")

        file_id = data_url.split("/")[-2]
        # NOTE(review): the "?/export" spelling is kept as-is; the recorded run
        # shows this URL downloading successfully through gdown.
        prefix = 'https://drive.google.com/uc?/export=download&id='
        downloaded = gdown.download(prefix + file_id, zip_download_dir)
        if downloaded is None:
            # Without this check a failed download would be logged as a
            # success and only surface later while unzipping.
            raise RuntimeError(
                f"Failed to download data from {data_url} into file {zip_download_dir}"
            )

        logger.info(f"Downloaded data from {data_url} into file {zip_download_dir}")

    def unzip_data(self):
        """Extract ``config.local_data_file`` into ``config.unzip_dir``.

        Progress (files done, total, estimated time remaining) is shown on a
        single console line by the spinner thread. The thread is always
        stopped and joined, including when extraction fails, and the line is
        finished with a newline so later output starts on a fresh line.

        Raises:
            Exception: Any error raised while opening or extracting the
                archive propagates unchanged.
        """
        unzip_path = self.config.unzip_dir
        zip_download_dir = self.config.local_data_file
        os.makedirs(unzip_path, exist_ok=True)
        logger.info(f"Unzipping data from {zip_download_dir} into file {unzip_path}")

        with zipfile.ZipFile(zip_download_dir, 'r') as zip_ref:
            members = zip_ref.infolist()
            total_files = len(members)
            # Width used to right-align the running count so the line does not jitter.
            count_width = len(str(total_files))
            start_time = time.time()

            # Start the animation thread
            self.stop_animation = False
            animation_thread = threading.Thread(target=self.animate)
            animation_thread.start()
            try:
                for extracted_files, member in enumerate(members, start=1):
                    zip_ref.extract(member, unzip_path)

                    # Linear extrapolation from the average time per file so far.
                    elapsed_time = time.time() - start_time
                    estimated_total_time = elapsed_time / extracted_files * total_files
                    estimated_remaining_time = estimated_total_time - elapsed_time
                    minutes, seconds = divmod(estimated_remaining_time, 60)
                    time_remaining = f"{int(minutes)}m {int(seconds)}s"

                    padding = ' ' * (count_width - len(str(extracted_files)))
                    message = (
                        f"Unzipping data {padding}{extracted_files}/{total_files}"
                        f" - Estimated time remaining: {time_remaining}"
                    )
                    with self.animation_lock:
                        self.current_message = message
            finally:
                # Stop the animation on success and on failure alike, and wait
                # for the thread so it cannot write after this method returns.
                self.stop_animation = True
                animation_thread.join()

                # The spinner redraws only every 0.1 s, so the last update may
                # never have been shown. Draw it once more, blank out the
                # spinner character, and end the line.
                with self.animation_lock:
                    final_message = self.current_message
                sys.stdout.write(f"\r{final_message}  \n")
                sys.stdout.flush()

        logger.info(f"Unzipping completed data from {zip_download_dir} into file {unzip_path}")

In [7]:
# Run the data-ingestion stage end to end: load the configuration, download
# the archive, then extract it. Errors propagate with their original
# traceback; the former `except Exception as e: raise e` wrapper did nothing
# beyond re-raising and has been dropped.
config = ConfigureationManager()
data_ingestion_config = config.get_data_ingestion_config()
data_ingestion = DataIngestion(config=data_ingestion_config)
data_ingestion.download_data()
data_ingestion.unzip_data()

[2024-09-19 19:42:47,120: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-09-19 19:42:47,127: INFO: common: yaml file: params.yaml loaded successfully]
[2024-09-19 19:42:47,131: INFO: common: created directory at: datasets/artifacts]
[2024-09-19 19:42:47,133: INFO: common: created directory at: datasets/artifacts/data_ingestion]
[2024-09-19 19:42:47,136: INFO: 1682826885: Downloading data from https://drive.google.com/file/d/1CUDIA_8KffPmvnOfMTnN6P5AyDJRvBBa/view?usp=sharing into file datasets/artifacts/data_ingestion/data.zip]


Downloading...
From: https://drive.google.com/uc?/export=download&id=1CUDIA_8KffPmvnOfMTnN6P5AyDJRvBBa
To: e:\ObjectDetection\ChessPieceDetection\datasets\artifacts\data_ingestion\data.zip
100%|██████████| 17.5M/17.5M [00:01<00:00, 16.7MB/s]

[2024-09-19 19:43:00,478: INFO: 1682826885: Downloaded data from https://drive.google.com/file/d/1CUDIA_8KffPmvnOfMTnN6P5AyDJRvBBa/view?usp=sharing into file datasets/artifacts/data_ingestion/data.zip]
[2024-09-19 19:43:00,480: INFO: 1682826885: Unzipping data from datasets/artifacts/data_ingestion/data.zip into file datasets/artifacts/data_ingestion]
 -




Unzipping data 1350/1398 - Estimated time remaining: 0m 0s /[2024-09-19 19:43:01,821: INFO: 1682826885: Unzipping completed data from datasets/artifacts/data_ingestion/data.zip into file datasets/artifacts/data_ingestion]
