In [1]:
import os
%pwd

'c:\\Users\\lenovo\\Desktop\\Stage\\text-to-3D_Model_Generation\\research'

In [2]:
# Move from the notebook's research/ folder up to the project root so that
# relative paths used by later cells resolve from there.
# Guarded on the directory name so that re-running this cell does not climb
# one more level each time it is executed.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [3]:
%pwd

'c:\\Users\\lenovo\\Desktop\\Stage\\text-to-3D_Model_Generation'

In [4]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable bundle of settings consumed by the data-ingestion stage.

    Instances are frozen: assigning to a field after construction raises.
    """

    # Directory that holds everything the ingestion stage produces.
    root_dir: Path
    # URL the description data is loaded from.
    source_url: str
    # How many samples to request from the source.
    number_of_samples_to_download: int
    # Destination file for the saved descriptions table.
    local_data_file: Path

In [5]:
from textTo3DModelGen.constants import *
from textTo3DModelGen.utils.common import read_yaml, create_directories

In [6]:
class ConfigurationManager:
    """Reads the project YAML files and turns their sections into config objects."""

    def __init__(
            self, 
            config_filepath = CONFIG_FILE_PATH,
            params_filepath = HYPER_PARAMS_FILE_PATH):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the hyper-parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([
            self.config.artifacts_root
        ])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the data-ingestion settings from the ``data_ingestion`` section.

        Creates the stage's root directory as a side effect.

        Returns:
            A ``DataIngestionConfig`` whose two path fields are both ``Path``
            objects, matching the dataclass annotations.
        """
        config = self.config.data_ingestion

        create_directories([
            config.root_dir
        ])

        return DataIngestionConfig(
            # Wrapped in Path so root_dir matches its declared type and is
            # handled the same way as local_data_file below.
            root_dir=Path(config.root_dir),
            source_url=config.source_url,
            number_of_samples_to_download=config.number_of_samples_to_download,
            local_data_file=Path(config.local_data_file),
        )


In [7]:
from textTo3DModelGen import logger
from textTo3DModelGen.utils.common import get_size, load_from_url, save_csv, create_directories, load_from_objaverse
import multiprocessing
import pandas as pd
import objaverse

In [8]:
class DataIngestion:
    """Downloads object descriptions and the matching Objaverse objects."""

    def __init__(self, config: DataIngestionConfig):
        """Keep the ingestion settings that ``download_data`` reads."""
        self.config = config

    def download_data(self):
        """Fetch descriptions, download their objects and save an index file.

        The method:
          1. creates ``config.root_dir``;
          2. loads descriptions from ``config.source_url`` (limited to
             ``config.number_of_samples_to_download`` samples);
          3. downloads the objects listed under ``descriptions['uids']`` using
             one process per CPU;
          4. adds a ``saved_path`` entry per uid and writes the result to
             ``config.local_data_file``.

        Any exception raised by the helpers propagates to the caller unchanged.
        """
        root_dir = self.config.root_dir
        create_directories([root_dir])
        source_url = self.config.source_url
        num_of_samples = self.config.number_of_samples_to_download
        local_data_file = self.config.local_data_file

        # NOTE(review): presumably a pandas DataFrame with a 'uids' column - confirm
        # against load_from_url.
        descriptions = load_from_url(url= source_url, num_of_samples= num_of_samples)
        logger.info(f"Downloaded description from {source_url} with lenght {len(descriptions)}.")

        processes = multiprocessing.cpu_count()
        objects = load_from_objaverse(uids = descriptions['uids'], processes = processes)
        logger.info(f"Downloaded {len(objects)} objects from Objaverse Dataset.")

        # Relative location of every object inside the objaverse download folder.
        paths = objaverse._load_object_paths()
        # The download folder lives under the current user's home directory, so
        # derive it instead of hard-coding '/root/...'. When home is '/root'
        # this yields exactly the same strings as before.
        # NOTE(review): assumes objaverse keeps its default ~/.objaverse location - confirm.
        objaverse_dir = Path.home() / ".objaverse" / "hf-objaverse-v1"
        saved_path = [str(objaverse_dir / str(paths[uid])) for uid in descriptions['uids']]
        descriptions['saved_path'] = saved_path
        save_csv(local_data_file, descriptions)
        logger.info(f"saved objects with description data into {local_data_file} with size {get_size(local_data_file)}.")

In [9]:
# Run the data-ingestion stage end to end: load configuration, build the
# stage settings, then download and index the data.
# No try/except wrapper: the previous handler only re-raised the same
# exception, so errors surface identically without it.
config = ConfigurationManager()
data_ingestion_config = config.get_data_ingestion_config()
data_ingestion = DataIngestion(config=data_ingestion_config)
data_ingestion.download_data()

[2024-08-19 01:42:38,859: INFO: yaml file: config\config.yaml loaded successfully]
[2024-08-19 01:42:38,862: INFO: yaml file: hyper_params.yaml loaded successfully]
[2024-08-19 01:42:38,864: INFO: created directory at: artifacts]
[2024-08-19 01:42:38,867: INFO: created directory at: artifacts/data_ingestion]
[2024-08-19 01:42:38,869: INFO: created directory at: artifacts/data_ingestion]
[2024-08-19 01:43:00,458: INFO: Downloaded description from https://huggingface.co/datasets/tiange/Cap3D/resolve/main/Cap3D_automated_Objaverse_full.csv with lenght 2.]
[2024-08-19 01:43:02,214: INFO: Downloaded 0 objects from Objaverse Dataset.]
[2024-08-19 01:43:03,692: INFO: saved objects with description data into artifacts\data_ingestion\objaverse_with_description.csv with size ~ 0 MB.]
