In [1]:

import multiprocessing
import os
import tempfile
import time
from multiprocessing import Pool
from typing import Callable, Dict, Optional, Tuple

import fsspec
import pandas as pd
import requests
from loguru import logger
from tqdm import tqdm

from objaverse.utils import get_file_hash
from objaverse.xl.abstract import ObjaverseSource



In [2]:
import objaverse.xl as oxl
# Load the annotation table through objaverse.xl. The log output of this cell
# shows per-source parquet files (github, thingiverse, smithsonian, sketchfab)
# being downloaded into sub-directories of download_dir.
annotations = oxl.get_annotations(download_dir="~/.objaverse")  # default download directory
# Bare last expression so the notebook renders the (truncated) table.
annotations

[32m2024-02-01 19:10:36.885[0m | [1mINFO    [0m | [36mobjaverse.xl.github[0m:[36m_get_annotations[0m:[36m65[0m - [1mDownloading https://huggingface.co/datasets/allenai/objaverse-xl/resolve/main/github/github.parquet to ~/.objaverse/github/github.parquet[0m
[32m2024-02-01 19:10:57.198[0m | [1mINFO    [0m | [36mobjaverse.xl.thingiverse[0m:[36mget_annotations[0m:[36m46[0m - [1mDownloading https://huggingface.co/datasets/allenai/objaverse-xl/resolve/main/thingiverse/thingiverse.parquet to ~/.objaverse/thingiverse/thingiverse.parquet[0m
[32m2024-02-01 19:11:06.360[0m | [1mINFO    [0m | [36mobjaverse.xl.smithsonian[0m:[36mget_annotations[0m:[36m47[0m - [1mDownloading https://huggingface.co/datasets/allenai/objaverse-xl/resolve/main/smithsonian/smithsonian.parquet to ~/.objaverse/smithsonian/smithsonian.parquet[0m
[32m2024-02-01 19:11:06.561[0m | [1mINFO    [0m | [36mobjaverse.xl.sketchfab[0m:[36m_get_annotations[0m:[36m52[0m - [1mDownloading htt

Unnamed: 0,fileIdentifier,source,license,fileType,sha256,metadata
0,https://github.com/GameGC/Testovoe-SPB-Git/blo...,github,,obj,fca3990d6d91e110cb920aa3da1a84e54b4a00cde7ad1e...,{}
1,https://github.com/9-zzz/RitualRex-GGJ2016/blo...,github,,blend,09dc2689b8d0510885e19a7b525ee44709849d2601add8...,{}
2,https://github.com/mattoverby/mesh-data/blob/2...,github,MIT License,obj,f9d17190b54b548c6a3e6feebe7b2951a9b8060c507874...,{}
3,https://github.com/0010cha/data_origami/blob/0...,github,MIT License,ply,f25184898312af8ed5e77cfe675d2251c9893ac5ccc3a1...,{}
4,https://github.com/Xrvitd/Point2Skeleton_withC...,github,MIT License,ply,d721e6b1b97a83d1f6700045efcb478a1db4686b0571f6...,{}
...,...,...,...,...,...,...
9767006,https://sketchfab.com/3d-models/cc2c9ac5149b40...,sketchfab,Creative Commons - Attribution,glb,76cc3e0343be2452fb7a1d8f6eb295f9067e859f353bb8...,{}
9767007,https://sketchfab.com/3d-models/48ad63b00c4448...,sketchfab,Creative Commons - Attribution,glb,15524e631cecaf3537bbbd9756a94ffa78b275ce77e683...,{}
9767008,https://sketchfab.com/3d-models/3f61c303139f4d...,sketchfab,Creative Commons - Attribution,glb,4b4a2c0e5f4bd89b15c9551c7c34c0083774ead2fb6354...,{}
9767009,https://sketchfab.com/3d-models/bc79ba06d75740...,sketchfab,Creative Commons - Attribution,glb,9e71bc509985344446e075bac884cd1a76c24b09f82217...,{}


In [3]:
from typing import Any, Dict, Hashable

def handle_found_object(
    local_path: str,
    file_identifier: str,
    sha256: str,
    metadata: Dict[Hashable, Any]
) -> None:
    """Print a banner followed by every argument this function received.

    Each argument is written on its own line as ``name=repr(value)``. Nothing
    is read from or written to disk here; the function only prints.

    NOTE(review): presumably intended as the "found object" callback for
    ``oxl.download_objects`` -- this notebook never passes it; confirm.

    Args:
        local_path: Value echoed under the ``local_path`` label.
        file_identifier: Value echoed under the ``file_identifier`` label.
        sha256: Value echoed under the ``sha256`` label.
        metadata: Mapping echoed under the ``metadata`` label.

    Returns:
        None.
    """
    banner = "\n\n\n---HANDLE_FOUND_OBJECT CALLED---\n"
    details = f"  {local_path=}\n  {file_identifier=}\n  {sha256=}\n  {metadata=}\n\n\n"
    # Two positional arguments: print() joins them with a single space.
    print(banner, details)

In [4]:
def handle_modified_object(
    local_path: str,
    file_identifier: str,
    new_sha256: str,
    old_sha256: str,
    metadata: Dict[Hashable, Any],
) -> None:
    """Print a banner followed by every argument this function received.

    Each argument is written on its own line as ``name=repr(value)``; the old
    hash is printed before the new one. The function only prints.

    NOTE(review): presumably intended as the "modified object" callback for
    ``oxl.download_objects`` -- this notebook never passes it; confirm.

    Args:
        local_path: Value echoed under the ``local_path`` label.
        file_identifier: Value echoed under the ``file_identifier`` label.
        new_sha256: Value echoed under the ``new_sha256`` label.
        old_sha256: Value echoed under the ``old_sha256`` label.
        metadata: Mapping echoed under the ``metadata`` label.

    Returns:
        None.
    """
    # `{new_sha256=}` needs the `=` specifier like every other field; without
    # it the new hash was printed as a bare, unlabeled value.
    print("\n\n\n---HANDLE_MODIFIED_OBJECT CALLED---\n",
          f"  {local_path=}\n  {file_identifier=}\n  {old_sha256=}\n  {new_sha256=}\n  {metadata=}\n\n\n")

In [5]:
def handle_missing_object(
    file_identifier: str,
    sha256: str,
    metadata: Dict[Hashable, Any]
) -> None:
    """Print a banner followed by every argument this function received.

    Each argument is written on its own line as ``name=repr(value)``. The
    function only prints.

    NOTE(review): presumably intended as the "missing object" callback for
    ``oxl.download_objects`` -- this notebook never passes it; confirm.

    Args:
        file_identifier: Value echoed under the ``file_identifier`` label.
        sha256: Value echoed under the ``sha256`` label.
        metadata: Mapping echoed under the ``metadata`` label.

    Returns:
        None.
    """
    banner = "\n\n\n---HANDLE_MISSING_OBJECT CALLED---\n"
    details = f"  {file_identifier=}\n  {sha256=}\n  {metadata=}\n\n\n"
    # Two positional arguments: print() joins them with a single space.
    print(banner, details)

In [6]:
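# NOTE: this call is intentionally left commented out. The log output under this
# cell comes from an earlier run of it that ended with a KeyboardInterrupt while
# GitHub objects were being grouped by repository. Uncomment to download every
# object listed in `annotations`; the handle_* functions defined above are not
# passed to this call.
# oxl.download_objects(
#     # Base parameters:
#     objects = annotations,
#     download_dir = "~/objaverse",
#     processes = 18,  # None => multiprocessing.cpu_count()
# )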

[32m2024-02-01 19:16:21.611[0m | [1mINFO    [0m | [36mobjaverse.xl.smithsonian[0m:[36mdownload_objects[0m:[36m312[0m - [1mFound 0 Smithsonian Objects already downloaded[0m
[32m2024-02-01 19:16:21.612[0m | [1mINFO    [0m | [36mobjaverse.xl.smithsonian[0m:[36mdownload_objects[0m:[36m315[0m - [1mDownloading 2407 Smithsonian Objects with 18 processes[0m
Downloading Smithsonian Objects: 100%|██████████| 2407/2407 [02:01<00:00, 19.75it/s]
[32m2024-02-01 19:18:42.570[0m | [1mINFO    [0m | [36mobjaverse.xl.github[0m:[36mdownload_objects[0m:[36m602[0m - [1mProvided 219231 repoIds with 5236361 objects to process.[0m
[32m2024-02-01 19:18:42.634[0m | [1mINFO    [0m | [36mobjaverse.xl.github[0m:[36mdownload_objects[0m:[36m614[0m - [1mFound 219231 repoIds not yet downloaded. Downloading now...[0m
Grouping objects by repository:  66%|██████▋   | 145742/219231 [00:11<00:05, 13178.11it/s]


KeyboardInterrupt: 