In [2]:
from huggingface_hub import snapshot_download
import os
import time

# --- Configuration ---
# Hugging Face dataset repository the files are pulled from.
REPO_ID = "iisc-aim/UVH-26"

# Download target: a "UVH-26_dataset" folder under the working directory
# that is current when this module is executed.
LOCAL_DIR = os.path.join(os.getcwd(), "UVH-26_dataset")

# Upper bound on download attempts before giving up.
MAX_RETRIES = 100

def download_with_retry():
    """Download the train/val folders of REPO_ID into LOCAL_DIR, retrying on failure.

    Calls ``snapshot_download`` up to MAX_RETRIES times and waits 5 seconds
    between failed attempts. The cause of each failure is printed so that a
    non-transient error (bad repo id, rejected argument, full disk, ...) is
    visible instead of being reported as a dropped connection.

    Returns:
        True as soon as one ``snapshot_download`` call finishes without
        raising; False if every attempt raised.

    Raises:
        KeyboardInterrupt: propagates untouched. It derives from
            ``BaseException`` rather than ``Exception``, so the handler
            below never intercepts Ctrl+C.
    """
    retry_delay_seconds = 5

    for attempt in range(1, MAX_RETRIES + 1):
        print(f"\n[Attempt {attempt}/{MAX_RETRIES}] checking for missing files...")
        try:
            # NOTE(review): `resume_download` and `local_dir_use_symlinks` are
            # reportedly deprecated in recent huggingface_hub releases --
            # confirm against the installed version before removing them.
            snapshot_download(
                repo_id=REPO_ID,
                repo_type="dataset",
                local_dir=LOCAL_DIR,
                allow_patterns=["UVH-26-Train/*", "UVH-26-Val/*"],
                max_workers=4,            # Keep low for stability
                resume_download=True,     # Resumes partial files
                local_dir_use_symlinks=False  # Write real files, not links
            )
        except Exception as e:
            # Report what actually went wrong; not every failure is a
            # network drop.
            print(f"\n[!] Download attempt failed ({type(e).__name__}: {e}).")
            if attempt < MAX_RETRIES:
                # No point in waiting after the final attempt.
                print(f"Retrying in {retry_delay_seconds} seconds...")
                time.sleep(retry_delay_seconds)
        else:
            print("\nSUCCESS: All files are present!")
            return True

    print("\n[X] Failed after maximum retries.")
    return False

# --- Verification Step ---
def verify_counts(local_dir=None):
    """Count downloaded train/val images and print a completeness report.

    For each split folder ("UVH-26-Train", "UVH-26-Val") the images are
    counted recursively inside its "images" sub-folder; when that sub-folder
    does not exist the whole split folder is searched instead, so a layout
    without an "images" folder or with nested folders is still counted.
    Extensions are matched case-insensitively.

    Args:
        local_dir: Root folder of the download. Defaults to the module-level
            LOCAL_DIR when omitted or None.

    Returns:
        None. The counts and the status line are printed to stdout.
    """
    print("\n--- Verifying File Counts ---")
    dataset_root = LOCAL_DIR if local_dir is None else local_dir

    image_exts = ('.jpg', '.png', '.jpeg')
    expected_total = 26646       # image count used for the "missing" figure
    complete_threshold = 26000   # totals above this are reported as complete

    def count_imgs(split_name):
        split_dir = os.path.join(dataset_root, split_name)
        images_dir = os.path.join(split_dir, "images")
        search_root = images_dir if os.path.isdir(images_dir) else split_dir
        # os.walk yields nothing for a missing directory, so an absent split
        # simply counts as 0.
        return sum(
            1
            for _dirpath, _dirnames, filenames in os.walk(search_root)
            for filename in filenames
            if filename.lower().endswith(image_exts)
        )

    t_count = count_imgs("UVH-26-Train")
    v_count = count_imgs("UVH-26-Val")
    total = t_count + v_count

    print(f"Train Images: {t_count}")
    print(f"Val Images:   {v_count}")
    print(f"Total Found:  {total}")

    if total > complete_threshold:
        print("Status: COMPLETE. You are ready to train.")
    else:
        # Nothing re-runs the download automatically, so tell the user to.
        print(
            f"Status: INCOMPLETE ({expected_total - total} missing). "
            "Re-run the download to fetch the missing files."
        )

if __name__ == "__main__":
    # Only verify the image counts once the download reported success.
    download_succeeded = download_with_retry()
    if download_succeeded:
        verify_counts()


[Attempt 1/100] checking for missing files...


Fetching ... files: 26650it [8:40:11,  1.17s/it]


SUCCESS: All files are present!

--- Verifying File Counts ---
Train Images: 0
Val Images:   0
Total Found:  0
Status: INCOMPLETE (26646 missing). Script will auto-retry.



