In [2]:
import os

from dotenv import load_dotenv

# Order matters in this cell: the environment is prepared *before*
# huggingface_hub is imported, because the library reads its HF_HUB_* flags
# when it is first imported. Setting them afterwards has no effect in a
# fresh kernel.
load_dotenv() # loads HF_TOKEN; token should have write permissions

# Enable hf_transfer
os.environ['HF_HUB_ENABLE_HF_TRANSFER'] = "1"

from huggingface_hub import HfApi, login  # noqa: E402  (must follow the env setup above)

# Local directory that holds the image folders / archives to upload.
DATA_PATH = 'data'

In [3]:
import zipfile
from tqdm import tqdm
from pathlib import Path
import os

def zip_image_folders(root='data', overwrite=False, image_exts=('.jpg', '.jpeg', '.png', '.webp')):
    """Archive the images of every non-hidden sub-folder of ``root``.

    For each direct sub-directory ``<root>/<name>`` that contains at least one
    image (searched recursively), a ``<root>/<name>.zip`` archive is written.
    Archive members are stored relative to the sub-directory, so the archive
    has no top-level ``<name>/`` entry. Files whose extension is not in
    ``image_exts`` are not added to the archive.

    Args:
        root: Directory whose direct sub-directories are archived.
        overwrite: When False, a sub-directory whose archive already exists is
            skipped; when True, the archive is rebuilt.
        image_exts: Lower-case file extensions (with leading dot) treated as
            images. File suffixes are lower-cased before comparison.

    Returns:
        None. Progress is reported with ``print`` and a ``tqdm`` bar.
    """
    root = Path(root)
    for folder in root.iterdir():
        if folder.is_dir() and not folder.name.startswith('.'):  # Skip hidden folders

            # Recursively find all image files. Sorted so the member order in
            # the archive does not depend on filesystem enumeration order.
            files = sorted(
                f for f in folder.rglob('*')
                if f.is_file() and f.suffix.lower() in image_exts
            )
            if not files:
                continue  # Skip empty or non-image folders

            # Append ".zip" to the full folder name. `with_suffix` would
            # replace everything after the last dot, so "images.v2" would be
            # written to "images.zip" and could clobber another archive.
            zip_path = folder.with_name(folder.name + '.zip')
            if not overwrite and zip_path.exists():
                print(f"Already exists: {zip_path.name}")
                continue

            print(f"\nZipping {folder.name} → {zip_path.name}")

            # Build the archive under a temporary name and move it into place
            # only once it is complete. An interrupted run therefore never
            # leaves a truncated "<name>.zip" that later runs would skip as
            # "Already exists".
            tmp_path = zip_path.with_name(zip_path.name + '.part')
            try:
                with zipfile.ZipFile(tmp_path, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
                    for file in tqdm(files, desc=f"Zipping {folder.name}"):
                        arcname = file.relative_to(folder)  # relative to inner folder, no top folder
                        zipf.write(file, arcname=arcname)
                os.replace(tmp_path, zip_path)
            except BaseException:
                # Also covers KeyboardInterrupt (kernel interrupt in a notebook).
                if tmp_path.exists():
                    tmp_path.unlink()
                raise
        else:
            print(f"Skipping hidden or invalid folder: {folder.name}")

# Build the missing archives under DATA_PATH; archives that already exist are kept.
zip_image_folders(DATA_PATH, overwrite=False)

Already exists: images_WB_OZ_100.zip
Skipping hidden or invalid folder: WB_OZ_100.csv
Skipping hidden or invalid folder: WB_OZ_100.xlsx
Skipping hidden or invalid folder: new_labeled_v4.csv
Skipping hidden or invalid folder: .cache
Skipping hidden or invalid folder: new_labeled.csv
Skipping hidden or invalid folder: images_WB_OZ_100.zip
Already exists: images_7k.zip
Skipping hidden or invalid folder: images_labeled.zip
Already exists: images_labeled.zip
Skipping hidden or invalid folder: WB_OZ_100_conjugated.csv
Skipping hidden or invalid folder: WB_OZ_100_conjugated_product_names.csv
Skipping hidden or invalid folder: images_7k.zip


In [4]:
# Push the contents of DATA_PATH to the dataset repository on the Hub.
api = HfApi()
api.upload_folder(
    folder_path=DATA_PATH,  # Path to the local directory
    repo_id="INDEEPA/clip-siamese",
    repo_type="dataset",
    # Loose images inside sub-folders are shipped through the .zip archives,
    # so exclude every extension that zip_image_folders archives by default
    # (previously .jpeg and .png files would have been uploaded individually).
    ignore_patterns=['**/*.jpg', '**/*.jpeg', '**/*.png', '**/*.webp']
)

CommitInfo(commit_url='https://huggingface.co/datasets/INDEEPA/clip-siamese/commit/a6764ca78da7259dacf41423c6ed29a58678f758', commit_message='Upload folder using huggingface_hub', commit_description='', oid='a6764ca78da7259dacf41423c6ed29a58678f758', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/INDEEPA/clip-siamese', endpoint='https://huggingface.co', repo_type='dataset', repo_id='INDEEPA/clip-siamese'), pr_revision=None, pr_num=None)