In [None]:
!pip install gdown kaggle

In [None]:
import os
import json
import gdown
import zipfile
import shutil
from pathlib import Path
from tqdm.notebook import tqdm
from kaggle.api.kaggle_api_extended import KaggleApi

In [None]:
# Root folder in which the assembled dataset is collected
OUTPUT_DIR = "./CATI-FAS_dataset"

# Make sure one sub-folder per class label exists before any files are moved
for subdir in ("live", "spoof"):
    Path(OUTPUT_DIR, subdir).mkdir(parents=True, exist_ok=True)

In [None]:
# Fetch the raw dataset folders from Google Drive
def download_folders():
    """Download the shared Google Drive folder into ``temp_download``.

    Returns:
        bool: True when ``gdown.download_folder`` returned a truthy result;
        False when it returned a falsy result or raised. In both failure
        cases a message is printed instead of propagating the error.
    """
    folder_url = (
        "https://drive.google.com/drive/folders/1nkdy6mvS2MinErRILDAARdeQUNuUWT_O"
    )

    print("Downloading folders using gdown...")
    try:
        downloaded = gdown.download_folder(
            url=folder_url,
            output="temp_download",
            quiet=False,
        )
        if downloaded:
            return True
        # A falsy result means nothing was fetched; point the user at the source
        print("\nDownload failed. Please download manually from:", folder_url)
        return False
    except Exception as err:
        print(f"Download error: {err}")
        return False


def extract_and_move_files():
    """Extract the downloaded zip archives and collect their images per class.

    For each class folder ("live", "spoof") under ``temp_download``, every
    ``*.zip`` found recursively is unpacked into a scratch folder below
    ``temp_extract``. Regular files whose suffix is .jpg, .jpeg or .png
    (case-insensitive) are then moved into ``OUTPUT_DIR/<class>``. An image
    whose file name already exists in the target folder is skipped, so the
    first file seen with a given name wins.

    The per-archive scratch folder is removed even when extraction or moving
    raises (the exception still propagates), and the ``temp_extract`` root is
    removed at the end if it is left empty.
    """
    image_suffixes = {".jpg", ".jpeg", ".png"}
    extract_root = "temp_extract"

    # Process both live and spoof folders
    for data_type in ["live", "spoof"]:
        source_dir = os.path.join("temp_download", data_type)
        target_dir = os.path.join(OUTPUT_DIR, data_type)

        if not os.path.exists(source_dir):
            print(f"Source directory {source_dir} not found")
            continue

        # Guarantee the destination exists: moving onto a missing directory
        # path would rename the image to that path instead of placing it inside.
        os.makedirs(target_dir, exist_ok=True)

        # Find all zip files
        zip_files = list(Path(source_dir).rglob("*.zip"))

        print(f"\nProcessing {data_type} files...")
        for zip_file in tqdm(zip_files):
            # Extract to temporary folder
            temp_extract = os.path.join(extract_root, zip_file.stem)
            os.makedirs(temp_extract, exist_ok=True)

            try:
                with zipfile.ZipFile(zip_file, "r") as zip_ref:
                    zip_ref.extractall(temp_extract)

                # Move image files to target directory
                for img_file in Path(temp_extract).rglob("*.*"):
                    # Skip directories whose name happens to contain a dot
                    if not img_file.is_file():
                        continue
                    if img_file.suffix.lower() not in image_suffixes:
                        continue

                    # Only move if file doesn't exist in target
                    target_path = os.path.join(target_dir, img_file.name)
                    if not os.path.exists(target_path):
                        # Explicit destination file path, not just the folder
                        shutil.move(str(img_file), target_path)
            finally:
                # Clean up temporary extraction folder, also on failure
                shutil.rmtree(temp_extract)

    # Drop the scratch root once nothing is left inside it
    if os.path.isdir(extract_root) and not os.listdir(extract_root):
        os.rmdir(extract_root)


def upload_to_kaggle():
    """Write ``dataset-metadata.json`` into OUTPUT_DIR and create the Kaggle dataset.

    The dataset owner in the metadata id is taken from the ``KAGGLE_USERNAME``
    environment variable. When that variable is unset or empty, the
    placeholder ``your_kaggle_username`` is kept and a warning is printed so
    the user knows the id must be edited.

    Returns:
        bool: True when authentication and ``dataset_create_new`` completed
        without raising; False when either raised (the error is printed).
    """
    placeholder_user = "your_kaggle_username"
    kaggle_user = os.environ.get("KAGGLE_USERNAME") or placeholder_user
    if kaggle_user == placeholder_user:
        print(
            "Warning: KAGGLE_USERNAME is not set; the dataset id still uses the "
            f"placeholder owner '{placeholder_user}'."
        )

    # Create dataset metadata
    metadata = {
        "title": "CATI-FAS - Face Anti-Spoofing Dataset",
        "id": f"{kaggle_user}/cati-fas-face-anti-spoofing-dataset",
        "licenses": [{"name": "CC0-1.0"}],
    }

    metadata_path = os.path.join(OUTPUT_DIR, "dataset-metadata.json")
    # Explicit encoding so the file does not depend on the platform default
    with open(metadata_path, "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=4)

    print("\nUploading to Kaggle...")
    try:
        api = KaggleApi()
        api.authenticate()
        api.dataset_create_new(folder=OUTPUT_DIR, dir_mode="zip", quiet=False)
        print("Dataset created successfully on Kaggle!")
        return True
    except Exception as e:
        print(f"Error uploading to Kaggle: {e}")
        return False

In [None]:
# Pipeline driver: download -> extract -> clean up -> upload
if not download_folders():
    print("Download failed, please try again.")
else:
    extract_and_move_files()

    # The raw download is no longer needed once its images have been moved
    shutil.rmtree("temp_download")

    # Publish the assembled dataset
    upload_to_kaggle()