In [1]:
!pip install kaggle



In [2]:
import json
import os
import shutil
import uuid
from pathlib import Path

from google.colab import userdata
from tqdm.notebook import tqdm

In [3]:
# Setup Kaggle API credentials
# The username and API key are read from Colab's secret store and written to
# the kaggle.json file under /root/.kaggle.
credentials_path = "/root/.kaggle/kaggle.json"
os.makedirs("/root/.kaggle", exist_ok=True)
kaggle_token = {
    "username": userdata.get('username'),
    "key": userdata.get('kaggle-api-key')
}

# Create the file with owner-only permissions from the start, so the API key
# is never readable by other users between the write and a later chmod.
credentials_fd = os.open(
    credentials_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600
)
with os.fdopen(credentials_fd, "w") as f:
    json.dump(kaggle_token, f)

# os.open only applies the mode when it creates the file; enforce 0600 as well
# for the case where kaggle.json already existed with looser permissions.
os.chmod(credentials_path, 0o600)

In [4]:
from kaggle.api.kaggle_api_extended import KaggleApi

In [5]:
# Download dataset from Kaggle
api = KaggleApi()
api.authenticate()

dataset_dir = Path("./data/CATI-FAS_dataset")
# An empty directory (e.g. left behind by an earlier failed run) does not count
# as a downloaded dataset.
if dataset_dir.is_dir() and any(dataset_dir.iterdir()):
    print("Dataset already exists.")
else:
    # Announce before the blocking call so the message is shown while the
    # download is in progress rather than after it has finished.
    print("Downloading dataset...")
    dataset_dir.mkdir(parents=True, exist_ok=True)
    try:
        api.dataset_download_files(
            "n24q02m/cati-fas-face-anti-spoofing-dataset",
            path=str(dataset_dir),
            unzip=True,
        )
    except BaseException:
        # Remove the partial download (also on KeyboardInterrupt); otherwise
        # the check above would treat it as a complete dataset on re-run.
        shutil.rmtree(dataset_dir, ignore_errors=True)
        raise

Dataset URL: https://www.kaggle.com/datasets/n24q02m/cati-fas-face-anti-spoofing-dataset
Downloading dataset...


In [6]:
# Rename files in live and spoof folders
def rename_files(directory):
    """Renumber every file under ``directory`` to ``000001<ext>``, ``000002<ext>``, ...

    Files are discovered recursively with the ``*.*`` pattern, so names that
    contain no dot are left alone. Numbering is one sequence across all
    subdirectories; each file stays in its own parent directory and keeps its
    original suffix.

    The rename is done in two phases (sources -> unique holding names ->
    final names) so that a file whose current name equals another file's
    target name is never overwritten. This matters when the folder already
    contains numbered files, e.g. when the cell is run a second time.

    Args:
        directory: Root folder (``str`` or ``Path``) whose files are renamed.

    Raises:
        FileExistsError: If a target name is already taken by a path that is
            not itself one of the files being renamed. Raised before any file
            is touched.
    """
    # Directories can match "*.*" too, so keep files only. Sorting makes the
    # numbering reproducible instead of depending on directory listing order.
    sources = sorted(p for p in Path(directory).rglob("*.*") if p.is_file())
    targets = [
        src.parent / f"{idx:06d}{src.suffix}"
        for idx, src in enumerate(sources, start=1)
    ]

    # Pre-flight check: a target occupied by something outside the rename set
    # (e.g. a directory) would be clobbered or make rename() fail midway.
    source_set = set(sources)
    blocked = [dst for dst in targets if dst not in source_set and dst.exists()]
    if blocked:
        raise FileExistsError(f"Refusing to overwrite existing path: {blocked[0]}")

    # Phase 1: move every file that needs a new name to a unique holding name,
    # which frees all target names that are currently held by other sources.
    token = uuid.uuid4().hex
    staged = {}
    for idx, (src, dst) in enumerate(zip(sources, targets)):
        if src == dst:
            continue  # already correctly named
        holding = src.parent / f".{token}-{idx}{src.suffix}"
        src.rename(holding)
        staged[dst] = holding

    # Phase 2: move the held files to their final names. The progress bar
    # still counts every file, including those that were already in place.
    for dst in tqdm(targets, desc=f"Renaming files in {directory}"):
        holding = staged.get(dst)
        if holding is not None:
            holding.rename(dst)


# Renumber the files of each class folder, "live" first and then "spoof".
for class_folder in ("live", "spoof"):
    rename_files(dataset_dir / class_folder)

Renaming files in data/CATI-FAS_dataset/live:   0%|          | 0/2013 [00:00<?, ?it/s]

Renaming files in data/CATI-FAS_dataset/spoof:   0%|          | 0/16414 [00:00<?, ?it/s]

In [7]:
# Publish the contents of dataset_dir as a new version of the Kaggle dataset.

# Write dataset-metadata.json into the folder that is about to be uploaded.
metadata_path = dataset_dir / "dataset-metadata.json"
metadata = {
    "title": "CATI-FAS - Face Anti-Spoofing Dataset",
    "id": "n24q02m/cati-fas-face-anti-spoofing-dataset",
    "licenses": [{"name": "CC0-1.0"}],
}
metadata_path.write_text(json.dumps(metadata, indent=4))

# Use a freshly authenticated client so this cell does not depend on the
# client object created in the download cell.
api = KaggleApi()
api.authenticate()

print("Updating dataset version on Kaggle...")
api.dataset_create_version(
    folder=str(dataset_dir),
    version_notes="Renamed files in live and spoof folders",
    quiet=False,
    dir_mode="zip",  # subfolders are uploaded as zip archives
)
print("Dataset version created successfully on Kaggle!")

Updating dataset version on Kaggle...
Starting upload for file spoof.zip


100%|██████████| 28.4G/28.4G [04:23<00:00, 116MB/s]


Upload successful: spoof.zip (28GB)
Starting upload for file live.zip


100%|██████████| 2.71G/2.71G [00:25<00:00, 115MB/s]


Upload successful: live.zip (3GB)
Dataset version created successfully on Kaggle!
