In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# Later cells read the input dataset from, and copy the final archive to,
# paths under this mount point (…/drive/MyDrive/mad/…).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%%shell
# Run the occlusion dataset builder in `both` mode with a custom 6x5 grid
# (rows x columns), landmark scale factor 1.5, `bbox` landmark precision,
# and multiprocessing across 12 workers.
# Input is read from the mounted Drive; results go to the relative `output`
# directory rather than to Drive.
# NOTE(review): every path here is relative, so this assumes the working
# directory is /content (where Drive was mounted) — confirm before re-running.
python drive/MyDrive/mad/tests/occlusion_dataset_builder.py \
  --mode both \
  --input_path drive/MyDrive/mad/datasets/frll-morphs/ \
  --output_path output \
  --grid_rows_count 6 \
  --grid_columns_count 5 \
  --landmark_scale_factor 1.5 \
  --landmark_precision bbox \
  --multiprocessing --workers 12


Writing thousands of files directly to Drive in Colab can:
  - Be extremely slow due to network-backed file system
  - Cause incomplete writes or missing files even after waiting
  - Be unreliable with multiprocessing

✅ Recommended workflow for Colab + Drive:
  1. Save all files locally in /content (Colab ephemeral storage)
  2. Zip the entire dataset into a single archive
  3. Copy or move the .zip file to Google Drive
  4. Unzip locally when needed, not inside Drive

This ensures faster, more reliable writes and avoids sync issues.

Downloading Dlib 68-point shape predictor...
Extracting predictor...
✅ Model downloaded and extracted successfully!
2025-08-31 13:19:51,322 [INFO] === Processing started ===
2025-08-31 13:20:33,697 [INFO] Found 7265 images under drive/MyDrive/mad/datasets/frll-morphs
2025-08-31 13:20:33,697 [INFO] Using multiprocessing with 12 workers.
2025-08-31 13:22:38,857 [INFO] Processed 50/7265 images...
2025-08-31 13:23:16,059 [INFO] Processed 100/7265 images...




In [3]:
from pathlib import Path
from zipfile import ZipFile, ZIP_DEFLATED
from tqdm import tqdm

# Archive the generated dataset locally, then copy the single zip to Drive.
# Writing one large file to Drive avoids creating every dataset file
# individually on the Drive mount.

# --- Paths ---
src = Path("output")  # folder to zip (relative to the current working directory)
zip_local = Path("/content/output.zip")  # temporary zip in Colab local storage
zip_drive = Path("/content/drive/MyDrive/mad/tests/output_occlusion.zip")  # final zip on Drive

# --- 0. Validate inputs before doing any slow work ---
# Without these checks a missing or empty source folder would silently produce
# an empty archive that is then copied to Drive as if it were the dataset.
if not src.is_dir():
    raise FileNotFoundError(f"Source folder not found: {src.resolve()}")

# Create the destination folder up front so a missing Drive directory fails
# (or is fixed) now rather than after the whole dataset has been zipped.
zip_drive.parent.mkdir(parents=True, exist_ok=True)

# --- 1. Zip with progress bar ---
# Sorted so the member order inside the archive is stable between runs;
# the order in which the filesystem lists entries is not guaranteed.
files = sorted(f for f in src.rglob("*") if f.is_file())
if not files:
    raise RuntimeError(f"No files found under {src.resolve()}; nothing to zip.")

print("Zipping files...")
with ZipFile(zip_local, "w", compression=ZIP_DEFLATED) as zipf:
    for f in tqdm(files, desc="Zipping", unit="file"):
        # Store paths relative to `src` so the archive has no leading "output/" prefix.
        rel = f.relative_to(src)
        zipf.write(f, rel)

# --- 2. Copy zip to Drive with progress bar ---
print("Copying zip to Drive...")
buffer_size = 1024 * 1024  # 1 MB chunks
total_size = zip_local.stat().st_size  # bytes; drives the progress bar total

with open(zip_local, "rb") as fsrc, open(zip_drive, "wb") as fdst:
    with tqdm(total=total_size, unit="B", unit_scale=True, desc="Copying") as pbar:
        while True:
            buf = fsrc.read(buffer_size)
            if not buf:  # empty read means end of file
                break
            fdst.write(buf)
            pbar.update(len(buf))

print(f"\n✅ Zip saved at: {zip_drive}")

Zipping files...


Zipping: 100%|██████████| 247011/247011 [22:02<00:00, 186.74file/s]


Copying zip to Drive...


Copying: 100%|██████████| 13.6G/13.6G [02:22<00:00, 95.1MB/s]

✅ Zip saved at: /content/drive/MyDrive/mad/tests/output.zip





In [4]:
# Final cleanup cell: sync Drive, then give the Colab runtime back.
# NOTE(review): `time` is imported but not used anywhere in this cell.
import time
from google.colab import drive, runtime

# Flush pending writes and unmount Drive first, so the copied zip is fully
# written out before the runtime is released.
drive.flush_and_unmount()

# Release the runtime assigned to this session. Must come last: presumably
# nothing after this call runs and local files under /content are discarded
# — confirm against the google.colab docs.
runtime.unassign()