# FF++ Pipeline Audit - Step 7 & Step 8 Verification

In [None]:
# 1. Mount Drive
# Mounts Google Drive into the Colab VM's filesystem at /content/drive.
# NOTE(review): nothing under /content/drive is read automatically later in
# this notebook; the mount only matters for the manual `cp` fallback that the
# download cell prints when the archive looks too small.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# 2. Clone repo
# Clones the project into /content/Team-Converge, switches the notebook's
# working directory to its Finetune1 subfolder, and quietly installs the
# extra packages named on the pip line.
# The later cells use paths relative to that working directory (train.py,
# config.yaml, artifacts/...), so this cell has to run before them.
# NOTE(review): on a re-run `git clone` will presumably refuse to clone into
# the already-existing directory; the %cd and pip lines still execute.
!git clone https://github.com/Incharajayaram/Team-Converge.git /content/Team-Converge
%cd /content/Team-Converge/Finetune1
!pip install -q mediapipe pyyaml tqdm

In [None]:
# 3. Download FF++ data from Drive (with large file handling)
import os
import subprocess

FILE_ID = "1zdmdO0_rUyMspOkZtAi3b0oN_NOxYyLm"
ZIP_PATH = "/content/ffpp_data.zip"

# Remove any corrupted previous download
if os.path.exists(ZIP_PATH):
    os.remove(ZIP_PATH)

# Use gdown with confirm flag for large files
!pip install -q --upgrade gdown
!gdown --id {FILE_ID} --output {ZIP_PATH} --fuzzy

# Verify download
size_gb = os.path.getsize(ZIP_PATH) / 1e9
print(f"\nDownloaded: {size_gb:.2f} GB")
if size_gb < 1.0:
    print("WARNING: File seems too small! Download may have failed.")
    print("Try: !cp '/content/drive/MyDrive/YOUR_PATH/ffpp_data.zip' /content/ffpp_data.zip")

In [None]:
# 4. Extract to local disk
!mkdir -p /content/data/raw/ffpp
!unzip -q /content/ffpp_data.zip -d /content/data/raw/ffpp
!ls /content/data/raw/ffpp | head -10
print(f"Total items: {len(os.listdir('/content/data/raw/ffpp'))}")

In [None]:
# 5. Run Audit (300 steps)
# Runs train.py from the current working directory with config.yaml, passing
# two config overrides on the command line:
#   dataset.ffpp_root -> /content/data/raw/ffpp (where the archive was extracted)
#   caching.cache_dir -> /content/cache/faces
# and the flags --audit_steps 300, --audit_every 25 and --dump_aug 32.
# NOTE(review): presumably --audit_every is a reporting interval in steps and
# --dump_aug is the number of augmented samples written for inspection;
# confirm against train.py's argument parser.
!python train.py --config config.yaml \
    --override dataset.ffpp_root=/content/data/raw/ffpp \
    --override caching.cache_dir=/content/cache/faces \
    --audit_steps 300 --audit_every 25 --dump_aug 32

In [None]:
# 6. View Audit Report
#
# Loads artifacts/reports/audit_report.json (relative to the current working
# directory) and prints four of its fields.
import json

with open('artifacts/reports/audit_report.json') as report_file:
    report = json.load(report_file)

# Valid vs. total batch counts share a single output line.
valid_batches = report['valid_batches']
total_batches = report['total_batches']
print(f"Valid batches: {valid_batches}/{total_batches}")

# The two violation fields are printed the same way, one line each.
for label, key in (
    ("Video violations", 'video_violations'),
    ("Group violations", 'group_violations'),
):
    print(f"{label}: {report[key]}")

# The hit rate is formatted as a percentage with two decimals.
hit_rate = report['cache_hit_rate']
print(f"Cache hit rate: {hit_rate:.2%}")

In [None]:
# 7. View Augmented Samples
#
# Shows up to 16 files from artifacts/aug_debug (in sorted name order) on a
# 4x4 grid, each titled with the first 25 characters of its file name.
import os  # imported here so the cell also runs on its own after a restart

import matplotlib.pyplot as plt
from PIL import Image

aug_dir = 'artifacts/aug_debug'
files = sorted(os.listdir(aug_dir))[:16]
fig, axes = plt.subplots(4, 4, figsize=(14, 14))

for ax, f in zip(axes.flat, files):
    # Image.open leaves the underlying file open; the context manager closes
    # it once imshow has taken its copy of the pixel data.
    with Image.open(os.path.join(aug_dir, f)) as img:
        ax.imshow(img)
    ax.set_title(f[:25], fontsize=7)

# Hide the axes of every panel, including those left unused when fewer than
# 16 files are available, so no empty frames or ticks are drawn.
for ax in axes.flat:
    ax.axis('off')

plt.tight_layout()
plt.show()