# Best‑Photo Selector Pipeline
Automatically pick the strongest frame from a burst using a blend of aesthetic, technical, and face‑quality metrics.

*Built for quick hackathon demos — tweak as needed!*

### How this notebook works
1. **Setup** – install packages (run once in your Colab/local runtime).
2. **Scoring functions** – sharpness, exposure, face centering, smile, and overall aesthetic.
3. **Batch evaluate** any folder of images and return a ranked list.
4. **Preview + export** – display the top N images and optionally copy them to a new directory.

In [1]:
# 👉 Run this cell first (may take ~2 min in Colab)
!pip install -q open-clip-torch==2.20.0 mediapipe opencv-python pillow tqdm deepface


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.8/46.8 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.2/87.2 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.6/35.6 MB[0m [31m22.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m108.6/108.6 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.0/85.0 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m61.3 MB/s[0m eta [36m0:

In [2]:
from google.colab import drive
drive.mount('/content/drive')

#rank_folder('/content/drive/MyDrive/bursts', top_k=5)


Mounted at /content/drive


In [3]:
import os, cv2, math, torch
import numpy as np
from PIL import Image
from tqdm import tqdm
import mediapipe as mp
import open_clip

# Device setup
device = "cuda" if torch.cuda.is_available() else "cpu"




In [4]:
### ---------- Technical quality scorers ----------
def sharpness_score(img):
    """Variance of Laplacian (normalized 0‑1)."""
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    score = cv2.Laplacian(gray, cv2.CV_64F).var()
    return score

def exposure_score(img):
    """Penalize over/under‑exposure using histogram clipping."""
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    hist = cv2.calcHist([gray],[0],None,[256],[0,256]).flatten()
    total = hist.sum()
    low_clip = hist[:5].sum()/total
    high_clip = hist[-5:].sum()/total
    return 1 - (low_clip + high_clip)  # closer to 1 is better


In [5]:
### ---------- Aesthetic score using CLIP ----------
clip_model, _, clip_preprocess = open_clip.create_model_and_transforms(
        'ViT-B-32', pretrained='laion2b_s34b_b79k', device=device)
clip_model.eval()

with torch.no_grad():
    text_tokens = open_clip.tokenize(["a beautiful photo"]).to(device)
    text_embed = clip_model.encode_text(text_tokens).float()

def aesthetic_score(pil_img):
    img = clip_preprocess(pil_img).unsqueeze(0).to(device)
    with torch.no_grad():
        img_embed = clip_model.encode_image(img).float()
    score = torch.cosine_similarity(img_embed, text_embed).item()
    return score


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


open_clip_pytorch_model.bin:   0%|          | 0.00/605M [00:00<?, ?B/s]

In [6]:
# ↳ run in a notebook code cell
!git clone --depth 1 https://github.com/akanametov/yolo-face
!pip install -q ultralytics    # YOLO framework (pulls torch, etc.)


Cloning into 'yolo-face'...
remote: Enumerating objects: 331, done.[K
remote: Counting objects: 100% (331/331), done.[K
remote: Compressing objects: 100% (281/281), done.[K
remote: Total 331 (delta 48), reused 227 (delta 43), pack-reused 0 (from 0)[K
Receiving objects: 100% (331/331), 35.83 MiB | 27.26 MiB/s, done.
Resolving deltas: 100% (48/48), done.
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m27.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [14]:
!wget -q https://github.com/akanametov/yolo-face/releases/download/v0.0.0/yolov10n-face.pt
from ultralytics import YOLO
face_detector = YOLO("yolov10n-face.pt")   # downloads weights once

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [15]:
import numpy as np, math, cv2
from PIL import Image

def face_metrics(pil_img):
    """
    Returns (face_found, centering(0-1), smile_prob(0-1))
    """
    img = np.array(pil_img)
    h, w = img.shape[:2]

    # 1. face bbox
    res = face_detector(img, imgsz=640, conf=0.25)[0]
    if not len(res.boxes):
        return 0, 0, 0          # no face detected

    # take the biggest box
    x1,y1,x2,y2 = res.boxes.xyxy.cpu().numpy().astype(int)[0]
    cx, cy = (x1+x2)/2, (y1+y2)/2
    centering = 1 - math.hypot(cx-w/2, cy-h/2)/math.hypot(w/2, h/2)

    # 2. smile proxy – mouth aspect ratio from landmarks if available
    # fall back to face orientation (slight smile raises cheeks)
    smile = float(res.boxes.conf[0])      # confidence ≈ “face quality”
    return 1, np.clip(centering,0,1), np.clip(smile,0,1)


In [8]:
def norm(val, lo, hi):
    return np.clip((val-lo)/(hi-lo), 0, 1)


In [9]:
def combined_score(pil_img,
                   w_center=0.20, w_smile=0.15,
                   w_sharp=0.25, w_exp=0.15, w_aes=0.25):
    img_cv = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)

    sharp_raw = sharpness_score(img_cv)
    sharp = norm(sharp_raw, 80, 600)

    expo  = exposure_score(img_cv)        # already 0-1
    aes   = norm(aesthetic_score(pil_img), .20, .35)

    face_found, centering, smile = face_metrics(pil_img)
    if not face_found:
        centering, smile = 0, 0           # still penalise

    total = (w_center*centering + w_smile*smile +
             w_sharp*sharp    + w_exp*expo + w_aes*aes)

    return total, dict(sharp=sharp, expo=expo, aes=aes,
                       center=centering, smile=smile)


In [16]:
### ---------- Evaluate a folder ----------
from pathlib import Path
import shutil

def rank_folder(folder, top_k=3, out_dir=None):
    paths = list(Path(folder).glob('*.[jp][pn]g'))  # jpg & png
    results = []
    for p in tqdm(paths, desc='Scoring'):
        img = Image.open(p).convert('RGB')
        score, parts = combined_score(img)
        results.append((score, p, parts))
    results.sort(reverse=True, key=lambda x: x[0])
    print(results)
    if out_dir:
        Path(out_dir).mkdir(parents=True, exist_ok=True)
        for _, p, _ in results[:top_k]:
            shutil.copy(p, Path(out_dir)/p.name)
    return results[:top_k], results

# Example:
top, all_scores = rank_folder('/content/drive/My Drive/bursts', top_k=5, out_dir='/content/drive/My Drive/burstoutput')


Scoring:   0%|          | 0/8 [00:00<?, ?it/s]


0: 640x480 1 face, 39.9ms
Speed: 14.6ms preprocess, 39.9ms inference, 41.9ms postprocess per image at shape (1, 3, 640, 480)


Scoring:  12%|█▎        | 1/8 [00:00<00:05,  1.17it/s]


0: 640x480 1 face, 12.4ms
Speed: 3.5ms preprocess, 12.4ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 480)


Scoring:  25%|██▌       | 2/8 [00:01<00:03,  1.64it/s]


0: 640x480 1 face, 8.5ms
Speed: 3.5ms preprocess, 8.5ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 480)


Scoring:  38%|███▊      | 3/8 [00:01<00:02,  2.01it/s]


0: 640x480 1 face, 8.9ms
Speed: 3.6ms preprocess, 8.9ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 480)


Scoring:  50%|█████     | 4/8 [00:02<00:01,  2.07it/s]


0: 640x480 1 face, 12.0ms
Speed: 4.6ms preprocess, 12.0ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 480)


Scoring:  62%|██████▎   | 5/8 [00:02<00:01,  2.20it/s]


0: 640x480 1 face, 8.6ms
Speed: 3.6ms preprocess, 8.6ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 480)


Scoring:  75%|███████▌  | 6/8 [00:02<00:00,  2.30it/s]


0: 640x480 1 face, 9.5ms
Speed: 3.7ms preprocess, 9.5ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 480)


Scoring:  88%|████████▊ | 7/8 [00:03<00:00,  2.38it/s]


0: 640x480 1 face, 14.1ms
Speed: 4.1ms preprocess, 14.1ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 480)


Scoring: 100%|██████████| 8/8 [00:03<00:00,  2.18it/s]

[(np.float64(0.6956167736529094), PosixPath('/content/drive/My Drive/bursts/IMG_1.jpg'), {'sharp': np.float64(0.8139828824678464), 'expo': np.float32(0.8625286), 'aes': np.float64(0.23490152756373087), 'center': np.float64(0.8612610364749685), 'smile': np.float64(0.8784277439117432)}), (np.float64(0.6695943844928872), PosixPath('/content/drive/My Drive/bursts/IMG_3.jpg'), {'sharp': np.float64(0.6733251802790946), 'expo': np.float32(0.87269425), 'aes': np.float64(0.23714296023050943), 'center': np.float64(0.8883028648532112), 'smile': np.float64(0.8894175887107849)}), (np.float64(0.6636774805375835), PosixPath('/content/drive/My Drive/bursts/IMG_5.jpg'), {'sharp': np.float64(0.7672163629774306), 'expo': np.float32(0.8613354), 'aes': np.float64(0.18275392055511472), 'center': np.float64(0.8341800675431328), 'smile': np.float64(0.8676572442054749)}), (np.float64(0.6542383226149172), PosixPath('/content/drive/My Drive/bursts/IMG_2.jpg'), {'sharp': np.float64(0.6235242556855715), 'expo': np




*Last updated: 2025‑05‑31 14:42 UTC*