In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.decomposition import PCA
import joblib

from pathlib import Path

# Directory that receives the fitted classifiers; created up front so that
# saving a model never fails on a missing folder.
MODEL_DIR = Path("models")
MODEL_DIR.mkdir(exist_ok=True)

# Facial region -> attribute names evaluated for that region.
# Insertion order is the order in which the tasks are processed.
TASKS = dict(
    eyes=["Narrow_Eyes", "Arched_Eyebrows"],
    nose=["Big_Nose", "Pointy_Nose"],
    lips=["Big_Lips"],
    face=["Oval_Face", "Pale_Skin"],
    cheeks=["High_Cheekbones", "Rosy_Cheeks"],
)


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import numpy as np

def evaluate_binary_task(name, show_pca=True, embeddings_dir="."):
    """Train and evaluate a logistic-regression classifier for one binary task.

    Loads ``{name}_embeddings.npy`` and ``{name}_labels.npy`` from
    ``embeddings_dir``, holds out a stratified 10% test split, fits a
    ``LogisticRegression`` on the remaining 90%, saves the fitted model to
    ``MODEL_DIR / f"{name}_classifier.joblib"`` and prints the test accuracy
    and confusion matrix.

    Parameters
    ----------
    name : str
        Task name used as the file-name prefix of the embedding/label arrays
        and of the saved classifier.
    show_pca : bool, default True
        If true, also show a 2-D PCA scatter plot of all embeddings coloured
        by label (for visual inspection only; PCA is not used by the model).
    embeddings_dir : str or path-like, default "."
        Directory containing the ``.npy`` files. The default keeps the
        original behaviour of reading from the current working directory.

    Returns
    -------
    float or None
        Accuracy on the held-out split, or ``None`` when the labels contain
        fewer than two classes and the task is skipped.

    Raises
    ------
    FileNotFoundError
        If either ``.npy`` file is missing (raised by ``np.load``).
    """
    data_dir = Path(embeddings_dir)
    X = np.load(data_dir / f"{name}_embeddings.npy")
    y = np.load(data_dir / f"{name}_labels.npy")

    # A classifier cannot be fitted (or stratified) with a single class.
    # Name the task in the message: the task header has not been printed yet.
    if len(np.unique(y)) < 2:
        print(f"{name}: only one class present \u2014 skipping.")
        return None

    print(f"\n{name}")
    print("X shape:", X.shape)
    print("Labels:", np.unique(y, return_counts=True))

    # Fixed seed + stratification keep the split reproducible and the class
    # balance of the test set equal to that of the full data.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y,
        test_size=0.1,
        random_state=42,
        stratify=y
    )

    clf = LogisticRegression(max_iter=3000)
    clf.fit(X_train, y_train)

    # Persist the fitted model so it can be reused without retraining.
    joblib.dump(clf, MODEL_DIR / f"{name}_classifier.joblib")

    y_pred = clf.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    print("Accuracy:", acc)
    print("Confusion matrix:")
    print(cm)

    if show_pca:
        # PCA is fitted on the full data set purely for visualisation.
        pca = PCA(n_components=2)
        X_pca = pca.fit_transform(X)

        fig, ax = plt.subplots(figsize=(6, 6))
        for label in [0, 1]:
            idx = y == label
            ax.scatter(
                X_pca[idx, 0],
                X_pca[idx, 1],
                label="Positive" if label == 1 else "Negative",
                alpha=0.5
            )

        ax.legend()
        ax.set_title(f"{name} Embeddings (PCA)")
        ax.set_xlabel("PC1")
        ax.set_ylabel("PC2")
        plt.show()

    return acc


In [8]:
# Evaluate every (region, attribute) task and collect the returned accuracy
# per task name. A task that raises (for example because its .npy files are
# missing) is reported and left out of `results`; the remaining tasks still run.
results = {}

for region in TASKS:
    attrs = TASKS[region]
    for attr in attrs:
        task_name = f"{region}_{attr}"
        try:
            acc = evaluate_binary_task(task_name, show_pca=False)
        except Exception as e:
            print(f"Skipping {task_name}: {e}")
        else:
            results[task_name] = acc



eyes_Narrow_Eyes
X shape: (988, 512)
Labels: (array([0, 1]), array([500, 488]))
Accuracy: 1.0
Confusion matrix:
[[50  0]
 [ 0 49]]

eyes_Arched_Eyebrows
X shape: (988, 512)
Labels: (array([0, 1]), array([488, 500]))
Accuracy: 0.98989898989899
Confusion matrix:
[[49  0]
 [ 1 49]]

nose_Big_Nose
X shape: (21951, 512)
Labels: (array([0, 1]), array([11499, 10452]))
Accuracy: 0.7691256830601093
Confusion matrix:
[[928 222]
 [285 761]]

nose_Pointy_Nose
X shape: (21951, 512)
Labels: (array([0, 1]), array([10452, 11499]))
Accuracy: 0.76183970856102
Confusion matrix:
[[754 292]
 [231 919]]
Skipping lips_Big_Lips: [Errno 2] No such file or directory: 'lips_Big_Lips_embeddings.npy'

face_Oval_Face
X shape: (1447, 512)
Labels: (array([0, 1]), array([949, 498]))
Accuracy: 0.6758620689655173
Confusion matrix:
[[73 22]
 [25 25]]

face_Pale_Skin
X shape: (1447, 512)
Labels: (array([0, 1]), array([955, 492]))
Accuracy: 0.7793103448275862
Confusion matrix:
[[80 16]
 [16 33]]

cheeks_High_Cheekbones
X 

In [3]:
# Cosine similarity
from sklearn.metrics.pairwise import cosine_similarity

def _unit_rows(X):
    """Return X as float64 with every non-zero row scaled to unit L2 norm."""
    X = np.asarray(X, dtype=np.float64)
    norms = np.linalg.norm(X, axis=1, keepdims=True)
    norms[norms == 0.0] = 1.0  # keep all-zero rows as zeros instead of dividing by 0
    return X / norms


def _mean_within_cosine(U):
    """Mean cosine similarity over all pairs of distinct rows of unit-row matrix U."""
    n = U.shape[0]
    row_sum = U.sum(axis=0)
    # sum_{i,j} u_i . u_j == ||sum_i u_i||^2; removing the i == j terms leaves
    # the off-diagonal sum, which covers each unordered pair twice.
    all_pairs = float(row_sum @ row_sum)
    diagonal = float(np.einsum("ij,ij->", U, U))
    return (all_pairs - diagonal) / (n * (n - 1))


def evaluate_cosine_similarity(embeddings_dir="embeddings"):
    """Summarise within-class and cross-class cosine similarity for each task.

    For every ``<prefix>_embeddings.npy`` in ``embeddings_dir`` that has a
    matching ``<prefix>_labels.npy``, computes the mean cosine similarity
    between distinct positive samples (label 1), between distinct negative
    samples (label 0), and between positive/negative pairs, then prints a
    report.

    The means are computed in closed form from sums of row-normalised
    embeddings, so no n-by-n similarity matrix is materialised; the values
    equal the means of the full pairwise matrices up to floating-point
    rounding.

    Parameters
    ----------
    embeddings_dir : str, default "embeddings"
        Directory scanned for embedding/label ``.npy`` files.

    Returns
    -------
    list of tuple
        ``(prefix, pos_mean, neg_mean, cross_mean)`` for each evaluated task,
        ordered by file name so repeated runs give the same order.
    """
    suffix = "_embeddings.npy"
    results = []

    # os.listdir order is arbitrary; sort it for reproducible output.
    for fname in sorted(os.listdir(embeddings_dir)):
        if not fname.endswith(suffix):
            continue

        # Strip only the trailing suffix (str.replace would hit any occurrence).
        prefix = fname[: -len(suffix)]
        emb_path = os.path.join(embeddings_dir, fname)
        label_path = os.path.join(embeddings_dir, prefix + "_labels.npy")

        if not os.path.exists(label_path):
            print(f"Missing labels for {prefix}, skipping")
            continue

        # Load data
        X = np.load(emb_path)
        y = np.load(label_path)

        # Safety checks
        if len(X) != len(y):
            print(f"Skipping {prefix} (embeddings/labels length mismatch)")
            continue

        if len(np.unique(y)) < 2:
            print(f"Skipping {prefix} (single class)")
            continue

        X_pos = X[y == 1]
        X_neg = X[y == 0]

        # A within-class mean needs at least one pair per class.
        if len(X_pos) < 2 or len(X_neg) < 2:
            print(f"Skipping {prefix} (not enough samples)")
            continue

        U_pos = _unit_rows(X_pos)
        U_neg = _unit_rows(X_neg)

        # Within-class means exclude self-similarity (the diagonal).
        pos_mean = _mean_within_cosine(U_pos)
        neg_mean = _mean_within_cosine(U_neg)
        # Mean over all (positive, negative) pairs: the dot product of the two
        # class sums divided by the number of pairs.
        cross_mean = float(U_pos.sum(axis=0) @ U_neg.sum(axis=0)) / (
            len(U_pos) * len(U_neg)
        )

        results.append((prefix, pos_mean, neg_mean, cross_mean))

    # Print nicely
    print("\n=== Cosine Similarity Results ===")
    for prefix, pos, neg, cross in results:
        print(f"{prefix}")
        print(f"  Same-label (+/+): {pos:.3f}")
        print(f"  Same-label (-/-): {neg:.3f}")
        print(f"  Cross-label (+/-): {cross:.3f}")
        print()

    return results

# Run the cosine-similarity analysis over every task found in "embeddings".
# NOTE(review): this rebinds the notebook-global `results`, which the
# classifier loop also uses for its {task_name: accuracy} dict; whichever
# cell ran last determines what `results` holds.
results = evaluate_cosine_similarity("embeddings")


=== Cosine Similarity Results ===
face_Pale_Skin
  Same-label (+/+): 0.802
  Same-label (-/-): 0.770
  Cross-label (+/-): 0.770

face_Oval_Face
  Same-label (+/+): 0.804
  Same-label (-/-): 0.764
  Cross-label (+/-): 0.775

cheeks_Rosy_Cheeks
  Same-label (+/+): 0.850
  Same-label (-/-): 0.809
  Cross-label (+/-): 0.822

eyes_Narrow_Eyes
  Same-label (+/+): 0.798
  Same-label (-/-): 0.855
  Cross-label (+/-): 0.795

cheeks_High_Cheekbones
  Same-label (+/+): 0.809
  Same-label (-/-): 0.850
  Cross-label (+/-): 0.822

eyes_Arched_Eyebrows
  Same-label (+/+): 0.855
  Same-label (-/-): 0.798
  Cross-label (+/-): 0.795

nose_Big_Nose
  Same-label (+/+): 0.811
  Same-label (-/-): 0.844
  Cross-label (+/-): 0.818

nose_Pointy_Nose
  Same-label (+/+): 0.844
  Same-label (-/-): 0.811
  Cross-label (+/-): 0.818

