In [4]:
import numpy as np
import matplotlib.pyplot as plt
import os

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.decomposition import PCA
import joblib

from pathlib import Path

# Fitted classifiers are written into this folder; create it up front so
# later saves do not fail on a missing directory.
MODEL_DIR = Path("models")
MODEL_DIR.mkdir(exist_ok=True)

# Facial region -> binary attributes evaluated for that region.
# Task names are later built as f"{region}_{attr}".
TASKS = dict(
    eyes=["Narrow_Eyes", "Arched_Eyebrows"],
    nose=["Big_Nose", "Pointy_Nose"],
    lips=["Big_Lips"],
    face=["Oval_Face", "Pale_Skin"],
    cheeks=["High_Cheekbones", "Rosy_Cheeks"],
)


In [7]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import numpy as np

def evaluate_binary_task(name, show_pca=True, data_dir=".", test_size=0.1, random_state=42):
    """Train, save, and evaluate a logistic-regression classifier for one task.

    Loads ``{name}_embeddings.npy`` and ``{name}_labels.npy`` from ``data_dir``,
    holds out a stratified test split, fits a ``LogisticRegression`` on the
    remainder, writes the fitted model to
    ``MODEL_DIR / f"{name}_classifier.joblib"``, and prints the accuracy and
    confusion matrix measured on the held-out split.

    Args:
        name: Task identifier; used as the prefix of both input files and of
            the saved model file.
        show_pca: When true, also display a 2-component PCA scatter of all
            embeddings, one series per label (1 -> "Positive", 0 -> "Negative").
        data_dir: Directory containing the ``.npy`` inputs. Defaults to the
            current working directory.
        test_size: Fraction of samples held out for evaluation.
        random_state: Seed passed to ``train_test_split`` so the split is
            reproducible.

    Returns:
        The held-out accuracy, or ``None`` when the labels contain fewer than
        two distinct classes (in which case nothing is trained or saved).

    Raises:
        FileNotFoundError: If either input file does not exist.
        Other exceptions raised by scikit-learn (for example when a stratified
        split cannot be formed) propagate unchanged.
    """
    data_dir = Path(data_dir)
    X = np.load(data_dir / f"{name}_embeddings.npy")
    y = np.load(data_dir / f"{name}_labels.npy")

    classes, counts = np.unique(y, return_counts=True)
    if len(classes) < 2:
        # Name the task in the message: this check runs before the task header
        # is printed, so an anonymous message is ambiguous inside a sweep.
        print(f"Skipping {name}: only one class present.")
        return None

    print(f"\n{name}")
    print("X shape:", X.shape)
    print("Labels:", (classes, counts))

    X_train, X_test, y_train, y_test = train_test_split(
        X, y,
        test_size=test_size,
        random_state=random_state,
        stratify=y,  # keep the class ratio identical in both splits
    )

    clf = LogisticRegression(max_iter=3000)
    clf.fit(X_train, y_train)

    # Persist the model fitted on the training split only.
    joblib.dump(clf, MODEL_DIR / f"{name}_classifier.joblib")

    y_pred = clf.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    print("Accuracy:", acc)
    print("Confusion matrix:")
    print(cm)

    if show_pca:
        # The projection is fitted on all samples; it is for visual inspection
        # only and does not feed back into the classifier.
        X_pca = PCA(n_components=2).fit_transform(X)

        fig, ax = plt.subplots(figsize=(6, 6))
        for label in (0, 1):
            idx = y == label
            ax.scatter(
                X_pca[idx, 0],
                X_pca[idx, 1],
                label="Positive" if label == 1 else "Negative",
                alpha=0.5,
            )

        ax.legend()
        ax.set(title=f"{name} Embeddings (PCA)", xlabel="PC1", ylabel="PC2")
        plt.show()

    return acc


In [8]:
# Held-out accuracy per task, keyed by "<region>_<attr>". Only tasks that were
# actually evaluated are recorded, so every value is a real accuracy.
results = {}

for region, attrs in TASKS.items():
    for attr in attrs:
        task_name = f"{region}_{attr}"
        try:
            acc = evaluate_binary_task(task_name, show_pca=False)
        except Exception as e:
            # Best-effort sweep: one failing task (e.g. missing input files)
            # must not abort the remaining tasks.
            print(f"Skipping {task_name}: {e}")
            continue

        # evaluate_binary_task returns None when it skips a single-class task;
        # storing that None would mix placeholders with real accuracies and
        # break any numeric aggregation over results.values().
        if acc is not None:
            results[task_name] = acc



eyes_Narrow_Eyes
X shape: (988, 512)
Labels: (array([0, 1]), array([500, 488]))
Accuracy: 1.0
Confusion matrix:
[[50  0]
 [ 0 49]]

eyes_Arched_Eyebrows
X shape: (988, 512)
Labels: (array([0, 1]), array([488, 500]))
Accuracy: 0.98989898989899
Confusion matrix:
[[49  0]
 [ 1 49]]

nose_Big_Nose
X shape: (21951, 512)
Labels: (array([0, 1]), array([11499, 10452]))
Accuracy: 0.7691256830601093
Confusion matrix:
[[928 222]
 [285 761]]

nose_Pointy_Nose
X shape: (21951, 512)
Labels: (array([0, 1]), array([10452, 11499]))
Accuracy: 0.76183970856102
Confusion matrix:
[[754 292]
 [231 919]]
Skipping lips_Big_Lips: [Errno 2] No such file or directory: 'lips_Big_Lips_embeddings.npy'

face_Oval_Face
X shape: (1447, 512)
Labels: (array([0, 1]), array([949, 498]))
Accuracy: 0.6758620689655173
Confusion matrix:
[[73 22]
 [25 25]]

face_Pale_Skin
X shape: (1447, 512)
Labels: (array([0, 1]), array([955, 492]))
Accuracy: 0.7793103448275862
Confusion matrix:
[[80 16]
 [16 33]]

cheeks_High_Cheekbones
X 