In [1]:
import argparse, numpy as np, pandas as pd, torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import random
from pathlib import Path

In [2]:
# ---------- pick 5 binary attrs (the closer to 0.5, the better for training) ----------

# Load the CelebA attribute table.
csv_path = Path(r"list_attr_celeba.csv")
df = pd.read_csv(csv_path)

# Every column except the image identifier is treated as an attribute.
attr_cols = [col for col in df.columns if col != "image_id"]

# Recode columns whose values all lie in {-1, 1} to {0, 1} for binary classification;
# columns containing any other value are left untouched.
for col in attr_cols:
    if set(df[col].unique()).issubset({-1, 1}):
        df[col] = df[col].eq(1).astype(int)

# Positive-class rate per attribute, plus its absolute distance from a perfectly
# balanced 50/50 split; rows are ordered from most to least balanced.
stats = df[attr_cols].mean().to_frame("pos_rate")
stats["balance"] = (stats["pos_rate"] - 0.5).abs()
stats = stats.sort_values("balance")

# Show the 5 most balanced attributes
print(stats.head(5))

                     pos_rate   balance
Attractive           0.512505  0.012505
Mouth_Slightly_Open  0.483428  0.016572
Smiling              0.482080  0.017920
Wearing_Lipstick     0.472436  0.027564
High_Cheekbones      0.455032  0.044968


In [3]:
# model.py will produce two files: features_256.pt and features_filenames.txt

## ----------step1. fix 5 binary attrs ----------
# export_first_300_features.py generates features from the first 300 images; it is only an example used to verify the pipeline and can be deleted.

# Build the paths with pathlib so this cell also runs on Linux/macOS: a literal
# backslash (as in r"data\features_256.pt") is a directory separator only on Windows.
DATA_DIR = Path("data")

# Pre-extracted feature matrix, one row per image.
X = torch.load(DATA_DIR / "features_256.pt")               # e.g., torch.Size([300, 256])
# One filename per line, expected to be in the same order as the rows of X.
names = (DATA_DIR / "features_filenames.txt").read_text().splitlines()
# Fail early if the two files are out of sync.
assert len(names) == X.shape[0], f"names({len(names)}) != X({X.shape[0]})"

# Read CelebA attributes and convert {-1, 1} to {0, 1}
def load_attrs(csv_path: str):
    """Read an attribute CSV and recode -1/1 columns as 0/1.

    Args:
        csv_path: path of the CSV file; every column other than "image_id"
            is treated as an attribute column.

    Returns:
        (table, attribute_names): the loaded DataFrame, in which each attribute
        column whose values all lie in {-1, 1} has been replaced by 0/1 integers
        (1 stays 1, -1 becomes 0), and the list of attribute column names in
        file order. Columns containing any other value are left unchanged.
    """
    table = pd.read_csv(csv_path)
    attribute_names = [col for col in table.columns if col != "image_id"]
    for col in attribute_names:
        # Skip anything that is not a pure -1/1 column.
        if not set(table[col].unique()).issubset({-1, 1}):
            continue
        table[col] = table[col].eq(1).astype(int)
    return table, attribute_names

df, attr_cols = load_attrs(r"list_attr_celeba.csv")

# The 5 attributes used as classification tasks (edit this list to change them).
PICK_ATTRS = [
    "Attractive",
    "Mouth_Slightly_Open",
    "Smiling",
    "Wearing_Lipstick",
    "High_Cheekbones",
]

# Guard: every requested attribute must be a column of the CSV.
missing = set(PICK_ATTRS).difference(attr_cols)
assert not missing, f"Unknown attrs: {missing}"

# Reorder the attribute rows to follow the saved filenames, so that row i of the
# table describes the same image as row i of X.
dfA = df.set_index("image_id").loc[names].reset_index()

# Label tensor for the 5 tasks (shape: n x 5); a single task is one column of it.
Y = torch.tensor(dfA[PICK_ATTRS].to_numpy(), dtype=torch.float32)

print("X shape:", X.shape)  # [n, 256]
print("Y shape:", Y.shape)  # [n, 5]
print(dfA[["image_id", *PICK_ATTRS]].head())  # quick sanity check

X shape: torch.Size([300, 256])
Y shape: torch.Size([300, 5])
     image_id  Attractive  Mouth_Slightly_Open  Smiling  Wearing_Lipstick  \
0  000001.jpg           1                    1        1                 1   
1  000002.jpg           0                    1        1                 0   
2  000003.jpg           0                    0        0                 0   
3  000004.jpg           1                    0        0                 1   
4  000005.jpg           1                    0        0                 1   

   High_Cheekbones  
0                1  
1                1  
2                0  
3                0  
4                0  


In [4]:
# ---------- Step 2: create FeatureDataset  ----------
class FeatureDataset(Dataset):
    """Pairs feature rows with labels so they can be batched by a DataLoader.

    Args:
        X: tensor whose first dimension indexes samples; stored as float.
        y: tensor holding one label per sample; stored as a float column of
            shape [n, 1].

    Raises:
        ValueError: if the number of labels differs from the number of feature
            rows (for example when a multi-column label matrix is passed, which
            would otherwise be flattened and silently misaligned with X).
    """

    def __init__(self, X, y):
        self.X = X.float()
        # reshape (rather than view) also accepts non-contiguous label tensors.
        self.y = y.float().reshape(-1, 1)
        if self.y.size(0) != self.X.size(0):
            raise ValueError(
                f"X has {self.X.size(0)} rows but y has {self.y.size(0)} labels"
            )

    def __len__(self):
        return self.X.size(0)

    def __getitem__(self, i):
        return self.X[i], self.y[i]

In [5]:
# ---- Step 3: Supervised bootstrap tuning (lr & betas) ----

# Tune hyperparameters using the pre-extracted features X:[n,256] and a binary label y:[n]

# Bootstrap splitting in the same style as the unsupervised script (sampling with replacement; out-of-bag as validation)
def bootstrap_split_indices(n, B=10, seed=90051):
    """Create B bootstrap train/validation index splits over range(n).

    Args:
        n: number of samples; indices are drawn from 0..n-1.
        B: number of bootstrap replicates.
        seed: seed of the private random generator, so the same arguments
            always produce the same splits.

    Returns:
        A list of B tuples (train_idx, val_idx). train_idx holds n indices
        sampled with replacement; val_idx holds, in ascending order, the
        indices that were never drawn (out-of-bag). If every index was drawn,
        val_idx falls back to indices taken at a stride of max(1, n // 5).
    """
    # A private generator yields the same draws as random.seed(seed) followed by
    # random.randint(...), but leaves the process-wide `random` state untouched.
    rng = random.Random(seed)
    all_indices = set(range(n))
    splits = []
    for _ in range(B):
        train_idx = [rng.randint(0, n - 1) for _ in range(n)]
        # Out-of-bag (OOB); sorted so the order does not depend on set iteration.
        val_idx = sorted(all_indices.difference(train_idx))
        if not val_idx:                                 # Fallback for extreme cases
            val_idx = list(range(0, n, max(1, n // 5)))
        splits.append((train_idx, val_idx))
    return splits

# A very lightweight linear / two-layer MLP classifier (same as step 4)
class Classifier(nn.Module):
    """Small classifier head that outputs one logit per input row.

    Args:
        in_dim: size of each input feature vector.
        hidden: None for a single linear layer (logistic regression); otherwise
            the width of the hidden layer in Linear -> ReLU -> Linear.
    """

    def __init__(self, in_dim=256, hidden=None):
        super().__init__()
        if hidden is not None:
            # Two-layer MLP.
            layers = [nn.Linear(in_dim, hidden), nn.ReLU(), nn.Linear(hidden, 1)]
            self.net = nn.Sequential(*layers)
            return
        # Logistic regression
        self.net = nn.Linear(in_dim, 1)

    def forward(self, x):
        """Return the raw logits (no sigmoid applied) for a batch `x`."""
        return self.net(x)

def _train_epochs(model, dl_tr, epochs, optimizer, device):
    loss_fn = nn.BCEWithLogitsLoss(reduction="sum")  # Use 'sum' reduction, same as in the unsupervised script
    model.to(device)
    for _ in range(epochs):
        model.train()
        for xb, yb in dl_tr:
            xb, yb = xb.to(device), yb.to(device)
            logits = model(xb)
            loss = loss_fn(logits, yb)
            optimizer.zero_grad(); loss.backward(); optimizer.step()

@torch.no_grad()
def _eval_loss(model, dl_va, device):
    loss_fn = nn.BCEWithLogitsLoss(reduction="sum")
    model.eval()
    total = 0.0
    for xb, yb in dl_va:
        xb, yb = xb.to(device), yb.to(device)
        total += loss_fn(model(xb), yb).item()
    return total

def _mean_oob_loss(X, y, splits, lr, betas, *, device, epochs, batch_size, hidden, seed=None):
    """Mean out-of-bag loss of one (lr, betas) setting over all bootstrap splits.

    For every (train_ids, val_ids) pair a fresh Classifier is trained with Adam on
    the bootstrap sample and scored on the out-of-bag rows; the per-split losses
    (summed over samples by _eval_loss) are then averaged.
    """
    num_workers = 0  # more stable for windows
    # torch.device(...) normalises "cuda", "cuda:0" and torch.device objects alike.
    pin_memory = torch.device(device).type == "cuda"
    in_dim = X.size(1)  # take the feature width from the data instead of assuming 256

    losses = []
    for fold, (train_ids, val_ids) in enumerate(splits):
        if seed is not None:
            # Same weight init and batch order for every candidate on this fold, so
            # candidates differ only by their hyperparameters.
            torch.manual_seed(seed + fold)
        ds_tr = FeatureDataset(X[train_ids], y[train_ids])
        ds_va = FeatureDataset(X[val_ids], y[val_ids])
        dl_tr = DataLoader(ds_tr, batch_size=batch_size, shuffle=True,
                           num_workers=num_workers, pin_memory=pin_memory)
        dl_va = DataLoader(ds_va, batch_size=batch_size, shuffle=False,
                           num_workers=num_workers, pin_memory=pin_memory)
        model = Classifier(in_dim=in_dim, hidden=hidden)
        optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=betas)
        _train_epochs(model, dl_tr, epochs, optimizer, device)
        losses.append(_eval_loss(model, dl_va, device))
    return sum(losses) / len(losses)


def tune_supervised_with_bootstrap(
    X, y, device="cpu", B=3, epochs=8, batch_size=64,
    lr_candidates=(1e-2, 1e-3, 5e-4),
    betas_candidates=((0.9, 0.999), (0.9, 0.99), (0.5, 0.999)),
    hidden=None, seed=None
):
    """
    Pick Adam's learning rate and betas by bootstrap / out-of-bag validation.

    The search is a two-stage coordinate search: first betas are fixed at
    (0.9, 0.999) and every learning rate is scored; then the best learning rate
    is fixed and every betas pair is scored. The score is the mean OOB
    (out-of-bag) validation loss over the B bootstrap splits.

    Args:
        X: feature tensor of shape [n, d]; d is passed to Classifier as in_dim.
        y: label tensor with one entry per row of X.
        device: device used for training and evaluation.
        B: number of bootstrap splits (at least 1).
        epochs, batch_size: training settings of each temporary classifier.
        lr_candidates: learning rates to try (must not be empty).
        betas_candidates: Adam betas pairs to try; if empty, best_betas is None.
        hidden: forwarded to Classifier (None = logistic regression).
        seed: optional int. When given, torch.manual_seed(seed + fold) is called
            before each fold, making runs repeatable and giving every candidate
            the same initialisation and batch order. This reseeds the global
            torch RNG. With the default None nothing is seeded, so scores of
            the same configuration vary slightly from run to run.

    Returns: (best_lr, best_betas)

    Raises:
        ValueError: if B < 1 or lr_candidates is empty.

    Note: This is only for hyperparameter tuning. In each bootstrap fold it trains a temporary small
    classifier and selects hyperparameters based on the mean OOB (out-of-bag) validation loss.
    It does not perform a final train on the full training set or any test-set evaluation.
    """
    lr_candidates = tuple(lr_candidates)
    if B < 1:
        raise ValueError(f"B must be >= 1, got {B}")
    if not lr_candidates:
        raise ValueError("lr_candidates must contain at least one learning rate")

    # bootstrap_split_indices is seeded, so each call yields the same splits;
    # compute them once and reuse them for every candidate.
    splits = bootstrap_split_indices(X.size(0), B)
    shared = dict(device=device, epochs=epochs, batch_size=batch_size,
                  hidden=hidden, seed=seed)

    # ------- Step 1: Fix betas; grid-search the learning rate (lr) -------
    betas_fixed = (0.9, 0.999)
    best_lr, best_loss = None, float("inf")
    for lr in lr_candidates:
        avg_loss = _mean_oob_loss(X, y, splits, lr, betas_fixed, **shared)
        print(f"[Step 1] lr={lr:.1e}, betas={betas_fixed} -> mean OOB val loss={avg_loss:.4f}")
        if avg_loss < best_loss:
            best_lr, best_loss = lr, avg_loss
    print(f"Best lr={best_lr} (mean OOB loss={best_loss:.4f})")

    # ------- Step 2: Fix the best lr; grid-search betas -------
    best_betas, best_loss = None, float("inf")
    for betas in betas_candidates:
        avg_loss = _mean_oob_loss(X, y, splits, best_lr, betas, **shared)
        print(f"[Step 2] betas={betas}, lr={best_lr:.1e} -> mean OOB val loss={avg_loss:.4f}")
        if avg_loss < best_loss:
            best_betas, best_loss = betas, avg_loss

    print(f"\nFinal best params: lr={best_lr}, betas={best_betas}, mean OOB val loss={best_loss:.4f}")
    return best_lr, best_betas

In [6]:
# Usage example (with the features produced by export_first_300_features.py); it only checks that the code runs and can be deleted.
# Assume you already have:
# X: torch.Size([n, 256])  generated by `export_first_300_features.py`
# dfA: an attribute table aligned with `X` (including `"image_id"` and your 5 selected attributes)

# torch is already imported in the first cell, so it is not re-imported here.
task = "Smiling"  # pick any one attribute
# Labels of the chosen attribute as a float vector, one entry per row of dfA.
y_task = torch.tensor(dfA[task].values, dtype=torch.float32)

best_lr, best_betas = tune_supervised_with_bootstrap(
    X, y_task,
    device="cpu",      # set `device="cuda"` if a GPU is available
    B=3, epochs=8, batch_size=64,
    lr_candidates=(1e-2, 1e-3, 5e-4),
    betas_candidates=((0.9,0.999),(0.9,0.99),(0.5,0.999)),
    hidden=None        # if you plan to use a two-layer FC as in step 4, set `hidden=128` here (for tuning only)
)
print("Chosen:", best_lr, best_betas)

[Step 1] lr=1.0e-02, betas=(0.9, 0.999) -> mean OOB val loss=75.5357
[Step 1] lr=1.0e-03, betas=(0.9, 0.999) -> mean OOB val loss=75.5162
[Step 1] lr=5.0e-04, betas=(0.9, 0.999) -> mean OOB val loss=75.5593
Best lr=0.001 (mean OOB loss=75.5162)
[Step 2] betas=(0.9, 0.999), lr=1.0e-03 -> mean OOB val loss=75.5155
[Step 2] betas=(0.9, 0.99), lr=1.0e-03 -> mean OOB val loss=75.5262
[Step 2] betas=(0.5, 0.999), lr=1.0e-03 -> mean OOB val loss=75.5363

Final best params: lr=0.001, betas=(0.9, 0.999), mean OOB val loss=75.5155
Chosen: 0.001 (0.9, 0.999)


In [None]:
# ---------- Step 4: 2-layer fully connected model ----------


