<a href="https://colab.research.google.com/github/rcharan05/UGP/blob/main/Best_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Install and import all dependencies, then load the required data.

In [1]:
# 0. Install & imports
!pip install -q pose-format scikit-learn

import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import normalize
from sklearn.decomposition import PCA
from google.colab import drive

# 1. Mount Drive & set paths
# force_remount=True re-mounts even when /content/drive is already mounted.
drive.mount('/content/drive', force_remount=True)
DATA_DIR  = "/content/drive/MyDrive/UGP"
PROTO_CSV = os.path.join(DATA_DIR, "prototype.csv")
TEST_CSV  = os.path.join(DATA_DIR, "test.csv")
I3D_PKL   = os.path.join(DATA_DIR, "I3D_features.pkl")

# 2. Read splits
# prototype.csv supplies the "train" (gallery) items, test.csv the queries.
proto_df = pd.read_csv(PROTO_CSV)
test_df  = pd.read_csv(TEST_CSV)
# Cast glosses to str so labels from both files compare as strings.
proto_df["gloss"] = proto_df["gloss"].astype(str)
test_df["gloss"]  = test_df["gloss"].astype(str)

uids_train = proto_df["uid"].tolist()
uids_test  = test_df["uid"].tolist()
y_train    = proto_df["gloss"].tolist()
y_test     = test_df["gloss"].tolist()

# 3. Load I3D features
# NOTE(security): unpickling can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
i3d_df = pd.read_pickle(I3D_PKL)
# Zip the two needed columns instead of DataFrame.iterrows(), which builds a
# full Series for every row.  squeeze((0, 3, 4)) drops those three axes and
# raises if any of them is not of size 1.
# NOTE(review): presumably each entry is (1, channels, time, 1, 1) -- confirm.
i3d_raw = {
    vid: np.array(feats, dtype=np.float32).squeeze((0, 3, 4))
    for vid, feats in zip(i3d_df["id"], i3d_df["I3D_features"])
}

# Fail fast (with the offending ids) rather than with a bare KeyError later,
# when the pooling code looks up i3d_raw[uid].
missing_uids = [u for u in uids_train + uids_test if u not in i3d_raw]
if missing_uids:
    raise KeyError(
        f"{len(missing_uids)} uid(s) from the split CSVs have no I3D features, "
        f"e.g. {missing_uids[:5]}"
    )

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/97.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m97.7/97.7 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hMounted at /content/drive
Max‐Ensemble Top-k Accuracies: {1: np.float64(20.08752735229759), 5: np.float64(25.076586433260395), 10: np.float64(28.315098468271334)}
Best α = 0.40
Weighted‐Ensemble Top-k Accuracies: {1: np.float64(19.956236323851204), 5: np.float64(25.38293216630197), 10: np.float64(28.183807439824943)}


Define the pooling functions and the evaluation functions.

In [None]:
# 4. Define pooling, PCA & similarity
def ssr_pool(arr):
    """Pool ``arr`` into one unit-length vector of signed-square-root statistics.

    The minimum, maximum and standard deviation are taken along axis 1,
    concatenated in that order, passed through ``sign(x) * sqrt(|x| + 1e-8)``
    and finally L2-normalised with scikit-learn's ``normalize``.

    Args:
        arr: 2-D array reduced along axis 1 (presumably (channels, time)
            I3D features -- TODO confirm against the loader).

    Returns:
        1-D array of length ``3 * arr.shape[0]``.

    NOTE(review): the first statistic is the *minimum*, not the mean --
    confirm this is intended.
    """
    stats = [arr.min(axis=1), arr.max(axis=1), arr.std(axis=1)]
    stacked = np.concatenate(stats, axis=0)
    # Signed square root: compress magnitudes while keeping each sign.
    root_magnitude = np.sqrt(np.abs(stacked) + 1e-8)
    signed_root = np.sign(stacked) * root_magnitude
    # normalize() works on rows, so go through a (1, d) view and flatten back.
    unit_row = normalize(signed_root[None], axis=1)
    return unit_row.ravel()

def gem_pool(arr, p=3.0, eps=1e-6):
    """Generalised-mean (GeM) pooling along axis 1, followed by L2 normalisation.

    Computes ``(mean((arr + eps) ** p, axis=1) + eps) ** (1 / p)`` and
    L2-normalises the resulting vector with scikit-learn's ``normalize``.

    Args:
        arr: 2-D array reduced along axis 1.
        p: exponent of the generalised mean.
        eps: small offset added before the power and before the root.

    Returns:
        1-D array of length ``arr.shape[0]``.

    NOTE(review): negative entries are not clamped; with an odd ``p`` the mean
    can become negative and the fractional root then yields NaN.  Presumably
    the inputs are non-negative activations -- confirm.
    """
    powered = (arr + eps) ** p
    generalised_mean = (np.mean(powered, axis=1) + eps) ** (1.0 / p)
    unit_row = normalize(generalised_mean[None], axis=1)
    return unit_row.ravel()

def pca_whiten(Xtr, Xte, n_comp):
    """Project both sets with a whitening PCA fitted on ``Xtr`` only.

    Args:
        Xtr: feature matrix the PCA is fitted on.
        Xte: feature matrix that is only transformed with the fitted PCA.
        n_comp: number of principal components to keep.

    Returns:
        ``(train, test)`` projections, each with L2-normalised rows.
    """
    reducer = PCA(n_components=n_comp, whiten=True, random_state=0)
    train_proj = reducer.fit_transform(Xtr)
    # The second set never influences the fit; it is only projected.
    test_proj = reducer.transform(Xte)
    train_unit = normalize(train_proj, axis=1)
    test_unit = normalize(test_proj, axis=1)
    return train_unit, test_unit

def build_features(pool_type, pca_comp, gem_p=None):
    """Pool every train/test clip and PCA-whiten the resulting matrices.

    Args:
        pool_type: ``"ssr"`` (uses ``ssr_pool``) or ``"gem"`` (uses ``gem_pool``).
        pca_comp: number of PCA components, forwarded to ``pca_whiten``.
        gem_p: GeM exponent; only used when ``pool_type == "gem"``.  ``None``
            falls back to ``gem_pool``'s own default instead of forwarding
            ``p=None``, which would fail inside the power operation.

    Returns:
        Whatever ``pca_whiten`` returns for the stacked train and test features.

    Raises:
        ValueError: if ``pool_type`` is neither ``"ssr"`` nor ``"gem"``.
    """
    if pool_type == "ssr":
        pool, pool_kwargs = ssr_pool, {}
    elif pool_type == "gem":
        pool, pool_kwargs = gem_pool, ({} if gem_p is None else {"p": gem_p})
    else:
        raise ValueError(pool_type)

    # Same pooling for both splits; features are looked up by uid in i3d_raw.
    Xtr = np.stack([pool(i3d_raw[u], **pool_kwargs) for u in uids_train])
    Xte = np.stack([pool(i3d_raw[u], **pool_kwargs) for u in uids_test])
    return pca_whiten(Xtr, Xte, n_comp=pca_comp)

def cosine_sim(Xq, Xg):
    """Return the matrix of dot products between the rows of ``Xq`` and ``Xg``.

    The result equals cosine similarity only when the rows of both inputs are
    already L2-normalised; no normalisation is performed here.

    Args:
        Xq: query matrix, one row per query.
        Xg: gallery matrix, one row per gallery item.

    Returns:
        Array whose entry ``[i, j]`` is ``Xq[i] . Xg[j]``.
    """
    return np.dot(Xq, np.transpose(Xg))

def topk_acc(S, y_train, y_test, ks=(1, 5, 10)):
    """Top-k retrieval accuracy (in percent) from a similarity matrix.

    A query counts as correct at ``k`` when its label appears among the labels
    of its ``k`` most similar gallery items.

    Args:
        S: similarity matrix of shape ``(len(y_test), len(y_train))``; larger
            values mean more similar.
        y_train: gallery labels, aligned with the columns of ``S``.
        y_test: query labels, aligned with the rows of ``S``.
        ks: cut-offs to evaluate (immutable default).

    Returns:
        Dict mapping each ``k`` to the accuracy in percent as a plain ``float``.

    Raises:
        ValueError: if the shape of ``S`` does not match the label lists
            (e.g. the matrix was built with queries and gallery swapped).
    """
    S = np.asarray(S)
    # dtype=object keeps plain Python equality semantics for the labels.
    gallery_labels = np.asarray(y_train, dtype=object)
    query_labels = np.asarray(y_test, dtype=object)

    expected_shape = (len(query_labels), len(gallery_labels))
    if S.shape != expected_shape:
        raise ValueError(
            f"S has shape {S.shape}, expected {expected_shape} "
            "(len(y_test), len(y_train))"
        )

    # Column indices of each row sorted by decreasing similarity.
    ranks = np.argsort(-S, axis=1)
    # hits[i, r] is True when the r-th ranked gallery item carries query i's label.
    hits = gallery_labels[ranks] == query_labels[:, None]

    out = {}
    for k in ks:
        out[k] = float(hits[:, :k].any(axis=1).mean() * 100)
    return out

The best hyper-parameters below were obtained by grid search.

In [None]:
# 5. Best hyper-parameters used to compute the similarity matrices
best_ssr_pca = best_gem_pca = 1024  # PCA components for the SSR / GeM features
best_gem_p = 4.0                    # GeM pooling exponent


The SSR (min, max, std-dev) and GeM feature sets are built, and a test-vs-train similarity matrix is computed for each.

In [None]:
# Build PCA-whitened SSR and GeM features and their test-vs-train similarities.
# Arguments are passed by keyword on purpose: this notebook later redefines
# build_features with a different signature (pool_type, gem_p=None).  If that
# definition is the active one, a positional PCA size would silently be taken
# as gem_p; with keywords the call fails loudly instead.
Xtr_ssr, Xte_ssr = build_features(pool_type="ssr", pca_comp=best_ssr_pca)
S_ssr = cosine_sim(Xte_ssr, Xtr_ssr)  # rows: test queries, columns: train items

Xtr_gem, Xte_gem = build_features(pool_type="gem", pca_comp=best_gem_pca, gem_p=best_gem_p)
S_gem = cosine_sim(Xte_gem, Xtr_gem)


The two similarity matrices above are fused by taking their element-wise maximum.

In [None]:
# 6a. Max fusion: each query/gallery pair keeps the larger of its SSR and GeM
# similarities, then top-k accuracy is scored on the fused matrix.
S_max = np.maximum(S_ssr, S_gem)
acc_max = topk_acc(S=S_max, y_train=y_train, y_test=y_test)
print("Max‐Ensemble Top-k Accuracies:", acc_max)

The two similarity matrices are fused as a weighted sum, α·S_ssr + (1 − α)·S_gem, where the weight α is a hyper-parameter chosen by grid search.

In [None]:
# 6b. Weighted-sum fusion: S_w = α·S_ssr + (1 − α)·S_gem, with α taken from
# 21 evenly spaced values in [0, 1].
# NOTE(review): α is chosen by top-1 accuracy computed against y_test, i.e. on
# the same split that is reported -- the resulting numbers are optimistic.
alphas = np.linspace(0, 1, 21)
best_alpha = best_accs = None
best_top1 = -1

for alpha in alphas:
    S_w = alpha * S_ssr + (1 - alpha) * S_gem
    accs = topk_acc(S_w, y_train, y_test)
    # Strict improvement only, so the first α reaching the best score is kept.
    if not accs[1] > best_top1:
        continue
    best_alpha, best_top1, best_accs = alpha, accs[1], accs

In [None]:
# Report the α with the highest top-1 accuracy found by the grid search and
# the top-k accuracies obtained with it.
# The ":.2f" format raises TypeError if best_alpha is still None, i.e. if the
# search never selected an α.
print(f"Best α = {best_alpha:.2f}")
print("Weighted‐Ensemble Top-k Accuracies:", best_accs)

Next, we try ZCA whitening instead of PCA whitening.

In [3]:
# 4. Pooling functions
def ssr_pool(arr):
    """Signed-square-root pooling of min / max / std statistics along axis 1.

    Re-definition so this cell can run on its own; the computation is the same
    as the ``ssr_pool`` defined earlier in this notebook.

    Args:
        arr: 2-D array reduced along axis 1 (presumably (channels, time)
            I3D features -- TODO confirm against the loader).

    Returns:
        L2-normalised 1-D array of length ``3 * arr.shape[0]``.

    NOTE(review): the first statistic is the *minimum*, not the mean --
    confirm this is intended.
    """
    pooled = np.concatenate(
        [arr.min(axis=1), arr.max(axis=1), arr.std(axis=1)],
        axis=0,
    )
    # sign(x) * sqrt(|x| + 1e-8): dampens large magnitudes, keeps the sign.
    damped = np.sign(pooled) * np.sqrt(np.abs(pooled) + 1e-8)
    # normalize() expects rows, hence the temporary leading axis.
    as_row = damped[None]
    return normalize(as_row, axis=1).ravel()

def gem_pool(arr, p=3.0, eps=1e-6):
    """GeM pooling along axis 1: ``(mean((arr + eps)**p) + eps) ** (1/p)``.

    Re-definition so this cell can run on its own; the computation is the same
    as the ``gem_pool`` defined earlier in this notebook.

    Args:
        arr: 2-D array reduced along axis 1.
        p: exponent of the generalised mean.
        eps: small offset added before the power and before the root.

    Returns:
        L2-normalised 1-D array of length ``arr.shape[0]``.

    NOTE(review): negative entries are not clamped; with an odd ``p`` the mean
    can become negative and the fractional root then yields NaN.  Presumably
    the inputs are non-negative activations -- confirm.
    """
    shifted = arr + eps
    mean_of_powers = np.mean(shifted ** p, axis=1)
    pooled = (mean_of_powers + eps) ** (1.0 / p)
    return normalize(pooled[None], axis=1).ravel()

# 5. ZCA whitening
def zca_whiten(Xtr, Xte, eps=1e-6):
    """ZCA-whiten both sets with statistics estimated on ``Xtr`` only.

    The mean and covariance of ``Xtr`` define the whitening matrix
    ``U diag(1 / sqrt(S + eps)) U^T``, where ``U`` and ``S`` come from the SVD
    of the covariance.  Both sets are centred with the ``Xtr`` mean, multiplied
    by that matrix and then L2-normalised row-wise.

    Args:
        Xtr: matrix (rows = samples) used to estimate mean and covariance.
        Xte: matrix transformed with the ``Xtr`` statistics.
        eps: added to the singular values before the inverse square root so
            near-zero directions do not blow up.

    Returns:
        ``(train, test)`` whitened matrices with L2-normalised rows.
    """
    train_mean = Xtr.mean(axis=0)
    train_centered = Xtr - train_mean
    test_centered = Xte - train_mean  # centred with the *train* mean

    covariance = np.cov(train_centered, rowvar=False)
    basis, spectrum, _ = np.linalg.svd(covariance, full_matrices=False)
    inv_sqrt_spectrum = np.diag(1.0 / np.sqrt(spectrum + eps))
    whitener = basis @ inv_sqrt_spectrum @ basis.T

    train_white = train_centered @ whitener
    test_white = test_centered @ whitener
    return normalize(train_white, axis=1), normalize(test_white, axis=1)

# 6. Feature builder with ZCA
def build_features(pool_type, gem_p=None):
    """Pool every train/test clip and ZCA-whiten the resulting matrices.

    NOTE: this replaces the earlier ``build_features(pool_type, pca_comp,
    gem_p=None)`` defined in this notebook; once this cell has run, calls
    written for the three-argument version no longer mean the same thing.

    Args:
        pool_type: ``"ssr"`` (uses ``ssr_pool``) or ``"gem"`` (uses ``gem_pool``).
        gem_p: GeM exponent; only used when ``pool_type == "gem"``.  ``None``
            falls back to ``gem_pool``'s own default instead of forwarding
            ``p=None``, which would fail inside the power operation.

    Returns:
        Whatever ``zca_whiten`` returns for the stacked train and test features.

    Raises:
        ValueError: if ``pool_type`` is neither ``"ssr"`` nor ``"gem"``.
    """
    if pool_type == "ssr":
        pool, pool_kwargs = ssr_pool, {}
    elif pool_type == "gem":
        pool, pool_kwargs = gem_pool, ({} if gem_p is None else {"p": gem_p})
    else:
        raise ValueError(f"Unknown pool_type {pool_type}")

    # Same pooling for both splits; features are looked up by uid in i3d_raw.
    Xtr = np.stack([pool(i3d_raw[u], **pool_kwargs) for u in uids_train])
    Xte = np.stack([pool(i3d_raw[u], **pool_kwargs) for u in uids_test])
    return zca_whiten(Xtr, Xte)

Mounted at /content/drive
Best GeM p = 4.0 → Top-1: 19.52%
Max‐Ensemble Top-k Accuracies: {1: np.float64(19.51859956236324), 5: np.float64(24.770240700218817), 10: np.float64(27.702407002188185)}
Best α = 0.50
Weighted‐Ensemble Top-k Accuracies: {1: np.float64(20.13129102844639), 5: np.float64(25.38293216630197), 10: np.float64(28.577680525164112)}


Build the SSR similarity matrix

In [None]:
# 8. SSR features (ZCA-whitened by build_features) and their similarity matrix.
# Rows of S_ssr follow the test queries, columns the train items.
Xtr_ssr, Xte_ssr = build_features(pool_type="ssr")
S_ssr = cosine_sim(Xq=Xte_ssr, Xg=Xtr_ssr)

Grid-search again for the best GeM exponent p, now with ZCA whitening.

In [None]:
# 9. Grid-search the GeM exponent p by top-1 accuracy.
# NOTE(review): p is selected against y_test, i.e. on the same split that is
# reported afterwards -- the resulting numbers are optimistic.
GEM_PS = [2.0, 3.0, 4.0]
best_gem_p, best_gem_acc = None, -1
for p in GEM_PS:
    Xtr_tmp, Xte_tmp = build_features("gem", gem_p=p)
    S_gem_tmp = cosine_sim(Xte_tmp, Xtr_tmp)
    acc1 = topk_acc(S_gem_tmp, y_train, y_test)[1]
    # Strict improvement only, so the first p reaching the best score is kept.
    if acc1 > best_gem_acc:
        best_gem_acc = acc1
        best_gem_p   = p
        # 10. Keep the winning features and similarity right here instead of
        # pooling and whitening the best configuration a second time after
        # the loop.
        Xtr_gem, Xte_gem, S_gem = Xtr_tmp, Xte_tmp, S_gem_tmp

if best_gem_p is None:
    # Without this guard a stale S_gem from an earlier cell could be reused.
    raise RuntimeError("GeM grid search did not select any p (no top-1 accuracy above -1)")
print(f"Best GeM p = {best_gem_p:.1f} → Top-1: {best_gem_acc:.2f}%")

The two ensemble matrices are built with the same logic as before.

In [None]:
# 11a. Max fusion on the ZCA-whitened features: each query/gallery pair keeps
# the larger of its SSR and GeM similarities before top-k scoring.
S_max = np.maximum(S_ssr, S_gem)
acc_max = topk_acc(S=S_max, y_train=y_train, y_test=y_test)
print("Max‐Ensemble Top-k Accuracies:", acc_max)


In [None]:
# 11b. Weighted-sum fusion: S_w = α·S_ssr + (1 − α)·S_gem, with α taken from
# 21 evenly spaced values in [0, 1].
# NOTE(review): α is chosen by top-1 accuracy computed against y_test, i.e. on
# the same split that is reported -- the resulting numbers are optimistic.
alphas = np.linspace(0, 1, 21)
best_alpha = best_weighted = None
best_top1 = -1
for alpha in alphas:
    S_w = alpha * S_ssr + (1 - alpha) * S_gem
    accs = topk_acc(S_w, y_train, y_test)
    # Strict improvement only, so the first α reaching the best score is kept.
    if not accs[1] > best_top1:
        continue
    best_top1, best_alpha, best_weighted = accs[1], alpha, accs

Printing the results

In [None]:
# Report the α with the highest top-1 accuracy found by the grid search and
# the top-k accuracies obtained with it.
# The ":.2f" format raises TypeError if best_alpha is still None, i.e. if the
# search never selected an α.
print(f"Best α = {best_alpha:.2f}")
print("Weighted‐Ensemble Top-k Accuracies:", best_weighted)

Best GeM p = 4.0 → Top-1: 19.52%

Max‐Ensemble Top-k Accuracies: {1: np.float64(19.51859956236324), 5: np.float64(24.770240700218817), 10: np.float64(27.702407002188185)}

Best α = 0.50
Weighted‐Ensemble Top-k Accuracies: {1: np.float64(20.13129102844639), 5: np.float64(25.38293216630197), 10: np.float64(28.577680525164112)}