<a href="https://colab.research.google.com/github/rcharan05/UGP/blob/main/Mahalanobis%2C_PCA_and_Velocity.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Mounting drive, installing libraries and loading dataset

In [None]:
from google.colab import drive
drive.mount('/content/drive')
!pip install -q pose-format

Mounted at /content/drive
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m97.7/97.7 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import os, numpy as np, pandas as pd
from pose_format import Pose
from sklearn.decomposition import PCA
from sklearn.preprocessing import normalize

In [None]:
# Root folder of the project data on the mounted Google Drive.
DATA_DIR = "/content/drive/MyDrive/UGP"

# Everything below lives directly under DATA_DIR.
VIDEO_POSE_DIR = DATA_DIR + "/CISLR_v1.5-a_videos_poses"  # holds one <uid>.pose file per video
I3D_PKL = DATA_DIR + "/I3D_features.pkl"                  # pickled table of I3D features
PROTO_CSV = DATA_DIR + "/prototype.csv"                   # prototype split
TEST_CSV = DATA_DIR + "/test.csv"                         # test split

In [None]:
# Load the prototype and test splits. Gloss labels are cast to str so that the
# label comparisons done during evaluation are always string-to-string.
proto_df = pd.read_csv(PROTO_CSV)
proto_df["gloss"] = proto_df["gloss"].astype(str)
test_df = pd.read_csv(TEST_CSV)
test_df["gloss"] = test_df["gloss"].astype(str)
y_tr, y_te = proto_df["gloss"].tolist(), test_df["gloss"].tolist()

# NOTE(review): unpickling can execute arbitrary code -- only load a pickle
# file that comes from a trusted source.
i3d_df = pd.read_pickle(I3D_PKL)

# Map video id -> float32 I3D feature array. Zipping the two columns avoids
# building a full row Series per record, which is what iterrows() does.
i3d_dict = {
    vid: np.array(feats, dtype=np.float32)
    for vid, feats in zip(i3d_df["id"], i3d_df["I3D_features"])
}

Define the evaluation function (top-k accuracy computed from a similarity matrix)

In [None]:
def topk_from_S(S, y_tr, y_te, k):
    """Return the top-k accuracy (in percent) for a similarity matrix.

    Parameters
    ----------
    S : 2-D array, one row per test sample and one column per training sample;
        larger values mean "more similar".
    y_tr : sequence of training labels, indexed like the columns of S.
    y_te : sequence of test labels, indexed like the rows of S.
    k : number of best-scoring training samples inspected per test sample.

    Returns
    -------
    Percentage of test samples whose label appears among the labels of their
    k highest-scoring training samples.
    """
    # Sorting the negated scores yields column indices in descending similarity.
    order = np.argsort(-S, axis=1)
    hits = []
    for row, truth in enumerate(y_te):
        candidates = [y_tr[col] for col in order[row, :k]]
        hits.append(truth in candidates)
    return np.mean(hits) * 100

# Basic functions for the model

Improved I3D - Mean, Max and Std-dev pooling of I3D

In [None]:
def improved_i3d_feat(uid):
    """Build a unit-length pooled I3D descriptor for one video.

    The stored feature array for ``uid`` is looked up in ``i3d_dict``, its
    singleton axes 0, 3 and 4 are dropped, and mean / max / std statistics
    taken along axis 1 are concatenated. The result is passed through a signed
    square root and L2-normalised.
    """
    # squeeze() raises unless axes 0, 3 and 4 all have length 1.
    # Presumably leaves a (1024, S) array -- TODO confirm the stored shape.
    feats = i3d_dict[uid].squeeze((0, 3, 4))

    pooled = np.concatenate([
        feats.mean(axis=1),
        feats.max(axis=1),
        feats.std(axis=1),
    ])

    # Signed square root compresses large magnitudes while keeping the sign.
    rooted = np.sign(pooled) * np.sqrt(np.abs(pooled) + 1e-8)
    return rooted / np.linalg.norm(rooted)

The velocity features use only the face and hand landmarks, since the body pose contributes little in sign language and varies from signer to signer.

In [None]:
def pose_velocity_feat(uid):
    """Build a velocity descriptor from the face and hand landmarks of one video.

    Reads ``{VIDEO_POSE_DIR}/{uid}.pose``, keeps the x/y coordinates of the
    face and both hands, measures how far every landmark moves between
    consecutive frames, and concatenates the per-landmark mean and maximum
    displacement. The vector is passed through a signed square root and
    L2-normalised.

    Parameters
    ----------
    uid : identifier of the video; used as the pose file's base name.

    Returns
    -------
    1-D array of length 2 * 510. For clips with fewer than two frames, or when
    no landmark moves at all, an all-zero vector of the same length is
    returned, so every clip yields the same dimensionality and no NaNs.
    """
    # (start index, landmark count) of each landmark group that is kept.
    # Presumably the MediaPipe Holistic layout -- TODO confirm.
    groups = ((33, 468), (501, 21), (522, 21))  # face, left hand, right hand
    n_points = sum(count for _, count in groups)  # 510

    # Context manager closes the file handle even if reading fails.
    with open(f"{VIDEO_POSE_DIR}/{uid}.pose", "rb") as fh:
        buf = fh.read()
    p = Pose.read(buf)
    coords = p.body.data.squeeze(1)[..., :2]  # keep x/y only

    if coords.shape[0] < 2:
        # No frame-to-frame motion can be measured; use the same length as
        # the regular path so the features can still be stacked.
        feat = np.zeros(2 * n_points)
    else:
        # face+hands only
        pts = np.concatenate(
            [coords[:, start:start + count] for start, count in groups],
            axis=1,
        )
        # Euclidean displacement of every landmark between consecutive frames.
        diffs = np.linalg.norm(pts[1:] - pts[:-1], axis=2)
        feat = np.concatenate([diffs.mean(0), diffs.max(0)])

    feat = np.sign(feat) * np.sqrt(np.abs(feat) + 1e-8)
    norm = np.linalg.norm(feat)
    # sign(0) == 0, so a motionless clip gives an all-zero vector; dividing by
    # its zero norm would produce NaNs.
    return feat / norm if norm > 0 else feat


Building the improved I3D and velocity features

In [None]:
# One row per video: pooled I3D descriptors for the prototype and test splits.
X_imp    = np.stack(list(map(improved_i3d_feat, proto_df.uid)))
X_imp_te = np.stack(list(map(improved_i3d_feat, test_df.uid)))

# One row per video: pose velocity descriptors for the same two splits.
X_vel    = np.stack(list(map(pose_velocity_feat, proto_df.uid)))
X_vel_te = np.stack(list(map(pose_velocity_feat, test_df.uid)))

Mahalanobis scaling on the improved-I3D

In [None]:
# Mahalanobis-style scaling: divide every feature dimension by its standard
# deviation, estimated on the prototype features only (no mean-centering).
# The 1e-6 keeps the division finite for constant dimensions.
var = X_imp.var(axis=0) + 1e-6
W = 1.0 / np.sqrt(var)

# W has one entry per feature column and broadcasts across the rows.
X_ms = X_imp * W
X_ms_te = X_imp_te * W

# Scan the PCA dimensions on the improved I3D features and keep the one with the best accuracy

In [None]:
# ================================
# 3. SCAN PCA‑WHITEN DIMS
# ================================
dims = [128,256,512,768,1024]
best = (0,0)  # (score,dim)

# Variance-scaled similarity, kept for fusion later. Rows are test samples and
# columns are prototype samples, the orientation topk_from_S expects.
S_ms = normalize(X_ms_te,axis=1).dot(normalize(X_ms,axis=1).T)

print("=== PCA‑Whiten scan ===")
best_fit = None  # (pca, X_pw, X_pw_te, S_pw) of the best dimension so far
for d in dims:
    # A fixed random_state keeps the scan reproducible when scikit-learn
    # picks a randomized SVD solver.
    pca_d = PCA(whiten=True, n_components=d, random_state=0).fit(X_imp)
    X_pw_d    = pca_d.transform(X_imp)
    X_pw_te_d = pca_d.transform(X_imp_te)
    S_pw_d = normalize(X_pw_te_d,axis=1).dot(normalize(X_pw_d,axis=1).T)
    acc1 = topk_from_S(S_pw_d, y_tr, y_te,1)
    acc5 = topk_from_S(S_pw_d, y_tr, y_te,5)
    acc10= topk_from_S(S_pw_d, y_tr, y_te,10)
    print(f"dim={d}: Top-1={acc1:.2f} Top-5={acc5:.2f} Top-10={acc10:.2f}")
    if best_fit is None or acc1>best[0]:
        best=(acc1,d)
        best_fit = (pca_d, X_pw_d, X_pw_te_d, S_pw_d)
print(f">>> Best PCA dim={best[1]} with Top-1={best[0]:.2f}%\n")

# Reuse the winning fit instead of refitting, so the matrices used downstream
# are exactly the ones that were scored in the scan.
pca, X_pw, X_pw_te, S_pw = best_fit


=== PCA‑Whiten scan ===
dim=128: Top-1=18.60 Top-5=23.24 Top-10=26.26
dim=256: Top-1=18.60 Top-5=23.76 Top-10=26.74
dim=512: Top-1=18.86 Top-5=24.42 Top-10=27.57
dim=768: Top-1=18.99 Top-5=24.64 Top-10=27.66
dim=1024: Top-1=18.95 Top-5=24.51 Top-10=27.35
>>> Best PCA dim=768 with Top-1=18.99%



# New model: PCA + Mahalanobis + Velocity

L2-normalize each feature set and build the test × train cosine-similarity matrices

In [None]:
# ================================
# 4. VELOCITY SIM MATRIX
# ================================
# Row-normalise both velocity feature sets; their dot product is then the
# cosine similarity, with test samples as rows and prototypes as columns.
vel_te_unit = normalize(X_vel_te, axis=1)
vel_tr_unit = normalize(X_vel, axis=1)
S_vel = vel_te_unit.dot(vel_tr_unit.T)


In [None]:
def _cosine_sim(test_feats, train_feats):
    """Cosine similarity matrix with shape (N_test, N_train)."""
    return normalize(test_feats, axis=1).dot(normalize(train_feats, axis=1).T)


S_pw  = _cosine_sim(X_pw_te, X_pw)    # PCA-whitened I3D features
S_ms  = _cosine_sim(X_ms_te, X_ms)    # variance-scaled I3D features
S_vel = _cosine_sim(X_vel_te, X_vel)  # pose velocity features

Scan the fusion weights (α, β, γ with α + β + γ = 1) for the best accuracy. Result: the velocity features do not contribute positively.

In [None]:
import numpy as np


def _topk_many(S, y_tr, y_te, ks):
    """Top-k accuracies (percent) for every k in ``ks`` from one argsort of S."""
    ranks = np.argsort(-S, axis=1)
    return [
        np.mean([
            y_te[i] in {y_tr[j] for j in ranks[i, :k]}
            for i in range(len(y_te))
        ]) * 100
        for k in ks
    ]


def topk_from_S(S, y_tr, y_te, k):
    """Return the top-k accuracy (in percent) for similarity matrix ``S``.

    ``S`` has one row per test sample and one column per training sample.
    A test sample counts as correct when its label from ``y_te`` is among the
    ``y_tr`` labels of its ``k`` highest-scoring columns.
    """
    return _topk_many(S, y_tr, y_te, (k,))[0]


# grid over α, β, γ=1-α-β
best = (0.0, (0,0,0))
print("α   β   γ    Top-1   Top-5   Top-10")
for α in np.linspace(0,1,11):
    # At α=1 the β range collapses to eleven zeros; np.unique keeps a single
    # one so the same point is not evaluated and printed repeatedly.
    for β in np.unique(np.linspace(0,1-α,11)):
        γ = 1 - α - β
        # fuse
        S_fuse = α*S_pw + β*S_ms + γ*S_vel
        # One sort of the fused matrix serves all three cut-offs.
        acc1, acc5, acc10 = _topk_many(S_fuse, y_tr, y_te, (1, 5, 10))
        print(f"{α:.2f} {β:.2f} {γ:.2f}   {acc1:5.2f}%   {acc5:5.2f}%   {acc10:5.2f}%")
        if acc1 > best[0]:
            best = (acc1, (α,β,γ))

print("\nBest Top-1 = {:.2f}% with α,β,γ = {:.2f}, {:.2f}, {:.2f}".format(
    best[0], *best[1]
))

α   β   γ    Top-1   Top-5   Top-10
0.00 0.00 1.00    4.20%    5.95%    7.18%
0.00 0.10 0.90   12.39%   14.88%   16.81%
0.00 0.20 0.80   13.39%   16.28%   18.47%
0.00 0.30 0.70   13.96%   16.81%   19.12%
0.00 0.40 0.60   14.18%   17.59%   19.47%
0.00 0.50 0.50   14.44%   17.86%   19.78%
0.00 0.60 0.40   14.62%   18.16%   20.22%
0.00 0.70 0.30   14.97%   18.38%   20.39%
0.00 0.80 0.20   15.14%   19.12%   21.01%
0.00 0.90 0.10   16.89%   20.61%   22.58%
0.00 1.00 0.00   17.37%   21.53%   23.37%
0.10 0.00 0.90   15.40%   19.61%   21.88%
0.10 0.09 0.81   15.75%   19.96%   22.23%
0.10 0.18 0.72   15.67%   19.96%   22.32%
0.10 0.27 0.63   15.97%   20.00%   22.19%
0.10 0.36 0.54   16.06%   20.13%   22.28%
0.10 0.45 0.45   16.06%   20.04%   22.41%
0.10 0.54 0.36   16.46%   20.57%   22.93%
0.10 0.63 0.27   16.98%   21.36%   23.85%
0.10 0.72 0.18   17.99%   21.97%   24.29%
0.10 0.81 0.09   18.42%   22.54%   24.90%
0.10 0.90 0.00   18.73%   23.37%   25.25%
0.20 0.00 0.80   15.75%   20.61%   22.80

Our best model for top-5 and top-10 accuracy

In [None]:
# Fixed fusion weights for the final model: α weights the PCA-whitened
# similarity, β the variance-scaled one and γ the velocity one.
α = 0.6
β = 0.4
γ = 0.0
S_fuse = α * S_pw + β * S_ms + γ * S_vel

print("=== Fused Scores for best accuracy (α,β,γ)=(",α,β,γ,") ===")
for k in (1, 5, 10):
    acc_k = topk_from_S(S_fuse, y_tr, y_te, k)
    print(f"Top-{k}: {acc_k:.2f}%")

=== Fused Scores for best accuracy (α,β,γ)=( 0.6 0.4 0.0 ) ===
Top-1: 19.21%
Top-5: 25.16%
Top-10: 28.18%
