<a href="https://colab.research.google.com/github/rcharan05/UGP/blob/main/Basic_checking_of_feature_sets.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Mounting Google Drive, installing dependencies, and loading the dataset

In [None]:
from google.colab import drive

# Attach Google Drive to the Colab VM; every path below lives under the mount point.
drive.mount('/content/drive')

# Dataset locations on the mounted Drive (edit DATA_DIR to match your own layout).
DATA_DIR = "/content/drive/MyDrive/UGP"
VIDEO_POSE_DIR = DATA_DIR + "/CISLR_v1.5-a_videos_poses"  # directory of per-video .pose files
I3D_PKL = DATA_DIR + "/I3D_features.pkl"                  # pickled I3D feature table
PROTO_CSV = DATA_DIR + "/prototype.csv"                   # prototype (gallery) split
TEST_CSV = DATA_DIR + "/test.csv"                         # test (query) split

Mounted at /content/drive


In [None]:
# Install required package for pose feature handling
!pip install -q pose-format

# Standard imports
import os, pickle
import numpy as np
import pandas as pd
from sklearn.preprocessing import normalize
from pose_format import Pose  # Ensure this package is installed via pip
import concurrent.futures
from joblib import Parallel, delayed


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/97.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━[0m [32m92.2/97.7 kB[0m [31m2.5 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m97.7/97.7 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# Read the prototype (gallery) and test (query) tables and force gloss labels to str.
proto_df, test_df = pd.read_csv(PROTO_CSV), pd.read_csv(TEST_CSV)
for split_df in (proto_df, test_df):
    split_df["gloss"] = split_df["gloss"].astype(str)

# Label lists used by the evaluation helpers (row order matches the feature matrices).
y_train = proto_df["gloss"].tolist()
y_test = test_df["gloss"].tolist()

# --------------------------
# I3D lookup table: video id -> raw float32 feature array.
# The pickle is expected to hold a DataFrame with columns "id" and "I3D_features";
# each entry is assumed to have shape (1, 1024, 11, 1, 1) -- TODO confirm.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
i3d_df = pd.read_pickle(I3D_PKL)
i3d_dict = {
    video_id: np.array(raw_feat, dtype=np.float32)  # kept raw; pooling happens per lookup
    for video_id, raw_feat in zip(i3d_df["id"], i3d_df["I3D_features"])
}

# Feature extraction functions

In [None]:
# -- Utility: Signed Square-Root (SSR)
def apply_ssr(feat):
    """Return sign(feat) * sqrt(|feat| + 1e-8), computed element-wise.

    The 1e-8 offset is added under the square root; exact zeros still map to
    zero because their sign is zero.
    """
    magnitude = np.sqrt(np.abs(feat) + 1e-8)
    return np.sign(feat) * magnitude

# -- Improved I3D Feature Extraction with Mean + Max Pooling
def get_improved_i3d_feature(uid):
    """
    Build a fixed-length I3D descriptor for one video.

    Looks up the raw array for `uid` in the module-level `i3d_dict`, drops the
    singleton axes 0, 3 and 4 (leaving channels x segments; the raw shape is
    assumed to be (1, 1024, 11, 1, 1)), pools over the segment axis with both
    mean and max, concatenates the two pooled vectors (mean first), applies
    SSR and finally L2-normalizes.

    Returns a 1-D array of length 2 * channels. A vector whose norm is zero is
    returned without division.
    """
    per_segment = np.squeeze(i3d_dict[uid], axis=(0, 3, 4))  # (channels, segments)
    pooled = np.concatenate([per_segment.mean(axis=1), per_segment.max(axis=1)])
    pooled = apply_ssr(pooled)
    length = np.linalg.norm(pooled)
    if length > 0:
        return pooled / length
    return pooled

# -- Improved Pose Feature Extraction with Mean + Max Pooling
def get_improved_pose_feature(uid, segments=11):
    """
    Build a fixed-length pose descriptor for one video.

    Reads `<VIDEO_POSE_DIR>/<uid>.pose`, takes `pose.body.data` (assumed shape
    (frames, 1, 576, 3)) and squeezes axis 1 to get (frames, keypoints, dims).

    Pooling:
      * frames >= segments: the clip is cut into `segments` contiguous chunks of
        frames // segments frames (the last chunk absorbs the remainder). Each
        chunk is reduced over time with mean and with max; the final mean vector
        is the mean of the chunk means and the final max vector is the max of
        the chunk maxes. The chunk statistics are aggregated, not concatenated,
        so the output length does not depend on `segments`.
      * frames < segments: mean and max are taken over all frames directly.

    The two pooled vectors are concatenated (mean first), passed through SSR and
    L2-normalized (a zero-norm vector is returned without division).
    """
    path = os.path.join(VIDEO_POSE_DIR, f"{uid}.pose")
    with open(path, "rb") as handle:
        raw_bytes = handle.read()
    frames = np.squeeze(Pose.read(raw_bytes).body.data, axis=1)  # (frames, keypoints, dims)
    n_frames = frames.shape[0]

    if n_frames < segments:
        # Too few frames to segment: pool over the whole clip.
        pooled_mean = np.mean(frames, axis=0).flatten()
        pooled_max = np.max(frames, axis=0).flatten()
    else:
        step = n_frames // segments
        bounds = [
            (k * step, n_frames if k == segments - 1 else (k + 1) * step)
            for k in range(segments)
        ]
        chunk_means = [np.mean(frames[lo:hi], axis=0).flatten() for lo, hi in bounds]
        chunk_maxes = [np.max(frames[lo:hi], axis=0).flatten() for lo, hi in bounds]
        pooled_mean = np.mean(np.stack(chunk_means), axis=0)
        pooled_max = np.max(np.stack(chunk_maxes), axis=0)

    descriptor = apply_ssr(np.concatenate([pooled_mean, pooled_max]))
    length = np.linalg.norm(descriptor)
    if length > 0:
        return descriptor / length
    return descriptor

# -- Build feature arrays concurrently for a given modality
def build_improved_features(df, modality="i3d", segments=11):
    """
    Extract one feature vector per row of `df` and stack them into a 2-D array.

    df       : table with a "uid" column; output rows follow its order.
    modality : "i3d" (uses get_improved_i3d_feature) or
               "pose" (uses get_improved_pose_feature).
    segments : forwarded to the pose extractor only.

    Extraction is fanned out over a default-sized thread pool.
    Raises ValueError for any other modality.
    """
    uids = df["uid"].tolist()
    if modality not in ("i3d", "pose"):
        raise ValueError("Modality must be either 'i3d' or 'pose'.")

    if modality == "i3d":
        extract = get_improved_i3d_feature
    else:
        def extract(uid):
            return get_improved_pose_feature(uid, segments=segments)

    # executor.map yields results in input order, so rows stay aligned with df.
    with concurrent.futures.ThreadPoolExecutor() as pool:
        rows = list(pool.map(extract, uids))
    return np.stack(rows)

In [None]:
# Build gallery (prototype) and query (test) matrices for each modality.
print("Extracting improved I3D features...")
X_i3d_train_improved, X_i3d_test_improved = (
    build_improved_features(split_df, modality="i3d") for split_df in (proto_df, test_df)
)

print("Extracting improved Pose features...")
X_pose_train_improved, X_pose_test_improved = (
    build_improved_features(split_df, modality="pose", segments=11)
    for split_df in (proto_df, test_df)
)

Extracting improved I3D features...
Extracting improved Pose features...


BASELINE COSINE SIMILARITY EVALUATION FUNCTIONS

In [None]:
def compute_cosine_similarity(X_test, X_train):
    """Return the (num_test, num_train) matrix of row-wise dot products.

    This equals cosine similarity only when the rows of both inputs are already
    L2-normalized; no normalization is performed here.
    """
    gallery_t = X_train.T
    return X_test.dot(gallery_t)

def compute_topk_accuracy(S, y_train, y_test, k_values=(1, 5, 10)):
    """
    Print and return top-k retrieval accuracy (in percent) for each k.

    S        : (num_test, num_train) similarity matrix; larger means more similar.
    y_train  : gallery labels, one per column of S.
    y_test   : query labels, one per row of S.
    k_values : iterable of cut-offs (immutable default, so it cannot be mutated
               across calls).

    A query counts as correct at k if its label equals the label of any of its
    k most similar gallery items.

    Returns a dict {k: accuracy}.
    Raises ValueError when S is not 2-D or its shape disagrees with the label
    lists; without this check a short `y_test` would silently score only a
    subset of the queries.
    """
    S = np.asarray(S)
    if S.ndim != 2 or S.shape != (len(y_test), len(y_train)):
        raise ValueError(
            f"S has shape {S.shape}, expected (len(y_test), len(y_train)) = "
            f"({len(y_test)}, {len(y_train)})."
        )

    # Object arrays keep plain Python equality semantics for the labels.
    train_labels = np.asarray(y_train, dtype=object)
    test_labels = np.asarray(y_test, dtype=object)

    ranks = np.argsort(-S, axis=1)  # gallery indices, best match first
    results = {}
    for k in k_values:
        topk_labels = train_labels[ranks[:, :k]]                   # (num_test, <=k)
        hits = (topk_labels == test_labels[:, None]).any(axis=1)   # (num_test,)
        acc = np.mean(hits) * 100
        results[k] = acc
        print(f"Top-{k} Accuracy: {acc:.2f}%")
    return results

# Checking basic I3D, pose, and fused (score-level) feature-set accuracies

I3D feature set accuracy

In [None]:
# (A) Using ONLY I3D features:
print("\n--- Evaluation: I3D Only ---")
# The extractor already L2-normalizes; normalizing again is a cheap safeguard.
X_i3d_train_norm, X_i3d_test_norm = (
    normalize(feats, axis=1) for feats in (X_i3d_train_improved, X_i3d_test_improved)
)
S_i3d = compute_cosine_similarity(X_i3d_test_norm, X_i3d_train_norm)
print("I3D Only Cosine Similarity:")
acc_i3d = compute_topk_accuracy(S_i3d, y_train, y_test)


--- Evaluation: I3D Only ---
I3D Only Cosine Similarity:
Top-1 Accuracy: 17.02%
Top-5 Accuracy: 20.88%
Top-10 Accuracy: 22.93%


Just pose feature set accuracy

In [None]:
# (B) Using ONLY Pose features:
print("\n--- Evaluation: Pose Only ---")
# Row-wise L2 normalization so the dot product below is a cosine similarity.
X_pose_train_norm, X_pose_test_norm = (
    normalize(feats, axis=1) for feats in (X_pose_train_improved, X_pose_test_improved)
)
S_pose = compute_cosine_similarity(X_pose_test_norm, X_pose_train_norm)
print("Pose Only Cosine Similarity:")
acc_pose = compute_topk_accuracy(S_pose, y_train, y_test)


--- Evaluation: Pose Only ---
Pose Only Cosine Similarity:
Top-1 Accuracy: 0.31%
Top-5 Accuracy: 0.39%
Top-10 Accuracy: 0.83%


Combine both modalities with equal weighting (adding pose lowers accuracy relative to I3D alone)

In [None]:
# (C) Using Both modalities with Separate Modality Weighting:
print("\n--- Evaluation: Combined (Separate Modality Weighting) ---")
# Score-level fusion: a convex combination of the two similarity matrices.
# alpha is the I3D weight and (1 - alpha) the pose weight; 0.5 weights them equally.
alpha = 0.5
i3d_term = alpha * S_i3d
pose_term = (1 - alpha) * S_pose
S_combined = i3d_term + pose_term
acc_combined = compute_topk_accuracy(S_combined, y_train, y_test)


--- Evaluation: Combined (Separate Modality Weighting) ---
Top-1 Accuracy: 15.75%
Top-5 Accuracy: 19.69%
Top-10 Accuracy: 22.23%


# More advanced methods on the same basic feature sets

k-Reciprocal Re-Ranking (Simple Version)

In [None]:
from joblib import Parallel, delayed

def k_reciprocal_rerank_simple(S, k1=20, lambda_value=0.3, n_jobs=-1):
    """
    A simple heuristic re-ranking function based on reciprocal neighbours.

    For each query i (row of S):
      1. Take its top (k1+1) gallery neighbours.
      2. Keep those gallery items j for which query i is also among the top
         (k1+1) queries of column j (the "reciprocal" set).
      3. bonus_i = lambda_value * |reciprocal set| / (k1+1).
      4. Raise the similarity of every reciprocal pair by bonus_i * |S[i, j]|.

    For non-negative similarities step 4 equals multiplying by (1 + bonus_i).
    Using |S[i, j]| makes the adjustment a genuine boost for negative
    similarities too, where a plain multiplication would push the score
    further down.

    Parameters
    ----------
    S            : (num_test, num_train) similarity matrix, larger = more similar.
    k1           : neighbourhood size minus one; must be >= 0.
    lambda_value : scale of the bonus.
    n_jobs       : accepted for backward compatibility and ignored; the
                   computation is vectorised with NumPy.

    Returns a new array with the shape of S (and its dtype when S is floating
    point). S itself is not modified.
    Raises ValueError if S is not 2-D or k1 is negative.
    """
    S = np.asarray(S)
    if S.ndim != 2:
        raise ValueError("S must be a 2-D (num_test, num_train) similarity matrix.")
    if k1 < 0:
        raise ValueError("k1 must be non-negative.")
    if not np.issubdtype(S.dtype, np.floating):
        S = S.astype(np.float64)  # avoid truncating the boosted scores

    num_test, num_train = S.shape
    k = k1 + 1

    # Top-k gallery items per query and top-k queries per gallery item.
    forward_idx = np.argsort(-S, axis=1)[:, :k]    # (num_test, <=k)
    backward_idx = np.argsort(-S, axis=0)[:k, :]   # (<=k, num_train)

    # Boolean membership masks: mask[i, j] is True when the pair is in the top-k.
    forward_mask = np.zeros(S.shape, dtype=bool)
    forward_mask[np.arange(num_test)[:, None], forward_idx] = True
    backward_mask = np.zeros(S.shape, dtype=bool)
    backward_mask[backward_idx, np.arange(num_train)[None, :]] = True
    reciprocal = forward_mask & backward_mask

    # One bonus per query, proportional to the size of its reciprocal set.
    bonus = lambda_value * reciprocal.sum(axis=1) / k
    boosted = S + bonus[:, None] * np.abs(S)
    return np.where(reciprocal, boosted, S).astype(S.dtype, copy=False)

# Example usage: re-rank the fused similarities and score the result.
print("\n--- Evaluation: Combined with k-Reciprocal Re-Ranking (Simple Heuristic) ---")
rerank_params = dict(k1=20, lambda_value=0.3, n_jobs=-1)
S_re_rank_simple = k_reciprocal_rerank_simple(S_combined, **rerank_params)
compute_topk_accuracy(S_re_rank_simple, y_train, y_test)



--- Evaluation: Combined with k-Reciprocal Re-Ranking (Simple Heuristic) ---
Top-1 Accuracy: 16.02%
Top-5 Accuracy: 20.26%
Top-10 Accuracy: 22.58%


{1: np.float64(16.01750547045952),
 5: np.float64(20.26258205689278),
 10: np.float64(22.582056892778994)}

Pose-Guided I3D Feature Extraction

In [None]:
# ---------------------------------------
# FIXED: Pose‑Guided I3D Pooling
# ---------------------------------------
from sklearn.preprocessing import normalize

def get_pose_guided_i3d_feature(uid):
    """
    Pool the I3D segments of one video, weighting each segment by wrist motion.

    Steps:
      1. Look up the raw I3D array for `uid` in `i3d_dict` and squeeze axes
         0, 3 and 4 to get (channels, T_i3d).
      2. Read `<VIDEO_POSE_DIR>/<uid>.pose` and squeeze axis 1 of the body data
         to get (frames, keypoints, dims).
      3. Average keypoints 15 and 16 into one trajectory. These are taken to be
         the left/right wrists per the original author -- TODO confirm against
         the pose header.
      4. Cut the frames into T_i3d contiguous chunks (the last one absorbs the
         remainder) and use the summed frame-to-frame displacement inside each
         chunk, plus 1e-6, as that segment's weight.
      5. Take the weighted average of the I3D segments, apply SSR, L2-normalize.

    Returns a 1-D array with one value per I3D channel.
    Raises ValueError if the I3D array has no temporal segments.
    """
    # 1) Raw I3D features -> (channels, T_i3d)
    arr = i3d_dict[uid].squeeze(axis=(0, 3, 4))
    T_i3d = arr.shape[1]
    if T_i3d == 0:
        raise ValueError(f"I3D features for uid {uid!r} contain no temporal segments.")

    # 2) Pose coordinates; the context manager closes the file handle promptly.
    with open(f"{VIDEO_POSE_DIR}/{uid}.pose", "rb") as f:
        buf = f.read()
    pose = Pose.read(buf)
    coords = pose.body.data.squeeze(1)       # (frames, keypoints, dims)
    F = coords.shape[0]

    # 3) Single trajectory: mean of keypoints 15 and 16.
    wrist = (coords[:, 15] + coords[:, 16]) * 0.5  # (frames, dims)

    # 4) One weight per I3D segment from the motion inside its frame chunk.
    seg_len = max(F // T_i3d, 1)
    weights = []
    for t in range(T_i3d):
        start = t * seg_len
        end = F if t == T_i3d - 1 else (t + 1) * seg_len
        seg = wrist[start:end]
        if seg.shape[0] > 1:
            vt = np.sum(np.linalg.norm(np.diff(seg, axis=0), axis=1))
        else:
            vt = 0.0  # zero or one frame: no displacement can be measured
        weights.append(vt + 1e-6)  # epsilon keeps the weight sum strictly positive
    w = np.array(weights)                    # (T_i3d,)

    # 5) Weighted average over the segment axis -> (channels,)
    Fi3d = (arr * w).sum(axis=1) / w.sum()

    # 6) SSR via the shared helper, then L2-normalize.
    Fi3d = apply_ssr(Fi3d)
    Fi3d /= np.linalg.norm(Fi3d) + 1e-12

    return Fi3d

In [None]:
  # Rebuild the pose‑guided I3D matrices, one row per uid, for both splits.
  print("Extracting pose‑guided I3D features…")
  X_i3d_pg_train, X_i3d_pg_test = (
      np.vstack([get_pose_guided_i3d_feature(uid) for uid in split_df["uid"]])
      for split_df in (proto_df, test_df)
  )

  # Row-wise L2 normalization so dot products are cosine similarities.
  X_i3d_pg_train_n, X_i3d_pg_test_n = (
      normalize(feats, axis=1) for feats in (X_i3d_pg_train, X_i3d_pg_test)
  )

Extracting pose‑guided I3D features…


In [None]:
# Evaluate: cosine similarity between normalized query and gallery rows, then top-k.
print("\n--- Evaluation: Pose‑Guided I3D Only ---")
S_pg = compute_cosine_similarity(X_i3d_pg_test_n, X_i3d_pg_train_n)
compute_topk_accuracy(S_pg, y_train, y_test)


--- Evaluation: Pose‑Guided I3D Only ---
Top-1 Accuracy: 16.46%
Top-5 Accuracy: 20.13%
Top-10 Accuracy: 22.41%


{1: np.float64(16.455142231947484),
 5: np.float64(20.13129102844639),
 10: np.float64(22.407002188183807)}