In [None]:
import AcTOL
import torch
from PIL import Image
# Load the AcTOL model, on the GPU when CUDA is available and on the CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model = AcTOL.load("AcTOL", device=device)

In [3]:
# First query: embed one frame and one instruction, then score the pair.
text = "pick up cup"
frame_path = "/home/xli990/paichichi/data/EPIC-KITCHEN-100/P01/rgb_frames/P01_104/frame_0000000051.jpg"
image = Image.open(frame_path)

# Inference only, so autograd tracking is switched off for all three calls.
with torch.no_grad():
    image_features_1 = model.encode_image(image)
    text_features_1 = model.encode_text(text)
    reward_1 = model.get_reward(image, text)

FileNotFoundError: [Errno 2] No such file or directory: '/home/xli990/paichichi/data/EPIC-KITCHEN-100/P01/rgb_frames/P01_104/frame_0000000051.jpg'

In [17]:
def inspect(x, name="var"):
    """Print a short description of ``x`` under the heading ``[name]``.

    The Python type and whether ``x`` is a ``torch.Tensor`` are always shown.
    For tensors the shape, dtype, device and ``requires_grad`` flag follow.

    Args:
        x: Any object.
        name: Label printed in square brackets above the description.
    """
    is_tensor = isinstance(x, torch.Tensor)
    report = [
        f"\n[{name}]",
        f"  type: {type(x)}",
        f"  is torch.Tensor: {is_tensor}",
    ]
    if is_tensor:
        report.extend(
            [
                f"  shape: {tuple(x.shape)}",
                f"  dtype: {x.dtype}",
                f"  device: {x.device}",
                f"  requires_grad: {x.requires_grad}",
            ]
        )
    # One print call; the text is the same as printing the lines one by one.
    print("\n".join(report))

# Describe the three results of the first query, in the order they were computed.
for label, value in (
    ("image_features_1", image_features_1),
    ("text_features_1", text_features_1),
    ("reward", reward_1),
):
    inspect(value, label)


[image_features_1]
  type: <class 'torch.Tensor'>
  is torch.Tensor: True
  shape: (1, 1024)
  dtype: torch.float32
  device: cuda:0
  requires_grad: False

[text_features_1]
  type: <class 'torch.Tensor'>
  is torch.Tensor: True
  shape: (1, 1024)
  dtype: torch.float32
  device: cuda:0
  requires_grad: False

[reward]
  type: <class 'torch.Tensor'>
  is torch.Tensor: True
  shape: (1,)
  dtype: torch.float32
  device: cuda:0
  requires_grad: False


In [18]:
import torch.nn.functional as F

# Cosine similarity between the image and text embeddings, reduced over the last axis.
# NOTE(review): the recorded output below is exactly 0.0, which is unusual for real
# embeddings. Confirm the features are not stale or all-zero before trusting it.
sim = F.cosine_similarity(image_features_1, text_features_1, dim=-1)
similarity_value = sim.item()
print(similarity_value)

0.0


In [19]:
# Second query: a later frame of the same clip, scored against the same instruction.
image = Image.open("/home/xli990/paichichi/data/EPIC-KITCHEN-100/P01/rgb_frames/P01_104/frame_0000000055.jpg")
text = "pick up cup"

# Inference only: disable autograd tracking, as the first query cell does, so the
# outputs carry no graph and compare directly with the *_1 results.
with torch.no_grad():
    image_features_2 = model.encode_image(image)
    text_features_2 = model.encode_text(text)
    reward_2 = model.get_reward(image, text)

In [None]:
# Show the reward computed for the second frame as this cell's output.
reward_2

In [None]:
import os, numpy as np, torch
import matplotlib.pyplot as plt
from PIL import Image
import AcTOL

# Choose the device and load AcTOL for the reward-curve helpers defined below.
# NOTE(review): this repeats the load done in the notebook's first cell.
has_gpu = torch.cuda.is_available()
device = "cuda" if has_gpu else "cpu"
model = AcTOL.load("AcTOL", device=device)

def get_reward_curve(DATA_DIR, participant, video_id, start_f, end_f, instruction, num_points=35):
    """Score frames of one clip against a language instruction.

    Frames are read from
    ``<DATA_DIR>/<participant>/rgb_frames/<video_id>/frame_<10-digit index>.jpg``.
    Indices whose file does not exist are skipped silently. Each frame is
    converted to RGB and passed to the notebook-level ``model.get_reward``
    together with ``instruction``.

    Args:
        DATA_DIR: Dataset root directory.
        participant: Participant folder name.
        video_id: Video folder name inside ``rgb_frames``.
        start_f: First frame index of the clip (inclusive).
        end_f: Last frame index of the clip (inclusive).
        instruction: Text scored against every frame.
        num_points: Number of frame indices sampled evenly between ``start_f``
            and ``end_f``. Indices that round to the same frame are scored
            once, so fewer than ``num_points`` frames may come back. Pass
            ``None`` to score every frame in the range.

    Returns:
        ``(frame_ids, rewards)``: the indices of the frames actually scored,
        in ascending order, and their rewards as a float32 array of the same
        length. Both are empty when no frame file is found or ``end_f`` is
        below ``start_f``.
    """
    # Use the function's own range arguments, not same-named notebook globals.
    if end_f < start_f:
        frame_indices = []
    elif num_points is None:
        frame_indices = range(start_f, end_f + 1)
    else:
        sampled = np.linspace(start_f, end_f, num_points).round().astype(int)
        # Rounding can map neighbouring sample points onto one frame; score it once.
        frame_indices = np.unique(sampled)

    frame_dir = os.path.join(DATA_DIR, participant, "rgb_frames", video_id)
    kept, rewards = [], []

    # Pure inference: no autograd graph is needed for the reward values.
    with torch.no_grad():
        for i in frame_indices:
            i = int(i)  # plain int for the format spec and the returned ids
            p = os.path.join(frame_dir, f"frame_{i:010d}.jpg")
            if not os.path.exists(p):
                continue
            # Close the file promptly; convert() returns a fully loaded copy.
            with Image.open(p) as frame:
                img = frame.convert("RGB")
            r = model.get_reward(img, instruction)
            rewards.append(float(r.item() if torch.is_tensor(r) else r))
            kept.append(i)

    return np.array(kept), np.array(rewards, dtype=np.float32)

def plot_curve(rewards, title=""):
    """Plot a reward sequence after min-max scaling it to the range [0, 1].

    Values are plotted against their position in ``rewards``. A constant
    sequence plots as a flat line at 0, because the small epsilon in the
    denominator keeps the division defined.

    Args:
        rewards: Non-empty array or sequence of reward values.
        title: Figure title.

    Raises:
        ValueError: If ``rewards`` is empty, for example when no frame was found.
    """
    values = np.asarray(rewards)
    if values.size == 0:
        # Fail with a clear message instead of numpy's zero-size reduction error.
        raise ValueError("plot_curve received no rewards to plot")

    lowest, highest = values.min(), values.max()
    r = (values - lowest) / (highest - lowest + 1e-8)  # normalize 0-1

    fig, ax = plt.subplots(figsize=(9, 3.5))
    ax.plot(r, linewidth=2.3)
    ax.set_ylim(0, 1.05)
    ax.set_xlabel("Sampled frame step")
    ax.set_ylabel("Normalized reward")
    ax.set_title(title)
    ax.grid(alpha=0.25)
    plt.show()


In [None]:
# Clip to analyse: dataset root, participant/video folders, frame range and instruction.
DATA_DIR = "/home/xli990/paichichi/data/EPIC-KITCHEN-100"
participant = "P01"
video_id = "P01_104"
start_frame = 41
end_frame = 90
instruction = "pick up cup"

# Score the clip, then plot the normalized reward curve.
frame_ids, rewards = get_reward_curve(
    DATA_DIR,
    participant,
    video_id,
    start_frame,
    end_frame,
    instruction,
    num_points=50,
)
plot_curve(rewards, title=f"{participant}-{video_id} | {instruction}")
