In [None]:
from mcap_data_loader.datasets.mcap_dataset import (
    McapFlatBuffersEpisodeDataset,
    McapFlatBuffersEpisodeDatasetConfig,
)
import numpy as np
from collections import defaultdict

# Configure the dataset to read one "features_proj" key per camera
# (env, follow) from the data root, then load it.
camera_names = ("env", "follow")
dataset_config = McapFlatBuffersEpisodeDatasetConfig(
    data_root="../mcap_data/reach_tag_blip2_features",
    keys=[f"/{cam}_camera/color/image_raw/features_proj" for cam in camera_names],
)
dataset = McapFlatBuffersEpisodeDataset(dataset_config)
dataset.load()

In [101]:
from itertools import combinations


def cosine_similarity(a, b):
    """Return the cosine similarity between two vectors.

    Computes ``dot(a, b) / (||a|| * ||b||)`` with ``np.dot`` and
    ``np.linalg.norm``.

    Args:
        a: First vector (anything NumPy accepts as an array).
        b: Second vector, compatible with ``a`` for ``np.dot``.

    Returns:
        The cosine of the angle between ``a`` and ``b``. When either input
        has zero norm the similarity is undefined; ``0.0`` is returned
        instead of dividing by zero (which would produce NaN and a
        RuntimeWarning).
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    # Guard the degenerate case explicitly; NaN inputs still propagate
    # because ``nan == 0`` is False.
    if denominator == 0:
        return 0.0
    return np.dot(a, b) / denominator


def compute_pairwise(data, func):
    """Apply ``func`` to every unordered pair of entries in ``data``.

    Args:
        data: Mapping from name to value (e.g. ``dict[str, np.ndarray]``).
        func: Callable taking two values and returning a number.

    Returns:
        ``dict[str, float]`` keyed by ``"key1-key2"``, where ``key1`` sorts
        before ``key2``; each value is ``float(func(data[key1], data[key2]))``.
    """
    # Sorting first guarantees pairs are named "A-B", never "B-A".
    ordered_keys = sorted(data.keys())
    return {
        # Cast to a plain Python float so the result serialises easily.
        f"{first}-{second}": float(func(data[first], data[second]))
        for first, second in combinations(ordered_keys, 2)
    }


In [None]:
# ep_distances[episode key][feature key] -> one value per sample;
# ep_refs[episode key] -> the first-seen feature of every key in that episode.
ep_distances = defaultdict(lambda: defaultdict(list))
ep_refs = {}
for episode in dataset:
    ep_key = episode.config.data_root
    traces = ep_distances[ep_key]
    ref_data = {}  # first feature seen for each key in this episode
    ref_data_base = {}  # element sum of that first feature
    for sample in episode:
        for key, value in sample.items():
            feature = value["data"]
            if key not in ref_data:
                ref_data[key] = feature
                ref_data_base[key] = feature.sum()
            # Cosine similarity to the episode's reference feature, shifted
            # by the reference's element sum.
            similarity = cosine_similarity(ref_data[key], feature)
            traces[key].append(similarity + ref_data_base[key])
    print(ref_data_base)
    # Sample index axis used as the x dimension when plotting.
    traces["t"] = list(range(len(episode)))
    ep_refs[ep_key] = ref_data

{'/follow_camera/color/image_raw/features_proj': np.float32(-0.72004616), '/env_camera/color/image_raw/features_proj': np.float32(-0.53823113)}
{'/follow_camera/color/image_raw/features_proj': np.float32(-0.7267523), '/env_camera/color/image_raw/features_proj': np.float32(-0.7112872)}
{'/follow_camera/color/image_raw/features_proj': np.float32(-0.4373811), '/env_camera/color/image_raw/features_proj': np.float32(-0.61204624)}


In [102]:
from pprint import pprint

# For each configured feature key, compare the per-episode reference
# features against each other with np.dot.
for key in dataset.config.keys:
    print(f"Feature key: {key}")
    # Index the references by the ``.name`` of each episode key.
    key_refs = dict((ep_path.name, refs[key]) for ep_path, refs in ep_refs.items())
    pairwise = compute_pairwise(key_refs, np.dot)
    pprint(pairwise)

Feature key: /env_camera/color/image_raw/features_proj
{'0.mcap-1.mcap': 0.5264905691146851,
 '0.mcap-2.mcap': 0.5172009468078613,
 '1.mcap-2.mcap': 0.5181830525398254}
Feature key: /follow_camera/color/image_raw/features_proj
{'0.mcap-1.mcap': 0.5543102622032166,
 '0.mcap-2.mcap': 0.5190693736076355,
 '1.mcap-2.mcap': 0.5304237604141235}


In [104]:
import holoviews as hv
from holoviews import opts

hv.extension("bokeh")

# One curve per (episode, feature key): x is the sample index "t",
# y is the trace stored under that feature key.
curves: list[hv.Curve] = []
for ep_key, episode in ep_distances.items():
    # ``keys() - {"t"}`` is a set, whose iteration order is arbitrary and can
    # change between kernel restarts (string hash randomisation). Sorting makes
    # the curve order deterministic and identical for every episode, which any
    # positional grouping of ``curves`` by feature key depends on.
    for s_key in sorted(episode.keys() - {"t"}):
        curves.append(hv.Curve(episode, "t", s_key, label=ep_key.name))
hv.Layout(curves).cols(2)

In [105]:
# Group the flat ``curves`` list into one overlay per feature key by taking
# every ``step``-th curve, starting at each offset in turn.
step = len(dataset.config.keys)
overlays = []
for offset in range(step):
    curves_by_label = {}
    for member in curves[offset::step]:
        # Options are applied via ``.opts`` on the existing curve object.
        curves_by_label[member.label] = member.opts(
            width=450, height=400, xlim=(0, 300)
        )
    overlay = hv.NdOverlay(curves_by_label, kdims="episode").opts(title="")
    overlays.append(overlay)
hv.Layout(overlays)