In [None]:
import importlib.util

import torch
from transformers import AutoModel

# FlashAttention-2 is only usable on a CUDA GPU of compute capability >= 8.0
# (Ampere or newer) with the `flash_attn` package installed. Detect that at
# runtime instead of asking the reader to comment a line out; otherwise the
# model's default attention implementation is used.
has_cuda = torch.cuda.is_available()
use_flash_attn = (
    has_cuda
    and torch.cuda.get_device_capability()[0] >= 8
    and importlib.util.find_spec("flash_attn") is not None
)

load_kwargs = {
    "torch_dtype": "auto",
    # NOTE(review): trust_remote_code executes Python shipped with the
    # checkpoint; only keep it for checkpoints from a trusted source.
    "trust_remote_code": True,
}
if use_flash_attn:
    load_kwargs["attn_implementation"] = "flash_attention_2"

# NOTE(review): absolute, machine-specific checkpoint path; the later cells
# rebind `model` to a different (BGE) checkpoint.
model = AutoModel.from_pretrained(
    '/data/fsy/new/jina-reranker-m0',
    **load_kwargs
)

# Run on the GPU when one is present, otherwise stay on the CPU
model.to('cuda' if has_cuda else 'cpu')
model.eval()


: 

In [None]:
import torch
from transformers import AutoTokenizer, AutoModel
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Descriptions of the policies that have already been trained
policy_descriptions = [
    "wave hand slowly",
    "raise left arm",
    "squat down low",
    "walk forward",
    "clap hands"
]

# Natural-language instruction entered by the user
query = "can you raise your left arm slowly?"

# Embed with a BGE model (could be swapped for a LLaMA embedding or a CLIP text encoder)
model_name = "BAAI/bge-base-en-v1.5"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
# Make inference mode explicit, consistent with the later evaluation cells
model.eval()

def embed(texts):
    """Encode texts into unit-length sentence vectors.

    Uses the module-level ``tokenizer`` and ``model``. Each text is represented
    by the final hidden state of its first (CLS) token, L2-normalised.

    Args:
        texts: Batch of strings accepted by the tokenizer (callers pass lists).

    Returns:
        NumPy array with one row per input text.
    """
    encoded = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():  # inference only, no autograd bookkeeping
        hidden_states = model(**encoded).last_hidden_state
    # The CLS token (position 0) stands in for the whole sentence
    cls_vectors = hidden_states[:, 0, :]
    unit_vectors = torch.nn.functional.normalize(cls_vectors, dim=-1)
    return unit_vectors.cpu().numpy()

# Vectorised retrieval: embed the query and every known policy description
query_emb, policy_embs = embed([query]), embed(policy_descriptions)

# Rank the policies from most to least similar to the query
similarities = cosine_similarity(query_emb, policy_embs)[0]
ranked_indices = np.argsort(similarities)[::-1]
top_idx = ranked_indices[0]
top_score, retrieved_policy = similarities[top_idx], policy_descriptions[top_idx]

# Threshold gate: a best score below it triggers the "request a demonstration" flow.
# NOTE(review): confirm that 0.5 is a meaningful cut-off for this model's score range.
threshold = 0.5
print(
    f"[✔️ Retrieved] Use policy: '{retrieved_policy}' with similarity {top_score:.2f}"
    if top_score >= threshold
    else f"[❌ No Match] Top match '{retrieved_policy}' has score {top_score:.2f}. Requesting demonstration..."
)


In [None]:
import torch
from transformers import AutoTokenizer, AutoModel
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Set random seed for reproducibility
np.random.seed(42)

# Define a repository of 30 policy descriptions (20 learned, 10 new/unseen)
learned_policies = [
    "wave hand slowly", "raise left arm", "squat down low", "walk forward", "clap hands",
    "kick right leg", "sit down", "stand up", "turn left", "turn right",
    "nod head", "shake head", "stretch arms", "touch toes", "jump in place",
    "run forward", "crawl on ground", "spin around", "march in place", "lift both arms"
]

unseen_policies = [
    "throw a ball", "catch a ball", "draw a circle", "wave both hands quickly", "slide to the left",
    "bow politely", "stand on one foot", "do a backflip", "hug yourself", "salute"
]

full_policy_repo = learned_policies + unseen_policies
assert len(full_policy_repo) == 30

# Simulated queries corresponding to all 30 policy intents
queries = [
    "please wave your hand slowly", "can you raise your left arm?", "do a slow squat", "move forward by walking",
    "clap your hands now", "kick with your right leg", "sit down please", "get up now", "turn to your left",
    "turn right quickly", "nod your head", "shake your head left and right", "stretch out your arms",
    "can you touch your toes?", "jump in place", "start running", "crawl on the floor", "spin in a circle",
    "march on the spot", "lift both arms", "pretend to throw something", "try catching a ball",
    "draw a circle in the air", "wave both hands fast", "slide left", "give a polite bow",
    "balance on one foot", "do a backflip", "hug yourself gently", "salute like a soldier"
]
# The evaluation treats query i as targeting policy i, so the two lists must line up
assert len(queries) == len(full_policy_repo), "queries and policies must be index-aligned"

# Load model and tokenizer
model_name = "BAAI/bge-base-en-v1.5"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
# Make inference mode explicit, consistent with the later evaluation cells
model.eval()

# Function to compute embeddings
def embed(texts):
    """Return L2-normalised first-token embeddings for a batch of texts.

    Relies on the module-level ``tokenizer`` and ``model``.

    Args:
        texts: Batch of strings accepted by the tokenizer (callers pass lists).

    Returns:
        NumPy array with one unit-length row per input text.
    """
    encoded = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():  # inference only
        hidden = model(**encoded).last_hidden_state
    # Token position 0 of the last layer is used as the sentence representation
    return torch.nn.functional.normalize(hidden[:, 0, :], dim=-1).cpu().numpy()

# Encode the policy repository and the queries
policy_embs = embed(full_policy_repo)
query_embs = embed(queries)

# Score each query against the whole repository; query i targets policy i
threshold = 0.5
total_queries = len(queries)
retrieved_correct = 0   # correct top-1 hits that also clear the threshold
requested_demo = 0      # queries whose best score falls below the threshold
top1_match = []         # per-query top-1 correctness, independent of the threshold

for i, query_vec in enumerate(query_embs):
    sim = cosine_similarity(query_vec.reshape(1, -1), policy_embs)[0]
    top_idx = sim.argmax()
    top_score = sim[top_idx]
    is_correct = (top_idx == i)
    top1_match.append(is_correct)

    accepted = top_score >= threshold
    if not accepted:
        requested_demo += 1
    elif is_correct:
        retrieved_correct += 1

# Aggregate metrics over all queries
accuracy = np.mean(top1_match)
retrieval_rate = retrieved_correct / total_queries
demo_request_rate = requested_demo / total_queries

accuracy, retrieval_rate, demo_request_rate


In [None]:
import torch
from transformers import AutoTokenizer, AutoModel
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Policy repository (20 learned + 10 not yet learned)
learned_policies = [
    "wave hand slowly", "raise left arm", "squat down low", "walk forward", "clap hands",
    "kick right leg", "sit down", "stand up", "turn left", "turn right",
    "nod head", "shake head", "stretch arms", "touch toes", "jump in place",
    "run forward", "crawl on ground", "spin around", "march in place", "lift both arms"
]
unseen_policies = [
    "throw a ball", "catch a ball", "draw a circle", "wave both hands quickly", "slide to the left",
    "bow politely", "stand on one foot", "do a backflip", "hug yourself", "salute"
]
full_policy_repo = learned_policies + unseen_policies

# Query sentences (query i is meant to match policy i)
queries = [
    "please wave your hand slowly", "can you raise your left arm?", "do a slow squat", "move forward by walking",
    "clap your hands now", "kick with your right leg", "sit down please", "get up now", "turn to your left",
    "turn right quickly", "nod your head", "shake your head left and right", "stretch out your arms",
    "can you touch your toes?", "jump in place", "start running", "crawl on the floor", "spin in a circle",
    "march on the spot", "lift both arms", "pretend to throw something", "try catching a ball",
    "draw a circle in the air", "wave both hands fast", "slide left", "give a polite bow",
    "balance on one foot", "do a backflip", "hug yourself gently", "salute like a soldier"
]
# The evaluation compares the retrieved index with the query index, so the lists must line up
assert len(queries) == len(full_policy_repo), "queries and policies must be index-aligned"

# Embedding model
model_name = "BAAI/bge-base-en-v1.5"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
model.eval()

def embed(texts):
    """Embed a batch of texts as unit-length vectors.

    Tokenises with the module-level ``tokenizer``, runs the module-level
    ``model`` without gradient tracking, and keeps the last-layer hidden state
    of the first token of each sequence.

    Args:
        texts: Batch of strings accepted by the tokenizer (callers pass lists).

    Returns:
        NumPy array with one L2-normalised row per input text.
    """
    batch = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        model_out = model(**batch)
    first_token = model_out.last_hidden_state[:, 0, :]
    unit = torch.nn.functional.normalize(first_token, dim=-1)
    return unit.cpu().numpy()

# Embed the policy repository and the queries
policy_embs = embed(full_policy_repo)
query_embs = embed(queries)

# Evaluation
threshold = 0.5
correct_top1 = 0       # top-1 hits, counted regardless of the threshold
retrieved_correct = 0  # top-1 hits whose score also clears the threshold
need_demo = 0          # queries whose best score falls below the threshold

# One similarity matrix for all queries: row i holds query i against every policy
sim_matrix = cosine_similarity(query_embs, policy_embs)

for i, sims in enumerate(sim_matrix):
    top_idx = np.argmax(sims)
    is_correct = top_idx == i  # query i is meant to retrieve policy i
    # Top-1 accuracy must not depend on the acceptance threshold; previously a
    # correct match scoring below the threshold was not counted as a hit.
    if is_correct:
        correct_top1 += 1
    if sims[top_idx] >= threshold:
        if is_correct:
            retrieved_correct += 1
    else:
        need_demo += 1

total = len(queries)
print(f"Top-1 Accuracy: {correct_top1 / total:.2f}")
print(f"Retrieval Rate (correct and above threshold): {retrieved_correct / total:.2f}")
print(f"Demonstration Requests: {need_demo / total:.2f}")


: 

In [None]:
import torch
from transformers import AutoTokenizer, AutoModel
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Setup: load the BGE tokenizer/model and switch to inference mode
model_name = "BAAI/bge-base-en-v1.5"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
model.eval()

# Policy repositories
learned_policies = [
    "wave hand slowly", "raise left arm", "squat down low", "walk forward", "clap hands",
    "kick right leg", "sit down", "stand up", "turn left", "turn right",
    "nod head", "shake head", "stretch arms", "touch toes", "jump in place",
    "run forward", "crawl on ground", "spin around", "march in place", "lift both arms"
]
unseen_policies = [
    "throw a ball", "catch a ball", "draw a circle", "wave both hands quickly", "slide to the left",
    "bow politely", "stand on one foot", "do a backflip", "hug yourself", "salute"
]
full_policy_repo = learned_policies + unseen_policies

# Queries (corresponding to each policy)
queries = [
    "please wave your hand slowly", "can you raise your left arm?", "do a slow squat", "move forward by walking",
    "clap your hands now", "kick with your right leg", "sit down please", "get up now", "turn to your left",
    "turn right quickly", "nod your head", "shake your head left and right", "stretch out your arms",
    "can you touch your toes?", "jump in place", "start running", "crawl on the floor", "spin in a circle",
    "march on the spot", "lift both arms", "pretend to throw something", "try catching a ball",
    "draw a circle in the air", "wave both hands fast", "slide left", "give a polite bow",
    "balance on one foot", "do a backflip", "hug yourself gently", "salute like a soldier"
]
# Recall/nDCG below treat policy i as the only relevant item for query i
assert len(queries) == len(full_policy_repo), "queries and policies must be index-aligned"

# Embedding function
def embed(texts):
    """Turn a batch of texts into L2-normalised embedding rows.

    Depends on the module-level ``tokenizer`` and ``model``; the vector for
    each text is the last-layer hidden state at token position 0.

    Args:
        texts: Batch of strings accepted by the tokenizer (callers pass lists).

    Returns:
        NumPy array with one unit-length row per input text.
    """
    tokens = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():  # no gradients needed for inference
        last_layer = model(**tokens).last_hidden_state
        sentence_vecs = last_layer[:, 0, :]
    normalised = torch.nn.functional.normalize(sentence_vecs, dim=-1)
    return normalised.cpu().numpy()

# Encode both text collections: the queries first, then the policy repository
query_embs, policy_embs = (embed(texts) for texts in (queries, full_policy_repo))

# Compute Recall@k and nDCG@k
def compute_metrics(k, queries_emb=None, policies_emb=None):
    """Compute Recall@k and nDCG@k for index-aligned query/policy embeddings.

    Query ``i`` is treated as having exactly one relevant policy: row ``i`` of
    the policy matrix. With a single relevant item the ideal DCG is 1, so the
    per-query nDCG is ``1 / log2(rank + 1)`` for a hit at 1-based ``rank``
    within the top ``k`` and 0 for a miss. A query whose index has no matching
    policy row simply counts as a miss.

    Args:
        k: Cut-off rank; must be at least 1.
        queries_emb: Optional ``(n_queries, dim)`` array-like. Defaults to the
            module-level ``query_embs``.
        policies_emb: Optional ``(n_policies, dim)`` array-like. Defaults to
            the module-level ``policy_embs``.

    Returns:
        Tuple ``(recall_at_k, ndcg_at_k)`` averaged over all queries, or
        ``(0.0, 0.0)`` when there are no queries.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        # A negative k would silently mis-slice the ranking (e.g. ``[:-1]``)
        raise ValueError(f"k must be a positive integer, got {k!r}")

    q = np.asarray(query_embs if queries_emb is None else queries_emb)
    p = np.asarray(policy_embs if policies_emb is None else policies_emb)
    n = q.shape[0]
    if n == 0:
        # Nothing to average over; avoid dividing by zero
        return 0.0, 0.0

    def _unit_rows(mat):
        # L2-normalise each row; all-zero rows are left as zeros
        norms = np.linalg.norm(mat, axis=1, keepdims=True)
        norms[norms == 0] = 1.0
        return mat / norms

    # Cosine similarity of every query against every policy in one product
    sims = _unit_rows(q) @ _unit_rows(p).T
    # Per query: policy indices sorted by descending similarity, cut at k
    top_k = np.argsort(sims, axis=1)[:, ::-1][:, :k]
    targets = np.arange(n).reshape(-1, 1)
    # Each row holds distinct indices, so there is at most one hit per query;
    # hit_pos is the 0-based position of the target inside the top-k list
    _, hit_pos = np.where(top_k == targets)

    recall_at_k = hit_pos.size / n
    # 0-based position -> 1-based rank r = pos + 1, gain = 1 / log2(r + 1)
    ndcg_at_k = float(np.sum(1.0 / np.log2(hit_pos + 2.0))) / n
    return recall_at_k, ndcg_at_k

# Evaluate at cut-offs 1, 3 and 5; each "@k" key maps to the pair returned by compute_metrics(k)
results = dict((f"@{k}", compute_metrics(k)) for k in [1, 3, 5])
results
