### Sonar (Perplexity API)

In [None]:
import base64
import os
from pathlib import Path
import pandas as pd
import requests
import time

# Perplexity chat-completions endpoint that the API helper posts to.
API_URL = "https://api.perplexity.ai/chat/completions"
# The key is read from the PERPLEXITY_API_KEY environment variable so that no
# secret is stored in the notebook. An empty value is sent as-is and will be
# rejected by the API.
API_KEY = os.environ.get("PERPLEXITY_API_KEY", "")

# NOTE(review): unpickling can execute arbitrary code; only load files you trust.
df = pd.read_pickle("../data/pepper_data_test.pkl")
# Keep a fixed random sample of 40 'Home' rows and every row of the other
# domains, then renumber the combined frame from 0.
df_downsampled_home = df[df['domain'] == 'Home'].sample(n=40, random_state=42)
df_others = df[df['domain'] != 'Home']
df_balanced = pd.concat([df_downsampled_home, df_others])
df = df_balanced.reset_index(drop=True)

# The first nine columns are used as the action names.
actions = list(df.columns[:9])
print(f"{actions=}")
print(df['domain'].value_counts(), len(df))

# Maps the answer letter returned by the model to a 1-5 numeric score.
letter_to_score = {
    'A': 1,  # very inappropriate
    'B': 2,  # inappropriate
    'C': 3,  # neutral
    'D': 4,  # appropriate
    'E': 5   # very appropriate
}


In [None]:
def encode_image_base64(image_path: str) -> str:
    """Read an image file and return it as a base64 ``data:`` URL.

    The MIME type is guessed from the file extension so that, for example,
    a ``.jpg`` file is labelled ``image/jpeg``. When the extension is unknown
    or does not map to an image type, ``image/png`` is used.

    Args:
        image_path: Path of the image file to encode.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    import mimetypes  # local import keeps this helper self-contained

    guessed_type, _ = mimetypes.guess_type(str(image_path))
    if guessed_type and guessed_type.startswith("image/"):
        mime_type = guessed_type
    else:
        mime_type = "image/png"

    with open(image_path, "rb") as image_file:
        encoded_bytes = base64.b64encode(image_file.read())
    base64_str = encoded_bytes.decode('utf-8')
    return f"data:{mime_type};base64,{base64_str}"

def create_prompt(action: str) -> str:
    """Build the single-action rating prompt that accompanies an image.

    Args:
        action: Name of the robot action to be rated, e.g. "Carry Drinks".

    Returns:
        Prompt text asking for one rating letter (A-E) for ``action``.
    """
    # Only the "Question" line interpolates a value; the answer-set braces sit
    # in a plain string so they are emitted literally as "{A, B, C, D, E}".
    return (
        "You are evaluating the social appropriateness of a robot performing a specific action based on an image.\n"
        "This is an image of an indoor scene with a Pepper robot present.\n"
        "Given this image of a scenario which includes a robot and several humans, a human evaluator has to answer a question about whether it is socially appropriate for the robot to carry out a certain action in the given scenario.\n"
        f"Question: How socially appropriate is it for the robot to be doing the following action {action}?\n"
        "Answer choices: A. very inappropriate, B. inappropriate, C. neutral, D. appropriate, E. very appropriate.\n"
        "Respond with exactly 1 uppercase letter from {A, B, C, D, E}. Do not add any extra text, spaces, or punctuation."
    )

def call_perplexity_api(image_data_url: str, prompt_text: str, timeout: float = 120.0) -> dict:
    """Send one image plus a text prompt to the Perplexity chat-completions API.

    Args:
        image_data_url: Image encoded as a ``data:`` URL.
        prompt_text: User prompt sent together with the image.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller indefinitely.

    Returns:
        The decoded JSON response body (a dict), not just the answer text.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
        "accept": "application/json"
    }
    # The system message must not contradict prompts that ask about several
    # actions at once: it requests one letter per action, comma-separated,
    # which reduces to a single letter for a single-action prompt.
    system_content = (
        "Respond with exactly 1 uppercase letter from {A, B, C, D, E} for each "
        "action you are asked about, in the order given, separated by commas "
        "when there is more than one action. "
        "Do not add any extra text, spaces, or punctuation."
    )
    payload = {
        "model": "sonar-pro",
        "stream": False,
        "max_tokens": 18,
        "messages": [
            {"role": "system", "content": system_content},
            {
                "role": "user",
                "content": [
                    {"type": "image_url", "image_url": {"url": image_data_url}},
                    {"type": "text", "text": prompt_text}
                ]
            }
        ]
    }
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    response.raise_for_status()
    return response.json()


In [None]:
# Where predictions are written; one row per image, one column per action.
output_csv_path = "../data/perplexity_predicted_scores.csv"
output_columns = ["image_path", *actions]

# Resume from a previous run when the CSV is already on disk; otherwise start
# from an empty frame that has the expected columns.
output_df = (
    pd.read_csv(output_csv_path)
    if os.path.exists(output_csv_path)
    else pd.DataFrame(columns=output_columns)
)
# Image paths that already have a row in the output (empty on a fresh start).
processed_images = set(output_df["image_path"].tolist())

In [None]:
# Running sum of the per-request cost reported by the API.
total_cost = 0
# Stop issuing requests once the accumulated reported cost reaches this value.
COST_BUDGET = 4

# Consolidated prompt for all 9 actions. It is identical for every image, so
# it is built once. The final instruction states the reply format that the
# parsing below relies on (comma-separated letters, one per action).
prompt = """
        This image shows an indoor scene with a Pepper robot and several humans present.
        Judge how socially appropriate it is for the robot in the image to perform each of the following actions in this scenario, in the order given:
        Vacuum Cleaning, Mopping the Floor, Carrying Warm Food, Carrying Cold Food, Carrying Drinks, Carrying Small Objects, Carrying Large Objects, Cleaning, Starting a Conversation.
        Use this rating scale:
        A = Very Inappropriate
        B = Inappropriate
        C = Neutral
        D = Appropriate
        E = Very Appropriate
        Respond with exactly 9 uppercase letters from {A, B, C, D, E}, separated by commas, one per action in the order given (for example: A,B,C,D,E,A,B,C,D). Do not add any extra text, spaces, or punctuation.
        """

for idx, row in df.iterrows():
    # Leave the loop as soon as the budget is used up instead of walking
    # through the remaining rows without doing any work.
    if total_cost >= COST_BUDGET:
        print(f"Cost budget of {COST_BUDGET} reached (spent {total_cost}); stopping early. "
              f"Results so far saved to {output_csv_path}")
        break

    image_path = row['image_path']
    if image_path in processed_images:
        print(f"Skipping already processed image: {image_path}")
        continue

    if not Path(image_path).is_file():
        print(f"Image file not found: {image_path}, skipping.")
        continue

    try:
        image_data_url = encode_image_base64(str(image_path))
    except Exception as e:
        print(f"Failed to encode image {image_path}: {e}")
        continue

    # Best-effort per image: any failure is reported and the run moves on.
    try:
        response = call_perplexity_api(image_data_url, prompt)
        total_cost += response['usage']['cost']['total_cost']
        model_answer = response['choices'][0]['message']['content'].strip().upper()
        # Strip each token so replies such as "A, B, C" parse as well as "A,B,C".
        answers = [token.strip() for token in model_answer.split(",")]
        if len(answers) != len(actions):
            print(f"Unexpected number of answers for image {image_path}: {model_answer}")
            predicted_scores = {action: None for action in actions}
        else:
            predicted_scores = {}
            for action, letter in zip(actions, answers):
                if letter in letter_to_score:
                    predicted_scores[action] = letter_to_score[letter]
                else:
                    print(f"Invalid rating letter '{letter}' for action '{action}' on image '{image_path}'")
                    predicted_scores[action] = None

        record = {"image_path": image_path}
        for action in actions:
            record[action] = predicted_scores.get(action)

        # Persist after every image so an interrupted run can be resumed.
        output_df = pd.concat([output_df, pd.DataFrame([record])], ignore_index=True)
        output_df.to_csv(output_csv_path, index=False)
        # Remember the image so a duplicate path later in df is not paid for twice.
        processed_images.add(image_path)
        print(f"Processed {idx+1}/{len(df)} images: {image_path} - saved results.")

    except Exception as e:
        print(f"API request error for image '{image_path}': {e}")
        continue
else:
    # Reached only when the loop was not cut short by the budget check.
    print(f"Processing complete. All results saved to {output_csv_path}")


### LLaVA (TinyLLaVA)

In [None]:
import pandas as pd


In [None]:
# Load the pickled test set and keep only its first 10 rows.
test_frame = pd.read_pickle("../data/pepper_data_test_equal.pkl")
df = test_frame[:10]
# The first nine columns are used as the action names.
actions = [*df.columns[:9]]

In [None]:
from tinyllava.eval.run_tiny_llava import eval_model
from tinyllava.mm_utils import get_model_name_from_path

model_path = "../huggingface_cache"
# The model name depends only on model_path, so resolve it once instead of
# once per (image, action) pair.
model_name = get_model_name_from_path(model_path)

# One dict per (image, action) pair.
results = []

for image_file in df['image_path']:  # local path or URL
    for action in actions:
        prompt = f"""You are evaluating the social appropriateness of a robot performing a specific action based on an image.
This is an image of an indoor scene with a Pepper robot present.
Given this image of a scenario which includes a robot and several humans, a human evaluator has to answer a question about whether it is socially appropriate for the robot to carry out a certain action in the given scenario.
Question: How socially appropriate is it for the robot to be doing the following action {action}?
Answer choices: A. very inappropriate, B. inappropriate, C. neutral, D. appropriate, E. very appropriate.
Respond with exactly 1 uppercase letter from {{A, B, C, D, E}}. Do not add any extra text, spaces, or punctuation."""

        # Throwaway attribute container standing in for parsed CLI arguments;
        # presumably eval_model reads these as attributes — TODO confirm.
        args = type('Args', (), {
            "model_path": model_path,
            "model_base": None,
            "model_name": model_name,
            "query": prompt,
            "conv_mode": "phi",
            "image_file": image_file,
            "sep": ",",
            "temperature": 0,
            "top_p": None,
            "num_beams": 1,
            "max_new_tokens": 1,
        })()

        answer = eval_model(args)
        # NOTE(review): eval_model may only print its output and return None —
        # confirm against the installed tinyllava version. Guard so that a
        # non-string result is recorded as None instead of crashing on .strip().
        score = answer.strip() if isinstance(answer, str) else None

        results.append({
            "image_path": image_file,
            "action": action,
            "appropriateness_score": score
        })

# Further processing like saving results can follow


In [None]:
# Bare expression as the last line of the cell: shows the collected
# per-(image, action) result dicts as the cell output.
results