In [1]:
import os
import pandas as pd
import json

# Root directory that is searched recursively (os.walk) for 'summary.csv'
# files. The path is relative, so it is resolved against the current working
# directory of the process running this cell.
summary_dir = './../data/eval/webshop/final_eval/'

# Function to extract values from JSON files
def extract_values(row, base_path):
    """Read the reward components stored for one summary row.

    The JSON file is looked up at ``<base_path>/<model_id>/<env_idx>.json``
    and the values are taken from its ``info -> verbose`` object.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the
            ``'model_id'`` and ``'env_idx'`` entries.
        base_path: Directory that contains the per-model sub-directories.

    Returns:
        pd.Series indexed by ``r_type``, ``r_att``, ``r_price`` and
        ``r_option``. A component that cannot be found (missing file, missing
        or null ``info``/``verbose`` object, missing key) is reported as 0.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    reward_keys = ('r_type', 'r_att', 'r_price', 'r_option')
    json_path = os.path.join(base_path, f"{row['model_id']}/{row['env_idx']}.json")

    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(json_path, 'r', encoding='utf-8') as f:
            payload = json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        # Same outcome as the former os.path.exists() check: fall back to 0s.
        payload = {}

    # Walk info -> verbose defensively: either level may be absent or null,
    # in which case chaining .get() would raise AttributeError.
    verbose_info = payload
    for level in ('info', 'verbose'):
        verbose_info = verbose_info.get(level) if isinstance(verbose_info, dict) else None
    if not isinstance(verbose_info, dict):
        verbose_info = {}

    return pd.Series({key: verbose_info.get(key, 0) for key in reward_keys})

# Process every 'summary.csv' found under summary_dir. Each summary row is
# extended with the reward components that extract_values reads from the JSON
# files located relative to that summary's own directory.
reward_columns = ['r_att', 'r_option', 'r_type', 'r_price']
report_columns = reward_columns + ['score', 'num_actions']

processed_dfs = []
for root, _, files in os.walk(summary_dir):
    if 'summary.csv' not in files:
        continue
    df = pd.read_csv(os.path.join(root, 'summary.csv'))
    if df.empty:
        # A row-wise apply on an empty frame does not produce the reward
        # columns, so the join below would fail; an empty summary contributes
        # no rows to the means anyway.
        continue
    extracted_df = df.apply(lambda row: extract_values(row, root), axis=1)
    processed_dfs.append(df.join(extracted_df))

if not processed_dfs:
    # pd.concat([]) would otherwise fail with "No objects to concatenate".
    raise FileNotFoundError(f"No non-empty 'summary.csv' found under {summary_dir!r}")

# Combine all processed DataFrames and calculate per-model means. The columns
# are selected *before* aggregating so that non-numeric columns in the
# summaries cannot make mean() raise (pandas >= 2.0 no longer drops them).
final_df = pd.concat(processed_dfs, ignore_index=True)
mean_grouped_by_model_id = final_df.groupby('model_id')[report_columns].mean()

# Scale for reporting: reward components x100, score x10.
# NOTE(review): presumably this puts both on a 0-100 scale - confirm.
mean_grouped_by_model_id[reward_columns] *= 100
mean_grouped_by_model_id['score'] *= 10

# Round to one decimal place and display the result
mean_grouped_by_model_id.round(1)


Unnamed: 0_level_0,r_att,r_option,r_type,r_price,score,num_actions
model_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
claude-3-haiku-20240307,51.2,12.3,62.9,66.0,44.4,11.4
gemini-1.5-flash,54.5,9.1,70.6,75.0,46.3,8.6
gpt-4o,67.9,17.3,82.8,89.6,58.4,8.0
gpt-4o-mini,63.8,6.9,77.0,85.2,51.0,11.3
leap-llm/Meta-Llama-3-8B-Instruct-sft-webshop-iter0,28.0,25.8,35.0,36.8,29.4,21.1
leap-llm/Meta-Llama-3-8B-Instruct-sft-webshop-iter1,60.0,16.1,73.4,78.4,51.8,16.7
leap-llm/Meta-Llama-3-8B-Instruct-sft-webshop-iter2,64.8,33.5,80.0,85.4,61.8,11.6
leap-llm/Meta-Llama-3-8B-Instruct-sft-webshop-iter3,64.3,32.2,78.6,83.8,60.7,11.9
meta-llama/Meta-Llama-3-8B-Instruct,1.3,0.6,1.4,1.4,1.2,29.9
