In [None]:
import sys
import rich
import logging
from collections import defaultdict
from sotopia.database.persistent_profile import AgentProfile, EnvironmentProfile, RelationshipProfile
from sotopia.database.logs import EpisodeLog
from sotopia.database.env_agent_combo_storage import EnvAgentComboStorage
from collections import Counter 
from redis_om import Migrator
from rich.console import Console
from rich.terminal_theme import MONOKAI 

## Relationship Profile

In [None]:
# Load every stored RelationshipProfile and tally how many of each
# relationship type exist.
res_pks = list(RelationshipProfile.all_pks())
print(len(res_pks))
res = []
for pk in res_pks:
    try:
        res.append(RelationshipProfile.get(pk=pk))
    except Exception as e:
        # Best-effort load: report which record failed and keep going.
        # Catching `Exception` (not a bare `except`) lets Ctrl-C interrupt the loop.
        print(f"error loading RelationshipProfile {pk}: {e}")
res_relationships = [r.relationship for r in res]
# Last expression of the cell: displayed as the cell output.
Counter(res_relationships)

## Agents Profile

In [None]:
# Fetch every agent whose first name is exactly "ss" and pretty-print the list.
agents = (
    AgentProfile
    .find(AgentProfile.first_name == "ss")
    .all()
)
rich.print(agents)

In [None]:
# find specific agents: gender equal to "Man" and age greater than 30.
# The query object is iterated directly here (no `.all()` call, unlike the
# other lookups in this notebook).
agents = AgentProfile.find(AgentProfile.gender=="Man", AgentProfile.age>30)
for agent in agents:
    rich.print(agent)

In [None]:
# obtain all agents' basic info
agent_pks = list(AgentProfile.all_pks())
print(len(agent_pks))
agents = []
for pk in agent_pks:
    try:
        agents.append(AgentProfile.get(pk=pk))
    except Exception as e:
        # Best-effort load: report which record failed and keep going.
        # Catching `Exception` (not a bare `except`) lets Ctrl-C interrupt the loop.
        print(f"error loading AgentProfile {pk}: {e}")
# output agents' basic info
for agent in agents:
    rich.print(agent)

In [None]:
# Count the agent profiles by materialising their primary keys.
agent_pks = [agent_pk for agent_pk in AgentProfile.all_pks()]
print(len(agent_pks))

In [None]:
# Update agent's information: select the first profile whose first name is
# 'Ava' and last name is 'Martinez'.
# Note: despite the plural name, `agents` holds a single profile after this cell.
agents = AgentProfile.find(
    AgentProfile.first_name == 'Ava',
    AgentProfile.last_name == 'Martinez',
).all()[0]

In [None]:
# Display the selected profile (the `.all()[0]` lookup stores a single
# AgentProfile in `agents`, not a list).
agents

In [None]:
# Set a new `secret` value on the selected profile.
# NOTE(review): presumably `update` also persists the change to the database — confirm.
agents.update(
    secret='Keeps their bisexuality a secret from her conservative family',
)

## Environment Profile

In [None]:
# Collect the primary keys of all environment profiles, then show the
# total count and the first five keys.
all_envs = [env_pk for env_pk in EnvironmentProfile.all_pks()]
print(len(all_envs))
print(all_envs[:5])

In [None]:
# Fetch one environment profile by its primary key and pretty-print it.
env_profile_id = "01H7VFHPJKR16MD1KC71V4ZRCF"
env = EnvironmentProfile.get(pk=env_profile_id)
rich.print(env)

## EnvAgentComboStorage

In [None]:
# all env-agent combos
# `all_pks` / `get` are called on the class (as the duplicate check below
# does) instead of on a throwaway `EnvAgentComboStorage()` instance.
all_combos = list(EnvAgentComboStorage.all_pks())
print(len(all_combos))
# Show the first combo as a sample; skip when the store is empty so the
# cell does not fail with an IndexError.
if all_combos:
    rich.print(EnvAgentComboStorage.get(all_combos[0]))

In [None]:
# Check for duplicates in EnvAgentComboStorage: two combos count as the same
# when they share env_id and their first two agent ids (in the same order).
cache = set()
for combo_pk in all_combos:
    combo = EnvAgentComboStorage.get(combo_pk)
    curr_tuple = (combo.env_id, combo.agent_ids[0], combo.agent_ids[1])
    if curr_tuple not in cache:
        # First time this (env, agent, agent) triple is seen.
        cache.add(curr_tuple)
        continue
    print("duplicate")

## Episode Log

In [None]:
# Fetch all episode logs carrying the given tag and show how many matched.
Episodes = (
    EpisodeLog
    .find(EpisodeLog.tag == "6_initial_aug14_full")
    .all()
)
len(Episodes)

In [None]:
# Collect the primary keys of all episode logs; print the count and the first key.
episode_pks = [episode_pk for episode_pk in EpisodeLog.all_pks()]
print(len(episode_pks))
print(episode_pks[0])

In [None]:
# Some episodes raise a validation error while loading.
# The primary keys of those episodes are collected in `buggy_eps`.
gpt35_llama2_eps = []
buggy_eps = []
for epid in episode_pks:
    try:
        curr_ep = EpisodeLog.get(epid)
    except Exception as e:
        print(e)
        # Record the key that failed to load. `curr_ep` must not be used here:
        # it is either unbound (first iteration) or still refers to the
        # previously loaded, valid episode.
        buggy_eps.append(epid)
        continue
    gpt35_llama2_eps.append(curr_ep)
# Number of episodes that loaded successfully.
len(gpt35_llama2_eps)

In [None]:
# Get the episode logs whose `models` list is [env model, agent 1 model,
# agent 2 model] for the two models below, in either agent order.
model1 = 'gpt-3.5-turbo'
model2 = 'togethercomputer/llama-2-70b-chat'
model_comp1 = ['gpt-4', model1, model2]
model_comp2 = ['gpt-4', model2, model1]

gpt35_llama2_eps = []
for epid in episode_pks:
    try:
        curr_ep = EpisodeLog.get(epid)
    except Exception:
        # Episodes that fail to load are skipped. Catching `Exception`
        # (not a bare `except`) lets Ctrl-C interrupt this long loop.
        continue
    if curr_ep.models in (model_comp1, model_comp2):
        gpt35_llama2_eps.append(curr_ep)
# Number of episodes matching either model arrangement.
len(gpt35_llama2_eps)

In [None]:
# Check symmetry of episode logs: for every (env, agent1, agent2) episode run
# with agent models (m1, m2) there should be one run with models (m2, m1).
def is_symmetric_epilogs(epilogs):
    """Return True when every episode has a counterpart with the agent models swapped.

    Each episode is keyed by (environment, agents[0], agents[1], models[0],
    models[1], models[2]). Its mirror key is the same tuple with models[1]
    and models[2] exchanged. If several episodes share a key, the pk of the
    last one is kept. When any key has no mirror, a warning listing the
    affected pks is logged and False is returned.
    """
    pk_by_key = {
        (ep.environment, ep.agents[0], ep.agents[1], ep.models[0], ep.models[1], ep.models[2]): ep.pk
        for ep in epilogs
    }
    asymetric_epilogs = [
        pk
        for (env, agent_a, agent_b, env_model, model_a, model_b), pk in pk_by_key.items()
        if (env, agent_a, agent_b, env_model, model_b, model_a) not in pk_by_key
    ]

    if asymetric_epilogs:
        logging.warning(f"Found {len(asymetric_epilogs)} asymetric epilogs: {asymetric_epilogs}")
        return False
    return True
            
    
# export episode log to html (or svg, depending on the file extension)
def export_html_from_log(ep: EpisodeLog, file_path: str = "./output.html") -> None:
    """Render an episode through a recording rich console and save it to ``file_path``.

    The recording contains, in order: a header naming the three models
    (``ep.models[0]`` as Env, ``ep.models[1]`` as Agent1, ``ep.models[2]`` as
    Agent2), every agent profile and every conversation message returned by
    ``ep.render_for_humans()``.

    Args:
        ep: Episode to export.
        file_path: Destination file. A path ending in ``.svg`` (case-insensitive)
            is written as SVG; any other path, including the default
            ``./output.html``, is written as HTML. Previously SVG markup was
            written regardless of the extension.
    """
    agent_profiles, conversation = ep.render_for_humans()
    # record=True keeps everything logged so it can be exported afterwards.
    console = Console(record=True, log_time=False, log_path=False)
    console.log(f"Models:\n Env: {ep.models[0]}\n Agent1: {ep.models[1]}\n Agent2: {ep.models[2]}\n")

    for agent_profile in agent_profiles:
        console.log(agent_profile)
    for message in conversation:
        console.log(message)

    # Pick the export format from the extension so the file content matches its name.
    if file_path.lower().endswith(".svg"):
        console.save_svg(file_path, theme=MONOKAI)
    else:
        console.save_html(file_path, theme=MONOKAI)

In [None]:
# Run the symmetry check on the tag-filtered episodes; the cell output is
# True/False, and a warning listing the unmatched episode pks is logged on False.
is_symmetric_epilogs(Episodes)

In [None]:
# Export a single episode to the function's default path ("./output.html").
# NOTE(review): index 13 is hard-coded — raises IndexError when fewer than
# 14 episodes were collected in `gpt35_llama2_eps`.
export_html_from_log(gpt35_llama2_eps[13])

In [None]:
# Print a human-readable version of the second tag-filtered episode:
# first every agent profile, then every conversation message.
agent_profiles, conversation = Episodes[1].render_for_humans()
for rendered_group in (agent_profiles, conversation):
    for rendered_item in rendered_group:
        rich.print(rendered_item)

In [None]:
# Count the distinct environments covered by the tag-filtered episodes.
len({episode.environment for episode in Episodes})

In [None]:
from collections import defaultdict
import pandas as pd

def get_avg_reward_for_models(episodes: "list[EpisodeLog]") -> pd.DataFrame:
    """Average the per-dimension rewards of each agent model over the episodes.

    ``episode.models[0]`` (the environment model) is skipped; the remaining
    models are paired by position with ``episode.rewards``. Results are grouped
    under the key ``"<model>_agent_<idx>"``, so the same model playing a
    different agent slot is reported separately.

    Args:
        episodes (list[EpisodeLog]): A list of episodes. A reward is used only
            when it is a tuple; its second item is taken as a mapping of
            dimension name to score. For any other reward the episode's
            ``rewards`` are printed and that agent's entry is skipped.

    Returns:
        pd.DataFrame: One column per ``"<model>_agent_<idx>"`` key and one row
        per reward dimension, holding the mean score. Empty when no usable
        reward was found.
    """
    dimension_scores = defaultdict(list)
    for episode in episodes:
        for idx, model in enumerate(episode.models[1:]):  # skip env
            reward = episode.rewards[idx]
            if isinstance(reward, tuple):
                # reward[1] is the per-dimension breakdown.
                dimension_scores[model + '_agent_' + str(idx)].append(reward[1])
            else:
                # Surface malformed rewards so they can be inspected.
                print(episode.rewards)
    # Build the averages in a fresh dict instead of overwriting the
    # collected scores while iterating over them.
    model_rewards_avg = {
        key: pd.DataFrame(scores).mean(axis=0).to_dict()
        for key, scores in dimension_scores.items()
    }
    return pd.DataFrame.from_dict(model_rewards_avg)

def get_avg_successRate_for_models(episodes: "list[EpisodeLog]") -> pd.DataFrame:
    """Compare the two agent models episode by episode and report win / tie rates.

    ``episode.models[0]`` (the environment model) is skipped; the remaining
    models are paired by position with ``episode.rewards`` and grouped under
    the key ``"<model>_agent_<idx>"``. For every reward dimension, a model
    "wins" an episode when its score is strictly greater than the other
    model's score in that same episode.

    An episode is used only when the rewards of all its agents are tuples
    (second item: mapping of dimension name to score). Otherwise the episode's
    rewards and first message are printed and the whole episode is skipped,
    so that row i of both models always refers to the same episode.

    Args:
        episodes (list[EpisodeLog]): A list of episodes.

    Returns:
        pd.DataFrame: One row per reward dimension and three columns — the
        first model key (fraction of episodes it wins), ``"on_par"`` (fraction
        of ties) and the second model key (fraction of episodes it wins).

    Raises:
        AssertionError: If the usable episodes do not yield exactly two model keys.
    """
    model_rewards = defaultdict(list)
    for episode in episodes:
        agent_models = episode.models[1:]  # skip env
        agent_rewards = [episode.rewards[idx] for idx in range(len(agent_models))]
        if not all(isinstance(reward, tuple) for reward in agent_rewards):
            # Dropping only one agent's reward would shift the pairing of
            # every later episode, so the whole episode is discarded.
            print(episode.rewards, episode.messages[0])
            continue
        for idx, (model, reward) in enumerate(zip(agent_models, agent_rewards)):
            model_rewards[model + '_agent_' + str(idx)].append(reward[1])

    # One frame per model key: rows are episodes, columns are reward dimensions.
    model_successRate_avg = {
        key: pd.DataFrame(scores) for key, scores in model_rewards.items()
    }
    assert len(model_successRate_avg) == 2, "There should be two models"
    model_list = list(model_successRate_avg.keys())
    first_scores = model_successRate_avg[model_list[0]]
    second_scores = model_successRate_avg[model_list[1]]
    model_one_successRate = first_scores > second_scores
    model_two_successRate = first_scores < second_scores
    model_on_par_successRate = first_scores == second_scores
    # Mean of a boolean column is the fraction of episodes where it is True.
    return pd.DataFrame.from_dict({
        model_list[0]: model_one_successRate.mean(axis=0).to_dict(),
        "on_par": model_on_par_successRate.mean(axis=0).to_dict(),
        model_list[1]: model_two_successRate.mean(axis=0).to_dict(),
    })

In [None]:
# Average reward per agent model over the tag-filtered episodes; the
# returned table is displayed as the cell output.
get_avg_reward_for_models(Episodes)

In [None]:
# Win / on-par rates between the two agent models over the tag-filtered
# episodes; the table is kept in `a` and displayed as the cell output.
a = get_avg_successRate_for_models(Episodes)
a