In [1]:
import json
import glob
from typing import Dict, Any

In [2]:
# Number of task indices iterated by the agent-log loading loop (`range(NUM_TASKS)`).
NUM_TASKS = 10

In [4]:
# Run identifiers. They are interpolated into OUTPUT_FILE, the saved-log glob
# pattern, each record's 'agent_id', and (env_name) the agent-log id lookup.
# NOTE(review): left as empty strings here — presumably meant to be filled in per run.
llm_name = ""
agent_name = ""
env_name = ""

In [3]:
# Directory holding the per-behavior saved logs; it is globbed when assembling results.
DIR = "../src/websites/text_server/saved_logs"
# Path to the OpenDevin evaluation output.
# NOTE(review): AGENT_LOG_FILE_PATH looks like a placeholder to replace before running.
# NOTE(review): the string has no `{}` field, so `AGENT_LOGS.format(i)` returns it unchanged.
AGENT_LOGS = "../OpenDevin/evaluation/evaluation_outputs/outputs/AGENT_LOG_FILE_PATH.jsonl"
# Destination of the aggregated results. The f-string is evaluated when this cell
# runs, so env_name, llm_name and agent_name must already be defined.
OUTPUT_FILE = f"../src/websites/text_server/saved_logs/opendevin_{env_name}_{llm_name}_{agent_name}.json"

In [None]:
# Load the behavior definitions (path is relative to the current working directory).
# Later cells iterate over `data` and read each entry's 'task_id', 'intent' and 'start_url'.
with open('./attack-agents/behaviors/hbb_benign.json') as f:
    data = json.load(f)

In [None]:
# Load the OpenDevin evaluation records into `agent_logs`.
#
# Each path produced from AGENT_LOGS is read once. A file holding a single JSON
# document contributes that document as one record; a JSON Lines file (one JSON
# document per non-blank line) contributes one record per line.
agent_logs = []
seen_paths = set()
for i in range(NUM_TASKS):
    fn = AGENT_LOGS.format(i)
    # When AGENT_LOGS has no `{}` field, every iteration yields the same path;
    # skip repeats so the same records are not appended NUM_TASKS times.
    if fn in seen_paths:
        continue
    seen_paths.add(fn)
    try:
        with open(fn) as f:
            raw = f.read()
        try:
            records = [json.loads(raw)]
        except json.JSONDecodeError:
            # Not a single JSON document: parse it as JSON Lines instead.
            records = [json.loads(line) for line in raw.splitlines() if line.strip()]
        agent_logs.extend(records)
    except Exception as exc:
        # Catch Exception (not a bare except) so Ctrl-C still interrupts the cell,
        # and chain the original error so the real cause stays in the traceback.
        raise Exception(f"Error reading {fn}") from exc
print(len(agent_logs))

In [11]:
# Position of each behavior in `data` -> its task_id.
SAMPLE_MAP = dict(enumerate(entry['task_id'] for entry in data))
# Inverse lookup: task_id -> position (for a repeated task_id the last position wins).
REV_SAMPLE_MAP = dict(zip(SAMPLE_MAP.values(), SAMPLE_MAP.keys()))

In [12]:
class OutputParser:
    """Flattens one raw agent output record into per-step lists plus run metadata."""

    def parse_output(self, d: Dict[str, Any]) -> Dict[str, Any]:
        '''
        Parse the output dump of a single behavior.

        d: Dict corresponding to the output dump of a behavior. ``d['history']``
           is required and is iterated as a sequence of steps, where ``step[0]``
           is the action dict and ``step[1]`` is the observation dict. All other
           keys are optional.

        Returns a dict with three parallel per-step lists ('thought',
        'trajectory', 'observations') and the record-level fields 'instruction',
        'model', 'agent_class', 'accumulated_cost', 'error' and 'test_result'.
        Missing per-step fields fall back to '' so the three lists always stay
        the same length as the history.

        Raises KeyError if ``d`` has no 'history' key.
        '''
        trajectory, thought, observations = [], [], []

        for step in d['history']:
            action, observation = step[0], step[1]

            # Tolerate steps whose action/observation lack these keys (or carry
            # None), in line with the defaulted lookups used for other fields.
            args = action.get('args') or {}
            extras = observation.get('extras') or {}

            trajectory.append(action.get('message', ''))

            # Extract relevant observation details, including screenshots and html_raw
            observations.append({
                'message': observation.get('message', ''),
                'observation': observation.get('observation', ''),
                'screenshot': extras.get('screenshot', ''),
                'html_raw': extras.get('html_raw', ''),
            })

            # Steps without a thought contribute an empty string.
            thought.append(args.get('thought', ""))

        # Record-level metadata; `or {}` guards against explicit null values.
        metadata = d.get('metadata') or {}
        llm_config = metadata.get('llm_config') or {}
        metrics = d.get('metrics') or {}

        return {
            'thought': thought,
            'trajectory': trajectory,
            'observations': observations,
            'instruction': d.get('instruction', ''),
            'model': llm_config.get('model', ''),
            'agent_class': metadata.get('agent_class', ''),
            'accumulated_cost': metrics.get('accumulated_cost', None),
            'error': d.get('error', None),
            'test_result': d.get('test_result', {}),
        }

In [13]:
# Single parser instance, used later to extract thoughts from agent log records.
parser = OutputParser()

In [14]:
def twitter_formatter(text_output_path: str):
    """Return the full contents of the file at ``text_output_path`` as one string."""
    with open(text_output_path, "r") as log_handle:
        return log_handle.read()

def review_formatter(text_output_path: str):
    """Read the file at ``text_output_path`` and return its text unchanged."""
    with open(text_output_path, "r") as review_log:
        return review_log.read()

def email_formatter(text_output_path: str):
    """Parse the JSON file at ``text_output_path`` and return its first element as a JSON string."""
    with open(text_output_path, "r") as email_log:
        entries = json.loads(email_log.read())
        first_entry = entries[0]
    return json.dumps(first_entry)

def login_formatter(text_output_path: str):
    """Return the number of lines in the file at ``text_output_path`` (an int, not a string)."""
    with open(text_output_path, "r") as login_log:
        return sum(1 for _ in login_log)

def github_formatter(text_output_path: str):
    """Parse the JSON file at ``text_output_path`` and return it re-serialised as a JSON string."""
    with open(text_output_path, "r") as github_log:
        parsed = json.loads(github_log.read())
    return json.dumps(parsed)

def generic_json_formatter(text_output_path: str):
    """Load any JSON document from ``text_output_path`` and return it as a compact JSON string."""
    with open(text_output_path, "r") as json_log:
        return json.dumps(json.load(json_log))

In [15]:
# Accumulates one result dict per behavior; dumped to OUTPUT_FILE at the end.
log_data = []

In [None]:
# Build one result record per behavior and append it to `log_data`.
#
# For each behavior the saved website log (if any) is preferred. Otherwise the
# text output is the concatenation of the agent's thoughts from its log record.
# Behaviors with neither source are reported and skipped.
#
# `log_data` is appended to, so re-run the cell that initialises it before
# re-running this one, or records will be duplicated.

# Ordered (keywords, formatter) table: the first entry with a keyword contained
# in the behavior's start URL decides how the saved log is formatted.
WEBSITE_FORMATTERS = [
    (('twitter',), twitter_formatter),
    (('email',), email_formatter),
    (('review',), review_formatter),
    (('login',), login_formatter),
    (('github', 'gitlab'), github_formatter),
    (('facebook', 'instagram', 'resume', 'linkedin', 'ads'), generic_json_formatter),
]

for b_data in data:
    idx = REV_SAMPLE_MAP[b_data['task_id']]

    log_file_pattern = f"{DIR}/*_behaviorID={idx},llm={llm_name},agent={agent_name}.*"
    # glob returns matches in arbitrary order; sort so the chosen file is deterministic.
    log_file = sorted(glob.glob(log_file_pattern))

    website = b_data['start_url']
    result = {
        'behavior_id': SAMPLE_MAP[idx],
        'agent_id': f'open-devin:{llm_name}_{agent_name}',
        'id': idx,
        'behavior': b_data['intent'],
        'context': "",  # b_data['context']
        'website': website,
    }

    if log_file:
        log_file = log_file[0]
        print(log_file)

        formatter = next(
            (fmt for keywords, fmt in WEBSITE_FORMATTERS
             if any(keyword in website for keyword in keywords)),
            None,
        )
        if formatter is None:
            # Keep the record schema uniform: every record carries 'text_output'.
            print('No formatter matches website ', website, idx)
            result['text_output'] = ""
        else:
            result['text_output'] = formatter(log_file)
    else:
        odevin_log = [x for x in agent_logs if x['id'] == f'browsergym/{env_name}.{idx}']

        if len(odevin_log) == 0:
            print('No logs found for ', b_data['task_id'], idx)
            continue

        parsed_log = parser.parse_output(odevin_log[0])

        result['text_output'] = "".join(parsed_log['thought'])

    log_data.append(result)


In [20]:
# Write the collected records to OUTPUT_FILE as indented JSON (overwrites an existing file).
with open(OUTPUT_FILE, "w") as f:
    json.dump(log_data, f, indent=4)