In [2]:
import json 
from termcolor import colored
from agent_verify.agent import Agent

In [3]:
class IntentFormalizer(Agent):
    """Agent that asks the LLM to split a user request into tasks and constraints."""

    def __init__(self, **kwargs):
        # No extra state of its own: keyword arguments are forwarded to Agent unchanged.
        super().__init__(**kwargs)

    def generate_task(self, instruction):
        """Decompose a single request into tasks, each with its constraints.

        Args:
            instruction: Mapping whose 'ques' entry is sent as the user message.

        Returns:
            The model reply decoded with ``json.loads``. The prompt asks for a
            JSON object keyed task1, task2, ...; the shape is not validated here.

        Note:
            Reads ``self.llm_client`` and ``self.model_name``; presumably these
            are set up by ``Agent`` -- confirm against the base class.
        """
        # Doubled braces are f-string escapes; the prompt contains single literal braces.
        system_prompt = f'''
You are a helpful assistant.
The user needs to talk to a customer support agent to solve their request.
However, their request in very complicated.
Your job is to extract the different tasks from their request into a list so that it is easier for the support agent to understand and solve it. 
Each task may have several constraints associated with them.
So, you must extract the constraints associated with each of the subtasks.
Out your response in json in the following format:
{{
    task1: {{
        <task>,
        constraints: [<constraints>]
    }},
    task2: {{
        <task>,
        constraints: [<constraints>]
    }},
    ...
}}
'''
        conversation = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": instruction['ques']},
        ]
        completion = self.llm_client.complete(
            model=self.model_name,
            messages=conversation,
            response_format="json_object",
        )
        # Only the first choice is used.
        reply_text = completion.choices[0].message['content']
        return json.loads(reply_text)

In [4]:
def get_tasks_only(task_filter='Allrecipes', task_file='agent_verify/WebVoyager/WebVoyager_data.jsonl'):
    """Load the tasks for one website from a JSON Lines file.

    Args:
        task_filter: Value that a record's 'web_name' field must equal to be kept.
        task_file: Path to the JSON Lines file (one JSON object per line).
            Defaults to the WebVoyager dataset path used by this notebook.

    Returns:
        List of the decoded records whose 'web_name' equals ``task_filter``,
        in file order. Empty if nothing matches.

    Raises:
        OSError: If ``task_file`` cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no 'web_name' field.
    """
    with open(task_file, 'r', encoding='utf-8') as f:
        # Blank / whitespace-only lines are skipped before decoding.
        records = (json.loads(line) for line in f if line.strip())
        return [task for task in records if task['web_name'] == task_filter]

In [5]:
def formalize_tasks(task_filter):
    """Run the intent formalizer over every task of one website.

    Prints, for each task, its index, the raw task (blue) and the formalized
    result (green). Nothing is written to disk; persisting the result is left
    to the caller.

    Args:
        task_filter: Website name passed to ``get_tasks_only`` to select tasks.

    Returns:
        List with one formalized result per selected task, in the same order.
    """
    tasks = get_tasks_only(task_filter)
    intent_formalizer = IntentFormalizer()
    formalized_tasks = []
    for i, task in enumerate(tasks):
        print(i)
        print(colored(task, 'blue'))
        formalized_task = intent_formalizer.generate_task(task)
        print(colored(formalized_task, 'green'))
        formalized_tasks.append(formalized_task)
    return formalized_tasks

In [6]:
# Formalize every 'Allrecipes' task; formalize_tasks prints each task and its result as it goes.
formalized_tasks = formalize_tasks('Allrecipes')

0
[34m{'web_name': 'Allrecipes', 'id': 'Allrecipes--0', 'ques': 'Provide a recipe for vegetarian lasagna with more than 100 reviews and a rating of at least 4.5 stars suitable for 6 people.', 'web': 'https://www.allrecipes.com/'}[0m
[32m{'task1': {'task': 'Provide a recipe for vegetarian lasagna', 'constraints': ['Recipe must have more than 100 reviews', 'Recipe must have a rating of at least 4.5 stars', 'Recipe must be suitable for 6 people']}}[0m
1
[34m{'web_name': 'Allrecipes', 'id': 'Allrecipes--1', 'ques': 'Find a recipe for a vegetarian lasagna that has at least a four-star rating and uses zucchini.', 'web': 'https://www.allrecipes.com/'}[0m
[32m{'task1': {'task': 'Find a recipe for vegetarian lasagna', 'constraints': ['Recipe must have at least a four-star rating', 'Recipe must use zucchini']}}[0m
2
[34m{'web_name': 'Allrecipes', 'id': 'Allrecipes--2', 'ques': 'Find a recipe for a vegetarian lasagna under 600 calories per serving that has a prep time of less than 1 hour

In [7]:
# Persist the in-memory `formalized_tasks` list as JSON, named after the website filter.
task_filter = 'Allrecipes'
output_file = f'formalized_tasks_{task_filter}.json'
# Mode 'w' overwrites any existing file with the same name.
with open(output_file, 'w') as f:
    json.dump(formalized_tasks, f)

In [8]:
def read_tasks(task_filter='Allrecipes'):
    """Load previously saved formalized tasks for one website.

    Args:
        task_filter: Website name; the file read is
            ``formalized_tasks_<task_filter>.json`` in the current directory.

    Returns:
        The decoded JSON content of that file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    file_name = f'formalized_tasks_{task_filter}.json'
    # Context manager guarantees the handle is closed (the bare open().read()
    # left it to the garbage collector).
    with open(file_name, 'r', encoding='utf-8') as f:
        return json.load(f)

In [9]:
# Reload the saved list from formalized_tasks_Allrecipes.json.
tasks = read_tasks('Allrecipes')

In [13]:
# Inspect each reloaded entry: its Python type, its top-level keys, and the full value.
for task in tasks:
    print(type(task), task.keys(), task)

<class 'dict'> dict_keys(['task1']) {'task1': {'task': 'Provide a recipe for vegetarian lasagna', 'constraints': ['Recipe must have more than 100 reviews', 'Recipe must have a rating of at least 4.5 stars', 'Recipe must be suitable for 6 people']}}
<class 'dict'> dict_keys(['task1']) {'task1': {'task': 'Find a recipe for vegetarian lasagna', 'constraints': ['Recipe must have at least a four-star rating', 'Recipe must use zucchini']}}
<class 'dict'> dict_keys(['task1']) {'task1': {'task': 'Find a recipe for vegetarian lasagna', 'constraints': ['Under 600 calories per serving', 'Prep time of less than 1 hour']}}
<class 'dict'> dict_keys(['task1']) {'task1': {'task': 'Locate a recipe for vegan chocolate chip cookies on Allrecipes', 'constraints': ['Recipe must be vegan', 'Recipe must be for chocolate chip cookies', 'Recipe must be on Allrecipes', 'Recipe must have over 60 reviews', 'Recipe must have a rating of at least 4.5 stars']}}
<class 'dict'> dict_keys(['task1']) {'task1': {'task': 