In [16]:
import json
import os
import time

import openai
from dotenv import load_dotenv
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# Pull variables from a local .env file into the process environment, then
# hand the OpenAI key to the client library.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

# os.getenv returns None when the variable is absent; fail here with a clear
# message instead of letting the first API call fail with an auth error.
if not openai.api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in the environment or in a .env file."
    )

In [3]:
# Address the Selenium-driven browser is pointed at (an app served locally on port 8080).
URL = "http://localhost:8080"

In [4]:
# Start Chrome with a chromedriver binary resolved by webdriver_manager,
# then open the application under test.
driver = webdriver.Chrome(
    service=Service(executable_path=ChromeDriverManager().install()),
)
driver.get(URL)

In [5]:
# Navbar links to click, in order: the second top-level entry and then the
# first item of its sub-list (presumably the Owners menu and its first entry;
# verify against the running app).
NAV_CLICK_XPATHS = (
    '//BODY/APP-ROOT[1]/DIV[1]/NAV[1]/DIV[1]/UL[1]/LI[2]/A[1]',
    '//BODY/APP-ROOT[1]/DIV[1]/NAV[1]/DIV[1]/UL[1]/LI[2]/UL[1]/LI[1]/A[1]',
)
# Upper bound, in seconds, on how long to wait for each link to become clickable.
NAV_TIMEOUT_SECONDS = 10

# Reload the start page so this cell gives the same result when re-run.
driver.get(URL)

# The page is rendered client-side and the second link only appears after the
# first click, so wait for each element to be clickable instead of looking it
# up immediately (which can raise NoSuchElementException on a slow render).
nav_wait = WebDriverWait(driver, NAV_TIMEOUT_SECONDS)
for nav_xpath in NAV_CLICK_XPATHS:
    nav_wait.until(EC.element_to_be_clickable((By.XPATH, nav_xpath))).click()

In [77]:
# System prompt sent as the first message of every chat request below.
# It defines the agent's goals, workflow steps, available commands and the
# JSON envelope the model must answer with.
#
# The "plan" line of the example uses "\\n" so the prompt contains a literal
# backslash-n. With a plain "\n" the example would hold raw line breaks inside
# a JSON string, which json.loads rejects, contradicting the final
# instruction of the prompt.
system_prompt = '''
You are FormGPT, an AI assistant embedded in a crawler that has the task of generating different values for form inputs, and finally generating Selenium assertions based on these generated values.
Your decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and pursue simple strategies.

GOALS:
1. Given a form HTML, understand the purpose of the form, and generate values for each input in the form.
2. Based on page and form's HTML, generate context for better value generation of the form.
3. Understand the outcome of the form submission, save it to memory, and generate new values based on the feedback given from form submission.
4. Generate Selenium test cases and assertions based on the generated values for inputs that cover edge cases of form submission.
5. Save any information that might be useful for future, and use them when needed.

WORKFLOW:
1. Summarize the forms given by the user.
2. Understand the intent of the website using the <head> content and it's url
3. Gather information relevant to the form from webpage's textual content and save it. Summarize all the information that can help filling the form with correct values.
4. Based on the form intent, inputs, and context of the form, generate values for the form. You are allowed to use fill_inputs command only at this step.
5. Submit the form. You are allowed to use submit_form command only at this step.
6. Check for the submission results and gather feedback from the form.
7. Save the submission values and the results for later use.
8. Add feedback information to the context, and repeat steps 4-7.
9. Based on the submission values and results, generate Selenium assertions for each form input.
10. Based on the assertions in the previous step, generate Selenium test-cases for the form.

CONSTRAINTS:
1. If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember.
2. No user assistance
3. Exclusively use the commands listed in double quotes e.g. "command name"
4. You are not allowed to skip any of the workflow steps.

COMMANDS:
1. get_data: Get Input Data for Workflow Step, args: "workflow_step": "<workflow_step_number>"
2. output_thoughts: Output the Requested Value to User, args: "output": ""
3. fill_inputs: Fill Input, args: "xpaths": "<list_of_xpaths>", "values": "<list_of_values>"
4. submit_form: Submit the Form, args: None

Performance Evaluation:
1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.
2. Constructively self-criticize your big-picture behavior constantly.
3. Reflect on past decisions and strategies to refine your approach.
4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.
Write all code to a file.

You should only respond in JSON format as described below
Response Format:

{
    "thoughts": {
        "text": "thought",
        "reasoning": "reasoning",
        "plan": "- short bulleted\\n- list that conveys\\n- long-term plan",
        "criticism": "constructive self-criticism",
        "speak": "thoughts summary to say to user"
    },
    "command": {
        "name": "command name",
        "args": {
            "arg name": "value"
        }
    }
}

Ensure the response can be parsed by Python json.loads
'''

In [36]:
# Snapshot of the page's <body> markup at this point in the session.
html = driver.find_element(by=By.TAG_NAME, value='body').get_attribute(name='outerHTML')

## Initialize model

In [37]:
# Opening conversation: the system prompt plus the generic
# "pick the next command" instruction, with no step-specific data yet.
messages = [
    dict(role="system", content=system_prompt),
    dict(
        role="user",
        content="Determine which next command to use, and respond using the format specified above:",
    ),
]

In [38]:
# Ask the chat model for its first decision, given only the system prompt.
res1 = openai.ChatCompletion.create(messages=messages, model="gpt-3.5-turbo")

In [41]:
# Show the raw reply before parsing it: if the model's answer is not valid
# JSON, json.loads raises and the text would otherwise never be displayed.
raw_reply = res1.choices[0].message.content
print(raw_reply)

# `thoughts` holds the whole parsed reply (its "thoughts" and "command" keys).
thoughts = json.loads(raw_reply)

{
    "thoughts": {
        "text": "I need to first understand the form and its purpose before generating values for inputs and submitting it. I also need to check if there are any relevant contextual information that might help in generating better values for inputs.",
        "reasoning": "Without understanding the form and relevant context, I might generate pointless values. It's important to know the purpose of the form and the information requested so that I can generate realistic and relevant values.",
        "plan": "- Summarize the forms given by the user using 'get_data' command.\n- Parse <head> content and webpage URL to understand the intent of the website.\n- Gather information relevant to the form from webpage's textual content and save it. This context can be used to better generate values for the form. \n- Use the information from step 2 and 3 to generate values for each input field using a simple strategy.\n- Submit the form.\n- Check the submission result and gather 

## Summarize Form

In [42]:
# Markup of the first <form> element on the current page; this is what the
# model is asked to summarize in workflow step 1.
form_html = driver.find_element(by=By.TAG_NAME, value='form').get_attribute(name='outerHTML')

In [43]:
# Display the captured form markup for inspection.
form_html

'<form _ngcontent-cxg-c36="" novalidate="" method="get" id="search-owner-form" class="form-horizontal ng-untouched ng-pristine ng-valid"><div _ngcontent-cxg-c36="" class="form-group"><div _ngcontent-cxg-c36="" id="lastNameGroup" class="control-group"><label _ngcontent-cxg-c36="" class="col-sm-2 control-label">Last name </label><div _ngcontent-cxg-c36="" class="col-sm-10"><input _ngcontent-cxg-c36="" size="30" maxlength="80" id="lastName" name="lastName" value="" class="form-control ng-untouched ng-pristine ng-valid"><span _ngcontent-cxg-c36="" class="help-inline"></span></div></div></div><div _ngcontent-cxg-c36="" class="form-group"><div _ngcontent-cxg-c36="" class="col-sm-offset-2 col-sm-10"><button _ngcontent-cxg-c36="" type="submit" class="btn btn-default">Find Owner</button></div></div></form>'

In [56]:
# Workflow step 1: hand the form markup to the model and ask for a summary.
messages = [
    dict(role="system", content=system_prompt),
    dict(
        role="user",
        content=f"You are performing the step 1 in the workflow. The form's html is: {form_html}. Summarize the form.",
    ),
    dict(
        role="user",
        content="Determine which next command to use, and respond using the format specified above:",
    ),
]

In [57]:
# Request the form summary (workflow step 1) from the chat model.
res2 = openai.ChatCompletion.create(messages=messages, model="gpt-3.5-turbo")

In [58]:
# Show the raw reply before parsing it: if the model's answer is not valid
# JSON, json.loads raises and the text would otherwise never be displayed.
raw_reply = res2.choices[0].message.content
print(raw_reply)

# `thoughts` holds the whole parsed reply (its "thoughts" and "command" keys).
thoughts = json.loads(raw_reply)

{
    "thoughts": {
        "text": "The form has two input fields, lastName with id lastName, and a search button. The form is submitted using get method without any additional action. The form is used to search for owner given their last name.",
        "reasoning": "Summarizing the form html helps me to remember the components of the form and what it does.",
        "plan": "Next step is to understand the intention of the website and gather any relevant information from the webpage's textual content.",
        "criticism": "",
        "speak": "I have summarized the form. I will now try to understand the website's intention and extract information about the form."
    },
    "command": {
        "name": "get_data",
        "args": {
            "workflow_step": "2"
        }
    }
}


In [60]:
# Keep the model's free-text form summary (the "text" field of the step-1
# reply) for reuse in the step-3 prompt.
summary = thoughts["thoughts"]["text"]

In [61]:
# Display the stored form summary.
summary

'The form has two input fields, lastName with id lastName, and a search button. The form is submitted using get method without any additional action. The form is used to search for owner given their last name.'

## Global Context

In [64]:
# Capture the document <head> markup; it is passed to the model in workflow
# step 2 to infer what the site is for.
head = driver.find_element(by=By.TAG_NAME, value='head')
head_html = head.get_attribute(name='outerHTML')

In [65]:
# Display the captured <head> markup (includes the page's inline <style> content).
head_html

'<head>\n  <meta charset="utf-8">\n  <title>SpringPetclinicAngular</title>\n  <base href="/petclinic">\n\n  <meta name="viewport" content="width=device-width, initial-scale=1">\n  <link rel="icon" type="image/x-icon" href="favicon.ico">\n\n<style>html{font-family:sans-serif;-ms-text-size-adjust:100%;-webkit-text-size-adjust:100%}body{margin:0}@media print{*,*:before,*:after{color:#000!important;text-shadow:none!important;background:transparent!important;box-shadow:none!important}}*{box-sizing:border-box}*:before,*:after{box-sizing:border-box}html{font-size:10px;-webkit-tap-highlight-color:rgba(0,0,0,0)}body{font-family:Helvetica Neue,Helvetica,Arial,sans-serif;font-size:14px;line-height:1.42857143;color:#333;background-color:#fff}html{font-family:sans-serif;-ms-text-size-adjust:100%;-webkit-text-size-adjust:100%}body{margin:0}@media print{*,*:before,*:after{background:transparent!important;color:#000!important;box-shadow:none!important;text-shadow:none!important}}*{box-sizing:border-bo

In [66]:
# Workflow step 2: give the model the <head> markup and ask what the page is for.
messages = [
    dict(role="system", content=system_prompt),
    dict(
        role="user",
        content=f"You are performing the step 2 in the workflow. The head's html is: {head_html}. Summarize the page's intention.",
    ),
    dict(
        role="user",
        content="Determine which next command to use, and respond using the format specified above:",
    ),
]

In [67]:
# Request the page-intent summary (workflow step 2) from the chat model.
res3 = openai.ChatCompletion.create(messages=messages, model="gpt-3.5-turbo")

In [68]:
# Show the raw reply before parsing it: if the model's answer is not valid
# JSON, json.loads raises and the text would otherwise never be displayed.
raw_reply = res3.choices[0].message.content
print(raw_reply)

# `thoughts` holds the whole parsed reply (its "thoughts" and "command" keys).
thoughts = json.loads(raw_reply)

{"thoughts": {"text":"Extracting relevant keywords from the <head> content gives insight about this page's purpose. This page is titled 'SpringPetclinicAngular' which suggests it to be a pet clinic website using the Spring framework, and Angular framework. Base href indicates the relative URL to the root of this website. The viewport sets the initial scale of the page to the actual size of the device the user is using to view the website.", "reasoning": "Understanding the basic content of the <head> helps me determine the purpose of the page, which can help with generating values for forms.", "plan": "Based on the purpose of the website, I can gather more information about the pet clinic such as what types of pets it has in treatment, what services are offered, and what pet owners can expect during their visit. This would require searching for more information on the website.", "criticism": "I didn't find any issues with my approach in this step.", "speak": "Based on my analysis, this 

In [73]:
# Use the "speak" field of the step-2 reply as the application-wide context
# that is injected into the step-3 prompt.
global_context = thoughts['thoughts']['speak']
print(global_context)

Based on my analysis, this page appears to belong to a pet clinic which uses the Spring and Angular frameworks. I will now gather more information to help my value generation of forms on this page.


## Local Context

In [78]:
# Rendered text of the page body; supplies page-local context for workflow step 3.
page_text = driver.find_element(by=By.TAG_NAME, value='body').text

In [87]:
# Workflow step 3: combine the form summary, the application context and the
# page text, and ask the model which information is relevant to the form.
messages = [
    dict(role="system", content=system_prompt),
    dict(
        role="user",
        content=f"""
        You are performing the step 3 in the workflow.
        Form Summary: {summary}
        Application Context: {global_context}
        Page Text: {page_text}
        Summarize the information relevant to the form.
        """,
    ),
    dict(
        role="user",
        content="Determine which next command to use, and respond using the format specified above:",
    ),
]

In [88]:
# Print the fully interpolated step-3 user prompt to check what the model will receive.
print(messages[1]["content"])


        You are performing the step 3 in the workflow.
        Form Summary: The form has two input fields, lastName with id lastName, and a search button. The form is submitted using get method without any additional action. The form is used to search for owner given their last name.
        Application Context: Based on my analysis, this page appears to belong to a pet clinic which uses the Spring and Angular frameworks. I will now gather more information to help my value generation of forms on this page.
        Page Text: HOME
OWNERS
VETERINARIANS
PET TYPES
SPECIALTIES
Owners
Last name
Find Owner
Name Address City Telephone Pets
George Franklin 110 W. Liberty St. Madison 6085551023
Leo
Betty Davis 638 Cardinal Ave. Sun Prairie 6085551749
Basil
Eduardo Rodriquez 2693 Commerce St. McFarland 6085558763
Jewel
Rosy
Harold Davis 563 Friendly St. Windsor 6085553198
Iggy
Peter McTavish 2387 S. Fair Way Madison 6085552765
George
Jean Coleman 105 N. Lake St. Monona 6085552654
Max
Samantha
J

In [91]:
# Request the form-relevant context summary (workflow step 3) from the chat model.
res4 = openai.ChatCompletion.create(messages=messages, model="gpt-3.5-turbo")

In [92]:
# Show the raw reply before parsing it: if the model's answer is not valid
# JSON, json.loads raises and the text would otherwise never be displayed.
raw_reply = res4.choices[0].message.content
print(raw_reply)

# `thoughts` holds the whole parsed reply (its "thoughts" and "command" keys).
thoughts = json.loads(raw_reply)

{
    "thoughts": {
        "text": "I will use the fill_inputs command to generate values for the input of the form. Based on the textual content of the page, I can gather that this form is used to search for the owners of pets by last name.",
        "reasoning": "Gathering information about a form and the context in which it appears is helpful to generate values for the form. In this case, the text on the page notes that the form is for searching owners by last name which will help to generate proper inputs.",
        "plan": "- Use fill_inputs command to generate values for form inputs. \n- Use feedback from form submission if necessary to improve value generation",
        "criticism": "",
        "speak": "I will use the fill_inputs command to generate values for the input of the form. Based on the textual content of the page, I can gather that this form is used to search for the owners of pets by last name."
    },
    "command": {
        "name": "fill_inputs",
        "args": 