In [1]:
import os
import openai
from dotenv import load_dotenv
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.webdriver.common.by import By

In [2]:
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

In [3]:
driver = webdriver.Chrome('drivers/chromedriver108')

# Global Context

In [4]:
base = 'https://www.aircanada.com/us/en/aco/home.html'

In [5]:
driver.get(base)

In [6]:
doc = bs(driver.find_element(By.TAG_NAME, 'html').get_attribute('innerHTML'), 'html.parser')

In [7]:
title = str(doc.head.title) + '\n'
metas = '\n'.join(list(map(lambda x: str(x), doc.head.find_all('meta'))))
head_string = title + metas

In [8]:
category_prompt = '''The following text is the url and content of head tag of a website.
url: {0}
head: {1}
The website category is:'''.format(base, head_string)

In [9]:
category_prompt

'The following text is the url and content of head tag of a website.\nurl: https://www.aircanada.com/us/en/aco/home.html\nhead: <title>Air Canada</title>\n<meta charset="utf-8"/>\n<meta content="IE=edge" http-equiv="X-UA-Compatible"/>\n<meta content="max-age=0" http-equiv="cache-control"/>\n<meta content="no-store" http-equiv="cache-control"/>\n<meta content="0" http-equiv="expires"/>\n<meta content="Tue, 01 Jan 1980 1:00:00 GMT" http-equiv="expires"/>\n<meta content="no-cache" http-equiv="pragma"/>\n<meta content="width=device-width, initial-scale=1" name="viewport"/>\n<meta content="#fffffe" media="(prefers-color-scheme: light)" name="theme-color"/>\n<meta content="#fffffe" media="(prefers-color-scheme: dark)" name="theme-color"/>\n<meta content="#000000" name="msapplication-TileColor"/>\n<meta content="/achome/assets/img/favicon/mstile-144x144.png" name="msapplication-TileImage"/>\nThe website category is:'

In [10]:
category_res = openai.Completion.create(
    model="text-davinci-003",
    prompt=category_prompt,
    temperature=0,
    top_p=1,
    frequency_penalty=0.0,
    presence_penalty=0.0,
    max_tokens=256
)

category_res

<OpenAIObject text_completion id=cmpl-6fay96hALvWzX3yDeaLQYDDBaUVmt at 0x112eca950> JSON: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "text": " Travel"
    }
  ],
  "created": 1675371577,
  "id": "cmpl-6fay96hALvWzX3yDeaLQYDDBaUVmt",
  "model": "text-davinci-003",
  "object": "text_completion",
  "usage": {
    "completion_tokens": 1,
    "prompt_tokens": 308,
    "total_tokens": 309
  }
}

In [11]:
global_context = 'The website name is {0}. This website is a {1} website.'.format(
    driver.current_url,
    category_res.choices[0].text.strip()
)

global_context

'The website name is https://www.aircanada.com/us/en/aco/home.html. This website is a Travel website.'

# Page Context

In [12]:
driver.get('https://www.aircanada.com/us/en/aco/home.html/')

In [81]:
body_doc = bs(driver.find_element(By.TAG_NAME, 'body').get_attribute('innerHTML'), 'html.parser')

In [82]:
headings = []
for h in range(6):
    headings = [*headings, *list(map(lambda x: x.get_text().strip(), body_doc.find_all('h' + str(h + 1))))]

In [83]:
local_prompt = '''{0}
We are in page {1} of the website. The headings in this page are: {2}.
The page main functionality is'''.format(
    global_context,
    driver.current_url,
    ', '.join(headings)
)

print(local_prompt)

The website name is https://www.aircanada.com/us/en/aco/home.html. This website is a Travel website.
We are in page https://www.aircanada.com/us/en/aco/home.html of the website. The headings in this page are: Air Canada -  Official website, Top of page banner, Offers, Please wait while content is loading., Please wait while content is loading., Customer support, Special offers, About Air Canada, Earn up to 100K pts, worth $1,250 in travel value, You’ve got to go North-ER, Access a flight booking.
The page main functionality is


In [84]:
local_res = openai.Completion.create(
    model="text-davinci-003",
    prompt=local_prompt,
    temperature=0,
    top_p=1,
    frequency_penalty=0.0,
    presence_penalty=0.0,
    max_tokens=256,
    stop=['.']
)

print(local_res.choices[0].text)

 to provide information about Air Canada and to allow customers to book flights


In [85]:
local_context = '''{0}. We are in page {1}. The page main functionality is {2}.'''.format(
    global_context,
    driver.current_url,
    local_res.choices[0].text.strip()
)

local_context

'The website name is https://www.aircanada.com/us/en/aco/home.html. This website is a Travel website.. We are in page https://www.aircanada.com/us/en/aco/home.html. The page main functionality is to provide information about Air Canada and to allow customers to book flights.'

# Input Context

In [90]:
print(body_doc.find_all('form')[0].find_all('input')[0])

<input aria-describedby="bkmgMyBookings_bookingRefNumberSubscript" aria-disabled="false" aria-invalid="true" aria-label="Booking reference/Ticket number" autocomplete="on" autocorrect="off" class="abc-form-element-input abc-form-element-main text-transform-uppercase ng-valid ng-star-inserted ng-touched ng-dirty" id="bkmgMyBookings_bookingRefNumber" maxlength="13" name="bkmgMyBookings_bookingRefNumber" placeholder="" spellcheck="false" type="text"/>


In [123]:
input_prompt = '''{0}
We want to complete the form on this page.
The input on the page has label {1}, and its html is {2}. Provide a value for the input:'''.format(
    local_context,
    'Booking Reference/Ticket Number',
    body_doc.find_all('form')[0].find_all('input')[0]
)

input_prompt

'The website name is https://www.aircanada.com/us/en/aco/home.html. This website is a Travel website.. We are in page https://www.aircanada.com/us/en/aco/home.html. The page main functionality is to provide information about Air Canada and to allow customers to book flights.\nWe want to complete the form on this page.\nThe input on the page has label Booking Reference/Ticket Number, and its html is <input aria-describedby="bkmgMyBookings_bookingRefNumberSubscript" aria-disabled="false" aria-invalid="true" aria-label="Booking reference/Ticket number" autocomplete="on" autocorrect="off" class="abc-form-element-input abc-form-element-main text-transform-uppercase ng-valid ng-star-inserted ng-touched ng-dirty" id="bkmgMyBookings_bookingRefNumber" maxlength="13" name="bkmgMyBookings_bookingRefNumber" placeholder="" spellcheck="false" type="text"/>. Provide a value for the input:'

In [124]:
input_res = openai.Completion.create(
    model="text-davinci-003",
    prompt=input_prompt,
    temperature=0.7,
    top_p=1,
    frequency_penalty=0.0,
    presence_penalty=0.0,
    max_tokens=256,
)

print(input_res.choices[0].text.strip())

MyBookingReference12345


# Error Context

In [125]:
sample_error = '''Your booking reference is a sequence of six letters and/or numbers.

Your ticket number is a sequence of 13 numbers.

Please make any necessary corrections and try again, or call Air Canada Reservations for assistance.'''

In [127]:
values = [
    "MyBookingReference12345",
]

values_string = '"' + '", "'.join(values)+ '"'

values_string

'"MyBookingReference12345"'

In [128]:
error_prompt = '''{0}
The input on the page has label {1}, and its html is {2}.
We entered the following values for input: {3} and we got the following error: "{4}".
A valid input sample is:'''.format(
    local_context,
    'Departure',
    body_doc.find_all('form')[0].find_all('input')[0],
    values_string,
    sample_error
)

error_prompt


'The website name is https://www.aircanada.com/us/en/aco/home.html. This website is a Travel website.. We are in page https://www.aircanada.com/us/en/aco/home.html. The page main functionality is to provide information about Air Canada and to allow customers to book flights.\nThe input on the page has label Departure, and its html is <input aria-describedby="bkmgMyBookings_bookingRefNumberSubscript" aria-disabled="false" aria-invalid="true" aria-label="Booking reference/Ticket number" autocomplete="on" autocorrect="off" class="abc-form-element-input abc-form-element-main text-transform-uppercase ng-valid ng-star-inserted ng-touched ng-dirty" id="bkmgMyBookings_bookingRefNumber" maxlength="13" name="bkmgMyBookings_bookingRefNumber" placeholder="" spellcheck="false" type="text"/>.\nWe entered the following values for input: "MyBookingReference12345" and we got the following error: "Your booking reference is a sequence of six letters and/or numbers.\n\nYour ticket number is a sequence of 

In [129]:
error_res = openai.Completion.create(
    model="text-davinci-003",
    prompt=error_prompt,
    temperature=0,
    top_p=1,
    frequency_penalty=0.0,
    presence_penalty=0.0,
    max_tokens=256,
)

print(error_res.choices[0].text.strip())

"ABC123456789".
