In [43]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [44]:
import sqlite3

from src.main import main_cluster_multimodal_model
from src.llm.access_2_cluster import Access2Cluster

# Approach

Use a Pre-trained LLM:
* GPT-3, GPT-3.5, or a lighter version like GPT-2 (well-suited for text generation tasks)
* Fine-tuning or adapting the model for specific tasks is possible later

Steps:
1. HTML Processing: Extract relevant information from the HTML file.
    * Use BeautifulSoup or lxml in Python to parse and extract information from the HTML file.
2. Image Processing: Extract relevant information from the image:
    * Use OCR tools such as Tesseract (via the pytesseract library) to extract text from the image.
    * Use OpenCV or PIL (Pillow) in Python to process the image and extract relevant information.
3. Summarize the image and HTML information and the prompt from the Playwright test code using a T5 model.
4. Python Processing: Parse the given Playwright test code of the previous step to use it as a precondition.
5. Combine the extracted information with the prompt:
    * Combine the extracted information from the HTML and the image with the prompt for the language model.
6. Pass the combined input to the language model for generating the UI test code.

--> Run locally via this notebook or as a script using `main.py`.

# Load Data

In [45]:
# Connect to the database and read every row of the `tests` table.
conn = sqlite3.connect('../data/raw/playwright_script.db')
try:
    cursor = conn.cursor()
    res = cursor.execute("SELECT * FROM tests")
    # fetchall() materialises the rows, so `items` stays usable after the close.
    items = res.fetchall()
finally:
    # Always release the database handle, even if the query fails.
    conn.close()

print("There are {} data.".format(len(items)))

There are 100 data.


In [4]:
# Check the first item: one row of the `tests` table, returned as a tuple.
# Judging by the output below, the fields appear to be (id, step description,
# expected result, HTML path, screenshot path, test-script path); the actual
# column names are not visible here.
items[0]

('1.1',
 '[1.1] Öffne die Arbeitsmappe "Übersicht Messstellen" im Ordner "Gewässergüte".',
 '[1.1] Expected result: Die Arbeitsmappe wird geöffnet, der Analysekontext ist nicht sichtbar.',
 '.\\html\\1_1.html',
 '.\\screenshot\\1_1.png',
 '.\\test_script\\1_1.spec.ts')

# Run UI Test Generation locally (smaller model GPT-2)

In [5]:
def get_previous_id(id):
    test, step = map(int, id.split('.'))
    if step > 1:
        previous_id = f"{test}.{step - 1}"
    else:
        print(f"Test {test} has no more previous step. No context available.")

    return previous_id

In [6]:
def fetch_relevant_items(db_file, current_id):
    # Connect to SQLite database
    conn = sqlite3.connect(db_file)
    cursor = conn.cursor()

    # Get the previous ID
    previous_id = get_previous_id(current_id)

    # Prepare the SQL query to retrieve the desired rows
    query = 'SELECT * FROM tests WHERE id IN (?, ?)'
    cursor.execute(query, (current_id, previous_id))

    # Fetch all matching rows
    items = cursor.fetchall()

    # Close the connection
    conn.close()

    return items

In [7]:
def map_items_to_args(items):
    """Turn two ``tests`` rows into keyword arguments for test generation.

    Args:
        items: Sequence of rows. ``items[0]`` (the previous step) provides the
            context file paths at positions 3, 4 and 5; ``items[1]`` (the
            current step) provides the step description at position 1.

    Returns:
        Dict with the keys ``html_path``, ``image_path``,
        ``precondition_path`` and ``prompt``.
    """
    # Context comes from the previous step's row.
    context_row = items[0]
    mapped = {
        "html_path": context_row[3],
        "image_path": context_row[4],
        "precondition_path": context_row[5],
    }

    # The prompt is whatever follows the last ']' of the current step's
    # description (the whole description if there is no ']').
    _, _, instruction = items[1][1].rpartition(']')
    mapped["prompt"] = instruction.strip()

    return mapped

Now we can run the main function with the extracted information. This produces the generated UI test code for the test with ID 1.4.

In [8]:
# Select test ID and database file
# db_file:    SQLite database that fetch_relevant_items queries.
# current_id: "<test>.<step>" identifier of the step to generate code for.
db_file = '../data/raw/playwright_script.db'
current_id = '1.4'

In [75]:
# Fetch the rows for the selected step and its predecessor, then map them to
# the keyword arguments used for generation.
# NOTE: this rebinds `items`, which earlier held every row of the table.
items = fetch_relevant_items(db_file, current_id)
args = map_items_to_args(items)

# To traverse folders in cluster change path: the stored paths use backslashes
# as separators. Only image_path is converted here; html_path and
# precondition_path keep their original form.
args['image_path'] = args['image_path'].replace('\\','/')

Set up Access2Cluster:

In [84]:
# Create the cluster client and log in. login() is awaited, so this cell relies
# on the notebook's top-level await support.
# NOTE(review): presumably authenticates against the cluster — see
# src.llm.access_2_cluster for what login() actually does.
access2cluster = Access2Cluster()
await access2cluster.login()

In [88]:
# Start the LLM through the cluster client, then hand the client itself to the
# generation call as its `model` argument.
await access2cluster.start_llm()
args['model'] = access2cluster

In [89]:
# Instruction text passed to the generation call as `description`.
# (args['model'] is already set in the cell that starts the LLM, so the
# duplicate assignment was dropped here.)
description = (
    "I will describe a UI test in German and you will generate Playwright code. "
    "Don't explain the code, just generate the code block itself. "
    "You get the HTML and a screenshot from the website. "
    "You must continue the code."
)
args['description'] = description

In [90]:
# Run the generation pipeline with the assembled keyword arguments
# (html_path, image_path, precondition_path, prompt, model, description).
# Per the log output below, the generated code is saved to
# './pred_test_script/1_4.pred.py'.
await main_cluster_multimodal_model(**args)

2024-06-22 15:46:37 [[34msrc.main:60[0m] [[32mINFO[0m] >>>> Loading context...[0m
2024-06-22 15:46:37 [[34msrc.data.html_processor:27[0m] [DEBUG[0m] >>>> HTML content parsed successfully. - Lines of Code: 41[0m
2024-06-22 15:46:37 [[34msrc.data.python_processor:15[0m] [DEBUG[0m] >>>> Python code parsed successfully. - Lines of Code: 17)[0m
2024-06-22 15:46:37 [[34msrc.main:67[0m] [[32mINFO[0m] >>>> Context loaded successfully.[0m
2024-06-22 15:46:37 [[34msrc.main:69[0m] [[32mINFO[0m] >>>> Generating test case...[0m
2024-06-22 15:46:48 [[34msrc.ui_tests.test_generation:54[0m] [DEBUG[0m] >>>> Generated code saved to './pred_test_script/1_4.pred.py'[0m
2024-06-22 15:46:48 [[34msrc.main:80[0m] [[32mINFO[0m] >>>> Test case generated for 1_4.[0m


# Further Development: Using the Modular Source Code
You may update the function bodies in the following files, but do NOT rename or delete functions or change their signatures:
* src/main.py: Main function for UI test generation.
    * Change constant Max length (max length of the input text for the model)
* src/ui_tests/test_generation.py: Generate the UI test code using the language model.
    * Expand model selection: Just add new case statements for different models with its code.
* src/data/html_processor.py: Extract relevant information from the HTML file.
* src/data/image_processing.py: Extract relevant information from the image.
* src/data/python_processor.py: Parse the given Playwright test code of the previous step as a precondition.
