In [6]:
# Reload imported modules automatically before each cell runs, so edits to the
# project sources under src/ take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [7]:
import sqlite3

from src.main import main_cluster_multimodal_model
from src.llm.access_2_cluster import Access2Cluster

# Approach

Use a Pre-trained LLM:
* GPT-3, GPT-3.5, or a lighter version like GPT-2 (well-suited for text generation tasks)
* Fine-tuning or adapting the model to specific tasks is possible later

Steps:
1. HTML Processing: Extract relevant information from the HTML file.
    * Use BeautifulSoup or lxml in Python to parse and extract information from the HTML file.
2. Image Processing: Extract relevant information from the image:
    * Use an OCR engine such as Tesseract (via the pytesseract wrapper) to extract text from the image.
    * Use OpenCV or PIL (Pillow) in Python to process the image and extract relevant information.
3. Summarize the image information, the HTML information and the prompt from the Playwright test code using a T5 model.
4. Python Processing: Parse the Playwright test code of the previous step; it serves as the precondition.
5. Combine the extracted information with the prompt:
    * Combine the extracted information from the HTML and the image with the prompt for the language model.
6. Pass the combined input to the language model for generating the UI test code.

--> Run locally, either from this notebook or as a script via `main.py`.

# Load Data

In [8]:
# Connect to the database and read every row of the `tests` table.
# The connection is closed in `finally` so the file handle is released even
# if the query fails (sqlite3 connections are not closed automatically).
conn = sqlite3.connect('../data/raw/playwright_script.db')
try:
    cursor = conn.cursor()
    res = cursor.execute("SELECT * FROM tests")
    items = res.fetchall()
finally:
    conn.close()

print("There are {} data.".format(len(items)))

There are 100 data.


In [9]:
# Display the first row to inspect the column layout of the `tests` table
items[0]

('1.1',
 '[1.1] Öffne die Arbeitsmappe "Übersicht Messstellen" im Ordner "Gewässergüte".',
 '[1.1] Expected result: Die Arbeitsmappe wird geöffnet, der Analysekontext ist nicht sichtbar.',
 '.\\html\\1_1.html',
 '.\\screenshot\\1_1.png',
 '.\\test_script\\1_1.spec.ts')

# Run UI Test Generation locally (smaller model GPT-2)

In [10]:
def get_previous_id(id):
    test, step = map(int, id.split('.'))
    if step > 1:
        previous_id = f"{test}.{step - 1}"
    else:
        print(f"Test {test} has no more previous step. No context available.")

    return previous_id

In [11]:
def fetch_relevant_items(db_file, current_id):
    """Fetch the rows of the current test step and of its preceding step.

    Args:
        db_file: Path of the SQLite database that contains the ``tests`` table.
        current_id: ID of the step to look up, e.g. ``"1.2"``.

    Returns:
        The matching rows of ``tests`` as a list of tuples, with the row of
        the previous step first and the row of ``current_id`` after it. If
        there is no previous step (or its row is missing), only the rows that
        were found are returned.
    """
    # Resolve the previous ID before opening the database so that a malformed
    # ID cannot leave an open connection behind.
    previous_id = get_previous_id(current_id)

    # Without ORDER BY, SQLite returns rows in an unspecified order, but the
    # caller relies on "previous step first, current step second". Sort the
    # previous step to the front explicitly. If previous_id is None, it binds
    # as NULL: it matches nothing in IN (...) and the CASE falls to ELSE.
    query = (
        'SELECT * FROM tests WHERE id IN (?, ?) '
        'ORDER BY CASE WHEN id = ? THEN 0 ELSE 1 END'
    )

    conn = sqlite3.connect(db_file)
    try:
        cursor = conn.cursor()
        cursor.execute(query, (current_id, previous_id, previous_id))
        return cursor.fetchall()
    finally:
        # Release the connection even when the query raises.
        conn.close()

In [12]:
def map_items_to_args(items):
    """Translate the two fetched rows into keyword arguments for the pipeline.

    ``items[0]`` is used as the context row (step x-1) and ``items[1]`` as the
    row of the step x to generate. Columns are read by position: index 1 is
    the step text, indices 3, 4 and 5 are the HTML, image and script paths.

    Args:
        items: Sequence holding at least two rows, context row first.

    Returns:
        A dict with the keys ``html_path``, ``image_path``,
        ``precondition_path``, ``description`` and ``validation_path``.
    """
    context_row, target_row = items[0], items[1]

    # The file paths of step x-1 provide the context
    html_path, image_path, precondition_path = (
        context_row[3],
        context_row[4],
        context_row[5],
    )
    # The script of step x is the reference the generated code is validated against
    validation_path = target_row[5]

    # The prompt is the text of step x after the last ']' (i.e. without its "[x.y]" tag)
    prompt_text = target_row[1].rpartition(']')[2].strip()

    return dict(
        html_path=html_path,
        image_path=image_path,
        precondition_path=precondition_path,
        description=prompt_text,
        validation_path=validation_path,
    )

Now we can run the main function with the extracted information. This produces the generated UI test code for the selected test step (ID 1.2 below).

In [13]:
# Select the database file and the ID ("<test>.<step>") of the step to generate
db_file = '../data/raw/playwright_script.db'
current_id = '1.2'

In [14]:
# Fetch the rows of the selected step and its predecessor and map them to the
# keyword arguments of the pipeline.
# NOTE: this rebinds `items`, which held every row of the table further up.
items = fetch_relevant_items(db_file, current_id)
args = map_items_to_args(items)

# The stored paths use backslashes (e.g. '.\\screenshot\\1_1.png'); switch the
# image path to forward slashes so folders can be traversed on the cluster.
args['image_path'] = args['image_path'].replace('\\','/')

Set up Access2Cluster:

In [54]:
# Create the cluster client and log in. login() is awaited, so it is a
# coroutine; top-level await relies on the notebook's running event loop.
access2cluster = Access2Cluster()
await access2cluster.login()

In [58]:
# Presumably launches the LLM on the cluster — confirm against Access2Cluster.start_llm
await access2cluster.start_llm()

In [59]:
# Pass the cluster client to the pipeline as its `model` keyword argument
args['model'] = access2cluster

In [63]:
# Run the pipeline with the collected keyword arguments and keep its result
scores = await main_cluster_multimodal_model(**args)

2024-06-29 16:29:43 [[34msrc.main:66[0m] [[32mINFO[0m] >>>> Loading context...[0m
2024-06-29 16:29:43 [[34msrc.data.html_processor:142[0m] [DEBUG[0m] >>>> HTML elements extracted successfully. - Number of Elements: 12 - Number of Characters: 944[0m
2024-06-29 16:29:43 [[34msrc.data.python_processor:15[0m] [DEBUG[0m] >>>> Python code parsed successfully. - Lines of Code: 17[0m
2024-06-29 16:29:43 [[34msrc.main:71[0m] [[32mINFO[0m] >>>> Context loaded successfully.[0m
2024-06-29 16:29:43 [[34msrc.main:74[0m] [[32mINFO[0m] >>>> Creating input prompt...[0m
2024-06-29 16:29:43 [[34msrc.main:76[0m] [[32mINFO[0m] >>>> Input prompt created successfully.[0m
2024-06-29 16:29:43 [[34msrc.main:79[0m] [[32mINFO[0m] >>>> Generating test case...[0m


--- Logging error ---
Traceback (most recent call last):
  File "C:\Users\Johannes\anaconda3\envs\PSDA\lib\logging\__init__.py", line 1100, in emit
    msg = self.format(record)
  File "C:\Users\Johannes\anaconda3\envs\PSDA\lib\logging\__init__.py", line 943, in format
    return fmt.format(record)
  File "C:\Users\Johannes\anaconda3\envs\PSDA\lib\logging\__init__.py", line 678, in format
    record.message = record.getMessage()
  File "C:\Users\Johannes\anaconda3\envs\PSDA\lib\logging\__init__.py", line 368, in getMessage
    msg = msg % self.args
TypeError: not all arguments converted during string formatting
Call stack:
  File "C:\Users\Johannes\anaconda3\envs\PSDA\lib\runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "C:\Users\Johannes\anaconda3\envs\PSDA\lib\runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "C:\Users\Johannes\anaconda3\envs\PSDA\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
    app.

2024-06-29 16:30:16 [[34msrc.ui_tests.test_generation:64[0m] [DEBUG[0m] >>>> Generated code saved to './pred_test_script/1_2.pred.py'[0m
2024-06-29 16:30:16 [[34msrc.data.python_processor:15[0m] [DEBUG[0m] >>>> Python code parsed successfully. - Lines of Code: 16[0m
2024-06-29 16:30:16 [[34msrc.main:88[0m] [[32mINFO[0m] >>>> Test case generated for 1_2.[0m
2024-06-29 16:30:16 [[34msrc.data.python_processor:15[0m] [DEBUG[0m] >>>> Python code parsed successfully. - Lines of Code: 16[0m


--- Logging error ---
Traceback (most recent call last):
  File "C:\Users\Johannes\anaconda3\envs\PSDA\lib\logging\__init__.py", line 1100, in emit
    msg = self.format(record)
  File "C:\Users\Johannes\anaconda3\envs\PSDA\lib\logging\__init__.py", line 943, in format
    return fmt.format(record)
  File "C:\Users\Johannes\anaconda3\envs\PSDA\lib\logging\__init__.py", line 678, in format
    record.message = record.getMessage()
  File "C:\Users\Johannes\anaconda3\envs\PSDA\lib\logging\__init__.py", line 368, in getMessage
    msg = msg % self.args
TypeError: not all arguments converted during string formatting
Call stack:
  File "C:\Users\Johannes\anaconda3\envs\PSDA\lib\runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "C:\Users\Johannes\anaconda3\envs\PSDA\lib\runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "C:\Users\Johannes\anaconda3\envs\PSDA\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
    app.

In [64]:
# Display the result returned by the pipeline run
scores

{'bleu': 0.7214177257714296}

# Further Development: Using the modular source code
You may change the function bodies in the following files, but do NOT rename or delete functions or change their signatures:
* src/main.py: Main function for UI test generation.
    * Change constant Max length (max length of the input text for the model)
* src/ui_tests/test_generation.py: Generate the UI test code using the language model.
    * Expand the model selection: add a new case statement, together with its code, for each additional model.
* src/data/html_processor.py: Extract relevant information from the HTML file.
* src/data/image_processing.py: Extract relevant information from the image.
* src/data/python_processor.py: Parse the Playwright test code of the previous step, which serves as the precondition.
