In [1]:
import sqlite3
import os

from src.data.data_loading import load_config
from src.data.database import create_finetuning_data_from_db, fetch_relevant_items, map_items_to_args
from src.finetuning import create_finetuning_data_sample, save_finetuning_data_as_json
from src.json_generator.balanced_split import balanced_train_val_test_split
from src.json_generator.generate_json import generate_json
from src.input_builder import create_input_prompt
from src.utils.helpers import get_previous_id
from src.utils.logger import setup_logger

In [2]:
# Notebook-wide logger, created through the project's setup_logger helper.
logger = setup_logger(__name__, level='DEBUG')  # Change to 'INFO' for less verbosity
# NOTE: the following cell sets the working directory to the root of the project.

In [3]:
# Set the working directory to the project root so that the relative paths
# used in this notebook (./config/config.yaml, ./data/raw/...) resolve.
# Guarded so that re-running this cell does not climb one directory further
# each time: we only move up if the config file is not reachable from here.
if not os.path.exists("./config/config.yaml"):
    os.chdir("..")
os.getcwd()

'C:\\Users\\c-beh\\PycharmProjects\\cadenza-playwright-llm'

In [4]:
# Location of the SQLite database and the loaded project configuration.
# Both paths are relative, so they depend on the current working directory.
db_file = './data/raw/playwright_script.db'
config = load_config("./config/config.yaml")

In [5]:
# Compact version of the rest of this notebook
# NOTE(review): the third argument is presumably the name of the dataset/template
# to generate — TODO confirm against generate_json.
generate_json(db_file, config, "finetuned_T5_sc+_html+_single")

2024-07-19 19:47:11 [[34msrc.input_builder:30[0m] [[32mINFO[0m] >>>> Loading context...[0m
2024-07-19 19:47:11 [[34msrc.input_builder:46[0m] [[32mINFO[0m] >>>> Context loaded successfully.[0m
2024-07-19 19:47:11 [[34msrc.input_builder:48[0m] [[32mINFO[0m] >>>> Creating input prompt...[0m
2024-07-19 19:47:11 [[34msrc.input_builder:79[0m] [[32mINFO[0m] >>>> Input prompt created successfully.[0m
2024-07-19 19:47:11 [[34msrc.finetuning:33[0m] [[32mINFO[0m] >>>> Combining input and expected output into json finetuning data format...[0m
2024-07-19 19:47:11 [[34msrc.finetuning:50[0m] [[32mINFO[0m] >>>> Finetuning conversation created successfully.[0m
2024-07-19 19:47:11 [[34msrc.input_builder:30[0m] [[32mINFO[0m] >>>> Loading context...[0m
2024-07-19 19:47:11 [[34msrc.input_builder:46[0m] [[32mINFO[0m] >>>> Context loaded successfully.[0m
2024-07-19 19:47:11 [[34msrc.input_builder:48[0m] [[32mINFO[0m] >>>> Creating input prompt...[0m
2024-07-19 19:

In [5]:
# Load ids to be used for finetuning from database
conn = sqlite3.connect(db_file)
try:
    c = conn.cursor()
    c.execute('SELECT id FROM tests')
    items = c.fetchall()
finally:
    # Always release the connection, even when the query fails (for example
    # because the path was wrong and sqlite3 created an empty database).
    conn.close()

# fetchall() returns one tuple per row; keep only the id column
ids = [row[0] for row in items]

In [6]:
# Draw the train / validation / test split of the test cases
tc_ids_train, tc_ids_val, tc_ids_test = balanced_train_val_test_split()

print("Train: ", tc_ids_train, "\nVal: ", tc_ids_val,"\nTest: ", tc_ids_test)

# Convert every entry to str so it can be compared with string id prefixes
tc_ids_train = list(map(str, tc_ids_train))
tc_ids_test = list(map(str, tc_ids_test))
tc_ids_val = list(map(str, tc_ids_val))

print(tc_ids_train,"\n", tc_ids_val,"\n", tc_ids_test)

Train:  [19  0  7  1 14  3  5 12 11 17 18 27 10  4 25] 
Val:  [ 8 16 13 26] 
Test:  [ 2 15  9 30 28]
['19', '0', '7', '1', '14', '3', '5', '12', '11', '17', '18', '27', '10', '4', '25'] 
 ['8', '16', '13', '26'] 
 ['2', '15', '9', '30', '28']


In [7]:
# Sort every database id into a split according to the part of the id
# in front of the first '.'.
train_ids, test_ids, val_ids = [], [], []
split_targets = (
    (tc_ids_train, train_ids),
    (tc_ids_test, test_ids),
    (tc_ids_val, val_ids),
)
for el in ids:
    tc_prefix = el.split('.')[0]
    # Each split is checked independently, in the order train, test, val
    for tc_ids, bucket in split_targets:
        if tc_prefix in tc_ids:
            bucket.append(el)

# ABOVE IS READY FOR FINETUNING -- GO BELOW TO CREATE JSONS

In [7]:
# Open a connection to the test database and create a cursor for the queries below.
# NOTE(review): the following cell calls sqlite3.connect(db_file) again and rebinds
# conn/cursor, so this connection is never explicitly closed — consider removing one.
conn = sqlite3.connect(db_file)
cursor = conn.cursor()

In [8]:
# Check for added X.0 Cases
conn = sqlite3.connect(db_file)
cursor = conn.cursor()

query = 'SELECT * FROM tests WHERE steps = ""'
cursor.execute(query)
cursor.fetchall()

[('10.0',
  '',
  '',
  '.\\html\\0_1.html',
  '.\\screenshot\\0_1.png',
  '.\\test_script\\0_1.spec.ts'),
 ('11.0',
  '',
  '',
  '.\\html\\0_1.html',
  '.\\screenshot\\0_1.png',
  '.\\test_script\\0_1.spec.ts'),
 ('12.0',
  '',
  '',
  '.\\html\\0_1.html',
  '.\\screenshot\\0_1.png',
  '.\\test_script\\0_1.spec.ts'),
 ('13.0',
  '',
  '',
  '.\\html\\0_1.html',
  '.\\screenshot\\0_1.png',
  '.\\test_script\\0_1.spec.ts'),
 ('14.0',
  '',
  '',
  '.\\html\\0_1.html',
  '.\\screenshot\\0_1.png',
  '.\\test_script\\0_1.spec.ts'),
 ('15.0',
  '',
  '',
  '.\\html\\0_1.html',
  '.\\screenshot\\0_1.png',
  '.\\test_script\\0_1.spec.ts'),
 ('16.0',
  '',
  '',
  '.\\html\\0_1.html',
  '.\\screenshot\\0_1.png',
  '.\\test_script\\0_1.spec.ts'),
 ('17.0',
  '',
  '',
  '.\\html\\0_1.html',
  '.\\screenshot\\0_1.png',
  '.\\test_script\\0_1.spec.ts'),
 ('18.0',
  '',
  '',
  '.\\html\\0_1.html',
  '.\\screenshot\\0_1.png',
  '.\\test_script\\0_1.spec.ts'),
 ('19.0',
  '',
  '',
  '.\\html\\0_1

In [9]:
# Inspect the rows that fetch_relevant_items returns for the first id in `ids`
current_id = ids[0]
items = fetch_relevant_items(cursor, current_id)
print(items)

[('1.0', '', '', '.\\html\\0_1.html', '.\\screenshot\\0_1.png', '.\\test_script\\0_1.spec.ts'), ('1.1', '[1.1] Öffne die Arbeitsmappe "Übersicht Messstellen" im Ordner "Gewässergüte".', '[1.1] Expected result: Die Arbeitsmappe wird geöffnet, der Analysekontext ist nicht sichtbar.', '.\\html\\1_1.html', '.\\screenshot\\1_1.png', '.\\test_script\\1_1.spec.ts')]


In [10]:
# Turn the fetched rows into the keyword arguments for the sample builder
args = map_items_to_args(items, config)
print(args.keys())

# Print each field on its own line, prefixed with a short label
labelled_fields = (
    ("DESC: ", "description"),
    ("HTML: ", "html_path"),
    ("IMG: ", "image_path"),
    ("VAL: ", "validation_path"),
    ("PRECON: ", "precondition_path"),
)
print_parts = []
for label, key in labelled_fields:
    print_parts.extend(("\n", label, args[key]))
print(*print_parts)

dict_keys(['html_path', 'image_path', 'precondition_path', 'description', 'validation_path', 'config'])

 DESC:  Öffne die Arbeitsmappe "Übersicht Messstellen" im Ordner "Gewässergüte". 
 HTML:  .\data\raw\.\html\0_1.html 
 IMG:  .\data\raw\.\screenshot\0_1.png 
 VAL:  .\data\raw\.\test_script\1_1.spec.ts 
 PRECON:  .\data\raw\.\test_script\0_1.spec.ts


In [11]:
# Use forward slashes in the image path; the other paths are left untouched
# (their conversions are kept commented out below).
args['image_path'] = args['image_path'].replace('\\','/')
#args['html_path'] = args['html_path'].replace('\\','/')
#args['precondition_path'] = args['precondition_path'].replace('\\','/')
#args['validation_path'] = args['validation_path'].replace('\\','/')
print(args['image_path'])

./data/raw/./screenshot/0_1.png


In [39]:
# Build a single finetuning sample from the mapped arguments and show it
convo = create_finetuning_data_sample(**args)

print(convo)

2024-07-18 15:18:49 [[34msrc.input_builder:30[0m] [[32mINFO[0m] >>>> Loading context...[0m
2024-07-18 15:18:49 [[34msrc.input_builder:46[0m] [[32mINFO[0m] >>>> Context loaded successfully.[0m
2024-07-18 15:18:49 [[34msrc.input_builder:48[0m] [[32mINFO[0m] >>>> Creating input prompt...[0m
2024-07-18 15:18:49 [[34msrc.input_builder:79[0m] [[32mINFO[0m] >>>> Input prompt created successfully.[0m
2024-07-18 15:18:49 [[34msrc.finetuning:34[0m] [[32mINFO[0m] >>>> Combining input and expected output into json finetuning data format...[0m
2024-07-18 15:18:49 [[34msrc.finetuning:51[0m] [[32mINFO[0m] >>>> Finetuning conversation created successfully.[0m
{'id': '01_01', 'image': '.\\data\\raw\\.\\screenshot\\0_1.png', 'conversations': [{'from': 'human', 'value': '### Simplified HTML Content:\nButtons: \n{"id": "navigationTrigger", "class": "button button-icon button-borderless"}\n{"id": "workbook-create", "class": "button workbook-create button-icon"}\n{"id": "RDxYr2

In [33]:
# Fetch the row of a step together with the row of the step preceding it
current_id = "1.2"
previous_id = get_previous_id(current_id)

# Bind the ids as parameters instead of formatting them into the SQL text:
# unquoted, an id such as 1.10 is parsed as the number 1.1, and a missing
# previous id would be rendered as the bare word None.
query = "SELECT * FROM tests WHERE id = ?"

if not previous_id:
    # No preceding step: report an empty result instead of running a broken query
    comp = []
    print(comp)
else:
    cursor.execute(query, (previous_id,))
    prev = cursor.fetchall()
    cursor.execute(query, (current_id,))
    curr = cursor.fetchall()

    # Previous row(s) first, then the current one
    comp = prev + curr


PASST (German: "fits" / OK)


# -- CREATE JSONS HERE -- 

In [8]:
# Build the finetuning data for each split from its list of ids
finetuning_data_test = create_finetuning_data_from_db(test_ids, db_file, config)
finetuning_data_train = create_finetuning_data_from_db(train_ids, db_file, config)
finetuning_data_val = create_finetuning_data_from_db(val_ids, db_file, config)

2024-07-18 17:34:02 [[34msrc.input_builder:30[0m] [[32mINFO[0m] >>>> Loading context...[0m
2024-07-18 17:34:02 [[34msrc.input_builder:46[0m] [[32mINFO[0m] >>>> Context loaded successfully.[0m
2024-07-18 17:34:02 [[34msrc.input_builder:48[0m] [[32mINFO[0m] >>>> Creating input prompt...[0m
2024-07-18 17:34:02 [[34msrc.input_builder:79[0m] [[32mINFO[0m] >>>> Input prompt created successfully.[0m
2024-07-18 17:34:02 [[34msrc.finetuning:33[0m] [[32mINFO[0m] >>>> Combining input and expected output into json finetuning data format...[0m
2024-07-18 17:34:02 [[34msrc.finetuning:50[0m] [[32mINFO[0m] >>>> Finetuning conversation created successfully.[0m
2024-07-18 17:34:02 [[34msrc.input_builder:30[0m] [[32mINFO[0m] >>>> Loading context...[0m
2024-07-18 17:34:02 [[34msrc.input_builder:46[0m] [[32mINFO[0m] >>>> Context loaded successfully.[0m
2024-07-18 17:34:02 [[34msrc.input_builder:48[0m] [[32mINFO[0m] >>>> Creating input prompt...[0m
2024-07-18 17:

In [9]:
# Write each split to its own JSON file, in the order test, train, val
split_outputs = (
    (finetuning_data_test, "test_template_1"),
    (finetuning_data_train, "train_template_1"),
    (finetuning_data_val, "val_template_1"),
)
for split_data, split_name in split_outputs:
    save_finetuning_data_as_json(split_data, name=split_name)

2024-07-18 17:34:20 [[34msrc.finetuning:70[0m] [[32mINFO[0m] >>>> Finetuning data saved as JSON file: ./data/finetuning/s21_finetuning_data_test_template_1_20240718-173420.json[0m
2024-07-18 17:34:20 [[34msrc.finetuning:70[0m] [[32mINFO[0m] >>>> Finetuning data saved as JSON file: ./data/finetuning/s61_finetuning_data_train_template_1_20240718-173420.json[0m
2024-07-18 17:34:20 [[34msrc.finetuning:70[0m] [[32mINFO[0m] >>>> Finetuning data saved as JSON file: ./data/finetuning/s18_finetuning_data_val_template_1_20240718-173420.json[0m


### Simplified HTML Content:
Buttons: 
{"id": "navigationTrigger", "class": "button button-icon button-borderless"}
{"id": "workbook-create", "class": "button workbook-create button-icon"}
{"id": "RDxYr2vFytOijWjelj7P1", "class": "button navigation-menu button-icon"}
{"text": "Arbeitsmappe importieren", "class": "button"}
{"text": "Repository neu einlesen …", "class": "button"}
Inputs: 
{"class": "select2-search__field", "aria-label": "Suchen nach …", "type": "search", "placeholder": "Suchen nach …"}
Links: 
{"text": "Zum Navigatorbaum springen", "id": "skip-to-navigator", "class": "button button-primary"}
{"text": "Zum Hauptbereich springen", "id": "skip-to-content", "class": "button button-primary"}
{"text": "Startseite", "id": "home", "class": "button button-icon button-borderless"}
{"text": "Karte", "class": "button button-icon button-borderless d-topnav--map-button"}
{"text": "Verzeichnis Tutorial", "id": "d-nav-tree-node_ROOT-Tutorial_firstContent", "class": "d-nav-tree-node--mai

# JavaScript demarcation

In [None]:
#ids
#db
#config

In [5]:
# Open the database and fetch the rows belonging to one test step
conn = sqlite3.connect(db_file)
cursor = conn.cursor()

current_id = "1.1"
items = fetch_relevant_items(cursor, current_id)
row_count = len(items)
if row_count < 2:
    # Only a warning: processing continues with whatever rows were returned
    logger.warning(f"Insufficient data for ID {current_id}. Expected 2 rows but got {row_count}.")

# Build the keyword arguments and switch the image path to forward slashes
args = map_items_to_args(items, config)
args['image_path'] = '/'.join(args['image_path'].split('\\'))


In [6]:
# Build one finetuning sample from the mapped arguments and show the raw dict
data_sample = create_finetuning_data_sample(**args)
print(data_sample)

2024-07-19 14:20:24 [[34msrc.input_builder:30[0m] [[32mINFO[0m] >>>> Loading context...[0m
2024-07-19 14:20:25 [[34msrc.input_builder:46[0m] [[32mINFO[0m] >>>> Context loaded successfully.[0m
2024-07-19 14:20:25 [[34msrc.input_builder:48[0m] [[32mINFO[0m] >>>> Creating input prompt...[0m
2024-07-19 14:20:25 [[34msrc.input_builder:79[0m] [[32mINFO[0m] >>>> Input prompt created successfully.[0m
2024-07-19 14:20:25 [[34msrc.finetuning:33[0m] [[32mINFO[0m] >>>> Combining input and expected output into json finetuning data format...[0m
2024-07-19 14:20:25 [[34msrc.finetuning:50[0m] [[32mINFO[0m] >>>> Finetuning conversation created successfully.[0m
{'id': '01_01', 'image': './data/raw/./screenshot/0_1.png', 'conversations': [{'from': 'human', 'value': '### Simplified HTML Content:\nButtons: \nButton ids: navigationTrigger, workbook-create, RDxYr2vFytOijWjelj7P1\nButton classes: button button-icon button-borderless, button workbook-create button-icon, button nav

In [7]:
# Show every conversation turn: the speaker first, then the text on the next line
for turn in data_sample["conversations"]:
    print(turn["from"], turn["value"], sep="\n")

human
### Simplified HTML Content:
Buttons: 
Button ids: navigationTrigger, workbook-create, RDxYr2vFytOijWjelj7P1
Button classes: button button-icon button-borderless, button workbook-create button-icon, button navigation-menu button-icon, button, button
Button texts: Arbeitsmappe importieren, Repository neu einlesen …
Inputs: 
Input classes: select2-search__field
Input aria-labels: Suchen nach …
Input types: search
Input placeholders: Suchen nach …
Links: 
Link texts: Zum Navigatorbaum springen, Zum Hauptbereich springen, Startseite, Karte, Verzeichnis Tutorial, Verzeichnis Gewässergüte, Verzeichnis Automobile, Verzeichnis Ergänzende Geodaten, Verzeichnis Zentrale Dienste, Verzeichnis Meine Arbeitsmappen, Arbeitsmappe Zugangsdaten, Zugangsdaten, disy Cadenza, Tutorials, Lernmodulen, Onlinehilfe, Webseite, Lernmodulen, 1, 2, 3, 4, 5, 6, 7, disy Cadenza v9.4.71, © Disy Informationssysteme GmbH, Über Disy
Link ids: skip-to-navigator, skip-to-content, home, d-nav-tree-node_ROOT-Tutorial_

In [33]:
# Wrap a sample string between a leading "'''javascript" marker and a closing "'''"
test = " test "

test_w_dMark = f"'''javascript{test}'''"

print(test_w_dMark)

'''javascript test '''


In [5]:
# Print a Playwright script wrapped in a ```javascript fence.
# The text is split into one literal per output line; adjacent literals are
# concatenated by Python, so the printed string is unchanged.
playwright_snippet = (
    "```javascript\n"
    "import { test, expect } from '@playwright/test';\n"
    "import { writeFileSync } from 'fs';\n"
    "\n"
    "\n"
    "test('test', async ({ page }) => {\n"
    "  await page.goto('http://localhost:8080/cadenza/');\n"
    "  await page.getByRole('link', { name: 'Anmelden' }).click();\n"
    "  await page.getByLabel('Benutzername *').click();\n"
    "  await page.getByLabel('Benutzername *').fill('Admin');\n"
    "  await page.getByLabel('Benutzername *').press('Tab');\n"
    "  await page.getByPlaceholder(' ').fill('Admin');\n"
    "  await page.getByRole('button', { name: 'Anmelden' }).click();\n"
    "  await page.getByText('Verzeichnis Gewässergüte', { exact: true }).click();\n"
    "  const parentElement = await page.getByText('Arbeitsmappe Übersicht Messstellen').locator('..');\n"
    "  await parentElement.locator('.d-icon.d-icon-bold.status-icon').click(); \n"
    "\n"
    "});\n"
    "```"
)
print(playwright_snippet)

```javascript
import { test, expect } from '@playwright/test';
import { writeFileSync } from 'fs';


test('test', async ({ page }) => {
  await page.goto('http://localhost:8080/cadenza/');
  await page.getByRole('link', { name: 'Anmelden' }).click();
  await page.getByLabel('Benutzername *').click();
  await page.getByLabel('Benutzername *').fill('Admin');
  await page.getByLabel('Benutzername *').press('Tab');
  await page.getByPlaceholder(' ').fill('Admin');
  await page.getByRole('button', { name: 'Anmelden' }).click();
  await page.getByText('Verzeichnis Gewässergüte', { exact: true }).click();
  const parentElement = await page.getByText('Arbeitsmappe Übersicht Messstellen').locator('..');
  await parentElement.locator('.d-icon.d-icon-bold.status-icon').click(); 

});
```
