In [1]:
import sqlite3
import os

from src.data.data_loading import load_config
from src.data.database import create_finetuning_data_from_db
from src.finetuning import create_finetuning_data_sample, save_finetuning_data_as_json
from src.balanced_split import balanced_train_val_test_split
from src.input_builder import create_input_prompt
from src.utils.logger import setup_logger
from src.data.code_processor import parse_code

In [2]:
# Notebook-wide logger; the helper function defined further down logs through it.
logger = setup_logger(__name__, level='DEBUG')  # Change to 'INFO' for less verbosity

In [3]:
# Set the working directory to the root of the project (the folder that holds
# ./config/config.yaml, which the cells below load by relative path).
# Guarded so that re-running this cell does not climb another directory level.
if not os.path.exists("./config/config.yaml"):
    os.chdir("..")
os.getcwd()

'C:\\Users\\c-beh\\PycharmProjects\\cadenza-playwright-llm'

In [7]:
# Both paths are relative to the project root (the current working directory).
db_file = './data/raw/playwright_script.db'
config = load_config("./config/config.yaml")

In [8]:
# Load ids to be used for finetuning from database
conn = sqlite3.connect(db_file)
try:
    c = conn.cursor()
    c.execute('SELECT id FROM tests')
    items = c.fetchall()
finally:
    # Release the database handle even if the query fails.
    conn.close()

# Each fetched row is a 1-tuple. Every id is kept: the former `[0:]` slice
# skipped nothing, so the old "skip the first id" remark did not match the code.
ids = [row[0] for row in items]

In [9]:
# Split the test cases into train / validation / test groups.
tc_ids_train, tc_ids_val, tc_ids_test = balanced_train_val_test_split()

print(
    "Train: ", tc_ids_train,
    "\nVal: ", tc_ids_val,
    "\nTest: ", tc_ids_test,
)

Train:  [19  0  7  1 14  3  5 12 11 17 18 27 10  4 25] 
Val:  [ 8 16 13 26] 
Test:  [ 2 15  9 30 28]


In [88]:
# Scratch check: turn a list of ints into a list of their string forms.
inte = list(map(str, [13, 17]))
print(inte)
print(type(inte[0]))

['13', '17']
<class 'str'>


In [89]:
# Show the step ids that were read from the `tests` table.
print(ids)

['1.1', '1.2', '1.3', '1.4', '2.1', '2.2', '3.1', '3.2', '4.1', '4.2', '4.3', '4.4', '4.5', '4.6', '4.7', '5.1', '5.2', '5.3', '5.4', '5.5', '7.1', '7.2', '7.3', '7.4', '8.1', '8.2', '8.3', '8.4', '8.5', '8.6', '9.1', '9.2', '9.3', '9.4', '9.5', '9.6', '10.1', '10.2', '10.3', '10.4', '10.5', '10.6', '11.1', '11.2', '11.3', '11.4', '13.1', '13.2', '13.3', '15.1', '15.2', '15.3', '15.4', '15.5', '17.1', '17.2', '17.3', '17.4', '19.1', '19.2', '19.3', '19.4', '26.1', '26.2', '26.3', '26.4', '26.5', '26.6', '28.1', '28.2', '28.3', '28.4', '12.1', '12.2', '12.3', '12.4', '12.5', '14.1', '14.2', '14.3', '16.1', '16.2', '16.3', '18.1', '18.2', '18.3', '25.1', '25.2', '25.3', '25.4', '25.5', '27.1', '27.2', '27.3', '27.4', '27.5', '30.1', '30.2', '30.3', '30.4']


In [10]:
# The step ids from the database are strings ("<tc>.<step>"), so cast the
# test-case numbers of every split to strings before comparing them.
tc_ids_train = list(map(str, tc_ids_train))
tc_ids_test = list(map(str, tc_ids_test))
tc_ids_val = list(map(str, tc_ids_val))
print(tc_ids_train)
print(tc_ids_test)
print(tc_ids_val)

['19', '0', '7', '1', '14', '3', '5', '12', '11', '17', '18', '27', '10', '4', '25']
['2', '15', '9', '30', '28']
['8', '16', '13', '26']


In [11]:
# Route every step id to the split that contains its test-case number,
# i.e. the part of the id in front of the first '.'.
train_ids = [step for step in ids if step.split('.')[0] in tc_ids_train]
test_ids = [step for step in ids if step.split('.')[0] in tc_ids_test]
val_ids = [step for step in ids if step.split('.')[0] in tc_ids_val]

print(test_ids)
print(val_ids)
print(train_ids)

['2.1', '2.2', '9.1', '9.2', '9.3', '9.4', '9.5', '9.6', '15.1', '15.2', '15.3', '15.4', '15.5', '28.1', '28.2', '28.3', '28.4', '30.1', '30.2', '30.3', '30.4']
['8.1', '8.2', '8.3', '8.4', '8.5', '8.6', '13.1', '13.2', '13.3', '26.1', '26.2', '26.3', '26.4', '26.5', '26.6', '16.1', '16.2', '16.3']
['1.1', '1.2', '1.3', '1.4', '3.1', '3.2', '4.1', '4.2', '4.3', '4.4', '4.5', '4.6', '4.7', '5.1', '5.2', '5.3', '5.4', '5.5', '7.1', '7.2', '7.3', '7.4', '10.1', '10.2', '10.3', '10.4', '10.5', '10.6', '11.1', '11.2', '11.3', '11.4', '17.1', '17.2', '17.3', '17.4', '19.1', '19.2', '19.3', '19.4', '12.1', '12.2', '12.3', '12.4', '12.5', '14.1', '14.2', '14.3', '18.1', '18.2', '18.3', '25.1', '25.2', '25.3', '25.4', '25.5', '27.1', '27.2', '27.3', '27.4', '27.5', '0.1']


In [12]:
# Build the finetuning samples for each split, in the order test, train, val.
finetuning_data_test, finetuning_data_train, finetuning_data_val = (
    create_finetuning_data_from_db(split_ids, db_file, config)
    for split_ids in (test_ids, train_ids, val_ids)
)

2024-07-18 10:47:02 [[34msrc.input_builder:30[0m] [[32mINFO[0m] >>>> Loading context...[0m
2024-07-18 10:47:02 [[34msrc.input_builder:46[0m] [[32mINFO[0m] >>>> Context loaded successfully.[0m
2024-07-18 10:47:02 [[34msrc.input_builder:48[0m] [[32mINFO[0m] >>>> Creating input prompt...[0m
2024-07-18 10:47:02 [[34msrc.input_builder:79[0m] [[32mINFO[0m] >>>> Input prompt created successfully.[0m
2024-07-18 10:47:02 [[34msrc.finetuning:33[0m] [[32mINFO[0m] >>>> Combining input and expected output into json finetuning data format...[0m
2024-07-18 10:47:02 [[34msrc.finetuning:50[0m] [[32mINFO[0m] >>>> Finetuning conversation created successfully.[0m
2024-07-18 10:47:02 [[34msrc.input_builder:30[0m] [[32mINFO[0m] >>>> Loading context...[0m
2024-07-18 10:47:02 [[34msrc.input_builder:46[0m] [[32mINFO[0m] >>>> Context loaded successfully.[0m
2024-07-18 10:47:02 [[34msrc.input_builder:48[0m] [[32mINFO[0m] >>>> Creating input prompt...[0m
2024-07-18 10:

In [26]:
# Build finetuning samples for all ids at once (no train/val/test split).
finetuning_data = create_finetuning_data_from_db(ids, db_file, config)

2024-07-17 15:11:32 [[34msrc.input_builder:30[0m] [[32mINFO[0m] >>>> Loading context...[0m
2024-07-17 15:11:32 [[34msrc.input_builder:46[0m] [[32mINFO[0m] >>>> Context loaded successfully.[0m
2024-07-17 15:11:32 [[34msrc.input_builder:48[0m] [[32mINFO[0m] >>>> Creating input prompt...[0m
2024-07-17 15:11:32 [[34msrc.input_builder:79[0m] [[32mINFO[0m] >>>> Input prompt created successfully.[0m
2024-07-17 15:11:32 [[34msrc.finetuning:33[0m] [[32mINFO[0m] >>>> Combining input and expected output into json finetuning data format...[0m
2024-07-17 15:11:32 [[34msrc.finetuning:50[0m] [[32mINFO[0m] >>>> Finetuning conversation created successfully.[0m
2024-07-17 15:11:32 [[34msrc.input_builder:30[0m] [[32mINFO[0m] >>>> Loading context...[0m
2024-07-17 15:11:32 [[34msrc.input_builder:46[0m] [[32mINFO[0m] >>>> Context loaded successfully.[0m
2024-07-17 15:11:32 [[34msrc.input_builder:48[0m] [[32mINFO[0m] >>>> Creating input prompt...[0m
2024-07-17 15:

In [13]:
# Print the first sample of each split (val, train, test), one per line.
print(
    finetuning_data_val[0],
    finetuning_data_train[0],
    finetuning_data_test[0],
    sep="\n",
)

{'id': '08_01', 'image': '.\\data\\raw\\.\\screenshot\\8_1.png', 'conversations': [{'from': 'human', 'value': '### Simplified HTML Content:\nButtons: \n{"id": "navigationTrigger", "class": "button button-icon button-borderless"}\n{"id": "workbook-create", "class": "button workbook-create button-icon"}\nInputs: \n{"class": "select2-search__field", "aria-label": "Suchen nach …", "type": "search", "placeholder": "Suchen nach …"}\nLinks: \n{"text": "Zum Navigatorbaum springen", "id": "skip-to-navigator", "class": "button button-primary"}\n{"text": "Zum Hauptbereich springen", "id": "skip-to-content", "class": "button button-primary"}\n{"text": "Startseite", "id": "home", "class": "button button-icon button-borderless"}\n{"text": "Karte", "class": "button button-icon button-borderless d-topnav--map-button"}\n{"text": "Verzeichnis Tutorial", "id": "d-nav-tree-node_ROOT-Tutorial_firstContent", "class": "d-nav-tree-node--main d-hover-context"}\n{"text": "Verzeichnis Gewässergüte", "id": "d-nav

In [7]:
# Inspect the first generated sample.
finetuning_data[0]

{'id': '01_01',
 'image': '.\\data\\raw\\.\\screenshot\\1_1.png',
 'conversations': [{'from': 'human',
   'value': '### Simplified HTML Content:\nButtons: \n{"id": "navigationTrigger", "class": "button button-icon button-borderless"}\n{"id": "workbook-create", "class": "button workbook-create button-icon"}\nInputs: \n{"class": "select2-search__field", "aria-label": "Suchen nach …", "type": "search", "placeholder": "Suchen nach …"}\nLinks: \n{"text": "Zum Navigatorbaum springen", "id": "skip-to-navigator", "class": "button button-primary"}\n{"text": "Zum Hauptbereich springen", "id": "skip-to-content", "class": "button button-primary"}\n{"text": "Startseite", "id": "home", "class": "button button-icon button-borderless"}\n{"text": "Karte", "class": "button button-icon button-borderless d-topnav--map-button"}\n{"text": "Verzeichnis Tutorial", "id": "d-nav-tree-node_ROOT-Tutorial_firstContent", "class": "d-nav-tree-node--main d-hover-context"}\n{"text": "Verzeichnis Gewässergüte", "id": "

In [14]:
# Write each split to its own JSON file; `name` labels the split.
for split_name, split_data in (
    ("test", finetuning_data_test),
    ("train", finetuning_data_train),
    ("val", finetuning_data_val),
):
    save_finetuning_data_as_json(split_data, name=split_name)

2024-07-18 10:47:44 [[34msrc.finetuning:70[0m] [[32mINFO[0m] >>>> Finetuning data saved as JSON file: ./data/finetuning/s21_finetuning_data_test_20240718-104744.json[0m
2024-07-18 10:47:44 [[34msrc.finetuning:70[0m] [[32mINFO[0m] >>>> Finetuning data saved as JSON file: ./data/finetuning/s61_finetuning_data_train_20240718-104744.json[0m
2024-07-18 10:47:44 [[34msrc.finetuning:70[0m] [[32mINFO[0m] >>>> Finetuning data saved as JSON file: ./data/finetuning/s18_finetuning_data_val_20240718-104744.json[0m


In [8]:
# Save the unsplit dataset; no `name` is passed, so the function's default label is used.
save_finetuning_data_as_json(finetuning_data)

2024-07-17 14:45:30 [[34msrc.finetuning:70[0m] [[32mINFO[0m] >>>> Finetuning data saved as JSON file: ./data/finetuning/s77_finetuning_data_default_20240717-144530.json[0m


# DB Ergänzung (DB extension)

In [3]:
# Set the working directory to the root of the project (the folder that holds
# ./config/config.yaml, which the cells below load by relative path).
# Guarded so that re-running this cell does not climb another directory level.
if not os.path.exists("./config/config.yaml"):
    os.chdir("..")
os.getcwd()

'C:\\Users\\c-beh\\PycharmProjects\\cadenza-playwright-llm'

In [5]:
# Both paths are relative to the project root (the current working directory).
db_file = './data/raw/playwright_script.db'
config = load_config("./config/config.yaml")

In [6]:
# Load ids to be used for finetuning from database
conn = sqlite3.connect(db_file)
try:
    c = conn.cursor()
    c.execute('SELECT id FROM tests')
    items = c.fetchall()
finally:
    # Release the database handle even if the query fails.
    conn.close()

# Each fetched row is a 1-tuple. Every id is kept: the former `[0:]` slice
# skipped nothing, so the old "skip the first id" remark did not match the code.
ids = [row[0] for row in items]

In [8]:
# Idea removed since it creates too many new entries; instead, the current function gets an exception (special case) for first steps, with a specific reference to 0_1.

['1.1', '1.2', '1.3', '1.4', '2.1', '2.2', '3.1', '3.2', '4.1', '4.2', '4.3', '4.4', '4.5', '4.6', '4.7', '5.1', '5.2', '5.3', '5.4', '5.5', '7.1', '7.2', '7.3', '7.4', '8.1', '8.2', '8.3', '8.4', '8.5', '8.6', '9.1', '9.2', '9.3', '9.4', '9.5', '9.6', '10.1', '10.2', '10.3', '10.4', '10.5', '10.6', '11.1', '11.2', '11.3', '11.4', '13.1', '13.2', '13.3', '15.1', '15.2', '15.3', '15.4', '15.5', '17.1', '17.2', '17.3', '17.4', '19.1', '19.2', '19.3', '19.4', '26.1', '26.2', '26.3', '26.4', '26.5', '26.6', '28.1', '28.2', '28.3', '28.4', '12.1', '12.2', '12.3', '12.4', '12.5', '14.1', '14.2', '14.3', '16.1', '16.2', '16.3', '18.1', '18.2', '18.3', '25.1', '25.2', '25.3', '25.4', '25.5', '27.1', '27.2', '27.3', '27.4', '27.5', '30.1', '30.2', '30.3', '30.4']


# TC adaptation for empty numbers

In [18]:
# Named so that the builtins `dir` and `id` are not shadowed at notebook scope.
test_script_dir = './data/raw/test_script'

ts_list = os.listdir(test_script_dir)
print(ts_list)
if '.gitkeep' in ts_list:
    ts_list.remove('.gitkeep')

# Take the part of each file name in front of the first '_' as the test-case
# number. dict.fromkeys keeps one entry per prefix in first-seen order, which
# replaces the list-membership check that rescanned the list for every file.
tc_prefixes = dict.fromkeys(file_name.split('_')[0] for file_name in ts_list)
tc_ids = [int(prefix) for prefix in tc_prefixes]
print(tc_ids)

['0_1.spec.ts', '10_1.spec.ts', '10_2.spec.ts', '10_3.spec.ts', '10_4.spec.ts', '10_5.spec.ts', '10_6.spec.ts', '11_1.spec.ts', '11_2.spec.ts', '11_3.spec.ts', '11_4.spec.ts', '12_1.spec.ts', '12_2.spec.ts', '12_3.spec.ts', '12_4.spec.ts', '12_5.spec.ts', '13_1.spec.ts', '13_2.spec.ts', '13_3.spec.ts', '14_1.spec.ts', '14_2.spec.ts', '14_3.spec.ts', '15_1.spec.ts', '15_2.spec.ts', '15_3.spec.ts', '15_4.spec.ts', '15_5.spec.ts', '16_1.spec.ts', '16_2.spec.ts', '16_3.spec.ts', '17_1.spec.ts', '17_2.spec.ts', '17_3.spec.ts', '17_4.spec.ts', '18_1.spec.ts', '18_2.spec.ts', '18_3.spec.ts', '19_1.spec.ts', '19_2.spec.ts', '19_3.spec.ts', '19_4.spec.ts', '1_1.spec.ts', '1_2.spec.ts', '1_3.spec.ts', '1_4.spec.ts', '25_1.spec.ts', '25_2.spec.ts', '25_3.spec.ts', '25_4.spec.ts', '25_5.spec.ts', '26_1.spec.ts', '26_2.spec.ts', '26_3.spec.ts', '26_4.spec.ts', '26_5.spec.ts', '26_6.spec.ts', '27_1.spec.ts', '27_2.spec.ts', '27_3.spec.ts', '27_4.spec.ts', '27_5.spec.ts', '28_1.spec.ts', '28_2.spec.t

# Adapting create finetuning data


In [37]:
from src.data.database import fetch_relevant_items, map_items_to_args
from src.utils.helpers import get_previous_id

In [31]:
# Both paths are relative to the project root (the current working directory).
db_file = './data/raw/playwright_script.db'
config = load_config("./config/config.yaml")

In [32]:
# Load ids to be used for finetuning from database
conn = sqlite3.connect(db_file)
try:
    c = conn.cursor()
    c.execute('SELECT id FROM tests')
    items = c.fetchall()
finally:
    # Release the database handle even if the query fails.
    conn.close()

# Each fetched row is a 1-tuple; every id is kept (the former `[0:]` slice was a no-op).
ids = [row[0] for row in items]

In [33]:
# Show the step ids that were read from the `tests` table.
print(ids)

['1.1', '1.2', '1.3', '1.4', '2.1', '2.2', '3.1', '3.2', '4.1', '4.2', '4.3', '4.4', '4.5', '4.6', '4.7', '5.1', '5.2', '5.3', '5.4', '5.5', '7.1', '7.2', '7.3', '7.4', '8.1', '8.2', '8.3', '8.4', '8.5', '8.6', '9.1', '9.2', '9.3', '9.4', '9.5', '9.6', '10.1', '10.2', '10.3', '10.4', '10.5', '10.6', '11.1', '11.2', '11.3', '11.4', '13.1', '13.2', '13.3', '15.1', '15.2', '15.3', '15.4', '15.5', '17.1', '17.2', '17.3', '17.4', '19.1', '19.2', '19.3', '19.4', '26.1', '26.2', '26.3', '26.4', '26.5', '26.6', '28.1', '28.2', '28.3', '28.4', '12.1', '12.2', '12.3', '12.4', '12.5', '14.1', '14.2', '14.3', '16.1', '16.2', '16.3', '18.1', '18.2', '18.3', '25.1', '25.2', '25.3', '25.4', '25.5', '27.1', '27.2', '27.3', '27.4', '27.5', '30.1', '30.2', '30.3', '30.4']


In [None]:
def get_previous_id_local(id: str) -> str:
    """Get the ID of the previous step of a test case.

    :param id: The ID of the current step.
    :return: The ID of the previous step, or an empty string if invalid.
    """
    try:
        test, step = map(int, id.split('.'))
        if step > 1:
            return f"{test}.{step - 1}"
        elif step == 1:
            return "0.1"
        else:
            logger.warning(f"Test {test} has no previous step. Cannot retrieve ID.")
            return ""
    except ValueError:
        logger.error(f"Invalid ID format: {id}")
        return ""

In [None]:
# Build finetuning samples for all ids at once (no train/val/test split).
finetuning_data = create_finetuning_data_from_db(ids, db_file, config)

# Add missing 0_1 step in db

In [4]:
# Both paths are relative to the project root (the current working directory).
db_file = './data/raw/playwright_script.db'
config = load_config("./config/config.yaml")

In [5]:
# Open the connection and cursor that the query cells below run their statements through.
conn = sqlite3.connect(db_file)
cursor = conn.cursor()

In [6]:
# Load ids to be used for finetuning from database.
# Reuse the connection and cursor opened in the previous cell: opening a second
# connection here would re-bind `conn`, and the later `conn.commit()` /
# `conn.close()` calls would then no longer act on the connection that
# `cursor` writes through.
cursor.execute('SELECT id FROM tests')
items = cursor.fetchall()


#ids = [i[0] for i in items][0:] # Skip the first id since it is not possible to get the previous id

In [49]:
# Fetch the rows of a step and of its predecessor in one query.
current_id = "1.2"
previous_id = get_previous_id(current_id)
if not previous_id:
    # No usable predecessor: report an empty result and skip the lookup
    # (previously the query still ran after printing []).
    print([])
else:
    query = 'SELECT * FROM tests WHERE id IN (?, ?)'
    cursor.execute(query, (current_id, previous_id))
    print(cursor.fetchall())

[('1.1', '[1.1] Öffne die Arbeitsmappe "Übersicht Messstellen" im Ordner "Gewässergüte".', '[1.1] Expected result: Die Arbeitsmappe wird geöffnet, der Analysekontext ist nicht sichtbar.', '.\\html\\1_1.html', '.\\screenshot\\1_1.png', '.\\test_script\\1_1.spec.ts'), ('1.2', '[1.1] Öffne die Arbeitsmappe "Übersicht Messstellen" im Ordner "Gewässergüte". [1.2]  Öffnen der Tabellen-Sicht "Messstellenliste" über die Werkzeugliste der Arbeitsmappe.', '[1.2] Expected result: Die Tabelle "Messstelleninformationen" wird angezeigt, der Analysekontext ist sichtbar.', '.\\html\\1_2.html', '.\\screenshot\\1_2.png', '.\\test_script\\1_2.spec.ts')]


In [53]:
# The id column is TEXT, so bind the id as a string parameter. An unquoted
# 1.1 in the SQL text is a numeric literal; it only matches via type coercion
# and would lose trailing zeros (1.10 would be compared as 1.1).
query = 'SELECT * FROM tests WHERE id = ?'
cursor.execute(query, ('1.1',))
cursor.fetchall()

[('1.1',
  '[1.1] Öffne die Arbeitsmappe "Übersicht Messstellen" im Ordner "Gewässergüte".',
  '[1.1] Expected result: Die Arbeitsmappe wird geöffnet, der Analysekontext ist nicht sichtbar.',
  '.\\html\\1_1.html',
  '.\\screenshot\\1_1.png',
  '.\\test_script\\1_1.spec.ts')]

In [55]:
# Show the column layout of the `tests` table.
query = 'PRAGMA table_info(tests)'
cursor.execute(query).fetchall()

[(0, 'id', 'TEXT', 0, None, 0),
 (1, 'steps', 'TEXT', 0, None, 0),
 (2, 'expectation', 'TEXT', 0, None, 0),
 (3, 'html', 'TEXT', 0, None, 0),
 (4, 'screenshot', 'TEXT', 0, None, 0),
 (5, 'test_script', 'TEXT', 0, None, 0)]

In [12]:
# Row for the "0.1" step with empty steps/expectation text. Values are listed
# in the table's column order: id, steps, expectation, html, screenshot, test_script.
id = "0.1"
TEXT = ""
exp_res = ""
html_path = ".\\html\\0_1.html"
screenshot_path = ".\\screenshot\\0_1.png"
test_script_path = ".\\test_script\\0_1.spec.ts"

# Bind the values as parameters instead of formatting them into the SQL text,
# so a quote character in any value cannot break or alter the statement.
query = "INSERT INTO tests VALUES (?, ?, ?, ?, ?, ?)"
cursor.execute(query, (id, TEXT, exp_res, html_path, screenshot_path, test_script_path))
# Commit through the cursor's own connection so the write is persisted
# regardless of which connection `conn` is currently bound to.
cursor.connection.commit()

In [6]:
# The id column is TEXT, so bind the id as a string parameter rather than
# writing it into the SQL text as an unquoted numeric literal.
query = 'SELECT * FROM tests WHERE id = ?'
cursor.execute(query, ('0.1',))
cursor.fetchall()

[('0.1',
  '',
  '',
  '.\\html\\0_1.html',
  '.\\screenshot\\0_1.png',
  '.\\test_script\\0_1.spec.ts')]

In [14]:
# Manual clean-up switch: when set to True, every row whose id is "0.1" is
# deleted (presumably used when the insert above was executed more than once).
DUPLICATE = False
if DUPLICATE:
    id = "0.1"

    # Bind the id as a string parameter; formatting it into the SQL text
    # produced `WHERE id = 0.1`, a numeric literal compared with a TEXT column.
    query = "DELETE FROM tests WHERE id = ?"
    cursor.execute(query, (id,))
    # Commit through the cursor's own connection so the delete is persisted
    # regardless of which connection `conn` is currently bound to.
    cursor.connection.commit()

In [64]:
# Close the connection that `cursor` belongs to as well as the one bound to
# `conn`; they are not guaranteed to be the same object when cells re-bind
# `conn`. Closing an already closed sqlite3 connection is a no-op.
cursor.connection.close()
conn.close()