# 1. Load Datasets

## 1.1 Load Model Predictions

In [12]:
from pathlib import Path
import re
import pandas as pd

# Display configuration: show every column and every row of a frame (the table
# itself is never elided), but clip each cell's content to keep rows readable.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", 80)  # clip cell text to 80 characters

def _find_results_root(start: Path | None = None) -> Path:
    p = (start or Path.cwd()).resolve()
    for _ in range(6):
        candidate = p / "evaluation" / "results"
        if candidate.exists() and candidate.is_dir():
            return candidate
        p = p.parent
    raise FileNotFoundError("Could not locate 'evaluation/results' directory from current path upwards.")

def load_batch_csvs(app_names: list[str], results_root: Path | None = None) -> list[pd.DataFrame]:
    root = results_root or _find_results_root()
    batch_dfs: list[pd.DataFrame] = []
    for app in app_names:
        app_dir = root / app
        if not app_dir.exists():
            print(f"[skip] App dir not found: {app_dir}")
            continue
        batch_dirs = sorted(
            [d for d in app_dir.glob("batch_*") if d.is_dir()],
            key=lambda d: int(re.match(r"batch_(\d+)$", d.name).group(1)) if re.match(r"batch_(\d+)$", d.name) else 0,
        )
        for bdir in batch_dirs:
            m = re.match(r"batch_(\d+)$", bdir.name)
            if not m:
                print(f"[skip] Unrecognized batch dir: {bdir}")
                continue
            batch_id = int(m.group(1))
            csv_path = bdir / "csv" / f"batch_{batch_id}.csv"
            if not csv_path.exists():
                print(f"[skip] CSV missing: {csv_path}")
                continue
            df = pd.read_csv(csv_path)
            df["app"] = app
            df["batch_id"] = batch_id
            batch_dfs.append(df)
            print(f"[ok] Loaded {csv_path} with {len(df)} rows")
    return batch_dfs

In [13]:
# App folder names to load from the results directory.
apps = ["01", "02", "03", "04", "05"]
# One DataFrame per batch CSV that could be loaded.
batch_frames = load_batch_csvs(apps)
# Stack all batches into one frame; fall back to an empty frame because
# pd.concat cannot be called with an empty list.
combined = pd.concat(batch_frames, ignore_index=True) if batch_frames else pd.DataFrame()
print(len(combined))
# Last expression of the cell: renders the whole combined frame.
combined

[ok] Loaded C:\Users\krkol\Documents\workspaces\tuc_workspace\GUI-Spector\evaluation\results\01\batch_1\csv\batch_1.csv with 12 rows
[ok] Loaded C:\Users\krkol\Documents\workspaces\tuc_workspace\GUI-Spector\evaluation\results\01\batch_2\csv\batch_2.csv with 12 rows
[ok] Loaded C:\Users\krkol\Documents\workspaces\tuc_workspace\GUI-Spector\evaluation\results\01\batch_3\csv\batch_3.csv with 6 rows
[ok] Loaded C:\Users\krkol\Documents\workspaces\tuc_workspace\GUI-Spector\evaluation\results\02\batch_1\csv\batch_1.csv with 11 rows
[ok] Loaded C:\Users\krkol\Documents\workspaces\tuc_workspace\GUI-Spector\evaluation\results\02\batch_2\csv\batch_2.csv with 12 rows
[ok] Loaded C:\Users\krkol\Documents\workspaces\tuc_workspace\GUI-Spector\evaluation\results\02\batch_3\csv\batch_3.csv with 6 rows
[ok] Loaded C:\Users\krkol\Documents\workspaces\tuc_workspace\GUI-Spector\evaluation\results\03\batch_1\csv\batch_1.csv with 12 rows
[ok] Loaded C:\Users\krkol\Documents\workspaces\tuc_workspace\GUI-Spect

Unnamed: 0,requirement_id,rand_id,title,status,started_at,finished_at,elapsed_s,start_url,current_url,steps_taken,run_dir,error,usage_input_tokens,usage_output_tokens,usage_reasoning_tokens,usage_total_tokens,model_decision_json,interactions_json,requirement_description,ac_1_name,ac_1_text,ac_1_status,ac_2_name,ac_2_text,ac_2_status,ac_3_name,ac_3_text,ac_3_status,app,batch_id
0,2aa7e914-3995-4185-a6e0-87b60d01584f,CQ26,Users shall be able to filter parking options by vehicle type such as Standa...,met,2025-09-28T14:43:22.930788+00:00,2025-09-28T14:45:11.441166+00:00,108.51,http://192.168.178.40:8010/,http://192.168.178.40:8010/,9,C:\Users\krkol\Documents\workspaces\tuc_workspace\GUI-Spector\evaluation\res...,,76520,1068,0,77588,"{""status"": ""met"", ""explanation"": ""The application displays Standard, EV, and...","[{""id"": ""ffc718df-b626-477c-947a-43d32c329736"", ""turn_index"": 1, ""started_at...",Users shall be able to filter parking options by vehicle type such as Standa...,AC-1,"Vehicle type filter displays options for Standard, EV, and Motorcycle.",met,AC-2,Selecting a vehicle type updates the list to show only compatible spots.,met,AC-3,Filter selection persists while the user navigates between map and detail vi...,met,1,1
1,de7e84a1-a821-43b6-a476-349d4a2907da,DO84,App shall maintain a reservation history list stored in local storage.,met,2025-09-28T14:50:44.569639+00:00,2025-09-28T14:57:12.642405+00:00,388.073,http://192.168.178.40:8010/,http://192.168.178.40:8010/,30,C:\Users\krkol\Documents\workspaces\tuc_workspace\GUI-Spector\evaluation\res...,,278796,2549,0,281345,"{""status"": ""met"", ""explanation"": ""The application retains reservation histor...","[{""id"": ""7c9312fe-85f3-47a7-babb-5b906a2e7eeb"", ""turn_index"": 1, ""started_at...",App shall maintain a reservation history list stored in local storage.,AC-1,Previous reservations persist across sessions via local storage.,met,AC-2,"History view lists spot name, date, duration, and amount paid.",met,AC-3,User can clear history via dedicated control which wipes the local storage e...,met,1,1
2,f026cdd0-a01b-4dab-b13f-50847d7216be,DQ56,App shall allow users to scan their license plate via device camera to auto-...,unmet,2025-09-28T14:57:12.659429+00:00,2025-09-28T14:58:48.248570+00:00,95.589,http://192.168.178.40:8010/,http://192.168.178.40:8010/,8,C:\Users\krkol\Documents\workspaces\tuc_workspace\GUI-Spector\evaluation\res...,,69690,1254,0,70944,"{""status"": ""not_met"", ""explanation"": ""The app does not provide an option to ...","[{""id"": ""68421870-12f7-4f43-a409-198cc76a3214"", ""turn_index"": 1, ""started_at...",App shall allow users to scan their license plate via device camera to auto-...,AC-1,Vehicle profile view offers a 'Scan Plate' option using the device camera.,unmet,AC-2,Captured image is processed to recognize plate characters.,unmet,AC-3,Recognized plate auto-populates corresponding input fields with an option to...,unmet,1,1
3,e708410e-711c-4183-ae46-bce1889baff9,DQ80,App shall display a mock occupancy trend chart for the selected parking spot.,met,2025-09-28T14:58:48.259754+00:00,2025-09-28T15:00:37.106056+00:00,108.846,http://192.168.178.40:8010/,http://192.168.178.40:8010/,10,C:\Users\krkol\Documents\workspaces\tuc_workspace\GUI-Spector\evaluation\res...,,88852,1381,0,90233,"{""status"": ""met"", ""explanation"": ""All criteria were fulfilled with a mini ch...","[{""id"": ""7dcac49e-89b4-4e13-b6b7-e26da44e4244"", ""turn_index"": 1, ""started_at...",App shall display a mock occupancy trend chart for the selected parking spot.,AC-1,Detail view includes a mini chart element with hourly occupancy percentages.,met,AC-2,Chart updates to reflect data for the newly selected spot.,met,AC-3,Chart includes a legend showing the meaning of axes and colors.,met,1,1
4,ffd0117f-5cea-4bd7-90d5-160f4d2b8e46,LH51,Users shall be able to add and remove parking spots from a favorites list.,met,2025-09-28T15:13:32.575531+00:00,2025-09-28T15:15:21.580370+00:00,109.005,http://192.168.178.40:8010/,http://192.168.178.40:8010/,11,C:\Users\krkol\Documents\workspaces\tuc_workspace\GUI-Spector\evaluation\res...,,96768,1363,0,98131,"{""status"": ""met"", ""explanation"": ""The system allows users to add parking spo...","[{""id"": ""30bbc497-e926-4f6c-978c-2ef5c133fd0a"", ""turn_index"": 1, ""started_at...",Users shall be able to add and remove parking spots from a favorites list.,AC-1,Each parking card includes a favorite toggle control.,met,AC-2,Selected favorites appear in a dedicated favorites section.,met,AC-3,Removing a favorite updates both the favorites list and the original card in...,met,1,1
5,6e04c7f6-523a-47e8-b3e4-d0d2019fcbf5,MH44,App shall produce a downloadable parking receipt file for each completed res...,partially_met,2025-09-28T15:18:41.737046+00:00,2025-09-28T15:29:22.597788+00:00,640.861,http://192.168.178.40:8010/,http://192.168.178.40:8010/reservations,45,C:\Users\krkol\Documents\workspaces\tuc_workspace\GUI-Spector\evaluation\res...,,450143,4804,0,454947,"{""status"": ""partially_met"", ""explanation"": ""The app offers 'Download Receipt...","[{""id"": ""0878d3a1-27a9-4bc3-8996-dea8b591787c"", ""turn_index"": 1, ""started_at...",App shall produce a downloadable parking receipt file for each completed res...,AC-1,"After payment confirmation, user can tap a 'Download Receipt' action.",met,AC-2,Action generates a PDF or equivalent document with reservation and payment d...,unmet,AC-3,Receipt file saves to the user's device via standard browser download prompt.,unmet,1,1
6,4dc4be42-614b-494f-b2d2-71d2b01dfc3e,MI48,App shall display zone-specific parking regulations within the spot detail p...,met,2025-09-28T15:29:22.609714+00:00,2025-09-28T15:38:29.855688+00:00,547.246,http://192.168.178.40:8010/,http://192.168.178.40:8010/,35,C:\Users\krkol\Documents\workspaces\tuc_workspace\GUI-Spector\evaluation\res...,,369560,4374,0,373934,"{""status"": ""met"", ""explanation"": ""The detail panel for each parking spot dis...","[{""id"": ""84271950-d4fa-4733-b87c-5decf24ef476"", ""turn_index"": 1, ""started_at...",App shall display zone-specific parking regulations within the spot detail p...,AC-1,Detail panel includes a regulation summary specific to the selected parking ...,met,AC-2,"Regulation summary covers maximum duration, permit requirements, and enforce...",met,AC-3,Regulation content updates when the user switches between different parking ...,met,1,1
7,d38cb45c-63f9-49d1-930a-3cb8e462411c,MT45,Users shall manage a primary vehicle profile for reservation defaults.,met,2025-09-28T15:40:38.301425+00:00,2025-09-28T15:43:20.016649+00:00,161.715,http://192.168.178.40:8010/,http://192.168.178.40:8010/,15,C:\Users\krkol\Documents\workspaces\tuc_workspace\GUI-Spector\evaluation\res...,,148718,1809,0,150527,"{""status"": ""met"", ""explanation"": ""All ACs are met, allowing users to manage ...","[{""id"": ""8db90012-a055-494d-a5b1-23c0e828973a"", ""turn_index"": 1, ""started_at...",Users shall manage a primary vehicle profile for reservation defaults.,AC-1,"Profile drawer allows entry of vehicle nickname, type, and license plate.",met,AC-2,Saved vehicle data persists in local storage and auto-populates reservation ...,met,AC-3,Users can update vehicle information and see instant confirmation of changes.,met,1,1
8,c498ceb8-9bf1-495c-a507-dcb67aa90da9,SG43,User shall be able to cancel an active reservation via the reservation manag...,met,2025-09-28T16:55:41.882339+00:00,2025-09-28T17:03:43.155086+00:00,481.273,http://192.168.178.40:8010/,http://192.168.178.40:8010/,34,C:\Users\krkol\Documents\workspaces\tuc_workspace\GUI-Spector\evaluation\res...,,321223,2828,0,324051,"{""status"": ""met"", ""explanation"": ""All acceptance criteria for canceling an a...","[{""id"": ""f63d1173-9c1b-4d15-8b34-136b9ef2ced1"", ""turn_index"": 1, ""started_at...",User shall be able to cancel an active reservation via the reservation manag...,AC-1,Active reservation section shows the current booking with a cancel action.,met,AC-2,Selecting cancel prompts confirmation to prevent accidental removal.,met,AC-3,"Upon confirmation, reservation data is removed from local storage and the UI...",met,1,1
9,90a5137d-9cc3-4f92-bdfe-4cb82f0d7a24,SK83,Parking list shall support sorting by price and by estimated walking distance.,met,2025-09-28T17:03:43.182019+00:00,2025-09-28T17:06:46.033701+00:00,182.852,http://192.168.178.40:8010/,http://192.168.178.40:8010/,16,C:\Users\krkol\Documents\workspaces\tuc_workspace\GUI-Spector\evaluation\res...,,140247,1510,0,141757,"{""status"": ""met"", ""explanation"": ""The parking list supports sorting by price...","[{""id"": ""76609243-e324-4b03-8742-c3702b55222d"", ""turn_index"": 1, ""started_at...",Parking list shall support sorting by price and by estimated walking distance.,AC-1,Sort control includes options 'Price: Low to High' and 'Distance: Near to Far'.,met,AC-2,Selecting a sort option reorders the list accordingly using seeded metrics.,met,AC-3,Sort selection remains active until the user changes or resets it in the cur...,met,1,1


## 1.2 Normalize Requirements Status

In [14]:
import re
import pandas as pd

def derive_status_from_acs(row, ac_cols=None):
    """Derive a requirement-level status from its acceptance-criteria statuses.

    Args:
        row: A mapping-like row (e.g. a pandas Series) holding the AC status
            values and, optionally, a ``status`` entry.
        ac_cols: Names of the AC status entries to consider. When omitted,
            every index label of the form ``ac_<digits>_status`` is used.

    Returns:
        ``"met"`` if every non-empty AC status is ``"met"``, ``"unmet"`` if
        every one is ``"unmet"``, and ``"partially_met"`` for any other mix.
        If no AC status is present at all, the row's existing ``status``
        value (or ``None``) is returned unchanged.
    """
    if ac_cols is None:
        ac_cols = [name for name in row.index if re.fullmatch(r"ac_\d+_status", name)]

    # Distinct, cleaned (trimmed + lower-cased) statuses; missing and blank
    # values are ignored.
    observed = set()
    for col in ac_cols:
        raw = row.get(col)
        if not pd.notna(raw):
            continue
        cleaned = str(raw).strip().lower()
        if cleaned:
            observed.add(cleaned)

    if not observed:
        # Nothing to aggregate: keep whatever status the row already carries.
        return row.get("status")
    if observed == {"met"}:
        return "met"
    if observed == {"unmet"}:
        return "unmet"
    return "partially_met"

# Add a `status_norm` column to the combined model-prediction frame, derived
# row by row from the per-AC status columns (modifies `combined` in place).
combined["status_norm"] = combined.apply(derive_status_from_acs, axis=1)

# Sanity check: label distribution (including missing values) and a
# side-by-side view of the reported status, the derived status and its AC inputs.
print(combined["status_norm"].value_counts(dropna=False))
display(combined[["rand_id", "status", "status_norm", "ac_1_status", "ac_2_status", "ac_3_status"]].head(10))

status_norm
met              85
unmet            31
partially_met    31
error             1
Name: count, dtype: int64


Unnamed: 0,rand_id,status,status_norm,ac_1_status,ac_2_status,ac_3_status
0,CQ26,met,met,met,met,met
1,DO84,met,met,met,met,met
2,DQ56,unmet,unmet,unmet,unmet,unmet
3,DQ80,met,met,met,met,met
4,LH51,met,met,met,met,met
5,MH44,partially_met,partially_met,met,unmet,unmet
6,MI48,met,met,met,met,met
7,MT45,met,met,met,met,met
8,SG43,met,met,met,met,met
9,SK83,met,met,met,met,met


## 1.3 Load Gold-Standard Annotations

In [15]:
# Load gold-standard JSONs (evaluation/goldstandard/<app_prefix>_*.json)
# into two pandas DataFrames: requirements_df and acceptance_criteria_df.

from pathlib import Path
import json
import pandas as pd

def _find_gold_root(start: Path | None = None) -> Path:
    p = (start or Path.cwd()).resolve()
    for _ in range(8):
        cand = p / "evaluation" / "goldstandard"
        if cand.exists() and cand.is_dir():
            return cand
        p = p.parent
    raise FileNotFoundError("Could not locate 'evaluation/goldstandard'")

def _norm_status(v: str | None) -> str | None:
    if v is None:
        return None
    s = str(v).strip().lower()
    aliases = {
        "met": {"met", "pass", "true", "yes", "1", "fulfilled", "satisfied"},
        "unmet": {"unmet", "not_met", "fail", "false", "no", "0", "not met", "unsatisfied", "rejected"},
        "partially_met": {"partially_met", "partially met", "partial", "mixed"},
    }
    for k, vs in aliases.items():
        if s in vs:
            return k
    if s in {"met", "unmet", "partially_met"}:
        return s
    return s  # leave as-is

def _load_json(path: Path) -> dict:
    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except UnicodeDecodeError:
        return json.loads(path.read_text(encoding="latin-1", errors="replace"))

def _parse_gold_json(path: Path, app_code: str) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Flatten one gold-standard JSON file into requirement and AC tables.

    Args:
        path: Gold-standard JSON file to read.
        app_code: App identifier stored in the ``app_code`` column of every row.

    Returns:
        A ``(requirements, acceptance_criteria)`` pair of DataFrames. The
        requirements frame has one row per entry of the file's
        ``requirements`` list; the acceptance-criteria frame has one row per
        entry of each requirement's ``acceptance_criteria`` list. All status
        and annotation values are passed through ``_norm_status``.
    """
    payload = _load_json(path)
    app_name = str(payload.get("app_name", ""))
    # Annotation fields present on both requirement and AC level.
    annotator_keys = ("ann1_ante", "ann1_post", "ann2_ante", "ann2_post")

    requirement_records: list[dict] = []
    criterion_records: list[dict] = []
    for requirement in payload.get("requirements") or []:
        # Identifying columns shared by the requirement row and its AC rows.
        shared = {
            "app_code": app_code,
            "file": path.name,
            "app_name": app_name,
            "rand_id": str(requirement.get("rand_id", "")),
            "requirement_id": str(requirement.get("id", "")),
        }

        # Requirement-level annotations live in a nested "annotations" object.
        annotations = requirement.get("annotations", {}) or {}
        requirement_records.append({
            **shared,
            "description": str(requirement.get("description", "")),
            "status": _norm_status(requirement.get("status")),
            "ai": _norm_status(annotations.get("ai")),
            **{key: _norm_status(annotations.get(key)) for key in annotator_keys},
        })

        # AC-level annotations are read directly from the AC object itself.
        criterion_records.extend(
            {
                **shared,
                "ac_id": str(criterion.get("id", "")),
                "ac_text": str(criterion.get("text", "")),
                **{key: _norm_status(criterion.get(key)) for key in annotator_keys},
            }
            for criterion in requirement.get("acceptance_criteria") or []
        )
    return pd.DataFrame(requirement_records), pd.DataFrame(criterion_records)

def load_gold_into_dfs(app_prefix: str, gold_root: Path | None = None) -> tuple[pd.DataFrame, pd.DataFrame]:
    root = gold_root or _find_gold_root()
    files = sorted([p for p in (root.glob(f"{app_prefix}_*.json")) if p.is_file()])
    if not files:
        print(f"[gold] No files for prefix '{app_prefix}' in {root}")
        return pd.DataFrame(), pd.DataFrame()

    req_dfs, ac_dfs = [], []
    for f in files:
        req_df, ac_df = _parse_gold_json(f, app_code=app_prefix)
        req_dfs.append(req_df)
        ac_dfs.append(ac_df)
        print(f"[gold] Loaded {f.name}: req={len(req_df)} ac={len(ac_df)}")

    req_all = pd.concat(req_dfs, ignore_index=True) if req_dfs else pd.DataFrame()
    ac_all = pd.concat(ac_dfs, ignore_index=True) if ac_dfs else pd.DataFrame()
    return req_all, ac_all

In [16]:
# Example: load the gold-standard requirement and acceptance-criteria tables
# from the files whose names start with the prefix "01".
req_df, ac_df = load_gold_into_dfs("01")

[gold] Loaded 01_park-and-pay.json: req=30 ac=90


In [17]:
# Inspect the requirement-level gold table returned by load_gold_into_dfs.
req_df

Unnamed: 0,app_code,file,app_name,rand_id,requirement_id,description,status,ai,ann1_ante,ann1_post,ann2_ante,ann2_post
0,1,01_park-and-pay.json,Park & Pay,ZX99,REQ-001,App shall load with a combined map and list of nearby parking options source...,met,met,met,met,met,met
1,1,01_park-and-pay.json,Park & Pay,WP70,REQ-002,A toggle control shall allow users to switch between combined map-plus-list ...,met,met,met,met,met,met
2,1,01_park-and-pay.json,Park & Pay,UP18,REQ-003,Search field shall allow users to locate parking options by entering an addr...,met,met,met,met,met,met
3,1,01_park-and-pay.json,Park & Pay,IO58,REQ-004,App shall provide a price range slider to narrow parking options by hourly r...,met,met,met,met,met,met
4,1,01_park-and-pay.json,Park & Pay,CQ26,REQ-005,Users shall be able to filter parking options by vehicle type such as Standa...,met,met,met,met,met,met
5,1,01_park-and-pay.json,Park & Pay,OJ77,REQ-006,Each parking entry shall display its current availability status using color...,met,met,met,met,met,met
6,1,01_park-and-pay.json,Park & Pay,NQ89,REQ-007,Selecting a parking entry shall open a detail panel with comprehensive spot ...,met,met,met,met,met,met
7,1,01_park-and-pay.json,Park & Pay,ON35,REQ-008,App shall allow the user to create a parking reservation stored in browser l...,met,met,met,met,met,met
8,1,01_park-and-pay.json,Park & Pay,SG43,REQ-009,User shall be able to cancel an active reservation via the reservation manag...,met,met,met,met,met,met
9,1,01_park-and-pay.json,Park & Pay,PE29,REQ-010,App shall display a live countdown timer for the upcoming reservation start ...,partially_met,partially_met,partially_met,partially_met,partially_met,partially_met


In [18]:
# Inspect the first ten rows of the acceptance-criteria gold table.
ac_df[:10]

Unnamed: 0,app_code,file,app_name,rand_id,requirement_id,ac_id,ac_text,ann1_ante,ann1_post,ann2_ante,ann2_post
0,1,01_park-and-pay.json,Park & Pay,ZX99,REQ-001,AC1,"When the prototype loads, a default city map view is displayed with parking ...",met,met,met,met
1,1,01_park-and-pay.json,Park & Pay,ZX99,REQ-001,AC2,Parking list panel is populated automatically with at least five seeded park...,met,met,met,met
2,1,01_park-and-pay.json,Park & Pay,ZX99,REQ-001,AC3,No user interaction is required to see initial parking options in both map a...,met,met,met,met
3,1,01_park-and-pay.json,Park & Pay,WP70,REQ-002,AC1,Toggle button labeled 'Map/List' is visible on the primary toolbar.,met,met,met,met
4,1,01_park-and-pay.json,Park & Pay,WP70,REQ-002,AC2,Clicking the toggle hides the map and expands the list view to full width.,met,met,met,met
5,1,01_park-and-pay.json,Park & Pay,WP70,REQ-002,AC3,Clicking the toggle again restores the combined map and list layout without ...,met,met,met,met
6,1,01_park-and-pay.json,Park & Pay,UP18,REQ-003,AC1,Search field accepts free-text input of at least three characters.,met,met,met,met
7,1,01_park-and-pay.json,Park & Pay,UP18,REQ-003,AC2,Submitting a search filters the list to entries matching address or landmark...,met,met,met,met
8,1,01_park-and-pay.json,Park & Pay,UP18,REQ-003,AC3,Map re-centers to the first matching parking location after search submission.,met,met,met,met
9,1,01_park-and-pay.json,Park & Pay,IO58,REQ-004,AC1,Price slider allows selection of minimum and maximum hourly rate values.,met,met,met,met


In [19]:
# Gold-standard file prefixes, one per evaluated app.
apps = ["01", "02", "03", "04", "05"]

# Collect the per-app gold tables (load_gold_into_dfs returns two empty frames
# for a prefix without files).
all_req_dfs = []
all_ac_dfs = []
for app_prefix in apps:
    req_df, ac_df = load_gold_into_dfs(app_prefix)  # uses the helpers defined earlier
    all_req_dfs.append(req_df)
    all_ac_dfs.append(ac_df)

# Drop empty frames first, then guard on the *filtered* lists: pd.concat raises
# ValueError when given an empty list, which would happen if every app came
# back empty while the unfiltered list was still non-empty.
non_empty_req_dfs = [frame for frame in all_req_dfs if not frame.empty]
non_empty_ac_dfs = [frame for frame in all_ac_dfs if not frame.empty]

req_all = pd.concat(non_empty_req_dfs, ignore_index=True) if non_empty_req_dfs else pd.DataFrame()
ac_all = pd.concat(non_empty_ac_dfs, ignore_index=True) if non_empty_ac_dfs else pd.DataFrame()

print(f"Requirements: {len(req_all)} rows across {len(apps)} apps")
print(f"ACs:          {len(ac_all)} rows across {len(apps)} apps")
display(req_all.head())
display(ac_all.head())

[gold] Loaded 01_park-and-pay.json: req=30 ac=90
[gold] Loaded 02_budget-tracker.json: req=30 ac=90
[gold] Loaded 03_recipe-generator.json: req=30 ac=90
[gold] Loaded 04_fitness-challenges.json: req=30 ac=90
[gold] Loaded 05_cleaning-booking.json: req=30 ac=90
Requirements: 150 rows across 5 apps
ACs:          450 rows across 5 apps


Unnamed: 0,app_code,file,app_name,rand_id,requirement_id,description,status,ai,ann1_ante,ann1_post,ann2_ante,ann2_post
0,1,01_park-and-pay.json,Park & Pay,ZX99,REQ-001,App shall load with a combined map and list of nearby parking options source...,met,met,met,met,met,met
1,1,01_park-and-pay.json,Park & Pay,WP70,REQ-002,A toggle control shall allow users to switch between combined map-plus-list ...,met,met,met,met,met,met
2,1,01_park-and-pay.json,Park & Pay,UP18,REQ-003,Search field shall allow users to locate parking options by entering an addr...,met,met,met,met,met,met
3,1,01_park-and-pay.json,Park & Pay,IO58,REQ-004,App shall provide a price range slider to narrow parking options by hourly r...,met,met,met,met,met,met
4,1,01_park-and-pay.json,Park & Pay,CQ26,REQ-005,Users shall be able to filter parking options by vehicle type such as Standa...,met,met,met,met,met,met


Unnamed: 0,app_code,file,app_name,rand_id,requirement_id,ac_id,ac_text,ann1_ante,ann1_post,ann2_ante,ann2_post
0,1,01_park-and-pay.json,Park & Pay,ZX99,REQ-001,AC1,"When the prototype loads, a default city map view is displayed with parking ...",met,met,met,met
1,1,01_park-and-pay.json,Park & Pay,ZX99,REQ-001,AC2,Parking list panel is populated automatically with at least five seeded park...,met,met,met,met
2,1,01_park-and-pay.json,Park & Pay,ZX99,REQ-001,AC3,No user interaction is required to see initial parking options in both map a...,met,met,met,met
3,1,01_park-and-pay.json,Park & Pay,WP70,REQ-002,AC1,Toggle button labeled 'Map/List' is visible on the primary toolbar.,met,met,met,met
4,1,01_park-and-pay.json,Park & Pay,WP70,REQ-002,AC2,Clicking the toggle hides the map and expands the list view to full width.,met,met,met,met


# 2. Compute Requirements Status Performance

In [23]:
# Cast the join key to str on both sides so the merge compares like with like
# (modifies `combined` and `req_all` in place).
combined["rand_id"] = combined["rand_id"].astype(str)
req_all["rand_id"] = req_all["rand_id"].astype(str)

# Select only what we need from gold (keep ann1_post as ground-truth)
gold_req = req_all[["rand_id", "requirement_id", "ann1_post"]].copy()

# Inner join to align only matching rand_ids. Both frames carry a
# `requirement_id` column; the gold one is renamed to `requirement_id_gold`.
# NOTE(review): the join key is rand_id alone, which presumably is unique
# across apps in the gold data; a duplicate would multiply rows. Confirm.
req_join = combined.merge(gold_req, on="rand_id", how="inner", suffixes=("", "_gold"))

print("combined rows:", len(combined))
print("gold req rows:", len(gold_req))
print("joined rows:", len(req_join))

# Show a small sample to inspect alignment (model status vs. gold ann1_post)
cols_to_show = [
    "rand_id", "requirement_id_gold", "status_norm", "ann1_post",
    "title", "requirement_description", "app", "batch_id"
]
# Keep only the columns that actually exist in the joined frame.
cols_to_show = [c for c in cols_to_show if c in req_join.columns]
display(req_join[cols_to_show].head(20))

# Debugging aid: rows that the inner join dropped on either side.
missing_in_gold = combined[~combined["rand_id"].isin(gold_req["rand_id"])]
missing_in_model = gold_req[~gold_req["rand_id"].isin(combined["rand_id"])]

print("Missing in gold by rand_id:", len(missing_in_gold))
print("Missing in model by rand_id:", len(missing_in_model))
if len(missing_in_gold):
    display(missing_in_gold[["rand_id", "title", "app", "batch_id"]].head(10))
if len(missing_in_model):
    display(missing_in_model.head(10))

combined rows: 148
gold req rows: 150
joined rows: 148


Unnamed: 0,rand_id,requirement_id_gold,status_norm,ann1_post,title,requirement_description,app,batch_id
0,CQ26,REQ-005,met,met,Users shall be able to filter parking options by vehicle type such as Standa...,Users shall be able to filter parking options by vehicle type such as Standa...,1,1
1,DO84,REQ-013,met,met,App shall maintain a reservation history list stored in local storage.,App shall maintain a reservation history list stored in local storage.,1,1
2,DQ56,REQ-028,unmet,unmet,App shall allow users to scan their license plate via device camera to auto-...,App shall allow users to scan their license plate via device camera to auto-...,1,1
3,DQ80,REQ-018,met,partially_met,App shall display a mock occupancy trend chart for the selected parking spot.,App shall display a mock occupancy trend chart for the selected parking spot.,1,1
4,LH51,REQ-014,met,met,Users shall be able to add and remove parking spots from a favorites list.,Users shall be able to add and remove parking spots from a favorites list.,1,1
5,MH44,REQ-024,partially_met,partially_met,App shall produce a downloadable parking receipt file for each completed res...,App shall produce a downloadable parking receipt file for each completed res...,1,1
6,MI48,REQ-015,met,met,App shall display zone-specific parking regulations within the spot detail p...,App shall display zone-specific parking regulations within the spot detail p...,1,1
7,MT45,REQ-019,met,partially_met,Users shall manage a primary vehicle profile for reservation defaults.,Users shall manage a primary vehicle profile for reservation defaults.,1,1
8,SG43,REQ-009,met,met,User shall be able to cancel an active reservation via the reservation manag...,User shall be able to cancel an active reservation via the reservation manag...,1,1
9,SK83,REQ-017,met,met,Parking list shall support sorting by price and by estimated walking distance.,Parking list shall support sorting by price and by estimated walking distance.,1,1


Missing in gold by rand_id: 0
Missing in model by rand_id: 2


Unnamed: 0,rand_id,requirement_id,ann1_post
48,CU65,REQ-019,partially_met
139,KB29,REQ-020,partially_met


In [9]:
from sklearn.metrics import precision_recall_fscore_support
import pandas as pd

# Per-app, per-label precision / recall / F1 of the model's requirement status
# against the gold `ann1_post` annotation.
# NOTE(review): this cell scores the raw `status` column, while the join cell
# displays `status_norm` next to `ann1_post`; confirm which one is intended.
# NOTE(review): this cell's execution count (In [9]) is out of sequence with
# the surrounding cells, so its stored output may be stale. Re-run to confirm.
labels = ["met", "unmet", "partially_met"]

# Work on a copy and keep only rows where both prediction and ground truth are
# present and belong to the three evaluated labels (other values are dropped).
df = req_join.copy()
df = df[df["status"].notna() & df["ann1_post"].notna()]
df = df[df["status"].isin(labels) & df["ann1_post"].isin(labels)]

# One output row per (app, label) pair.
rows = []
for app, g in df.groupby("app", dropna=False):
    y_true = g["ann1_post"]
    y_pred = g["status"]

    # With `labels` given and no averaging, each result is an array with one
    # entry per label, in the order of `labels`; zero_division=0 is passed so
    # undefined ratios are reported as 0.
    p, r, f1, support = precision_recall_fscore_support(
        y_true, y_pred, labels=labels, zero_division=0
    )
    for lbl, p_i, r_i, f1_i, s_i in zip(labels, p, r, f1, support):
        rows.append({
            "app": app,
            "label": lbl,
            "precision": p_i,
            "recall": r_i,
            "f1": f1_i,
            "support": int(s_i),
        })

# Long-form metrics table, sorted by app and then label.
per_app_metrics = pd.DataFrame(rows).sort_values(["app", "label"]).reset_index(drop=True)
display(per_app_metrics)

# Optional quick pivot to see F1 per label by app
display(per_app_metrics.pivot(index="app", columns="label", values="f1").round(3))

Unnamed: 0,app,label,precision,recall,f1,support
0,1,met,0.789474,0.9375,0.857143,16
1,1,partially_met,0.5,0.428571,0.461538,7
2,1,unmet,1.0,0.714286,0.833333,7
3,2,met,1.0,0.944444,0.971429,18
4,2,partially_met,0.833333,1.0,0.909091,5
5,2,unmet,1.0,1.0,1.0,6
6,3,met,0.933333,0.823529,0.875,17
7,3,partially_met,0.625,0.833333,0.714286,6
8,3,unmet,0.833333,0.833333,0.833333,6
9,4,met,1.0,0.823529,0.903226,17


label,met,partially_met,unmet
app,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,0.857,0.462,0.833
2,0.971,0.909,1.0
3,0.875,0.714,0.833
4,0.903,0.571,0.8
5,0.919,0.5,0.923


In [25]:
import pandas as pd

# Reshape the long per-app metrics into one row per app with P/R/F1 columns
# grouped by label.
labels = ["met", "unmet", "partially_met"]

# per_app_metrics is the long-form table from earlier; pivoting on three value
# columns yields (metric, label) MultiIndex columns.
wide = per_app_metrics.pivot(index="app", columns="label", values=["precision", "recall", "f1"])

# Ensure all labels exist as columns (fill missing with 0.0)
wide = wide.reindex(columns=pd.MultiIndex.from_product([["precision", "recall", "f1"], labels]), fill_value=0.0)

# Reorder columns: met (P,R,F1) → unmet (P,R,F1) → partially_met (P,R,F1)
ordered_cols = []
for lbl in labels:
    ordered_cols += [("precision", lbl), ("recall", lbl), ("f1", lbl)]
wide = wide[ordered_cols]

# Flatten column names to e.g., met_P, met_R, met_F1, unmet_P, ...
name_map = {"precision": "P", "recall": "R", "f1": "F1"}
wide.columns = [f"{lbl}_{name_map[m]}" for m, lbl in wide.columns]

# Turn the `app` index back into a regular column before display.
wide = wide.reset_index()
display(wide.round(3))

Unnamed: 0,app,met_P,met_R,met_F1,unmet_P,unmet_R,unmet_F1,partially_met_P,partially_met_R,partially_met_F1
0,1,0.789,0.938,0.857,1.0,0.714,0.833,0.5,0.429,0.462
1,2,1.0,0.944,0.971,1.0,1.0,1.0,0.833,1.0,0.909
2,3,0.933,0.824,0.875,0.833,0.833,0.833,0.625,0.833,0.714
3,4,1.0,0.824,0.903,0.75,0.857,0.8,0.5,0.667,0.571
4,5,0.85,1.0,0.919,1.0,0.857,0.923,0.667,0.4,0.5


# 3. Compute Acceptance Criteria Performance

In [26]:
# Inspect the first ten rows of the gold acceptance-criteria table for all apps.
ac_all[:10]

Unnamed: 0,app_code,file,app_name,rand_id,requirement_id,ac_id,ac_text,ann1_ante,ann1_post,ann2_ante,ann2_post
0,1,01_park-and-pay.json,Park & Pay,ZX99,REQ-001,AC1,"When the prototype loads, a default city map view is displayed with parking ...",met,met,met,met
1,1,01_park-and-pay.json,Park & Pay,ZX99,REQ-001,AC2,Parking list panel is populated automatically with at least five seeded park...,met,met,met,met
2,1,01_park-and-pay.json,Park & Pay,ZX99,REQ-001,AC3,No user interaction is required to see initial parking options in both map a...,met,met,met,met
3,1,01_park-and-pay.json,Park & Pay,WP70,REQ-002,AC1,Toggle button labeled 'Map/List' is visible on the primary toolbar.,met,met,met,met
4,1,01_park-and-pay.json,Park & Pay,WP70,REQ-002,AC2,Clicking the toggle hides the map and expands the list view to full width.,met,met,met,met
5,1,01_park-and-pay.json,Park & Pay,WP70,REQ-002,AC3,Clicking the toggle again restores the combined map and list layout without ...,met,met,met,met
6,1,01_park-and-pay.json,Park & Pay,UP18,REQ-003,AC1,Search field accepts free-text input of at least three characters.,met,met,met,met
7,1,01_park-and-pay.json,Park & Pay,UP18,REQ-003,AC2,Submitting a search filters the list to entries matching address or landmark...,met,met,met,met
8,1,01_park-and-pay.json,Park & Pay,UP18,REQ-003,AC3,Map re-centers to the first matching parking location after search submission.,met,met,met,met
9,1,01_park-and-pay.json,Park & Pay,IO58,REQ-004,AC1,Price slider allows selection of minimum and maximum hourly rate values.,met,met,met,met


In [27]:

import pandas as pd

def to_ac_long(df: pd.DataFrame, ac_indices: tuple[int, ...] = (1, 2, 3)) -> pd.DataFrame:
    """Reshape wide per-requirement rows into one row per acceptance criterion.

    For every input row and every index ``i`` in ``ac_indices`` the columns
    ``ac_{i}_status`` and ``ac_{i}_text`` are read. Slots whose status is
    missing (column absent, NaN/None, or blank after stripping) are skipped.

    Args:
        df: Wide frame with optional ``app``, ``rand_id``, ``requirement_id``
            columns plus the ``ac_{i}_status`` / ``ac_{i}_text`` pairs.
        ac_indices: Acceptance-criterion slot numbers to extract. Defaults to
            ``(1, 2, 3)``.

    Returns:
        A frame with columns ``app``, ``rand_id``, ``requirement_id``,
        ``ac_id``, ``ac_text_model`` and ``ac_status_model`` (status is
        stripped and lower-cased). The columns are present even when no row
        qualifies, so downstream column access and merges do not fail on an
        empty result.
    """
    columns = [
        "app",
        "rand_id",
        "requirement_id",
        "ac_id",
        "ac_text_model",
        "ac_status_model",
    ]
    rows = []
    for _, r in df.iterrows():
        for i in ac_indices:
            status = r.get(f"ac_{i}_status")
            text = r.get(f"ac_{i}_text")
            # No usable status means this AC slot was not evaluated for the row.
            if pd.isna(status) or str(status).strip() == "":
                continue
            rows.append({
                "app": str(r.get("app", "")),
                "rand_id": str(r.get("rand_id", "")),
                "requirement_id": r.get("requirement_id", ""),
                "ac_id": f"AC{i}",
                "ac_text_model": text,
                "ac_status_model": str(status).strip().lower(),  # met/unmet
            })
    # Passing `columns` keeps the schema stable for an empty `rows` list.
    return pd.DataFrame(rows, columns=columns)

# Flatten the combined model predictions to one row per acceptance criterion,
# then preview the first rows and report the total row count.
model_ac_long = to_ac_long(combined)
display(model_ac_long.iloc[:10])
print(f"Rows: {model_ac_long.shape[0]}")

Unnamed: 0,app,rand_id,requirement_id,ac_id,ac_text_model,ac_status_model
0,1,CQ26,2aa7e914-3995-4185-a6e0-87b60d01584f,AC1,"Vehicle type filter displays options for Standard, EV, and Motorcycle.",met
1,1,CQ26,2aa7e914-3995-4185-a6e0-87b60d01584f,AC2,Selecting a vehicle type updates the list to show only compatible spots.,met
2,1,CQ26,2aa7e914-3995-4185-a6e0-87b60d01584f,AC3,Filter selection persists while the user navigates between map and detail vi...,met
3,1,DO84,de7e84a1-a821-43b6-a476-349d4a2907da,AC1,Previous reservations persist across sessions via local storage.,met
4,1,DO84,de7e84a1-a821-43b6-a476-349d4a2907da,AC2,"History view lists spot name, date, duration, and amount paid.",met
5,1,DO84,de7e84a1-a821-43b6-a476-349d4a2907da,AC3,User can clear history via dedicated control which wipes the local storage e...,met
6,1,DQ56,f026cdd0-a01b-4dab-b13f-50847d7216be,AC1,Vehicle profile view offers a 'Scan Plate' option using the device camera.,unmet
7,1,DQ56,f026cdd0-a01b-4dab-b13f-50847d7216be,AC2,Captured image is processed to recognize plate characters.,unmet
8,1,DQ56,f026cdd0-a01b-4dab-b13f-50847d7216be,AC3,Recognized plate auto-populates corresponding input fields with an option to...,unmet
9,1,DQ80,e708410e-711c-4183-ae46-bce1889baff9,AC1,Detail view includes a mini chart element with hourly occupancy percentages.,met


Rows: 441


In [28]:
# Work on a copy of the annotation table so the key casts below leave
# `ac_all` untouched.
gold_ac = ac_all.copy()

# Ensure string types for join keys on both sides so the merge compares
# like with like.
join_keys = ["rand_id", "ac_id"]
for key in join_keys:
    model_ac_long[key] = model_ac_long[key].astype(str)
    gold_ac[key] = gold_ac[key].astype(str)

# Attach the annotated text and the `ann1_post` label to each model row.
# validate="many_to_one" makes pandas raise MergeError if a (rand_id, ac_id)
# pair occurs more than once in the annotation table; without it such a
# collision would silently duplicate model rows and skew the metrics.
ac_join = model_ac_long.merge(
    gold_ac[join_keys + ["ac_text", "ann1_post"]],
    on=join_keys,
    how="inner",
    suffixes=("_model", "_gold"),  # only applied if non-key column names collide
    validate="many_to_one",
)

# Row counts before/after the inner join show how many rows were left unmatched.
print("model AC rows:", len(model_ac_long))
print("gold AC rows:", len(gold_ac))
print("joined AC rows:", len(ac_join))

display(ac_join.head(20))

model AC rows: 441
gold AC rows: 450
joined AC rows: 441


Unnamed: 0,app,rand_id,requirement_id,ac_id,ac_text_model,ac_status_model,ac_text,ann1_post
0,1,CQ26,2aa7e914-3995-4185-a6e0-87b60d01584f,AC1,"Vehicle type filter displays options for Standard, EV, and Motorcycle.",met,"Vehicle type filter displays options for Standard, EV, and Motorcycle.",met
1,1,CQ26,2aa7e914-3995-4185-a6e0-87b60d01584f,AC2,Selecting a vehicle type updates the list to show only compatible spots.,met,Selecting a vehicle type updates the list to show only compatible spots.,met
2,1,CQ26,2aa7e914-3995-4185-a6e0-87b60d01584f,AC3,Filter selection persists while the user navigates between map and detail vi...,met,Filter selection persists while the user navigates between map and detail vi...,met
3,1,DO84,de7e84a1-a821-43b6-a476-349d4a2907da,AC1,Previous reservations persist across sessions via local storage.,met,Previous reservations persist across sessions via local storage.,met
4,1,DO84,de7e84a1-a821-43b6-a476-349d4a2907da,AC2,"History view lists spot name, date, duration, and amount paid.",met,"History view lists spot name, date, duration, and amount paid.",met
5,1,DO84,de7e84a1-a821-43b6-a476-349d4a2907da,AC3,User can clear history via dedicated control which wipes the local storage e...,met,User can clear history via dedicated control which wipes the local storage e...,met
6,1,DQ56,f026cdd0-a01b-4dab-b13f-50847d7216be,AC1,Vehicle profile view offers a 'Scan Plate' option using the device camera.,unmet,Vehicle profile view offers a 'Scan Plate' option using the device camera.,unmet
7,1,DQ56,f026cdd0-a01b-4dab-b13f-50847d7216be,AC2,Captured image is processed to recognize plate characters.,unmet,Captured image is processed to recognize plate characters.,unmet
8,1,DQ56,f026cdd0-a01b-4dab-b13f-50847d7216be,AC3,Recognized plate auto-populates corresponding input fields with an option to...,unmet,Recognized plate auto-populates corresponding input fields with an option to...,unmet
9,1,DQ80,e708410e-711c-4183-ae46-bce1889baff9,AC1,Detail view includes a mini chart element with hourly occupancy percentages.,met,Detail view includes a mini chart element with hourly occupancy percentages.,met


In [30]:
from sklearn.metrics import precision_recall_fscore_support
import pandas as pd

# Keep only rows where both the model status and the annotator label are one
# of the two evaluated classes.
valid = {"met", "unmet"}
model_is_valid = ac_join["ac_status_model"].isin(valid)
gold_is_valid = ac_join["ann1_post"].isin(valid)
ac_eval = ac_join.loc[model_is_valid & gold_is_valid].copy()

def prf_binary(y_true, y_pred, positive: str):
    """Return (precision, recall, F1) treating `positive` as the positive class.

    Both inputs are compared element-wise against `positive` to obtain
    boolean vectors, which are scored with scikit-learn's binary averaging.
    Undefined ratios are reported as 0 (`zero_division=0`).
    """
    scores = precision_recall_fscore_support(
        y_true == positive,
        y_pred == positive,
        average="binary",
        zero_division=0,
    )
    # The fourth element (support) is not needed by callers.
    precision, recall, f_score = scores[:3]
    return precision, recall, f_score

# One metrics row per app: precision/recall/F1 for each class in turn, with
# `ann1_post` as the reference labels and the model status as predictions.
rows = []
for app, g in ac_eval.groupby("app", dropna=False):
    row = {"app": app}
    for label in ("met", "unmet"):
        p, r, f1 = prf_binary(g["ann1_post"], g["ac_status_model"], label)
        row.update({f"{label}_P": p, f"{label}_R": r, f"{label}_F1": f1})
    rows.append(row)

ac_metrics_by_app = pd.DataFrame(rows)
ac_metrics_by_app = ac_metrics_by_app.sort_values("app").reset_index(drop=True)
display(ac_metrics_by_app.round(3))

Unnamed: 0,app,met_P,met_R,met_F1,unmet_P,unmet_R,unmet_F1
0,1,0.908,0.967,0.937,0.92,0.793,0.852
1,2,0.968,0.984,0.976,0.958,0.92,0.939
2,3,0.931,0.9,0.915,0.793,0.852,0.821
3,4,0.946,0.898,0.922,0.824,0.903,0.862
4,5,0.909,1.0,0.952,1.0,0.778,0.875


# 4. Compute Efficiency Statistics

In [31]:
import pandas as pd

# Display label -> source column in the combined results frame.
metrics = {
    "#Steps": "steps_taken",
    "Time (s)": "elapsed_s",
    "#In-Tok.": "usage_input_tokens",
    "#Out-Tok.": "usage_output_tokens",
}

df_eff = combined.copy()

# Coerce the metric columns that are present to numeric; unparsable values
# become NaN.
present_cols = [c for c in metrics.values() if c in df_eff.columns]
for col in present_cols:
    df_eff[col] = pd.to_numeric(df_eff[col], errors="coerce")

# Per-app mean and sample standard deviation of every metric.
rows = []
for app, g in df_eff.groupby("app", dropna=False):
    row = {"app": app}
    for label, col in metrics.items():
        values = g[col]
        row.update({
            f"{label} Av.": values.mean(),
            f"{label} SD": values.std(ddof=1),
        })
    rows.append(row)

eff_stats = pd.DataFrame(rows).sort_values("app").reset_index(drop=True)

# A group with a single sample has an undefined sample SD (NaN); show 0.0 instead.
sd_cols = [c for c in eff_stats.columns if c.endswith("SD")]
eff_stats[sd_cols] = eff_stats[sd_cols].fillna(0.0)

# Display precision per metric, applied to both its "Av." and "SD" column.
decimals_by_label = {"#Steps": 1, "Time (s)": 2, "#In-Tok.": 0, "#Out-Tok.": 0}
display(eff_stats.round({
    f"{label} {stat}": ndigits
    for label, ndigits in decimals_by_label.items()
    for stat in ("Av.", "SD")
}))

Unnamed: 0,app,#Steps Av.,#Steps SD,Time (s) Av.,Time (s) SD,#In-Tok. Av.,#In-Tok. SD,#Out-Tok. Av.,#Out-Tok. SD
0,1,25.1,17.4,335.98,291.23,229776.0,165507.0,2394.0,1323.0
1,2,21.1,14.9,322.33,316.01,189710.0,127758.0,1984.0,817.0
2,3,19.5,16.6,223.14,240.53,174555.0,149196.0,2002.0,1020.0
3,4,22.3,12.4,293.44,182.27,202726.0,107710.0,2115.0,822.0
4,5,34.2,19.8,412.87,266.67,311923.0,187972.0,2850.0,1399.0


In [33]:

# Price per one million tokens, in dollars.
COST_IN_PER_MTOK = 3.0
COST_OUT_PER_MTOK = 12.0

# Cost of the per-app average token usage: tokens -> millions of tokens -> dollars.
for cost_col, token_col, rate in (
    ("Cost In ($)", "#In-Tok. Av.", COST_IN_PER_MTOK),
    ("Cost Out ($)", "#Out-Tok. Av.", COST_OUT_PER_MTOK),
):
    eff_stats[cost_col] = (eff_stats[token_col] / 1_000_000.0) * rate

display_decimals = {
    "#Steps Av.": 1, "#Steps SD": 1,
    "Time (s) Av.": 2, "Time (s) SD": 2,
    "#In-Tok. Av.": 0, "#In-Tok. SD": 0,
    "#Out-Tok. Av.": 0, "#Out-Tok. SD": 0,
    "Cost In ($)": 4, "Cost Out ($)": 4,
}
display(eff_stats.round(display_decimals))

Unnamed: 0,app,#Steps Av.,#Steps SD,Time (s) Av.,Time (s) SD,#In-Tok. Av.,#In-Tok. SD,#Out-Tok. Av.,#Out-Tok. SD,Cost In ($),Cost Out ($)
0,1,25.1,17.4,335.98,291.23,229776.0,165507.0,2394.0,1323.0,0.6893,0.0287
1,2,21.1,14.9,322.33,316.01,189710.0,127758.0,1984.0,817.0,0.5691,0.0238
2,3,19.5,16.6,223.14,240.53,174555.0,149196.0,2002.0,1020.0,0.5237,0.024
3,4,22.3,12.4,293.44,182.27,202726.0,107710.0,2115.0,822.0,0.6082,0.0254
4,5,34.2,19.8,412.87,266.67,311923.0,187972.0,2850.0,1399.0,0.9358,0.0342


# 5. Combined Results Table

In [35]:
# Tag every metric column with its level so requirement-level and AC-level
# metrics stay distinguishable after the merge; "app" is the join key and
# keeps its name.
req_df = wide.set_axis(
    [c if c == "app" else f"{c} (Req)" for c in wide.columns], axis=1
)
ac_df = ac_metrics_by_app.set_axis(
    [c if c == "app" else f"{c} (AC)" for c in ac_metrics_by_app.columns], axis=1
)

# Inner joins: only apps present in all three tables are kept.
final = req_df.merge(ac_df, on="app", how="inner")
final = final.merge(eff_stats, on="app", how="inner")

n_rows, n_cols = final.shape
print("Rows:", n_rows, "Cols:", n_cols)
display(final.round(3))

Rows: 5 Cols: 26


Unnamed: 0,app,met_P (Req),met_R (Req),met_F1 (Req),unmet_P (Req),unmet_R (Req),unmet_F1 (Req),partially_met_P (Req),partially_met_R (Req),partially_met_F1 (Req),met_P (AC),met_R (AC),met_F1 (AC),unmet_P (AC),unmet_R (AC),unmet_F1 (AC),#Steps Av.,#Steps SD,Time (s) Av.,Time (s) SD,#In-Tok. Av.,#In-Tok. SD,#Out-Tok. Av.,#Out-Tok. SD,Cost In ($),Cost Out ($)
0,1,0.789,0.938,0.857,1.0,0.714,0.833,0.5,0.429,0.462,0.908,0.967,0.937,0.92,0.793,0.852,25.067,17.39,335.984,291.226,229776.233,165507.008,2393.6,1322.629,0.689,0.029
1,2,1.0,0.944,0.971,1.0,1.0,1.0,0.833,1.0,0.909,0.968,0.984,0.976,0.958,0.92,0.939,21.138,14.901,322.326,316.009,189710.345,127758.364,1984.31,816.81,0.569,0.024
2,3,0.933,0.824,0.875,0.833,0.833,0.833,0.625,0.833,0.714,0.931,0.9,0.915,0.793,0.852,0.821,19.533,16.602,223.141,240.531,174555.067,149196.392,2002.433,1019.978,0.524,0.024
3,4,1.0,0.824,0.903,0.75,0.857,0.8,0.5,0.667,0.571,0.946,0.898,0.922,0.824,0.903,0.862,22.267,12.396,293.441,182.266,202726.333,107709.927,2114.733,822.488,0.608,0.025
4,5,0.85,1.0,0.919,1.0,0.857,0.923,0.667,0.4,0.5,0.909,1.0,0.952,1.0,0.778,0.875,34.172,19.837,412.873,266.665,311922.517,187972.306,2849.724,1398.934,0.936,0.034


In [36]:

import pandas as pd

# Numeric columns to summarise. "app" identifies the row and is excluded
# explicitly: if it were ever loaded as a number it would otherwise be part of
# `num_cols`, and the "Average"/"SD" labels set below would be overwritten by
# a statistic of the app codes.
num_cols = final.select_dtypes(include="number").columns.drop("app", errors="ignore")


def _summary_row(label, stat):
    """Return a row shaped like `final`: `label` in "app", `stat` of each numeric column."""
    row = pd.Series(index=final.columns, dtype="object")
    row["app"] = label
    for c in num_cols:
        row[c] = stat(final[c])
    return row


# Unweighted mean and SD taken across the per-app rows of `final`.
avg_series = _summary_row("Average", lambda s: s.mean())
sd_series = _summary_row("SD", lambda s: s.std(ddof=1))  # sample SD; change to ddof=0 for population SD

final_with_summary = pd.concat(
    [final, pd.DataFrame([avg_series, sd_series])], ignore_index=True
)

display(final_with_summary.round(3))

Unnamed: 0,app,met_P (Req),met_R (Req),met_F1 (Req),unmet_P (Req),unmet_R (Req),unmet_F1 (Req),partially_met_P (Req),partially_met_R (Req),partially_met_F1 (Req),met_P (AC),met_R (AC),met_F1 (AC),unmet_P (AC),unmet_R (AC),unmet_F1 (AC),#Steps Av.,#Steps SD,Time (s) Av.,Time (s) SD,#In-Tok. Av.,#In-Tok. SD,#Out-Tok. Av.,#Out-Tok. SD,Cost In ($),Cost Out ($)
0,01,0.789,0.938,0.857,1.0,0.714,0.833,0.5,0.429,0.462,0.908,0.967,0.937,0.92,0.793,0.852,25.067,17.39,335.984,291.226,229776.233,165507.008,2393.6,1322.629,0.689,0.029
1,02,1.0,0.944,0.971,1.0,1.0,1.0,0.833,1.0,0.909,0.968,0.984,0.976,0.958,0.92,0.939,21.138,14.901,322.326,316.009,189710.345,127758.364,1984.31,816.81,0.569,0.024
2,03,0.933,0.824,0.875,0.833,0.833,0.833,0.625,0.833,0.714,0.931,0.9,0.915,0.793,0.852,0.821,19.533,16.602,223.141,240.531,174555.067,149196.392,2002.433,1019.978,0.524,0.024
3,04,1.0,0.824,0.903,0.75,0.857,0.8,0.5,0.667,0.571,0.946,0.898,0.922,0.824,0.903,0.862,22.267,12.396,293.441,182.266,202726.333,107709.927,2114.733,822.488,0.608,0.025
4,05,0.85,1.0,0.919,1.0,0.857,0.923,0.667,0.4,0.5,0.909,1.0,0.952,1.0,0.778,0.875,34.172,19.837,412.873,266.665,311922.517,187972.306,2849.724,1398.934,0.936,0.034
5,Average,0.915,0.906,0.905,0.917,0.852,0.878,0.625,0.666,0.631,0.933,0.95,0.94,0.899,0.849,0.87,24.435,16.225,317.553,259.339,221738.099,147628.799,2268.96,1076.168,0.665,0.027
6,SD,0.093,0.079,0.044,0.118,0.102,0.082,0.138,0.258,0.183,0.026,0.048,0.025,0.088,0.064,0.043,5.805,2.782,68.817,51.42,54339.911,31376.89,363.548,273.724,0.163,0.004
