Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

📄 URL evals #10

Merged
merged 8 commits into from
Nov 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 150 additions & 0 deletions bananalyzer/data/examples.json
Original file line number Diff line number Diff line change
Expand Up @@ -2648,5 +2648,155 @@
}
}
]
},
{
"id": "cb25b8d2-1ac6-43d5-930f-70edeccf34f9",
"url": "https://www.hackensackmeridianhealth.org/en/location-search#sort=%40locationname%20ascending&f:@locationtype=[Hospitals%20and%20Emergency%20Departments]",
"source": "hosted",
"domain": "healthcare",
"subdomain": "contact",
"type": "navigate",
"goal": "You are currently on a listing page. Use pagination controls to travel to the last available listing page if possible ",
"evals": [
{
"type": "end_url_match",
"expected": "https://www.hackensackmeridianhealth.org/en/location-search#first=10&sort=%40locationname%20ascending&f:@locationtype=[Hospitals%20and%20Emergency%20Departments]"
}
]
},
{
"id": "2ac79ff2-a923-49df-9e66-5ce7f9d5d0e4",
"url": "https://www.uchicagomedicine.org/find-a-location?page=5&sortby=default",
"source": "hosted",
"domain": "healthcare",
"subdomain": "contact",
"type": "navigate",
"goal": "You are currently on a listing page. Use pagination controls to travel to the last available listing page if possible ",
"evals": [
{
"type": "end_url_match",
"expected": "https://www.uchicagomedicine.org/find-a-location?page=6&sortby=default"
}
]
},
{
"id": "d76159ad-f62a-49e1-9896-df5ef91dcb5f",
"url": "https://www.adventisthealthcare.com/locations/results/?searchId=7402b4ad-5a84-ee11-a85e-000d3a611ea2&sort=13&page=5",
"source": "hosted",
"domain": "healthcare",
"subdomain": "contact",
"type": "navigate",
"goal": "You are currently on a listing page. Use pagination controls to travel to the last available listing page if possible ",
"evals": [
{
"type": "end_url_match",
"expected": "https://www.adventisthealthcare.com/locations/results/?searchId=7402b4ad-5a84-ee11-a85e-000d3a611ea2&sort=13&page=7"
}
]
},
{
"id": "17c411ab-5586-4cd7-940c-514959796b4f",
"url": "https://www.providence.org/locations?lookup=25d90ac1a9024e7298afb10aff38bd76&lookupvalue=Hospitals&postal=V3Z&latlng=49.1049,-122.824&page=4",
"source": "hosted",
"domain": "healthcare",
"subdomain": "contact",
"type": "navigate",
"goal": "You are currently on a listing page. Use pagination controls to travel to the last available listing page if possible ",
"evals": [
{
"type": "end_url_match",
"expected": "https://www.providence.org/locations?lookup=25d90ac1a9024e7298afb10aff38bd76&lookupvalue=Hospitals&postal=V3Z&latlng=49.1049,-122.824&page=6"
}
]
},
{
"id": "6d85220a-b86e-4b85-96c8-0fb94447cf29",
"url": "https://www.baptistfirst.org/find-a-location/?page=4&count=10",
"source": "hosted",
"domain": "healthcare",
"subdomain": "contact",
"type": "navigate",
"goal": "You are currently on a listing page. Use pagination controls to travel to the last available listing page if possible ",
"evals": [
{
"type": "end_url_match",
"expected": "https://www.baptistfirst.org/find-a-location/?page=6&count=10"
}
]
},
{
"id": "6b9054b1-c67c-46cc-bd67-7dfd29a9ebf1",
"url": "https://www.mhs.net/locations?listPage=2",
"source": "hosted",
"domain": "healthcare",
"subdomain": "contact",
"type": "navigate",
"goal": "You are currently on a listing page. Use pagination controls to travel to the last available listing page if possible ",
"evals": [
{
"type": "end_url_match",
"expected": "https://www.mhs.net/locations?listPage=4"
}
]
},
{
"id": "a316a94d-7ad6-4385-a2e0-4b47e1f1a661",
"url": "https://whs.org/locations-practices/search-results/",
"source": "hosted",
"domain": "healthcare",
"subdomain": "contact",
"type": "navigate",
"goal": "You are currently on a listing page. Use pagination controls to travel to the last available listing page if possible ",
"evals": [
{
"type": "end_url_match",
"expected": "https://whs.org/locations-practices/search-results/?sf_paged=3"
}
]
},
{
"id": "8fc4b97d-f823-4c31-b50f-ad42671fe2b1",
"url": "https://www.shrinerschildrens.org/en/locations?listPage=1",
"source": "hosted",
"domain": "healthcare",
"subdomain": "contact",
"type": "navigate",
"goal": "You are currently on a listing page. Use pagination controls to travel to the last available listing page if possible ",
"evals": [
{
"type": "end_url_match",
"expected": "https://www.shrinerschildrens.org/en/locations?listPage=4"
}
]
},
{
"id": "b572a5e4-717b-41ab-86e3-ec41cac368b3",
"url": "https://www.sih.net/locations?page=1&perpage=25&q=&serv=&dist=&zip=&sort=Ascending&view=list",
"source": "hosted",
"domain": "healthcare",
"subdomain": "contact",
"type": "navigate",
"goal": "You are currently on a listing page. Use pagination controls to travel to the last available listing page if possible ",
"evals": [
{
"type": "end_url_match",
"expected": "https://www.sih.net/locations?page=4&perpage=25&q=&serv=&dist=&zip=&sort=Ascending&view=list"
}
]
},
{
"id": "b2a98bed-bce2-404a-9b6b-ff27d28a6c1f",
"url": "https://www.mercyone.org/find-a-location/locations-results?LocationText=&page=54&count=9",
"source": "hosted",
"domain": "healthcare",
"subdomain": "contact",
"type": "navigate",
"goal": "You are currently on a listing page. Use pagination controls to travel to the last available listing page if possible ",
"evals": [
{
"type": "end_url_match",
"expected": "https://www.mercyone.org/find-a-location/locations-results?LocationText=&page=58&count=9"
}
]
}
]
58 changes: 27 additions & 31 deletions bananalyzer/data/schemas.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import json
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Literal, Optional, Union

import pytest
from deepdiff import DeepDiff
from playwright.async_api import Page
from pydantic import BaseModel, Field, model_validator

from bananalyzer.data.fetch_schemas import fetch_schemas
Expand All @@ -18,49 +18,45 @@
]


class Eval(BaseModel, ABC):
class Eval(BaseModel):
"""
Base class for all evals. Evals are used to determine if an action or result is correct
"""

@abstractmethod
type: Literal["json_match", "end_url_match"] = "json_match"
expected: Union[Dict[str, Any], List[str], str]

def eval_action(self, action: str) -> bool:
"""
We don't care about action-level evals at the moment
"""
raise NotImplementedError("eval_action not implemented")

@abstractmethod
def eval_result(self, result: str) -> bool:
raise NotImplementedError("eval_result not implemented")


class JSONEval(BaseModel):
type: Literal["json_match"] = Field(default="json_match")
expected: Union[Dict[str, Any], List[str]]

def eval_action(self, _: str) -> bool:
# We don't care about action level evaluations
return True
def eval_results(self, page: Page, result: Dict[str, Any]) -> None:
if self.type == "json_match":
diff = DeepDiff(
self.expected, result, ignore_order=True, report_repetition=True
)
if diff:
# Pretty print both expected and actual results
pretty_expected = json.dumps(self.expected, indent=4)
pretty_actual = json.dumps(result, indent=4)

def eval_results(self, result: Dict[str, Any]) -> None:
diff = DeepDiff(
self.expected, result, ignore_order=True, report_repetition=True
)
if diff:
# Pretty print both expected and actual results
pretty_expected = json.dumps(self.expected, indent=4)
pretty_actual = json.dumps(result, indent=4)
diff_msg = f"Actual: {pretty_actual}\nExpected: {pretty_expected}"
pytest.fail(f"JSONEval mismatch!\n{diff_msg}")

diff_msg = f"Actual: {pretty_actual}\nExpected: {pretty_expected}"
pytest.fail(f"JSONEval mismatch!\n{diff_msg}")


class ActionEval(BaseModel):
actions: Dict[str, str]
elif self.type == "end_url_match":
if page.url != self.expected:
diff_msg = f"Actual URL:\t{page.url}\nExpected URL:\t{self.expected}"
pytest.fail(f"URLEval mismatch!\n{diff_msg}")


class Example(BaseModel):
id: str
url: str
source: Literal["mhtml", "hosted"] = Field(description="Source of the website")
source: Literal["mhtml", "hosted", "url"] = Field(
description="Source of the website"
)
domain: str = Field(description="Domain of the website")
subdomain: str = Field(description="Subdomain of the website")
type: GoalType = Field(
Expand All @@ -75,7 +71,7 @@ class Example(BaseModel):
default=None,
description="If it is a fetch type, we can infer the goal based on this id to avoid large schemas in json",
)
evals: List[Union[JSONEval]] = Field(
evals: List[Eval] = Field(
description="Various evaluations to test for within the example"
)

Expand Down
2 changes: 1 addition & 1 deletion bananalyzer/runner/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ async def test_{example.id.replace("-", "_")}() -> None:
# The agent is imported into the global context prior to this call
result = await agent.run(context, example)
for curr_eval in example.evals:
curr_eval.eval_results(result)
curr_eval.eval_results(p, result)
""",
example=example,
)
Expand Down
2 changes: 1 addition & 1 deletion bananalyzer/schema.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional, List
from typing import List, Optional

from pydantic import BaseModel, Field

Expand Down
2 changes: 1 addition & 1 deletion bananalyzer/test_bananalyzer_agent.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Test bananalyzer.py file to ensure CLI is working as expected within the project
Test bananalyzer file to ensure CLI is working as expected within the project
"""
from bananalyzer.runner.null_agent_wrapper import NullAgentRunner

Expand Down
40 changes: 18 additions & 22 deletions fetch.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@
],
"source": [
"# Capture page content as MHTML\n",
"result = await client.send('Page.captureSnapshot', {'format': 'mhtml'})\n",
"mhtml = result['data']\n",
"result = await client.send(\"Page.captureSnapshot\", {\"format\": \"mhtml\"})\n",
"mhtml = result[\"data\"]\n",
"\n",
"# Create example values\n",
"# values = {\n",
Expand All @@ -117,43 +117,38 @@
" \"subdomain\": \"contact\",\n",
" \"type\": \"links\",\n",
" \"goal\": \"Fetch all of the links to the detail pages of clinics on the current page\",\n",
" \"evals\": [\n",
" {\n",
" \"type\": \"json_match\",\n",
" \"expected\": {}\n",
" }\n",
" ]\n",
" }\n",
" \"evals\": [{\"type\": \"json_match\", \"expected\": {}}],\n",
"}\n",
"\n",
"# Write MHTML content to the specified file\n",
"folder_path = f\"./bananalyzer/data/{values['id']}\"\n",
"folder_path = f\"./bananalyzer/data/{values['id']}\"\n",
"os.makedirs(folder_path, exist_ok=True)\n",
"file_path = os.path.join(folder_path, \"index.mhtml\")\n",
"with open(file_path, 'w') as f:\n",
"with open(file_path, \"w\") as f:\n",
" f.write(mhtml)\n",
"\n",
"# Print the path to the console\n",
"print(f'Page saved as MHTML at: {file_path}')\n",
"print(f\"Page saved as MHTML at: {file_path}\")\n",
"file_path = os.path.join(folder_path, \"index.mhtml\")\n",
"with open(file_path, 'w') as f:\n",
"with open(file_path, \"w\") as f:\n",
" f.write(mhtml)\n",
"\n",
"# Print the path to the console\n",
"print(f'Page saved as MHTML at: {file_path}')\n",
"print(f\"Page saved as MHTML at: {file_path}\")\n",
"\n",
"# Save values to JSON file\n",
"import json\n",
"\n",
"json_file_path = \"./bananalyzer/data/examples.json\"\n",
"with open(json_file_path, 'r') as json_file:\n",
"with open(json_file_path, \"r\") as json_file:\n",
" data = json.load(json_file)\n",
"\n",
"data.append(values)\n",
"\n",
"with open(json_file_path, 'w') as json_file:\n",
"with open(json_file_path, \"w\") as json_file:\n",
" json.dump(data, json_file, indent=4)\n",
" json_file.write('\\n')\n",
"# print(json.dumps(data, indent=4))\n"
" json_file.write(\"\\n\")\n",
"# print(json.dumps(data, indent=4))"
]
},
{
Expand All @@ -164,21 +159,22 @@
"source": [
"import os\n",
"import shutil\n",
"\n",
"\"\"\"\n",
"Utility to delete extra mhtml folders\n",
"\"\"\"\n",
"# Read in the examples.json and fetch all of the ids\n",
"with open(json_file_path, 'r') as json_file:\n",
"with open(json_file_path, \"r\") as json_file:\n",
" data = json.load(json_file)\n",
"ids = [item['id'] for item in data]\n",
"ids = [item[\"id\"] for item in data]\n",
"\n",
"# Iterate through the folders\n",
"base_folder = './bananalyzer/data/'\n",
"base_folder = \"./bananalyzer/data/\"\n",
"for folder in os.listdir(base_folder):\n",
" folder_path = os.path.join(base_folder, folder)\n",
" if folder not in ids and os.path.isdir(folder_path):\n",
" # If there is an id that is not present and the path is a directory, delete that folder\n",
" shutil.rmtree(folder_path)\n"
" shutil.rmtree(folder_path)"
]
},
{
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ pydantic = "^2.4.2"
pytest-asyncio = "^0.21.1"
deepdiff = "^6.7.0"
pytest-xdist = "^3.4.0"
black = {extras = ["jupyter"], version = "^23.11.0"}

[tool.poetry.group.test.dependencies]
pytest = "^7.4.2"
Expand Down
Loading