Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion prompting/rewards/web_retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from pydantic import BaseModel
from scipy import spatial
from thefuzz import fuzz
from functools import lru_cache

from prompting.datasets.random_website import DDGDataset, DDGDatasetEntry
from prompting.rewards.relevance import RelevanceRewardModel
Expand All @@ -32,6 +33,11 @@ class WebsiteResult(BaseModel):
relevant: str


@lru_cache(maxsize=1000)
def extract_content_with_cache(url: str):
    """Return the extracted website content for ``url``, memoized per URL.

    Delegates to ``DDGDataset.extract_website_content`` and keeps the results
    for up to 1000 distinct URLs in an LRU cache, so repeated calls with the
    same URL reuse the stored result instead of extracting again.

    NOTE: ``lru_cache`` stores whatever value the call returns, so an empty
    or ``None`` result (if the extractor yields one) is memoized as well
    until the entry is evicted.
    """
    website_content = DDGDataset.extract_website_content(url)
    return website_content


class WebRetrievalRewardModel(RelevanceRewardModel):
def _cosine_similarity(self, content1: str, content2: str) -> float:
"""Calculate the cosine similarity between sentence embeddings of the reference and completions."""
Expand All @@ -46,7 +52,7 @@ def score_website_result(
return 0

# Content scraped from the URL provided in the completion.
reference_website_content = DDGDataset.extract_website_content(response_url)
reference_website_content = extract_content_with_cache(response_url)
if not reference_website_content or len(reference_website_content) == 0:
logger.debug(f"Failed to extract miner's content from website: {response_url}")
return 0
Expand Down
Loading