Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.3.0
hooks:
- id: check-toml
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.1.3
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
- id: ruff-format
ci:
autofix_commit_msg: '[pre-commit.ci] Auto format from pre-commit.com hooks'
autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate'
2 changes: 1 addition & 1 deletion dsp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,4 @@ def __getattr__(self, name):
import sys
sys.modules[__name__] = DspModule()

"""
"""
59 changes: 37 additions & 22 deletions dsp/evaluation/utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
from openai import InvalidRequestError
from openai.error import APIError

import dsp
import tqdm
import pandas as pd

from IPython.display import display
from dsp.utils import EM, F1, HotPotF1
from dsp.utils import EM


def evaluateRetrieval(fn, dev, metric=None):
Expand All @@ -19,17 +16,24 @@ def evaluateRetrieval(fn, dev, metric=None):
d = dict(example)

# d['prediction'] = prediction.answer
d['correct'] = dsp.passage_match(prediction.context, example.answer)
d["correct"] = dsp.passage_match(prediction.context, example.answer)
data.append(d)

df = pd.DataFrame(data)

percentage = round(100.0 * df['correct'].sum() / len(dev), 1)
percentage = round(100.0 * df["correct"].sum() / len(dev), 1)
print(f"Answered {df['correct'].sum()} / {len(dev)} ({percentage}%) correctly.")
df['correct'] = df['correct'].apply(lambda x: '✔️' if x else '❌')
df["correct"] = df["correct"].apply(lambda x: "✔️" if x else "❌")

pd.options.display.max_colwidth = None
display(df.style.set_table_styles([{'selector': 'th', 'props': [('text-align', 'left')]}, {'selector': 'td', 'props': [('text-align', 'left')]}]))
display(
df.style.set_table_styles(
[
{"selector": "th", "props": [("text-align", "left")]},
{"selector": "td", "props": [("text-align", "left")]},
]
)
)


def evaluateAnswer(fn, dev, metric=EM):
Expand All @@ -43,19 +47,25 @@ def evaluateAnswer(fn, dev, metric=EM):

pred = prediction.answer

d['prediction'] = pred
d['correct'] = metric(pred, example.answer)
d["prediction"] = pred
d["correct"] = metric(pred, example.answer)
data.append(d)

df = pd.DataFrame(data)

percentage = round(100.0 * df['correct'].sum() / len(dev), 1)
percentage = round(100.0 * df["correct"].sum() / len(dev), 1)
print(f"Answered {df['correct'].sum()} / {len(dev)} ({percentage}%) correctly.")
df['correct'] = df['correct'].apply(lambda x: '✔️' if x else '❌')
df["correct"] = df["correct"].apply(lambda x: "✔️" if x else "❌")

pd.options.display.max_colwidth = None
display(df.style.set_table_styles([{'selector': 'th', 'props': [('text-align', 'left')]}, {'selector': 'td', 'props': [('text-align', 'left')]}]))

display(
df.style.set_table_styles(
[
{"selector": "th", "props": [("text-align", "left")]},
{"selector": "td", "props": [("text-align", "left")]},
]
)
)


def evaluate(fn, dev, metric=EM):
Expand All @@ -67,21 +77,26 @@ def evaluate(fn, dev, metric=EM):

d = dict(example)

pred = prediction#.answer
pred = prediction # .answer

d['prediction'] = pred
d['correct'] = metric(pred, example.answer)
d["prediction"] = pred
d["correct"] = metric(pred, example.answer)
data.append(d)

df = pd.DataFrame(data)

percentage = round(100.0 * df['correct'].sum() / len(dev), 1)
percentage = round(100.0 * df["correct"].sum() / len(dev), 1)
print(f"Answered {df['correct'].sum()} / {len(dev)} ({percentage}%) correctly.")
df['correct'] = df['correct'].apply(lambda x: '✔️' if x else '❌')
df["correct"] = df["correct"].apply(lambda x: "✔️" if x else "❌")

pd.options.display.max_colwidth = None
display(df.style.set_table_styles([{'selector': 'th', 'props': [('text-align', 'left')]}, {'selector': 'td', 'props': [('text-align', 'left')]}]))
display(
df.style.set_table_styles(
[
{"selector": "th", "props": [("text-align", "left")]},
{"selector": "td", "props": [("text-align", "left")]},
]
)
)

return percentage


2 changes: 1 addition & 1 deletion dsp/modules/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@
from .pyserini import *

from .hf_client import HFClientTGI
from .hf_client import Anyscale
from .hf_client import Anyscale
62 changes: 36 additions & 26 deletions dsp/modules/azurecognitivesearch.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
from typing import Optional, Union, Any
from typing import Union, Any

from dsp.utils import dotdict

try:
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents._paging import SearchItemPaged
except ImportError as e:
except ImportError:
raise ImportError(
"You need to install azure-search-documents library"
"Please use the command: pip install azure-search-documents"
)


class AzureCognitiveSearch:
"""Wrapper for the Azure Congitive Search Retrieval."""

Expand All @@ -19,51 +21,59 @@ def __init__(
search_service_name: str,
search_api_key: str,
search_index_name: str,
field_text: str, # required field to map with "content" field in dsp framework
field_score: str, # required field to map with "score" field in dsp framework

field_text: str, # required field to map with "content" field in dsp framework
field_score: str, # required field to map with "score" field in dsp framework
):
self.search_service_name = search_service_name
self.search_api_key = search_api_key
self.search_index_name = search_index_name
self.endpoint=f"https://{self.search_service_name}.search.windows.net"
self.field_text = field_text # field name of the text content
self.field_score = field_score # field name of the search score
self.endpoint = f"https://{self.search_service_name}.search.windows.net"
self.field_text = field_text # field name of the text content
self.field_score = field_score # field name of the search score
# Create a client
self.credential = AzureKeyCredential(self.search_api_key)
self.client = SearchClient(endpoint=self.endpoint,
index_name=self.search_index_name,
credential=self.credential)
self.client = SearchClient(
endpoint=self.endpoint,
index_name=self.search_index_name,
credential=self.credential,
)

def __call__(self, query: str, k: int = 10) -> Union[list[str], list[dotdict]]:

topk: list[dict[str, Any]] = azure_search_request(self.field_text, self.field_score, self.client, query, k)
topk = [{**d, "long_text": d["text"]} for d in topk]
topk: list[dict[str, Any]] = azure_search_request(
self.field_text, self.field_score, self.client, query, k
)
topk = [{**d, "long_text": d["text"]} for d in topk]

return [dotdict(psg) for psg in topk]

def azure_search_request(
    key_content: str, key_score: str, client: SearchClient, query: str, top: int = 1
):
    """Search an Azure Cognitive Search index and normalize the hits.

    Args:
        key_content: Name of the index field to expose as ``"text"``.
        key_score: Name of the index field to expose as ``"score"``.
        client: Search client; only ``client.search(search_text=..., top=...)``
            is called on it.
        query: Free-text query passed as ``search_text``.
        top: Maximum number of hits requested from the service.

    Returns:
        The list of dicts produced by ``process_azure_result``.
    """
    results = client.search(search_text=query, top=top)
    # The score field name must be forwarded here. Passing ``key_content`` twice
    # (as the previous code did) meant the "score" key was never populated.
    return process_azure_result(results, key_content, key_score)

def process_azure_result(
    results: SearchItemPaged, content_key: str, content_score: str
):
    """Convert search hits into a list of dicts with normalized key names.

    Every item yielded by ``results`` is read through ``.items()``. The field
    named ``content_key`` is stored under ``"text"``, the field named
    ``content_score`` under ``"score"``, and every other field is copied under
    its own name. Field order within each hit is preserved.

    Args:
        results: Iterable of mapping-like hits (each must provide ``.items()``).
        content_key: Name of the field holding the passage text.
        content_score: Name of the field holding the search score.

    Returns:
        A list with one plain ``dict`` per hit.
    """
    # ``content_key`` is inserted last so that, if both names are equal, the
    # field is mapped to "text" -- the same precedence as an if/elif check
    # that tests ``content_key`` first.
    renames = {content_score: "score", content_key: "text"}
    return [
        {renames.get(key, key): value for key, value in result.items()}
        for result in results
    ]
6 changes: 4 additions & 2 deletions dsp/modules/cache_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,12 @@ def wrapper(*args, **kwargs):
return decorator


cachedir = os.environ.get('DSP_CACHEDIR') or os.path.join(Path.home(), 'cachedir_joblib')
cachedir = os.environ.get("DSP_CACHEDIR") or os.path.join(
Path.home(), "cachedir_joblib"
)
CacheMemory = Memory(location=cachedir, verbose=0)

cachedir2 = os.environ.get('DSP_NOTEBOOK_CACHEDIR')
cachedir2 = os.environ.get("DSP_NOTEBOOK_CACHEDIR")
NotebookCacheMemory = dotdict()
NotebookCacheMemory.cache = noop_decorator

Expand Down
3 changes: 2 additions & 1 deletion dsp/modules/cohere.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

try:
import cohere

cohere_api_error = cohere.CohereAPIError
except ImportError:
cohere_api_error = Exception
Expand Down Expand Up @@ -106,7 +107,7 @@ def __call__(
prompt: str,
only_completed: bool = True,
return_sorted: bool = False,
**kwargs
**kwargs,
):
assert only_completed, "for now"
assert return_sorted is False, "for now"
Expand Down
2 changes: 1 addition & 1 deletion dsp/modules/finetuning/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .finetune_hf import *
from .finetune_hf import *
Loading