stanfordnlp · DanielUH2019 · Oct 29, 2023 · Oct 29, 2023 · Oct 29, 2023
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,17 @@
+repos:
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v2.3.0
+    hooks:
+    -   id: check-toml
+    -   id: check-yaml
+    -   id: end-of-file-fixer
+    -   id: trailing-whitespace
+-   repo: https://github.com/charliermarsh/ruff-pre-commit
+    rev: v0.1.3
+    hooks:
+    -   id: ruff
+        args: [--fix, --exit-non-zero-on-fix]
+    -   id: ruff-format
+ci:
+    autofix_commit_msg: '[pre-commit.ci] Auto format from pre-commit.com hooks'
+    autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate'
diff --git a/dsp/__init__.py b/dsp/__init__.py
@@ -40,4 +40,4 @@ def __getattr__(self, name):
 import sys
 sys.modules[__name__] = DspModule()
 
-"""
+"""
diff --git a/dsp/evaluation/utils.py b/dsp/evaluation/utils.py
@@ -1,12 +1,9 @@
-from openai import InvalidRequestError
-from openai.error import APIError
-
 import dsp
 import tqdm
 import pandas as pd
 
 from IPython.display import display
-from dsp.utils import EM, F1, HotPotF1
+from dsp.utils import EM
 
 
 def evaluateRetrieval(fn, dev, metric=None):
@@ -19,17 +16,24 @@ def evaluateRetrieval(fn, dev, metric=None):
         d = dict(example)
 
         # d['prediction'] = prediction.answer
-        d['correct'] =  dsp.passage_match(prediction.context, example.answer)
+        d["correct"] = dsp.passage_match(prediction.context, example.answer)
         data.append(d)
 
     df = pd.DataFrame(data)
 
-    percentage = round(100.0 * df['correct'].sum() / len(dev), 1)
+    percentage = round(100.0 * df["correct"].sum() / len(dev), 1)
     print(f"Answered {df['correct'].sum()} / {len(dev)} ({percentage}%) correctly.")
-    df['correct'] = df['correct'].apply(lambda x: '✔️' if x else '❌')
+    df["correct"] = df["correct"].apply(lambda x: "✔️" if x else "❌")
 
     pd.options.display.max_colwidth = None
-    display(df.style.set_table_styles([{'selector': 'th', 'props': [('text-align', 'left')]}, {'selector': 'td', 'props': [('text-align', 'left')]}]))
+    display(
+        df.style.set_table_styles(
+            [
+                {"selector": "th", "props": [("text-align", "left")]},
+                {"selector": "td", "props": [("text-align", "left")]},
+            ]
+        )
+    )
 
 
 def evaluateAnswer(fn, dev, metric=EM):
@@ -43,19 +47,25 @@ def evaluateAnswer(fn, dev, metric=EM):
 
         pred = prediction.answer
 
-        d['prediction'] = pred
-        d['correct'] = metric(pred, example.answer)
+        d["prediction"] = pred
+        d["correct"] = metric(pred, example.answer)
         data.append(d)
 
     df = pd.DataFrame(data)
 
-    percentage = round(100.0 * df['correct'].sum() / len(dev), 1)
+    percentage = round(100.0 * df["correct"].sum() / len(dev), 1)
     print(f"Answered {df['correct'].sum()} / {len(dev)} ({percentage}%) correctly.")
-    df['correct'] = df['correct'].apply(lambda x: '✔️' if x else '❌')
+    df["correct"] = df["correct"].apply(lambda x: "✔️" if x else "❌")
 
     pd.options.display.max_colwidth = None
-    display(df.style.set_table_styles([{'selector': 'th', 'props': [('text-align', 'left')]}, {'selector': 'td', 'props': [('text-align', 'left')]}]))
-
+    display(
+        df.style.set_table_styles(
+            [
+                {"selector": "th", "props": [("text-align", "left")]},
+                {"selector": "td", "props": [("text-align", "left")]},
+            ]
+        )
+    )
 
 
 def evaluate(fn, dev, metric=EM):
@@ -67,21 +77,26 @@ def evaluate(fn, dev, metric=EM):
 
         d = dict(example)
 
-        pred = prediction#.answer
+        pred = prediction  # .answer
 
-        d['prediction'] = pred
-        d['correct'] = metric(pred, example.answer)
+        d["prediction"] = pred
+        d["correct"] = metric(pred, example.answer)
         data.append(d)
 
     df = pd.DataFrame(data)
 
-    percentage = round(100.0 * df['correct'].sum() / len(dev), 1)
+    percentage = round(100.0 * df["correct"].sum() / len(dev), 1)
     print(f"Answered {df['correct'].sum()} / {len(dev)} ({percentage}%) correctly.")
-    df['correct'] = df['correct'].apply(lambda x: '✔️' if x else '❌')
+    df["correct"] = df["correct"].apply(lambda x: "✔️" if x else "❌")
 
     pd.options.display.max_colwidth = None
-    display(df.style.set_table_styles([{'selector': 'th', 'props': [('text-align', 'left')]}, {'selector': 'td', 'props': [('text-align', 'left')]}]))
+    display(
+        df.style.set_table_styles(
+            [
+                {"selector": "th", "props": [("text-align", "left")]},
+                {"selector": "td", "props": [("text-align", "left")]},
+            ]
+        )
+    )
 
     return percentage
-
-
diff --git a/dsp/modules/__init__.py b/dsp/modules/__init__.py
@@ -8,4 +8,4 @@
 from .pyserini import *
 
 from .hf_client import HFClientTGI
-from .hf_client import Anyscale
+from .hf_client import Anyscale
diff --git a/dsp/modules/azurecognitivesearch.py b/dsp/modules/azurecognitivesearch.py
@@ -1,16 +1,18 @@
-from typing import Optional, Union, Any
+from typing import Union, Any
 
 from dsp.utils import dotdict
+
 try:
     from azure.core.credentials import AzureKeyCredential
     from azure.search.documents import SearchClient
     from azure.search.documents._paging import SearchItemPaged
-except ImportError as e:
+except ImportError:
     raise ImportError(
-        "You need to install azure-search-documents library"
+        "You need to install azure-search-documents library. "
         "Please use the command: pip install azure-search-documents"
     )
 
+
 class AzureCognitiveSearch:
-    """Wrapper for the Azure Congitive Search Retrieval."""
+    """Wrapper for the Azure Cognitive Search Retrieval."""
 
@@ -19,51 +21,59 @@ def __init__(
         search_service_name: str,
         search_api_key: str,
         search_index_name: str,
-        field_text: str, # required field to map with "content" field in dsp framework
-        field_score: str, # required field to map with "score" field in dsp framework
-
+        field_text: str,  # required field to map with "content" field in dsp framework
+        field_score: str,  # required field to map with "score" field in dsp framework
     ):
         self.search_service_name = search_service_name
         self.search_api_key = search_api_key
         self.search_index_name = search_index_name
-        self.endpoint=f"https://{self.search_service_name}.search.windows.net"
-        self.field_text = field_text # field name of the text content
-        self.field_score = field_score # field name of the search score
+        self.endpoint = f"https://{self.search_service_name}.search.windows.net"
+        self.field_text = field_text  # field name of the text content
+        self.field_score = field_score  # field name of the search score
         # Create a client
         self.credential = AzureKeyCredential(self.search_api_key)
-        self.client = SearchClient(endpoint=self.endpoint,
-                        index_name=self.search_index_name,
-                        credential=self.credential)
+        self.client = SearchClient(
+            endpoint=self.endpoint,
+            index_name=self.search_index_name,
+            credential=self.credential,
+        )
 
     def __call__(self, query: str, k: int = 10) -> Union[list[str], list[dotdict]]:
-
-        topk: list[dict[str, Any]] = azure_search_request(self.field_text, self.field_score, self.client, query, k)
-        topk = [{**d, "long_text": d["text"]} for d in topk]            
+        topk: list[dict[str, Any]] = azure_search_request(
+            self.field_text, self.field_score, self.client, query, k
+        )
+        topk = [{**d, "long_text": d["text"]} for d in topk]
 
         return [dotdict(psg) for psg in topk]
 
-def azure_search_request(key_content: str, key_score: str,  client: SearchClient, query: str, top: int =1):
-    '''
+
+def azure_search_request(
+    key_content: str, key_score: str, client: SearchClient, query: str, top: int = 1
+):
+    """
-    Search in Azure Conginitve Search Index
+    Search in Azure Cognitive Search Index and map key_content/key_score to "text"/"score"
-    '''
-    results = client.search(search_text=query,top=top)
+    """
+    results = client.search(search_text=query, top=top)
-    results = process_azure_result(results, key_content, key_content)
+    results = process_azure_result(results, key_content, key_score)
 
     return results
 
-def process_azure_result(results:SearchItemPaged, content_key:str, content_score: str):
-    '''
+
+def process_azure_result(
+    results: SearchItemPaged, content_key: str, content_score: str
+):
+    """
-    process received result from Azure cognitive search as dictionary array and map content and score to correct format
+    Convert Azure Cognitive Search results to a list of dicts, renaming the content_key field to "text" and the content_score field to "score"; other fields are copied as-is
-    '''
+    """
     res = []
     for result in results:
         tmp = {}
         for key, value in result.items():
-            if(key == content_key):
-                tmp["text"] = value # assign content
-            elif(key == content_score):
+            if key == content_key:
+                tmp["text"] = value  # assign content
+            elif key == content_score:
                 tmp["score"] = value
             else:
-                tmp[key] = value            
+                tmp[key] = value
         res.append(tmp)
-    return res 
+    return res
diff --git a/dsp/modules/cache_utils.py b/dsp/modules/cache_utils.py
@@ -24,10 +24,12 @@ def wrapper(*args, **kwargs):
         return decorator
 
 
-cachedir = os.environ.get('DSP_CACHEDIR') or os.path.join(Path.home(), 'cachedir_joblib')
+cachedir = os.environ.get("DSP_CACHEDIR") or os.path.join(
+    Path.home(), "cachedir_joblib"
+)
 CacheMemory = Memory(location=cachedir, verbose=0)
 
-cachedir2 = os.environ.get('DSP_NOTEBOOK_CACHEDIR')
+cachedir2 = os.environ.get("DSP_NOTEBOOK_CACHEDIR")
 NotebookCacheMemory = dotdict()
 NotebookCacheMemory.cache = noop_decorator
 

diff --git a/dsp/modules/cohere.py b/dsp/modules/cohere.py
@@ -6,6 +6,7 @@
 
 try:
     import cohere
+
     cohere_api_error = cohere.CohereAPIError
 except ImportError:
     cohere_api_error = Exception
@@ -106,7 +107,7 @@ def __call__(
         prompt: str,
         only_completed: bool = True,
         return_sorted: bool = False,
-        **kwargs
+        **kwargs,
     ):
         assert only_completed, "for now"
         assert return_sorted is False, "for now"

diff --git a/dsp/modules/finetuning/__init__.py b/dsp/modules/finetuning/__init__.py
@@ -1 +1 @@
-from .finetune_hf import *
+from .finetune_hf import *
-Original file line number
+Diff line change
@@ Expand Up / @@ -40,4 +40,4 @@ def __getattr__(self, name): @@
     import sys
     sys.modules[__name__] = DspModule()
-    """
+    """
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		from .finetune_hf import *
		from .finetune_hf import *