diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8ffcd4d..8cf0b68 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -25,6 +25,7 @@ jobs: TEST_TEXT_EMBEDDINGS_MODEL: ${{ secrets.TEST_TEXT_EMBEDDINGS_MODEL }} TEST_MULTIMODAL_EMBEDDINGS_MODEL: ${{ secrets.TEST_MULTIMODAL_EMBEDDINGS_MODEL }} TEST_VISION_MODEL: ${{ secrets.TEST_VISION_MODEL }} + TEST_RERANK_MODEL: ${{ secrets.TEST_RERANK_MODEL }} - name: To PyPI using Flit uses: AsifArmanRahman/to-pypi-using-flit@v1 diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index fc1c6ad..25bf448 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -28,4 +28,5 @@ jobs: TEST_MODEL_NAME: ${{ secrets.TEST_MODEL_NAME }} TEST_TEXT_EMBEDDINGS_MODEL: ${{ secrets.TEST_TEXT_EMBEDDINGS_MODEL }} TEST_MULTIMODAL_EMBEDDINGS_MODEL: ${{ secrets.TEST_MULTIMODAL_EMBEDDINGS_MODEL }} - TEST_VISION_MODEL: ${{ secrets.TEST_VISION_MODEL }} \ No newline at end of file + TEST_VISION_MODEL: ${{ secrets.TEST_VISION_MODEL }} + TEST_RERANK_MODEL: ${{ secrets.TEST_RERANK_MODEL }} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 62c36a5..7c268b0 100644 --- a/.gitignore +++ b/.gitignore @@ -104,3 +104,6 @@ venv.bak/ # mypy .mypy_cache/ +# JetBrains Folder +.idea + diff --git a/predictionguard/client.py b/predictionguard/client.py index a1e44f3..c4762de 100644 --- a/predictionguard/client.py +++ b/predictionguard/client.py @@ -6,6 +6,7 @@ from .src.chat import Chat from .src.completions import Completions from .src.embeddings import Embeddings +from .src.rerank import Rerank from .src.tokenize import Tokenize from .src.translate import Translate from .src.factuality import Factuality @@ -16,8 +17,9 @@ from .version import __version__ __all__ = [ - "PredictionGuard", "Chat", "Completions", "Embeddings", "Tokenize", - "Translate", "Factuality", "Toxicity", "Pii", "Injection", "Models" + "PredictionGuard", "Chat", "Completions", "Embeddings", "Rerank", + 
"Tokenize", "Translate", "Factuality", "Toxicity", "Pii", "Injection", + "Models" ] class PredictionGuard: @@ -63,6 +65,9 @@ def __init__( self.embeddings: Embeddings = Embeddings(self.api_key, self.url) """Embedding generates chat completions based on a conversation history.""" + self.rerank: Rerank = Rerank(self.api_key, self.url) + """Rerank sorts text inputs by semantic relevance to a specified query.""" + self.translate: Translate = Translate(self.api_key, self.url) """Translate converts text from one language to another.""" diff --git a/predictionguard/src/rerank.py b/predictionguard/src/rerank.py new file mode 100644 index 0000000..9853018 --- /dev/null +++ b/predictionguard/src/rerank.py @@ -0,0 +1,119 @@ +import json + +import requests +from typing import Any, Dict, List, Optional + +from ..version import __version__ + + +class Rerank: + """Rerank sorts text inputs by semantic relevance to a specified query. + + Usage:: + + import os + import json + + from predictionguard import PredictionGuard + + # Set your Prediction Guard token as an environmental variable. + os.environ["PREDICTIONGUARD_API_KEY"] = "" + + client = PredictionGuard() + + response = client.rerank.create( + model="bge-reranker-v2-m3", + query="What is Deep Learning?", + documents=[ + "Deep Learning is pizza.", + "Deep Learning is not pizza." + ], + return_documents=True + ) + + print(json.dumps(response, sort_keys=True, indent=4, separators=(",", ": "))) + """ + + + def __init__(self, api_key, url): + self.api_key = api_key + self.url = url + + def create( + self, + model: str, + query: str, + documents: List[str], + return_documents: Optional[bool] = True + ) -> Dict[str, Any]: + """ + Creates a rerank request in the Prediction Guard /rerank API. + + :param model: The model to use for reranking. + :param query: The query to rank against. + :param documents: The documents to rank. + :param return_documents: Whether to return documents with score. 
+ :return: A dictionary containing the ranked documents and relevance scores. + """ + + # Run _create_rerank + choices = self._create_rerank(model, query, documents, return_documents) + return choices + + def _create_rerank(self, model, query, documents, return_documents): + """ + Function to rank text. + """ + + headers = { + "Content-Type": "application/json", + "Authorization": "Bearer " + self.api_key, + "User-Agent": "Prediction Guard Python Client: " + __version__, + } + + payload = { + "model": model, + "query": query, + "documents": documents, + "return_documents": return_documents + } + + payload = json.dumps(payload) + + response = requests.request( + "POST", self.url + "/rerank", headers=headers, data=payload + ) + + if response.status_code == 200: + ret = response.json() + return ret + elif response.status_code == 429: + raise ValueError( + "Could not connect to Prediction Guard API. " + "Too many requests, rate limit or quota exceeded." + ) + else: + # Check if there is a json body in the response. Read that in, + # print out the error field in the json body, and raise an exception. + err = "" + try: + err = response.json()["error"] + except Exception: + pass + raise ValueError("Could not rank documents. " + err) + + def list_models(self): + # Get the list of current models. 
+ headers = { + "Content-Type": "application/json", + "Authorization": "Bearer " + self.api_key, + "User-Agent": "Prediction Guard Python Client: " + __version__ + } + + response = requests.request("GET", self.url + "/models/rerank", headers=headers) + + response_list = [] + for model in response.json()["data"]: + response_list.append(model["id"]) + + return response_list diff --git a/predictionguard/src/tokenize.py b/predictionguard/src/tokenize.py index 919422c..4c22e40 100644 --- a/predictionguard/src/tokenize.py +++ b/predictionguard/src/tokenize.py @@ -36,7 +36,7 @@ def __init__(self, api_key, url): def create(self, model: str, input: str) -> Dict[str, Any]: """ - Creates a prompt injection check request in the Prediction Guard /injection API. + Creates a tokenization request in the Prediction Guard /tokenize API. :param model: The model to use for generating tokens. :param input: The text to convert into tokens. @@ -49,7 +49,7 @@ def create(self, model: str, input: str) -> Dict[str, Any]: "Model %s is not supported by this endpoint." % model ) - # Run _check_injection + # Run _create_tokens choices = self._create_tokens(model, input) return choices diff --git a/predictionguard/version.py b/predictionguard/version.py index ae78e2b..33c738e 100644 --- a/predictionguard/version.py +++ b/predictionguard/version.py @@ -1,2 +1,2 @@ # Setting the package version -__version__ = "2.6.0" +__version__ = "2.7.0" diff --git a/tests/test_rerank.py b/tests/test_rerank.py new file mode 100644 index 0000000..f11d305 --- /dev/null +++ b/tests/test_rerank.py @@ -0,0 +1,31 @@ +import os + +from predictionguard import PredictionGuard + + +def test_rerank_create(): + test_client = PredictionGuard() + + response = test_client.rerank.create( + model=os.environ["TEST_RERANK_MODEL"], + query="What is Deep Learning?", + documents=[ + "Deep Learning is pizza.", + "Deep Learning is not pizza." 
+ ], + return_documents=True, + ) + + assert len(response) > 0 + assert type(response["results"][0]["index"]) is int + assert type(response["results"][0]["relevance_score"]) is float + assert type(response["results"][0]["text"]) is str + + +def test_rerank_list(): + test_client = PredictionGuard() + + response = test_client.rerank.list_models() + + assert len(response) > 0 + assert type(response[0]) is str