From dfbd845bfb82f048a99ce49d6918c0dcb524cb84 Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 11 Aug 2025 15:58:58 -0500 Subject: [PATCH 1/4] Quick and dirty start on ChatHuggingface() --- chatlas/__init__.py | 3 +- chatlas/_provider_huggingface.py | 152 +++++++++++++++++++++++++++++ tests/test_provider_huggingface.py | 117 ++++++++++++++++++++++ 3 files changed, 271 insertions(+), 1 deletion(-) create mode 100644 chatlas/_provider_huggingface.py create mode 100644 tests/test_provider_huggingface.py diff --git a/chatlas/__init__.py b/chatlas/__init__.py index a60a5636..133fd13c 100644 --- a/chatlas/__init__.py +++ b/chatlas/__init__.py @@ -11,6 +11,7 @@ from ._provider_github import ChatGithub from ._provider_google import ChatGoogle, ChatVertex from ._provider_groq import ChatGroq +from ._provider_huggingface import ChatHuggingFace from ._provider_ollama import ChatOllama from ._provider_openai import ChatAzureOpenAI, ChatOpenAI from ._provider_perplexity import ChatPerplexity @@ -33,6 +34,7 @@ "ChatGithub", "ChatGoogle", "ChatGroq", + "ChatHuggingFace", "ChatOllama", "ChatOpenAI", "ChatAzureOpenAI", @@ -58,4 +60,3 @@ "Turn", "types", ) - diff --git a/chatlas/_provider_huggingface.py b/chatlas/_provider_huggingface.py new file mode 100644 index 00000000..15128336 --- /dev/null +++ b/chatlas/_provider_huggingface.py @@ -0,0 +1,152 @@ +from __future__ import annotations + +import os +from typing import TYPE_CHECKING, Optional + +from ._chat import Chat +from ._logging import log_model_default +from ._provider_openai import OpenAIProvider + +if TYPE_CHECKING: + from openai.types.chat import ChatCompletion + + from .types.openai import ChatClientArgs, SubmitInputArgs + + +def ChatHuggingFace( + *, + system_prompt: Optional[str] = None, + model: Optional[str] = None, + api_key: Optional[str] = None, + kwargs: Optional["ChatClientArgs"] = None, +) -> Chat["SubmitInputArgs", ChatCompletion]: + """ + Chat with a model hosted on Hugging Face Inference API. + + [Hugging Face](https://huggingface.co/) hosts a variety of open-source + and proprietary AI models available via their Inference API. + To use the Hugging Face API, you must have an Access Token, which you can obtain + from your [Hugging Face account](https://huggingface.co/settings/tokens). + Ensure that at least "Make calls to Inference Providers" and + "Make calls to your Inference Endpoints" is checked. + + This function is a lightweight wrapper around ChatOpenAI(), with + the defaults adjusted for Hugging Face. + + ## Known limitations + + * Some models do not support the chat interface or parts of it, for example + `google/gemma-2-2b-it` does not support a system prompt. You will need to + carefully choose the model. + * Tool calling support varies by model - many models do not support it. + + Prerequisites + -------------- + + ::: {.callout-note} + ## API key + + You will need to create a Hugging Face account and generate an API token + from your [account settings](https://huggingface.co/settings/tokens). + Make sure to enable "Make calls to Inference Providers" permission. + ::: + + Examples + -------- + ```python + import os + from chatlas import ChatHuggingFace + + chat = ChatHuggingFace(api_key=os.getenv("HUGGINGFACE_API_KEY")) + chat.chat("What is the capital of France?") + ``` + + Parameters + ---------- + system_prompt + A system prompt to set the behavior of the assistant. + model + The model to use for the chat. The default, None, will pick a reasonable + default, and warn you about it. We strongly recommend explicitly + choosing a model for all but the most casual use. + api_key + The API key to use for authentication. You generally should not supply + this directly, but instead set the `HUGGINGFACE_API_KEY` environment + variable. + kwargs + Additional arguments to pass to the underlying OpenAI client + constructor. + + Returns + ------- + Chat + A chat object that retains the state of the conversation. + + Note + ---- + Pasting an API key into a chat constructor (e.g., `ChatHuggingFace(api_key="...")`) + is the simplest way to get started, and is fine for interactive use, but is + problematic for code that may be shared with others. + + Instead, consider using environment variables or a configuration file to manage + your credentials. One popular way to manage credentials is to use a `.env` file + to store your credentials, and then use the `python-dotenv` package to load them + into your environment. + + ```shell + pip install python-dotenv + ``` + + ```shell + # .env + HUGGINGFACE_API_KEY=... + ``` + + ```python + from chatlas import ChatHuggingFace + from dotenv import load_dotenv + + load_dotenv() + chat = ChatHuggingFace() + chat.console() + ``` + + Another, more general, solution is to load your environment variables into the shell + before starting Python (maybe in a `.bashrc`, `.zshrc`, etc. file): + + ```shell + export HUGGINGFACE_API_KEY=... + ``` + """ + if api_key is None: + api_key = os.getenv("HUGGINGFACE_API_KEY") + + if model is None: + model = log_model_default("meta-llama/Llama-3.1-8B-Instruct") + + return Chat( + provider=HuggingFaceProvider( + api_key=api_key, + model=model, + kwargs=kwargs, + ), + system_prompt=system_prompt, + ) + + +class HuggingFaceProvider(OpenAIProvider): + def __init__( + self, + *, + api_key: Optional[str] = None, + model: str, + kwargs: Optional["ChatClientArgs"] = None, + ): + # https://huggingface.co/docs/inference-providers/en/index?python-clients=requests#http--curl + super().__init__( + name="HuggingFace", + model=model, + api_key=api_key, + base_url="https://router.huggingface.co/v1", + kwargs=kwargs, + ) diff --git a/tests/test_provider_huggingface.py b/tests/test_provider_huggingface.py new file mode 100644 index 00000000..29d9205f --- /dev/null +++ b/tests/test_provider_huggingface.py @@ -0,0 +1,117 @@ +import os +import pytest +from chatlas import ChatHuggingFace + +from .conftest import ( + assert_data_extraction, + assert_images_inline, + assert_images_remote, + assert_tools_async, + assert_tools_parallel, + assert_tools_sequential, + assert_tools_simple, + assert_tools_simple_stream_content, + assert_turns_existing, + assert_turns_system, +) + + +def test_huggingface_simple_request(): + chat = ChatHuggingFace( + system_prompt="Be as terse as possible; no punctuation", + model="meta-llama/Llama-3.1-8B-Instruct", + ) + chat.chat("What is 1 + 1?") + turn = chat.get_last_turn() + assert turn is not None + assert turn.tokens is not None + assert len(turn.tokens) == 3 + assert turn.tokens[0] > 0 # input tokens + assert turn.tokens[1] > 0 # output tokens + assert turn.finish_reason == "stop" + + +@pytest.mark.asyncio +async def test_huggingface_simple_streaming_request(): + chat = ChatHuggingFace( + system_prompt="Be as terse as possible; no punctuation", + model="meta-llama/Llama-3.1-8B-Instruct", + ) + res = [] + async for x in await chat.stream_async("What is 1 + 1?"): + res.append(x) + assert "2" in "".join(res) + turn = chat.get_last_turn() + assert turn is not None + assert turn.finish_reason == "stop" + + +def test_huggingface_respects_turns_interface(): + chat_fun = ChatHuggingFace + assert_turns_system(chat_fun) + assert_turns_existing(chat_fun) + + +def test_huggingface_tools(): + chat_fun = lambda **kwargs: ChatHuggingFace( + model="meta-llama/Llama-3.1-8B-Instruct", **kwargs + ) + assert_tools_simple(chat_fun) + assert_tools_sequential(chat_fun) + assert_tools_parallel(chat_fun) + assert_tools_simple_stream_content(chat_fun) + + +@pytest.mark.asyncio +async def test_huggingface_tools_async(): + chat_fun = lambda **kwargs: ChatHuggingFace( + model="meta-llama/Llama-3.1-8B-Instruct", **kwargs + ) + await assert_tools_async(chat_fun) + + +def test_huggingface_data_extraction(): + chat_fun = lambda **kwargs: ChatHuggingFace( + model="meta-llama/Llama-3.1-8B-Instruct", **kwargs + ) + assert_data_extraction(chat_fun) + + +def test_huggingface_images(): + # Use a vision model that supports images + chat_fun = lambda **kwargs: ChatHuggingFace( + model="Qwen/Qwen2.5-VL-7B-Instruct", **kwargs + ) + assert_images_inline(chat_fun) + assert_images_remote(chat_fun) + + +def test_huggingface_api_key_from_env(): + # Test that API key is read from environment + original_key = os.environ.get("HUGGINGFACE_API_KEY") + test_key = "test_key_123" + + try: + os.environ["HUGGINGFACE_API_KEY"] = test_key + chat = ChatHuggingFace() + assert chat.provider._client.api_key == test_key + finally: + if original_key is not None: + os.environ["HUGGINGFACE_API_KEY"] = original_key + elif "HUGGINGFACE_API_KEY" in os.environ: + del os.environ["HUGGINGFACE_API_KEY"] + + +def test_huggingface_custom_model(): + chat = ChatHuggingFace(model="microsoft/DialoGPT-medium") + assert chat.provider.model == "microsoft/DialoGPT-medium" + + +def test_huggingface_base_url(): + chat = ChatHuggingFace() + assert "huggingface.co" in str(chat.provider._client.base_url) + + +def test_huggingface_provider_name(): + chat = ChatHuggingFace() + assert chat.provider.name == "HuggingFace" \ No newline at end of file From 4ec665e3d49ceb20e64aada0d314a1ceb917041d Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 11 Aug 2025 16:43:06 -0500 Subject: [PATCH 2/4] Add to API reference --- .github/workflows/test.yml | 1 + docs/_quarto.yml | 1 + docs/get-started/models.qmd | 1 + 3 files changed, 3 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 180d6eb9..9524c8c2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -32,6 +32,7 @@ jobs: DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }} DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }} PORTKEY_API_KEY: ${{ secrets.PORTKEY_API_KEY }} + HUGGINGFACE_API_KEY: ${{ secrets.HUGGINGFACE_API_KEY }} # Free tier of Google is rate limited, so we only test on 3.12 TEST_GOOGLE: ${{ matrix.config.test_google }} # Free tier of Azure is rate limited, so we only test on 3.12 diff --git a/docs/_quarto.yml b/docs/_quarto.yml index 307ea253..2c7bc351 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -120,6 +120,7 @@ quartodoc: - ChatGithub - ChatGoogle - ChatGroq + - ChatHuggingFace - ChatOllama - ChatOpenAI - ChatPerplexity diff --git a/docs/get-started/models.qmd b/docs/get-started/models.qmd index f6421b2d..9c011d1f 100644 --- a/docs/get-started/models.qmd +++ b/docs/get-started/models.qmd @@ -20,6 +20,7 @@ To see the pre-requisites for a given provider, visit the relevant usage page in | GitHub model marketplace | [`ChatGithub()`](../reference/ChatGithub.qmd) | | | Google (Gemini) | [`ChatGoogle()`](../reference/ChatGoogle.qmd) | | | Groq | [`ChatGroq()`](../reference/ChatGroq.qmd) | | +| Hugging Face | [`ChatHuggingFace()`](../reference/ChatHuggingFace.qmd) | | | Ollama local models | [`ChatOllama()`](../reference/ChatOllama.qmd) | | | OpenAI | [`ChatOpenAI()`](../reference/ChatOpenAI.qmd) | | | perplexity.ai | [`ChatPerplexity()`](../reference/ChatPerplexity.qmd) | | From 7359d63f09ddce237f76e066e847beb35b429a0d Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 11 Aug 2025 17:04:24 -0500 Subject: [PATCH 3/4] Free tier of Hugging Face seems _very_ limited --- .github/workflows/test.yml | 1 - tests/test_provider_huggingface.py | 56 +++++++++++------------------- tests/test_provider_portkey.py | 6 ++++ 3 files changed, 27 insertions(+), 36 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9524c8c2..180d6eb9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -32,7 +32,6 @@ jobs: DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }} DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }} PORTKEY_API_KEY: ${{ secrets.PORTKEY_API_KEY }} - HUGGINGFACE_API_KEY: ${{ secrets.HUGGINGFACE_API_KEY }} # Free tier of Google is rate limited, so we only test on 3.12 TEST_GOOGLE: ${{ matrix.config.test_google }} # Free tier of Azure is rate limited, so we only test on 3.12 diff --git a/tests/test_provider_huggingface.py b/tests/test_provider_huggingface.py index 29d9205f..f229ac56 100644 --- a/tests/test_provider_huggingface.py +++ b/tests/test_provider_huggingface.py @@ -1,4 +1,5 @@ import os + import pytest from chatlas import ChatHuggingFace @@ -7,14 +8,18 @@ assert_images_inline, assert_images_remote, assert_tools_async, - assert_tools_parallel, - assert_tools_sequential, assert_tools_simple, - assert_tools_simple_stream_content, assert_turns_existing, assert_turns_system, ) +# I think we would need to pay Hugging Face to actually run these tests? +api_key = os.getenv("HUGGINGFACE_API_KEY") +if api_key is None: + pytest.skip( + "HUGGINGFACE_API_KEY is not set; skipping tests", allow_module_level=True + ) + def test_huggingface_simple_request(): chat = ChatHuggingFace( @@ -53,55 +58,36 @@ def test_huggingface_respects_turns_interface(): def test_huggingface_tools(): - chat_fun = lambda **kwargs: ChatHuggingFace( - model="meta-llama/Llama-3.1-8B-Instruct", **kwargs - ) + def chat_fun(**kwargs): + return ChatHuggingFace(model="meta-llama/Llama-3.1-8B-Instruct", **kwargs) + assert_tools_simple(chat_fun) - assert_tools_sequential(chat_fun) - assert_tools_parallel(chat_fun) - assert_tools_simple_stream_content(chat_fun) @pytest.mark.asyncio async def test_huggingface_tools_async(): - chat_fun = lambda **kwargs: ChatHuggingFace( - model="meta-llama/Llama-3.1-8B-Instruct", **kwargs - ) + def chat_fun(**kwargs): + return ChatHuggingFace(model="meta-llama/Llama-3.1-8B-Instruct", **kwargs) + await assert_tools_async(chat_fun) def test_huggingface_data_extraction(): - chat_fun = lambda **kwargs: ChatHuggingFace( - model="meta-llama/Llama-3.1-8B-Instruct", **kwargs - ) + def chat_fun(**kwargs): + return ChatHuggingFace(model="meta-llama/Llama-3.1-8B-Instruct", **kwargs) + assert_data_extraction(chat_fun) def test_huggingface_images(): # Use a vision model that supports images - chat_fun = lambda **kwargs: ChatHuggingFace( - model="Qwen/Qwen2.5-VL-7B-Instruct", **kwargs - ) + def chat_fun(**kwargs): + return ChatHuggingFace(model="Qwen/Qwen2.5-VL-7B-Instruct", **kwargs) + assert_images_inline(chat_fun) assert_images_remote(chat_fun) -def test_huggingface_api_key_from_env(): - # Test that API key is read from environment - original_key = os.environ.get("HUGGINGFACE_API_KEY") - test_key = "test_key_123" - - try: - os.environ["HUGGINGFACE_API_KEY"] = test_key - chat = ChatHuggingFace() - assert chat.provider._client.api_key == test_key - finally: - if original_key is not None: - os.environ["HUGGINGFACE_API_KEY"] = original_key - elif "HUGGINGFACE_API_KEY" in os.environ: - del os.environ["HUGGINGFACE_API_KEY"] - - def test_huggingface_custom_model(): chat = ChatHuggingFace(model="microsoft/DialoGPT-medium") assert chat.provider.model == "microsoft/DialoGPT-medium" @@ -114,4 +100,4 @@ def test_huggingface_base_url(): def test_huggingface_provider_name(): chat = ChatHuggingFace() - assert chat.provider.name == "HuggingFace" \ No newline at end of file + assert chat.provider.name == "HuggingFace" diff --git a/tests/test_provider_portkey.py b/tests/test_provider_portkey.py index 96bb7bc0..02f36c89 100644 --- a/tests/test_provider_portkey.py +++ b/tests/test_provider_portkey.py @@ -1,3 +1,5 @@ +import os + import pytest from chatlas import ChatPortkey @@ -14,6 +16,10 @@ assert_turns_system, ) +api_key = os.getenv("PORTKEY_API_KEY") +if api_key is None: + pytest.skip("PORTKEY_API_KEY is not set; skipping tests", allow_module_level=True) + def _chat_portkey_test(**kwargs): model = kwargs.pop("model", "@openai/gpt-4o-mini") From c1bf0057a37f55a63779112cd394a874f5856e39 Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 11 Aug 2025 17:10:10 -0500 Subject: [PATCH 4/4] Proofread docstring; update changelog --- CHANGELOG.md | 5 ++++- chatlas/_provider_huggingface.py | 23 +++++++++++++---------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a467098d..6aeb4ba2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [UNRELEASED] -* Added a new `ChatPortkey()` which integrates with [Portkey AI](https://portkey.ai/). (#) +### New features + +* Added `ChatHuggingFace()` for chatting via [Hugging Face](https://huggingface.co/). (#144) +* Added `ChatPortkey()` for chatting via [Portkey AI](https://portkey.ai/). (#143) ## [0.9.2] - 2025-08-08 diff --git a/chatlas/_provider_huggingface.py b/chatlas/_provider_huggingface.py index 15128336..4bfcb007 100644 --- a/chatlas/_provider_huggingface.py +++ b/chatlas/_provider_huggingface.py @@ -30,16 +30,6 @@ def ChatHuggingFace( Ensure that at least "Make calls to Inference Providers" and "Make calls to your Inference Endpoints" is checked. - This function is a lightweight wrapper around ChatOpenAI(), with - the defaults adjusted for Hugging Face. - - ## Known limitations - - * Some models do not support the chat interface or parts of it, for example - `google/gemma-2-2b-it` does not support a system prompt. You will need to - carefully choose the model. - * Tool calling support varies by model - many models do not support it. - Prerequisites -------------- @@ -82,6 +72,19 @@ def ChatHuggingFace( Chat A chat object that retains the state of the conversation. + Known limitations + ----------------- + + * Some models do not support the chat interface or parts of it, for example + `google/gemma-2-2b-it` does not support a system prompt. You will need to + carefully choose the model. + * Tool calling support varies by model - many models do not support it. + + Note + ---- + This function is a lightweight wrapper around [](`~chatlas.ChatOpenAI`), with + the defaults tweaked for Hugging Face. + Note ---- Pasting an API key into a chat constructor (e.g., `ChatHuggingFace(api_key="...")`)