# Custom Clients

In [None]:
import os
from pprint import pprint as pp

# Request debug-level logging (the saved cell outputs below show `debug` lines).
# NOTE(review): assumes langgate reads LOG_LEVEL when it configures logging, so
# this should run before the langgate imports in later cells — TODO confirm.
os.environ["LOG_LEVEL"] = "debug"

# Base URL of the LangGate API, used by the HTTP registry client examples below.
LANGGATE_URL = "http://localhost:4000/api/v1"

# If running the langgate server behind an Envoy proxy in Docker:
# LANGGATE_URL = "http://localhost:10000/api/v1"

# If running and accessing langgate from within a Kubernetes cluster:
# LANGGATE_URL = "http://langgate.ns.svc.cluster.local:10000/api/v1"

## Custom Local Registry Client

In [2]:
# If you want to set a custom config file:
# os.environ["LANGGATE_CONFIG"] = "some_custom_path/langgate_config.yaml"

### Example 1: Using the SDK's default local registry client

In [None]:
from langgate.sdk import LangGateLocal

# `LangGateLocal.registry` is a singleton
client = LangGateLocal()
# Top-level `await` works in a notebook cell; in a plain script, wrap this in an
# async function instead.
models = await client.list_models()  # returns the default LLMInfo schema list
# Pretty-print only the first entry to keep the cell output short.
pp(models[0])

### Example 2: Using the standalone default local registry client
The registry can be installed without the SDK if you do not need to use any other SDK features such as transforming parameters.
It can be installed separately with:
```bash
uv add langgate[registry]
```
or with pip:
```bash
pip install langgate[registry]
```

In [13]:
from langgate.registry.local import LocalRegistryClient

# The concrete `LocalRegistryClient` class is a singleton
client = LocalRegistryClient()
models = await client.list_models()  # returns the default LLMInfo schema list
# Pretty-print only the first entry to keep the cell output short.
pp(models[0])

LLMInfo(id='openai/gpt-4o', name='GPT-4o', provider=ModelProvider(id='openai', name='OpenAI', description=None), description='The GPT-4o (omni) model from OpenAI builds upon the GPT-4 series with improved performance and multimodal capabilities. GPT-4o is great for most tasks.', costs=ModelCost(input_cost_per_token=Decimal('0.0000025'), output_cost_per_token=Decimal('0.00001'), input_cost_per_token_batches=Decimal('0.00000125'), output_cost_per_token_batches=Decimal('0.000005'), cache_read_input_token_cost=Decimal('0.00000125'), input_cost_per_image='0.003613'), capabilities=ModelCapabilities(supports_tools=True, supports_parallel_tool_calls=True, supports_vision=True, supports_audio_input=None, supports_audio_output=None, supports_prompt_caching=True, supports_response_schema=True, supports_system_messages=True, supports_tool_choice=True), context_window=ContextWindow(max_input_tokens=128000, max_output_tokens=16384), updated_dt=datetime.datetime(2025, 4, 2, 21, 27, 1, 339331, tzinfo=

### Example 3: Subclassing BaseLocalRegistryClient with custom schema

In [14]:
from langgate.registry.local import BaseLocalRegistryClient
from langgate.core.models import LLMInfo


class CustomLLMInfo(LLMInfo):
    """Example schema: LLMInfo extended with two extra fields.

    Both extra fields have defaults, so they may be omitted when the schema is
    populated.
    """

    # Free-form extra string; defaults to the empty string.
    extra_field: str = ""
    # Arbitrary extra metadata; defaults to an empty dict.
    custom_metadata: dict = {}


class CustomLocalRegistryClient(BaseLocalRegistryClient[CustomLLMInfo]):
    """Local registry client parametrised with the CustomLLMInfo schema.

    The body is intentionally empty: no methods are overridden here.
    """

    # This is not a singleton unless you implement it as such.


custom_client = CustomLocalRegistryClient()
custom_models = (
    await custom_client.list_models()
)  # Typed and validated as list[CustomLLMInfo]
pp(custom_models[0])

[2m2025-04-02 22:27:39[0m [[32m[1mdebug    [0m] [1mreusing_registry_singleton    [0m [36minitialized[0m=[35mTrue[0m
[2m2025-04-02 22:27:39[0m [[32m[1mdebug    [0m] [1minitialized_base_local_registry_client[0m
[2m2025-04-02 22:27:39[0m [[32m[1mdebug    [0m] [1mrefreshed_model_cache         [0m [36mmodel_count[0m=[35m5[0m
CustomLLMInfo(id='openai/gpt-4o', name='GPT-4o', provider=ModelProvider(id='openai', name='OpenAI', description=None), description='The GPT-4o (omni) model from OpenAI builds upon the GPT-4 series with improved performance and multimodal capabilities. GPT-4o is great for most tasks.', costs=ModelCost(input_cost_per_token=Decimal('0.0000025'), output_cost_per_token=Decimal('0.00001'), input_cost_per_token_batches=Decimal('0.00000125'), output_cost_per_token_batches=Decimal('0.000005'), cache_read_input_token_cost=Decimal('0.00000125'), input_cost_per_image='0.003613'), capabilities=ModelCapabilities(supports_tools=True, supports_parallel_tool

### Example 4: Using the base class directly with type parameter

In [15]:
# This is not a singleton.
local_client_with_custom_schema = BaseLocalRegistryClient[CustomLLMInfo](
    model_info_cls=CustomLLMInfo
)
direct_models = (
    await local_client_with_custom_schema.list_models()
)  # Typed and validated as list[CustomLLMInfo]

pp(direct_models[0])

[2m2025-04-02 22:27:54[0m [[32m[1mdebug    [0m] [1mreusing_registry_singleton    [0m [36minitialized[0m=[35mTrue[0m
[2m2025-04-02 22:27:54[0m [[32m[1mdebug    [0m] [1minitialized_base_local_registry_client[0m
[2m2025-04-02 22:27:54[0m [[32m[1mdebug    [0m] [1mrefreshed_model_cache         [0m [36mmodel_count[0m=[35m5[0m
CustomLLMInfo(id='openai/gpt-4o', name='GPT-4o', provider=ModelProvider(id='openai', name='OpenAI', description=None), description='The GPT-4o (omni) model from OpenAI builds upon the GPT-4 series with improved performance and multimodal capabilities. GPT-4o is great for most tasks.', costs=ModelCost(input_cost_per_token=Decimal('0.0000025'), output_cost_per_token=Decimal('0.00001'), input_cost_per_token_batches=Decimal('0.00000125'), output_cost_per_token_batches=Decimal('0.000005'), cache_read_input_token_cost=Decimal('0.00000125'), input_cost_per_image='0.003613'), capabilities=ModelCapabilities(supports_tools=True, supports_parallel_tool

## Custom HTTP Registry Client

In [None]:
from pydantic import SecretStr

# Optional: set an API key (only needed if your LangGate service requires
# authentication). "your_api_key" is a placeholder: replace it with your own
# key, and prefer loading a real key from the environment over committing it.
# SecretStr keeps the value masked when the object is printed or logged.
api_key = SecretStr("your_api_key")

### Example 1: Using the default http client

In [4]:
from langgate.client.http import HTTPRegistryClient

# The concrete `HTTPRegistryClient` class is a singleton
# NOTE: this rebinds `client` and `models` from the local registry examples above.
client = HTTPRegistryClient(base_url=LANGGATE_URL, api_key=api_key)
models = await client.list_models()  # returns the default LLMInfo schema list
# Pretty-print only the first entry to keep the cell output short.
pp(models[0])

[2m2025-04-02 19:14:21[0m [[32m[1mdebug    [0m] [1mrefreshed_model_cache         [0m [36mmodel_count[0m=[35m34[0m
LLMInfo(id='openai/gpt-4o', name='GPT-4o', provider=ModelProvider(id='openai', name='OpenAI', description=None), description='The GPT-4o (omni) model from OpenAI builds upon the GPT-4 series with improved performance and multimodal capabilities. GPT-4o is great for most tasks.', costs=ModelCost(input_cost_per_token=Decimal('0.0000025'), output_cost_per_token=Decimal('0.00001'), input_cost_per_token_batches=Decimal('0.00000125'), output_cost_per_token_batches=Decimal('0.000005'), cache_read_input_token_cost=Decimal('0.00000125'), input_cost_per_image='0.003613'), capabilities=ModelCapabilities(supports_tools=True, supports_parallel_tool_calls=True, supports_vision=True, supports_audio_input=None, supports_audio_output=None, supports_prompt_caching=True, supports_response_schema=True, supports_system_messages=True, supports_tool_choice=True), context_window=Context

### Example 2: Subclassing BaseHTTPRegistryClient with custom schema

In [5]:
from langgate.client.http import BaseHTTPRegistryClient
from langgate.core.models import LLMInfo


# NOTE(review): same definition as `CustomLLMInfo` in the local registry section;
# presumably repeated so the HTTP examples can run on their own. Running both
# sections simply rebinds the name to an identical class.
class CustomLLMInfo(LLMInfo):
    """Example schema: LLMInfo extended with two extra fields.

    Both extra fields have defaults, so they may be omitted when the schema is
    populated.
    """

    # Free-form extra string; defaults to the empty string.
    extra_field: str = ""
    # Arbitrary extra metadata; defaults to an empty dict.
    custom_metadata: dict = {}


class CustomHTTPClient(BaseHTTPRegistryClient[CustomLLMInfo]):
    """HTTP registry client parametrised with the CustomLLMInfo schema.

    The body is intentionally empty: no methods are overridden here.
    """

    # This is not a singleton unless you implement it as such.


custom_client = CustomHTTPClient(LANGGATE_URL, api_key=api_key)
custom_models = (
    await custom_client.list_models()
)  # Typed and validated as list[CustomLLMInfo]
pp(custom_models[0])

[2m2025-04-02 19:14:23[0m [[32m[1mdebug    [0m] [1mrefreshed_model_cache         [0m [36mmodel_count[0m=[35m34[0m
CustomLLMInfo(id='openai/gpt-4o', name='GPT-4o', provider=ModelProvider(id='openai', name='OpenAI', description=None), description='The GPT-4o (omni) model from OpenAI builds upon the GPT-4 series with improved performance and multimodal capabilities. GPT-4o is great for most tasks.', costs=ModelCost(input_cost_per_token=Decimal('0.0000025'), output_cost_per_token=Decimal('0.00001'), input_cost_per_token_batches=Decimal('0.00000125'), output_cost_per_token_batches=Decimal('0.000005'), cache_read_input_token_cost=Decimal('0.00000125'), input_cost_per_image='0.003613'), capabilities=ModelCapabilities(supports_tools=True, supports_parallel_tool_calls=True, supports_vision=True, supports_audio_input=None, supports_audio_output=None, supports_prompt_caching=True, supports_response_schema=True, supports_system_messages=True, supports_tool_choice=True), context_window=C

### Example 3: Using the base class directly with type parameter

In [None]:
# This is not a singleton.
client_with_custom_schema = BaseHTTPRegistryClient[CustomLLMInfo](
    base_url=LANGGATE_URL, api_key=api_key, model_info_cls=CustomLLMInfo
)
direct_models = (
    await client_with_custom_schema.list_models()
)  # Typed and validated as list[CustomLLMInfo]

pp(direct_models[0])

[2m2025-04-02 19:14:24[0m [[32m[1mdebug    [0m] [1mrefreshed_model_cache         [0m [36mmodel_count[0m=[35m34[0m
CustomLLMInfo(id='openai/gpt-4o', name='GPT-4o', provider=ModelProvider(id='openai', name='OpenAI', description=None), description='The GPT-4o (omni) model from OpenAI builds upon the GPT-4 series with improved performance and multimodal capabilities. GPT-4o is great for most tasks.', costs=ModelCost(input_cost_per_token=Decimal('0.0000025'), output_cost_per_token=Decimal('0.00001'), input_cost_per_token_batches=Decimal('0.00000125'), output_cost_per_token_batches=Decimal('0.000005'), cache_read_input_token_cost=Decimal('0.00000125'), input_cost_per_image='0.003613'), capabilities=ModelCapabilities(supports_tools=True, supports_parallel_tool_calls=True, supports_vision=True, supports_audio_input=None, supports_audio_output=None, supports_prompt_caching=True, supports_response_schema=True, supports_system_messages=True, supports_tool_choice=True), context_window=C