# MLX Server Structured Output Examples

This is a detailed text version of the function calling and structured output examples for MLX Server, accessed through its OpenAI-compatible API.

## Setup

In [8]:
from openai import OpenAI

## Initialize the client

Connect to your local MLX server:

In [18]:
# Point the OpenAI SDK at the local MLX server rather than the default endpoint.
# NOTE(review): the api_key looks like a placeholder for a local server —
# confirm whether the server actually validates it.
client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="mlx-server-api-key",
)

## Function calling example

In [19]:
# Conversation sent to the model: a single user turn asking about Tokyo's weather.
messages = [
    {
        "role": "user",
        "content": "What is the weather in Tokyo?"
    }
]

# Tools the model is allowed to call. Each entry describes one function and
# gives a JSON Schema for its arguments.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the weather in a given city",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {"type": "string", "description": "The city to get the weather for"}
                },
                # A weather lookup is meaningless without a city; without this
                # list the schema would permit a tool call with empty arguments.
                "required": ["city"]
            }
        }
    }
]

### Non-Streaming Function Calling Example

In [20]:
# Single-shot (non-streaming) request. tool_choice="auto" leaves it to the
# model whether to answer in text or to call one of the supplied tools.
completion = client.chat.completions.create(
    model="mlx-server-model",
    messages=messages,
    tools=tools,
    tool_choice="auto",
    max_tokens=512,
)

# Dump the full response object; any tool call requested by the model is
# found under choices[0].message.tool_calls.
print(completion)

ChatCompletion(id='chatcmpl_1754135306120611', choices=[Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content='', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_1754135306725351', function=Function(arguments='{"city": "Tokyo"}', name='get_weather'), type='function', index=0)], reasoning_content=None))], created=1754135306, model='mlx-server-model', object='chat.completion', service_tier=None, system_fingerprint=None, usage=None)


### Streaming Function Calling Example

In [21]:
# Same tool-calling request as the non-streaming example, but with stream=True
# so the response is delivered incrementally as chunks.
completion = client.chat.completions.create(
    model="mlx-server-model",
    messages=messages,
    tools=tools,
    tool_choice="auto",
    # Cap generation length like the other requests in this notebook do,
    # instead of relying on the server's default limit.
    max_tokens=512,
    stream=True,
)

# Print each raw chunk as it arrives; tool-call fragments show up in
# choices[0].delta.tool_calls.
for chunk in completion:
    print(chunk)

ChatCompletionChunk(id='chatcmpl_1754135306422307', choices=[Choice(delta=ChoiceDelta(content=None, function_call=None, refusal=None, role='assistant', tool_calls=None, reasoning_content=None), finish_reason=None, index=0, logprobs=None)], created=1754135306, model='mlx-server-model', object='chat.completion.chunk', service_tier=None, system_fingerprint=None, usage=None)
ChatCompletionChunk(id='chatcmpl_1754135306422307', choices=[Choice(delta=ChoiceDelta(content=None, function_call=None, refusal=None, role='assistant', tool_calls=[ChoiceDeltaToolCall(index=0, id='call_1754135307829795', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')], reasoning_content=None), finish_reason=None, index=0, logprobs=None)], created=1754135306, model='mlx-server-model', object='chat.completion.chunk', service_tier=None, system_fingerprint=None, usage=None)
ChatCompletionChunk(id='chatcmpl_1754135306422307', choices=[Choice(delta=ChoiceDelta(content=None, function_

## JSON Schema Example

In [22]:
# Conversation for the structured-output examples: the system turn states the
# extraction task and the user turn supplies the free-form address text.
messages = [
    {
        "role": "system",
        "content": "Extract the address from the user input into the specified JSON format.",
    },
    {
        "role": "user",
        "content": "Please format this address: 1 Hacker Wy Menlo Park CA 94025",
    },
]

# JSON Schema passed as response_format: the reply must be an object with one
# required "address" object holding four required string fields.
response_format = {
    "type": "json_schema",
    "json_schema": {
        "name": "Address",
        "schema": {
            "properties": {
                "address": {
                    "type": "object",
                    "properties": {
                        "street": {"type": "string"},
                        "city": {"type": "string"},
                        "state": {
                            "type": "string",
                            "description": "2 letter abbreviation of the state",
                        },
                        "zip": {
                            "type": "string",
                            "description": "5 digit zip code",
                        },
                    },
                    "required": ["street", "city", "state", "zip"],
                },
            },
            "required": ["address"],
            "type": "object",
        },
    },
}


### Non-streaming Structured Output Example

In [23]:
# Single-shot (non-streaming) request whose reply is constrained by the
# JSON schema passed as response_format.
completion = client.chat.completions.create(
    model="mlx-server-model",
    messages=messages,
    max_tokens=512,
    response_format=response_format,
)

# Dump the full response object; the generated JSON text is in
# choices[0].message.content.
print(completion)

ChatCompletion(id='chatcmpl_1754135313793796', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='{"address": {"street": "1 Hacker Wy", "city": "Menlo Park", "state": "CA", "zip": "94025"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None, reasoning_content=None))], created=1754135313, model='mlx-server-model', object='chat.completion', service_tier=None, system_fingerprint=None, usage=None)


### Streaming Structured Output Example

In [25]:
# Streaming variant of the structured-output request: the schema-constrained
# JSON arrives piece by piece instead of in a single response.
completion = client.chat.completions.create(
    model="mlx-server-model",
    messages=messages,
    max_tokens=512,
    response_format=response_format,
    stream=True,
)

# Write each text fragment as soon as it arrives, with no newline between
# fragments, so the pieces join into one JSON document on screen.
for chunk in completion:
    # Some OpenAI-compatible streams emit chunks with an empty `choices` list
    # (e.g. a trailing usage-only chunk); check before indexing so the loop
    # does not raise IndexError. Chunks without text are skipped as well.
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)

{"address": {"street": "1 Hacker Wy", "city": "Menlo Park", "state": "CA", "zip": "94025"}}