# Chat Examples

In [4]:
from openai import OpenAI

# Connection settings for the locally running server.
LOCAL_BASE_URL = "http://localhost:10240/v1"
# The local server does not require an API key; this is only a placeholder value.
PLACEHOLDER_API_KEY = "not-needed"

# Every request made through `client` goes to the local server.
client = OpenAI(base_url=LOCAL_BASE_URL, api_key=PLACEHOLDER_API_KEY)

## v1/chat/completions

You can test the endpoint directly with curl:

```shell
curl http://localhost:10240/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "mlx-community/gemma-3-1b-it-4bit-DWQ",
    "messages": [
      {
        "role": "system",
        "content": "You are a helpful assistant."
      },
      {
        "role": "user",
        "content": "Hello!"
      }
    ]
  }'

```

You can also call the server through OpenAI's Python SDK. Because the server exposes an OpenAI-compatible API, the switch is practically seamless:

In [2]:
# Minimal conversation: a system prompt followed by a single user greeting.
greeting_messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]

# No `stream` flag is passed, so the reply arrives as one response object.
completion = client.chat.completions.create(
    model="mlx-community/gemma-3-1b-it-4bit-DWQ",
    messages=greeting_messages,
)

# Show only the assistant message of the first choice.
first_choice = completion.choices[0]
print(first_choice.message)

ChatCompletionMessage(content='How can I assist you today?', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None)


In [72]:
# Bare expression: the notebook renders the repr of the whole response object held in `completion`.
completion

ChatCompletion(id='chatcmpl-ac6138de1f', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="Hello! It's nice to meet you. Is there something I can help you with or would you like to chat for a bit?", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1732542634, model='mlx-community/Llama-3.2-3B-Instruct-4bit', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=28, prompt_tokens=3, total_tokens=31, completion_tokens_details=None, prompt_tokens_details=None))

In [78]:
stream_messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]

# With stream=True the result is iterated chunk by chunk instead of being
# returned as one finished response.
response = client.chat.completions.create(
    model="mlx-community/gemma-3-1b-it-4bit-DWQ",
    messages=stream_messages,
    temperature=0,
    stream=True,
)

separator = "****************"
for part in response:
    # Print the raw chunk first, then just the text carried by its first choice.
    print(part)
    first_delta = part.choices[0].delta
    print(first_delta.content)
    print(separator)

ChatCompletionChunk(id='chatcmpl-0870a60d5b', choices=[Choice(delta=ChoiceDelta(content='', function_call=None, refusal=None, role='assistant', tool_calls=None), finish_reason=None, index=0, logprobs=None)], created=1732542953, model='mlx-community/Llama-3.2-3B-Instruct-4bit', object='chat.completion.chunk', service_tier=None, system_fingerprint=None, usage=None)

****************
ChatCompletionChunk(id='chatcmpl-0870a60d5b', choices=[Choice(delta=ChoiceDelta(content='', function_call=None, refusal=None, role='assistant', tool_calls=None), finish_reason=None, index=0, logprobs=None)], created=1732542953, model='mlx-community/Llama-3.2-3B-Instruct-4bit', object='chat.completion.chunk', service_tier=None, system_fingerprint=None, usage=None)

****************
ChatCompletionChunk(id='chatcmpl-0870a60d5b', choices=[Choice(delta=ChoiceDelta(content='Hello!', function_call=None, refusal=None, role='assistant', tool_calls=None), finish_reason=None, index=0, logprobs=None)], created=1732542953

In [None]:
## Structured Output

https://platform.openai.com/docs/guides/structured-outputs


In [11]:
import json

# Conversation sent to the model: a system prompt plus the user's request.
messages = [
    {"role": "system", "content": "You are a helpful AI assistant."},
    {"role": "user", "content": "Create 1-3 fictional characters"},
]

# Each character is an object with four mandatory string fields.
character_fields = ["name", "occupation", "personality", "background"]
character_item_schema = {
    "type": "object",
    "properties": {field: {"type": "string"} for field in character_fields},
    "required": list(character_fields),
}

# The top-level payload is an object wrapping a non-empty "characters" array.
character_schema = {
    "type": "json_schema",
    "json_schema": {
        "name": "characters",
        "schema": {
            "type": "object",
            "properties": {
                "characters": {
                    "type": "array",
                    "items": character_item_schema,
                    "minItems": 1,
                },
            },
            "required": ["characters"],
        },
    },
}

# Pass the schema as `response_format` so the reply is shaped by it.
response = client.chat.completions.create(
    model="mlx-community/Qwen3-1.7B-4bit-DWQ",
    messages=messages,
    response_format=character_schema,
)

# The message content is a JSON document: decode it, then pretty-print it.
raw_content = response.choices[0].message.content
results = json.loads(raw_content)
print(json.dumps(results, indent=2))

{
  "characters": [
    {
      "name": "Sidra Shafer",
      "occupation": "Private Investigator",
      "personality": "Incited and independent",
      "background": "Former U.S. Army combat medic, Sidra lives in Savannah, Georgia. She has sharp instincts and a tenacious spirit that's driven her to risk everything to unravel the darkest mysteries of the Deep South."
    },
    {
      "name": "Axel Laurie",
      "occupation": "Science Writer",
      "personality": " 'A globetrotting thrill-seeker, Axel seeks journalistic scoops that let him scurry into powers that be web', a hardened city boy's mindset statistically wired with logical common senses mixed well with reverence him with spicy vindictive sensibilities.",
      "background": "Former dependent surveillance team consultant near Kings Bay military base near Georgia suburban area"
    }
  ]
}


## Tools (Function Calling)

https://platform.openai.com/docs/guides/function-calling

In [17]:
import json
from datetime import datetime


model = "mlx-community/Qwen3-1.7B-4bit-DWQ"

# The single tool offered to the model: look up a delivery date by order ID.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_delivery_date",
            "description": "Get the delivery date for a customer's order. Call this whenever you need to know the delivery date, for example when a customer asks 'Where is my package'",
            "parameters": {
                "type": "object",
                "properties": {
                    "order_id": {
                        "type": "string",
                        "description": "The customer's order ID.",
                    },
                },
                "required": ["order_id"],
                "additionalProperties": False,
            },
        }
    }
]

# Conversation so far; the last user turn supplies the order ID.
messages = [
    {
        "role": "system",
        "content": "You are a helpful customer support assistant. Use the supplied tools to assist the user."
    },
    {
        "role": "user",
        "content": "Hi, can you tell me the delivery date for my order?"
    },
    {
        "role": "assistant", 
        "content": "Hi there! I can help with that. Can you please provide your order ID?"
    },
    {
        "role": "user", 
        "content": "i think it is order_12345"
    }
]

# Round 1: let the model decide whether to call the tool.
completion = client.chat.completions.create(
    model=model,
    messages=messages,
    tools=tools,
)

response_message = completion.choices[0].message
print(response_message)
print(response_message.tool_calls)

if not response_message.tool_calls:
    # `tool_calls` is None/empty when the model replies in plain text. There is
    # nothing to execute then, so skip round 2 instead of failing on `[0]`.
    print("The model did not request a tool call; skipping the tool round-trip.")
else:
    # Record the assistant's tool-call message in the history before the tool result.
    messages.append(response_message)

    tool_call = response_message.tool_calls[0]
    # Use the order ID the model actually passed to the tool rather than a
    # hard-coded copy, so the tool result matches the request.
    tool_arguments = json.loads(tool_call.function.arguments)
    order_id = tool_arguments["order_id"]
    # Demo stand-in for a real lookup: the current time is used as the delivery date.
    delivery_date = datetime.now()
    tool_call_id = tool_call.id

    function_call_result_message = {
        "role": "tool",
        "content": json.dumps({
            "order_id": order_id,
            "delivery_date": delivery_date.strftime('%Y-%m-%d %H:%M:%S')
        }),
        "tool_call_id": tool_call_id
    }
    messages.append(function_call_result_message)

    # Round 2: send the tool result back so the model can write the final answer.
    completion = client.chat.completions.create(
        model=model,
        messages=messages,
        tools=tools,
    )
    print(completion.choices[0].message)


ChatCompletionMessage(content=None, refusal=None, role='assistant', audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_b0b11d04', function=Function(arguments='{"order_id": "order_12345"}', name='get_delivery_date'), type='function')])
[ChatCompletionMessageToolCall(id='call_b0b11d04', function=Function(arguments='{"order_id": "order_12345"}', name='get_delivery_date'), type='function')]
ChatCompletionMessage(content="Your order's delivery date is December 11, 2024, at approximately 00:47:36. Thank you for your patience.", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None)


## logprobs

[openai cookbook](https://cookbook.openai.com/examples/using_logprobs#1-using-logprobs-to-assess-confidence-for-classification-tasks)


In [73]:
logprob_messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]

# Request per-token log probabilities, with two candidates reported per token.
completion = client.chat.completions.create(
    model="mlx-community/Qwen3-1.7B-4bit-DWQ",
    messages=logprob_messages,
    logprobs=True,
    top_logprobs=2,
)

# Only the assistant message is printed here.
assistant_message = completion.choices[0].message
print(assistant_message)

ChatCompletionMessage(content='Hello! How can I assist you today?', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None)


In [74]:
# Bare expression: the notebook renders the repr of the whole response object held in `completion`.
completion

ChatCompletion(id='chatcmpl-a4a5b3e89c', choices=[Choice(finish_reason='stop', index=0, logprobs=ChoiceLogprobs(content=[ChatCompletionTokenLogprob(token='Hello', bytes=[72, 101, 108, 108, 111], logprob=-0.140625, top_logprobs=[TopLogprob(token='Hello', bytes=[72, 101, 108, 108, 111], logprob=-0.140625), TopLogprob(token='How', bytes=[72, 111, 119], logprob=-2.078125)]), ChatCompletionTokenLogprob(token='!', bytes=[33], logprob=-0.03125, top_logprobs=[TopLogprob(token='!', bytes=[33], logprob=-0.03125), TopLogprob(token='.', bytes=[46], logprob=-3.375)]), ChatCompletionTokenLogprob(token=' How', bytes=[32, 72, 111, 119], logprob=-0.71875, top_logprobs=[TopLogprob(token=' It', bytes=[32, 73, 116], logprob=-0.6875), TopLogprob(token=' How', bytes=[32, 72, 111, 119], logprob=-0.71875)]), ChatCompletionTokenLogprob(token=' can', bytes=[32, 99, 97, 110], logprob=0.0, top_logprobs=[TopLogprob(token=' can', bytes=[32, 99, 97, 110], logprob=0.0), TopLogprob(token=' may', bytes=[32, 109, 97, 12

In [80]:
stream_logprob_messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]

# Streaming request that also asks for log probabilities (two candidates per token).
response = client.chat.completions.create(
    model="mlx-community/Qwen3-1.7B-4bit-DWQ",
    messages=stream_logprob_messages,
    temperature=0,
    stream=True,
    logprobs=True,
    top_logprobs=2,
)

separator = "****************"
for part in response:
    # Print the raw chunk first, then just the text carried by its first choice.
    print(part)
    first_delta = part.choices[0].delta
    print(first_delta.content)
    print(separator)

ChatCompletionChunk(id='chatcmpl-16b6ac7760', choices=[Choice(delta=ChoiceDelta(content='', function_call=None, refusal=None, role='assistant', tool_calls=None), finish_reason=None, index=0, logprobs=ChoiceLogprobs(content=None, refusal=None, token='Hello', logprob=-0.140625, bytes=[72, 101, 108, 108, 111], top_logprobs=[{'token': 'Hello', 'logprob': -0.140625, 'bytes': [72, 101, 108, 108, 111]}, {'token': 'How', 'logprob': -2.078125, 'bytes': [72, 111, 119]}]))], created=1732545699, model='mlx-community/Llama-3.2-3B-Instruct-4bit', object='chat.completion.chunk', service_tier=None, system_fingerprint=None, usage=None)

****************
ChatCompletionChunk(id='chatcmpl-16b6ac7760', choices=[Choice(delta=ChoiceDelta(content='', function_call=None, refusal=None, role='assistant', tool_calls=None), finish_reason=None, index=0, logprobs=ChoiceLogprobs(content=None, refusal=None, token='!', logprob=-0.03125, bytes=[33], top_logprobs=[{'token': '!', 'logprob': -0.03125, 'bytes': [33]}, {'tok

## Tools

```shell
curl http://localhost:10240/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
  "model": "mlx-community/Llama-3.2-3B-Instruct-4bit",
  "messages": [
    {
      "role": "user",
      "content": "What'\''s the weather like in Boston today?"
    }
  ],
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
          "type": "object",
          "properties": {
            "location": {
              "type": "string",
              "description": "The city and state, e.g. San Francisco, CA"
            },
            "unit": {
              "type": "string",
              "enum": ["celsius", "fahrenheit"]
            }
          },
          "required": ["location"]
        }
      }
    }
  ],
  "tool_choice": "auto"
}'

```


In [2]:
# JSON-schema description of the arguments accepted by get_current_weather.
weather_parameters = {
    "type": "object",
    "properties": {
        "location": {
            "type": "string",
            "description": "The city and state, e.g. San Francisco, CA",
        },
        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
    },
    "required": ["location"],
}

# One function tool is offered to the model.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": weather_parameters,
        },
    }
]

messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]

# tool_choice="auto" is passed along with the tool list.
completion = client.chat.completions.create(
    model="mlx-community/Llama-3.2-3B-Instruct-4bit",
    messages=messages,
    tools=tools,
    tool_choice="auto",
)

print(completion)


ChatCompletion(id='chatcmpl-5cd4a26f40', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='<|python_tag|><|python_tag|><|python_tag|><|python_tag|><|python_tag|>{"name":<|python_tag|>{"name":<|python_tag|>{"name":<|python_tag|>{"name":<|python_tag|>{"name":<|python_tag|>{"name": "get_current_weather",<|python_tag|>{"name": "get_current_weather",<|python_tag|>{"name": "get_current_weather",<|python_tag|>{"name": "get_current_weather", "parameters":<|python_tag|>{"name": "get_current_weather", "parameters":<|python_tag|>{"name": "get_current_weather", "parameters":<|python_tag|>{"name": "get_current_weather", "parameters": {"location":<|python_tag|>{"name": "get_current_weather", "parameters": {"location":<|python_tag|>{"name": "get_current_weather", "parameters": {"location":<|python_tag|>{"name": "get_current_weather", "parameters": {"location": "Boston,<|python_tag|>{"name": "get_current_weather", "parameters": {"location": "Boston,<|

## Models

API reference: https://platform.openai.com/docs/api-reference/models?lang=python

In [3]:
# Query the models endpoint; the notebook renders the returned page of Model entries.
client.models.list()

SyncPage[Model](data=[Model(id='Locutusque/TinyMistral-248M', created=1718007710, object='model', owned_by='Locutusque', config={'architectures': ['MistralForCausalLM'], 'bos_token_id': 1, 'eos_token_id': 2, 'hidden_act': 'silu', 'hidden_size': 1024, 'initializer_range': 0.02, 'intermediate_size': 4096, 'max_position_embeddings': 32768, 'model_type': 'mistral', 'num_attention_heads': 32, 'num_hidden_layers': 12, 'num_key_value_heads': 8, 'rms_norm_eps': 1e-06, 'rope_theta': 10000.0, 'sliding_window': 32, 'tie_word_embeddings': False, 'torch_dtype': 'float16', 'transformers_version': '4.35.0', 'use_cache': True, 'vocab_size': 32005}), Model(id='zeppdev/phi2-url', created=1706864758, object='model', owned_by='zeppdev', config={'vocab_size': 51200, 'hidden_size': 2560, 'intermediate_size': 10240, 'num_hidden_layers': 32, 'num_attention_heads': 32, 'num_key_value_heads': 32, 'resid_pdrop': 0.1, 'embd_pdrop': 0.0, 'attention_dropout': 0.0, 'hidden_act': 'gelu_new', 'max_position_embeddings'