In [1]:
import requests

# List available models from the local LLM server
try:
    response = requests.get("http://localhost:1234/v1/models")
    if response.status_code == 200:
        models = response.json()
        print("Available models:")
        for model in models.get('data', []):
            print(f"- {model.get('id', 'Unknown')}")
    else:
        print(f"Failed to get models: {response.status_code} - {response.text}")
except requests.exceptions.RequestException as e:
    print(f"Error connecting to local LLM server: {e}")


Available models:
- text-embedding-nomic-embed-text-v1.5
- qwen/qwen3-4b-2507
- meta-llama-3.1-8b-instruct
- text-embedding-bge-small-en-v1.5
- text-embedding-all-minilm-l6-v2-embedding
- deepseek/deepseek-r1-0528-qwen3-8b
- qwen/qwen3-4b-thinking-2507
- llama-3.2-3b-instruct


In [2]:
model_name = "qwen/qwen3-4b-2507", # deepseek/deepseek-r1-0528-qwen3-8b

In [76]:
import json

# Example dummy function hard coded to return the same weather
# In production, this could be your backend API or an external API
def get_current_weather(location, unit="fahrenheit"):
    """Get the current weather in a given location"""
    weather_info = {
        "location": location,
        "temperature": "72",
        "unit": unit,
        "forecast": ["sunny", "windy"],
    }
    return json.dumps(weather_info)

In [77]:
# define a function
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        }
    }
]

In [78]:
messages = [
    {
        "role": "user",
        "content": "What's the weather like in San Francisco?"
    }
]

In [79]:
import openai


client = openai.OpenAI(
    base_url="http://localhost:1234/v1",
    api_key="dummy-key"  # Local LLM servers often don't require a real API key
)

# Test the connection and make a function call
response = client.chat.completions.create(
    model=model_name,  # Use whatever model name your local server expects
    messages=messages,
    tools=tools,
    tool_choice="auto"
)

response_message = response.choices[0].message
print(response.choices)

[Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content='<think>\nOkay, user is asking about the current weather in San Francisco. This seems like a straightforward query that requires external data.\n\nThe user hasn\'t specified temperature unit preference, so I\'ll default to Celsius since that\'s the international standard. However, if they use a tool without specifying unit later, it might default to something else - better stick with simple request.\n\nLooking at the available tool: "get_current_weather" matches perfectly. Location must be specified exactly - including state if needed, which in this case is California so "San Francisco, CA".\n\nThe system requires strict formatting for tool calls. No room for additional text after the final tool request, and must use exactly  tags. Must ensure the JSON structure matches what the system expects.\n\nConsidering this is likely a simple query without emotional context, just need to provide fac

In [80]:
print(response_message)

ChatCompletionMessage(content='<think>\nOkay, user is asking about the current weather in San Francisco. This seems like a straightforward query that requires external data.\n\nThe user hasn\'t specified temperature unit preference, so I\'ll default to Celsius since that\'s the international standard. However, if they use a tool without specifying unit later, it might default to something else - better stick with simple request.\n\nLooking at the available tool: "get_current_weather" matches perfectly. Location must be specified exactly - including state if needed, which in this case is California so "San Francisco, CA".\n\nThe system requires strict formatting for tool calls. No room for additional text after the final tool request, and must use exactly  tags. Must ensure the JSON structure matches what the system expects.\n\nConsidering this is likely a simple query without emotional context, just need to provide factual weather information. No signs the user needs this for anything 

In [61]:
response_message.content

'<think>\nOkay, let me handle this user query. The user is asking about the weather in San Francisco.\n\nFirst, I need to determine if this requires any tools. Available tools include a get_current_weather tool that takes location and optional unit parameters.\n\nSan Francisco seems like the perfect case for this tool. Location is clear, "San Francisco", CA. I can include the unit parameter as well.\n\nConsidering whether to use Celsius or Fahrenheit in response, since both are common measurements. The temperature details would depend on the actual data, but that\'s something I\'ll get from the tool.\n\nFor such queries about current conditions or forecasts, using this weather tool makes sense as it provides accurate information based on data sources.\n\nLooking at the format, I need to provide a tool call block with proper syntax. Since this requires only one response, I should keep it simple without extra text.\n\nThis seems like a straightforward request that doesn\'t require additi

In [62]:
response_message.tool_calls


[ChatCompletionMessageFunctionToolCall(id='708015470', function=Function(arguments='{"location":"San Francisco"}', name='get_current_weather'), type='function')]

In [63]:
import json
json.loads(response_message.tool_calls[0].function.arguments)


{'location': 'San Francisco'}

In [64]:
args = json.loads(response_message.tool_calls[0].function.arguments)
get_current_weather(args)


'{"location": {"location": "San Francisco"}, "temperature": "72", "unit": "fahrenheit", "forecast": ["sunny", "windy"]}'

In [65]:
messages= [
    {
        "role": "user",
        "content": "hi!"
    }
]
response = client.chat.completions.create(
    model=model_name,
    messages=messages,
    tools=tools,
    tool_choice="auto"
)

print(response.choices[0].message)

ChatCompletionMessage(content='<think>\nFirst, the user said "hi!" which is a greeting. I need to check if this requires any tools or just a normal response.\n\nLooking at the available tools, there\'s only "get_current_weather" listed. But this message doesn\'t mention anything about weather or other actions like sending emails or opening browsers.\n\nThe tool description is "Get the current weather in a given location", and it requires one required parameter: "location". The user\'s message is just "hi!", which doesn\'t provide any location or context that suggests a weather check.\n\nAdditionally, the rules state:\n- Only use listed tools if needed.\n- If calling tools, end with tool calls (no text after).\n- Answer normally if no tools are needed.\n\nSo, since this is a simple greeting and there\'s no indication that a tool should be used, I should respond normally without calling any tools.\n\nMy response should be helpful and engaging. Perhaps I could say hello back or ask how I 

In [67]:
response_content = response.choices[0].message.content
response_content.split('</think>', 1)[-1]

'\nHello! How can I assist you today? If you have any questions, feel free to ask.'

# force not to use tools

In [70]:
messages= [
    {
        "role": "user",
        "content": "whats the weather in Dallas?"
    }
]

response = client.chat.completions.create(
    model=model_name,
    messages=messages,
    tools=tools,
    tool_choice="none"
)
print(response)
response.choices[0].message.content.split('</think>', 1)[-1]

ChatCompletion(id='chatcmpl-61dc6z6k3tj5ky677ydy05', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='<think>\nOkay, user is asking about the weather in Dallas. Hmm, this seems like a straightforward question at first glance, but let me think deeper.\n\nThe user probably wants to know the current weather conditions to plan something. Maybe they\'re local and need real-time info, or a traveler checking forecasts before departure. Could also be someone setting up a meeting or event.\n\nBut wait, why is the user asking an assistant instead of checking a weather app or website? Maybe they\'re in a situation where internet access isn\'t available, like during travel or at home. Or perhaps they want human interpretation of weather patterns rather than raw data.\n\nI don\'t have live access to current weather, but I can provide accurate Dallas-specific information. The best approach is to explain that limitation while offering concrete ways 

'\nI can check the current weather conditions for Dallas, Texas. Please note that my search results are up-to-date based on information typically current as of June 2024.\n\nWant me to find it for you right now?'

# force to use the tools

In [73]:
messages= [
    {
        "role": "user",
        "content": "hi!"
    }
]
response = client.chat.completions.create(
    model=model_name,
    messages=messages,
    tools=tools,
    tool_choice={"type": "function", "function": {"name": "get_current_weather"}}
)

print(response)
#response.choices[0].message.content.split('</think>', 1)[-1]

BadRequestError: Error code: 400 - {'error': "Invalid tool_choice type: 'object'. Supported string values: none, auto, required"}

In [84]:
messages = [
    {
        "role": "user",
        "content": "What's the weather like in San Francisco?"
    }
]
response = client.chat.completions.create(
    model=model_name,
    messages=messages,
    tools=tools,
    tool_choice="auto"
)

print(response)
response_message = response.choices[0].message
messages.append(response_message)


tool_call = response_message.tool_calls[0]
function_name = tool_call.function.name
args = json.loads(tool_call.function.arguments)
observation = get_current_weather(args)

messages.append({
    "role": "tool",
    "name": function_name,
    "content": observation
})

response = client.chat.completions.create(
    model=model_name,
    messages=messages,
    tools=tools,
    tool_choice="auto"
)
print(response)
response.choices[0].message.content.split('</think>', 1)[-1]

ChatCompletion(id='chatcmpl-8dth418a6ld9767xkntdhr', choices=[Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content="<think>\nOkay, let's break this down. The user is asking about the weather in San Francisco, which means they need a tool call to get that information.\n\nFirst thought: The current weather is definitely the best way to answer this. We have exactly one tool available for weather queries.\n\nSecond thought: The user didn't specify a unit, but the default behavior of tools should handle this. Looking at how other assistants handled similar queries, they often assumed metric units for international locations.\n\nThird thought: San Francisco is a great choice of location, so I'll keep this simple by just calling the tool with that one parameter.\n\nFourth thought: The response should be concise, state we're checking the weather, then show exactly the tool call format without any extra text after it.\n\nYes, this matches our availabl

"\nOkay,SFO_72F_Windy.\n\nHere is the interpretation in context:\n\n*   Sunny: Good visibility, minimal rain chance.\n*   Windy: Conditions might be breezy later. Don't forget wind protection.\n\nThat forecast suggests **good conditions for outdoor activities** today, with a pleasant temperature of 72°F (which is approximately ~22°C).\n\nHowever, keep in mind the wind factor depending on what outdoor plans you have."