# Trying Out Tools in Local Models

## Ollama to the rescue?

Lets try the native ollama library first as it seems Langchain's ChatOllama does not work

In [17]:
%pip install -U ollama openai

Collecting ollama
  Downloading ollama-0.4.7-py3-none-any.whl.metadata (4.7 kB)
Collecting openai
  Downloading openai-1.65.1-py3-none-any.whl.metadata (27 kB)
Downloading ollama-0.4.7-py3-none-any.whl (13 kB)
Downloading openai-1.65.1-py3-none-any.whl (472 kB)
Installing collected packages: openai, ollama
  Attempting uninstall: openai
    Found existing installation: openai 1.54.4
    Uninstalling openai-1.54.4:
      Successfully uninstalled openai-1.54.4
  Attempting uninstall: ollama
    Found existing installation: ollama 0.3.3
    Uninstalling ollama-0.3.3:
      Successfully uninstalled ollama-0.3.3
Successfully installed ollama-0.4.7 openai-1.65.1
Note: you may need to restart the kernel to use updated packages.


In [47]:
import ollama
from ollama import Client
from typing_extensions import Literal, List, TypedDict, Any, Iterator, Optional
from langchain_community.tools.tavily_search import TavilySearchResults
client = Client(
  host='ollama',
)


In [48]:
class ChatWrapper:
    client = Client(
      host='ollama',
    )
    def __init__(self, 
                 model: Literal['llama3.2:latest', 'qwen2.5-coder:32b', 'llama3.1:8b-instruct-fp16', 'deepseek-r1:32b'],
                 system_message: str,
                 temperature: float = 0.25,
                 top_p: float = 0.8
                ):
        self.model = model
        self.system_message = [{"role": "system", "content": system_message}]
        self.temperature = temperature
        self.top_p = top_p

    def get_stream(self, messages: List[TypedDict]):
        return ChatWrapper.client.chat(
            model=self.model,
            messages=self.system_message + messages,
            stream=True,
            options={"temperature": self.temperature,
                    "top_p": self.top_p
                    }
        )

    def get_response(self, messages: List[TypedDict]) -> str:
        return ChatWrapper.client.chat(
            model=self.model,
            messages=self.system_message + messages,
            stream=False,
            options={"temperature": self.temperature,
                    "top_p": self.top_p
                    }
        )
    def get_with_tools(self, tools: List, messages: List[TypedDict]) -> Iterator[dict]:
 
        return ChatWrapper.client.chat(
            model=self.model,
            messages=self.system_message + messages,
            stream=False,
            options={"temperature": self.temperature,
                    "top_p": self.top_p
                    },
            tools=tools
        )
    

In [49]:
def my_generator(max_value):
    current = 0
    while current < max_value:
        yield current
        current += 1

# Usage
my_gen = my_generator(5)
# for i in my_gen:
#     print(i)  # Output: 0, 1, 2, 3, 4
some_var = next(my_gen) # here we store the first in a var
for i in my_gen: # only prints the next 4
    print(i)

1
2
3
4


In [9]:
def web_search(query: str) -> Optional[list[dict[str, Any]]]:

    """Query search engine.

    This function queirs the web to fetch comprehensive, accurate and trusted results. It's useful
    for answering questions about current events. 
    """

    wrapped = TavilySearchResults(max_results=3)
    result = wrapped.invoke({"query": query})
    return result

In [50]:
def web_search(query: str) -> Optional[list[dict[str, Any]]]:

    """
    Query search engine.
    This function queirs the web to fetch comprehensive, accurate and trusted results. It's useful
    for answering questions about current events.
    
    Args:
        query (str): The query to search
    
    Returns:
        list: A list of the results
    """
    if query:
        wrapped = TavilySearchResults(max_results=3)
        result = wrapped.invoke({"query": query})
        return result
    else:
        raise ValueError("No query was supplied by the LLM")

In [51]:


system_message = """
You are a wonderful chat assistant that likes to converse with your human friends. You have a 'web_search'
tool at your disposal that can be used to get up to date results. You are to use this tool only when necessary.
If you are able to tap into your vast knowledge of topics and reply directly, please do so. Invoke the web search
only when the question involves current events
"""

In [59]:
chat = ChatWrapper(model="qwen2.5-coder:32b", 
                   system_message=system_message)

In [63]:
messages = [{"role": "user", "content": "can you tell me what are the oscar nominations 2025"}]

In [64]:
r = chat.get_with_tools(tools=[web_search], messages=messages)

In [65]:
r

ChatResponse(model='qwen2.5-coder:32b', created_at='2025-03-01T11:49:17.187480379Z', done=True, done_reason='stop', total_duration=666421748, load_duration=14451442, prompt_eval_count=260, prompt_eval_duration=46000000, eval_count=24, eval_duration=595000000, message=Message(role='assistant', content='', images=None, tool_calls=[ToolCall(function=Function(name='web_search', arguments={'query': 'oscar nominations 2025'}))]))

In [46]:
import openai

openai.api_key="ollama"
openai.base_url="http://ollama:11434/v1/"

In [45]:
response = openai.chat.completions.create(
	model="llama3.1:8b-instruct-fp16",
	messages=[{"role": "system", "content": system_message}] + messages,
    tools=[web_search]
)

TypeError: Object of type function is not JSON serializable

In [39]:
response

ChatCompletion(id='chatcmpl-255', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="I'm doing great, thanks for asking! I'm always here and ready to chat with you about anything that's on your mind. It's lovely to have someone to talk to. How about you? What's new and exciting in your world today? Would you like some coffee or a joke to brighten up the day?", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1740828574, model='llama3.1:8b-instruct-fp16', object='chat.completion', service_tier=None, system_fingerprint='fp_ollama', usage=CompletionUsage(completion_tokens=67, prompt_tokens=102, total_tokens=169, completion_tokens_details=None, prompt_tokens_details=None))

In [33]:
import requests

url = "http://ollama:11434/v1/chat/completions"
headers = {"Content-Type": "application/json"}
data = {
    "model": "llama3.1:8b-instruct-fp16",  # Exact name from your API response
    "messages": [{"role": "user", "content": "Hello!"}]
}

response = requests.post(url, json=data, headers=headers)

print(response.status_code)
print(response.text)


200
{"id":"chatcmpl-642","object":"chat.completion","created":1740828368,"model":"llama3.1:8b-instruct-fp16","system_fingerprint":"fp_ollama","choices":[{"index":0,"message":{"role":"assistant","content":"It's nice to meet you. Is there something I can help you with, or would you like to chat?"},"finish_reason":"stop"}],"usage":{"prompt_tokens":12,"completion_tokens":24,"total_tokens":36}}



In [25]:
import requests

url = "http://ollama:11434/v1/chat/completions"
headers = {"Content-Type": "application/json"}
data = {
    "model": "llama3.1:8b-instruct-fp16",
    "messages": [{"role": "system", "content": "Hello!"}]
}

response = requests.post(url, json=data, headers=headers)

print(response.status_code)
print(response.json())  # Print JSON response

200
{'id': 'chatcmpl-945', 'object': 'chat.completion', 'created': 1740827948, 'model': 'llama3.1:8b-instruct-fp16', 'system_fingerprint': 'fp_ollama', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': ''}, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 8, 'completion_tokens': 1, 'total_tokens': 9}}


In [29]:
import json
response = requests.get("http://ollama:11434/api/tags")
print(json.dumps(response.json(), indent=2))

{
  "models": [
    {
      "name": "deepseek-r1:32b",
      "model": "deepseek-r1:32b",
      "modified_at": "2025-02-03T07:45:31.124246491Z",
      "size": 19851337640,
      "digest": "38056bbcbb2d068501ecb2d5ea9cea9dd4847465f1ab88c4d4a412a9f7792717",
      "details": {
        "parent_model": "",
        "format": "gguf",
        "family": "qwen2",
        "families": [
          "qwen2"
        ],
        "parameter_size": "32.8B",
        "quantization_level": "Q4_K_M"
      }
    },
    {
      "name": "qwen2.5-coder:32b",
      "model": "qwen2.5-coder:32b",
      "modified_at": "2025-01-26T14:18:39.353482286Z",
      "size": 19851349856,
      "digest": "4bd6cbf2d094264457a17aab6bd6acd1ed7a72fb8f8be3cfb193f63c78dd56df",
      "details": {
        "parent_model": "",
        "format": "gguf",
        "family": "qwen2",
        "families": [
          "qwen2"
        ],
        "parameter_size": "32.8B",
        "quantization_level": "Q4_K_M"
      }
    },
    {
      "name": "l