In [67]:
from mlx_lm import load, generate
from mlx_lm.generate import stream_generate

import json

In [68]:
# Load the model and its tokenizer with mlx_lm.load from a local checkpoint
# directory. The path is relative to the notebook's working directory; the
# directory name suggests Gemma 3 1B instruction-tuned weights (confirm locally).
model, tokenizer = load("./models/gemma3-1b-it/transformers")

In [73]:
# System instructions sent verbatim to the model: persona, the required
# function-call output format, and the JSON schema of the single available
# function. The text (including its indentation) is part of the prompt.
system_instructions = """
        You are Gemma, a helpful chat bot. 
        
        You have access to functions. If you decide to invoke any of the function(s),
        you MUST put it in the format of
        {"name": function name, "parameters": dictionary of argument name and its value}. 

        The result is available as ```tool_output JSON```.
        
        You MUST NOT include any other text in the response if you call a function.
        [
          {
            "name": "get_stock_price",
            "description": "Finds the stock price by a symbol",
            "parameters": {
              "type": "object",
              "properties": {
                "SYMBOL": {
                  "type": "string"
                }
              },
              "required": [
                "SYMBOL"
              ]
            }
          }
        ]"""

# Conversation so far: one system message followed by the user's question.
chat_messages = [
    {"role": "system", "content": [{"text": system_instructions}]},
    {"role": "user", "content": "Can you find the stock price with the symbol APPL ?"},
]

# Render the conversation through the tokenizer's chat template, ending with
# the marker that opens the model's turn.
prompt = tokenizer.apply_chat_template(chat_messages, add_generation_prompt=True)

In [76]:
class ResponseParser:
    """Incrementally split a streamed model response into text and tool calls.

    Feed every streamed chunk to :meth:`add_response`. Each chunk must expose
    ``token`` (the token id) and ``text`` (its decoded text). The parser
    recognises fenced blocks of the form::

        <fence><anything up to end of line>
        {"name": ..., "parameters": ...}
        <fence>

    When a block closes, its body is parsed as JSON, :meth:`call_tool` is
    invoked, and the call plus a ``tool_output`` block holding the result is
    appended to ``text``.

    Attributes:
        text: Transcript accumulated so far (plain text seen before the first
            fenced block, then one entry per successfully handled tool call).
        tool: Body of the fenced block currently being read.
        had_tool: True once the body of a fenced block has started.
        escape_mode: True while inside a fenced block (header or body).
        tool_mode: True while reading the body of a fenced block.
    """

    # Token ids compared against ``response.token``.
    # NOTE(review): presumably the "```" and "\n" ids of the Gemma 3 tokenizer;
    # confirm against the tokenizer in use.
    FENCE_TOKEN = 2717
    NEWLINE_TOKEN = 107

    # Class-level defaults. Assignments through ``self`` create per-instance
    # values, which is safe because every default is immutable.
    escape_mode = False
    tool_mode = False

    text = ""
    tool = ""

    had_tool = False

    def call_tool(self, tool_name, tool_parameters):
        """Execute a tool and return its JSON-serialisable result.

        Stub implementation: both arguments are ignored and a fixed price is
        returned. Override or replace it to dispatch to real tools.
        """
        return {"price": 180}

    def add_response(self, response):
        """Consume one streamed chunk and update the parser state.

        Args:
            response: Object with a ``token`` id and a ``text`` string.

        Errors raised while parsing or executing a tool call are reported with
        ``print`` and the offending block is dropped; the parser is then ready
        for the next fenced block.
        """
        token = response.token

        if not self.escape_mode:
            if token == self.FENCE_TOKEN:
                self.escape_mode = True
            elif not self.had_tool:
                # Plain text after the first fenced block is deliberately not
                # recorded, so the transcript ends right after the tool output.
                self.text += response.text
            return

        if not self.tool_mode:
            # Header line of the fence (e.g. a language tag): skipped until
            # the end of the line, after which the block body starts.
            if token == self.NEWLINE_TOKEN:
                self.tool_mode = True
                self.had_tool = True
            return

        if token != self.FENCE_TOKEN:
            self.tool += response.text
            return

        # Closing fence: the buffered body should now be a complete call.
        try:
            parsed = json.loads(self.tool)

            tool_name = parsed["name"]
            tool_parameters = parsed["parameters"]

            result = self.call_tool(tool_name, tool_parameters)
            self.text += self.tool + "```tool_output " + json.dumps(result) + "```"
        except Exception as error:
            print("An exception occurred:", error)
        finally:
            # Always leave fence state so the closing fence is not appended to
            # the buffer and a later fenced block starts from a clean slate.
            self.escape_mode = False
            self.tool_mode = False
            self.tool = ""



In [81]:
# Run one chat turn with optional tool use:
#   1. stream the model's answer through ResponseParser;
#   2. if a fenced tool block was seen, append the parsed call and its output
#      to the prompt and ask the model to continue;
#   3. print the final answer.

# Token id on which both generation loops stop (the recorded output shows the
# "<end_of_turn>" marker as the last streamed text).
END_OF_TURN_TOKEN = 106

responseParser = ResponseParser()

for response in stream_generate(model, tokenizer, prompt):
    responseParser.add_response(response)

    if response.token == END_OF_TURN_TOKEN:
        break

# Both names are assigned on every path so a fresh kernel never hits a
# NameError and a value left over from an earlier run is never reused.
next_prompt = None
final_response = responseParser.text

if responseParser.had_tool:
    # Continue the conversation: the original prompt tokens, followed by the
    # transcript with the tool output and a follow-up user instruction.
    follow_up = (
        responseParser.text
        + "<end_of_turn><start_of_turn>user Decide to call another function or summarize the results.<end_of_turn>"
        + "<start_of_turn>model"
    )
    next_prompt = prompt + tokenizer.encode(follow_up, add_special_tokens=False)
    final_response = ""

if next_prompt is not None:
    for response in stream_generate(model, tokenizer, next_prompt):
        final_response += response.text

        if response.token == END_OF_TURN_TOKEN:
            break

print(final_response)




I found the stock price for APPL to be 180.<end_of_turn>
