## Functions and Chat Completion
- [Create our own Code Interpreter | A hacker's guide to Language Models ](https://github.com/fastai/lm-hackers/blob/main/lm-hackers.ipynb)
- [GPT - OpenAI API ](https://platform.openai.com/docs/guides/gpt/function-calling)
- [Function-calling with an OpenAPI specification | OpenAI Cookbook](https://cookbook.openai.com/examples/function_calling_with_an_openapi_spec)

Jeremy Howard provided an overview of using ChatGPT to create your own "Code Interpreter" that can be used by LLMs to call Python code.

See:
https://github.com/fastai/lm-hackers/blob/main/lm-hackers.ipynb

In [None]:
from pydantic import create_model
import inspect, json
from inspect import Parameter

In [None]:
from fastcore.utils import nested_idx
def response(compl):
    "Print the message content of the first choice in the completion `compl`."
    content = nested_idx(compl, 'choices', 0, 'message', 'content')
    print(content)

In [None]:
def sums(a:int, b:int=1):
    "Adds a + b"
    # NOTE: the docstring and annotations above are read by `schema`, so keep them exact.
    total = a + b
    return total

In [None]:
def schema(f):
    """Describe the function `f` as a dict usable in the `functions=` list of a chat completion.

    The returned dict has three keys: `name` (the function's `__name__`),
    `description` (its docstring) and `parameters` (a JSON schema built by
    Pydantic from the signature's annotations and defaults). Parameters
    without a default are marked as required.
    """
    # `...` (Ellipsis) tells Pydantic the field is required. Compare against the
    # `Parameter.empty` sentinel by identity so defaults with a custom `__eq__`
    # cannot confuse the check.
    kw = {n: (o.annotation, ... if o.default is Parameter.empty else o.default)
          for n, o in inspect.signature(f).parameters.items()}
    model = create_model(f'Input for `{f.__name__}`', **kw)
    # Pydantic v2 deprecates `.schema()` in favour of `.model_json_schema()`;
    # fall back to the old name only when running on Pydantic v1.
    to_schema = getattr(model, 'model_json_schema', None) or model.schema
    return dict(name=f.__name__, description=f.__doc__, parameters=to_schema())

In [None]:
schema(sums)

/var/folders/c5/gc7vgjds6tq1jy3b143hjtnm0000gn/T/ipykernel_79447/3240321801.py:4: PydanticDeprecatedSince20: The `schema` method is deprecated; use `model_json_schema` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/
  s = create_model(f'Input for `{f.__name__}`', **kw).schema()


{'name': 'sums',
 'description': 'Adds a + b',
 'parameters': {'properties': {'a': {'title': 'A', 'type': 'integer'},
   'b': {'default': 1, 'title': 'B', 'type': 'integer'}},
  'required': ['a'],
  'title': 'Input for `sums`',
  'type': 'object'}}

We don't want to disclose our OPENAI_API_KEY to the world, so a good practice is to put the key in a .env file that is listed in .gitignore so we don't accidentally commit it to GitHub. We use the python-dotenv package to load the OPENAI_API_KEY variable into the environment and assign it to `openai.api_key`. For codespaces and docker containers, there are methods to handle secrets that get set to environment variables at startup so as to not accidentally leak information.

In [None]:
import openai
from openai import ChatCompletion,Completion
from dotenv import load_dotenv, find_dotenv
from fastcore.utils import *
from pathlib import Path
from os import getenv
# Load variables from a `.env` file when one can be found. A missing file is
# not treated as an error here, because the key may already be present in the
# process environment (for example when it is injected as a secret at startup).
load_dotenv()

api_key = getenv("OPENAI_API_KEY") or ""

# The key itself is the real requirement, so fail loudly if it is absent or empty.
if not api_key:
    raise ValueError("EnvironmentError: Failed to load `OPENAI_API_KEY`")

openai.api_key = api_key



In [None]:
def askgpt(user, system=None, model="gpt-3.5-turbo", **kwargs):
    """Send a single-turn chat request and return the completion object.

    `user` is the user message; `system`, when truthy, is sent first as the
    system message. Extra keyword arguments (e.g. `functions=`) are passed
    straight through to `ChatCompletion.create`.
    """
    user_msg = {"role": "user", "content": user}
    if system:
        msgs = [{"role": "system", "content": system}, user_msg]
    else:
        msgs = [user_msg]
    return ChatCompletion.create(model=model, messages=msgs, **kwargs)

In [None]:
# The prompt names the function exactly as it is exposed in the schema (`sums`).
c = askgpt("Use the `sums` function to solve this: What is 6+3?",
           system = "You must use the `sums` function instead of adding yourself.",
           functions=[schema(sums)])

/var/folders/c5/gc7vgjds6tq1jy3b143hjtnm0000gn/T/ipykernel_79447/3240321801.py:4: PydanticDeprecatedSince20: The `schema` method is deprecated; use `model_json_schema` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/
  s = create_model(f'Input for `{f.__name__}`', **kw).schema()


Timeout: Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)

In [None]:
m = c.choices[0].message
m

<OpenAIObject> JSON: {
  "role": "assistant",
  "content": null,
  "function_call": {
    "name": "sums",
    "arguments": "{\n  \"a\": 6,\n  \"b\": 3\n}"
  }
}

In [None]:
k = m.function_call.arguments
print(k)

{
  "a": 6,
  "b": 3
}


In [None]:
funcs_ok = {'sums', 'python'}

In [None]:
def call_func(c):
    """Run the function requested by the first choice of completion `c`.

    Only names listed in `funcs_ok` are executed; anything else prints a
    "Not allowed" message and returns None. The function is looked up by name
    in this module's globals and called with the JSON-decoded arguments.
    """
    call = c.choices[0].message.function_call
    name = call.name
    if name in funcs_ok:
        target = globals()[name]
        kwargs = json.loads(call.arguments)
        return target(**kwargs)
    return print(f'Not allowed: {name}')

In [None]:
call_func(c)

9

In [None]:
import ast

In [None]:
def run(code):
    """Execute the source string `code` and return the value of its last expression.

    If the final statement is a bare expression it is rewritten into an
    assignment to `_result`, so its value can be read back after `exec`.
    Returns None when there is no `_result` in the executed namespace.
    """
    module = ast.parse(code)
    body = module.body

    # Turn a trailing `<expr>` into `_result = <expr>` so the value survives exec
    if body and isinstance(body[-1], ast.Expr):
        capture = ast.Assign(
            targets=[ast.Name(id='_result', ctx=ast.Store())],
            value=body[-1].value,
        )
        body[-1] = ast.fix_missing_locations(capture)

    namespace = {}
    exec(compile(module, filename='<ast>', mode='exec'), namespace)
    return namespace.get('_result')

In [None]:
run("""
a=1
b=2
a+b
""")

3

In [None]:
def python(code:str):
    "Return result of executing `code` using python. If execution not permitted, returns `#FAIL#`"
    # Show the code and ask for confirmation before anything is executed;
    # only an answer of `y` (any case) lets it run.
    answer = input(f'Proceed with execution?\n```\n{code}\n```\n')
    approved = answer.lower() == 'y'
    return run(code) if approved else '#FAIL#'

In [None]:
c = askgpt("What is 12 factorial?",
           system = "Use python for any required computations.",
           functions=[schema(python)])

/var/folders/c5/gc7vgjds6tq1jy3b143hjtnm0000gn/T/ipykernel_22254/3240321801.py:4: PydanticDeprecatedSince20: The `schema` method is deprecated; use `model_json_schema` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/
  s = create_model(f'Input for `{f.__name__}`', **kw).schema()


In [None]:
call_func(c)

KeyboardInterrupt: Interrupted by user

In [None]:
c = ChatCompletion.create(
    model="gpt-3.5-turbo",
    functions=[schema(python)],
    messages=[{"role": "user", "content": "What is 12 factorial?"},
              {"role": "function", "name": "python", "content": "479001600"}])

/var/folders/c5/gc7vgjds6tq1jy3b143hjtnm0000gn/T/ipykernel_42954/3240321801.py:4: PydanticDeprecatedSince20: The `schema` method is deprecated; use `model_json_schema` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/
  s = create_model(f'Input for `{f.__name__}`', **kw).schema()


In [None]:
response(c)

12 factorial, denoted as 12!, is equal to 479,001,600.


In [None]:
c = askgpt("What is the capital of France?",
           system = "Use python for any required computations.",
           functions=[schema(python)])

/var/folders/c5/gc7vgjds6tq1jy3b143hjtnm0000gn/T/ipykernel_42954/3240321801.py:4: PydanticDeprecatedSince20: The `schema` method is deprecated; use `model_json_schema` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/
  s = create_model(f'Input for `{f.__name__}`', **kw).schema()


In [None]:
response(c)


The capital of France is Paris.


## LLama based agents and Llama.cpp functions
[Zeng, Aohan, Mingdao Liu, Rui Lu, Bowen Wang, Xiao Liu, Yuxiao Dong, and Jie Tang. 2023. “AgentTuning: Enabling Generalized Agent Abilities for LLMs.” arXiv. http://arxiv.org/abs/2310.12823.](http://arxiv.org/abs/2310.12823)

- [The Bloke Quantized](https://huggingface.co/TheBloke/agentlm-7B-GGUF/blob/main/agentlm-7b.Q4_K_M.gguf)

- [llama.cpp Grammars](https://github.com/ggerganov/llama.cpp/tree/master/grammars)
- [Introducing Code Llama, a state-of-the-art large language model for coding](https://ai.meta.com/blog/code-llama-large-language-model-coding/)
- [llama.cpp Python](https://github.com/abetlen/llama-cpp-python)
- [Implementation of Functions using llama.cpp grammar](https://github.com/teleprint-me/py.gpt.prompt/blob/main/docs/notebooks/llama_cpp_grammar_api.ipynb)
- [Using grammars to constrain the llama.cpp output](https://www.imaurer.com/llama-cpp-grammars/)
- [Using llama-cpp-python grammars to generate JSON](https://til.simonwillison.net/llms/llama-cpp-python-grammars)

In [None]:
import json
import os
import pathlib
import requests
from pprint import pprint
from llama_cpp import ChatCompletionMessage, Llama, LlamaGrammar

In [None]:
llm = Llama(model_path="./models/agentlm-70b.Q5_K_S.gguf",n_gpu_layers=1,n_gqa=8, echo=False)

llama_model_loader: loaded meta data with 19 key-value pairs and 723 tensors from ./models/agentlm-70b.Q5_K_S.gguf (version unknown)
llama_model_loader: - tensor    0:                token_embd.weight q5_K     [  8192, 32256,     1,     1 ]
llama_model_loader: - tensor    1:           blk.0.attn_norm.weight f32      [  8192,     1,     1,     1 ]
llama_model_loader: - tensor    2:            blk.0.ffn_down.weight q5_K     [ 28672,  8192,     1,     1 ]
llama_model_loader: - tensor    3:            blk.0.ffn_gate.weight q5_K     [  8192, 28672,     1,     1 ]
llama_model_loader: - tensor    4:              blk.0.ffn_up.weight q5_K     [  8192, 28672,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_norm.weight f32      [  8192,     1,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.attn_k.weight q5_K     [  8192,  1024,     1,     1 ]
llama_model_loader: - tensor    7:         blk.0.attn_output.weight q5_K     [  8192,  8192,     1,     1 ]
lla

In [None]:

llama_grammar = LlamaGrammar.from_file("json.gbnf")
import httpx
grammar_text = httpx.get("https://raw.githubusercontent.com/ggerganov/llama.cpp/master/grammars/json_arr.gbnf").text
grammar = LlamaGrammar.from_string(grammar_text)

from_string grammar:



root ::= object 
object ::= [{] ws object_11 [}] ws 
value ::= object | array | string | number | value_6 ws 
array ::= [[] ws array_15 []] ws 
string ::= ["] string_18 ["] ws 
number ::= number_19 number_25 number_29 ws 
value_6 ::= [t] [r] [u] [e] | [f] [a] [l] [s] [e] | [n] [u] [l] [l] 
ws ::= ws_31 
object_8 ::= string [:] ws value object_10 
object_9 ::= [,] ws string [:] ws value 
object_10 ::= object_9 object_10 | 
object_11 ::= object_8 | 
array_12 ::= value array_14 
array_13 ::= [,] ws value 
array_14 ::= array_13 array_14 | 
array_15 ::= array_12 | 
string_16 ::= [^"\] | [\] string_17 
string_17 ::= ["\/bfnrt] | [u] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] 
string_18 ::= string_16 string_18 | 
number_19 ::= number_20 number_21 
number_20 ::= [-] | 
number_21 ::= [0-9] | [1-9] number_22 
number_22 ::= [0-9] number_22 | 
number_23 ::= [.] number_24 
number_24 ::= [0-9] number_24 | [0-9] 
number_25 ::= number_23 | 
number_26 ::= [eE] number_27 number_28 
number_27 ::= [-

from_string grammar:



In [None]:
# Simple test of local LLM
output = llm("Q: Name the planets in the solar system? A: ", max_tokens=100, stop=["Q:", "\n"], echo=True)
print(output)

{'id': 'cmpl-79ae482b-6d22-4e57-9bda-4fbb29a22773', 'object': 'text_completion', 'created': 1699286479, 'model': './models/agentlm-70b.Q5_K_S.gguf', 'choices': [{'text': 'Q: Name the planets in the solar system? A: 1. Mercury, 2. Venus, 3. Earth, 4. Mars, 5. Jupiter, 6. Saturn, 7. Uranus, 8. Neptune.', 'index': 0, 'logprobs': None, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 15, 'completion_tokens': 48, 'total_tokens': 63}}



llama_print_timings:        load time =  8651.60 ms
llama_print_timings:      sample time =    33.44 ms /    48 runs   (    0.70 ms per token,  1435.36 tokens per second)
llama_print_timings: prompt eval time =  8651.40 ms /    15 tokens (  576.76 ms per token,     1.73 tokens per second)
llama_print_timings:        eval time =  6618.43 ms /    47 runs   (  140.82 ms per token,     7.10 tokens per second)
llama_print_timings:       total time = 15364.97 ms


In [None]:
response = llm(
    "JSON list of name strings of attractions in SF:",
    grammar=grammar, max_tokens=-1
)

Llama.generate: prefix-match hit

llama_print_timings:        load time =  8651.60 ms
llama_print_timings:      sample time =   897.80 ms /   105 runs   (    8.55 ms per token,   116.95 tokens per second)
llama_print_timings: prompt eval time =  4153.66 ms /    11 tokens (  377.61 ms per token,     2.65 tokens per second)
llama_print_timings:        eval time = 14636.59 ms /   104 runs   (  140.74 ms per token,     7.11 tokens per second)
llama_print_timings:       total time = 19875.41 ms


In [None]:
import json
print(json.dumps(json.loads(response['choices'][0]['text']), indent=4))

[
    "Golden Gate Park",
    "Alcatraz Island",
    "Fisherman's Wharf",
    "Chinatown",
    "Union Square",
    "Presidio",
    "Pier 39",
    "Haight-Ashbury",
    "Castro District",
    "Exploratorium",
    "Ferry Building Marketplace",
    "Coit Tower"
]


In [None]:
responseLD = llm(
    "JSON-LD list of name strings of attractions in SF as a key-value using schema.org:",
    grammar=llama_grammar, max_tokens=-1
)
# Print the JSON-LD completion generated just above, not the earlier `response`
print(json.dumps(json.loads(responseLD['choices'][0]['text']), indent=4))

Llama.generate: prefix-match hit


[
    "Golden Gate Park",
    "Alcatraz Island",
    "Fisherman's Wharf",
    "Chinatown",
    "Union Square",
    "Presidio",
    "Pier 39",
    "Haight-Ashbury",
    "Castro District",
    "Exploratorium",
    "Ferry Building Marketplace",
    "Coit Tower"
]



llama_print_timings:        load time =  8651.60 ms
llama_print_timings:      sample time =  3828.15 ms /   489 runs   (    7.83 ms per token,   127.74 tokens per second)
llama_print_timings: prompt eval time =  3939.81 ms /    21 tokens (  187.61 ms per token,     5.33 tokens per second)
llama_print_timings:        eval time = 69460.78 ms /   488 runs   (  142.34 ms per token,     7.03 tokens per second)
llama_print_timings:       total time = 78111.29 ms


## React Pattern for LLMs

Simon Willison has a very simple blog post implementing [ReAct: Synergizing Reasoning and Acting in Language Models](https://react-lm.github.io/) in one of his TILs -- [A simple Python implementation of the ReAct pattern for LLMs](https://til.simonwillison.net/llms/python-react-pattern). There are some issues in using regular expressions to select the tool the LLM wants to use. If the response can be constrained using a grammar as with the OpenAI model, more expressive response options are available. The explanation below of the regular expression and the surrounding code was generated using ChatGPT.

This line of code is utilizing the `re` module in Python, which is used for working with regular expressions. The `re.compile` function compiles a regular expression pattern into a regular expression object, which can then be used to match against strings.

Breaking down the regular expression `'^Action: (\w+): (.*)$'`:

- `^` asserts the start of a line. This means that the pattern will only match if it appears at the beginning of a string.
  
- `Action:` is a literal string. The pattern is looking for the word "Action:" exactly as it appears here.
  
- `\w+` is a special sequence that matches any word character (equivalent to `[a-zA-Z0-9_]`) and the `+` means "one or more" of the preceding element. So, `\w+` will match one or more word characters.
  
- `: ` is, again, literal text, matching the colon character followed by a single space.
  
- `(.*)` is a capturing group that will match any character (except for line terminators) `.` zero or more times `*`. The `.` matches any single character except newline characters, and the `*` means "zero or more" of the preceding element. So this part of the pattern will capture everything after the second colon until the end of the line.
  
- `$` asserts the end of a line. This ensures that the pattern will match only if it extends all the way to the end of the string.

When this regular expression is used to search a string, it looks for lines that start with "Action:", followed by a word character sequence that represents some action name, followed by a colon and then any sequence of characters after that until the end of the line.

For example, the string "Action: Save: Document 1" would match this pattern. The `\w+` would match "Save" and the `(.*)` would match "Document 1" (the space after the second colon is consumed by the literal `: ` and is not captured). These matched parts are in parentheses, which means they are captured groups that can be later extracted or referenced from the matched object.

Here's what each group would capture in the given example:
- Group 1 would capture "Save" (the action type)
- Group 2 would capture "Document 1" (the action details)

By compiling the regular expression with `re.compile`, the resulting regular expression object can be used for matching multiple times more efficiently, as the expression does not need to be recompiled each time.

This code is part of a Python script that uses the OpenAI API to create a chatbot that interacts with a user, potentially performs web-based actions like searching Wikipedia, performing calculations, or searching a blog, and then uses the results of those actions to respond to the user.

The `action_re` regular expression is designed to parse lines from the chatbot's output that signify an "Action" that the bot decides to take. When the bot outputs a line starting with "Action:" followed by a word and a colon, then any text after that, the regular expression is used to capture this and process it accordingly.

Here's the regular expression again:

```python
action_re = re.compile('^Action: (\w+): (.*)$')
```

When this regular expression is applied to a string:

1. `^Action:` matches the beginning of a line that starts with "Action:".
2. `(\w+)` captures one or more word characters immediately following "Action:", which represents the action type (like "wikipedia" or "calculate").
3. `: ` matches the literal colon and space that follows the action type.
4. `(.*)` captures everything following the colon and space, which represents the action input or query (like "France" for a Wikipedia search or "4 * 7 / 3" for a calculation).
5. `$` asserts the end of the line, ensuring no extra characters are present after the pattern.

The `query` function runs the chatbot's interaction. The chatbot can respond multiple times, and part of its response can include an "Action" line, which is meant to be executed by the script.

When such a line is detected:

1. The script extracts the action and the action input using the `.groups()` method on the match object.
2. It checks if the action is in the `known_actions` dictionary. If it isn't, an exception is raised.
3. If the action is known, the corresponding function (either `wikipedia`, `calculate`, or `simon_blog_search`) is called with the action input.
4. The result of this action is then treated as an "Observation" and used in the next prompt to the chatbot.

The functions `wikipedia`, `simon_blog_search`, and `calculate` are defined to perform the respective actions:

- `wikipedia` uses `httpx` to make a GET request to the Wikipedia API and returns a search snippet.
- `simon_blog_search` performs a search on a blog using a provided JSON API.
- `calculate` uses Python's `eval` function to execute a calculation provided as a string.

Security Note: The use of `eval` is generally discouraged as it can run arbitrary code. It's important to sanitize any input before using it in `eval` to prevent potential security vulnerabilities.

In [None]:
# This code is Apache 2 licensed:
# https://www.apache.org/licenses/LICENSE-2.0
import openai
import re
import httpx

openai.api_key = "sk-..."
 
class ChatBot:
    """Minimal chat wrapper that keeps the running list of messages.

    Calling an instance with a user message appends it to `messages`, asks the
    model for a reply via `execute`, records that reply as the assistant
    message and returns it.
    """

    def __init__(self, system=""):
        self.system = system
        # Start the history with the system prompt only when one was given
        self.messages = [{"role": "system", "content": system}] if self.system else []

    def _record(self, role, content):
        """Append one message with the given role to the history."""
        self.messages.append({"role": role, "content": content})

    def __call__(self, message):
        self._record("user", message)
        reply = self.execute()
        self._record("assistant", reply)
        return reply

    def execute(self):
        """Send the whole history to the model and return the reply text."""
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=self.messages,
        )
        # Uncomment this to print out token usage each time, e.g.
        # {"completion_tokens": 86, "prompt_tokens": 26, "total_tokens": 112}
        # print(completion.usage)
        first_choice = completion.choices[0]
        return first_choice.message.content

prompt = """
You run in a loop of Thought, Action, PAUSE, Observation.
At the end of the loop you output an Answer
Use Thought to describe your thoughts about the question you have been asked.
Use Action to run one of the actions available to you - then return PAUSE.
Observation will be the result of running those actions.

Your available actions are:

calculate:
e.g. calculate: 4 * 7 / 3
Runs a calculation and returns the number - uses Python so be sure to use floating point syntax if necessary

wikipedia:
e.g. wikipedia: Django
Returns a summary from searching Wikipedia

simon_blog_search:
e.g. simon_blog_search: Django
Search Simon's blog for that term

Always look things up on Wikipedia if you have the opportunity to do so.

Example session:

Question: What is the capital of France?
Thought: I should look up France on Wikipedia
Action: wikipedia: France
PAUSE

You will be called again with this:

Observation: France is a country. The capital is Paris.

You then output:

Answer: The capital of France is Paris
""".strip()


# Matches a line such as "Action: wikipedia: France"; group 1 is the action
# name and group 2 is its input. A raw string is used so that `\w` reaches the
# regex engine instead of being treated as an (invalid) string escape.
action_re = re.compile(r'^Action: (\w+): (.*)$')

def query(question, max_turns=5):
    """Drive a ChatBot through up to `max_turns` rounds for `question`.

    Each round prints the bot's reply. If the reply contains a line matching
    `action_re`, the first such action is looked up in `known_actions`, run
    with its input, and the result is sent back as the next prompt in the form
    "Observation: ...". The loop ends as soon as a reply has no action line,
    or when `max_turns` is reached. Always returns None.

    Raises:
        Exception: if the requested action is not in `known_actions`.
    """
    bot = ChatBot(prompt)
    next_prompt = question
    turn = 0
    while turn < max_turns:
        turn += 1
        result = bot(next_prompt)
        print(result)
        # Keep only the lines of the reply that look like an action request
        matches = [m for m in map(action_re.match, result.split('\n')) if m]
        if not matches:
            # No action line in this reply, so there is nothing more to run
            return
        # Only the first action in a reply is executed
        action, action_input = matches[0].groups()
        if action not in known_actions:
            raise Exception("Unknown action: {}: {}".format(action, action_input))
        print(" -- running {} {}".format(action, action_input))
        observation = known_actions[action](action_input)
        print("Observation:", observation)
        next_prompt = "Observation: {}".format(observation)


def wikipedia(q):
    """Search Wikipedia for `q` and return the snippet of the first search result."""
    search_params = {
        "action": "query",
        "list": "search",
        "srsearch": q,
        "format": "json"
    }
    reply = httpx.get("https://en.wikipedia.org/w/api.php", params=search_params)
    hits = reply.json()["query"]["search"]
    return hits[0]["snippet"]


def simon_blog_search(q):
    """Search the blog's Datasette JSON API for `q` and return the `text` of the first row.

    The SQL selects the entry title followed by a prefix of the tag-stripped
    body, ordered by full-text search rank and limited to one row.
    """
    sql = """
        select
          blog_entry.title || ': ' || substr(html_strip_tags(blog_entry.body), 0, 1000) as text,
          blog_entry.created
        from
          blog_entry join blog_entry_fts on blog_entry.rowid = blog_entry_fts.rowid
        where
          blog_entry_fts match escape_fts(:q)
        order by
          blog_entry_fts.rank
        limit
          1""".strip()
    # `q` is sent as a separate request parameter and referenced as `:q` in the SQL
    reply = httpx.get(
        "https://datasette.simonwillison.net/simonwillisonblog.json",
        params={"sql": sql, "_shape": "array", "q": q},
    )
    rows = reply.json()
    return rows[0]["text"]

def calculate(what):
    """Evaluate the string `what` as a Python expression and return the result."""
    # SECURITY: `eval` runs arbitrary Python. `query` passes text taken from the
    # model's reply straight into this function, so treat the input as untrusted
    # and only run this where arbitrary code execution is acceptable.
    return eval(what)

# Maps the action name captured from an "Action: <name>: <input>" line to the
# function that carries it out.
known_actions = dict(
    wikipedia=wikipedia,
    calculate=calculate,
    simon_blog_search=simon_blog_search,
)