### Install Requirements

In [9]:
%pip install -qq langchain==0.1.10
%pip install -qq pydantic==1.10.8
%pip install -qq transformers==4.38.2

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


!!! IMPORTANT !!!
Restart kerner after installing requirements.

### Setup LLM Model Capable to Support Function Calling

In [36]:
MODEL_NAME = "teknium/OpenHermes-2.5-Mistral-7B"

In [37]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoTokenizer.from_pretrained(
    MODEL_NAME, device="cuda:0", torch_dtype=torch.float16
)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [38]:
from langchain.pydantic_v1 import BaseModel, Field


class BookRecommendation(BaseModel):
    """Provides a book recommendation based on any specific interest."""

    interest: str = Field(
        description="seeking a book suggestion tailored to a particular interest"
    )
    recommended_book: str = Field(
        description="book recommendations tailored to the manifested interests"
    )

In [39]:
from langchain.utils.openai_functions import convert_pydantic_to_openai_function

convert_pydantic_to_openai_function(BookRecommendation)

{'name': 'BookRecommendation',
 'description': 'Provides a book recommendation based on any specific interest.',
 'parameters': {'type': 'object',
  'properties': {'interest': {'description': 'seeking a book suggestion tailored to a particular interest',
    'type': 'string'},
   'recommended_book': {'description': 'book recommendations tailored to the manifested interests',
    'type': 'string'}},
  'required': ['interest', 'recommended_book']}}

In [40]:
import json
import re
import xml.etree.ElementTree as ET


def extract_function_calls(completion):
    completion = completion.strip()
    pattern = r"((.*?))"
    match = re.search(pattern, completion, re.DOTALL)

    if not match:
        return None

    multiplefn = match.group(1)
    root = ET.fromstring(multiplefn)
    functions = root.findall("functioncall")
    return [json.loads(fn.text) for fn in functions]

In [41]:
import torch

def generate_hermes(prompt, model, tokenizer, generation_config_overrides={}):
    fn = """{"name": "function_name", "arguments": {"arg_1": "value_1", "arg_2": value_2, ...}}"""
    prompt = f"""<|im_start|>system
You are a helpful assistant with access to the following functions:

{convert_pydantic_to_openai_function(BookRecommendation)}

To use these functions respond with:

     {fn} 
    ...


Edge cases you must handle:
- If there are no functions that match the user request, you will respond politely that you cannot help.<|im_end|>
<|im_start|>user
{prompt}<|im_end|>
<|im_start|>assistant"""
    generation_config = model.generation_config
    generation_config.update(
        **{
            **{
                "use_cache": True,
                "do_sample": True,
                "temperature": 0.2,
                "top_p": 1.0,
                "top_k": 0,
                "max_new_tokens": 512,
                "eos_token_id": tokenizer.eos_token_id,
                "pad_token_id": tokenizer.eos_token_id,
            },
            **generation_config_overrides,
        }
    )

    model = model.eval()
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    n_tokens = inputs.input_ids.numel()

    with torch.inference_mode():
        generated_tokens = model.generate(**inputs, generation_config=generation_config)

    return tokenizer.decode(
        generated_tokens.squeeze()[n_tokens:], skip_special_tokens=False
    )

In [42]:
from functools import partial

generation_func = partial(generate_hermes, model=model, tokenizer=tokenizer)

prompts = [
    "Recommend me a book on Crime Thriller."
]

for prompt in prompts:
    completion = generation_func(prompt)
    functions = extract_function_calls(completion)

    if functions:
        print(functions)
    else:
        print(completion.strip())
    print("="*100)

AttributeError: 'LlamaTokenizerFast' object has no attribute 'generation_config'