In [1]:
# simple hack to support import module from parent directory
import os
import sys
sys.path.append('../')

from llama3.core.tokenizer import Tokenizer
from llama3.core.prompt import serialize_function_metadata

In [2]:
# Location of the tokenizer model file. Set the LLAMA3_TOKENIZER_PATH
# environment variable to point at a different copy; when it is unset the
# original hard-coded path is used, so existing setups behave as before.
model_path = os.environ.get(
    'LLAMA3_TOKENIZER_PATH',
    '/home/michael/models/Llama3/Llama-3-8B-Instruct/tokenizer.model',
)
# Tokenizer built from that file; the comparison loop later in the notebook
# calls tokenizer.encode(...) on it to count tokens.
tokenizer = Tokenizer(model_path)

In [3]:
# Sample tool/function definitions used as input for the token-count
# comparison. Every entry has a "type" and a "function" holding a name, a
# description, and a "parameters" object (typed properties plus a list of
# required property names).
# Key order is kept deliberately: the entries are rendered with str() before
# tokenizing, and that rendering follows dict insertion order.
tools = [
    # get_current_weather: required location, optional unit restricted to an enum.
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                    },
                },
                "required": ["location"],
            },
        },
    },
    # rate_movie: three properties, all required.
    {
        "type": "function",
        "function": {
            "name": "rate_movie",
            "description": "Rate a movie based on user's review",
            "parameters": {
                "type": "object",
                "properties": {
                    "movie_title": {
                        "type": "string",
                        "description": "The title of the movie",
                    },
                    "rating": {
                        "type": "number",
                        "description": "The rating given by the user (1-10)",
                    },
                    "review": {
                        "type": "string",
                        "description": "The review of the movie",
                    },
                },
                "required": ["movie_title", "rating", "review"],
            },
        },
    },
    # search_books: a single required keyword.
    {
        "type": "function",
        "function": {
            "name": "search_books",
            "description": "Search for books based on title, author, or genre",
            "parameters": {
                "type": "object",
                "properties": {
                    "keyword": {
                        "type": "string",
                        "description": "The keyword to search for in book title, author, or genre",
                    },
                },
                "required": ["keyword"],
            },
        },
    },
    # validate_email: a single required string property carrying a "format" hint.
    {
        "type": "function",
        "function": {
            "name": "validate_email",
            "description": "Validate if an email address is valid",
            "parameters": {
                "type": "object",
                "properties": {
                    "email": {
                        "type": "string",
                        "format": "email",
                        "description": "The email address to validate",
                    },
                },
                "required": ["email"],
            },
        },
    },
]

In [4]:
# For each tool definition, tokenize its str() rendering and its serialized
# rendering, then report both token counts followed by the serialized text.
for func in tools:
    # Baseline: the dict rendered with str(), encoded with bos/eos disabled.
    tokens = tokenizer.encode(str(func), bos=False, eos=False)

    # Serialized form from the project helper, encoded with the same flags.
    optimized_str = serialize_function_metadata(func)
    optimized_tokens = tokenizer.encode(optimized_str, bos=False, eos=False)

    # A single print call: sep='\n' plus the trailing newline of print()
    # yields the same text as four consecutive print() calls would.
    print(
        '-'*80,
        f'Number of tokens: {len(tokens)}\n',
        f'Serialized Number of tokens: {len(optimized_tokens)}\n',
        f'Serialized Function:\n{optimized_str}\n',
        sep='\n',
    )


--------------------------------------------------------------------------------
Number of tokens: 96

Serialized Number of tokens: 51

Serialized Function:
// Get the current weather in a given location
type get_current_weather = (_: {
// The city and state, e.g. San Francisco, CA
location: string,
unit?: "celsius" | "fahrenheit",
}) => any;

--------------------------------------------------------------------------------
Number of tokens: 120

Serialized Number of tokens: 60

Serialized Function:
// Rate a movie based on user's review
type rate_movie = (_: {
// The title of the movie
movie_title: string,
// The rating given by the user (1-10)
rating: number,
// The review of the movie
review: string,
}) => any;

--------------------------------------------------------------------------------
Number of tokens: 79

Serialized Number of tokens: 43

Serialized Function:
// Search for books based on title, author, or genre
type search_books = (_: {
// The keyword to search for in book tit