In [1]:
import sys
import logging

import datasets
from datasets import load_dataset
import torch
import transformers
from trl import SFTTrainer
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
import os
import json
import wandb


In [2]:
#!pip install -U torch

In [3]:
# !pip install datasets transformers trl wandb

In [4]:
ds = load_dataset("NousResearch/hermes-function-calling-v1", "glaive_func_calling")

In [5]:
ds['train']['conversations'][10]

# Some changes to make based on this to match LLM FT paradigm

# from and value should be replaced with role and content (chat format)

# replace human with user, gpt with assistant
# check to see if tool call can be a special token or not

# is the way assisant follows tool response what we want? 

# are the list of tools available fixed for all system prompts? 
# can we possibly enhance and add more, and create synthetic data?

[{'from': 'system',
  'value': "You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags.You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions.Here are the available tools:<tools> [{'type': 'function', 'function': {'name': 'get_stock_price', 'description': 'Get the current stock price of a company', 'parameters': {'type': 'object', 'properties': {'company': {'type': 'string', 'description': 'The name of the company'}}, 'required': ['company']}}}, {'type': 'function', 'function': {'name': 'send_email', 'description': 'Send an email to a recipient', 'parameters': {'type': 'object', 'properties': {'recipient': {'type': 'string', 'description': 'The email address of the recipient'}, 'subject': {'type': 'string', 'description': 'The subject of the email'}, 'message': {'type': 'string', 'description': 'The body of the email'}}, 'required': ['recipient', 'subject',

In [6]:

###################
# Tokenizer Loading
###################

checkpoint_path = "HuggingFaceTB/SmolLM-360M-instruct"
tokenizer = AutoTokenizer.from_pretrained(checkpoint_path)
tokenizer.model_max_length = 2048
tokenizer.pad_token = "<|endoftext|>"  # note this is specific to smollm
tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token )
tokenizer.padding_side = 'right'
# https://stackoverflow.com/questions/76446228/setting-padding-token-as-eos-token-when-using-datacollatorforlanguagemodeling-fr


In [7]:
tokenizer.eos_token_id, tokenizer.pad_token_id

(2, 0)

In [8]:
tokenizer

GPT2TokenizerFast(name_or_path='HuggingFaceTB/SmolLM-360M-instruct', vocab_size=49152, model_max_length=2048, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<|im_start|>', 'eos_token': '<|im_end|>', 'unk_token': '<|endoftext|>', 'pad_token': '<|endoftext|>', 'additional_special_tokens': ['<|im_start|>', '<|im_end|>']}, clean_up_tokenization_spaces=False),  added_tokens_decoder={
	0: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	1: AddedToken("<|im_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	2: AddedToken("<|im_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	3: AddedToken("<repo_name>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	4: AddedToken("<reponame>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	5: AddedToken("<file_s

In [9]:
##################
# Data Processing
##################
def clean_mesages(messages):
    messages_clean = []
    for m in messages:
        role = m['from']
        content = m['value']
        if role == 'human':
            role = 'user'
        elif role == 'gpt':
            role = 'assistant'
        messages_clean.append({'role': role, 'content': content})
    return messages_clean

def apply_chat_template(
    example,
    tokenizer,
):

    messages_init = example["conversations"]
    messages = clean_mesages(messages_init)
    example["text"] = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=False).strip('\n')
    return example



train_dataset = ds['train']
column_names = list(train_dataset.features)

processed_train_dataset = train_dataset.map(
    apply_chat_template,
    fn_kwargs={"tokenizer": tokenizer},
    num_proc=10,
    desc="Applying chat template to train_sft",
)

# shuffle the dataset
processed_train_dataset = processed_train_dataset.shuffle(seed=42)

In [10]:
print(processed_train_dataset['text'][0])

<|im_start|>system
You are a helpful assistant, with no access to external functions.<|im_end|>
<|im_start|>user
Transform these sentences into a passive voice
They are singing a song.<|im_end|>
<|im_start|>assistant
A song is being sung by them.<|im_end|>
<|im_start|>user
What is passive voice and why is it used?<|im_end|>
<|im_start|>assistant
In passive voice, the subject of a sentence receives the action instead of performing it. Passive voice is used to shift focus from the doer (subject) of an action to the receiver (object). It can also be used to make writing more formal or objective.<|im_end|>
<|im_start|>user
How can I identify passive voice in a sentence?<|im_end|>
<|im_start|>assistant
Passive voice can be identified by looking for a form of "to be" (is, was, are, were, be, being, been) followed by a past participle verb (usually ending in -ed or -en) and the absence of an explicit doer (or when the doer is introduced with "by"). For example, "The cake was baked by John" is

In [22]:
processed_train_dataset['text'][10]

'<|im_start|>system\nYou are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags.You may call one or more functions to assist with the user query. Don\'t make assumptions about what values to plug into functions.Here are the available tools:<tools> [{\'type\': \'function\', \'function\': {\'name\': \'create_todo\', \'description\': \'Create a new todo item\', \'parameters\': {\'type\': \'object\', \'properties\': {\'title\': {\'type\': \'string\', \'description\': \'The title of the todo item\'}, \'description\': {\'type\': \'string\', \'description\': \'The description of the todo item\'}}, \'required\': [\'title\']}}}, {\'type\': \'function\', \'function\': {\'name\': \'create_invoice\', \'description\': \'Create a new invoice\', \'parameters\': {\'type\': \'object\', \'properties\': {\'client\': {\'type\': \'string\', \'description\': \'The name of the client\'}, \'items\': {\'type\': \'array\', \'items\': {\'type\': \'object\', \'p

# Perfect assistant example:

<|im_start|>system
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags.You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions.Here are the available tools:<tools> [{'type': 'function', 'function': {'name': 'create_todo', 'description': 'Create a new todo item', 'parameters': {'type': 'object', 'properties': {'title': {'type': 'string', 'description': 'The title of the todo item'}, 'description': {'type': 'string', 'description': 'The description of the todo item'}}, 'required': ['title']}}}, {'type': 'function', 'function': {'name': 'create_invoice', 'description': 'Create a new invoice', 'parameters': {'type': 'object', 'properties': {'client': {'type': 'string', 'description': 'The name of the client'}, 'items': {'type': 'array', 'items': {'type': 'object', 'properties': {'description': {'type': 'string', 'description': 'The description of the item'}, 'quantity': {'type': 'integer', 'description': 'The quantity of the item'}, 'price': {'type': 'number', 'description': 'The price of the item'}}, 'required': ['description', 'quantity', 'price']}, 'description': 'The list of items in the invoice'}}, 'required': ['client', 'items']}}}] </tools>Use the following pydantic model json schema for each tool call you will make: {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
<tool_call>
{tool_call}
</tool_call><|im_end|>
<|im_start|>user
I need to create a new todo item.<|im_end|>
<|im_start|>assistant
Sure, I can help with that. Could you please provide me with the title and description of the todo item?<|im_end|>
<|im_start|>user
The title is "Grocery Shopping" and the description is "Buy fruits, vegetables, and bread".<|im_end|>
<|im_start|>assistant
<tool_call>
{'name': 'create_todo', 'arguments': {'title': 'Grocery Shopping', 'description': 'Buy fruits, vegetables, and bread'}}
</tool_call><|im_end|>
<|im_start|>tool
<tool_response>
{'status': 'success', 'message': "Todo item 'Grocery Shopping' has been created successfully."}
</tool_response><|im_end|>
<|im_start|>assistant
Your todo item "Grocery Shopping" has been created successfully. It includes "Buy fruits, vegetables, and bread".<|im_end|>


This todo list creation is exactly what you'd want an assistant to help with

In [11]:
# Now test how this works with the existing model

####################
# Base Model Loading
####################
checkpoint_path = "HuggingFaceTB/SmolLM-360M"
model_kwargs = dict(
    use_cache=False,
    trust_remote_code=True,
#    attn_implementation="flash_attention_2",  # only works on latest gpus, probably not worth it in most cases
     torch_dtype=torch.bfloat16,
   device_map='auto'
)
model = AutoModelForCausalLM.from_pretrained(checkpoint_path, **model_kwargs)


In [12]:
user_prompt = "I need to create a new todo item with the title 'Buy groceries'."
tool_calling_system_prompt = """<|im_start|>system
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions.Here are the available tools:<tools> [{'type': 'function', 'function': {'name': 'create_todo', 'description': 'Create a new todo item', 'parameters': {'type': 'object', 'properties': {'title': {'type': 'string', 'description': 'The title of the todo item'}, 'description': {'type': 'string', 'description': 'The description of the todo item'}}, 'required': ['title']}}}, {'type': 'function', 'function': {'name': 'create_invoice', 'description': 'Create a new invoice', 'parameters': {'type': 'object', 'properties': {'client': {'type': 'string', 'description': 'The name of the client'}, 'items': {'type': 'array', 'items': {'type': 'object', 'properties': {'description': {'type': 'string', 'description': 'The description of the item'}, 'quantity': {'type': 'integer', 'description': 'The quantity of the item'}, 'price': {'type': 'number', 'description': 'The price of the item'}}, 'required': ['description', 'quantity', 'price']}, 'description': 'The list of items in the invoice'}}, 'required': ['client', 'items']}}}] </tools>Use the following pydantic model json schema for each tool call you will make: {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
<tool_call>
{tool_call}
</tool_call><|im_end|>"""

tool_calling_user_prompt = f"""
<|im_start|>user
{user_prompt}.<|im_end|>
<|im_start|>assistant
"""

tool_calling_prompt = tool_calling_system_prompt + tool_calling_user_prompt

print(tool_calling_prompt)

<|im_start|>system
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions.Here are the available tools:<tools> [{'type': 'function', 'function': {'name': 'create_todo', 'description': 'Create a new todo item', 'parameters': {'type': 'object', 'properties': {'title': {'type': 'string', 'description': 'The title of the todo item'}, 'description': {'type': 'string', 'description': 'The description of the todo item'}}, 'required': ['title']}}}, {'type': 'function', 'function': {'name': 'create_invoice', 'description': 'Create a new invoice', 'parameters': {'type': 'object', 'properties': {'client': {'type': 'string', 'description': 'The name of the client'}, 'items': {'type': 'array', 'items': {'type': 'object', 'properties': {'description': {'type': 'string', 'description': 'The description of the item'},

In [13]:
model.eval();

input_ids = tokenizer.encode(tool_calling_prompt, return_tensors='pt')
input_ids = input_ids.to(model.device)
output = model.generate(input_ids, max_new_tokens=256,  do_sample=False, pad_token_id=tokenizer.eos_token_id)
output_text = tokenizer.decode(output[0], skip_special_tokens=False, pad_token_id = tokenizer.eos_token_id)
print(output_text)
formatted_output_text = "<|im_end|>".join(output_text.split("<|im_end|>")[:3]) + "<|im_end|>"
#print(formatted_output_text)

<|im_start|>system
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions.Here are the available tools:<tools> [{'type': 'function', 'function': {'name': 'create_todo', 'description': 'Create a new todo item', 'parameters': {'type': 'object', 'properties': {'title': {'type': 'string', 'description': 'The title of the todo item'}, 'description': {'type': 'string', 'description': 'The description of the todo item'}}, 'required': ['title']}}}, {'type': 'function', 'function': {'name': 'create_invoice', 'description': 'Create a new invoice', 'parameters': {'type': 'object', 'properties': {'client': {'type': 'string', 'description': 'The name of the client'}, 'items': {'type': 'array', 'items': {'type': 'object', 'properties': {'description': {'type': 'string', 'description': 'The description of the item'},

In [14]:
logger = logging.getLogger(__name__)
wandb.init(project="smollm-ft-function-calling")
###################
# Hyper-parameters
###################
training_config = {
    "do_eval": False,
    "learning_rate": 5.0e-05,
    "per_device_train_batch_size": 4,
    "gradient_accumulation_steps": 2,
    "log_level": "info",
    "logging_steps": 100,
    "logging_strategy": "steps",
    "lr_scheduler_type": "cosine",
    "num_train_epochs": 5,
    "max_steps": -1,
    "output_dir": "./checkpoint_dir",
    "overwrite_output_dir": True,
    "remove_unused_columns": True,
    "save_steps": 500,
    "save_total_limit": 1,
    "seed": 0,
    "gradient_checkpointing": True,
    "gradient_checkpointing_kwargs":{"use_reentrant": False},
    "gradient_accumulation_steps": 1,
    "warmup_ratio": 0.05,
    "report_to":"wandb"
    }


train_conf = TrainingArguments(**training_config)

###############
# Setup logging
###############
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    handlers=[logging.StreamHandler(sys.stdout)],
)
log_level = train_conf.get_process_log_level()
logger.setLevel(log_level)
datasets.utils.logging.set_verbosity(log_level)
transformers.utils.logging.set_verbosity(log_level)
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()

logger.info(f"Training/evaluation parameters {train_conf}")


[34m[1mwandb[0m: Currently logged in as: [33mnoahpunintended[0m ([33mfdlx[0m). Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112931133377262, max=1.0…

2024-08-31 14:42:05 - INFO - __main__ - Training/evaluation parameters TrainingArguments(
_n_gpu=1,
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
batch_eval_metrics=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_persistent_workers=False,
dataloader_pin_memory=True,
dataloader_prefetch_factor=None,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
dispatch_batches=None,
do_eval=False,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_do_concat_batches=True,
eval_on_start=False,
eval_steps=None,
eval_str

In [15]:
###########
# Training
###########

model.train();
trainer = SFTTrainer(
    model=model,
    args=train_conf,
    train_dataset=processed_train_dataset,
    max_seq_length=2048,
    dataset_text_field="text",
    tokenizer=tokenizer,
    #packing=True,
)
train_result = trainer.train()
metrics = train_result.metrics
trainer.log_metrics("train", metrics)
trainer.save_metrics("train", metrics)
trainer.save_state()



Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
[INFO|training_args.py:2100] 2024-08-31 14:42:25,451 >> PyTorch: setting up devices
Loading cached processed dataset at /root/.cache/huggingface/datasets/NousResearch___hermes-function-calling-v1/glaive_func_calling/0.0.0/8f025148382537ba84cd325e1834b706e1461692/cache-397674f2cf535607.arrow


2024-08-31 14:42:25 - INFO - datasets.arrow_dataset - Loading cached processed dataset at /root/.cache/huggingface/datasets/NousResearch___hermes-function-calling-v1/glaive_func_calling/0.0.0/8f025148382537ba84cd325e1834b706e1461692/cache-397674f2cf535607.arrow


[INFO|trainer.py:2134] 2024-08-31 14:42:25,929 >> ***** Running training *****
[INFO|trainer.py:2135] 2024-08-31 14:42:25,930 >>   Num examples = 5,209
[INFO|trainer.py:2136] 2024-08-31 14:42:25,931 >>   Num Epochs = 5
[INFO|trainer.py:2137] 2024-08-31 14:42:25,933 >>   Instantaneous batch size per device = 4
[INFO|trainer.py:2140] 2024-08-31 14:42:25,933 >>   Total train batch size (w. parallel, distributed & accumulation) = 4
[INFO|trainer.py:2141] 2024-08-31 14:42:25,934 >>   Gradient Accumulation steps = 1
[INFO|trainer.py:2142] 2024-08-31 14:42:25,935 >>   Total optimization steps = 6,515
[INFO|trainer.py:2143] 2024-08-31 14:42:25,937 >>   Number of trainable parameters = 361,821,120
[INFO|integration_utils.py:807] 2024-08-31 14:42:25,939 >> Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
  with device_autocast_ctx, torch.cpu.amp.autocast(**cpu_autocast_kwargs), recompute_context:  # type: ignore[attr-defined]


Step,Training Loss
100,1.6728
200,1.4465
300,0.6661
400,0.4446
500,0.4397
600,0.3868
700,0.3585
800,0.3975
900,0.3544
1000,0.3403


[INFO|trainer.py:3503] 2024-08-31 14:47:05,016 >> Saving model checkpoint to ./checkpoint_dir/checkpoint-500
[INFO|configuration_utils.py:472] 2024-08-31 14:47:05,028 >> Configuration saved in ./checkpoint_dir/checkpoint-500/config.json
[INFO|configuration_utils.py:807] 2024-08-31 14:47:05,037 >> Configuration saved in ./checkpoint_dir/checkpoint-500/generation_config.json
[INFO|modeling_utils.py:2799] 2024-08-31 14:47:06,874 >> Model weights saved in ./checkpoint_dir/checkpoint-500/model.safetensors
[INFO|tokenization_utils_base.py:2684] 2024-08-31 14:47:06,889 >> tokenizer config file saved in ./checkpoint_dir/checkpoint-500/tokenizer_config.json
[INFO|tokenization_utils_base.py:2693] 2024-08-31 14:47:06,898 >> Special tokens file saved in ./checkpoint_dir/checkpoint-500/special_tokens_map.json
[INFO|trainer.py:3595] 2024-08-31 14:47:10,188 >> Deleting older checkpoint [checkpoint_dir/checkpoint-500] due to args.save_total_limit
  with device_autocast_ctx, torch.cpu.amp.autocast(**cp

KeyboardInterrupt: 

In [23]:
# Load the model from the checkpoint

# find most recently created folder in checkpoint_dir and set as checkpoint path
checkpoint_path = sorted(os.listdir(train_conf.output_dir))[-1]
checkpoint_path = os.path.join(train_conf.output_dir, checkpoint_path)
model_kwargs = dict(
    use_cache=False,
    trust_remote_code=True,
     torch_dtype=torch.bfloat16,
   device_map='auto'
)
model = AutoModelForCausalLM.from_pretrained(checkpoint_path, **model_kwargs)


NameError: name 'train_conf' is not defined

In [27]:
user_prompt = "Who are you?"
tool_calling_system_prompt = """<|im_start|>system
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags.You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions.Here are the available tools:<tools> [{'type': 'function', 'function': {'name': 'create_todo', 'description': 'Create a new todo item', 'parameters': {'type': 'object', 'properties': {'title': {'type': 'string', 'description': 'The title of the todo item'}, 'description': {'type': 'string', 'description': 'The description of the todo item'}}, 'required': ['title']}}}, {'type': 'function', 'function': {'name': 'create_invoice', 'description': 'Create a new invoice', 'parameters': {'type': 'object', 'properties': {'client': {'type': 'string', 'description': 'The name of the client'}, 'items': {'type': 'array', 'items': {'type': 'object', 'properties': {'description': {'type': 'string', 'description': 'The description of the item'}, 'quantity': {'type': 'integer', 'description': 'The quantity of the item'}, 'price': {'type': 'number', 'description': 'The price of the item'}}, 'required': ['description', 'quantity', 'price']}, 'description': 'The list of items in the invoice'}}, 'required': ['client', 'items']}}}] </tools>Use the following pydantic model json schema for each tool call you will make: {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
<tool_call>
{tool_call}
</tool_call><|im_end|>"""

tool_calling_user_prompt = f"""
<|im_start|>user
{user_prompt}.<|im_end|>
<|im_start|>assistant
"""

tool_calling_prompt = tool_calling_system_prompt + tool_calling_user_prompt

print(tool_calling_prompt)

<|im_start|>system
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags.You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions.Here are the available tools:<tools> [{'type': 'function', 'function': {'name': 'create_todo', 'description': 'Create a new todo item', 'parameters': {'type': 'object', 'properties': {'title': {'type': 'string', 'description': 'The title of the todo item'}, 'description': {'type': 'string', 'description': 'The description of the todo item'}}, 'required': ['title']}}}, {'type': 'function', 'function': {'name': 'create_invoice', 'description': 'Create a new invoice', 'parameters': {'type': 'object', 'properties': {'client': {'type': 'string', 'description': 'The name of the client'}, 'items': {'type': 'array', 'items': {'type': 'object', 'properties': {'description': {'type': 'string', 'description': 'The description of the item'}, 

In [16]:
model.eval();

input_ids = tokenizer.encode(tool_calling_prompt, return_tensors='pt')
input_ids = input_ids.to(model.device)
output = model.generate(input_ids, max_new_tokens=256,  do_sample=False, pad_token_id=tokenizer.eos_token_id)
output_text = tokenizer.decode(output[0], skip_special_tokens=False, pad_token_id = tokenizer.eos_token_id)
formatted_output_text = "<|im_end|>".join(output_text.split("<|im_end|>")[:3]) + "<|im_end|>"
print(formatted_output_text)

<|im_start|>system
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions.Here are the available tools:<tools> [{'type': 'function', 'function': {'name': 'create_todo', 'description': 'Create a new todo item', 'parameters': {'type': 'object', 'properties': {'title': {'type': 'string', 'description': 'The title of the todo item'}, 'description': {'type': 'string', 'description': 'The description of the todo item'}}, 'required': ['title']}}}, {'type': 'function', 'function': {'name': 'create_invoice', 'description': 'Create a new invoice', 'parameters': {'type': 'object', 'properties': {'client': {'type': 'string', 'description': 'The name of the client'}, 'items': {'type': 'array', 'items': {'type': 'object', 'properties': {'description': {'type': 'string', 'description': 'The description of the item'},