From 365c6ab385e8300813366f0ffae76cbac9eb9bd0 Mon Sep 17 00:00:00 2001
From: Charlie Ruan <53290280+CharlieFRuan@users.noreply.github.com>
Date: Thu, 18 Apr 2024 14:05:38 -0400
Subject: [PATCH 1/3] Add conv template and model preset

---
 python/mlc_llm/conversation_template.py | 25 +++++++++++++
 python/mlc_llm/model/model_preset.py    | 50 +++++++++++++++++++++++++
 2 files changed, 75 insertions(+)

diff --git a/python/mlc_llm/conversation_template.py b/python/mlc_llm/conversation_template.py
index 1b2a06feab..fa4bee86d4 100644
--- a/python/mlc_llm/conversation_template.py
+++ b/python/mlc_llm/conversation_template.py
@@ -36,6 +36,31 @@ def get_conv_template(name: str) -> Optional[Conversation]:
 
 ############## Preset Conversation Templates ##############
 
+# Llama3
+# See https://github.com/meta-llama/llama3?tab=readme-ov-file#instruction-tuned-models
+# and https://github.com/meta-llama/llama3/blob/main/llama/tokenizer.py
+ConvTemplateRegistry.register_conv_template(
+    Conversation(
+        name="llama-3",
+        system_template=(
+            "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n",
+            f"{MessagePlaceholders.SYSTEM.value}<|eot_id|>",
+        ),
+        system_message="You are a helpful, respectful and honest assistant.",
+        roles={
+            "user": "<|start_header_id|>user",
+            "assistant": "<|start_header_id|>assistant",
+            "tool": "<|start_header_id|>user",
+        },
+        seps=["<|eot_id|>"],
+        role_content_sep="<|end_header_id|>\n\n",
+        role_empty_sep="<|end_header_id|>\n\n",
+        stop_str=["<|end_of_text|>", "<|eot_id|>"],
+        stop_token_ids=[128001, 128009],  # "<|end_of_text|>", "<|eot_id|>"
+        system_prefix_token_ids=[128000],  # "<|begin_of_text|>"
+    )
+)
+
 # Llama2
 ConvTemplateRegistry.register_conv_template(
     Conversation(
diff --git a/python/mlc_llm/model/model_preset.py b/python/mlc_llm/model/model_preset.py
index 3bfe1cb891..41abf0292c 100644
--- a/python/mlc_llm/model/model_preset.py
+++ b/python/mlc_llm/model/model_preset.py
@@ -660,4 +660,54 @@
         "eos_token_id": 2,
         "pad_token_id": 0,
     },
+    "llama3_8b": {
+        "architectures": ["LlamaForCausalLM"],
+        "attention_bias": False,
+        "attention_dropout": 0.0,
+        "bos_token_id": 128000,
+        "eos_token_id": 128001,
+        "hidden_act": "silu",
+        "hidden_size": 4096,
+        "initializer_range": 0.02,
+        "intermediate_size": 14336,
+        "max_position_embeddings": 8192,
+        "model_type": "llama",
+        "num_attention_heads": 32,
+        "num_hidden_layers": 32,
+        "num_key_value_heads": 8,
+        "pretraining_tp": 1,
+        "rms_norm_eps": 1e-05,
+        "rope_scaling": None,
+        "rope_theta": 500000.0,
+        "tie_word_embeddings": False,
+        "torch_dtype": "bfloat16",
+        "transformers_version": "4.40.0.dev0",
+        "use_cache": True,
+        "vocab_size": 128256,
+    },
+    "llama3_70b": {
+        "architectures": ["LlamaForCausalLM"],
+        "attention_bias": False,
+        "attention_dropout": 0.0,
+        "bos_token_id": 128000,
+        "eos_token_id": 128001,
+        "hidden_act": "silu",
+        "hidden_size": 8192,
+        "initializer_range": 0.02,
+        "intermediate_size": 28672,
+        "max_position_embeddings": 8192,
+        "model_type": "llama",
+        "num_attention_heads": 64,
+        "num_hidden_layers": 80,
+        "num_key_value_heads": 8,
+        "pretraining_tp": 1,
+        "rms_norm_eps": 1e-05,
+        "rope_scaling": None,
+        "rope_theta": 500000.0,
+        "tie_word_embeddings": False,
+        "torch_dtype": "bfloat16",
+        "transformers_version": "4.40.0.dev0",
+        "use_cache": True,
+        "vocab_size": 128256,
+    },
 }

From dfa21a35434d3bd20818e1cd9165d4bb48e647dc Mon Sep 17 00:00:00 2001
From: Charlie Ruan <53290280+CharlieFRuan@users.noreply.github.com>
Date: Thu, 18 Apr 2024 15:10:29 -0400
Subject: [PATCH 2/3] Fix conv template

---
 python/mlc_llm/conversation_template.py | 14 ++++++--------
 python/mlc_llm/interface/gen_config.py  |  1 +
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/python/mlc_llm/conversation_template.py b/python/mlc_llm/conversation_template.py
index fa4bee86d4..88db8b1a3d 100644
--- a/python/mlc_llm/conversation_template.py
+++ b/python/mlc_llm/conversation_template.py
@@ -39,25 +39,23 @@ def get_conv_template(name: str) -> Optional[Conversation]:
 # Llama3
 # See https://github.com/meta-llama/llama3?tab=readme-ov-file#instruction-tuned-models
 # and https://github.com/meta-llama/llama3/blob/main/llama/tokenizer.py
+# Llama3
 ConvTemplateRegistry.register_conv_template(
     Conversation(
         name="llama-3",
         system_template=(
-            "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n",
-            f"{MessagePlaceholders.SYSTEM.value}<|eot_id|>",
+            "<|start_header_id|>system<|end_header_id|>\n\n",
+            f"{MessagePlaceholders.SYSTEM.value}",
         ),
         system_message="You are a helpful, respectful and honest assistant.",
-        roles={
-            "user": "<|start_header_id|>user",
-            "assistant": "<|start_header_id|>assistant",
-            "tool": "<|start_header_id|>user",
-        },
-        seps=["<|eot_id|>"],
+        roles={"user": "user", "assistant": "assistant"},
+        seps=["<|eot_id|><|start_header_id|>"],
         role_content_sep="<|end_header_id|>\n\n",
         role_empty_sep="<|end_header_id|>\n\n",
         stop_str=["<|end_of_text|>", "<|eot_id|>"],
         stop_token_ids=[128001, 128009],  # "<|end_of_text|>", "<|eot_id|>"
         system_prefix_token_ids=[128000],  # "<|begin_of_text|>"
+        add_role_after_system_message=True,
     )
 )
 
diff --git a/python/mlc_llm/interface/gen_config.py b/python/mlc_llm/interface/gen_config.py
index d22aa7d231..8e617fc3d2 100644
--- a/python/mlc_llm/interface/gen_config.py
+++ b/python/mlc_llm/interface/gen_config.py
@@ -274,6 +274,7 @@ def gen_config(  # pylint: disable=too-many-locals,too-many-arguments,too-many-b
     # FIXME: Copy RWKV tokenizer file  # pylint: disable=fixme
 
 CONV_TEMPLATES = {
+    "llama-3",
     "chatml",
     "open_hermes_mistral",
     "neural_hermes_mistral",

From 80c72723af8d507528b8afb45ed82eee79589319 Mon Sep 17 00:00:00 2001
From: Charlie Ruan <53290280+CharlieFRuan@users.noreply.github.com>
Date: Thu, 18 Apr 2024 15:11:46 -0400
Subject: [PATCH 3/3] Trivial

---
 python/mlc_llm/conversation_template.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/mlc_llm/conversation_template.py b/python/mlc_llm/conversation_template.py
index 88db8b1a3d..fa926708d3 100644
--- a/python/mlc_llm/conversation_template.py
+++ b/python/mlc_llm/conversation_template.py
@@ -39,7 +39,6 @@ def get_conv_template(name: str) -> Optional[Conversation]:
 # Llama3
 # See https://github.com/meta-llama/llama3?tab=readme-ov-file#instruction-tuned-models
 # and https://github.com/meta-llama/llama3/blob/main/llama/tokenizer.py
-# Llama3
 ConvTemplateRegistry.register_conv_template(
     Conversation(
         name="llama-3",
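
A quick local smoke test for the new template (a minimal sketch, not part of the
patches above; it relies only on the get_conv_template helper and the field values
visible in these diffs, and assumes mlc_llm is installed from this branch):

    from mlc_llm.conversation_template import get_conv_template

    conv = get_conv_template("llama-3")
    assert conv is not None, "llama-3 template should be registered"
    # These values mirror what the registered template sets above.
    assert conv.stop_token_ids == [128001, 128009]   # "<|end_of_text|>", "<|eot_id|>"
    assert conv.system_prefix_token_ids == [128000]  # "<|begin_of_text|>"
    print(conv.roles)  # {"user": "user", "assistant": "assistant"} after PATCH 2/3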