diff --git a/python/mlc_llm/conversation_template.py b/python/mlc_llm/conversation_template.py index 1b2a06feab..fa926708d3 100644 --- a/python/mlc_llm/conversation_template.py +++ b/python/mlc_llm/conversation_template.py @@ -36,6 +36,28 @@ def get_conv_template(name: str) -> Optional[Conversation]: ############## Preset Conversation Templates ############## +# Llama3 +# See https://github.com/meta-llama/llama3?tab=readme-ov-file#instruction-tuned-models +# and https://github.com/meta-llama/llama3/blob/main/llama/tokenizer.py +ConvTemplateRegistry.register_conv_template( + Conversation( + name="llama-3", + system_template=( + "<|start_header_id|>system<|end_header_id|>\n\n" + f"{MessagePlaceholders.SYSTEM.value}" + ), + system_message="You are a helpful, respectful and honest assistant.", + roles={"user": "user", "assistant": "assistant"}, + seps=["<|eot_id|><|start_header_id|>"], + role_content_sep="<|end_header_id|>\n\n", + role_empty_sep="<|end_header_id|>\n\n", + stop_str=["<|end_of_text|>", "<|eot_id|>"], + stop_token_ids=[128001, 128009], # "<|end_of_text|>", "<|eot_id|>" + system_prefix_token_ids=[128000], # "<|begin_of_text|>" + add_role_after_system_message=True, + ) +) + # Llama2 ConvTemplateRegistry.register_conv_template( Conversation( diff --git a/python/mlc_llm/interface/gen_config.py b/python/mlc_llm/interface/gen_config.py index d22aa7d231..8e617fc3d2 100644 --- a/python/mlc_llm/interface/gen_config.py +++ b/python/mlc_llm/interface/gen_config.py @@ -274,6 +274,7 @@ def gen_config( # pylint: disable=too-many-locals,too-many-arguments,too-many-b # FIXME: Copy RWKV tokenizer file # pylint: disable=fixme CONV_TEMPLATES = { + "llama-3", "chatml", "open_hermes_mistral", "neural_hermes_mistral", diff --git a/python/mlc_llm/model/model_preset.py b/python/mlc_llm/model/model_preset.py index 3bfe1cb891..41abf0292c 100644 --- a/python/mlc_llm/model/model_preset.py +++ b/python/mlc_llm/model/model_preset.py @@ -660,4 +660,54 @@ "eos_token_id": 2, 
"pad_token_id": 0, }, + "llama3_8b": { + "architectures": ["LlamaForCausalLM"], + "attention_bias": False, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": None, + "rope_theta": 500000.0, + "tie_word_embeddings": False, + "torch_dtype": "bfloat16", + "transformers_version": "4.40.0.dev0", + "use_cache": True, + "vocab_size": 128256, + }, + "llama3_70b": { + "architectures": ["LlamaForCausalLM"], + "attention_bias": False, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 8192, + "model_type": "llama", + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": None, + "rope_theta": 500000.0, + "tie_word_embeddings": False, + "torch_dtype": "bfloat16", + "transformers_version": "4.40.0.dev0", + "use_cache": True, + "vocab_size": 128256, + }, }