Fix llama3 urls + chat completion termination + nightlies in readme #443

Merged (6 commits) on Apr 19, 2024
Changes from 5 commits
40 changes: 27 additions & 13 deletions README.md
@@ -7,10 +7,10 @@ The 'llama-recipes' repository is a companion to the [Meta Llama 2](https://gith
> | Token | Description |
> |---|---|
> `<\|begin_of_text\|>` | This is equivalent to the BOS token. |
> `<\|eot_id\|>` | This signifies the end of the message in a turn. |
> `<\|eot_id\|>` | This signifies the end of the message in a turn. The generate function needs to be set up as shown below or in [this example](./recipes/inference/local_inference/chat_completion/chat_completion.py) to terminate the generation after the turn.|
> `<\|start_header_id\|>{role}<\|end_header_id\|>` | These tokens enclose the role for a particular message. The possible roles can be: system, user, assistant. |
> `<\|end_of_text\|>` | This is equivalent to the EOS token. On generating this token, Llama 3 will cease to generate more tokens |
>
> `<\|end_of_text\|>` | This is equivalent to the EOS token. It's usually not used during multi-turn conversations. Instead, each message is terminated with `<\|eot_id\|>`. |
>
> A multi-turn conversation with Llama 3 follows this prompt template:
> ```
> <|begin_of_text|><|start_header_id|>system<|end_header_id|>
@@ -23,10 +23,24 @@ The 'llama-recipes' repository is a companion to the [Meta Llama 2](https://gith
>
> {{ user_message_2 }}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
> ```
> More details on the new tokenizer and prompt template: <PLACEHOLDER_URL>
>
> To signal the end of the current message the model emits the `<\|eot_id\|>` token. To terminate the generation we need to call the model's generate function as follows:
Contributor:
message the model => message, the model

Contributor:
generation we => generation, we

Contributor:
@mreso is it that the two EOS terminators should be used to stop generation early? Wondering if that matches our description here or if we need a bit of lingo?

Contributor (@subramen), Apr 19, 2024:
Is the `eos_token_id` arg in `model.generate` specifying the stop sequence for generation?

I think some lingo around the difference between `eot_id` and `end_of_text` usage would be helpful.

Contributor (author):
Thanks for the comments! Yes, the `eos_token_id` is the one that's checked in the stopping criteria, and usually that's set to `<|end_of_text|>`. But for dialog-style prompts the model is trained to use `<|eot_id|>` (probably to distinguish it from the more final end of sequence). That's why we need to swap the `eos_token_id` for the latter id. Otherwise generate rambles on, as in this example:

Model output:
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Always answer with emojis<|eot_id|><|start_header_id|>user<|end_header_id|>

How to go from Beijing to NY?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

🛫️🚀🛬<|eot_id|><|start_header_id|>assistant<|end_header_id|>

🏨🛬🇨🇳                                                                                                                                                                                                                                 🕰️ 12+hours o
💺Business Class
[...]

The model learned that after an `<|eot_id|>` comes another header, so it adds `<|start_header_id|>assistant<|end_header_id|>` and then another response follows. (The header is usually appended by the chat template, not the model.)
If we exchange the `eos_token_id` in generate, it stops after the model emits the first `<|eot_id|>`:

Model output:
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Always answer with emojis<|eot_id|><|start_header_id|>user<|end_header_id|>

How to go from Beijing to NY?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

✈️ 🗼️🛬<|eot_id|>

Will rework the text accordingly before merging.
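
For reference, a minimal end-to-end sketch of the behaviour described above. It is not taken from this PR; it assumes the Hugging Face `transformers` API, the `meta-llama/Meta-Llama-3-8B-Instruct` checkpoint, and a `transformers` version whose `generate` accepts a list for `eos_token_id`.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"  # illustrative checkpoint choice
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)

dialog = [
    {"role": "system", "content": "Always answer with emojis"},
    {"role": "user", "content": "How to go from Beijing to NY?"},
]
# The chat template adds the <|start_header_id|>/<|eot_id|> structure shown above.
input_ids = tokenizer.apply_chat_template(
    dialog, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

# Stop on either the regular EOS token or <|eot_id|>; without the second entry
# generation keeps emitting new assistant turns, as in the first output above.
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

with torch.no_grad():
    output = model.generate(input_ids, max_new_tokens=128, eos_token_id=terminators)
print(tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True))
```

This mirrors the `terminators` list the PR adds to the README and to `chat_completion.py` below.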

Contributor:
So this should have been addressed in this PR, which has been merged: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/discussions/4/files

> ```
> terminators = [
> tokenizer.eos_token_id,
> tokenizer.convert_tokens_to_ids("<|eot_id|>")
> ]
> ...
> outputs = model.generate(
> ...
> eos_token_id=terminators,
> )
> ```
>
> More details on the new tokenizer and prompt template: https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3#special-tokens-used-with-meta-llama-3
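
As a quick way to inspect this format, the prompt above can be reproduced via the tokenizer's chat template. A minimal sketch, assuming the Hugging Face `meta-llama/Meta-Llama-3-8B-Instruct` tokenizer and its bundled chat template; the printed string should closely match the template shown earlier.

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
dialog = [
    {"role": "system", "content": "{{ system_prompt }}"},
    {"role": "user", "content": "{{ user_message_1 }}"},
    {"role": "assistant", "content": "{{ model_answer_1 }}"},
    {"role": "user", "content": "{{ user_message_2 }}"},
]
# tokenize=False returns the raw string so the special tokens stay visible;
# add_generation_prompt=True appends the trailing assistant header.
print(tokenizer.apply_chat_template(dialog, tokenize=False, add_generation_prompt=True))
```

Printing the untokenized string makes it easy to verify that each turn ends with `<|eot_id|>` rather than `<|end_of_text|>`.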
> [!NOTE]
> The llama-recipes repository was recently refactored to promote a better developer experience of using the examples. Some files have been moved to new locations. The `src/` folder has NOT been modified, so the functionality of this repo and package is not impacted.
>
>
> Make sure you update your local clone by running `git pull origin main`

## Table of Contents
@@ -55,29 +69,29 @@ These instructions will get you a copy of the project up and running on your loc
### Prerequisites

#### PyTorch Nightlies
Some features (especially fine-tuning with FSDP + PEFT) currently require PyTorch nightlies to be installed. Please make sure to install the nightlies if you're using these features following [this guide](https://pytorch.org/get-started/locally/).
If you want to use PyTorch nightlies instead of the stable release, go to [this guide](https://pytorch.org/get-started/locally/) to retrieve the right `--extra-index-url URL` parameter for the `pip install` commands on your platform.

### Installing
Llama-recipes provides a pip distribution for easy install and usage in other projects. Alternatively, it can be installed from source.

#### Install with pip
```
pip install --extra-index-url https://download.pytorch.org/whl/test/cu118 llama-recipes
pip install llama-recipes
```

#### Install with optional dependencies
Llama-recipes offers the installation of optional packages. There are three optional dependency groups.
To run the unit tests we can install the required dependencies with:
```
pip install --extra-index-url https://download.pytorch.org/whl/test/cu118 llama-recipes[tests]
pip install llama-recipes[tests]
```
For the vLLM example we need additional requirements that can be installed with:
```
pip install --extra-index-url https://download.pytorch.org/whl/test/cu118 llama-recipes[vllm]
pip install llama-recipes[vllm]
```
To use the sensitive topics safety checker install with:
```
pip install --extra-index-url https://download.pytorch.org/whl/test/cu118 llama-recipes[auditnlg]
pip install llama-recipes[auditnlg]
```
Optional dependencies can also be combined with [option1,option2].

@@ -87,14 +101,14 @@ To install from source e.g. for development use these commands. We're using hatc
git clone git@github.com:meta-llama/llama-recipes.git
cd llama-recipes
pip install -U pip setuptools
pip install --extra-index-url https://download.pytorch.org/whl/test/cu118 -e .
pip install -e .
```
For development and contributing to llama-recipes please install all optional dependencies:
```
git clone git@github.com:meta-llama/llama-recipes.git
cd llama-recipes
pip install -U pip setuptools
pip install --extra-index-url https://download.pytorch.org/whl/test/cu118 -e .[tests,auditnlg,vllm]
pip install -e .[tests,auditnlg,vllm]
```


@@ -120,7 +134,7 @@ python src/transformers/models/llama/convert_llama_weights_to_hf.py \


## Repository Organization
Most of the code dealing with Llama usage is organized across 2 main folders: `recipes/` and `src/`.
Most of the code dealing with Llama usage is organized across 2 main folders: `recipes/` and `src/`.

### `recipes/`

recipes/inference/local_inference/chat_completion/chat_completion.py
@@ -75,6 +75,11 @@ def main(

chats = tokenizer.apply_chat_template(dialogs)

terminators = [
tokenizer.eos_token_id,
tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

with torch.no_grad():
for idx, chat in enumerate(chats):
safety_checker = get_safety_checker(enable_azure_content_safety,
@@ -113,6 +118,7 @@ def main(
top_k=top_k,
repetition_penalty=repetition_penalty,
length_penalty=length_penalty,
eos_token_id=terminators,
**kwargs
)

1 change: 1 addition & 0 deletions scripts/spellcheck_conf/wordlist.txt
@@ -1294,3 +1294,4 @@ EOS
eot
multiturn
tiktoken
eos
2 changes: 1 addition & 1 deletion tests/conftest.py
@@ -6,7 +6,7 @@
from transformers import AutoTokenizer

ACCESS_ERROR_MSG = "Could not access tokenizer at 'meta-llama/Llama-2-7b-hf'. Did you log into huggingface hub and provided the correct token?"
LLAMA_VERSIONS = ["meta-llama/Llama-2-7b-hf", "meta-llama/Llama-3-8b-hf"]
LLAMA_VERSIONS = ["meta-llama/Llama-2-7b-hf", "meta-llama/Meta-Llama-3-8B"]

@pytest.fixture(params=LLAMA_VERSIONS)
def llama_version(request):
2 changes: 1 addition & 1 deletion tests/datasets/test_custom_dataset.py
@@ -11,7 +11,7 @@
"example_1": "[INST] Who made Berlin [/INST] dunno",
"example_2": "[INST] Quiero preparar una pizza de pepperoni, puedes darme los pasos para hacerla? [/INST] Claro!",
},
"meta-llama/Llama-3-8b-hf":{
"meta-llama/Meta-Llama-3-8B":{
"example_1": "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\nWho made Berlin<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\ndunno<|eot_id|><|end_of_text|>",
"example_2": "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\nHow to start learning guitar and become a master at it?",
},
2 changes: 1 addition & 1 deletion tests/datasets/test_grammar_datasets.py
@@ -10,7 +10,7 @@
"label": 1152,
"pos": 31,
},
"meta-llama/Llama-3-8b-hf":{
"meta-llama/Meta-Llama-3-8B":{
"label": 40,
"pos": 26,
},
2 changes: 1 addition & 1 deletion tests/datasets/test_samsum_datasets.py
@@ -10,7 +10,7 @@
"label": 8432,
"pos": 242,
},
"meta-llama/Llama-3-8b-hf":{
"meta-llama/Meta-Llama-3-8B":{
"label": 2250,
"pos": 211,
},
2 changes: 1 addition & 1 deletion tests/test_batching.py
@@ -9,7 +9,7 @@
"train": 96,
"eval": 42,
},
"meta-llama/Llama-3-8b-hf": {
"meta-llama/Meta-Llama-3-8B": {
"train": 79,
"eval": 34,
}