From ecc94cea168a105b1183f6d37727f61bfb0c2107 Mon Sep 17 00:00:00 2001
From: "Arun C. Murthy"
Date: Tue, 18 Jul 2023 19:21:19 -0700
Subject: [PATCH] Doc enhancements to use llama-2

---
 .gitignore                              |  1 +
 clients/python/llmengine/fine_tuning.py |  4 +--
 docs/getting_started.md                 |  4 +--
 docs/guides/completions.md              | 35 +++++++++++--------------
 docs/index.md                           |  6 ++---
 5 files changed, 23 insertions(+), 27 deletions(-)

diff --git a/.gitignore b/.gitignore
index d5bec1e7..276b0676 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+tags
 *.cache
 *.pt
 *.pkl
diff --git a/clients/python/llmengine/fine_tuning.py b/clients/python/llmengine/fine_tuning.py
index d2c3d96f..d8ffa84d 100644
--- a/clients/python/llmengine/fine_tuning.py
+++ b/clients/python/llmengine/fine_tuning.py
@@ -283,7 +283,7 @@ def get_events(cls, fine_tune_id: str) -> GetFineTuneEventsResponse:
         Returns:
             GetFineTuneEventsResponse: an object that contains the list of events for the fine-tuning job
 
-        Example:
+        === "Getting events for fine-tuning jobs in Python"
             ```python
             from llmengine import FineTune
 
@@ -291,7 +291,7 @@ def get_events(cls, fine_tune_id: str) -> GetFineTuneEventsResponse:
             print(response.json())
             ```
 
-        JSON Response:
+        === "Response in JSON"
             ```json
             {
                 "events":
diff --git a/docs/getting_started.md b/docs/getting_started.md
index 23ef79f3..ead931e2 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -48,7 +48,7 @@ With your API key set, you can now send LLM Engine requests using the Python cli
 from llmengine import Completion
 
 response = Completion.create(
-    model="falcon-7b-instruct",
+    model="llama-2-7b",
     prompt="I'm opening a pancake restaurant that specializes in unique pancake shapes, colors, and flavors. List 3 quirky names I could name my restaurant.",
     max_new_tokens=100,
     temperature=0.2,
@@ -66,7 +66,7 @@ import sys
 from llmengine import Completion
 
 stream = Completion.create(
-    model="falcon-7b-instruct",
+    model="llama-2-7b",
     prompt="Give me a 200 word summary on the current economic events in the US.",
     max_new_tokens=1000,
     temperature=0.2,
diff --git a/docs/guides/completions.md b/docs/guides/completions.md
index c9747889..eb16b94e 100644
--- a/docs/guides/completions.md
+++ b/docs/guides/completions.md
@@ -42,26 +42,21 @@ See the full [Completion API reference documentation](../../api/python_client/#l
 An example Completion API response looks as follows:
 
 === "Response in JSON"
-`python
-    >>> print(response.json())
-    `
-Example output:
-`json
-    {
-      "request_id": "c4bf0732-08e0-48a8-8b44-dfe8d4702fb0",
-      "output": {
-        "text": "_______ and I am a _______",
-        "num_completion_tokens": 10
-      }
-    }
-    `
+    ```python
+    >>> print(response.json())
+    {
+      "request_id": "c4bf0732-08e0-48a8-8b44-dfe8d4702fb0",
+      "output": {
+        "text": "_______ and I am a _______",
+        "num_completion_tokens": 10
+      }
+    }
+    ```
 === "Response in Python"
-`python
-    >>> print(response.output.text)
-    `
-Example output:
-` _______ and I am a _______
-    `
+    ```python
+    >>> print(response.output.text)
+    _______ and I am a _______
+    ```
 
 ## Token streaming
 
@@ -81,7 +76,7 @@ import sys
 from llmengine import Completion
 
 stream = Completion.create(
-    model="falcon-7b-instruct",
+    model="llama-2-7b",
     prompt="Give me a 200 word summary on the current economic events in the US.",
     max_new_tokens=1000,
     temperature=0.2,
diff --git a/docs/index.md b/docs/index.md
index fcf8cf3e..798519ee 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -30,11 +30,11 @@ Kubernetes.
 ### Key Features
 
 **Ready-to-use APIs for your favorite models**: Deploy and serve
-open source foundation models - including LLaMA, MPT, and Falcon.
+open source foundation models - including Llama-2, MPT, and Falcon.
 Use Scale-hosted models or deploy to your own infrastructure.
 
-**Fine-tune your favorite models**: Fine-tune open-source foundation
-models like LLaMA, MPT, etc. with your own data for optimized performance.
+**Fine-tune the best open-source models**: Fine-tune open-source foundation
+models like Llama-2, MPT, etc. with your own data for optimized performance.
 
 **Optimized Inference**: LLM Engine provides inference APIs for streaming
 responses and dynamically batching inputs for higher throughput
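
Note: the streaming hunks in `docs/getting_started.md` and `docs/guides/completions.md` end at three lines of context, so the `stream = Completion.create(...)` call appears without its closing arguments or the loop that consumes the stream. A minimal sketch of how the full snippet plausibly continues, assuming a `stream=True` parameter and per-chunk responses exposing `output.text` (field names taken from the non-streaming response shown in the completions.md hunk):

```python
import sys

from llmengine import Completion

# Request a streamed completion from the llama-2-7b model used throughout
# this patch; stream=True is assumed, since the hunk context cuts off
# after the temperature argument.
stream = Completion.create(
    model="llama-2-7b",
    prompt="Give me a 200 word summary on the current economic events in the US.",
    max_new_tokens=1000,
    temperature=0.2,
    stream=True,
)

# Print each token as it arrives instead of waiting for the full completion.
for response in stream:
    if response.output:
        print(response.output.text, end="")
        sys.stdout.flush()
```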