From ecc94cea168a105b1183f6d37727f61bfb0c2107 Mon Sep 17 00:00:00 2001
From: "Arun C. Murthy"
Date: Tue, 18 Jul 2023 19:21:19 -0700
Subject: [PATCH] Doc enhancements to use llama-2

---
 .gitignore                              |  1 +
 clients/python/llmengine/fine_tuning.py |  4 +--
 docs/getting_started.md                 |  4 +--
 docs/guides/completions.md              | 35 +++++++++++--------------
 docs/index.md                           |  6 ++---
 5 files changed, 23 insertions(+), 27 deletions(-)

diff --git a/.gitignore b/.gitignore
index d5bec1e7..276b0676 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+tags
 *.cache
 *.pt
 *.pkl
diff --git a/clients/python/llmengine/fine_tuning.py b/clients/python/llmengine/fine_tuning.py
index d2c3d96f..d8ffa84d 100644
--- a/clients/python/llmengine/fine_tuning.py
+++ b/clients/python/llmengine/fine_tuning.py
@@ -283,7 +283,7 @@ def get_events(cls, fine_tune_id: str) -> GetFineTuneEventsResponse:
         Returns:
             GetFineTuneEventsResponse: an object that contains the list of events for the fine-tuning job
 
-        Example:
+        === "Getting events for fine-tuning jobs in Python"
             ```python
             from llmengine import FineTune
 
@@ -291,7 +291,7 @@ def get_events(cls, fine_tune_id: str) -> GetFineTuneEventsResponse:
             print(response.json())
             ```
 
-        JSON Response:
+        === "Response in JSON"
             ```json
             {
                 "events":
diff --git a/docs/getting_started.md b/docs/getting_started.md
index 23ef79f3..ead931e2 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -48,7 +48,7 @@ With your API key set, you can now send LLM Engine requests using the Python cli
 from llmengine import Completion
 
 response = Completion.create(
-    model="falcon-7b-instruct",
+    model="llama-2-7b",
     prompt="I'm opening a pancake restaurant that specializes in unique pancake shapes, colors, and flavors. List 3 quirky names I could name my restaurant.",
     max_new_tokens=100,
     temperature=0.2,
@@ -66,7 +66,7 @@ import sys
 from llmengine import Completion
 
 stream = Completion.create(
-    model="falcon-7b-instruct",
+    model="llama-2-7b",
     prompt="Give me a 200 word summary on the current economic events in the US.",
     max_new_tokens=1000,
     temperature=0.2,
diff --git a/docs/guides/completions.md b/docs/guides/completions.md
index c9747889..eb16b94e 100644
--- a/docs/guides/completions.md
+++ b/docs/guides/completions.md
@@ -42,26 +42,21 @@ See the full [Completion API reference documentation](../../api/python_client/#l
 An example Completion API response looks as follows:
 
 === "Response in JSON"
-`python
-    >>> print(response.json())
-    `
-Example output:
-`json
-    {
-      "request_id": "c4bf0732-08e0-48a8-8b44-dfe8d4702fb0",
-      "output": {
-        "text": "_______ and I am a _______",
-        "num_completion_tokens": 10
-      }
-    }
-    `
+    ```python
+    >>> print(response.json())
+    {
+      "request_id": "c4bf0732-08e0-48a8-8b44-dfe8d4702fb0",
+      "output": {
+        "text": "_______ and I am a _______",
+        "num_completion_tokens": 10
+      }
+    }
+    ```
 === "Response in Python"
-`python
-    >>> print(response.output.text)
-    `
-Example output:
-` _______ and I am a _______
-    `
+    ```python
+    >>> print(response.output.text)
+    _______ and I am a _______
+    ```
 
 ## Token streaming
 
@@ -81,7 +76,7 @@ import sys
 from llmengine import Completion
 
 stream = Completion.create(
-    model="falcon-7b-instruct",
+    model="llama-2-7b",
     prompt="Give me a 200 word summary on the current economic events in the US.",
     max_new_tokens=1000,
     temperature=0.2,
diff --git a/docs/index.md b/docs/index.md
index fcf8cf3e..798519ee 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -30,11 +30,11 @@ Kubernetes.
 ### Key Features
 
 **Ready-to-use APIs for your favorite models**: Deploy and serve
-open source foundation models - including LLaMA, MPT, and Falcon.
+open source foundation models - including Llama-2, MPT, and Falcon.
 Use Scale-hosted models or deploy to your own infrastructure.
 
-**Fine-tune your favorite models**: Fine-tune open-source foundation
-models like LLaMA, MPT, etc. with your own data for optimized performance.
+**Fine-tune the best open-source models**: Fine-tune open-source foundation
+models like Llama-2, MPT, etc. with your own data for optimized performance.
 
 **Optimized Inference**: LLM Engine provides inference APIs for streaming
 responses and dynamically batching inputs for higher throughput
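
Note: the streaming hunks in `docs/getting_started.md` and `docs/guides/completions.md` end at three lines of context, so the `stream = Completion.create(...)` call appears without its closing arguments or the loop that consumes the stream. A minimal sketch of how the full snippet plausibly continues, assuming a `stream=True` parameter and per-chunk responses exposing `output.text` (field names taken from the non-streaming response shown in the completions.md hunk):

```python
import sys

from llmengine import Completion

# Request a streamed completion from the llama-2-7b model used throughout
# this patch; stream=True is assumed, since the hunk context cuts off
# after the temperature argument.
stream = Completion.create(
    model="llama-2-7b",
    prompt="Give me a 200 word summary on the current economic events in the US.",
    max_new_tokens=1000,
    temperature=0.2,
    stream=True,
)

# Print each token as it arrives instead of waiting for the full completion.
for response in stream:
    if response.output:
        print(response.output.text, end="")
        sys.stdout.flush()
```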