diff --git a/ai-data/generative-apis/how-to/query-code-models.mdx b/ai-data/generative-apis/how-to/query-code-models.mdx
index 90a0410ce0..b9eb9b492f 100644
--- a/ai-data/generative-apis/how-to/query-code-models.mdx
+++ b/ai-data/generative-apis/how-to/query-code-models.mdx
@@ -48,8 +48,7 @@ Here is an example configuration with Scaleway's OpenAI-compatible provider:
     {
       "model": "qwen2.5-coder-32b-instruct",
       "title": "Qwen2.5-coder",
-      "apiBase": "https://api.scaleway.ai/v1/",
-      "provider": "openai",
+      "provider": "scaleway",
       "apiKey": "###SCW SECRET KEY###"
     }
   ]
diff --git a/ai-data/generative-apis/how-to/use-function-calling.mdx b/ai-data/generative-apis/how-to/use-function-calling.mdx
index 0d7dc4f08c..054f0c0046 100644
--- a/ai-data/generative-apis/how-to/use-function-calling.mdx
+++ b/ai-data/generative-apis/how-to/use-function-calling.mdx
@@ -25,10 +25,7 @@ Function calling allows a large language model (LLM) to interact with external t
 
 ## Supported models
 
-* llama-3.1-8b-instruct
-* llama-3.1-70b-instruct
-* mistral-nemo-instruct-2407
-* pixtral-12b-2409
+All the [chat models](/ai-data/generative-apis/reference-content/supported-models/#chat-models) hosted by Scaleway support function calling.
 
 ## Understanding function calling
 
diff --git a/ai-data/managed-inference/reference-content/function-calling-support.mdx b/ai-data/managed-inference/reference-content/function-calling-support.mdx
index 757a297e97..b544ffb3c3 100644
--- a/ai-data/managed-inference/reference-content/function-calling-support.mdx
+++ b/ai-data/managed-inference/reference-content/function-calling-support.mdx
@@ -7,7 +7,7 @@ content:
   paragraph: Function calling allows models to connect to external tools.
 tags:
 dates:
-  validation: 2024-11-18
+  validation: 2024-12-12
   posted: 2024-10-25
 categories:
   - ai-data
@@ -27,6 +27,7 @@ The following models in Scaleway's Managed Inference library can call tools as p
 
 * meta/llama-3.1-8b-instruct
 * meta/llama-3.1-70b-instruct
+* meta/llama-3.3-70b-instruct
 * mistral/mistral-7b-instruct-v0.3
 * mistral/mistral-nemo-instruct-2407
 * mistral/pixtral-12b-2409
diff --git a/ai-data/managed-inference/reference-content/llama-3.3-70b-instruct.mdx b/ai-data/managed-inference/reference-content/llama-3.3-70b-instruct.mdx
new file mode 100644
index 0000000000..4f7618359c
--- /dev/null
+++ b/ai-data/managed-inference/reference-content/llama-3.3-70b-instruct.mdx
@@ -0,0 +1,105 @@
+---
+meta:
+  title: Understanding the Llama-3.3-70b-instruct model
+  description: Deploy your own secure Llama-3.3-70b-instruct model with Scaleway Managed Inference. Privacy-focused, fully managed.
+content:
+  h1: Understanding the Llama-3.3-70b-instruct model
+  paragraph: This page provides information on the Llama-3.3-70b-instruct model
+tags:
+dates:
+  validation: 2024-12-12
+  posted: 2024-12-12
+categories:
+  - ai-data
+---
+
+## Model overview
+
+| Attribute       | Details                            |
+|-----------------|------------------------------------|
+| Provider        | [Meta](https://www.llama.com/)     |
+| License         | [Llama 3.3 community](https://www.llama.com/llama3_3/license/) |
+| Compatible Instances | H100-2 (BF16)                 |
+| Context length  | Up to 70k tokens                   |
+
+## Model names
+
+```bash
+meta/llama-3.3-70b-instruct:bf16
+```
+
+## Compatible Instances
+
+| Instance type | Max context length |
+| ------------- |--------------------|
+| H100-2        | 62k (BF16)         |
+
+## Model introduction
+
+Released December 6, 2024, Meta’s Llama 3.3 70b is a fine-tune of the [Llama 3.1 70b](/ai-data/managed-inference/reference-content/llama-3.1-70b) model.
+This model is still text-only (text in/text out). However, Llama 3.3 was designed to approach the performance of Llama 3.1 405B on some applications.
+
+## Why is it useful?
+
+- Llama 3.3 uses the same prompt format as Llama 3.1. Prompts written for Llama 3.1 work unchanged with Llama 3.3.
+- Llama 3.3 supports 7 languages in addition to English: French, German, Hindi, Italian, Portuguese, Spanish, and Thai.
+
+## How to use it
+
+### Sending Managed Inference requests
+
+To perform inference tasks with your Llama-3.3 model deployed at Scaleway, use the following command:
+
+```bash
+curl -s \
+-H "Authorization: Bearer <IAM API key>" \
+-H "Content-Type: application/json" \
+--request POST \
+--url "https://<Deployment UUID>.ifr.fr-par.scaleway.com/v1/chat/completions" \
+--data '{"model":"meta/llama-3.3-70b-instruct:bf16", "messages":[{"role": "user","content": "There is a llama in my garden, what should I do?"}], "max_tokens": 500, "temperature": 0.7, "stream": false}'
+```
+
+Make sure to replace `<IAM API key>` and `<Deployment UUID>` with your actual [IAM API key](/identity-and-access-management/iam/how-to/create-api-keys/) and the Deployment UUID you are targeting.
+
+<Message type="note">
+  The model name allows Scaleway to put your prompts in the expected format.
+</Message>
+
+<Message type="note">
+  Ensure that the `messages` array is properly formatted with roles (system, user, assistant) and content.
+</Message>
+
+### Receiving Inference responses
+
+Upon sending the HTTP request to the public or private endpoints exposed by the server, you will receive inference responses from the Managed Inference server.
+Process the output data according to your application's needs. The response will contain the output generated by the LLM model based on the input provided in the request.
+
+<Message type="important">
+  Despite efforts for accuracy, the possibility of generated text containing inaccuracies or [hallucinations](/ai-data/managed-inference/concepts/#hallucinations) exists. Always verify the content generated independently.
+</Message>
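+
+For reference, non-streaming responses follow the OpenAI-compatible chat completions schema served at `/v1/chat/completions`. A successful response will resemble the following sketch (the `id`, `created` timestamp, token counts, and message content below are illustrative placeholders, not real output):
+
+```json
+{
+  "id": "chat-abc123",
+  "object": "chat.completion",
+  "created": 1733990400,
+  "model": "meta/llama-3.3-70b-instruct:bf16",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "Llamas are usually docile. Keep your distance, avoid sudden movements, and contact the owner or local animal services."
+      },
+      "finish_reason": "stop"
+    }
+  ],
+  "usage": {
+    "prompt_tokens": 18,
+    "completion_tokens": 27,
+    "total_tokens": 45
+  }
+}
+```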
\ No newline at end of file
diff --git a/menu/navigation.json b/menu/navigation.json
index 747ef44dd3..c2df61306c 100644
--- a/menu/navigation.json
+++ b/menu/navigation.json
@@ -665,6 +665,10 @@ "label": "Support for function calling in Scaleway Managed Inference",
           "slug": "function-calling-support"
         },
+        {
+          "label": "BGE-Multilingual-Gemma2 model",
+          "slug": "bge-multilingual-gemma2"
+        },
         {
           "label": "Llama-3-8b-instruct model",
           "slug": "llama-3-8b-instruct"
         }
@@ -686,13 +690,17 @@ "slug": "llama-3.1-nemotron-70b-instruct"
         },
         {
-          "label": "Mistral-nemo-instruct-2407 model",
-          "slug": "mistral-nemo-instruct-2407"
+          "label": "Llama-3.3-70b-instruct model",
+          "slug": "llama-3.3-70b-instruct"
         },
         {
           "label": "Mistral-7b-instruct-v0.3 model",
           "slug": "mistral-7b-instruct-v0.3"
         },
+        {
+          "label": "Mistral-nemo-instruct-2407 model",
+          "slug": "mistral-nemo-instruct-2407"
+        },
         {
           "label": "Mixtral-8x7b-instruct-v0.1 model",
           "slug": "mixtral-8x7b-instruct-v0.1"
         },
@@ -701,18 +709,6 @@ "label": "Molmo-72b-0924 model",
           "slug": "molmo-72b-0924"
         },
-        {
-          "label": "Sentence-t5-xxl model",
-          "slug": "sentence-t5-xxl"
-        },
-        {
-          "label": "BGE-Multilingual-Gemma2 model",
-          "slug": "bge-multilingual-gemma2"
-        },
-        {
-          "label": "Pixtral-12b-2409 model",
-          "slug": "pixtral-12b-2409"
-        },
         {
           "label": "Moshika-0.1-8b model",
           "slug": "moshika-0.1-8b"
         },
@@ -721,9 +717,17 @@ {
           "label": "Moshiko-0.1-8b model",
           "slug": "moshiko-0.1-8b"
         },
+        {
+          "label": "Pixtral-12b-2409 model",
+          "slug": "pixtral-12b-2409"
+        },
         {
           "label": "Qwen2.5-coder-32b-instruct model",
           "slug": "qwen2.5-coder-32b-instruct"
+        },
+        {
+          "label": "Sentence-t5-xxl model",
+          "slug": "sentence-t5-xxl"
         }
       ],
       "label": "Additional Content",