diff --git a/docs/capabilities/function-calling.mdx b/docs/capabilities/function-calling.mdx
index 9c7565ab..dbaad3b3 100644
--- a/docs/capabilities/function-calling.mdx
+++ b/docs/capabilities/function-calling.mdx
@@ -4,6 +4,9 @@ title: Function calling
sidebar_position: 2.5
---
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
@@ -38,6 +41,9 @@ In this guide, we will walk through a simple example to demonstrate how function
Before we get started, let’s assume we have a dataframe consisting of payment transactions. When users ask questions about this dataframe, certain tools can be used to answer them. This is just an example to emulate an external database that the LLM cannot directly access.
+
+<Tabs groupId="code">
+<TabItem value="python" label="python">
+
```python
import pandas as pd
@@ -54,6 +60,32 @@ data = {
df = pd.DataFrame(data)
```
+
+</TabItem>
+<TabItem value="typescript" label="typescript">
+
+```typescript
+// Assuming we have the following data
+const data = {
+ transactionId: ['T1001', 'T1002', 'T1003', 'T1004', 'T1005'],
+ customerId: ['C001', 'C002', 'C003', 'C002', 'C001'],
+ paymentAmount: [125.50, 89.99, 120.00, 54.30, 210.20],
+ paymentDate: ['2021-10-05', '2021-10-06', '2021-10-07', '2021-10-05', '2021-10-08'],
+ paymentStatus: ['Paid', 'Unpaid', 'Paid', 'Paid', 'Pending']
+};
+
+// Convert data into an array of objects for easier manipulation
+const transactions = data.transactionId.map((id, index) => ({
+ transactionId: id,
+ customerId: data.customerId[index],
+ paymentAmount: data.paymentAmount[index],
+ paymentDate: data.paymentDate[index],
+ paymentStatus: data.paymentStatus[index]
+}));
+```
+
+</TabItem>
+</Tabs>
+
## Step 1. User: specify tools and query
@@ -63,6 +95,9 @@ Users can define all the necessary tools for their use cases.
- In many cases, we might have multiple tools at our disposal. For example, let’s say we have two functions as our two tools: `retrieve_payment_status` and `retrieve_payment_date`, which retrieve the payment status and payment date for a given transaction ID.
+
+<Tabs groupId="code">
+<TabItem value="python" label="python">
+
```python
def retrieve_payment_status(df: pd.DataFrame, transaction_id: str) -> str:
if transaction_id in df.transaction_id.values:
@@ -73,11 +108,36 @@ def retrieve_payment_date(df: data, transaction_id: str) -> str:
if transaction_id in df.transaction_id.values:
return json.dumps({'date': df[df.transaction_id == transaction_id].payment_date.item()})
return json.dumps({'error': 'transaction id not found.'})
+```
+
+</TabItem>
+<TabItem value="typescript" label="typescript">
+
+```typescript
+function retrievePaymentStatus(transactions, transactionId) {
+ const transaction = transactions.find(t => t.transactionId === transactionId);
+ if (transaction) {
+ return JSON.stringify({ status: transaction.paymentStatus });
+ }
+ return JSON.stringify({ error: 'transaction id not found.' });
+}
+
+function retrievePaymentDate(transactions, transactionId) {
+ const transaction = transactions.find(t => t.transactionId === transactionId);
+ if (transaction) {
+ return JSON.stringify({ date: transaction.paymentDate });
+ }
+ return JSON.stringify({ error: 'transaction id not found.' });
+}
```
+
+</TabItem>
+</Tabs>
+
- In order for Mistral models to understand the functions, we need to outline the function specifications with a JSON schema. Specifically, we need to describe the type, function name, function description, function parameters, and the required parameter for the function. Since we have two functions here, let’s provide both function specifications in a list.
+
+<Tabs groupId="code">
+<TabItem value="python" label="python">
+
```python
tools = [
{
@@ -117,8 +177,56 @@ tools = [
]
```
+
+</TabItem>
+<TabItem value="typescript" label="typescript">
+
+```typescript
+const tools = [
+ {
+ type: "function",
+ function: {
+ name: "retrievePaymentStatus",
+ description: "Get payment status of a transaction",
+ parameters: {
+ type: "object",
+ properties: {
+ transactionId: {
+ type: "string",
+ description: "The transaction id.",
+ }
+ },
+ required: ["transactionId"],
+ },
+ },
+ },
+ {
+ type: "function",
+ function: {
+ name: "retrievePaymentDate",
+ description: "Get payment date of a transaction",
+ parameters: {
+ type: "object",
+ properties: {
+ transactionId: {
+ type: "string",
+ description: "The transaction id.",
+ }
+ },
+ required: ["transactionId"],
+ },
+ },
+ }
+];
+```
+
+</TabItem>
+</Tabs>
+
- Then we organize the two functions into a dictionary, where the keys are the function names and the values are the functions with `df` already bound. This allows us to call each function based on its name.
+
+<Tabs groupId="code">
+<TabItem value="python" label="python">
+
```python
import functools
@@ -128,13 +236,39 @@ names_to_functions = {
}
```
+
+</TabItem>
+<TabItem value="typescript" label="typescript">
+
+```typescript
+const namesToFunctions = {
+ 'retrievePaymentStatus': (transactionId) => retrievePaymentStatus(transactions, transactionId),
+ 'retrievePaymentDate': (transactionId) => retrievePaymentDate(transactions, transactionId)
+};
+```
+
+</TabItem>
+</Tabs>
+
### User query
Suppose a user asks the following question: “What’s the status of my transaction?” A standalone LLM would not be able to answer this question, as it needs to query the business logic backend to access the necessary data. But what if we have the right tool at our disposal to answer this question? We could then provide an answer!
+
+<Tabs groupId="code">
+<TabItem value="python" label="python">
+
```python
messages = [{"role": "user", "content": "What's the status of my transaction T1001?"}]
```
+
+</TabItem>
+<TabItem value="typescript" label="typescript">
+
+```typescript
+const messages = [{"role": "user", "content": "What's the status of my transaction T1001?"}];
+```
+
+</TabItem>
+</Tabs>
+
## Step 2. Model: Generate function arguments
@@ -148,6 +282,13 @@ Users can use `tool_choice` to specify how tools are used:
- "any": forces tool use.
- "none": prevents tool use.
+### parallel_tool_calls
+Users can use `parallel_tool_calls` to specify whether parallel tool calling is allowed.
+- true: default mode. The model decides whether to use parallel tool calls.
+- false: forces the model to make at most one tool call.
+
+<Tabs groupId="code">
+<TabItem value="python" label="python">
+
```python
import os
@@ -162,6 +303,7 @@ response = client.chat.complete(
messages = messages,
tools = tools,
tool_choice = "any",
+ parallel_tool_calls = False,
)
response
```
@@ -172,12 +314,51 @@ Output:
```
ChatCompletionResponse(id='7cbd8962041442459eb3636e1e3cbf10', object='chat.completion', model='mistral-large-latest', usage=Usage(prompt_tokens=94, completion_tokens=30, total_tokens=124), created=1721403550, choices=[Choices(index=0, finish_reason='tool_calls', message=AssistantMessage(content='', tool_calls=[ToolCall(function=FunctionCall(name='retrieve_payment_status', arguments='{"transaction_id": "T1001"}'), id='D681PevKs', type='function')], prefix=False, role='assistant'))])
```
+
+</TabItem>
+<TabItem value="typescript" label="typescript">
+
+```typescript
+import { Mistral } from '@mistralai/mistralai';
+
+const apiKey = process.env.MISTRAL_API_KEY;
+const model = "mistral-large-latest";
+
+const client = new Mistral({ apiKey: apiKey });
+
+let response = await client.chat.complete({
+ model: model,
+ messages: messages,
+ tools: tools,
+ toolChoice: "any",
+ parallelToolCalls: false,
+});
+```
+
+The response includes `toolCalls` with the chosen function name `retrievePaymentStatus` and the arguments for this function.
+
+</TabItem>
+</Tabs>
+
Let’s add the response message to the `messages` list.
+
+<Tabs groupId="code">
+<TabItem value="python" label="python">
+
```python
messages.append(response.choices[0].message)
```
+
+</TabItem>
+<TabItem value="typescript" label="typescript">
+
+```typescript
+messages.push(response.choices[0].message);
+```
+
+</TabItem>
+</Tabs>
+
## Step 3. User: Execute function to obtain tool results
@@ -186,6 +367,10 @@ How do we execute the function? Currently, it is the user’s responsibility to
Let’s extract some useful function information from the model response, including `function_name` and `function_params`. It’s clear here that our Mistral model has chosen to use the function `retrieve_payment_status` with the parameter `transaction_id` set to T1001.
+
+<Tabs groupId="code">
+<TabItem value="python" label="python">
+
```python
import json
@@ -199,8 +384,30 @@ Output
function_name: retrieve_payment_status
function_params: {'transaction_id': 'T1001'}
```
+
+</TabItem>
+<TabItem value="typescript" label="typescript">
+
+```typescript
+const toolCall = response.choices[0].message.toolCalls[0];
+const functionName = toolCall.function.name;
+const functionParams = JSON.parse(toolCall.function.arguments);
+console.log("\nfunction_name: ", functionName, "\nfunction_params: ", functionParams);
+```
+Output
+```
+function_name: retrievePaymentStatus
+function_params: { transactionId: 'T1001' }
+```
+
+</TabItem>
+</Tabs>
+
Now we can execute the function and obtain the function output `'{"status": "Paid"}'`.
+
+<Tabs groupId="code">
+<TabItem value="python" label="python">
+
```python
function_result = names_to_functions[function_name](**function_params)
function_result
@@ -210,14 +417,37 @@ Output
'{"status": "Paid"}'
```
+
+</TabItem>
+<TabItem value="typescript" label="typescript">
+
+```typescript
+const functionResult = namesToFunctions[functionName](functionParams.transactionId);
+console.log(functionResult);
+```
+Output
+```
+{"status":"Paid"}
+```
+
+</TabItem>
+</Tabs>
+
## Step 4. Model: Generate final answer
We can now provide the output from the tools to Mistral models, and in return, the Mistral model can produce a customised final response for the specific user.
+
+<Tabs groupId="code">
+<TabItem value="python" label="python">
+
```python
-messages.append({"role":"tool", "name":function_name, "content":function_result, "tool_call_id":tool_call.id})
+messages.append({
+ "role":"tool",
+ "name":function_name,
+ "content":function_result,
+ "tool_call_id":tool_call.id
+})
response = client.chat.complete(
model = model,
@@ -230,3 +460,28 @@ Output:
```
The status of your transaction with ID T1001 is "Paid". Is there anything else I can assist you with?
```
+
+</TabItem>
+<TabItem value="typescript" label="typescript">
+
+```typescript
+messages.push({
+ role: "tool",
+ name: functionName,
+ content: functionResult,
+ toolCallId: toolCall.id
+});
+
+response = await client.chat.complete({
+ model: model,
+ messages: messages
+});
+console.log(response.choices[0].message.content);
+```
+
+Output:
+```
+The status of your transaction with ID T1001 is "Paid". Is there anything else I can assist you with?
+```
+
+</TabItem>
+</Tabs>
\ No newline at end of file
diff --git a/docs/capabilities/structured-output/custom.mdx b/docs/capabilities/structured-output/custom.mdx
index 5bbc18ee..c6796c20 100644
--- a/docs/capabilities/structured-output/custom.mdx
+++ b/docs/capabilities/structured-output/custom.mdx
@@ -196,6 +196,15 @@ curl --location "https://api.mistral.ai/v1/chat/completions" \
+:::note
+To better guide the model, the following is always prepended to the system prompt by default when using this method:
+```
+Your output should be an instance of a JSON object following this schema: {{ json_schema }}
+```
+
+However, we recommend adding further explanation and iterating on your system prompt to better clarify the expected schema and behavior.
+:::
+
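+For example, a system prompt that restates the expected fields can be combined with the schema. The following is a minimal Python sketch, assuming an initialized `client`; the model name, schema, and prompts below are illustrative:
+
+```python
+response = client.chat.complete(
+    model="ministral-8b-latest",
+    messages=[
+        # Restate the expected output on top of the default prepended instruction.
+        {
+            "role": "system",
+            "content": "Extract the book title and author. Return only a JSON "
+            "object with keys 'name' and 'authors' (a list of strings).",
+        },
+        {"role": "user", "content": "To Kill a Mockingbird, by Harper Lee."},
+    ],
+    response_format={
+        "type": "json_schema",
+        "json_schema": {
+            "name": "book",
+            "schema": {
+                "type": "object",
+                "properties": {
+                    "name": {"type": "string"},
+                    "authors": {"type": "array", "items": {"type": "string"}},
+                },
+                "required": ["name", "authors"],
+                "additionalProperties": False,
+            },
+            "strict": True,
+        },
+    },
+)
+```
+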
### FAQ
**Q: Which models support custom Structured Outputs?**
**A:** All currently available models except for `codestral-mamba` are supported.
diff --git a/docs/deployment/laplateforme/tier.mdx b/docs/deployment/laplateforme/tier.mdx
index dae65786..26630af9 100644
--- a/docs/deployment/laplateforme/tier.mdx
+++ b/docs/deployment/laplateforme/tier.mdx
@@ -5,7 +5,7 @@ sidebar_position: 3.12
---
:::note[ ]
-Please visit https://console.mistral.ai/limits/ for detailed information on the current rate limit and usage tiers for your workspace.
+Please visit https://admin.mistral.ai/plateforme/limits for detailed information on the current rate limit and usage tiers for your workspace.
:::
## How do rate limits work?
@@ -25,7 +25,7 @@ Key points to note:
## Usage tiers
-You can view the rate and usage limits for your workspace under the [limits](https://console.mistral.ai/limits/) section on la Plateforme.
+You can view the rate and usage limits for your workspace under the [limits](https://admin.mistral.ai/plateforme/limits) section on la Plateforme.
We offer various tiers on the platform, including a **free API tier** with restrictive rate limits. The free API tier is designed to allow you to try and explore our API. For actual projects and production use, we recommend upgrading to a higher tier.
diff --git a/docs/guides/finetuning_sections/_04_faq.md b/docs/guides/finetuning_sections/_04_faq.md
index badeab29..abb09787 100644
--- a/docs/guides/finetuning_sections/_04_faq.md
+++ b/docs/guides/finetuning_sections/_04_faq.md
@@ -29,6 +29,13 @@ The size limit for the validation data is 1MB. As a rule of thumb:
`validation_set_max_size = min(1MB, 5% of training data)`
+### What happens if I try to create a job that already exists?
+
+At job creation, you will receive a `409 Conflict` error if a similar job is already running, validated, or queued. This mechanism helps avoid inadvertently creating duplicate jobs, saving resources and preventing redundancy.
+
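+A minimal sketch of handling this with the Python SDK, assuming an initialized `client` and that the SDK surfaces HTTP errors as `models.SDKError` with a `status_code` attribute; the model, training file ID, and hyperparameters below are illustrative:
+
+```python
+from mistralai import models
+
+try:
+    job = client.fine_tuning.jobs.create(
+        model="open-mistral-7b",
+        # training_file_id: ID of a previously uploaded training file.
+        training_files=[{"file_id": training_file_id, "weight": 1}],
+        hyperparameters={"training_steps": 10, "learning_rate": 0.0001},
+    )
+except models.SDKError as e:
+    if e.status_code == 409:
+        # A similar job is already running, validated, or queued.
+        print("Duplicate job detected:", e.message)
+    else:
+        raise
+```
+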
+### What if I upload an already existing file?
+
+If a file is uploaded and matches an existing file in both content and name, the pre-existing file is returned instead of creating a new one.
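+
+For example, with the Python SDK (a sketch assuming an initialized `client`; the filename is illustrative):
+
+```python
+# Uploading identical content under the same name returns the pre-existing file.
+first = client.files.upload(
+    file={"file_name": "training.jsonl", "content": open("training.jsonl", "rb")}
+)
+second = client.files.upload(
+    file={"file_name": "training.jsonl", "content": open("training.jsonl", "rb")}
+)
+assert first.id == second.id  # no duplicate was created
+```
+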
### How many epochs are in the training process?
diff --git a/openapi.yaml b/openapi.yaml
index c05cfad8..6867baea 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -747,23 +747,22 @@ paths:
description: FIM completion.
tags:
- fim
- /v1/ocr:
+ /v1/agents/completions:
post:
- summary: OCR
- operationId: ocr_v1_ocr_post
+ summary: Agents Completion
+ operationId: agents_completion_v1_agents_completions_post
requestBody:
content:
application/json:
schema:
- $ref: '#/components/schemas/OCRRequest'
+ $ref: '#/components/schemas/AgentsCompletionRequest'
required: true
responses:
'200':
description: Successful Response
content:
application/json:
- schema:
- $ref: '#/components/schemas/OCRResponse'
+ schema: {$ref: '#/components/schemas/ChatCompletionResponse'}
'422':
description: Validation Error
content:
@@ -771,23 +770,24 @@ paths:
schema:
$ref: '#/components/schemas/HTTPValidationError'
tags:
- - ocr
- /v1/moderations:
+ - agents
+ /v1/embeddings:
post:
- summary: Moderations
- operationId: moderations_v1_moderations_post
+ summary: Embeddings
+ description: 'Embeddings'
+ operationId: embeddings_v1_embeddings_post
requestBody:
content:
application/json:
schema:
- $ref: '#/components/schemas/ClassificationRequest'
+ $ref: '#/components/schemas/EmbeddingRequest'
required: true
responses:
'200':
description: Successful Response
content:
application/json:
- schema: {$ref: '#/components/schemas/ClassificationResponse'}
+ schema: {$ref: '#/components/schemas/EmbeddingResponse'}
'422':
description: Validation Error
content:
@@ -795,16 +795,16 @@ paths:
schema:
$ref: '#/components/schemas/HTTPValidationError'
tags:
- - classifiers
- /v1/chat/moderations:
+ - embeddings
+ /v1/moderations:
post:
- summary: Moderations Chat
- operationId: moderations_chat_v1_chat_moderations_post
+ summary: Moderations
+ operationId: moderations_v1_moderations_post
requestBody:
content:
application/json:
schema:
- $ref: '#/components/schemas/ChatModerationRequest'
+ $ref: '#/components/schemas/ClassificationRequest'
required: true
responses:
'200':
@@ -820,23 +820,22 @@ paths:
$ref: '#/components/schemas/HTTPValidationError'
tags:
- classifiers
- /v1/embeddings:
+ /v1/chat/moderations:
post:
- summary: Embeddings
- description: 'Embeddings'
- operationId: embeddings_v1_embeddings_post
+ summary: Chat Moderations
+ operationId: chat_moderations_v1_chat_moderations_post
requestBody:
content:
application/json:
schema:
- $ref: '#/components/schemas/EmbeddingRequest'
+ $ref: '#/components/schemas/ChatModerationRequest'
required: true
responses:
'200':
description: Successful Response
content:
application/json:
- schema: {$ref: '#/components/schemas/EmbeddingResponse'}
+ schema: {$ref: '#/components/schemas/ClassificationResponse'}
'422':
description: Validation Error
content:
@@ -844,23 +843,24 @@ paths:
schema:
$ref: '#/components/schemas/HTTPValidationError'
tags:
- - embeddings
- /v1/agents/completions:
+ - classifiers
+ /v1/ocr:
post:
- summary: Agents Completion
- operationId: agents_completion_v1_agents_completions_post
+ summary: OCR
+ operationId: ocr_v1_ocr_post
requestBody:
content:
application/json:
schema:
- $ref: '#/components/schemas/AgentsCompletionRequest'
+ $ref: '#/components/schemas/OCRRequest'
required: true
responses:
'200':
description: Successful Response
content:
application/json:
- schema: {$ref: '#/components/schemas/ChatCompletionResponse'}
+ schema:
+ $ref: '#/components/schemas/OCRResponse'
'422':
description: Validation Error
content:
@@ -868,7 +868,7 @@ paths:
schema:
$ref: '#/components/schemas/HTTPValidationError'
tags:
- - agents
+ - ocr
components:
schemas:
BaseModelCard:
@@ -1110,6 +1110,7 @@ components:
enum:
- fine-tune
- batch
+ - ocr
SampleType:
title: SampleType
type: string
@@ -2601,6 +2602,10 @@ components:
type: content
content: ''
description: Enable users to specify expected results, optimizing response times by leveraging known or predictable content. This approach is especially effective for updating text documents or code files with minimal changes, reducing latency while maintaining high-quality results.
+ parallel_tool_calls:
+ type: boolean
+ title: Parallel Tool Calls
+ default: true
safe_prompt:
type: boolean
description: Whether to inject a safety prompt before all conversations.
@@ -2682,11 +2687,6 @@ components:
title: ClassificationRequest
DocumentURLChunk:
properties:
- type:
- type: string
- const: document_url
- title: Type
- default: document_url
document_url:
type: string
title: Document Url
@@ -2696,6 +2696,12 @@ components:
- type: 'null'
title: Document Name
description: The filename of the document
+ type:
+ type: string
+ enum:
+ - document_url
+ title: Type
+ default: document_url
additionalProperties: false
type: object
required:
@@ -2706,8 +2712,8 @@ components:
model:
title: Model
type: string
- default: mistral-embed
description: ID of the model to use.
+ example: mistral-embed
input:
anyOf:
- type: string
@@ -3391,6 +3397,10 @@ components:
type: content
content: ''
description: Enable users to specify expected results, optimizing response times by leveraging known or predictable content. This approach is especially effective for updating text documents or code files with minimal changes, reducing latency while maintaining high-quality results.
+ parallel_tool_calls:
+ type: boolean
+ title: Parallel Tool Calls
+ default: true
agent_id:
type: string
description: The ID of the agent to use for this completion.
@@ -3414,6 +3424,58 @@ components:
text: '#/components/schemas/TextChunk'
reference: '#/components/schemas/ReferenceChunk'
title: ContentChunk
+ CompletionEvent:
+ title: CompletionEvent
+ type: object
+ required:
+ - data
+ properties:
+ data:
+ $ref: '#/components/schemas/CompletionChunk'
+ CompletionChunk:
+ title: CompletionChunk
+ type: object
+ required:
+ - id
+ - model
+ - choices
+ properties:
+ id:
+ type: string
+ object:
+ type: string
+ created:
+ type: integer
+ model:
+ type: string
+ usage:
+ $ref: '#/components/schemas/UsageInfo'
+ choices:
+ type: array
+ items:
+ $ref: '#/components/schemas/CompletionResponseStreamChoice'
+ CompletionResponseStreamChoice:
+ title: CompletionResponseStreamChoice
+ type: object
+ required:
+ - index
+ - delta
+ - finish_reason
+ properties:
+ index:
+ type: integer
+ delta:
+ $ref: '#/components/schemas/DeltaMessage'
+ finish_reason:
+ type:
+ - string
+ - 'null'
+ enum:
+ - stop
+ - length
+ - error
+ - tool_calls
+ - null
UsageInfo:
title: UsageInfo
type: object
@@ -3595,58 +3657,6 @@ components:
type: object
additionalProperties:
type: number
- CompletionEvent:
- title: CompletionEvent
- type: object
- required:
- - data
- properties:
- data:
- $ref: '#/components/schemas/CompletionChunk'
- CompletionChunk:
- title: CompletionChunk
- type: object
- required:
- - id
- - model
- - choices
- properties:
- id:
- type: string
- object:
- type: string
- created:
- type: integer
- model:
- type: string
- usage:
- $ref: '#/components/schemas/UsageInfo'
- choices:
- type: array
- items:
- $ref: '#/components/schemas/CompletionResponseStreamChoice'
- CompletionResponseStreamChoice:
- title: CompletionResponseStreamChoice
- type: object
- required:
- - index
- - delta
- - finish_reason
- properties:
- index:
- type: integer
- delta:
- $ref: '#/components/schemas/DeltaMessage'
- finish_reason:
- type:
- - string
- - 'null'
- enum:
- - stop
- - length
- - error
- - tool_calls
- - null
securitySchemes:
ApiKey:
type: http