diff --git a/docs/capabilities/function-calling.mdx b/docs/capabilities/function-calling.mdx
index 9c7565ab..dbaad3b3 100644
--- a/docs/capabilities/function-calling.mdx
+++ b/docs/capabilities/function-calling.mdx
@@ -4,6 +4,9 @@ title: Function calling
sidebar_position: 2.5
---

+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+

Open In Colab

@@ -38,6 +41,9 @@ In this guide, we will walk through a simple example to demonstrate how function

Before we get started, let’s assume we have a dataframe consisting of payment transactions. When users ask questions about this dataframe, they can use certain tools to answer questions about this data. This is just an example to emulate an external database that the LLM cannot directly access.

<Tabs>
<TabItem value="python" label="python">

```python
import pandas as pd

@@ -54,6 +60,32 @@ data = {
df = pd.DataFrame(data)
```

</TabItem>
<TabItem value="typescript" label="typescript">

+```typescript
+// Assuming we have the following data
+const data = {
+    transactionId: ['T1001', 'T1002', 'T1003', 'T1004', 'T1005'],
+    customerId: ['C001', 'C002', 'C003', 'C002', 'C001'],
+    paymentAmount: [125.50, 89.99, 120.00, 54.30, 210.20],
+    paymentDate: ['2021-10-05', '2021-10-06', '2021-10-07', '2021-10-05', '2021-10-08'],
+    paymentStatus: ['Paid', 'Unpaid', 'Paid', 'Paid', 'Pending']
+};
+
+// Convert the column-oriented data into an array of row objects for easier manipulation
+const transactions = data.transactionId.map((id, index) => ({
+    transactionId: id,
+    customerId: data.customerId[index],
+    paymentAmount: data.paymentAmount[index],
+    paymentDate: data.paymentDate[index],
+    paymentStatus: data.paymentStatus[index]
+}));
+```

</TabItem>
</Tabs>

## Step 1. User: specify tools and query

drawing

Users can define all the necessary tools for their use cases.

@@ -63,6 +95,9 @@ Users can define all the necessary tools for their use cases.

- In many cases, we might have multiple tools at our disposal. For example, let’s consider we have two functions as our two tools: `retrieve_payment_status` and `retrieve_payment_date`, which retrieve the payment status and the payment date for a given transaction ID.

<Tabs>
<TabItem value="python" label="python">

```python
def retrieve_payment_status(df: pd.DataFrame, transaction_id: str) -> str:
    if transaction_id in df.transaction_id.values:
        return json.dumps({'status': df[df.transaction_id == transaction_id].payment_status.item()})
    return json.dumps({'error': 'transaction id not found.'})

@@ -73,11 +108,36 @@ def retrieve_payment_date(df: pd.DataFrame, transaction_id: str) -> str:
    if transaction_id in df.transaction_id.values:
        return json.dumps({'date': df[df.transaction_id == transaction_id].payment_date.item()})
    return json.dumps({'error': 'transaction id not found.'})
+```
+
+</TabItem>
+<TabItem value="typescript" label="typescript">
+
+```typescript
+function retrievePaymentStatus(transactions, transactionId) {
+    const transaction = transactions.find(t => t.transactionId === transactionId);
+    if (transaction) {
+        return JSON.stringify({ status: transaction.paymentStatus });
+    }
+    return JSON.stringify({ error: 'transaction id not found.' });
+}
+
+function retrievePaymentDate(transactions, transactionId) {
+    const transaction = transactions.find(t => t.transactionId === transactionId);
+    if (transaction) {
+        return JSON.stringify({ date: transaction.paymentDate });
+    }
+    return JSON.stringify({ error: 'transaction id not found.' });
+}
```

</TabItem>
</Tabs>

- In order for Mistral models to understand the functions, we need to outline the function specifications with a JSON schema. Specifically, we need to describe the type, function name, function description, function parameters, and the required parameters for the function. Since we have two functions here, let’s list both function specifications in a list.
<Tabs>
<TabItem value="python" label="python">

```python
tools = [
    {
        "type": "function",
        "function": {
            "name": "retrieve_payment_status",
            "description": "Get payment status of a transaction",
            "parameters": {
                "type": "object",
                "properties": {
                    "transaction_id": {
                        "type": "string",
                        "description": "The transaction id.",
                    }
                },
                "required": ["transaction_id"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "retrieve_payment_date",
            "description": "Get payment date of a transaction",
            "parameters": {
                "type": "object",
                "properties": {
                    "transaction_id": {
                        "type": "string",
                        "description": "The transaction id.",
                    }
                },
                "required": ["transaction_id"],
            },
        },
    }
]
```

@@ -117,8 +177,56 @@

</TabItem>
<TabItem value="typescript" label="typescript">

+```typescript
+const tools = [
+  {
+    type: "function",
+    function: {
+      name: "retrievePaymentStatus",
+      description: "Get payment status of a transaction",
+      parameters: {
+        type: "object",
+        properties: {
+          transactionId: {
+            type: "string",
+            description: "The transaction id.",
+          }
+        },
+        required: ["transactionId"],
+      },
+    },
+  },
+  {
+    type: "function",
+    function: {
+      name: "retrievePaymentDate",
+      description: "Get payment date of a transaction",
+      parameters: {
+        type: "object",
+        properties: {
+          transactionId: {
+            type: "string",
+            description: "The transaction id.",
+          }
+        },
+        required: ["transactionId"],
+      },
+    },
+  }
+];
+```

</TabItem>
</Tabs>

- Then we organize the two functions into a dictionary where keys represent the function name, and values are the function with the `df` defined. This allows us to call each function based on its function name.

<Tabs>
<TabItem value="python" label="python">

```python
import functools

@@ -128,13 +236,39 @@
names_to_functions = {
    'retrieve_payment_status': functools.partial(retrieve_payment_status, df=df),
    'retrieve_payment_date': functools.partial(retrieve_payment_date, df=df)
}
```

</TabItem>
<TabItem value="typescript" label="typescript">

+```typescript
+const namesToFunctions = {
+    'retrievePaymentStatus': (transactionId) => retrievePaymentStatus(transactions, transactionId),
+    'retrievePaymentDate': (transactionId) => retrievePaymentDate(transactions, transactionId)
+};
+```

</TabItem>
</Tabs>

### User query

Suppose a user asks the following question: “What’s the status of my transaction?” A standalone LLM would not be able to answer this question, as it needs to query the business logic backend to access the necessary data. But what if we have an exact tool we can use to answer this question? We could potentially provide an answer!

<Tabs>
<TabItem value="python" label="python">

```python
messages = [{"role": "user", "content": "What's the status of my transaction T1001?"}]
```

</TabItem>
<TabItem value="typescript" label="typescript">

+```typescript
+const messages = [{"role": "user", "content": "What's the status of my transaction T1001?"}];
+```

</TabItem>
</Tabs>

## Step 2. Model: Generate function arguments

drawing

@@ -148,6 +282,13 @@ Users can use `tool_choice` to specify how tools are used:
- "auto": default mode. The model decides whether or not to use the tool.
- "any": forces tool use.
- "none": prevents tool use.

+### parallel_tool_calls
+Users can use `parallel_tool_calls` to specify whether parallel tool calling is allowed.
+- true: default mode. The model decides whether to make parallel tool calls.
+- false: forces the model to make at most one tool call per response.

<Tabs>
<TabItem value="python" label="python">

```python
import os

@@ -162,6 +303,7 @@
from mistralai import Mistral

api_key = os.environ["MISTRAL_API_KEY"]
model = "mistral-large-latest"

client = Mistral(api_key=api_key)

response = client.chat.complete(
    model = model,
    messages = messages,
    tools = tools,
    tool_choice = "any",
+    parallel_tool_calls = False,
)
response
```

@@ -172,12 +314,51 @@
Output:

```
ChatCompletionResponse(id='7cbd8962041442459eb3636e1e3cbf10', object='chat.completion', model='mistral-large-latest', usage=Usage(prompt_tokens=94, completion_tokens=30, total_tokens=124), created=1721403550, choices=[Choices(index=0, finish_reason='tool_calls', message=AssistantMessage(content='', tool_calls=[ToolCall(function=FunctionCall(name='retrieve_payment_status', arguments='{"transaction_id": "T1001"}'), id='D681PevKs', type='function')], prefix=False, role='assistant'))])
```

</TabItem>
<TabItem value="typescript" label="typescript">

+```typescript
+import { Mistral } from '@mistralai/mistralai';
+
+const apiKey = process.env.MISTRAL_API_KEY;
+const model = "mistral-large-latest";
+
+const client = new Mistral({ apiKey: apiKey });
+
+let response = await client.chat.complete({
+    model: model,
+    messages: messages,
+    tools: tools,
+    toolChoice: "any",
+    parallelToolCalls: false,
+});
+```

We get a response whose `toolCalls` array contains the chosen function name `retrievePaymentStatus` and the arguments for this function.
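Concretely, inspecting the tool calls might print something like the following. This is a representative shape based on the Python output shown earlier (same request, same values); the exact field layout may vary across SDK versions:

```typescript
// Hedged illustration: the toolCalls array mirrors the Python output above.
console.log(response.choices[0].message.toolCalls);
// [
//   {
//     id: 'D681PevKs',
//     type: 'function',
//     function: {
//       name: 'retrievePaymentStatus',
//       arguments: '{"transactionId": "T1001"}'
//     }
//   }
// ]
```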
</TabItem>
</Tabs>

Let’s add the response message to the `messages` list.

<Tabs>
<TabItem value="python" label="python">

```python
messages.append(response.choices[0].message)
```

</TabItem>
<TabItem value="typescript" label="typescript">

+```typescript
+messages.push(response.choices[0].message);
+```

</TabItem>
</Tabs>

## Step 3. User: Execute function to obtain tool results

drawing

@@ -186,6 +367,10 @@ How do we execute the function? Currently, it is the user’s responsibility to

Let’s extract some useful function information from the model response, including `function_name` and `function_params`. It’s clear here that our Mistral model has chosen to use the function `retrieve_payment_status` with the parameter `transaction_id` set to T1001.

<Tabs>
<TabItem value="python" label="python">

```python
import json

@@ -199,8 +384,30 @@
Output
```
function_name: retrieve_payment_status
function_params: {'transaction_id': 'T1001'}
```

</TabItem>
<TabItem value="typescript" label="typescript">

+```typescript
+const toolCall = response.choices[0].message.toolCalls[0];
+const functionName = toolCall.function.name;
+const functionParams = JSON.parse(toolCall.function.arguments);
+console.log("\nfunction_name: ", functionName, "\nfunction_params: ", functionParams);
+```
+Output
+```
+function_name: retrievePaymentStatus
+function_params: { transactionId: 'T1001' }
+```

</TabItem>
</Tabs>

Now we can execute the function and get the function output `'{"status": "Paid"}'`.

<Tabs>
<TabItem value="python" label="python">

```python
function_result = names_to_functions[function_name](**function_params)
function_result
```
Output
```
'{"status": "Paid"}'
```

</TabItem>
<TabItem value="typescript" label="typescript">

+```typescript
+const functionResult = namesToFunctions[functionName](functionParams.transactionId);
+console.log(functionResult);
+```
+Output
+```
+{"status":"Paid"}
+```

</TabItem>
</Tabs>

## Step 4. Model: Generate final answer

drawing

We can now provide the output from the tools to Mistral models, and in return, the Mistral model can produce a customised final response for the specific user.

<Tabs>
<TabItem value="python" label="python">

```python
-messages.append({"role":"tool", "name":function_name, "content":function_result, "tool_call_id":tool_call.id})
+messages.append({
+    "role":"tool",
+    "name":function_name,
+    "content":function_result,
+    "tool_call_id":tool_call.id
+})

response = client.chat.complete(
    model = model,
    messages = messages
)
```

@@ -230,3 +460,28 @@
Output:
```
The status of your transaction with ID T1001 is "Paid". Is there anything else I can assist you with?
```

</TabItem>
<TabItem value="typescript" label="typescript">

+```typescript
+messages.push({
+  role: "tool",
+  name: functionName,
+  content: functionResult,
+  toolCallId: toolCall.id
+});
+
+response = await client.chat.complete({
+  model: model,
+  messages: messages
+});
+console.log(response.choices[0].message.content);
+```
+
+Output:
+```
+The status of your transaction with ID T1001 is "Paid". Is there anything else I can assist you with?
+```
+
+</TabItem>
+</Tabs>
\ No newline at end of file
diff --git a/docs/capabilities/structured-output/custom.mdx b/docs/capabilities/structured-output/custom.mdx
index 5bbc18ee..c6796c20 100644
--- a/docs/capabilities/structured-output/custom.mdx
+++ b/docs/capabilities/structured-output/custom.mdx
@@ -196,6 +196,15 @@ curl --location "https://api.mistral.ai/v1/chat/completions" \

+:::note
+To better guide the model, the following is always prepended by default to the system prompt when using this method:
+```
+Your output should be an instance of a JSON object following this schema: {{ json_schema }}
+```
+
+However, we recommend adding more explanation and iterating on your system prompt to better clarify the expected schema and behavior.
+:::
+
### FAQ

**Q: Which models support custom Structured Outputs?**
**A:** All currently available models except for `codestral-mamba` are supported.
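For illustration, building on the note above, the system prompt could be enriched along these lines. This is a hypothetical sketch (the wording and variable name are placeholders, not an official recommendation); the auto-prepended schema line is still added on top of whatever you supply:

```typescript
// Hypothetical sketch: extra guidance layered on top of the auto-prepended
// instruction ("Your output should be an instance of a JSON object
// following this schema: {{ json_schema }}").
const systemPrompt = [
  "Extract the requested fields from the user's message.",
  "Return ONLY a single JSON object that validates against the provided schema.",
  "Do not wrap the JSON in markdown code fences and do not add commentary.",
  "Use null for any field you cannot determine from the input."
].join(" ");
```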
diff --git a/docs/deployment/laplateforme/tier.mdx b/docs/deployment/laplateforme/tier.mdx
index dae65786..26630af9 100644
--- a/docs/deployment/laplateforme/tier.mdx
+++ b/docs/deployment/laplateforme/tier.mdx
@@ -5,7 +5,7 @@ sidebar_position: 3.12
---

:::note[ ]
-Please visit https://console.mistral.ai/limits/ for detailed information on the current rate limit and usage tiers for your workspace.
+Please visit https://admin.mistral.ai/plateforme/limits for detailed information on the current rate limits and usage tiers for your workspace.
:::

## How do rate limits work?

@@ -25,7 +25,7 @@ Key points to note:

## Usage tiers

-You can view the rate and usage limits for your workspace under the [limits](https://console.mistral.ai/limits/) section on la Plateforme.
+You can view the rate and usage limits for your workspace under the [limits](https://admin.mistral.ai/plateforme/limits) section on la Plateforme.

We offer various tiers on the platform, including a **free API tier** with restrictive rate limits. The free API tier is designed to allow you to try and explore our API. For actual projects and production use, we recommend upgrading to a higher tier.

diff --git a/docs/guides/finetuning_sections/_04_faq.md b/docs/guides/finetuning_sections/_04_faq.md
index badeab29..abb09787 100644
--- a/docs/guides/finetuning_sections/_04_faq.md
+++ b/docs/guides/finetuning_sections/_04_faq.md
@@ -29,6 +29,13 @@ The size limit for the validation data is 1MB. As a rule of thumb:

`validation_set_max_size = min(1MB, 5% of training data)`

+### What happens if I try to create a job that already exists?
+
+At job creation, you will receive a `409 Conflict` error if a similar job is already running, validated, or queued. This prevents inadvertently creating duplicate jobs, saving resources and avoiding redundancy.
+
+### What if I upload an already existing file?
+
+If an uploaded file matches an existing file in both content and name, the pre-existing file is returned instead of a new one being created.

### How many epochs are in the training process?

diff --git a/openapi.yaml b/openapi.yaml
index c05cfad8..6867baea 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -747,23 +747,22 @@ paths:
      description: FIM completion.
tags: - fim - /v1/ocr: + /v1/agents/completions: post: - summary: OCR - operationId: ocr_v1_ocr_post + summary: Agents Completion + operationId: agents_completion_v1_agents_completions_post requestBody: content: application/json: schema: - $ref: '#/components/schemas/OCRRequest' + $ref: '#/components/schemas/AgentsCompletionRequest' required: true responses: '200': description: Successful Response content: application/json: - schema: - $ref: '#/components/schemas/OCRResponse' + schema: {$ref: '#/components/schemas/ChatCompletionResponse'} '422': description: Validation Error content: @@ -771,23 +770,24 @@ paths: schema: $ref: '#/components/schemas/HTTPValidationError' tags: - - ocr - /v1/moderations: + - agents + /v1/embeddings: post: - summary: Moderations - operationId: moderations_v1_moderations_post + summary: Embeddings + description: 'Embeddings' + operationId: embeddings_v1_embeddings_post requestBody: content: application/json: schema: - $ref: '#/components/schemas/ClassificationRequest' + $ref: '#/components/schemas/EmbeddingRequest' required: true responses: '200': description: Successful Response content: application/json: - schema: {$ref: '#/components/schemas/ClassificationResponse'} + schema: {$ref: '#/components/schemas/EmbeddingResponse'} '422': description: Validation Error content: @@ -795,16 +795,16 @@ paths: schema: $ref: '#/components/schemas/HTTPValidationError' tags: - - classifiers - /v1/chat/moderations: + - embeddings + /v1/moderations: post: - summary: Moderations Chat - operationId: moderations_chat_v1_chat_moderations_post + summary: Moderations + operationId: moderations_v1_moderations_post requestBody: content: application/json: schema: - $ref: '#/components/schemas/ChatModerationRequest' + $ref: '#/components/schemas/ClassificationRequest' required: true responses: '200': @@ -820,23 +820,22 @@ paths: $ref: '#/components/schemas/HTTPValidationError' tags: - classifiers - /v1/embeddings: + /v1/chat/moderations: post: - summary: Embeddings - description: 'Embeddings' - operationId: embeddings_v1_embeddings_post + summary: Chat Moderations + operationId: chat_moderations_v1_chat_moderations_post requestBody: content: application/json: schema: - $ref: '#/components/schemas/EmbeddingRequest' + $ref: '#/components/schemas/ChatModerationRequest' required: true responses: '200': description: Successful Response content: application/json: - schema: {$ref: '#/components/schemas/EmbeddingResponse'} + schema: {$ref: '#/components/schemas/ClassificationResponse'} '422': description: Validation Error content: @@ -844,23 +843,24 @@ paths: schema: $ref: '#/components/schemas/HTTPValidationError' tags: - - embeddings - /v1/agents/completions: + - classifiers + /v1/ocr: post: - summary: Agents Completion - operationId: agents_completion_v1_agents_completions_post + summary: OCR + operationId: ocr_v1_ocr_post requestBody: content: application/json: schema: - $ref: '#/components/schemas/AgentsCompletionRequest' + $ref: '#/components/schemas/OCRRequest' required: true responses: '200': description: Successful Response content: application/json: - schema: {$ref: '#/components/schemas/ChatCompletionResponse'} + schema: + $ref: '#/components/schemas/OCRResponse' '422': description: Validation Error content: @@ -868,7 +868,7 @@ paths: schema: $ref: '#/components/schemas/HTTPValidationError' tags: - - agents + - ocr components: schemas: BaseModelCard: @@ -1110,6 +1110,7 @@ components: enum: - fine-tune - batch + - ocr SampleType: title: SampleType type: string @@ -2601,6 +2602,10 @@ 
components: type: content content: '' description: Enable users to specify expected results, optimizing response times by leveraging known or predictable content. This approach is especially effective for updating text documents or code files with minimal changes, reducing latency while maintaining high-quality results. + parallel_tool_calls: + type: boolean + title: Parallel Tool Calls + default: true safe_prompt: type: boolean description: Whether to inject a safety prompt before all conversations. @@ -2682,11 +2687,6 @@ components: title: ClassificationRequest DocumentURLChunk: properties: - type: - type: string - const: document_url - title: Type - default: document_url document_url: type: string title: Document Url @@ -2696,6 +2696,12 @@ components: - type: 'null' title: Document Name description: The filename of the document + type: + type: string + enum: + - document_url + title: Type + default: document_url additionalProperties: false type: object required: @@ -2706,8 +2712,8 @@ components: model: title: Model type: string - default: mistral-embed description: ID of the model to use. + example: mistral-embed input: anyOf: - type: string @@ -3391,6 +3397,10 @@ components: type: content content: '' description: Enable users to specify expected results, optimizing response times by leveraging known or predictable content. This approach is especially effective for updating text documents or code files with minimal changes, reducing latency while maintaining high-quality results. + parallel_tool_calls: + type: boolean + title: Parallel Tool Calls + default: true agent_id: type: string description: The ID of the agent to use for this completion. @@ -3414,6 +3424,58 @@ components: text: '#/components/schemas/TextChunk' reference: '#/components/schemas/ReferenceChunk' title: ContentChunk + CompletionEvent: + title: CompletionEvent + type: object + required: + - data + properties: + data: + $ref: '#/components/schemas/CompletionChunk' + CompletionChunk: + title: CompletionChunk + type: object + required: + - id + - model + - choices + properties: + id: + type: string + object: + type: string + created: + type: integer + model: + type: string + usage: + $ref: '#/components/schemas/UsageInfo' + choices: + type: array + items: + $ref: '#/components/schemas/CompletionResponseStreamChoice' + CompletionResponseStreamChoice: + title: CompletionResponseStreamChoice + type: object + required: + - index + - delta + - finish_reason + properties: + index: + type: integer + delta: + $ref: '#/components/schemas/DeltaMessage' + finish_reason: + type: + - string + - 'null' + enum: + - stop + - length + - error + - tool_calls + - null UsageInfo: title: UsageInfo type: object @@ -3595,58 +3657,6 @@ components: type: object additionalProperties: type: number - CompletionEvent: - title: CompletionEvent - type: object - required: - - data - properties: - data: - $ref: '#/components/schemas/CompletionChunk' - CompletionChunk: - title: CompletionChunk - type: object - required: - - id - - model - - choices - properties: - id: - type: string - object: - type: string - created: - type: integer - model: - type: string - usage: - $ref: '#/components/schemas/UsageInfo' - choices: - type: array - items: - $ref: '#/components/schemas/CompletionResponseStreamChoice' - CompletionResponseStreamChoice: - title: CompletionResponseStreamChoice - type: object - required: - - index - - delta - - finish_reason - properties: - index: - type: integer - delta: - $ref: '#/components/schemas/DeltaMessage' - finish_reason: - type: - - 
string - - 'null' - enum: - - stop - - length - - error - - tool_calls - - null securitySchemes: ApiKey: type: http
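
For reference, the streaming schemas relocated above describe chunks shaped like the following. This is an illustrative, hand-written TypeScript mirror of `CompletionChunk` and `CompletionResponseStreamChoice` (not an official SDK type; the usage fields are taken from the `Usage(prompt_tokens=..., completion_tokens=..., total_tokens=...)` output shown earlier):

```typescript
// Illustrative only: types mirroring the CompletionChunk and
// CompletionResponseStreamChoice schemas from openapi.yaml above.
type FinishReason = "stop" | "length" | "error" | "tool_calls" | null;

interface CompletionResponseStreamChoice {
  index: number;
  delta: { role?: string; content?: string }; // DeltaMessage, abbreviated here
  finish_reason: FinishReason;
}

interface CompletionChunk {
  id: string;
  object?: string;
  created?: number;
  model: string;
  usage?: {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
  };
  choices: CompletionResponseStreamChoice[];
}
```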