From 6d8c5e5c2d911872fbf28aff098865aef17e224a Mon Sep 17 00:00:00 2001
From: Oleg Ivaniv
Date: Thu, 25 Apr 2024 10:51:47 +0200
Subject: [PATCH 1/2] feat(Ollama Chat Model Node): Add additional Ollama config parameters & fix vision

Signed-off-by: Oleg Ivaniv
---
 .../nodes/chains/ChainLLM/ChainLlm.node.ts    |   6 +-
 .../llms/LMChatOllama/LmChatOllama.node.ts    |   6 +-
 .../nodes/llms/LMOllama/description.ts        | 145 +++++++++++++++++-
 3 files changed, 147 insertions(+), 10 deletions(-)

diff --git a/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/ChainLlm.node.ts b/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/ChainLlm.node.ts
index 578dc791c1725..b177755591215 100644
--- a/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/ChainLlm.node.ts
+++ b/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/ChainLlm.node.ts
@@ -22,6 +22,7 @@ import { LLMChain } from 'langchain/chains';
 import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
 import { HumanMessage } from '@langchain/core/messages';
 import { ChatGoogleGenerativeAI } from '@langchain/google-genai';
+import { ChatOllama } from '@langchain/community/chat_models/ollama';
 import { getTemplateNoticeField } from '../../../utils/sharedFields';
 import {
 	getOptionalOutputParsers,
@@ -81,7 +82,10 @@ async function getImageMessage(
 	)) as BaseLanguageModel;
 	const dataURI = `data:image/jpeg;base64,${bufferData.toString('base64')}`;
 
-	const imageUrl = model instanceof ChatGoogleGenerativeAI ? dataURI : { url: dataURI, detail };
+	const directUriModels = [ChatGoogleGenerativeAI, ChatOllama];
+	const imageUrl = directUriModels.some((i) => model instanceof i)
+		? dataURI
+		: { url: dataURI, detail };
 
 	return new HumanMessage({
 		content: [
diff --git a/packages/@n8n/nodes-langchain/nodes/llms/LMChatOllama/LmChatOllama.node.ts b/packages/@n8n/nodes-langchain/nodes/llms/LMChatOllama/LmChatOllama.node.ts
index b5314b067be3f..c5cd5eabfb801 100644
--- a/packages/@n8n/nodes-langchain/nodes/llms/LMChatOllama/LmChatOllama.node.ts
+++ b/packages/@n8n/nodes-langchain/nodes/llms/LMChatOllama/LmChatOllama.node.ts
@@ -7,6 +7,7 @@ import {
 	type SupplyData,
 } from 'n8n-workflow';
 
+import type { ChatOllamaInput } from '@langchain/community/chat_models/ollama';
 import { ChatOllama } from '@langchain/community/chat_models/ollama';
 import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
@@ -54,12 +55,13 @@ export class LmChatOllama implements INodeType {
 		const credentials = await this.getCredentials('ollamaApi');
 
 		const modelName = this.getNodeParameter('model', itemIndex) as string;
-		const options = this.getNodeParameter('options', itemIndex, {}) as object;
+		const options = this.getNodeParameter('options', itemIndex, {}) as ChatOllamaInput;
 
 		const model = new ChatOllama({
+			...options,
 			baseUrl: credentials.baseUrl as string,
 			model: modelName,
-			...options,
+			format: options.format === 'default' ? undefined : options.format,
 		});
 
 		return {
diff --git a/packages/@n8n/nodes-langchain/nodes/llms/LMOllama/description.ts b/packages/@n8n/nodes-langchain/nodes/llms/LMOllama/description.ts
index c9493fd573e14..860ef002db101 100644
--- a/packages/@n8n/nodes-langchain/nodes/llms/LMOllama/description.ts
+++ b/packages/@n8n/nodes-langchain/nodes/llms/LMOllama/description.ts
@@ -76,26 +76,157 @@ export const ollamaOptions: INodeProperties = {
 			default: 0.7,
 			typeOptions: { maxValue: 1, minValue: 0, numberPrecision: 1 },
 			description:
-				'Controls randomness: Lowering results in less random completions. As the temperature approaches zero, the model will become deterministic and repetitive.',
+				'Controls the randomness of the generated text. Lower values make the output more focused and deterministic, while higher values make it more diverse and random. Recommended range is 0.7 to 1.0.',
 			type: 'number',
 		},
 		{
 			displayName: 'Top K',
 			name: 'topK',
-			default: -1,
-			typeOptions: { maxValue: 1, minValue: -1, numberPrecision: 1 },
+			default: 40,
 			description:
-				'Used to remove "long tail" low probability responses. Defaults to -1, which disables it.',
+				'Limits the number of highest probability vocabulary tokens to consider at each step. A higher value increases diversity but may reduce coherence. Set to -1 to disable. Recommended value is 40.',
 			type: 'number',
 		},
 		{
 			displayName: 'Top P',
 			name: 'topP',
-			default: 1,
-			typeOptions: { maxValue: 1, minValue: 0, numberPrecision: 1 },
+			default: 0.95,
+			typeOptions: { maxValue: 1, minValue: 0, numberPrecision: 2 },
+			description:
+				'Chooses from the smallest possible set of tokens whose cumulative probability exceeds the probability top_p. Helps generate more human-like text by reducing repetitions. Recommended value is 0.95.',
+			type: 'number',
+		},
+		{
+			displayName: 'Frequency Penalty',
+			name: 'frequencyPenalty',
+			type: 'number',
+			default: 0.0,
+			typeOptions: { minValue: 0 },
+			description:
+				'Adjusts the penalty for tokens that have already appeared in the generated text. Higher values discourage repetition.',
+		},
+		{
+			displayName: 'Keep Alive',
+			name: 'keepAlive',
+			type: 'string',
+			default: '5m',
+			description:
+				'Specifies the duration to keep the loaded model in memory after use. Useful for frequently used models. Format: 1h30m (1 hour 30 minutes).',
+		},
+		{
+			displayName: 'Low VRAM Mode',
+			name: 'lowVram',
+			type: 'boolean',
+			default: false,
+			description:
+				'Whether to activate low VRAM mode, which reduces memory usage at the cost of slower generation speed. Useful for GPUs with limited memory.',
+		},
+		{
+			displayName: 'Main GPU ID',
+			name: 'mainGpu',
+			type: 'number',
+			default: 0,
+			description:
+				'Specifies the ID of the GPU to use for the main computation. Only change this if you have multiple GPUs.',
+		},
+		{
+			displayName: 'Context Batch Size',
+			name: 'numBatch',
+			type: 'number',
+			default: 512,
+			description:
+				'Sets the batch size for prompt processing. Larger batch sizes may improve generation speed but increase memory usage.',
+		},
+		{
+			displayName: 'Context Length',
+			name: 'numCtx',
+			type: 'number',
+			default: 2048,
+			description:
+				'The maximum number of tokens to use as context for generating the next token. Smaller values reduce memory usage, while larger values provide more context to the model.',
+		},
+		{
+			displayName: 'Number of GPUs',
+			name: 'numGpu',
+			type: 'number',
+			default: -1,
 			description:
-				'Controls diversity via nucleus sampling: 0.5 means half of all likelihood-weighted options are considered. We generally recommend altering this or temperature but not both.',
+				'Specifies the number of GPUs to use for parallel processing. Set to -1 for auto-detection.',
+		},
+		{
+			displayName: 'Max Tokens to Generate',
+			name: 'numPredict',
+			type: 'number',
+			default: -1,
+			description:
+				'The maximum number of tokens to generate. Set to -1 for no limit. Be cautious when setting this to a large value, as it can lead to very long outputs.',
+		},
+		{
+			displayName: 'Number of CPU Threads',
+			name: 'numThread',
+			type: 'number',
+			default: 0,
+			description:
+				'Specifies the number of CPU threads to use for processing. Set to 0 for auto-detection.',
+		},
+		{
+			displayName: 'Penalize Newlines',
+			name: 'penalizeNewline',
+			type: 'boolean',
+			default: true,
+			description:
+				'Whether the model will be less likely to generate newline characters, encouraging longer continuous sequences of text',
+		},
+		{
+			displayName: 'Presence Penalty',
+			name: 'presencePenalty',
+			type: 'number',
+			default: 0.0,
+			description:
+				'Adjusts the penalty for tokens based on their presence in the generated text so far. Positive values penalize tokens that have already appeared, encouraging diversity.',
+		},
+		{
+			displayName: 'Repetition Penalty',
+			name: 'repeatPenalty',
 			type: 'number',
+			default: 1.0,
+			description:
+				'Adjusts the penalty factor for repeated tokens. Higher values more strongly discourage repetition. Set to 1.0 to disable repetition penalty.',
+		},
+		{
+			displayName: 'Use Memory Locking',
+			name: 'useMLock',
+			type: 'boolean',
+			default: false,
+			description:
+				'Whether to lock the model in memory to prevent swapping. This can improve performance but requires sufficient available memory.',
+		},
+		{
+			displayName: 'Use Memory Mapping',
+			name: 'useMMap',
+			type: 'boolean',
+			default: true,
+			description:
+				'Whether to use memory mapping for loading the model. This can reduce memory usage but may impact performance. Recommended to keep enabled.',
+		},
+		{
+			displayName: 'Load Vocabulary Only',
+			name: 'vocabOnly',
+			type: 'boolean',
+			default: false,
+			description:
+				'Whether to only load the model vocabulary without the weights. Useful for quickly testing tokenization.',
+		},
+		{
+			displayName: 'Output Format',
+			name: 'format',
+			type: 'options',
+			options: [
+				{ name: 'Default', value: 'default' },
+				{ name: 'JSON', value: 'json' },
+			],
+			default: 'default',
+			description: 'Specifies the format of the API response',
 		},
 	],
 };

From 3cf0cc7287ea38474083859efbb14c126d017c07 Mon Sep 17 00:00:00 2001
From: Oleg Ivaniv
Date: Thu, 25 Apr 2024 16:00:49 +0200
Subject: [PATCH 2/2] Improve descriptions

Signed-off-by: Oleg Ivaniv
---
 .../nodes/llms/LMOllama/description.ts | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/packages/@n8n/nodes-langchain/nodes/llms/LMOllama/description.ts b/packages/@n8n/nodes-langchain/nodes/llms/LMOllama/description.ts
index 860ef002db101..382de60fddedb 100644
--- a/packages/@n8n/nodes-langchain/nodes/llms/LMOllama/description.ts
+++ b/packages/@n8n/nodes-langchain/nodes/llms/LMOllama/description.ts
@@ -76,24 +76,25 @@ export const ollamaOptions: INodeProperties = {
 			default: 0.7,
 			typeOptions: { maxValue: 1, minValue: 0, numberPrecision: 1 },
 			description:
-				'Controls the randomness of the generated text. Lower values make the output more focused and deterministic, while higher values make it more diverse and random. Recommended range is 0.7 to 1.0.',
+				'Controls the randomness of the generated text. Lower values make the output more focused and deterministic, while higher values make it more diverse and random.',
 			type: 'number',
 		},
 		{
 			displayName: 'Top K',
 			name: 'topK',
-			default: 40,
+			default: -1,
+			typeOptions: { maxValue: 100, minValue: -1, numberPrecision: 1 },
 			description:
-				'Limits the number of highest probability vocabulary tokens to consider at each step. A higher value increases diversity but may reduce coherence. Set to -1 to disable. Recommended value is 40.',
+				'Limits the number of highest probability vocabulary tokens to consider at each step. A higher value increases diversity but may reduce coherence. Set to -1 to disable.',
 			type: 'number',
 		},
 		{
 			displayName: 'Top P',
 			name: 'topP',
-			default: 0.95,
-			typeOptions: { maxValue: 1, minValue: 0, numberPrecision: 2 },
+			default: 1,
+			typeOptions: { maxValue: 1, minValue: 0, numberPrecision: 1 },
 			description:
-				'Chooses from the smallest possible set of tokens whose cumulative probability exceeds the probability top_p. Helps generate more human-like text by reducing repetitions. Recommended value is 0.95.',
+				'Chooses from the smallest possible set of tokens whose cumulative probability exceeds the probability top_p. Helps generate more human-like text by reducing repetitions.',
 			type: 'number',
 		},
 		{
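
The notes below are not part of the patches above; they are illustrative sketches only. First, a minimal TypeScript sketch of how the options introduced by this series end up on ChatOllama, mirroring what LmChatOllama.node.ts does after patch 1. It assumes a locally running Ollama server with an already-pulled model; the base URL and model name are placeholders, since in n8n they come from the ollamaApi credentials and the node's model parameter.

    // Illustrative sketch: how the new node options map onto ChatOllama.
    import { ChatOllama } from '@langchain/community/chat_models/ollama';

    const model = new ChatOllama({
      baseUrl: 'http://localhost:11434', // placeholder; n8n reads this from the ollamaApi credentials
      model: 'llama3', // placeholder model name
      temperature: 0.7,
      topK: 40,
      topP: 0.95,
      numCtx: 2048,
      // The node's Output Format option maps 'default' to undefined (plain text output);
      // selecting JSON would pass format: 'json' through to Ollama instead.
      format: undefined,
    });

    async function main() {
      const response = await model.invoke('Why is the sky blue?');
      console.log(response.content);
    }

    void main();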
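
Second, a sketch of the message shape that getImageMessage in ChainLlm.node.ts builds after the vision fix: for ChatOllama (and ChatGoogleGenerativeAI) the image_url entry is the bare data URI, while other chat models still receive an { url, detail } object. The base64 payload below is a truncated placeholder, not real image data.

    // Illustrative sketch of the HumanMessage built for image inputs when the
    // connected model is ChatOllama; the data URI content is a placeholder.
    import { HumanMessage } from '@langchain/core/messages';

    const dataURI = 'data:image/jpeg;base64,/9j/4AAQSkZJRg...'; // truncated placeholder

    const message = new HumanMessage({
      content: [
        {
          type: 'image_url',
          // ChatOllama and ChatGoogleGenerativeAI expect the data URI directly;
          // other chat models would get { url: dataURI, detail } here instead.
          image_url: dataURI,
        },
      ],
    });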