Commit e17e767
feat(Ollama Chat Model Node): Add additional Ollama config parameters & fix vision (#9215)

Signed-off-by: Oleg Ivaniv <me@olegivaniv.com>
Co-authored-by: Michael Kret <michael.k@radency.com>
OlegIvaniv and michael-radency committed Apr 29, 2024
1 parent 3fbcbce commit e17e767
Showing 3 changed files with 145 additions and 7 deletions.
@@ -22,6 +22,7 @@ import { LLMChain } from 'langchain/chains';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { HumanMessage } from '@langchain/core/messages';
import { ChatGoogleGenerativeAI } from '@langchain/google-genai';
+import { ChatOllama } from '@langchain/community/chat_models/ollama';
import { getTemplateNoticeField } from '../../../utils/sharedFields';
import {
getOptionalOutputParsers,
@@ -81,7 +82,10 @@ async function getImageMessage(
)) as BaseLanguageModel;
const dataURI = `data:image/jpeg;base64,${bufferData.toString('base64')}`;

-const imageUrl = model instanceof ChatGoogleGenerativeAI ? dataURI : { url: dataURI, detail };
+const directUriModels = [ChatGoogleGenerativeAI, ChatOllama];
+const imageUrl = directUriModels.some((i) => model instanceof i)
+	? dataURI
+	: { url: dataURI, detail };

return new HumanMessage({
content: [
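For context, a minimal usage sketch (not part of the commit) of what the vision fix enables: ChatOllama, like ChatGoogleGenerativeAI, is now handed the bare base64 data URI rather than the `{ url, detail }` object used for OpenAI-style models. The endpoint, image path, and the `llava` model name below are assumptions for illustration.

```ts
import { readFileSync } from 'node:fs';
import { HumanMessage } from '@langchain/core/messages';
import { ChatOllama } from '@langchain/community/chat_models/ollama';

// Assumed local Ollama instance serving a vision-capable model (e.g. llava).
const model = new ChatOllama({ baseUrl: 'http://localhost:11434', model: 'llava' });

// Any JPEG works for the sketch; the file path is hypothetical.
const dataURI = `data:image/jpeg;base64,${readFileSync('photo.jpg').toString('base64')}`;

const response = await model.invoke([
	new HumanMessage({
		content: [
			{ type: 'text', text: 'What is in this image?' },
			// With the fix, Ollama receives the data URI string directly.
			{ type: 'image_url', image_url: dataURI },
		],
	}),
]);
console.log(response.content);
```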
@@ -7,6 +7,7 @@ import {
type SupplyData,
} from 'n8n-workflow';

+import type { ChatOllamaInput } from '@langchain/community/chat_models/ollama';
import { ChatOllama } from '@langchain/community/chat_models/ollama';
import { logWrapper } from '../../../utils/logWrapper';
import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
@@ -54,12 +55,13 @@ export class LmChatOllama implements INodeType {
const credentials = await this.getCredentials('ollamaApi');

const modelName = this.getNodeParameter('model', itemIndex) as string;
-const options = this.getNodeParameter('options', itemIndex, {}) as object;
+const options = this.getNodeParameter('options', itemIndex, {}) as ChatOllamaInput;

const model = new ChatOllama({
-...options,
baseUrl: credentials.baseUrl as string,
model: modelName,
+...options,
+format: options.format === 'default' ? undefined : options.format,
});

return {
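A rough sketch (not the node's code verbatim) of what the new option handling amounts to: the collected options are spread into the ChatOllama constructor, and the UI-only 'default' choice for `format` is translated to `undefined`, so only an explicit JSON selection is forwarded to Ollama. The endpoint and model name are placeholders.

```ts
import { ChatOllama } from '@langchain/community/chat_models/ollama';
import type { ChatOllamaInput } from '@langchain/community/chat_models/ollama';

// Hypothetical values as they might come from the node's Options collection.
const options = { temperature: 0.7, numCtx: 4096, keepAlive: '10m', format: 'default' } as ChatOllamaInput;

const model = new ChatOllama({
	baseUrl: 'http://localhost:11434', // the node reads this from the Ollama credentials
	model: 'llama3', // placeholder model name
	...options,
	// 'default' exists only in the n8n UI; Ollama expects 'json' or no format at all.
	format: options.format === 'default' ? undefined : options.format,
});

console.log((await model.invoke('Reply with a short greeting.')).content);
```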
140 changes: 136 additions & 4 deletions packages/@n8n/nodes-langchain/nodes/llms/LMOllama/description.ts
@@ -76,16 +76,16 @@ export const ollamaOptions: INodeProperties = {
default: 0.7,
typeOptions: { maxValue: 1, minValue: 0, numberPrecision: 1 },
description:
-'Controls randomness: Lowering results in less random completions. As the temperature approaches zero, the model will become deterministic and repetitive.',
+'Controls the randomness of the generated text. Lower values make the output more focused and deterministic, while higher values make it more diverse and random.',
type: 'number',
},
{
displayName: 'Top K',
name: 'topK',
default: -1,
-typeOptions: { maxValue: 1, minValue: -1, numberPrecision: 1 },
+typeOptions: { maxValue: 100, minValue: -1, numberPrecision: 1 },
description:
-'Used to remove "long tail" low probability responses. Defaults to -1, which disables it.',
+'Limits the number of highest probability vocabulary tokens to consider at each step. A higher value increases diversity but may reduce coherence. Set to -1 to disable.',
type: 'number',
},
{
@@ -94,8 +94,140 @@
default: 1,
typeOptions: { maxValue: 1, minValue: 0, numberPrecision: 1 },
description:
-'Controls diversity via nucleus sampling: 0.5 means half of all likelihood-weighted options are considered. We generally recommend altering this or temperature but not both.',
+'Chooses from the smallest possible set of tokens whose cumulative probability exceeds the probability top_p. Helps generate more human-like text by reducing repetitions.',
type: 'number',
},
{
displayName: 'Frequency Penalty',
name: 'frequencyPenalty',
type: 'number',
default: 0.0,
typeOptions: { minValue: 0 },
description:
'Adjusts the penalty for tokens that have already appeared in the generated text. Higher values discourage repetition.',
},
{
displayName: 'Keep Alive',
name: 'keepAlive',
type: 'string',
default: '5m',
description:
'Specifies the duration to keep the loaded model in memory after use. Useful for frequently used models. Format: 1h30m (1 hour 30 minutes).',
},
{
displayName: 'Low VRAM Mode',
name: 'lowVram',
type: 'boolean',
default: false,
description:
'Whether to activate low VRAM mode, which reduces memory usage at the cost of slower generation speed. Useful for GPUs with limited memory.',
},
{
displayName: 'Main GPU ID',
name: 'mainGpu',
type: 'number',
default: 0,
description:
'Specifies the ID of the GPU to use for the main computation. Only change this if you have multiple GPUs.',
},
{
displayName: 'Context Batch Size',
name: 'numBatch',
type: 'number',
default: 512,
description:
'Sets the batch size for prompt processing. Larger batch sizes may improve generation speed but increase memory usage.',
},
{
displayName: 'Context Length',
name: 'numCtx',
type: 'number',
default: 2048,
description:
'The maximum number of tokens to use as context for generating the next token. Smaller values reduce memory usage, while larger values provide more context to the model.',
},
{
displayName: 'Number of GPUs',
name: 'numGpu',
type: 'number',
default: -1,
description:
'Specifies the number of GPUs to use for parallel processing. Set to -1 for auto-detection.',
},
{
displayName: 'Max Tokens to Generate',
name: 'numPredict',
type: 'number',
default: -1,
description:
'The maximum number of tokens to generate. Set to -1 for no limit. Be cautious when setting this to a large value, as it can lead to very long outputs.',
},
{
displayName: 'Number of CPU Threads',
name: 'numThread',
type: 'number',
default: 0,
description:
'Specifies the number of CPU threads to use for processing. Set to 0 for auto-detection.',
},
{
displayName: 'Penalize Newlines',
name: 'penalizeNewline',
type: 'boolean',
default: true,
description:
'Whether to penalize newline characters, making the model less likely to generate newlines and encouraging longer continuous blocks of text.',
},
{
displayName: 'Presence Penalty',
name: 'presencePenalty',
type: 'number',
default: 0.0,
description:
'Adjusts the penalty for tokens based on their presence in the generated text so far. Positive values penalize tokens that have already appeared, encouraging diversity.',
},
{
displayName: 'Repetition Penalty',
name: 'repeatPenalty',
type: 'number',
default: 1.0,
description:
'Adjusts the penalty factor for repeated tokens. Higher values more strongly discourage repetition. Set to 1.0 to disable repetition penalty.',
},
{
displayName: 'Use Memory Locking',
name: 'useMLock',
type: 'boolean',
default: false,
description:
'Whether to lock the model in memory to prevent swapping. This can improve performance but requires sufficient available memory.',
},
{
displayName: 'Use Memory Mapping',
name: 'useMMap',
type: 'boolean',
default: true,
description:
'Whether to use memory mapping for loading the model. This can reduce memory usage but may impact performance. Recommended to keep enabled.',
},
{
displayName: 'Load Vocabulary Only',
name: 'vocabOnly',
type: 'boolean',
default: false,
description:
'Whether to only load the model vocabulary without the weights. Useful for quickly testing tokenization.',
},
{
displayName: 'Output Format',
name: 'format',
type: 'options',
options: [
{ name: 'Default', value: 'default' },
{ name: 'JSON', value: 'json' },
],
default: 'default',
description: 'Specifies the format of the API response',
},
],
};
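For reference, these camelCase options map to snake_case fields in Ollama's REST API, which the LangChain client sets on the outgoing request. A hedged sketch of roughly what the resulting `/api/chat` body looks like for a handful of them (model name and values are illustrative only):

```ts
// Illustrative payload; field names follow Ollama's API options.
const body = {
	model: 'llama3', // placeholder model
	messages: [{ role: 'user', content: 'Say hello as JSON.' }],
	format: 'json', // Output Format = JSON
	keep_alive: '5m', // Keep Alive
	stream: false,
	options: {
		temperature: 0.7,
		top_k: 40, // Top K
		top_p: 0.9,
		num_ctx: 2048, // Context Length
		num_predict: 256, // Max Tokens to Generate
		repeat_penalty: 1.1, // Repetition Penalty
	},
};

const res = await fetch('http://localhost:11434/api/chat', {
	method: 'POST',
	headers: { 'Content-Type': 'application/json' },
	body: JSON.stringify(body),
});
const data = (await res.json()) as { message?: { content: string } };
console.log(data.message?.content);
```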
