feat(Ollama Chat Model Node): Add additional Ollama config parameters & fix vision #9215

Merged · 3 commits · Apr 29, 2024
@@ -22,6 +22,7 @@ import { LLMChain } from 'langchain/chains';
 import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
 import { HumanMessage } from '@langchain/core/messages';
 import { ChatGoogleGenerativeAI } from '@langchain/google-genai';
+import { ChatOllama } from '@langchain/community/chat_models/ollama';
 import { getTemplateNoticeField } from '../../../utils/sharedFields';
 import {
 	getOptionalOutputParsers,
@@ -81,7 +82,10 @@ async function getImageMessage(
 	)) as BaseLanguageModel;
 	const dataURI = `data:image/jpeg;base64,${bufferData.toString('base64')}`;

-	const imageUrl = model instanceof ChatGoogleGenerativeAI ? dataURI : { url: dataURI, detail };
+	const directUriModels = [ChatGoogleGenerativeAI, ChatOllama];
+	const imageUrl = directUriModels.some((i) => model instanceof i)
+		? dataURI
+		: { url: dataURI, detail };

 	return new HumanMessage({
 		content: [
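
The vision fix above hinges on which message shape each provider accepts: Gemini and Ollama expect the raw base64 data URI, while OpenAI-style models expect an { url, detail } object. The following is a minimal standalone sketch of that selection logic; buildImageMessage is a hypothetical helper name and the parameter types are simplified, but the directUriModels list and the resulting message shape mirror the diff.

import { HumanMessage } from '@langchain/core/messages';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { ChatGoogleGenerativeAI } from '@langchain/google-genai';
import { ChatOllama } from '@langchain/community/chat_models/ollama';

// Providers that take the raw data URI directly rather than an { url, detail } object.
const directUriModels = [ChatGoogleGenerativeAI, ChatOllama];

// Hypothetical helper; in the PR this logic lives inside getImageMessage.
function buildImageMessage(
	model: BaseChatModel,
	bufferData: Buffer,
	detail: 'auto' | 'low' | 'high',
): HumanMessage {
	const dataURI = `data:image/jpeg;base64,${bufferData.toString('base64')}`;

	// Ollama and Google GenAI receive the URI directly; other models get the object form.
	const imageUrl = directUriModels.some((m) => model instanceof m)
		? dataURI
		: { url: dataURI, detail };

	return new HumanMessage({
		content: [{ type: 'image_url', image_url: imageUrl }],
	});
}
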
@@ -7,6 +7,7 @@ import {
 	type SupplyData,
 } from 'n8n-workflow';

+import type { ChatOllamaInput } from '@langchain/community/chat_models/ollama';
 import { ChatOllama } from '@langchain/community/chat_models/ollama';
 import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
@@ -54,12 +55,13 @@ export class LmChatOllama implements INodeType {
 		const credentials = await this.getCredentials('ollamaApi');

 		const modelName = this.getNodeParameter('model', itemIndex) as string;
-		const options = this.getNodeParameter('options', itemIndex, {}) as object;
+		const options = this.getNodeParameter('options', itemIndex, {}) as ChatOllamaInput;

 		const model = new ChatOllama({
-			...options,
 			baseUrl: credentials.baseUrl as string,
 			model: modelName,
+			...options,
+			format: options.format === 'default' ? undefined : options.format,
 		});

 		return {
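
Note the ordering inside the constructor call: the explicit format key is listed after the ...options spread so that it wins, normalizing the UI's 'default' choice to undefined (no format constraint sent to Ollama) instead of leaking the literal string 'default'. A hedged sketch of the same construction; the credential value, model name, and option values below are placeholders, not taken from the PR.

import { ChatOllama } from '@langchain/community/chat_models/ollama';
import type { ChatOllamaInput } from '@langchain/community/chat_models/ollama';

// Placeholder values standing in for what the node reads from credentials and parameters.
const credentials = { baseUrl: 'http://localhost:11434' }; // assumed local Ollama endpoint
const modelName = 'llava'; // any locally pulled model name
const options: Partial<ChatOllamaInput> & { format?: string } = {
	temperature: 0.7,
	numCtx: 2048,
	format: 'default', // what the node's "Output Format" option stores when left at Default
};

const model = new ChatOllama({
	baseUrl: credentials.baseUrl,
	model: modelName,
	...options,
	// Placed after the spread so it takes precedence: 'default' becomes undefined,
	// while 'json' is passed through to Ollama unchanged.
	format: options.format === 'default' ? undefined : options.format,
});
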
140 changes: 136 additions & 4 deletions packages/@n8n/nodes-langchain/nodes/llms/LMOllama/description.ts
@@ -76,16 +76,16 @@ export const ollamaOptions: INodeProperties = {
 			default: 0.7,
 			typeOptions: { maxValue: 1, minValue: 0, numberPrecision: 1 },
 			description:
-				'Controls randomness: Lowering results in less random completions. As the temperature approaches zero, the model will become deterministic and repetitive.',
+				'Controls the randomness of the generated text. Lower values make the output more focused and deterministic, while higher values make it more diverse and random.',
 			type: 'number',
 		},
 		{
 			displayName: 'Top K',
 			name: 'topK',
 			default: -1,
-			typeOptions: { maxValue: 1, minValue: -1, numberPrecision: 1 },
+			typeOptions: { maxValue: 100, minValue: -1, numberPrecision: 1 },
 			description:
-				'Used to remove "long tail" low probability responses. Defaults to -1, which disables it.',
+				'Limits the number of highest probability vocabulary tokens to consider at each step. A higher value increases diversity but may reduce coherence. Set to -1 to disable.',
 			type: 'number',
 		},
 		{
@@ -94,8 +94,140 @@ export const ollamaOptions: INodeProperties = {
 			default: 1,
 			typeOptions: { maxValue: 1, minValue: 0, numberPrecision: 1 },
 			description:
-				'Controls diversity via nucleus sampling: 0.5 means half of all likelihood-weighted options are considered. We generally recommend altering this or temperature but not both.',
+				'Chooses from the smallest possible set of tokens whose cumulative probability exceeds the probability top_p. Helps generate more human-like text by reducing repetitions.',
 			type: 'number',
 		},
+		{
+			displayName: 'Frequency Penalty',
+			name: 'frequencyPenalty',
+			type: 'number',
+			default: 0.0,
+			typeOptions: { minValue: 0 },
+			description:
+				'Adjusts the penalty for tokens that have already appeared in the generated text. Higher values discourage repetition.',
+		},
+		{
+			displayName: 'Keep Alive',
+			name: 'keepAlive',
+			type: 'string',
+			default: '5m',
+			description:
+				'Specifies the duration to keep the loaded model in memory after use. Useful for frequently used models. Format: 1h30m (1 hour 30 minutes).',
+		},
+		{
+			displayName: 'Low VRAM Mode',
+			name: 'lowVram',
+			type: 'boolean',
+			default: false,
+			description:
+				'Whether to activate low VRAM mode, which reduces memory usage at the cost of slower generation speed. Useful for GPUs with limited memory.',
+		},
+		{
+			displayName: 'Main GPU ID',
+			name: 'mainGpu',
+			type: 'number',
+			default: 0,
+			description:
+				'Specifies the ID of the GPU to use for the main computation. Only change this if you have multiple GPUs.',
+		},
+		{
+			displayName: 'Context Batch Size',
+			name: 'numBatch',
+			type: 'number',
+			default: 512,
+			description:
+				'Sets the batch size for prompt processing. Larger batch sizes may improve generation speed but increase memory usage.',
+		},
+		{
+			displayName: 'Context Length',
+			name: 'numCtx',
+			type: 'number',
+			default: 2048,
+			description:
+				'The maximum number of tokens to use as context for generating the next token. Smaller values reduce memory usage, while larger values provide more context to the model.',
+		},
+		{
+			displayName: 'Number of GPUs',
+			name: 'numGpu',
+			type: 'number',
+			default: -1,
+			description:
+				'Specifies the number of GPUs to use for parallel processing. Set to -1 for auto-detection.',
+		},
+		{
+			displayName: 'Max Tokens to Generate',
+			name: 'numPredict',
+			type: 'number',
+			default: -1,
+			description:
+				'The maximum number of tokens to generate. Set to -1 for no limit. Be cautious when setting this to a large value, as it can lead to very long outputs.',
+		},
+		{
+			displayName: 'Number of CPU Threads',
+			name: 'numThread',
+			type: 'number',
+			default: 0,
+			description:
+				'Specifies the number of CPU threads to use for processing. Set to 0 for auto-detection.',
+		},
+		{
+			displayName: 'Penalize Newlines',
+			name: 'penalizeNewline',
+			type: 'boolean',
+			default: true,
+			description:
+				'Whether the model will be less likely to generate newline characters, encouraging longer continuous sequences of text',
+		},
+		{
+			displayName: 'Presence Penalty',
+			name: 'presencePenalty',
+			type: 'number',
+			default: 0.0,
+			description:
+				'Adjusts the penalty for tokens based on their presence in the generated text so far. Positive values penalize tokens that have already appeared, encouraging diversity.',
+		},
+		{
+			displayName: 'Repetition Penalty',
+			name: 'repeatPenalty',
+			type: 'number',
+			default: 1.0,
+			description:
+				'Adjusts the penalty factor for repeated tokens. Higher values more strongly discourage repetition. Set to 1.0 to disable repetition penalty.',
+		},
+		{
+			displayName: 'Use Memory Locking',
+			name: 'useMLock',
+			type: 'boolean',
+			default: false,
+			description:
+				'Whether to lock the model in memory to prevent swapping. This can improve performance but requires sufficient available memory.',
+		},
+		{
+			displayName: 'Use Memory Mapping',
+			name: 'useMMap',
+			type: 'boolean',
+			default: true,
+			description:
+				'Whether to use memory mapping for loading the model. This can reduce memory usage but may impact performance. Recommended to keep enabled.',
+		},
+		{
+			displayName: 'Load Vocabulary Only',
+			name: 'vocabOnly',
+			type: 'boolean',
+			default: false,
+			description:
+				'Whether to only load the model vocabulary without the weights. Useful for quickly testing tokenization.',
+		},
+		{
+			displayName: 'Output Format',
+			name: 'format',
+			type: 'options',
+			options: [
+				{ name: 'Default', value: 'default' },
+				{ name: 'JSON', value: 'json' },
+			],
+			default: 'default',
+			description: 'Specifies the format of the API response',
+		},
 	],
 };
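
Each of the UI fields above maps one-to-one onto a ChatOllama constructor option of the same camelCase name, so at runtime the collected options become a plain object spread into the model (as shown in the node's supplyData hunk earlier). An illustrative sketch with made-up values, assuming a locally running Ollama instance and a locally available model.

import { ChatOllama } from '@langchain/community/chat_models/ollama';

// Illustrative values only; each key corresponds to one of the UI options defined above.
const uiOptions = {
	temperature: 0.7,
	topK: 40,
	topP: 0.9,
	frequencyPenalty: 0.5,
	repeatPenalty: 1.1,
	keepAlive: '5m',
	numCtx: 4096,
	numPredict: 256,
	useMMap: true,
	format: 'json',
};

const chat = new ChatOllama({
	baseUrl: 'http://localhost:11434', // assumed local Ollama instance
	model: 'llama3', // any model already pulled locally
	...uiOptions,
});

// Usage example: const reply = await chat.invoke('Return a JSON greeting.');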