From f5c4103cd1831a7bf5ebc031125a3227f4f48be6 Mon Sep 17 00:00:00 2001 From: DarkSky Date: Tue, 2 Apr 2024 18:31:40 +0800 Subject: [PATCH] feat: add image to text interface --- .../src/plugins/copilot/providers/openai.ts | 5 +++- .../server/src/plugins/copilot/types.ts | 25 +++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/packages/backend/server/src/plugins/copilot/providers/openai.ts b/packages/backend/server/src/plugins/copilot/providers/openai.ts index 3523732732486..c221336eb7a7b 100644 --- a/packages/backend/server/src/plugins/copilot/providers/openai.ts +++ b/packages/backend/server/src/plugins/copilot/providers/openai.ts @@ -5,6 +5,7 @@ import { ClientOptions, OpenAI } from 'openai'; import { ChatMessageRole, CopilotCapability, + CopilotImageToTextProvider, CopilotProviderType, CopilotTextToEmbeddingProvider, CopilotTextToImageProvider, @@ -19,13 +20,15 @@ export class OpenAIProvider implements CopilotTextToTextProvider, CopilotTextToEmbeddingProvider, - CopilotTextToImageProvider + CopilotTextToImageProvider, + CopilotImageToTextProvider { static readonly type = CopilotProviderType.OpenAI; static readonly capabilities = [ CopilotCapability.TextToText, CopilotCapability.TextToEmbedding, CopilotCapability.TextToImage, + CopilotCapability.ImageToText, ]; readonly availableModels = [ diff --git a/packages/backend/server/src/plugins/copilot/types.ts b/packages/backend/server/src/plugins/copilot/types.ts index dc1829769c4f1..23124855bec6d 100644 --- a/packages/backend/server/src/plugins/copilot/types.ts +++ b/packages/backend/server/src/plugins/copilot/types.ts @@ -130,6 +130,7 @@ export enum CopilotCapability { TextToEmbedding = 'text-to-embedding', TextToImage = 'text-to-image', ImageToImage = 'image-to-image', + ImageToText = 'image-to-text', } export interface CopilotProvider { @@ -190,11 +191,35 @@ export interface CopilotTextToImageProvider extends CopilotProvider { ): AsyncIterable; } +export interface CopilotImageToTextProvider extends CopilotProvider { + generateText( + messages: PromptMessage[], + model: string, + options: { + temperature?: number; + maxTokens?: number; + signal?: AbortSignal; + user?: string; + } + ): Promise; + generateTextStream( + messages: PromptMessage[], + model: string, + options: { + temperature?: number; + maxTokens?: number; + signal?: AbortSignal; + user?: string; + } + ): AsyncIterable; +} + export interface CopilotImageToImageProvider extends CopilotProvider {} export type CapabilityToCopilotProvider = { [CopilotCapability.TextToText]: CopilotTextToTextProvider; [CopilotCapability.TextToEmbedding]: CopilotTextToEmbeddingProvider; [CopilotCapability.TextToImage]: CopilotTextToImageProvider; + [CopilotCapability.ImageToText]: CopilotImageToTextProvider; [CopilotCapability.ImageToImage]: CopilotImageToImageProvider; };