diff --git a/plugins/discourse-ai/app/models/ai_tool.rb b/plugins/discourse-ai/app/models/ai_tool.rb index 1ce463d37afd0..322148dbb1fff 100644 --- a/plugins/discourse-ai/app/models/ai_tool.rb +++ b/plugins/discourse-ai/app/models/ai_tool.rb @@ -240,9 +240,9 @@ def self.image_generation_presets }, { preset_id: "image_generation_flux", - name: "FLUX", + name: "FLUX 1.1 Pro", provider: "Together.ai", - model_name: "FLUX 1.1", + model_name: "FLUX 1.1 Pro", tool_name: "image_generation", description: "Generate images using the FLUX 1.1 Pro model from Black Forest Labs via Together.ai", @@ -260,10 +260,36 @@ def self.image_generation_presets description: "Optional seed for random number generation", }, ], - script: "#{preamble}\n#{load_script("presets/image_generation/flux.js")}", + script: "#{preamble}\n#{load_script("presets/image_generation/flux_together.js")}", summary: "Generate images with FLUX 1.1 Pro", category: "image_generation", }, + { + preset_id: "image_generation_flux2", + name: "FLUX 2 Pro", + provider: "Black Forest Labs", + model_name: "FLUX 2 Pro", + tool_name: "image_generation_flux2", + description: + "Generate and edit images using FLUX 2 Pro directly via Black Forest Labs API. Supports multi-image editing.", + parameters: [ + { + name: "prompt", + type: "string", + required: true, + description: "The text prompt for image generation or editing", + }, + { + name: "seed", + type: "number", + required: false, + description: "Optional seed for reproducible results", + }, + ], + script: "#{preamble}\n#{load_script("presets/image_generation/flux_2_bfl.js")}", + summary: "Generate and edit images with FLUX 2 Pro", + category: "image_generation", + }, ] end end diff --git a/plugins/discourse-ai/config/locales/server.en.yml b/plugins/discourse-ai/config/locales/server.en.yml index 59eac872fb5fe..52ccc47a067be 100644 --- a/plugins/discourse-ai/config/locales/server.en.yml +++ b/plugins/discourse-ai/config/locales/server.en.yml @@ -282,7 +282,9 @@ en: image_generation_gemini: name: "Nano Banana" image_generation_flux: - name: "FLUX" + name: "FLUX 1.1 Pro" + image_generation_flux2: + name: "FLUX 2 Pro" empty_tool: name: "Start from blank..." 
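Usage note on the presets above (an illustrative sketch, not part of the patch): the parameters declared on a preset arrive in its script as a plain `params` object, and the EditImage tool changed later in this diff additionally passes `image_urls` when it delegates an edit to the first configured custom image tool. The concrete values below are made up for illustration:

    // Generation: only the declared preset parameters (prompt, seed) are present.
    invoke({ prompt: "a watercolor painting of a fox", seed: 42 });

    // Editing via the EditImage delegation: image_urls is supplied alongside the prompt.
    invoke({ prompt: "make the sky overcast", image_urls: ["upload://abc123"] });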
name: diff --git a/plugins/discourse-ai/config/settings.yml b/plugins/discourse-ai/config/settings.yml index 02e49b1bb9d4c..116ad8a6d21c0 100644 --- a/plugins/discourse-ai/config/settings.yml +++ b/plugins/discourse-ai/config/settings.yml @@ -34,31 +34,12 @@ discourse_ai: default: 60 hidden: true - ai_openai_image_generation_url: "https://api.openai.com/v1/images/generations" - ai_openai_image_edit_url: "https://api.openai.com/v1/images/edits" ai_openai_embeddings_url: hidden: true default: "https://api.openai.com/v1/embeddings" ai_openai_organization: default: "" hidden: true - ai_openai_api_key: - default: "" - secret: true - ai_stability_api_key: - default: "" - secret: true - ai_stability_api_url: - default: "https://api.stability.ai" - ai_stability_engine: - default: "stable-diffusion-xl-1024-v1-0" - type: enum - choices: - - "sd3" - - "sd3-turbo" - - "stable-diffusion-xl-1024-v1-0" - - "stable-diffusion-768-v2-1" - - "stable-diffusion-v1-5" ai_hugging_face_tei_endpoint: hidden: true default: "" @@ -96,6 +77,30 @@ discourse_ai: default: false hidden: true + # Deprecated image generation settings - TODO(keegan): Remove 2026-02-01 + ai_openai_image_generation_url: + default: "" + hidden: true + ai_openai_image_edit_url: + default: "" + hidden: true + ai_stability_api_key: + default: "" + secret: true + hidden: true + ai_stability_api_url: + default: "" + hidden: true + ai_stability_engine: + default: "" + hidden: true + ai_helper_illustrate_post_model: + default: "" + type: enum + allow_any: false + enum: "DiscourseAi::Configuration::LlmEnumerator" + hidden: true + ai_helper_enabled: default: false client: true @@ -138,15 +143,6 @@ discourse_ai: ai_helper_automatic_chat_thread_title_delay: default: 5 area: "ai-features/ai_helper" - ai_helper_illustrate_post_model: # Deprecated. TODO(keegan): Remove 2025-12-01 - default: disabled - type: enum - choices: - - stable_diffusion_xl - - dall_e_3 - - disabled - area: "ai-features/ai_helper" - hidden: true ai_helper_enabled_features: client: true default: "suggestions|context_menu" diff --git a/plugins/discourse-ai/lib/ai_tool_scripts/preamble.js b/plugins/discourse-ai/lib/ai_tool_scripts/preamble.js index 7ab296aaf9bb4..f6f11ff9d5d9d 100644 --- a/plugins/discourse-ai/lib/ai_tool_scripts/preamble.js +++ b/plugins/discourse-ai/lib/ai_tool_scripts/preamble.js @@ -82,6 +82,21 @@ * Returns: { id: number, url: string, short_url: string } - Details of the created upload record. * * upload.getUrl(shortUrl): Given a short URL, eg upload://12345, returns the full CDN friendly URL of the upload. + * + * upload.getBase64(uploadIdOrShortUrl, maxPixels): Fetches the base64-encoded content of an existing upload. + * Parameters: + * uploadIdOrShortUrl (number | string): Either an upload ID (number) or short URL (string, eg "upload://abc123"). + * maxPixels (number, optional): Maximum pixel count for automatic resizing (default: 10,000,000). + * Returns: string (base64-encoded image data) or null if upload not found. + * Use case: Image editing tools can fetch existing uploads to send to external APIs. + * + * Note for Image Editing: + * To implement image editing in a tool: + * 1. Accept an `image_urls` parameter (array of short URLs like ["upload://abc123"]). + * 2. Use upload.getBase64() to fetch the base64 data for each image. + * 3. Send the base64 data to your image editing API (e.g., OpenAI's /v1/images/edits endpoint). + * 4. Create a new upload with the edited image using upload.create(). + * 5. Use chain.setCustomRaw() to display the edited image. 
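To make the five steps above concrete, here is a minimal sketch of a custom image-editing tool script wired together from only the helpers documented in this preamble (upload.getBase64, http.post, upload.create, chain.setCustomRaw). It is illustrative, not part of the patch: the endpoint URL, request body, and response shape (data[0].b64_json) are placeholders for whatever editing API the tool actually targets, and error handling is omitted (the bundled presets later in this diff show the fuller pattern):

    /* eslint-disable no-undef, no-unused-vars */
    const apiKey = "YOUR_KEY"; // placeholder credential

    function invoke(params) {
      // Step 1: accept an image_urls parameter containing upload short URLs.
      const prompt = params.prompt;
      const imageUrls = params.image_urls || [];

      // Step 2: fetch base64 data for each referenced upload.
      const images = imageUrls.map((shortUrl) => upload.getBase64(shortUrl));

      // Step 3: send the prompt and images to the editing API (illustrative endpoint and payload).
      const result = http.post("https://example.com/v1/images/edits", {
        headers: {
          Authorization: `Bearer ${apiKey}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({ prompt, images }),
      });

      // Step 4: create a new upload from the returned base64 image (response shape assumed).
      const base64Image = JSON.parse(result.body).data[0].b64_json;
      const image = upload.create("edited_image.png", base64Image);

      // Step 5: display the edited image in the reply.
      chain.setCustomRaw(`\n![${prompt}](${image.short_url})\n`);

      return { result: "Image edited successfully" };
    }

    function details() {
      return "Illustrative image-editing sketch; point it at a real editing API before use.";
    }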
* 5. chain * Controls the execution flow. * diff --git a/plugins/discourse-ai/lib/ai_tool_scripts/presets/image_generation/flux.js b/plugins/discourse-ai/lib/ai_tool_scripts/presets/image_generation/flux.js deleted file mode 100644 index 5213a7e336d29..0000000000000 --- a/plugins/discourse-ai/lib/ai_tool_scripts/presets/image_generation/flux.js +++ /dev/null @@ -1,41 +0,0 @@ -/* eslint-disable no-undef, no-unused-vars */ -const apiKey = "YOUR_KEY"; -const model = "black-forest-labs/FLUX.1.1-pro"; - -function invoke(params) { - let seed = parseInt(params.seed, 10); - if (!(seed > 0)) { - seed = Math.floor(Math.random() * 1000000) + 1; - } - - const prompt = params.prompt; - const body = { - model, - prompt, - width: 1024, - height: 768, - steps: 10, - n: 1, - seed, - response_format: "b64_json", - }; - - const result = http.post("https://api.together.xyz/v1/images/generations", { - headers: { - Authorization: `Bearer ${apiKey}`, - "Content-Type": "application/json", - }, - body: JSON.stringify(body), - }); - - const base64Image = JSON.parse(result.body).data[0].b64_json; - const image = upload.create("generated_image.png", base64Image); - const raw = `\n![${prompt}](${image.short_url})\n`; - chain.setCustomRaw(raw); - - return { result: "Image generated successfully", seed }; -} - -function details() { - return "Generates images based on a text prompt using the FLUX model."; -} diff --git a/plugins/discourse-ai/lib/ai_tool_scripts/presets/image_generation/flux_2_bfl.js b/plugins/discourse-ai/lib/ai_tool_scripts/presets/image_generation/flux_2_bfl.js new file mode 100644 index 0000000000000..0f3912958bf70 --- /dev/null +++ b/plugins/discourse-ai/lib/ai_tool_scripts/presets/image_generation/flux_2_bfl.js @@ -0,0 +1,130 @@ +/* eslint-disable no-undef, no-unused-vars */ +const apiKey = "YOUR_BFL_API_KEY"; + +function invoke(params) { + const prompt = params.prompt; + const imageUrls = params.image_urls || []; + + let seed = parseInt(params.seed, 10); + if (!(seed > 0)) { + seed = Math.floor(Math.random() * 2147483647) + 1; + } + + if (imageUrls.length > 0) { + return performEdit(prompt, imageUrls, seed); + } else { + return performGeneration(prompt, seed); + } +} + +function performGeneration(prompt, seed) { + const body = { + prompt, + seed, + width: 1024, + height: 1024, + output_format: "png", + safety_tolerance: 2, + }; + + return submitAndPoll(body, prompt, seed); +} + +function performEdit(prompt, imageUrls, seed) { + const body = { + prompt, + seed, + output_format: "png", + safety_tolerance: 2, + }; + + // Add up to 10 reference images as base64 + const maxImages = Math.min(imageUrls.length, 10); + for (let i = 0; i < maxImages; i++) { + const base64Data = upload.getBase64(imageUrls[i]); + if (!base64Data) { + return { error: `Failed to get base64 data for: ${imageUrls[i]}` }; + } + const paramName = i === 0 ? 
"input_image" : `input_image_${i + 1}`; + body[paramName] = base64Data; + } + + return submitAndPoll(body, prompt, seed); +} + +function submitAndPoll(body, prompt, seed) { + // Submit request + const submitResult = http.post("https://api.bfl.ai/v1/flux-2-pro", { + headers: { + "x-key": apiKey, + "Content-Type": "application/json", + }, + body: JSON.stringify(body), + }); + + const submitData = JSON.parse(submitResult.body); + + if (submitData.error) { + return { error: `BFL API Error: ${submitData.error}` }; + } + + if (!submitData.id) { + return { + error: "No task ID returned", + body_preview: JSON.stringify(submitData).substring(0, 500), + }; + } + + // Poll for result (max 25 attempts × 3s = 75s) + const pollingUrl = `https://api.bfl.ai/v1/get_result?id=${submitData.id}`; + + for (let attempt = 0; attempt < 25; attempt++) { + const pollResult = http.get(pollingUrl, { + headers: { "x-key": apiKey }, + }); + + const pollData = JSON.parse(pollResult.body); + + if (pollData.status === "Ready") { + // Download image from signed URL + const imageUrl = pollData.result.sample; + const imageResponse = http.get(imageUrl, { base64Encode: true }); + + if (!imageResponse.body) { + return { error: "Failed to download generated image" }; + } + + const image = upload.create("generated_image.png", imageResponse.body); + + if (!image || image.error) { + return { + error: `Upload failed: ${image ? image.error : "unknown"}`, + }; + } + + const raw = `\n![${prompt}](${image.short_url})\n`; + chain.setCustomRaw(raw); + + return { result: "Image generated successfully", seed }; + } + + if ( + pollData.status === "Failed" || + pollData.status === "Error" || + pollData.status === "Request Moderated" + ) { + return { + error: `Generation failed: ${pollData.error || pollData.status}`, + }; + } + + // Wait 3 seconds before next poll + sleep(3000); + } + + return { error: "Generation timed out after 75 seconds" }; +} + +function details() { + return "Generates and edits images using FLUX 2 Pro via Black Forest Labs API. 
Supports multi-image editing with up to 10 reference images."; +} diff --git a/plugins/discourse-ai/lib/ai_tool_scripts/presets/image_generation/flux_together.js b/plugins/discourse-ai/lib/ai_tool_scripts/presets/image_generation/flux_together.js new file mode 100644 index 0000000000000..c1676dbb4519d --- /dev/null +++ b/plugins/discourse-ai/lib/ai_tool_scripts/presets/image_generation/flux_together.js @@ -0,0 +1,133 @@ +/* eslint-disable no-undef, no-unused-vars */ +const apiKey = "YOUR_KEY"; +const model = "black-forest-labs/FLUX.1.1-pro"; + +function invoke(params) { + const prompt = params.prompt; + const imageUrls = params.image_urls || []; + + // Determine mode: edit if image_urls provided, otherwise generate + const isEditMode = imageUrls.length > 0; + + let seed = parseInt(params.seed, 10); + if (!(seed > 0)) { + seed = Math.floor(Math.random() * 1000000) + 1; + } + + if (isEditMode) { + return performEdit(prompt, imageUrls, seed); + } else { + return performGeneration(prompt, seed); + } +} + +function performGeneration(prompt, seed) { + const body = { + model, + prompt, + width: 1024, + height: 768, + steps: 10, + n: 1, + seed, + response_format: "b64_json", + }; + + const result = http.post("https://api.together.xyz/v1/images/generations", { + headers: { + Authorization: `Bearer ${apiKey}`, + "Content-Type": "application/json", + }, + body: JSON.stringify(body), + }); + + const responseData = JSON.parse(result.body); + + // Check for API errors + if (responseData.error) { + return { + error: `Together.ai API Error: ${responseData.error.message || JSON.stringify(responseData.error)}`, + }; + } + + if ( + !responseData.data || + !responseData.data[0] || + !responseData.data[0].b64_json + ) { + return { + error: "Unexpected API response format", + body_preview: JSON.stringify(responseData).substring(0, 500), + }; + } + + const base64Image = responseData.data[0].b64_json; + const image = upload.create("generated_image.png", base64Image); + const raw = `\n![${prompt}](${image.short_url})\n`; + chain.setCustomRaw(raw); + + return { result: "Image generated successfully", seed }; +} + +function performEdit(prompt, imageUrls, seed) { + // FLUX supports img2img via image_url parameter + // Together.ai expects a single image URL (uses first one) + const imageUrl = imageUrls[0]; + + // Convert short URL to full CDN URL + const fullImageUrl = upload.getUrl(imageUrl); + if (!fullImageUrl) { + return { error: `Failed to get full URL for: ${imageUrl}` }; + } + + const body = { + model, + prompt, + width: 1024, + height: 768, + steps: 28, // Use more steps for img2img + n: 1, + seed, + response_format: "b64_json", + image_url: fullImageUrl, + }; + + const result = http.post("https://api.together.xyz/v1/images/generations", { + headers: { + Authorization: `Bearer ${apiKey}`, + "Content-Type": "application/json", + }, + body: JSON.stringify(body), + }); + + const responseData = JSON.parse(result.body); + + // Check for API errors + if (responseData.error) { + return { + error: `Together.ai API Error: ${responseData.error.message || JSON.stringify(responseData.error)}`, + }; + } + + if ( + !responseData.data || + !responseData.data[0] || + !responseData.data[0].b64_json + ) { + return { + error: "Unexpected API response format", + body_preview: JSON.stringify(responseData).substring(0, 500), + }; + } + + const base64Image = responseData.data[0].b64_json; + const image = upload.create("edited_image.png", base64Image); + const raw = `\n![${prompt}](${image.short_url})\n`; + chain.setCustomRaw(raw); + 
+ return { result: "Image edited successfully", seed }; +} + +function details() { + return "Generates and edits images using the FLUX 1.1 Pro model via Together.ai. Supports generation mode (when no image_urls provided) and img2img edit mode (when image_urls array is provided, uses first image)."; +} diff --git a/plugins/discourse-ai/lib/ai_tool_scripts/presets/image_generation/gemini.js b/plugins/discourse-ai/lib/ai_tool_scripts/presets/image_generation/gemini.js index c6c7d3a39e07e..09073f4f32d58 100644 --- a/plugins/discourse-ai/lib/ai_tool_scripts/presets/image_generation/gemini.js +++ b/plugins/discourse-ai/lib/ai_tool_scripts/presets/image_generation/gemini.js @@ -3,7 +3,19 @@ const apiKey = "YOUR_GOOGLE_API_KEY"; function invoke(params) { const prompt = params.prompt; + const imageUrls = params.image_urls || []; + // Determine mode: edit if image_urls provided, otherwise generate + const isEditMode = imageUrls.length > 0; + + if (isEditMode) { + return performEdit(prompt, imageUrls); + } else { + return performGeneration(prompt); + } +} + +function performGeneration(prompt) { const body = { contents: [ { @@ -26,6 +38,59 @@ function invoke(params) { } ); + return processResponse(result, prompt); +} + +function performEdit(prompt, imageUrls) { + // Gemini supports multimodal input - include images in the parts array + // Limit to first 10 images + const imagesToEdit = imageUrls.slice(0, 10); + const parts = []; + + // Add each image as inline data + for (const imageUrl of imagesToEdit) { + const base64Data = upload.getBase64(imageUrl); + if (!base64Data) { + return { error: `Failed to fetch image data for: ${imageUrl}` }; + } + + parts.push({ + inlineData: { + mimeType: "image/png", + data: base64Data, + }, + }); + } + + // Add the text prompt after the images + parts.push({ text: prompt }); + + const body = { + contents: [ + { + parts, + }, + ], + generationConfig: { + responseModalities: ["Image"], + }, + }; + + const result = http.post( + `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-image:generateContent`, + { + headers: { + "x-goog-api-key": `${apiKey}`, + "Content-Type": "application/json", + }, + body: JSON.stringify(body), + } + ); + + return processResponse(result, prompt); +} + +function processResponse(result, prompt) { const responseData = JSON.parse(result.body); // Check for API errors @@ -91,5 +156,5 @@ function invoke(params) { } function details() { - return "Generates images using Gemini 2.5 Flash Image (Nano Banana)."; + return "Generates and edits images using Gemini 2.5 Flash Image (Nano Banana). 
Supports generation mode (when no image_urls provided) and edit mode (when image_urls array is provided)."; } diff --git a/plugins/discourse-ai/lib/ai_tool_scripts/presets/image_generation/openai.js b/plugins/discourse-ai/lib/ai_tool_scripts/presets/image_generation/openai.js index 66fcd2ecd76b6..6d00a723472bd 100644 --- a/plugins/discourse-ai/lib/ai_tool_scripts/presets/image_generation/openai.js +++ b/plugins/discourse-ai/lib/ai_tool_scripts/presets/image_generation/openai.js @@ -4,7 +4,19 @@ const apiKey = "YOUR_OPENAI_API_KEY"; function invoke(params) { const prompt = params.prompt; const size = params.size || "1024x1024"; + const imageUrls = params.image_urls || []; + // Determine mode: edit if image_urls provided, otherwise generate + const isEditMode = imageUrls.length > 0; + + if (isEditMode) { + return performEdit(prompt, size, imageUrls); + } else { + return performGeneration(prompt, size); + } +} + +function performGeneration(prompt, size) { const body = { model: "gpt-image-1", prompt, @@ -50,6 +62,95 @@ function invoke(params) { return { result: "Image generated successfully" }; } +function performEdit(prompt, size, imageUrls) { + // Fetch base64 data for all images (limit to 16 per OpenAI API) + const imagesToEdit = imageUrls.slice(0, 16); + const imageDataArray = []; + + for (const imageUrl of imagesToEdit) { + const base64Data = upload.getBase64(imageUrl); + if (!base64Data) { + return { error: `Failed to fetch image data for: ${imageUrl}` }; + } + imageDataArray.push(base64Data); + } + + // Build multipart form data manually + const boundary = `----FormBoundary${Date.now()}`; + let body = ""; + + // Add model field + body += `--${boundary}\r\n`; + body += `Content-Disposition: form-data; name="model"\r\n\r\n`; + body += `gpt-image-1\r\n`; + + // Add image fields + for (let i = 0; i < imageDataArray.length; i++) { + body += `--${boundary}\r\n`; + body += `Content-Disposition: form-data; name="image[]"; filename="image_${i}.png"\r\n`; + body += `Content-Type: image/png\r\n`; + body += `Content-Transfer-Encoding: base64\r\n\r\n`; + body += `${imageDataArray[i]}\r\n`; + } + + // Add prompt field + body += `--${boundary}\r\n`; + body += `Content-Disposition: form-data; name="prompt"\r\n\r\n`; + body += `${prompt}\r\n`; + + // Add size field if provided + if (size) { + body += `--${boundary}\r\n`; + body += `Content-Disposition: form-data; name="size"\r\n\r\n`; + body += `${size}\r\n`; + } + + // Add n field (always 1 for edits) + body += `--${boundary}\r\n`; + body += `Content-Disposition: form-data; name="n"\r\n\r\n`; + body += `1\r\n`; + + // End boundary + body += `--${boundary}--\r\n`; + + const result = http.post("https://api.openai.com/v1/images/edits", { + headers: { + Authorization: `Bearer ${apiKey}`, + "Content-Type": `multipart/form-data; boundary=${boundary}`, + }, + body, + }); + + const responseData = JSON.parse(result.body); + + // Check for API errors + if (responseData.error) { + return { + error: `OpenAI API Error: ${responseData.error.message || JSON.stringify(responseData.error)}`, + }; + } + + // Validate response structure + if ( + !responseData.data || + !responseData.data[0] || + !responseData.data[0].b64_json + ) { + return { + error: "Unexpected API response format", + status: result.status, + body_preview: JSON.stringify(responseData).substring(0, 500), + }; + } + + const base64Image = responseData.data[0].b64_json; + const image = upload.create("edited_image.png", base64Image); + const raw = `\n![${prompt}](${image.short_url})\n`; + 
chain.setCustomRaw(raw); + + return { result: "Image edited successfully" }; +} + function details() { - return "Generates images using OpenAI's GPT Image 1 model."; + return "Generates and edits images using OpenAI's GPT Image 1 model. Supports generation mode (when no image_urls provided) and edit mode (when image_urls array is provided)."; } diff --git a/plugins/discourse-ai/lib/inference/open_ai_image_generator.rb b/plugins/discourse-ai/lib/inference/open_ai_image_generator.rb deleted file mode 100644 index e6b725b62c62d..0000000000000 --- a/plugins/discourse-ai/lib/inference/open_ai_image_generator.rb +++ /dev/null @@ -1,469 +0,0 @@ -# frozen_string_literal: true - -module DiscourseAi - module Inference - class OpenAiImageGenerator - TIMEOUT = 60 - MAX_IMAGE_SIZE = 20_971_520 # 20MB (technically 25 is supported by API) - - def self.create_uploads!( - prompts, - model:, - size: nil, - api_key: nil, - api_url: nil, - user_id:, - for_private_message: false, - n: 1, - quality: nil, - style: nil, - background: nil, - moderation: "low", - output_compression: nil, - output_format: nil, - title: nil, - cancel_manager: nil - ) - # Get the API responses in parallel threads - api_responses = - generate_images_in_threads( - prompts, - model: model, - size: size, - api_key: api_key, - api_url: api_url, - n: n, - quality: quality, - style: style, - background: background, - moderation: moderation, - output_compression: output_compression, - output_format: output_format, - cancel_manager: cancel_manager, - ) - - raise api_responses[0] if api_responses.all? { |resp| resp.is_a?(StandardError) } - - api_responses = api_responses.filter { |response| !response.is_a?(StandardError) } - - create_uploads_from_responses(api_responses, user_id, for_private_message, title) - end - - # Method for image editing that returns Upload objects - def self.create_edited_upload!( - images, - prompt, - model: "gpt-image-1", - size: "auto", - api_key: nil, - api_url: nil, - user_id:, - for_private_message: false, - n: 1, - quality: nil, - cancel_manager: nil - ) - api_response = - edit_images( - images, - prompt, - model: model, - size: size, - api_key: api_key, - api_url: api_url, - n: n, - quality: quality, - cancel_manager: cancel_manager, - ) - - create_uploads_from_responses([api_response], user_id, for_private_message).first - end - - # Common method to create uploads from API responses - def self.create_uploads_from_responses( - api_responses, - user_id, - for_private_message, - title = nil - ) - all_uploads = [] - - api_responses.each do |response| - next unless response - - response[:data].each_with_index do |image, index| - Tempfile.create("ai_image_#{index}.png") do |file| - file.binmode - file.write(Base64.decode64(image[:b64_json])) - file.rewind - - upload = - UploadCreator.new( - file, - title || "image.png", - for_private_message: for_private_message, - ).create_for(user_id) - - all_uploads << { - # Use revised_prompt if available (DALL-E 3), otherwise use original prompt - prompt: image[:revised_prompt] || response[:original_prompt], - upload: upload, - } - end - end - end - - all_uploads - end - - def self.generate_images_in_threads( - prompts, - model:, - size:, - api_key:, - api_url:, - n:, - quality:, - style:, - background:, - moderation:, - output_compression:, - output_format:, - cancel_manager: - ) - prompts = [prompts] unless prompts.is_a?(Array) - prompts = prompts.take(4) # Limit to 4 prompts max - - # Use provided values or defaults - api_key ||= SiteSetting.ai_openai_api_key - api_url ||= 
SiteSetting.ai_openai_image_generation_url - - # Thread processing - threads = [] - prompts.each do |prompt| - threads << Thread.new(prompt) do |inner_prompt| - attempts = 0 - begin - perform_generation_api_call!( - inner_prompt, - model: model, - size: size, - api_key: api_key, - api_url: api_url, - n: n, - quality: quality, - style: style, - background: background, - moderation: moderation, - output_compression: output_compression, - output_format: output_format, - cancel_manager: cancel_manager, - ) - rescue => e - attempts += 1 - # to keep tests speedy - if !Rails.env.test? && !cancel_manager&.cancelled? - retry if attempts < 3 - end - if !cancel_manager&.cancelled? - Discourse.warn_exception( - e, - message: "Failed to generate image for prompt #{prompt}\n", - ) - puts "Error generating image for prompt: #{prompt} #{e}" if Rails.env.development? - end - e - end - end - end - - threads.each(&:join) - threads.filter_map(&:value) - end - - def self.edit_images( - images, - prompt, - model: "gpt-image-1", - size: "auto", - api_key: nil, - api_url: nil, - n: 1, - quality: nil, - cancel_manager: nil - ) - images = [images] if !images.is_a?(Array) - - # For dall-e-2, only one image is supported - if model == "dall-e-2" && images.length > 1 - raise "DALL-E 2 only supports editing one image at a time" - end - - # For gpt-image-1, limit to 16 images - images = images.take(16) if model == "gpt-image-1" && images.length > 16 - - # Use provided values or defaults - api_key ||= SiteSetting.ai_openai_api_key - api_url ||= SiteSetting.ai_openai_image_edit_url - - # Execute edit API call - attempts = 0 - begin - perform_edit_api_call!( - images, - prompt, - model: model, - size: size, - api_key: api_key, - api_url: api_url, - n: n, - quality: quality, - cancel_manager: cancel_manager, - ) - rescue => e - raise e if cancel_manager&.cancelled? - attempts += 1 - if !Rails.env.test? - sleep 2 - retry if attempts < 3 - end - if Rails.env.development? 
- puts "Error editing image(s) with prompt: #{prompt} #{e}" - p e - end - Discourse.warn_exception(e, message: "Failed to edit image(s) with prompt #{prompt}") - raise e - end - end - - # Image generation API call method - def self.perform_generation_api_call!( - prompt, - model:, - size: nil, - api_key: nil, - api_url: nil, - n: 1, - quality: nil, - style: nil, - background: nil, - moderation: nil, - output_compression: nil, - output_format: nil, - cancel_manager: nil - ) - api_key ||= SiteSetting.ai_openai_api_key - api_url ||= SiteSetting.ai_openai_image_generation_url - - uri = URI(api_url) - headers = { "Content-Type" => "application/json" } - - if uri.host.include?("azure") - headers["api-key"] = api_key - else - headers["Authorization"] = "Bearer #{api_key}" - end - - # Build payload based on model type - payload = { model: model, prompt: prompt, n: n } - - # Add model-specific parameters - if model == "gpt-image-1" - if size - payload[:size] = size - else - payload[:size] = "auto" - end - payload[:background] = background if background - payload[:moderation] = moderation if moderation - payload[:output_compression] = output_compression if output_compression - payload[:output_format] = output_format if output_format - payload[:quality] = quality if quality - elsif model.start_with?("dall") - payload[:size] = size || "1024x1024" - payload[:quality] = quality || "hd" - payload[:style] = style if style - payload[:response_format] = "b64_json" - end - - # Store original prompt for upload metadata - original_prompt = prompt - cancel_manager_callback = nil - - FinalDestination::HTTP.start( - uri.host, - uri.port, - use_ssl: uri.scheme == "https", - read_timeout: TIMEOUT, - open_timeout: TIMEOUT, - write_timeout: TIMEOUT, - ) do |http| - request = Net::HTTP::Post.new(uri, headers) - request.body = payload.to_json - - if cancel_manager - cancel_manager_callback = lambda { http.finish } - cancel_manager.add_callback(cancel_manager_callback) - end - - json = nil - http.request(request) do |response| - if response.code.to_i != 200 - raise "OpenAI API returned #{response.code} #{response.body}" - else - json = JSON.parse(response.body, symbolize_names: true) - # Add original prompt to response to preserve it - json[:original_prompt] = original_prompt - end - end - json - end - ensure - if cancel_manager && cancel_manager_callback - cancel_manager.remove_callback(cancel_manager_callback) - end - end - - def self.perform_edit_api_call!( - images, - prompt, - model: "gpt-image-1", - size: "auto", - api_key:, - api_url:, - n: 1, - quality: nil, - cancel_manager: nil - ) - uri = URI(api_url) - - # Setup for multipart/form-data request - boundary = SecureRandom.hex - headers = { "Content-Type" => "multipart/form-data; boundary=#{boundary}" } - - if uri.host.include?("azure") - headers["api-key"] = api_key - else - headers["Authorization"] = "Bearer #{api_key}" - end - - # Create multipart form data - body = [] - - # Add model - body << "--#{boundary}\r\n" - body << "Content-Disposition: form-data; name=\"model\"\r\n\r\n" - - body << "#{model}\r\n" - - files_to_delete = [] - - # Add images - images.each do |image| - image_data = nil - image_filename = nil - - # Handle different image input types - if image.is_a?(Upload) - image_path = - if image.local? 
- Discourse.store.path_for(image) - else - filename = - Discourse.store.download_safe(image, max_file_size_kb: MAX_IMAGE_SIZE)&.path - files_to_delete << filename if filename - filename - end - image_data = File.read(image_path) - image_filename = File.basename(image.url) - else - raise "Unsupported image format. Must be an Upload" - end - - body << "--#{boundary}\r\n" - body << "Content-Disposition: form-data; name=\"image[]\"; filename=\"#{image_filename}\"\r\n" - body << "Content-Type: image/png\r\n\r\n" - body << image_data - body << "\r\n" - end - - # Add prompt - body << "--#{boundary}\r\n" - body << "Content-Disposition: form-data; name=\"prompt\"\r\n\r\n" - body << "#{prompt}\r\n" - - # Add size if provided - if size - body << "--#{boundary}\r\n" - body << "Content-Disposition: form-data; name=\"size\"\r\n\r\n" - body << "#{size}\r\n" - end - - # Add n if provided and not the default - if n != 1 - body << "--#{boundary}\r\n" - body << "Content-Disposition: form-data; name=\"n\"\r\n\r\n" - body << "#{n}\r\n" - end - - # Add quality if provided - if quality - body << "--#{boundary}\r\n" - body << "Content-Disposition: form-data; name=\"quality\"\r\n\r\n" - body << "#{quality}\r\n" - end - - # Add response_format if provided - if model.start_with?("dall") - # Default to b64_json for consistency with generation - body << "--#{boundary}\r\n" - body << "Content-Disposition: form-data; name=\"response_format\"\r\n\r\n" - body << "b64_json\r\n" - end - - # End boundary - body << "--#{boundary}--\r\n" - - # Store original prompt for upload metadata - original_prompt = prompt - cancel_manager_callback = nil - - FinalDestination::HTTP.start( - uri.host, - uri.port, - use_ssl: uri.scheme == "https", - read_timeout: TIMEOUT, - open_timeout: TIMEOUT, - write_timeout: TIMEOUT, - ) do |http| - request = Net::HTTP::Post.new(uri.path, headers) - request.body = body.join - - if cancel_manager - cancel_manager_callback = lambda { http.finish } - cancel_manager.add_callback(cancel_manager_callback) - end - - json = nil - http.request(request) do |response| - if response.code.to_i != 200 - raise "OpenAI API returned #{response.code} #{response.body}" - else - json = JSON.parse(response.body, symbolize_names: true) - # Add original prompt to response to preserve it - json[:original_prompt] = original_prompt - end - end - json - end - ensure - if cancel_manager && cancel_manager_callback - cancel_manager.remove_callback(cancel_manager_callback) - end - if files_to_delete.present? 
- files_to_delete.each { |file| File.delete(file) if File.exist?(file) } - end - end - end - end -end diff --git a/plugins/discourse-ai/lib/inference/stability_generator.rb b/plugins/discourse-ai/lib/inference/stability_generator.rb deleted file mode 100644 index acf7ef01636bc..0000000000000 --- a/plugins/discourse-ai/lib/inference/stability_generator.rb +++ /dev/null @@ -1,160 +0,0 @@ -# frozen_string_literal: true - -module DiscourseAi - module Inference - class StabilityGenerator - TIMEOUT = 120 - - # there is a new api for sd3 - def self.perform_sd3!( - prompt, - aspect_ratio: nil, - api_key: nil, - engine: nil, - api_url: nil, - output_format: "png", - seed: nil - ) - api_key ||= SiteSetting.ai_stability_api_key - engine ||= SiteSetting.ai_stability_engine - api_url ||= SiteSetting.ai_stability_api_url - - allowed_ratios = %w[16:9 1:1 21:9 2:3 3:2 4:5 5:4 9:16 9:21] - - aspect_ratio = "1:1" if !aspect_ratio || !allowed_ratios.include?(aspect_ratio) - - payload = { - prompt: prompt, - mode: "text-to-image", - model: engine, - output_format: output_format, - aspect_ratio: aspect_ratio, - } - - payload[:seed] = seed if seed - - endpoint = "v2beta/stable-image/generate/sd3" - - form_data = payload.to_a.map { |k, v| [k.to_s, v.to_s] } - - uri = URI("#{api_url}/#{endpoint}") - request = FinalDestination::HTTP::Post.new(uri) - - request["authorization"] = "Bearer #{api_key}" - request["accept"] = "application/json" - request["User-Agent"] = DiscourseAi::AiBot::USER_AGENT - request.set_form form_data, "multipart/form-data" - - response = - FinalDestination::HTTP.start( - uri.hostname, - uri.port, - use_ssl: uri.port != 80, - read_timeout: TIMEOUT, - open_timeout: TIMEOUT, - write_timeout: TIMEOUT, - ) { |http| http.request(request) } - - if response.code != "200" - Rails.logger.error( - "AI stability generator failed with status #{response.code}: #{response.body}}", - ) - raise Net::HTTPBadResponse - end - - parsed = JSON.parse(response.body, symbolize_names: true) - - # remap to old format - { artifacts: [{ base64: parsed[:image], seed: parsed[:seed] }] } - end - - def self.perform!( - prompt, - aspect_ratio: nil, - api_key: nil, - engine: nil, - api_url: nil, - image_count: 4, - seed: nil - ) - api_key ||= SiteSetting.ai_stability_api_key - engine ||= SiteSetting.ai_stability_engine - api_url ||= SiteSetting.ai_stability_api_url - - image_count = 4 if image_count > 4 - - if engine.start_with? "sd3" - artifacts = - image_count.times.map do - perform_sd3!( - prompt, - api_key: api_key, - engine: engine, - api_url: api_url, - aspect_ratio: aspect_ratio, - seed: seed, - )[ - :artifacts - ][ - 0 - ] - end - - return { artifacts: artifacts } - end - - headers = { - "Content-Type" => "application/json", - "Accept" => "application/json", - "Authorization" => "Bearer #{api_key}", - } - - ratio_to_dimension = { - "16:9" => [1536, 640], - "1:1" => [1024, 1024], - "21:9" => [1344, 768], - "2:3" => [896, 1152], - "3:2" => [1152, 896], - "4:5" => [832, 1216], - "5:4" => [1216, 832], - "9:16" => [640, 1536], - "9:21" => [768, 1344], - } - - if engine.include? 
"xl" - width, height = ratio_to_dimension[aspect_ratio] if aspect_ratio - - width, height = [1024, 1024] if !width || !height - else - width, height = [512, 512] - end - - payload = { - text_prompts: [{ text: prompt }], - cfg_scale: 7, - clip_guidance_preset: "FAST_BLUE", - height: width, - width: height, - samples: image_count, - steps: 30, - } - - payload[:seed] = seed if seed - - endpoint = "v1/generation/#{engine}/text-to-image" - - conn = Faraday.new { |f| f.adapter FinalDestination::FaradayAdapter } - response = conn.post("#{api_url}/#{endpoint}", payload.to_json, headers) - - if response.status != 200 - Rails.logger.error( - "AI stability generator failed with status #{response.status}: #{response.body}}", - ) - raise Net::HTTPBadResponse - end - - JSON.parse(response.body, symbolize_names: true) - end - end - end -end diff --git a/plugins/discourse-ai/lib/personas/artist.rb b/plugins/discourse-ai/lib/personas/artist.rb index 93e2361e2ad5f..8c81d98594b88 100644 --- a/plugins/discourse-ai/lib/personas/artist.rb +++ b/plugins/discourse-ai/lib/personas/artist.rb @@ -4,10 +4,12 @@ module DiscourseAi module Personas class Artist < Persona def tools - [Tools::Image] + # Only include Tools::Image if custom image generation tools are configured + Tools::Tool.available_custom_image_tools.present? ? [Tools::Image] : [] end def required_tools + # Always require Tools::Image - availability is checked by Persona.all() [Tools::Image] end @@ -15,7 +17,7 @@ def system_prompt <<~PROMPT You are artistbot and you are here to help people generate images. - You generate images using stable diffusion. + You generate images using configured image generation tools. - A good prompt needs to be detailed and specific. - You can specify subject, medium (e.g. oil on canvas), artist (person who drew it or photographed it) @@ -28,7 +30,6 @@ def system_prompt - When generating images, usually opt to generate 4 images unless the user specifies otherwise. - Be creative with your prompts, offer diverse options - - You can use the seeds to regenerate the same image and amend the prompt keeping general style PROMPT end end diff --git a/plugins/discourse-ai/lib/personas/dall_e_3.rb b/plugins/discourse-ai/lib/personas/dall_e_3.rb deleted file mode 100644 index 851756c828192..0000000000000 --- a/plugins/discourse-ai/lib/personas/dall_e_3.rb +++ /dev/null @@ -1,37 +0,0 @@ -#frozen_string_literal: true - -module DiscourseAi - module Personas - class DallE3 < Persona - def tools - [Tools::DallE] - end - - def required_tools - [Tools::DallE] - end - - def system_prompt - <<~PROMPT - As a DALL-E-3 bot, you're tasked with generating images based on user prompts. - - - Be specific and detailed in your prompts. Include elements like subject, medium (e.g., oil on canvas), artist style, lighting, time of day, and website style (e.g., ArtStation, DeviantArt). - - Add adjectives for more detail (e.g., beautiful, dystopian, futuristic). - - Prompts should be 40-100 words long, but remember the API accepts a maximum of 5000 characters per prompt. - - Enhance short, vague user prompts with your own creative details. - - Unless specified, generate 4 images per prompt. - - Don't seek user permission before generating images or run the prompts by the user. Generate immediately to save tokens. - - Example: - - User: "a cow" - You: Generate images immediately, without telling the user anything. Details will be provided to user with the generated images. - - DO NOT SAY "I will generate the following ... image 1 description ... 
image 2 description ... etc." - Just generate the images - - PROMPT - end - end - end -end diff --git a/plugins/discourse-ai/lib/personas/designer.rb b/plugins/discourse-ai/lib/personas/designer.rb index f2aa8dea41ef8..ecb6a7e1b9e5b 100644 --- a/plugins/discourse-ai/lib/personas/designer.rb +++ b/plugins/discourse-ai/lib/personas/designer.rb @@ -4,10 +4,16 @@ module DiscourseAi module Personas class Designer < Persona def tools - [Tools::CreateImage, Tools::EditImage] + # Only include image tools if custom image generation tools are configured + if Tools::Tool.available_custom_image_tools.present? + [Tools::CreateImage, Tools::EditImage] + else + [] + end end def required_tools + # Always require image tools - availability is checked by Persona.all() [Tools::CreateImage, Tools::EditImage] end diff --git a/plugins/discourse-ai/lib/personas/general.rb b/plugins/discourse-ai/lib/personas/general.rb index 6ec5e490d3513..3647c5ce43734 100644 --- a/plugins/discourse-ai/lib/personas/general.rb +++ b/plugins/discourse-ai/lib/personas/general.rb @@ -4,14 +4,18 @@ module DiscourseAi module Personas class General < Persona def tools - [ + base_tools = [ Tools::Search, Tools::Google, - Tools::Image, Tools::Read, Tools::ListCategories, Tools::ListTags, ] + + # Only include Tools::Image if custom image generation tools are configured + base_tools << Tools::Image if Tools::Tool.available_custom_image_tools.present? + + base_tools end def system_prompt diff --git a/plugins/discourse-ai/lib/personas/persona.rb b/plugins/discourse-ai/lib/personas/persona.rb index 97664d2ba1252..07106f276e0c4 100644 --- a/plugins/discourse-ai/lib/personas/persona.rb +++ b/plugins/discourse-ai/lib/personas/persona.rb @@ -44,7 +44,6 @@ def system_personas SettingsExplorer => -4, Researcher => -5, Creative => -6, - DallE3 => -7, DiscourseHelper => -8, GithubHelper => -9, WebArtifactCreator => -10, @@ -136,10 +135,10 @@ def all_available_tools tools << Tools::GithubSearchCode if SiteSetting.ai_bot_github_access_token.present? tools << Tools::ListTags if SiteSetting.tagging_enabled - tools << Tools::Image if SiteSetting.ai_stability_api_key.present? - if SiteSetting.ai_openai_api_key.present? - tools << Tools::DallE + # Image generation tools - use custom UI-configured tools + if Tools::Tool.available_custom_image_tools.present? + tools << Tools::Image tools << Tools::CreateImage tools << Tools::EditImage end diff --git a/plugins/discourse-ai/lib/personas/tools/create_image.rb b/plugins/discourse-ai/lib/personas/tools/create_image.rb index 8e2971fa1cf48..8a6c618f10288 100644 --- a/plugins/discourse-ai/lib/personas/tools/create_image.rb +++ b/plugins/discourse-ai/lib/personas/tools/create_image.rb @@ -34,46 +34,99 @@ def chain_next_response? end def invoke - # max 4 prompts + # Find available custom image generation tools + custom_tools = self.class.available_custom_image_tools + + if custom_tools.empty? + @error = true + return( + { + prompts: prompts, + error: + "No image generation tools configured. 
Please configure an image generation tool via the admin UI to use this feature.", + } + ) + end + + # Use the first available custom image tool + tool_class = custom_tools.first + + # Generate images for each prompt (up to 4) max_prompts = prompts.take(4) progress = prompts.first - yield(progress) - results = nil + uploads = [] + errors = [] - begin - results = - DiscourseAi::Inference::OpenAiImageGenerator.create_uploads!( - max_prompts, - model: "gpt-image-1", - user_id: bot_user.id, - cancel_manager: context.cancel_manager, + max_prompts.each do |prompt| + begin + # Create tool instance with parameters + tool_params = { prompt: prompt } + + tool_instance = + tool_class.new(tool_params, bot_user: bot_user, llm: llm, context: context) + + # Invoke the tool + tool_instance.invoke { |_progress| } + + # Extract the custom_raw which contains the generated image markdown + if tool_instance.custom_raw.present? + # Parse the upload short_url from the markdown + upload_match = tool_instance.custom_raw.match(%r{!\[.*?\]\((upload://[^)]+)\)}) + if upload_match + short_url = upload_match[1] + sha1 = Upload.sha1_from_short_url(short_url) + upload = Upload.find_by(sha1: sha1) if sha1 + uploads << { prompt: prompt, upload: upload, url: short_url } if upload + else + # Tool returned custom_raw but not in expected format + Rails.logger.error( + "CreateImage: Tool #{tool_class.name} returned custom_raw in unexpected format. " \ + "Expected markdown with upload:// URL. " \ + "custom_raw preview: #{tool_instance.custom_raw.truncate(200)}", + ) + errors << "Tool returned invalid image format" + end + else + # Tool returned no output + Rails.logger.warn( + "CreateImage: Tool #{tool_class.name} returned no custom_raw output for prompt: #{prompt.truncate(50)}", + ) + errors << "Tool returned no output" + end + rescue => e + Rails.logger.error( + "CreateImage: Failed to generate image for prompt '#{prompt.truncate(50)}'. " \ + "Tool: #{tool_class.name}, Error: #{e.class.name} - #{e.message}", ) - rescue => e - @error = e - return { prompts: max_prompts, error: e.message } + errors << e.message + end end - if results.blank? + if uploads.empty? @error = true - return { prompts: max_prompts, error: "Something went wrong, could not generate image" } + return( + { + prompts: max_prompts, + error: + "Failed to generate images. 
#{errors.first || "Please check your image generation tool configuration."}", + } + ) end self.custom_raw = <<~RAW [grid] #{ - results + uploads .map { |item| "![#{item[:prompt].gsub(/\|\'\"/, "")}](#{item[:upload].short_url})" } .join(" ") } [/grid] RAW - { - prompts: results.map { |item| { prompt: item[:prompt], url: item[:upload].short_url } }, - } + { prompts: uploads.map { |item| { prompt: item[:prompt], url: item[:url] } } } end protected diff --git a/plugins/discourse-ai/lib/personas/tools/dall_e.rb b/plugins/discourse-ai/lib/personas/tools/dall_e.rb deleted file mode 100644 index 1daa7ee17e97e..0000000000000 --- a/plugins/discourse-ai/lib/personas/tools/dall_e.rb +++ /dev/null @@ -1,97 +0,0 @@ -# frozen_string_literal: true - -module DiscourseAi - module Personas - module Tools - class DallE < Tool - def self.signature - { - name: name, - description: "Renders images from supplied descriptions", - parameters: [ - { - name: "prompts", - description: - "The prompts used to generate or create or draw the image (5000 chars or less, be creative) up to 4 prompts", - type: "array", - item_type: "string", - required: true, - }, - { - name: "aspect_ratio", - description: "The aspect ratio (optional, square by default)", - type: "string", - required: false, - enum: %w[tall square wide], - }, - ], - } - end - - def self.name - "dall_e" - end - - def prompts - parameters[:prompts] - end - - def aspect_ratio - parameters[:aspect_ratio] - end - - def chain_next_response? - false - end - - def invoke - # max 4 prompts - max_prompts = prompts.take(4) - progress = prompts.first - - yield(progress) - - results = nil - - size = "1024x1024" - if aspect_ratio == "tall" - size = "1024x1792" - elsif aspect_ratio == "wide" - size = "1792x1024" - end - - results = - DiscourseAi::Inference::OpenAiImageGenerator.create_uploads!( - max_prompts, - model: "dall-e-3", - size: size, - user_id: bot_user.id, - ) - - if results.blank? - return { prompts: max_prompts, error: "Something went wrong, could not generate image" } - end - - self.custom_raw = <<~RAW - - [grid] - #{ - results - .map { |item| "![#{item[:prompt].gsub(/\|\'\"/, "")}](#{item[:upload].short_url})" } - .join(" ") - } - [/grid] - RAW - - { prompts: results.map { |item| item[:prompt] } } - end - - protected - - def description_args - { prompt: prompts.first } - end - end - end - end -end diff --git a/plugins/discourse-ai/lib/personas/tools/edit_image.rb b/plugins/discourse-ai/lib/personas/tools/edit_image.rb index b9e3249a52bc6..1388b177ed3c7 100644 --- a/plugins/discourse-ai/lib/personas/tools/edit_image.rb +++ b/plugins/discourse-ai/lib/personas/tools/edit_image.rb @@ -7,7 +7,7 @@ class EditImage < Tool def self.signature { name: name, - description: "Renders images from supplied descriptions", + description: "Edits images based on supplied descriptions and context images", parameters: [ { name: "prompt", @@ -47,34 +47,102 @@ def image_urls def invoke yield(prompt) - return { prompt: prompt, error: "No valid images provided" } if image_urls.blank? + if image_urls.blank? + @error = true + return { prompt: prompt, error: "No valid images provided" } + end + # Validate that the image URLs exist sha1s = image_urls.map { |url| Upload.sha1_from_short_url(url) }.compact - uploads = Upload.where(sha1: sha1s).order(created_at: :asc).limit(10).to_a + if sha1s.empty? + @error = true + return { prompt: prompt, error: "No valid image URLs provided" } + end - return { prompt: prompt, error: "No valid images provided" } if uploads.blank? 
+ # Check permissions - use context.user (the human) not bot_user + guardian = Guardian.new(context.user) + uploads = Upload.where(sha1: sha1s) - begin - result = - DiscourseAi::Inference::OpenAiImageGenerator.create_edited_upload!( - uploads, - prompt, - user_id: bot_user.id, - cancel_manager: context.cancel_manager, - ) - rescue => e - @error = e - return { prompt: prompt, error: e.message } + uploads.each do |upload| + # Check if upload has access control + if upload.access_control_post_id.present? + post = Post.find_by(id: upload.access_control_post_id) + if post && !guardian.can_see?(post) + @error = true + return( + { + prompt: prompt, + error: + "Access denied: You don't have permission to edit one or more of the provided images", + } + ) + end + end end - if result.blank? + # Find available custom image generation tools + custom_tools = self.class.available_custom_image_tools + + if custom_tools.empty? @error = true - return { prompt: prompt, error: "Something went wrong, could not generate image" } + return( + { + prompt: prompt, + error: + "No image generation tools configured. Please configure an image generation tool via the admin UI to use this feature.", + } + ) end - self.custom_raw = "![#{result[:prompt].gsub(/\|\'\"/, "")}](#{result[:upload].short_url})" + # Use the first available custom image tool + # Pass image_urls to trigger edit mode in the tool + tool_class = custom_tools.first - { prompt: result[:prompt], url: result[:upload].short_url } + begin + tool_params = { prompt: prompt, image_urls: image_urls } + + tool_instance = + tool_class.new(tool_params, bot_user: bot_user, llm: llm, context: context) + + # Invoke the tool + tool_instance.invoke { |_progress| } + + # Extract the custom_raw which contains the edited image markdown + if tool_instance.custom_raw.present? + # Parse the upload short_url from the markdown + upload_match = tool_instance.custom_raw.match(%r{!\[.*?\]\((upload://[^)]+)\)}) + if upload_match + short_url = upload_match[1] + self.custom_raw = tool_instance.custom_raw + { prompt: prompt, url: short_url } + else + # Tool returned custom_raw but not in expected format + Rails.logger.error( + "EditImage: Tool #{tool_class.name} returned custom_raw in unexpected format. " \ + "Expected markdown with upload:// URL. " \ + "custom_raw preview: #{tool_instance.custom_raw.truncate(200)}", + ) + @error = true + { prompt: prompt, error: "Tool returned invalid image format" } + end + else + # Tool returned no output + Rails.logger.warn( + "EditImage: Tool #{tool_class.name} returned no custom_raw output. " \ + "Prompt: #{prompt.truncate(50)}, Image URLs: #{image_urls.length} provided", + ) + @error = true + { prompt: prompt, error: "Tool returned no output" } + end + rescue => e + @error = true + Rails.logger.error( + "EditImage: Failed to edit image. " \ + "Tool: #{tool_class.name}, Error: #{e.class.name} - #{e.message}. " \ + "Prompt: #{prompt.truncate(50)}, Image URLs: #{image_urls.join(", ")}", + ) + { prompt: prompt, error: e.message } + end end protected diff --git a/plugins/discourse-ai/lib/personas/tools/image.rb b/plugins/discourse-ai/lib/personas/tools/image.rb index 3ab2b70564410..7ad8f8cf966f0 100644 --- a/plugins/discourse-ai/lib/personas/tools/image.rb +++ b/plugins/discourse-ai/lib/personas/tools/image.rb @@ -62,83 +62,82 @@ def chain_next_response? 
end def invoke - # max 4 prompts - selected_prompts = prompts.take(4) - seeds = seeds.take(4) if seeds + # Find available custom image generation tools + custom_tools = self.class.available_custom_image_tools + + if custom_tools.empty? + @chain_next_response = true + return( + { + prompts: prompts, + error: + "No image generation tools configured. Please configure an image generation tool via the admin UI to use this feature.", + give_up: true, + } + ) + end + # Use the first available custom image tool + tool_class = custom_tools.first + + # Map aspect ratio to size parameter if provided + size = aspect_ratio_to_size(aspect_ratio) + + # Generate images for each prompt (up to 4) + selected_prompts = prompts.take(4) progress = prompts.first yield(progress) - results = nil - - # this ensures multisite safety since background threads - # generate the images - api_key = SiteSetting.ai_stability_api_key - engine = SiteSetting.ai_stability_engine - api_url = SiteSetting.ai_stability_api_url + uploads = [] + errors = [] - threads = [] selected_prompts.each_with_index do |prompt, index| - seed = seeds ? seeds[index] : nil - threads << Thread.new(seed, prompt) do |inner_seed, inner_prompt| - attempts = 0 - begin - DiscourseAi::Inference::StabilityGenerator.perform!( - inner_prompt, - engine: engine, - api_key: api_key, - api_url: api_url, - image_count: 1, - seed: inner_seed, - aspect_ratio: aspect_ratio, - ) - rescue => e - attempts += 1 - retry if attempts < 3 - Rails.logger.warn("Failed to generate image for prompt #{prompt}: #{e}") - nil + begin + # Create tool instance with parameters + tool_params = { prompt: prompt } + tool_params[:size] = size if size + + tool_instance = + tool_class.new(tool_params, bot_user: bot_user, llm: llm, context: context) + + # Invoke the tool + tool_instance.invoke { |_progress| } + + # Extract the custom_raw which contains the generated image markdown + if tool_instance.custom_raw.present? + # Parse the upload short_url from the markdown + upload_match = tool_instance.custom_raw.match(%r{!\[.*?\]\((upload://[^)]+)\)}) + if upload_match + short_url = upload_match[1] + sha1 = Upload.sha1_from_short_url(short_url) + upload = Upload.find_by(sha1: sha1) if sha1 + if upload + uploads << { + prompt: prompt, + upload: upload, + seed: nil, # Custom tools don't provide seeds + } + end + end end + rescue => e + Rails.logger.warn("Failed to generate image for prompt #{prompt}: #{e}") + errors << e.message end end - break if threads.all? { |t| t.join(2) } while true - - results = threads.map(&:value).compact - - if !results.present? + if uploads.empty? @chain_next_response = true return( { prompts: prompts, error: - "Something went wrong inform user you could not generate image, check Discourse logs, give up don't try anymore", + "Failed to generate images. 
#{errors.first || "Please check your image generation tool configuration."}", give_up: true, } ) end - uploads = [] - - results.each_with_index do |result, index| - result[:artifacts].each do |image| - Tempfile.create("v1_txt2img_#{index}.png") do |file| - file.binmode - file.write(Base64.decode64(image[:base64])) - file.rewind - uploads << { - prompt: prompts[index], - upload: - UploadCreator.new( - file, - "image.png", - for_private_message: context.private_message, - ).create_for(bot_user.id), - seed: image[:seed], - } - end - end - end - @custom_raw = <<~RAW [grid] @@ -161,6 +160,37 @@ def invoke def description_args { prompt: prompts.first } end + + private + + def aspect_ratio_to_size(aspect_ratio) + return nil unless aspect_ratio + + # Map common aspect ratios to size strings + # Different providers may handle these differently + case aspect_ratio + when "16:9" + "1792x1024" + when "1:1" + "1024x1024" + when "21:9" + "2048x768" + when "2:3" + "896x1152" + when "3:2" + "1152x896" + when "4:5" + "832x1216" + when "5:4" + "1216x832" + when "9:16" + "1024x1792" + when "9:21" + "768x2048" + else + nil + end + end end end end diff --git a/plugins/discourse-ai/lib/personas/tools/tool.rb b/plugins/discourse-ai/lib/personas/tools/tool.rb index 1b44d754b9abd..9299eafc400c5 100644 --- a/plugins/discourse-ai/lib/personas/tools/tool.rb +++ b/plugins/discourse-ai/lib/personas/tools/tool.rb @@ -45,6 +45,13 @@ def allow_partial_tool_calls? def inject_prompt(prompt:, context:, persona:) end + + def available_custom_image_tools + image_tool_ids = AiTool.where(enabled: true, is_image_generation_tool: true).pluck(:id) + image_tool_ids.map do |tool_id| + DiscourseAi::Personas::Tools::Custom.class_instance(tool_id) + end + end end # llm being public makes it a bit easier to test diff --git a/plugins/discourse-ai/spec/lib/modules/ai_bot/playground_spec.rb b/plugins/discourse-ai/spec/lib/modules/ai_bot/playground_spec.rb index bc7310251381a..b6a6d2402d078 100644 --- a/plugins/discourse-ai/spec/lib/modules/ai_bot/playground_spec.rb +++ b/plugins/discourse-ai/spec/lib/modules/ai_bot/playground_spec.rb @@ -1058,69 +1058,6 @@ expect(custom_prompt.last.first).to eq(response2) expect(custom_prompt.last.last).to eq(bot_user.username) end - - context "with Dall E bot" do - before { SiteSetting.ai_openai_api_key = "123" } - - let(:persona) do - AiPersona.find( - DiscourseAi::Personas::Persona.system_personas[DiscourseAi::Personas::DallE3], - ) - end - - let(:bot) { DiscourseAi::Personas::Bot.as(bot_user, persona: persona.class_instance.new) } - let(:data) do - image = - "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg==" - - [{ b64_json: image, revised_prompt: "a pink cow 1" }] - end - - let(:response) do - DiscourseAi::Completions::ToolCall.new( - name: "dall_e", - id: "dall_e", - parameters: { - prompts: ["a pink cow"], - }, - ) - end - - it "properly returns an image when skipping thinking" do - persona.update!(show_thinking: false) - - WebMock.stub_request(:post, SiteSetting.ai_openai_image_generation_url).to_return( - status: 200, - body: { data: data }.to_json, - ) - - DiscourseAi::Completions::Llm.with_prepared_responses([response]) do - playground.reply_to(third_post) - end - - last_post = third_post.topic.reload.posts.order(:post_number).last - - expect(last_post.raw).to include("a pink cow") - end - - it "does not include placeholders in conversation context (simulate DALL-E)" do - WebMock.stub_request(:post, 
SiteSetting.ai_openai_image_generation_url).to_return( - status: 200, - body: { data: data }.to_json, - ) - - DiscourseAi::Completions::Llm.with_prepared_responses([response]) do - playground.reply_to(third_post) - end - - last_post = third_post.topic.reload.posts.order(:post_number).last - custom_prompt = PostCustomPrompt.where(post_id: last_post.id).first.custom_prompt - - # DALL E has custom_raw, we do not want to inject this into the prompt stream - expect(custom_prompt.length).to eq(2) - expect(custom_prompt.to_s).not_to include("
") - end - end end describe "#canceling a completions" do diff --git a/plugins/discourse-ai/spec/lib/personas/persona_spec.rb b/plugins/discourse-ai/spec/lib/personas/persona_spec.rb index ace2792bd195d..55da39b16e494 100644 --- a/plugins/discourse-ai/spec/lib/personas/persona_spec.rb +++ b/plugins/discourse-ai/spec/lib/personas/persona_spec.rb @@ -82,23 +82,19 @@ def system_prompt end it "can parse string that are wrapped in quotes" do - SiteSetting.ai_stability_api_key = "123" - tool_call = DiscourseAi::Completions::ToolCall.new( - name: "image", + name: "search", id: "call_JtYQMful5QKqw97XFsHzPweB", parameters: { - prompts: ["cat oil painting", "big car"], - aspect_ratio: "16:9", + search_query: "\"quoted search term\"", }, ) tool_instance = - DiscourseAi::Personas::Artist.new.find_tool(tool_call, bot_user: nil, llm: nil, context: nil) + DiscourseAi::Personas::General.new.find_tool(tool_call, bot_user: nil, llm: nil, context: nil) - expect(tool_instance.parameters[:prompts]).to eq(["cat oil painting", "big car"]) - expect(tool_instance.parameters[:aspect_ratio]).to eq("16:9") + expect(tool_instance.parameters[:search_query]).to eq("quoted search term") end it "enforces enums" do @@ -155,23 +151,6 @@ def system_prompt expect(search.parameters.key?(:foo)).to eq(false) end - it "can correctly parse arrays in tools" do - SiteSetting.ai_openai_api_key = "123" - - tool_call = - DiscourseAi::Completions::ToolCall.new( - name: "dall_e", - id: "call_JtYQMful5QKqw97XFsHzPweB", - parameters: { - prompts: ["cat oil painting", "big car"], - }, - ) - - tool_instance = - DiscourseAi::Personas::DallE3.new.find_tool(tool_call, bot_user: nil, llm: nil, context: nil) - expect(tool_instance.parameters[:prompts]).to eq(["cat oil painting", "big car"]) - end - describe "custom personas" do it "is able to find custom personas" do Group.refresh_automatic_groups! @@ -217,15 +196,22 @@ def system_prompt it "includes all personas by default" do Group.refresh_automatic_groups! - # must be enabled to see it - SiteSetting.ai_stability_api_key = "abc" SiteSetting.ai_google_custom_search_api_key = "abc" SiteSetting.ai_google_custom_search_cx = "abc123" - # should be ordered by priority and then alpha - expect(DiscourseAi::Personas::Persona.all(user: user).map(&:superclass)).to contain_exactly( + # Note: Artist and Designer personas require custom image generation tools + # configured via AiTool. Testing them would require creating tools within + # the test transaction, which causes query isolation issues. They are tested + # separately in their respective tool specs. 
+ # Filter to only system personas with specific classes (reject base Persona class) + personas = + DiscourseAi::Personas::Persona + .all(user: user) + .select(&:system) + .map(&:superclass) + .reject { |klass| klass == DiscourseAi::Personas::Persona } + expect(personas).to include( DiscourseAi::Personas::General, - DiscourseAi::Personas::Artist, DiscourseAi::Personas::Creative, DiscourseAi::Personas::DiscourseHelper, DiscourseAi::Personas::Discover, @@ -236,9 +222,14 @@ def system_prompt ) # it should allow staff access to WebArtifactCreator - expect(DiscourseAi::Personas::Persona.all(user: admin).map(&:superclass)).to contain_exactly( + admin_personas = + DiscourseAi::Personas::Persona + .all(user: admin) + .select(&:system) + .map(&:superclass) + .reject { |klass| klass == DiscourseAi::Personas::Persona } + expect(admin_personas).to include( DiscourseAi::Personas::General, - DiscourseAi::Personas::Artist, DiscourseAi::Personas::Creative, DiscourseAi::Personas::DiscourseHelper, DiscourseAi::Personas::Discover, @@ -250,11 +241,19 @@ def system_prompt ) # omits personas if key is missing - SiteSetting.ai_stability_api_key = "" SiteSetting.ai_google_custom_search_api_key = "" SiteSetting.ai_artifact_security = "disabled" - expect(DiscourseAi::Personas::Persona.all(user: admin).map(&:superclass)).to contain_exactly( + # Filter to only system personas with specific persona classes (not the base Persona class) + # The base Persona class appears for personas that don't have required tools available + system_persona_classes = + DiscourseAi::Personas::Persona + .all(user: admin) + .select(&:system) + .map(&:superclass) + .reject { |klass| klass == DiscourseAi::Personas::Persona } + + expect(system_persona_classes).to contain_exactly( DiscourseAi::Personas::General, DiscourseAi::Personas::SqlHelper, DiscourseAi::Personas::SettingsExplorer, @@ -268,7 +267,14 @@ def system_prompt DiscourseAi::Personas::Persona.system_personas[DiscourseAi::Personas::General], ).update!(enabled: false) - expect(DiscourseAi::Personas::Persona.all(user: user).map(&:superclass)).to contain_exactly( + system_persona_classes_after_disable = + DiscourseAi::Personas::Persona + .all(user: user) + .select(&:system) + .map(&:superclass) + .reject { |klass| klass == DiscourseAi::Personas::Persona } + + expect(system_persona_classes_after_disable).to contain_exactly( DiscourseAi::Personas::SqlHelper, DiscourseAi::Personas::SettingsExplorer, DiscourseAi::Personas::Creative, diff --git a/plugins/discourse-ai/spec/lib/personas/tools/create_image_spec.rb b/plugins/discourse-ai/spec/lib/personas/tools/create_image_spec.rb index f8913dfe299d7..2322d3b8c9aa6 100644 --- a/plugins/discourse-ai/spec/lib/personas/tools/create_image_spec.rb +++ b/plugins/discourse-ai/spec/lib/personas/tools/create_image_spec.rb @@ -4,12 +4,40 @@ let(:prompts) { ["a watercolor painting", "an abstract design"] } fab!(:gpt_35_turbo) { Fabricate(:llm_model, name: "gpt-3.5-turbo") } + fab!(:admin) + + fab!(:test_upload) do + Fabricate( + :upload, + sha1: Upload.sha1_from_short_url("upload://test123"), + original_filename: "test_image.png", + ) + end + + fab!(:image_tool) do + AiTool.create!( + name: "test_image_generator", + tool_name: "test_image_generator", + description: "Test image generation tool", + summary: "Generates test images", + parameters: [{ name: "prompt", type: "string", required: true }], + script: <<~JS, + function invoke(params) { + upload.create("test_image.png", 
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg=="); + chain.setCustomRaw(`![${params.prompt}](upload://test123)`); + return { result: "success" }; + } + JS + created_by_id: admin.id, + enabled: true, + is_image_generation_tool: true, + ) + end before do enable_current_plugin SiteSetting.ai_bot_enabled = true toggle_enabled_bots(bots: [gpt_35_turbo]) - SiteSetting.ai_openai_api_key = "abc" end let(:bot_user) { DiscourseAi::AiBot::EntryPoint.find_user_from_model(gpt_35_turbo.name) } @@ -18,122 +46,63 @@ let(:create_image) { described_class.new({ prompts: prompts }, llm: llm, bot_user: bot_user) } - let(:base64_image) do - "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg==" - end + describe "#invoke" do + it "returns error when no image generation tools are configured" do + image_tool.update!(enabled: false) + + result = create_image.invoke(&progress_blk) - describe "#process" do - it "can reject generation of images and return a proper error to llm" do - error_message = { - error: { - message: - "Your request was rejected as a result of our safety system. Your request may contain content that is not allowed by our safety system.", - type: "user_error", - param: nil, - code: "moderation_blocked", - }, - } - - WebMock.stub_request(:post, "https://api.openai.com/v1/images/generations").to_return( - status: 400, - body: error_message.to_json, - ) - - info = create_image.invoke(&progress_blk).to_json - expect(info).to include("Your request was rejected as a result of our safety system.") + expect(result[:error]).to include("No image generation tools configured") expect(create_image.chain_next_response?).to eq(true) end - it "can generate images with gpt-image-1 model" do - data = [{ b64_json: base64_image, revised_prompt: "a watercolor painting of flowers" }] - - WebMock - .stub_request(:post, "https://api.openai.com/v1/images/generations") - .with do |request| - json = JSON.parse(request.body, symbolize_names: true) - - expect(prompts).to include(json[:prompt]) - expect(json[:model]).to eq("gpt-image-1") - expect(json[:size]).to eq("auto") - true - end - .to_return(status: 200, body: { data: data }.to_json) - - info = create_image.invoke(&progress_blk).to_json - - expect(JSON.parse(info)).to eq( - { - "prompts" => [ - { - "prompt" => "a watercolor painting of flowers", - "url" => "upload://pv9zsrM93Jz3U8xELTJCPYU2DD0.png", - }, - { - "prompt" => "a watercolor painting of flowers", - "url" => "upload://pv9zsrM93Jz3U8xELTJCPYU2DD0.png", - }, - ], - }, - ) - expect(create_image.custom_raw).to include("upload://") + + it "delegates to available custom image generation tool" do + result = create_image.invoke(&progress_blk) + + expect(result[:prompts]).to be_an(Array) + expect(result[:prompts].length).to eq(2) + expect(result[:prompts].first[:prompt]).to eq("a watercolor painting") + expect(result[:prompts].first[:url]).to include("upload://") expect(create_image.custom_raw).to include("[grid]") - expect(create_image.custom_raw).to include("a watercolor painting of flowers") + expect(create_image.custom_raw).to include("upload://") end - it "can defaults to auto size" do - create_image_with_size = - described_class.new({ prompts: ["a landscape"] }, llm: llm, bot_user: bot_user) - - data = [{ b64_json: base64_image, revised_prompt: "a detailed landscape" }] - - WebMock - .stub_request(:post, "https://api.openai.com/v1/images/generations") - .with do |request| - json = JSON.parse(request.body, 
symbolize_names: true) - - expect(json[:prompt]).to eq("a landscape") - expect(json[:size]).to eq("auto") - true - end - .to_return(status: 200, body: { data: data }.to_json) - - info = create_image_with_size.invoke(&progress_blk).to_json - expect(JSON.parse(info)).to eq( - "prompts" => [ - { - "prompt" => "a detailed landscape", - "url" => "upload://pv9zsrM93Jz3U8xELTJCPYU2DD0.png", - }, - ], - ) + it "limits to 4 prompts maximum" do + many_prompts = ["prompt 1", "prompt 2", "prompt 3", "prompt 4", "prompt 5", "prompt 6"] + create_image_many = + described_class.new({ prompts: many_prompts }, llm: llm, bot_user: bot_user) + + result = create_image_many.invoke(&progress_blk) + + expect(result[:prompts].length).to eq(4) end - it "handles custom API endpoint" do - SiteSetting.ai_openai_image_generation_url = "https://custom-api.example.com/images/generate" - - data = [{ b64_json: base64_image, revised_prompt: "a watercolor painting" }] - - WebMock - .stub_request(:post, SiteSetting.ai_openai_image_generation_url) - .with do |request| - json = JSON.parse(request.body, symbolize_names: true) - expect(prompts).to include(json[:prompt]) - true - end - .to_return(status: 200, body: { data: data }.to_json) - - info = create_image.invoke(&progress_blk).to_json - expect(JSON.parse(info)).to eq( - "prompts" => [ - { - "prompt" => "a watercolor painting", - "url" => "upload://pv9zsrM93Jz3U8xELTJCPYU2DD0.png", - }, - { - "prompt" => "a watercolor painting", - "url" => "upload://pv9zsrM93Jz3U8xELTJCPYU2DD0.png", - }, - ], - ) + it "handles errors from custom tools gracefully" do + # Create a tool that raises an error + failing_tool = + AiTool.create!( + name: "failing_tool", + tool_name: "failing_tool", + description: "A tool that fails", + summary: "Fails", + parameters: [{ name: "prompt", type: "string", required: true }], + script: <<~JS, + function invoke(params) { + throw new Error("Tool error"); + } + JS + created_by_id: admin.id, + enabled: true, + is_image_generation_tool: true, + ) + + # Disable the working tool so the failing one is used + image_tool.update!(enabled: false) + + result = create_image.invoke(&progress_blk) + + expect(result[:error]).to be_present + expect(create_image.chain_next_response?).to eq(true) end end end diff --git a/plugins/discourse-ai/spec/lib/personas/tools/dall_e_spec.rb b/plugins/discourse-ai/spec/lib/personas/tools/dall_e_spec.rb deleted file mode 100644 index afdf34d2b6b4f..0000000000000 --- a/plugins/discourse-ai/spec/lib/personas/tools/dall_e_spec.rb +++ /dev/null @@ -1,97 +0,0 @@ -#frozen_string_literal: true - -RSpec.describe DiscourseAi::Personas::Tools::DallE do - let(:prompts) { ["a pink cow", "a red cow"] } - - fab!(:gpt_35_turbo) { Fabricate(:llm_model, name: "gpt-3.5-turbo") } - - before do - enable_current_plugin - SiteSetting.ai_bot_enabled = true - toggle_enabled_bots(bots: [gpt_35_turbo]) - SiteSetting.ai_openai_api_key = "abc" - end - - let(:bot_user) { DiscourseAi::AiBot::EntryPoint.find_user_from_model(gpt_35_turbo.name) } - let(:llm) { DiscourseAi::Completions::Llm.proxy(gpt_35_turbo) } - let(:progress_blk) { Proc.new {} } - - let(:dall_e) { described_class.new({ prompts: prompts }, llm: llm, bot_user: bot_user) } - - let(:base64_image) do - "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg==" - end - - describe "#process" do - it "can generate tall images" do - generator = - described_class.new( - { prompts: ["a cat"], aspect_ratio: "tall" }, - llm: llm, - bot_user: bot_user, - ) - - data = [{ 
b64_json: base64_image, revised_prompt: "a tall cat" }] - - WebMock - .stub_request(:post, "https://api.openai.com/v1/images/generations") - .with do |request| - json = JSON.parse(request.body, symbolize_names: true) - - expect(json[:prompt]).to eq("a cat") - expect(json[:size]).to eq("1024x1792") - true - end - .to_return(status: 200, body: { data: data }.to_json) - - info = generator.invoke(&progress_blk).to_json - expect(JSON.parse(info)).to eq("prompts" => ["a tall cat"]) - end - - it "can generate correct info with azure" do - _post = Fabricate(:post) - - SiteSetting.ai_openai_image_generation_url = "https://test.azure.com/some_url" - - data = [{ b64_json: base64_image, revised_prompt: "a pink cow 1" }] - - WebMock - .stub_request(:post, SiteSetting.ai_openai_image_generation_url) - .with do |request| - json = JSON.parse(request.body, symbolize_names: true) - - expect(prompts).to include(json[:prompt]) - expect(request.headers["Api-Key"]).to eq("abc") - true - end - .to_return(status: 200, body: { data: data }.to_json) - - info = dall_e.invoke(&progress_blk).to_json - - expect(JSON.parse(info)).to eq("prompts" => ["a pink cow 1", "a pink cow 1"]) - expect(dall_e.custom_raw).to include("upload://") - expect(dall_e.custom_raw).to include("[grid]") - expect(dall_e.custom_raw).to include("a pink cow 1") - end - - it "can generate correct info" do - data = [{ b64_json: base64_image, revised_prompt: "a pink cow 1" }] - - WebMock - .stub_request(:post, "https://api.openai.com/v1/images/generations") - .with do |request| - json = JSON.parse(request.body, symbolize_names: true) - expect(prompts).to include(json[:prompt]) - true - end - .to_return(status: 200, body: { data: data }.to_json) - - info = dall_e.invoke(&progress_blk).to_json - - expect(JSON.parse(info)).to eq("prompts" => ["a pink cow 1", "a pink cow 1"]) - expect(dall_e.custom_raw).to include("upload://") - expect(dall_e.custom_raw).to include("[grid]") - expect(dall_e.custom_raw).to include("a pink cow 1") - end - end -end diff --git a/plugins/discourse-ai/spec/lib/personas/tools/edit_image_spec.rb b/plugins/discourse-ai/spec/lib/personas/tools/edit_image_spec.rb index 930000e19ba5e..1f2cab03ae4e9 100644 --- a/plugins/discourse-ai/spec/lib/personas/tools/edit_image_spec.rb +++ b/plugins/discourse-ai/spec/lib/personas/tools/edit_image_spec.rb @@ -2,19 +2,51 @@ RSpec.describe DiscourseAi::Personas::Tools::EditImage do fab!(:gpt_35_turbo) { Fabricate(:llm_model, name: "gpt-3.5-turbo") } + fab!(:admin) + fab!(:user) + + fab!(:edited_upload) do + Fabricate( + :upload, + sha1: Upload.sha1_from_short_url("upload://edited456"), + original_filename: "edited_image.png", + ) + end + + fab!(:image_tool) do + AiTool.create!( + name: "test_image_editor", + tool_name: "test_image_editor", + description: "Test image editing tool", + summary: "Edits test images", + parameters: [ + { name: "prompt", type: "string", required: true }, + { name: "image_urls", type: "array", item_type: "string", required: true }, + ], + script: <<~JS, + function invoke(params) { + upload.create("edited_image.png", "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg=="); + chain.setCustomRaw(`![${params.prompt}](upload://edited456)`); + return { result: "success" }; + } + JS + created_by_id: admin.id, + enabled: true, + is_image_generation_tool: true, + ) + end before do enable_current_plugin SiteSetting.ai_bot_enabled = true toggle_enabled_bots(bots: [gpt_35_turbo]) - SiteSetting.ai_openai_api_key = "abc" end 
let(:image_upload) do UploadCreator.new( File.open(Rails.root.join("spec/fixtures/images/smallest.png")), "smallest.png", - ).create_for(Discourse.system_user.id) + ).create_for(user.id) end let(:bot_user) { DiscourseAi::AiBot::EntryPoint.find_user_from_model(gpt_35_turbo.name) } @@ -28,83 +60,130 @@ { image_urls: [image_upload.short_url], prompt: prompt }, llm: llm, bot_user: bot_user, + context: DiscourseAi::Personas::BotContext.new(user: user), ) end - let(:base64_image) do - "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg==" - end + describe "#invoke" do + it "returns error when no image generation tools are configured" do + image_tool.update!(enabled: false) - describe "#process" do - it "can reject generation of images and return a proper error to llm" do - error_message = { - error: { - message: - "Your request was rejected as a result of our safety system. Your request may contain content that is not allowed by our safety system.", - type: "user_error", - param: nil, - code: "moderation_blocked", - }, - } - - WebMock.stub_request(:post, "https://api.openai.com/v1/images/edits").to_return( - status: 400, - body: error_message.to_json, - ) - - info = edit_image.invoke(&progress_blk).to_json - expect(info).to include("Your request was rejected as a result of our safety system.") + result = edit_image.invoke(&progress_blk) + + expect(result[:error]).to include("No image generation tools configured") expect(edit_image.chain_next_response?).to eq(true) end - it "can edit an image with the GPT image model" do - data = [{ b64_json: base64_image, revised_prompt: "image with rainbow added in background" }] - - # Stub the OpenAI API call - WebMock - .stub_request(:post, "https://api.openai.com/v1/images/edits") - .with do |request| - # The request is multipart/form-data, so we can't easily parse the body - # Just check that the request was made to the right endpoint - expect(request.headers["Content-Type"]).to include("multipart/form-data") - true - end - .to_return(status: 200, body: { data: data }.to_json) - - info = edit_image.invoke(&progress_blk).to_json - - expect(JSON.parse(info)).to eq( - { - "prompt" => "image with rainbow added in background", - "url" => "upload://pv9zsrM93Jz3U8xELTJCPYU2DD0.png", - }, - ) - expect(edit_image.custom_raw).to include("upload://") - expect(edit_image.custom_raw).to include("![image with rainbow added in background]") + it "returns error when no valid images provided" do + edit_image_no_images = + described_class.new( + { image_urls: [], prompt: prompt }, + llm: llm, + bot_user: bot_user, + context: DiscourseAi::Personas::BotContext.new(user: user), + ) + + result = edit_image_no_images.invoke(&progress_blk) + + expect(result[:error]).to include("No valid images provided") + expect(edit_image_no_images.chain_next_response?).to eq(true) end - it "handles custom API endpoint" do - SiteSetting.ai_openai_image_edit_url = "https://custom-api.example.com/images/edit" + it "delegates to available custom image editing tool" do + result = edit_image.invoke(&progress_blk) - data = [{ b64_json: base64_image, revised_prompt: "image with rainbow added" }] + expect(result[:prompt]).to eq(prompt) + expect(result[:url]).to include("upload://") + expect(edit_image.custom_raw).to include("upload://") + expect(edit_image.custom_raw).to include("![") + end - # Stub the custom API endpoint - WebMock - .stub_request(:post, SiteSetting.ai_openai_image_edit_url) - .with do |request| - expect(request.headers["Content-Type"]).to 
include("multipart/form-data") - true - end - .to_return(status: 200, body: { data: data }.to_json) + it "checks Guardian permissions for uploads from private posts" do + # Create a private message post with an upload + private_topic = Fabricate(:private_message_topic, user: admin) + private_post = Fabricate(:post, topic: private_topic, user: admin) + + # Create an upload associated with the private post + private_upload = + UploadCreator.new( + File.open(Rails.root.join("spec/fixtures/images/smallest.png")), + "private.png", + ).create_for(admin.id) + private_upload.update!(access_control_post_id: private_post.id) + + # Try to edit the private upload as a different user who doesn't have access + edit_private_image = + described_class.new( + { image_urls: [private_upload.short_url], prompt: prompt }, + llm: llm, + bot_user: bot_user, + context: DiscourseAi::Personas::BotContext.new(user: user), + ) + + result = edit_private_image.invoke(&progress_blk) + + expect(result[:error]).to include("Access denied") + expect(edit_private_image.chain_next_response?).to eq(true) + end - info = edit_image.invoke(&progress_blk).to_json + it "allows editing uploads from private posts if user has access" do + # Create a private message post with an upload + private_topic = Fabricate(:private_message_topic, user: user) + private_post = Fabricate(:post, topic: private_topic, user: user) + + # Create an upload associated with the private post + private_upload = + UploadCreator.new( + File.open(Rails.root.join("spec/fixtures/images/smallest.png")), + "private.png", + ).create_for(user.id) + private_upload.update!(access_control_post_id: private_post.id) + + # Try to edit the private upload as the same user who has access + edit_private_image = + described_class.new( + { image_urls: [private_upload.short_url], prompt: prompt }, + llm: llm, + bot_user: bot_user, + context: DiscourseAi::Personas::BotContext.new(user: user), + ) + + result = edit_private_image.invoke(&progress_blk) + + # Should succeed since user has access + expect(result[:error]).to be_nil + expect(result[:url]).to include("upload://") + end - expect(JSON.parse(info)).to eq( - { - "prompt" => "image with rainbow added", - "url" => "upload://pv9zsrM93Jz3U8xELTJCPYU2DD0.png", - }, - ) + it "handles errors from custom tools gracefully" do + # Create a tool that raises an error + failing_tool = + AiTool.create!( + name: "failing_edit_tool", + tool_name: "failing_edit_tool", + description: "A tool that fails", + summary: "Fails", + parameters: [ + { name: "prompt", type: "string", required: true }, + { name: "image_urls", type: "array", item_type: "string", required: true }, + ], + script: <<~JS, + function invoke(params) { + throw new Error("Tool error"); + } + JS + created_by_id: admin.id, + enabled: true, + is_image_generation_tool: true, + ) + + # Disable the working tool so the failing one is used + image_tool.update!(enabled: false) + + result = edit_image.invoke(&progress_blk) + + expect(result[:error]).to be_present + expect(edit_image.chain_next_response?).to eq(true) end end end diff --git a/plugins/discourse-ai/spec/lib/personas/tools/image_spec.rb b/plugins/discourse-ai/spec/lib/personas/tools/image_spec.rb index 461b324840af4..7ee3219a0ab69 100644 --- a/plugins/discourse-ai/spec/lib/personas/tools/image_spec.rb +++ b/plugins/discourse-ai/spec/lib/personas/tools/image_spec.rb @@ -14,6 +14,52 @@ end fab!(:gpt_35_turbo) { Fabricate(:llm_model, name: "gpt-3.5-turbo") } + fab!(:admin) + + fab!(:test_upload1) do + Fabricate( + :upload, + sha1: 
Upload.sha1_from_short_url("upload://test123"), + original_filename: "test_image1.png", + ) + end + + fab!(:test_upload2) do + Fabricate( + :upload, + sha1: Upload.sha1_from_short_url("upload://test456"), + original_filename: "test_image2.png", + ) + end + + fab!(:image_tool) do + AiTool.create!( + name: "test_image_generator", + tool_name: "test_image_generator", + description: "Test image generation tool", + summary: "Generates test images", + parameters: [ + { name: "prompt", type: "string", required: true }, + { name: "seeds", type: "array", item_type: "integer", required: false }, + ], + script: <<~JS, + function invoke(params) { + // Create images for each seed + const seeds = params.seeds || [99]; + const imageUrls = ["upload://test123", "upload://test456"]; + + upload.create("test_image1.png", "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg=="); + upload.create("test_image2.png", "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg=="); + + chain.setCustomRaw(`![${params.prompt}](${imageUrls[0]}) ![${params.prompt}](${imageUrls[1]})`); + return { seed: 99 }; + } + JS + created_by_id: admin.id, + enabled: true, + is_image_generation_tool: true, + ) + end before do enable_current_plugin @@ -27,31 +73,13 @@ describe "#process" do it "can generate correct info" do - _post = Fabricate(:post) - - SiteSetting.ai_stability_api_url = "https://api.stability.dev" - SiteSetting.ai_stability_api_key = "abc" - - image = - "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg==" - - artifacts = [{ base64: image, seed: 99 }] - - WebMock - .stub_request( - :post, - "https://api.stability.dev/v1/generation/#{SiteSetting.ai_stability_engine}/text-to-image", - ) - .with do |request| - json = JSON.parse(request.body, symbolize_names: true) - expect(prompts).to include(json[:text_prompts][0][:text]) - true - end - .to_return(status: 200, body: { artifacts: artifacts }.to_json) - info = tool.invoke(&progress_blk).to_json - expect(JSON.parse(info)).to eq("prompts" => ["a pink cow", "a red cow"], "seeds" => [99, 99]) + # Custom tools don't provide seeds, so they will be nil + expect(JSON.parse(info)).to eq( + "prompts" => ["a pink cow", "a red cow"], + "seeds" => [nil, nil], + ) expect(tool.custom_raw).to include("upload://") expect(tool.custom_raw).to include("[grid]") expect(tool.custom_raw).to include("a pink cow") diff --git a/plugins/discourse-ai/spec/shared/inference/stability_generator_spec.rb b/plugins/discourse-ai/spec/shared/inference/stability_generator_spec.rb deleted file mode 100644 index a8d334b8888d9..0000000000000 --- a/plugins/discourse-ai/spec/shared/inference/stability_generator_spec.rb +++ /dev/null @@ -1,81 +0,0 @@ -# frozen_string_literal: true - -describe DiscourseAi::Inference::StabilityGenerator do - def gen(prompt) - DiscourseAi::Inference::StabilityGenerator.perform!(prompt) - end - - let :sd3_response do - { image: "BASE64", seed: 1 }.to_json - end - - before { enable_current_plugin } - - it "is able to generate sd3 images" do - SiteSetting.ai_stability_engine = "sd3" - SiteSetting.ai_stability_api_url = "http://www.a.b.c" - SiteSetting.ai_stability_api_key = "123" - - # webmock does not support multipart form data :( - stub_request(:post, "http://www.a.b.c/v2beta/stable-image/generate/sd3").with( - headers: { - "Accept" => "application/json", - "Authorization" => "Bearer 123", - "Content-Type" => "multipart/form-data", - "Host" => "www.a.b.c", - 
"User-Agent" => DiscourseAi::AiBot::USER_AGENT, - }, - ).to_return(status: 200, body: sd3_response, headers: {}) - - json = - DiscourseAi::Inference::StabilityGenerator.perform!( - "a cow", - aspect_ratio: "16:9", - image_count: 2, - ) - - expect(json).to eq(artifacts: [{ base64: "BASE64", seed: 1 }, { base64: "BASE64", seed: 1 }]) - end - - it "sets dimensions to 512x512 for non XL model" do - SiteSetting.ai_stability_engine = "stable-diffusion-v1-5" - SiteSetting.ai_stability_api_url = "http://www.a.b.c" - SiteSetting.ai_stability_api_key = "123" - - stub_request(:post, "http://www.a.b.c/v1/generation/stable-diffusion-v1-5/text-to-image") - .with do |request| - json = JSON.parse(request.body) - expect(json["text_prompts"][0]["text"]).to eq("a cow") - expect(json["width"]).to eq(512) - expect(json["height"]).to eq(512) - expect(request.headers["Authorization"]).to eq("Bearer 123") - expect(request.headers["Content-Type"]).to eq("application/json") - true - end - .to_return(status: 200, body: "{}", headers: {}) - - gen("a cow") - end - - it "sets dimensions to 1024x1024 for XL model" do - SiteSetting.ai_stability_engine = "stable-diffusion-xl-1024-v1-0" - SiteSetting.ai_stability_api_url = "http://www.a.b.c" - SiteSetting.ai_stability_api_key = "123" - stub_request( - :post, - "http://www.a.b.c/v1/generation/stable-diffusion-xl-1024-v1-0/text-to-image", - ) - .with do |request| - json = JSON.parse(request.body) - expect(json["text_prompts"][0]["text"]).to eq("a cow") - expect(json["width"]).to eq(1024) - expect(json["height"]).to eq(1024) - expect(request.headers["Authorization"]).to eq("Bearer 123") - expect(request.headers["Content-Type"]).to eq("application/json") - true - end - .to_return(status: 200, body: "{}", headers: {}) - - gen("a cow") - end -end