From c6db384f4e357957b9ff4c8cdc5cb3ab15fc41dd Mon Sep 17 00:00:00 2001 From: merefield Date: Wed, 18 Jun 2025 15:37:36 +0100 Subject: [PATCH 1/5] REFACTOR: break out embedding api call to separate method --- lib/discourse_chatbot/embedding_process.rb | 25 +++++++++++++++- .../post/post_embedding_process.rb | 29 ++++--------------- .../topic/topic_title_embedding_process.rb | 26 +++-------------- plugin.rb | 2 +- 4 files changed, 34 insertions(+), 48 deletions(-) diff --git a/lib/discourse_chatbot/embedding_process.rb b/lib/discourse_chatbot/embedding_process.rb index 510db20..0da7835 100644 --- a/lib/discourse_chatbot/embedding_process.rb +++ b/lib/discourse_chatbot/embedding_process.rb @@ -28,10 +28,33 @@ def upsert(id) raise "Overwrite me!" end - def get_embedding_from_api(id) + def get_embedding(id) raise "Overwrite me!" end + def get_embedding_from_api(text) + begin + self.setup_api + + response = @client.embeddings( + parameters: { + model: @model_name, + input: text + } + ) + + if response.dig("error") + error_text = response.dig("error", "message") + raise StandardError, error_text + end + rescue StandardError => e + Rails.logger.error("Chatbot: Error occurred while attempting to retrieve Embedding for post id '#{post_id}' in topic id '#{topic.id}': #{e.message}") + raise e + end + + embedding_vector = response.dig("data", 0, "embedding") + end + def semantic_search(query) raise "Overwrite me!" 
diff --git a/lib/discourse_chatbot/post/post_embedding_process.rb b/lib/discourse_chatbot/post/post_embedding_process.rb index 65c7e00..3dc8fd0 100644 --- a/lib/discourse_chatbot/post/post_embedding_process.rb +++ b/lib/discourse_chatbot/post/post_embedding_process.rb @@ -9,7 +9,7 @@ def upsert(post_id) if in_scope(post_id) if !is_valid(post_id) - embedding_vector = get_embedding_from_api(post_id) + embedding_vector = get_embedding(post_id) ::DiscourseChatbot::PostEmbedding.upsert({ post_id: post_id, model: SiteSetting.chatbot_open_ai_embeddings_model, embedding: "#{embedding_vector}" }, on_duplicate: :update, unique_by: :post_id) @@ -32,32 +32,13 @@ def upsert(post_id) end end - def get_embedding_from_api(post_id) - begin - self.setup_api - - post = ::Post.find_by(id: post_id) - topic = ::Topic.find_by(id: post.topic_id) - response = @client.embeddings( - parameters: { - model: @model_name, - input: post.raw[0..SiteSetting.chatbot_open_ai_embeddings_char_limit] - } - ) - - if response.dig("error") - error_text = response.dig("error", "message") - raise StandardError, error_text - end - rescue StandardError => e - Rails.logger.error("Chatbot: Error occurred while attempting to retrieve Embedding for post id '#{post_id}' in topic id '#{topic.id}': #{e.message}") - raise e - end + def get_embedding(post_id) + post = ::Post.find_by(id: post_id) + text = post.raw[0..SiteSetting.chatbot_open_ai_embeddings_char_limit] - embedding_vector = response.dig("data", 0, "embedding") + get_embedding_from_api(text) end - def semantic_search(query) self.setup_api diff --git a/lib/discourse_chatbot/topic/topic_title_embedding_process.rb b/lib/discourse_chatbot/topic/topic_title_embedding_process.rb index da72e01..129ab8f 100644 --- a/lib/discourse_chatbot/topic/topic_title_embedding_process.rb +++ b/lib/discourse_chatbot/topic/topic_title_embedding_process.rb @@ -9,7 +9,7 @@ def upsert(topic_id) if in_scope(topic_id) if !is_valid(topic_id) - embedding_vector = 
get_embedding_from_api(topic_id) + embedding_vector = get_embedding(topic_id) ::DiscourseChatbot::TopicTitleEmbedding.upsert({ topic_id: topic_id, model: SiteSetting.chatbot_open_ai_embeddings_model, embedding: "#{embedding_vector}" }, on_duplicate: :update, unique_by: :topic_id) @@ -32,28 +32,10 @@ def upsert(topic_id) end end - def get_embedding_from_api(topic_id) - begin - self.setup_api - - topic = ::Topic.find_by(id: topic_id) - response = @client.embeddings( - parameters: { - model: @model_name, - input: topic.title - } - ) - - if response.dig("error") - error_text = response.dig("error", "message") - raise StandardError, error_text - end - rescue StandardError => e - Rails.logger.error("Chatbot: Error occurred while attempting to retrieve Embedding for topic id '#{topic_id}': #{e.message}") - raise e - end + def get_embedding(topic_id) + topic = ::Topic.find_by(id: topic_id) - embedding_vector = response.dig("data", 0, "embedding") + get_embedding_from_api(topic.title) end diff --git a/plugin.rb b/plugin.rb index 7492b82..4bc74ce 100644 --- a/plugin.rb +++ b/plugin.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true # name: discourse-chatbot # about: a plugin that allows you to have a conversation with a configurable chatbot in Discourse Chat, Topics and Private Messages -# version: 1.5.8 +# version: 1.5.9 # authors: merefield # url: https://github.com/merefield/discourse-chatbot From f91a1ac28952bd9114239d1f360a2ca18ab66bd4 Mon Sep 17 00:00:00 2001 From: merefield Date: Wed, 18 Jun 2025 16:04:16 +0100 Subject: [PATCH 2/5] update mime data gem --- plugin.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugin.rb b/plugin.rb index 4bc74ce..3ea2ac8 100644 --- a/plugin.rb +++ b/plugin.rb @@ -5,7 +5,7 @@ # authors: merefield # url: https://github.com/merefield/discourse-chatbot -gem 'mime-types-data', '3.2025.0610', { require: false } +gem 'mime-types-data', '3.2025.0617', { require: false } gem 'mime-types', '3.7.0', { require: false } gem 
'multipart-post', '2.4.0', { require: false } gem 'faraday-multipart', '1.0.4', { require: false } From e5e6abdf9bee5c699368ece57c8f5255d08b6fc3 Mon Sep 17 00:00:00 2001 From: merefield Date: Wed, 18 Jun 2025 20:51:09 +0100 Subject: [PATCH 3/5] FIX: embeddings not being updated for outdated posts and topics --- lib/discourse_chatbot/post/post_embedding_process.rb | 2 ++ lib/discourse_chatbot/topic/topic_title_embedding_process.rb | 2 ++ plugin.rb | 2 -- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/discourse_chatbot/post/post_embedding_process.rb b/lib/discourse_chatbot/post/post_embedding_process.rb index 3dc8fd0..bfa316a 100644 --- a/lib/discourse_chatbot/post/post_embedding_process.rb +++ b/lib/discourse_chatbot/post/post_embedding_process.rb @@ -138,9 +138,11 @@ def in_scope(post_id) end def is_valid(post_id) + post = ::Post.find_by(id: post_id) embedding_record = ::DiscourseChatbot::PostEmbedding.find_by(post_id: post_id) return false if !embedding_record.present? return false if embedding_record.model != SiteSetting.chatbot_open_ai_embeddings_model + return false if post.updated_at > embedding_record.updated_at true end diff --git a/lib/discourse_chatbot/topic/topic_title_embedding_process.rb b/lib/discourse_chatbot/topic/topic_title_embedding_process.rb index 129ab8f..907f7b8 100644 --- a/lib/discourse_chatbot/topic/topic_title_embedding_process.rb +++ b/lib/discourse_chatbot/topic/topic_title_embedding_process.rb @@ -120,9 +120,11 @@ def in_scope(topic_id) end def is_valid(topic_id) + topic = ::Topic.find_by(id: topic_id) embedding_record = ::DiscourseChatbot::TopicTitleEmbedding.find_by(topic_id: topic_id) return false if !embedding_record.present? 
return false if embedding_record.model != SiteSetting.chatbot_open_ai_embeddings_model + return false if topic.updated_at > embedding_record.updated_at true end diff --git a/plugin.rb b/plugin.rb index 3ea2ac8..b10bf68 100644 --- a/plugin.rb +++ b/plugin.rb @@ -5,8 +5,6 @@ # authors: merefield # url: https://github.com/merefield/discourse-chatbot -gem 'mime-types-data', '3.2025.0617', { require: false } -gem 'mime-types', '3.7.0', { require: false } gem 'multipart-post', '2.4.0', { require: false } gem 'faraday-multipart', '1.0.4', { require: false } gem 'event_stream_parser', '1.0.0', { require: false } From a84411a8bd347eda1842ff4f54df8b808a3d8e05 Mon Sep 17 00:00:00 2001 From: merefield Date: Wed, 18 Jun 2025 22:44:45 +0100 Subject: [PATCH 4/5] FIX: updated test --- spec/lib/post_embedding_process_spec.rb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/spec/lib/post_embedding_process_spec.rb b/spec/lib/post_embedding_process_spec.rb index c952fff..108748d 100644 --- a/spec/lib/post_embedding_process_spec.rb +++ b/spec/lib/post_embedding_process_spec.rb @@ -50,9 +50,14 @@ describe 'validity' do it "checks if a post embedding is valid" do SiteSetting.chatbot_open_ai_embeddings_model = "text-embedding-ada-002" + freeze_time(3.days.ago) post = Fabricate(:post) + pp post.updated_at + freeze_time(2.days.ago) post_embedding = ::DiscourseChatbot::PostEmbedding.create!(post_id: post.id, model: "text-embedding-3-small", embedding: "[#{(1..1536).to_a.join(",")}]") + pp post_embedding.updated_at expect(subject.is_valid(post.id)).to eq(false) + freeze_time(1.days.ago) post_embedding = ::DiscourseChatbot::PostEmbedding.upsert({post_id: post.id, model: "text-embedding-ada-002", embedding: "[#{(1..1536).to_a.join(",")}]"}, on_duplicate: :update, unique_by: :post_id) expect(subject.is_valid(post.id)).to eq(true) end From 48e86fdd3dc4e18f61b4c84f089c9b07065d9768 Mon Sep 17 00:00:00 2001 From: merefield Date: Wed, 18 Jun 2025 22:48:23 +0100 Subject: [PATCH 5/5] remove 
console prints --- spec/lib/post_embedding_process_spec.rb | 2 -- 1 file changed, 2 deletions(-) diff --git a/spec/lib/post_embedding_process_spec.rb b/spec/lib/post_embedding_process_spec.rb index 108748d..3c5e5c2 100644 --- a/spec/lib/post_embedding_process_spec.rb +++ b/spec/lib/post_embedding_process_spec.rb @@ -52,10 +52,8 @@ SiteSetting.chatbot_open_ai_embeddings_model = "text-embedding-ada-002" freeze_time(3.days.ago) post = Fabricate(:post) - pp post.updated_at freeze_time(2.days.ago) post_embedding = ::DiscourseChatbot::PostEmbedding.create!(post_id: post.id, model: "text-embedding-3-small", embedding: "[#{(1..1536).to_a.join(",")}]") - pp post_embedding.updated_at expect(subject.is_valid(post.id)).to eq(false) freeze_time(1.days.ago) post_embedding = ::DiscourseChatbot::PostEmbedding.upsert({post_id: post.id, model: "text-embedding-ada-002", embedding: "[#{(1..1536).to_a.join(",")}]"}, on_duplicate: :update, unique_by: :post_id)