diff --git a/lib/discourse_chatbot/embedding_process.rb b/lib/discourse_chatbot/embedding_process.rb
index 510db20..0da7835 100644
--- a/lib/discourse_chatbot/embedding_process.rb
+++ b/lib/discourse_chatbot/embedding_process.rb
@@ -28,10 +28,40 @@ def upsert(id)
       raise "Overwrite me!"
     end
 
-    def get_embedding_from_api(id)
+    def get_embedding(id)
       raise "Overwrite me!"
     end
 
+    # Calls +setup_api+, then asks +@client+ for an embedding of +text+ using +@model_name+.
+    #
+    # An "error" entry in the response is raised as StandardError; any
+    # StandardError is logged and then re-raised to the caller.
+    #
+    # Returns the value at response["data"][0]["embedding"].
+    def get_embedding_from_api(text)
+      begin
+        self.setup_api
+
+        response = @client.embeddings(
+          parameters: {
+            model: @model_name,
+            input: text
+          }
+        )
+
+        if response.dig("error")
+          error_text = response.dig("error", "message")
+          raise StandardError, error_text
+        end
+      rescue StandardError => e
+        # Post/topic ids are not in scope in this shared method, so log only the error message.
+        Rails.logger.error("Chatbot: Error occurred while attempting to retrieve Embedding: #{e.message}")
+        raise e
+      end
+
+      response.dig("data", 0, "embedding")
+    end
+
     def semantic_search(query)
       raise "Overwrite me!"
 
diff --git a/lib/discourse_chatbot/post/post_embedding_process.rb b/lib/discourse_chatbot/post/post_embedding_process.rb
index 65c7e00..bfa316a 100644
--- a/lib/discourse_chatbot/post/post_embedding_process.rb
+++ b/lib/discourse_chatbot/post/post_embedding_process.rb
@@ -9,7 +9,7 @@ def upsert(post_id)
       if in_scope(post_id)
         if !is_valid(post_id)
 
-          embedding_vector = get_embedding_from_api(post_id)
+          embedding_vector = get_embedding(post_id)
 
           ::DiscourseChatbot::PostEmbedding.upsert({ post_id: post_id, model: SiteSetting.chatbot_open_ai_embeddings_model, embedding: "#{embedding_vector}" }, on_duplicate: :update, unique_by: :post_id)
 
@@ -32,32 +32,13 @@ def upsert(post_id)
       end
     end
 
-    def get_embedding_from_api(post_id)
-      begin
-        self.setup_api
-
-        post = ::Post.find_by(id: post_id)
-        topic = ::Topic.find_by(id: post.topic_id)
-        response = @client.embeddings(
-          parameters: {
-            model: @model_name,
-            input: post.raw[0..SiteSetting.chatbot_open_ai_embeddings_char_limit]
-          }
-        )
-
-        if response.dig("error")
-          error_text = response.dig("error", "message")
-          raise StandardError, error_text
-        end
-      rescue StandardError => e
-        Rails.logger.error("Chatbot: Error occurred while attempting to retrieve Embedding for post id '#{post_id}' in topic id '#{topic.id}': #{e.message}")
-        raise e
-      end
+    def get_embedding(post_id)
+      post = ::Post.find_by(id: post_id)
+      text = post.raw[0..SiteSetting.chatbot_open_ai_embeddings_char_limit]
 
-      embedding_vector = response.dig("data", 0, "embedding")
+      get_embedding_from_api(text)
     end
 
-
     def semantic_search(query)
       self.setup_api
 
@@ -157,9 +138,11 @@ def in_scope(post_id)
     end
 
     def is_valid(post_id)
+      post = ::Post.find_by(id: post_id)
       embedding_record = ::DiscourseChatbot::PostEmbedding.find_by(post_id: post_id)
       return false if !embedding_record.present?
       return false if embedding_record.model != SiteSetting.chatbot_open_ai_embeddings_model
+      return false if post.updated_at > embedding_record.updated_at
       true
     end
 
diff --git a/lib/discourse_chatbot/topic/topic_title_embedding_process.rb b/lib/discourse_chatbot/topic/topic_title_embedding_process.rb
index da72e01..907f7b8 100644
--- a/lib/discourse_chatbot/topic/topic_title_embedding_process.rb
+++ b/lib/discourse_chatbot/topic/topic_title_embedding_process.rb
@@ -9,7 +9,7 @@ def upsert(topic_id)
       if in_scope(topic_id)
         if !is_valid(topic_id)
 
-          embedding_vector = get_embedding_from_api(topic_id)
+          embedding_vector = get_embedding(topic_id)
 
           ::DiscourseChatbot::TopicTitleEmbedding.upsert({ topic_id: topic_id, model: SiteSetting.chatbot_open_ai_embeddings_model, embedding: "#{embedding_vector}" }, on_duplicate: :update, unique_by: :topic_id)
 
@@ -32,28 +32,10 @@ def upsert(topic_id)
       end
     end
 
-    def get_embedding_from_api(topic_id)
-      begin
-        self.setup_api
-
-        topic = ::Topic.find_by(id: topic_id)
-        response = @client.embeddings(
-          parameters: {
-            model: @model_name,
-            input: topic.title
-          }
-        )
-
-        if response.dig("error")
-          error_text = response.dig("error", "message")
-          raise StandardError, error_text
-        end
-      rescue StandardError => e
-        Rails.logger.error("Chatbot: Error occurred while attempting to retrieve Embedding for topic id '#{topic_id}': #{e.message}")
-        raise e
-      end
+    def get_embedding(topic_id)
+      topic = ::Topic.find_by(id: topic_id)
 
-      embedding_vector = response.dig("data", 0, "embedding")
+      get_embedding_from_api(topic.title)
     end
 
 
@@ -138,9 +120,11 @@ def in_scope(topic_id)
     end
 
     def is_valid(topic_id)
+      topic = ::Topic.find_by(id: topic_id)
       embedding_record = ::DiscourseChatbot::TopicTitleEmbedding.find_by(topic_id: topic_id)
       return false if !embedding_record.present?
       return false if embedding_record.model != SiteSetting.chatbot_open_ai_embeddings_model
+      return false if topic.updated_at > embedding_record.updated_at
       true
     end
 
diff --git a/plugin.rb b/plugin.rb
index 7492b82..b10bf68 100644
--- a/plugin.rb
+++ b/plugin.rb
@@ -1,12 +1,10 @@
 # frozen_string_literal: true
 # name: discourse-chatbot
 # about: a plugin that allows you to have a conversation with a configurable chatbot in Discourse Chat, Topics and Private Messages
-# version: 1.5.8
+# version: 1.5.9
 # authors: merefield
 # url: https://github.com/merefield/discourse-chatbot
 
-gem 'mime-types-data', '3.2025.0610', { require: false }
-gem 'mime-types', '3.7.0', { require: false }
 gem 'multipart-post', '2.4.0', { require: false }
 gem 'faraday-multipart', '1.0.4', { require: false }
 gem 'event_stream_parser', '1.0.0', { require: false }
diff --git a/spec/lib/post_embedding_process_spec.rb b/spec/lib/post_embedding_process_spec.rb
index c952fff..3c5e5c2 100644
--- a/spec/lib/post_embedding_process_spec.rb
+++ b/spec/lib/post_embedding_process_spec.rb
@@ -50,9 +50,12 @@
   describe 'validity' do
     it "checks if a post embedding is valid" do
       SiteSetting.chatbot_open_ai_embeddings_model = "text-embedding-ada-002"
+      freeze_time(3.days.ago)
       post = Fabricate(:post)
+      freeze_time(2.days.ago)
       post_embedding = ::DiscourseChatbot::PostEmbedding.create!(post_id: post.id, model: "text-embedding-3-small", embedding: "[#{(1..1536).to_a.join(",")}]")
       expect(subject.is_valid(post.id)).to eq(false)
+      freeze_time(1.days.ago)
       post_embedding = ::DiscourseChatbot::PostEmbedding.upsert({post_id: post.id, model: "text-embedding-ada-002", embedding: "[#{(1..1536).to_a.join(",")}]"}, on_duplicate: :update, unique_by: :post_id)
       expect(subject.is_valid(post.id)).to eq(true)
     end