From 5f561c83810939ab4ddff81252aed74f32625f19 Mon Sep 17 00:00:00 2001 From: mathrailsAI Date: Thu, 15 May 2025 01:01:22 -0500 Subject: [PATCH] Add support for question, prompt, and batch size configuration in processing - Enable optional `question` and `prompt` parameters for entity, sentiment, and key phrase extraction. - Improve batch processing by allowing configurable batch sizes for OpenAI client. - Implement default prompts with support for custom overrides. --- Gemfile.lock | 2 +- README.md | 271 +++++++++++++----- .../clients/entities/aws_client.rb | 2 +- .../clients/entities/open_ai_client.rb | 31 +- .../clients/key_phrases/aws_client.rb | 2 +- .../clients/key_phrases/open_ai_client.rb | 38 ++- .../sentiment/aws_comprehend_client.rb | 2 +- .../clients/sentiment/open_ai_client.rb | 122 ++++---- .../clients/sentiment/sentimental_client.rb | 2 +- lib/sentiment_insights/insights/entities.rb | 5 +- .../insights/key_phrases.rb | 6 +- lib/sentiment_insights/insights/sentiment.rb | 6 +- sentiment_insights.gemspec | 4 +- .../sentiment_insights_entities_spec.rb | 16 +- .../sentiment_insights_key_phrases_spec.rb | 12 +- spec/insights/sentiment_spec.rb | 10 +- 16 files changed, 343 insertions(+), 188 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index a79d3b9..eef2106 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -2,7 +2,7 @@ PATH remote: . specs: sentiment_insights (0.1.0) - aws-sdk-comprehend + aws-sdk-comprehend (>= 1.98.0) sentimental (~> 1.4.0) GEM diff --git a/README.md b/README.md index 7b94802..c5776b5 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,94 @@ -# SentimentInsights 💬📊 +# SentimentInsights -**SentimentInsights** is a Ruby gem that helps you uncover meaningful insights from open-ended survey responses using Natural Language Processing (NLP). It supports multi-provider analysis via OpenAI, AWS Comprehend, or a local fallback engine.
+**SentimentInsights** is a Ruby gem for extracting sentiment, key phrases, and named entities from survey responses or free-form textual data. It offers a plug-and-play interface to different NLP providers, including OpenAI and AWS. --- -## ✨ Features +## Table of Contents -### βœ… 1. Sentiment Analysis +- [Installation](#installation) +- [Configuration](#configuration) +- [Usage](#usage) + - [Sentiment Analysis](#sentiment-analysis) + - [Key Phrase Extraction](#key-phrase-extraction) + - [Entity Extraction](#entity-extraction) +- [Provider Options & Custom Prompts](#provider-options--custom-prompts) +- [Full Example](#full-example) +- [Contributing](#contributing) +- [License](#license) + +--- + +## Installation + +Add to your Gemfile: + +```ruby +gem 'sentiment_insights' +``` + +Then install: + +```bash +bundle install +``` + +Or install it directly: + +```bash +gem install sentiment_insights +``` + +--- + +## Configuration + +Configure the provider and (if using OpenAI or AWS) your API key: + +```ruby +require 'sentiment_insights' + +# For OpenAI +SentimentInsights.configure do |config| + config.provider = :openai + config.openai_api_key = ENV["OPENAI_API_KEY"] +end + +# For AWS +SentimentInsights.configure do |config| + config.provider = :aws + config.aws_region = 'us-east-1' +end + +# For sentimental +SentimentInsights.configure do |config| + config.provider = :sentimental +end +``` + +Supported providers: +- `:openai` +- `:aws` +- `:sentimental` (local fallback, limited feature set) + +--- + +## Usage + +Data entries should be hashes with at least an `:answer` key. Optionally include segmentation info under `:segment`. 
+ +```ruby +entries = [ + { answer: "Amazon Checkout was smooth!", segment: { age_group: "18-25", gender: "Female" } }, + { answer: "Walmart Shipping was delayed.", segment: { age_group: "18-25", gender: "Female" } }, + { answer: "Target Support was decent.", segment: { age_group: "26-35", gender: "Male" } }, + { answer: "Loved the product!", segment: { age_group: "18-25", gender: "Male" } } +] +``` + +--- + +### Sentiment Analysis Quickly classify and summarize user responses as positive, neutral, or negative — globally or by segment (e.g., age, region). @@ -17,6 +99,34 @@ insight = SentimentInsights::Insights::Sentiment.new result = insight.analyze(entries) ``` +With options: + +```ruby +custom_prompt = <<~PROMPT + For each of the following customer responses, classify the sentiment as Positive, Neutral, or Negative, and assign a score between -1.0 (very negative) and 1.0 (very positive). + + Reply with a numbered list like: + 1. Positive (0.9) + 2. Negative (-0.8) + 3. Neutral (0.0) +PROMPT + +insight = SentimentInsights::Insights::Sentiment.new +result = insight.analyze( + entries, + question: "How was your experience today?", + prompt: custom_prompt, + batch_size: 10 +) +``` + +#### Available Options (`analyze`) +| Option | Type | Description | Provider | +|---------------|---------|------------------------------------------------------------------------|-------------| +| `question` | String | Contextual question for the batch | OpenAI only | +| `prompt` | String | Custom prompt text for LLM | OpenAI only | +| `batch_size` | Integer | Number of entries per OpenAI completion call (default: 50) | OpenAI only | + #### 📾 Sample Output ```ruby @@ -59,15 +169,56 @@ result = insight.analyze(entries) :sentiment_score=>0.9}]}} ``` -### ✅ 2. Key Phrase Extraction +--- + +### Key Phrase Extraction Extract frequently mentioned phrases and identify their associated sentiment and segment spread.
```ruby insight = SentimentInsights::Insights::KeyPhrases.new -result = insight.extract(entries, question: question) +result = insight.extract(entries) ``` +With options: + +```ruby +key_phrase_prompt = <<~PROMPT.strip + Extract the most important key phrases that represent the main ideas or feedback in the sentence below. + Ignore stop words and return each key phrase in its natural form, comma-separated. + + Question: %{question} + + Text: %{text} +PROMPT + +sentiment_prompt = <<~PROMPT + For each of the following customer responses, classify the sentiment as Positive, Neutral, or Negative, and assign a score between -1.0 (very negative) and 1.0 (very positive). + + Reply with a numbered list like: + 1. Positive (0.9) + 2. Negative (-0.8) + 3. Neutral (0.0) +PROMPT + +insight = SentimentInsights::Insights::KeyPhrases.new +result = insight.extract( + entries, + question: "What are the recurring themes?", + key_phrase_prompt: key_phrase_prompt, + sentiment_prompt: sentiment_prompt +) +``` + +#### Available Options (`extract`) +| Option | Type | Description | Provider | +|--------------------|---------|------------------------------------------------------------|--------------| +| `question` | String | Context question to help guide phrase extraction | OpenAI only | +| `key_phrase_prompt`| String | Custom prompt for extracting key phrases | OpenAI only | +| `sentiment_prompt` | String | Custom prompt for classifying tone of extracted phrases | OpenAI only | + +#### 📾 Sample Output + ```ruby {:phrases=> [{:phrase=>"everlane", @@ -85,15 +236,44 @@ result = insight.extract(entries, question: question) :segment=>{:age=>"25-34", :region=>"West"}}]} ``` -### ✅ 3. Entity Recognition +--- -Identify named entities like organizations, products, and people, and track them by sentiment and segment.
+### Entity Extraction ```ruby insight = SentimentInsights::Insights::Entities.new -result = insight.extract(entries, question: question) +result = insight.extract(entries) ``` +With options: + +```ruby +entity_prompt = <<~PROMPT.strip + Identify brand names, competitors, and product references in the sentence below. + Return each as a JSON object with "text" and "type" (e.g., BRAND, PRODUCT, COMPANY). + + Question: %{question} + + Sentence: "%{text}" +PROMPT + +insight = SentimentInsights::Insights::Entities.new +result = insight.extract( + entries, + question: "Which products or brands are mentioned?", + prompt: entity_prompt +) + +``` + +#### Available Options (`extract`) +| Option | Type | Description | Provider | +|-------------|---------|---------------------------------------------------|--------------| +| `question` | String | Context question to guide entity extraction | OpenAI only | +| `prompt` | String | Custom instructions for OpenAI entity extraction | OpenAI only | + +#### 📾 Sample Output + ```ruby {:entities=> [{:entity=>"everlane", @@ -126,73 +306,12 @@ result = insight.extract(entries, question: question) "the response was copy-paste and didn't address my issue directly.", :segment=>{:age=>"45-54", :region=>"Midwest"}}]} ``` - -### ✅ 4. Topic Modeling *(Coming Soon)* - -Automatically group similar responses into topics and subthemes. - ---- - -## 🔌 Supported Providers - -| Feature | OpenAI ✅ | AWS Comprehend ✅ | Sentimental (Local) ⚠️ | -| ------------------ | -------------- | ---------------- | ---------------------- | -| Sentiment Analysis | ✅ | ✅ | ✅ | -| Key Phrases | ✅ | ✅ | ❌ Not supported | -| Entities | ✅ | ✅ | ❌ Not supported | -| Topics | 🔜 Coming Soon | 🔜 Coming Soon | ❌ | - -Legend: ✅ Supported | 🔜 Coming Soon | ❌ Not Available | ⚠️ Partial - ---- - -## 📅 Example Input - -```ruby -question = "What did you like or dislike about your recent shopping experience with us?"
- -entries = [ - { - answer: "I absolutely loved the experience shopping with Everlane. The website is clean,\nproduct descriptions are spot-on, and my jeans arrived two days early with eco-friendly packaging.", - segment: { age: "25-34", region: "West" } - }, - { - answer: "The checkout flow on your site was a nightmare. The promo code from your Instagram campaign didn’t work,\nand it kept redirecting me to the homepage. Shopify integration needs a serious fix.", - segment: { age: "35-44", region: "South" } - }, - { - answer: "Apple Pay made the mobile checkout super fast. I placed an order while waiting for my coffee at Starbucks.\nGreat job optimizing the app UX—this is a game-changer.", - segment: { age: "25-34", region: "West" } - }, - { - answer: "I reached out to your Zendesk support team about a missing package, and while they responded within 24 hours,\nthe response was copy-paste and didn't address my issue directly.", - segment: { age: "45-54", region: "Midwest" } - }, - { - answer: "Shipping delays aside, I really liked the personalized note inside the box. Small gestures like that\nmake the Uniqlo brand stand out. Will definitely recommend to friends.", - segment: { age: "25-34", region: "West" } - } -] -``` - --- -## 🚀 Quick Start +## Provider Options & Custom Prompts -```ruby -# Install the gem -$ gem install sentiment_insights - -# Configure the provider -SentimentInsights.configure do |config| - config.provider = :openai # or :aws, :sentimental -end - -# Run analysis -insight = SentimentInsights::Insights::Sentiment.new -result = insight.analyze(entries) -puts JSON.pretty_generate(result) -``` +> ⚠️ All advanced options (`question`, `prompt`, `key_phrase_prompt`, `sentiment_prompt`, `batch_size`) apply only to the `:openai` provider. +> They are safely ignored for `:aws` and `:sentimental`.
--- @@ -217,7 +336,6 @@ AWS_REGION=us-east-1 ## 💎 Ruby Compatibility - **Minimum Ruby version:** 2.7 -- Tested on: 2.7, 3.0, 3.1, 3.2 --- @@ -258,8 +376,3 @@ Pull requests welcome! Please open an issue to discuss major changes first. - [AWS Comprehend](https://docs.aws.amazon.com/comprehend/latest/dg/what-is.html) - [Sentimental Gem](https://github.com/7compass/sentimental) ---- - -## 📒 Questions? - -File an issue or reach out on [GitHub](https://github.com/your-repo) diff --git a/lib/sentiment_insights/clients/entities/aws_client.rb b/lib/sentiment_insights/clients/entities/aws_client.rb index 56bb7b2..962a037 100644 --- a/lib/sentiment_insights/clients/entities/aws_client.rb +++ b/lib/sentiment_insights/clients/entities/aws_client.rb @@ -12,7 +12,7 @@ def initialize(region: 'us-east-1') @logger = Logger.new($stdout) end - def extract_batch(entries, question: nil) + def extract_batch(entries, question: nil, prompt: nil) responses = [] entity_map = Hash.new { |h, k| h[k] = [] } diff --git a/lib/sentiment_insights/clients/entities/open_ai_client.rb b/lib/sentiment_insights/clients/entities/open_ai_client.rb index 27e653b..73bfad0 100644 --- a/lib/sentiment_insights/clients/entities/open_ai_client.rb +++ b/lib/sentiment_insights/clients/entities/open_ai_client.rb @@ -17,7 +17,7 @@ def initialize(api_key: ENV['OPENAI_API_KEY'], model: DEFAULT_MODEL, max_retries @logger = Logger.new($stdout) end - def extract_batch(entries, question: nil) + def extract_batch(entries, question: nil, prompt: nil) responses = [] entity_map = Hash.new { |h, k| h[k] = [] } @@ -26,7 +26,7 @@ def extract_batch(entries, question: nil) next if sentence.empty? response_id = "r_#{index + 1}" - entities = extract_entities_from_sentence(sentence) + entities = extract_entities_from_sentence(sentence, question: question, prompt: prompt) responses << { id: response_id, @@ -35,6 +35,7 @@ def extract_batch(entries, question: nil) } entities.each do |ent| + next if ent[:text].empty?
|| ent[:type].empty? key = [ent[:text].downcase, ent[:type]] entity_map[key] << response_id end @@ -54,13 +55,29 @@ def extract_batch(entries, question: nil) private - def extract_entities_from_sentence(text) - prompt = <<~PROMPT - Extract named entities from this sentence. Return them as a JSON array with each item having "text" and "type" (e.g., PERSON, ORGANIZATION, LOCATION, PRODUCT). - Sentence: "#{text}" + def extract_entities_from_sentence(text, question: nil, prompt: nil) + # Default prompt with interpolation placeholders + default_prompt = <<~PROMPT + Extract named entities from this sentence based on the question. + Return them as a JSON array with each item having "text" and "type" (e.g., PERSON, ORGANIZATION, LOCATION, PRODUCT). + %{question} + Sentence: "%{text}" PROMPT - body = build_request_body(prompt) + # If a custom prompt is provided, interpolate %{text} and %{question} if present + if prompt + interpolated = prompt.dup + interpolated.gsub!('%{text}', text.to_s) + interpolated.gsub!('%{question}', question.to_s) if question + interpolated.gsub!('{text}', text.to_s) + interpolated.gsub!('{question}', question.to_s) if question + prompt_to_use = interpolated + else + question_line = question ? 
"Question: #{question}" : "" + prompt_to_use = default_prompt % { question: question_line, text: text } + end + + body = build_request_body(prompt_to_use) response = post_openai(body) begin diff --git a/lib/sentiment_insights/clients/key_phrases/aws_client.rb b/lib/sentiment_insights/clients/key_phrases/aws_client.rb index f0df599..a46fec1 100644 --- a/lib/sentiment_insights/clients/key_phrases/aws_client.rb +++ b/lib/sentiment_insights/clients/key_phrases/aws_client.rb @@ -12,7 +12,7 @@ def initialize(region: 'us-east-1') @logger = Logger.new($stdout) end - def extract_batch(entries, question: nil) + def extract_batch(entries, question: nil, key_phrase_prompt: nil, sentiment_prompt: nil) responses = [] phrase_map = Hash.new { |h, k| h[k] = [] } diff --git a/lib/sentiment_insights/clients/key_phrases/open_ai_client.rb b/lib/sentiment_insights/clients/key_phrases/open_ai_client.rb index 92adfd4..48d037c 100644 --- a/lib/sentiment_insights/clients/key_phrases/open_ai_client.rb +++ b/lib/sentiment_insights/clients/key_phrases/open_ai_client.rb @@ -8,7 +8,7 @@ module SentimentInsights module Clients module KeyPhrases class OpenAIClient - DEFAULT_MODEL = "gpt-3.5-turbo" + DEFAULT_MODEL = "gpt-3.5-turbo" DEFAULT_RETRIES = 3 def initialize(api_key: ENV['OPENAI_API_KEY'], model: DEFAULT_MODEL, max_retries: DEFAULT_RETRIES) @@ -20,19 +20,19 @@ def initialize(api_key: ENV['OPENAI_API_KEY'], model: DEFAULT_MODEL, max_retries end # Extract key phrases from entries and enrich with sentiment - def extract_batch(entries, question: nil) + def extract_batch(entries, question: nil, key_phrase_prompt: nil, sentiment_prompt: nil) responses = [] phrase_map = Hash.new { |h, k| h[k] = [] } # Fetch sentiments in batch from sentiment client - sentiments = @sentiment_client.analyze_entries(entries, question: question) + sentiments = @sentiment_client.analyze_entries(entries, question: question, prompt: sentiment_prompt) entries.each_with_index do |entry, index| sentence = 
entry[:answer].to_s.strip next if sentence.empty? response_id = "r_#{index + 1}" - phrases = extract_phrases_from_sentence(sentence) + phrases = extract_phrases_from_sentence(sentence, question: question, prompt: key_phrase_prompt) sentiment = sentiments[index] || { label: :neutral } @@ -61,15 +61,33 @@ def extract_batch(entries, question: nil) private - def extract_phrases_from_sentence(text) - prompt = <<~PROMPT - Extract the key phrases from this sentence: - "#{text}" - Return them as a comma-separated list. + def extract_phrases_from_sentence(text, question: nil, prompt: nil) + # Default prompt with interpolation placeholders + default_prompt = <<~PROMPT + Extract the most important key phrases that represent the main ideas or feedback in the sentence below. + Ignore stop words and return each key phrase in its natural form, comma-separated. + %{question} + Sentence: "%{text}" PROMPT - body = build_request_body(prompt) + # If a custom prompt is provided, attempt to interpolate %{text} and %{question} if present + if prompt + interpolated = prompt.dup + interpolated.gsub!('%{text}', text.to_s) + interpolated.gsub!('%{question}', question.to_s) if question + # For compatibility: if “{text}” is used instead of “%{text}” + interpolated.gsub!('{text}', text.to_s) + interpolated.gsub!('{question}', question.to_s) if question + prompt_to_use = interpolated + else + question_line = question ?
"Question: #{question}" : "" + prompt_to_use = default_prompt % { question: question_line, text: text } + end + + body = build_request_body(prompt_to_use) response = post_openai(body) + + # The response is expected as a comma- or newline-separated list of key phrases parse_phrases(response) end diff --git a/lib/sentiment_insights/clients/sentiment/aws_comprehend_client.rb b/lib/sentiment_insights/clients/sentiment/aws_comprehend_client.rb index 7840d20..5988c6e 100644 --- a/lib/sentiment_insights/clients/sentiment/aws_comprehend_client.rb +++ b/lib/sentiment_insights/clients/sentiment/aws_comprehend_client.rb @@ -15,7 +15,7 @@ def initialize(region: 'us-east-1') # Analyze a batch of entries using AWS Comprehend. # @param entries [Array] each with :answer key # @return [Array] each with :label (symbol) and :score (float) - def analyze_entries(entries, question: nil) + def analyze_entries(entries, question: nil, prompt: nil, batch_size: nil) results = [] entries.each_slice(MAX_BATCH_SIZE) do |batch| diff --git a/lib/sentiment_insights/clients/sentiment/open_ai_client.rb b/lib/sentiment_insights/clients/sentiment/open_ai_client.rb index e7bde9b..266f0b7 100644 --- a/lib/sentiment_insights/clients/sentiment/open_ai_client.rb +++ b/lib/sentiment_insights/clients/sentiment/open_ai_client.rb @@ -7,7 +7,7 @@ module SentimentInsights module Clients module Sentiment class OpenAIClient - DEFAULT_MODEL = "gpt-3.5-turbo" + DEFAULT_MODEL = "gpt-3.5-turbo" DEFAULT_RETRIES = 3 def initialize(api_key: ENV['OPENAI_API_KEY'], model: DEFAULT_MODEL, max_retries: DEFAULT_RETRIES, return_scores: true) @@ -18,75 +18,85 @@ def initialize(api_key: ENV['OPENAI_API_KEY'], model: DEFAULT_MODEL, max_retries @logger = Logger.new($stdout) end - def analyze_entries(entries, question: nil) - prompt_content = build_prompt_content(entries, question) - request_body = { - model: @model, - messages: [ - { role: "user", content: prompt_content } - ], - temperature: 0.0 - } - - uri = 
URI("https://api.openai.com/v1/chat/completions") - http = Net::HTTP.new(uri.host, uri.port) - http.use_ssl = true - - response_content = nil - attempt = 0 - - while attempt < @max_retries - attempt += 1 - request = Net::HTTP::Post.new(uri) - request["Content-Type"] = "application/json" - request["Authorization"] = "Bearer #{@api_key}" - request.body = JSON.generate(request_body) - - begin - response = http.request(request) - rescue StandardError => e - @logger.error "OpenAI API request error: #{e.class} - #{e.message}" - raise + def analyze_entries(entries, question: nil, prompt: nil, batch_size: 50) + all_sentiments = [] + + entries.each_slice(batch_size) do |batch| + prompt_content = build_prompt_content(batch, question: question, prompt: prompt) + request_body = { + model: @model, + messages: [ + { role: "user", content: prompt_content } + ], + temperature: 0.0 + } + + uri = URI("https://api.openai.com/v1/chat/completions") + http = Net::HTTP.new(uri.host, uri.port) + http.use_ssl = true + + response_content = nil + attempt = 0 + + while attempt < @max_retries + attempt += 1 + request = Net::HTTP::Post.new(uri) + request["Content-Type"] = "application/json" + request["Authorization"] = "Bearer #{@api_key}" + request.body = JSON.generate(request_body) + + begin + response = http.request(request) + rescue StandardError => e + @logger.error "OpenAI API request error: #{e.class} - #{e.message}" + raise + end + + status = response.code.to_i + if status == 429 + @logger.warn "Rate limit (HTTP 429) on attempt #{attempt}. Retrying..." + sleep(2 ** (attempt - 1)) + next + elsif status != 200 + @logger.error "Request failed (#{status}): #{response.body}" + raise "OpenAI API Error: #{status}" + else + data = JSON.parse(response.body) + response_content = data.dig("choices", 0, "message", "content") + break + end end - status = response.code.to_i - if status == 429 - @logger.warn "Rate limit (HTTP 429) on attempt #{attempt}. Retrying..." 
- sleep(2 ** (attempt - 1)) - next - elsif status != 200 - @logger.error "Request failed (#{status}): #{response.body}" - raise "OpenAI API Error: #{status}" - else - data = JSON.parse(response.body) - response_content = data.dig("choices", 0, "message", "content") - break - end + sentiments = parse_sentiments(response_content, batch.size) + all_sentiments.concat(sentiments) end - parse_sentiments(response_content, entries.size) + all_sentiments end private - def build_prompt_content(entries, question) - prompt = "" - prompt << "Question: #{question}\n" if question - prompt << <<~INSTRUCTIONS - For each of the following customer responses, classify the sentiment as Positive, Neutral, or Negative, and assign a score between -1.0 (very negative) and 1.0 (very positive). + def build_prompt_content(entries, question: nil, prompt: nil) + content = "" + content << "Question: #{question}\n\n" if question + + # Use custom instructions or default + instructions = prompt || <<~DEFAULT + For each of the following customer responses, classify the sentiment as Positive, Neutral, or Negative, and assign a score between -1.0 (very negative) and 1.0 (very positive). - Reply with a numbered list like: - 1. Positive (0.9) - 2. Negative (-0.8) - 3. Neutral (0.0) + Reply with a numbered list like: + 1. Positive (0.9) + 2. Negative (-0.8) + 3. Neutral (0.0) + DEFAULT - INSTRUCTIONS + content << instructions.strip + "\n\n" entries.each_with_index do |entry, index| - prompt << "#{index + 1}. \"#{entry[:answer]}\"\n" + content << "#{index + 1}. 
\"#{entry[:answer]}\"\n" end - prompt + content end def parse_sentiments(content, expected_count) diff --git a/lib/sentiment_insights/clients/sentiment/sentimental_client.rb b/lib/sentiment_insights/clients/sentiment/sentimental_client.rb index 882cae7..755f755 100644 --- a/lib/sentiment_insights/clients/sentiment/sentimental_client.rb +++ b/lib/sentiment_insights/clients/sentiment/sentimental_client.rb @@ -14,7 +14,7 @@ def initialize # @param entries [Array] An array of response hashes (each with :answer). # @param question [String, nil] (unused) Global question context, not needed for local analysis. # @return [Array] An array of hashes with sentiment classification and score for each entry. - def analyze_entries(entries, question: nil) + def analyze_entries(entries, question: nil, prompt: nil, batch_size: nil) puts "Inside sentimental" entries.map do |entry| text = entry[:answer].to_s.strip diff --git a/lib/sentiment_insights/insights/entities.rb b/lib/sentiment_insights/insights/entities.rb index 1718b4a..a6702b3 100644 --- a/lib/sentiment_insights/insights/entities.rb +++ b/lib/sentiment_insights/insights/entities.rb @@ -22,11 +22,10 @@ def initialize(provider: nil, provider_client: nil) # Extract named entities and build summarized output # @param entries [Array] each with :answer and optional :segment # @return [Hash] { entities: [...], responses: [...] 
} - def extract(entries, question: nil) + def extract(entries, question: nil, prompt: nil) entries = entries.to_a - raw_result = @provider_client.extract_batch(entries, question: question) + raw_result = @provider_client.extract_batch(entries, question: question, prompt: prompt) - puts "raw_result = #{raw_result}" responses = raw_result[:responses] || [] entities = raw_result[:entities] || [] diff --git a/lib/sentiment_insights/insights/key_phrases.rb b/lib/sentiment_insights/insights/key_phrases.rb index f6fc70f..016b236 100644 --- a/lib/sentiment_insights/insights/key_phrases.rb +++ b/lib/sentiment_insights/insights/key_phrases.rb @@ -24,9 +24,9 @@ def initialize(provider: nil, provider_client: nil) # @param entries [Array] each with :answer and optional :segment # @param question [String, nil] optional context # @return [Hash] { phrases: [...], responses: [...] } - def extract(entries, question: nil) + def extract(entries, question: nil, key_phrase_prompt: nil, sentiment_prompt: nil) entries = entries.to_a - raw_result = @provider_client.extract_batch(entries, question: question) + raw_result = @provider_client.extract_batch(entries, question: question, key_phrase_prompt: key_phrase_prompt, sentiment_prompt: sentiment_prompt) responses = raw_result[:responses] || [] phrases = raw_result[:phrases] || [] @@ -77,4 +77,4 @@ def extract(entries, question: nil) end end end -end +end \ No newline at end of file diff --git a/lib/sentiment_insights/insights/sentiment.rb b/lib/sentiment_insights/insights/sentiment.rb index a30653a..fbe76f8 100644 --- a/lib/sentiment_insights/insights/sentiment.rb +++ b/lib/sentiment_insights/insights/sentiment.rb @@ -1,5 +1,6 @@ require_relative '../clients/sentiment/open_ai_client' require_relative '../clients/sentiment/sentimental_client' +require_relative '../clients/sentiment/aws_comprehend_client' module SentimentInsights module Insights @@ -15,7 +16,6 @@ def initialize(provider: nil, provider_client: nil, top_count: 
DEFAULT_TOP_COUNT when :openai Clients::Sentiment::OpenAIClient.new when :aws - require_relative '../clients/sentiment/aws_comprehend_client' Clients::Sentiment::AwsComprehendClient.new else Clients::Sentiment::SentimentalClient.new @@ -27,11 +27,11 @@ def initialize(provider: nil, provider_client: nil, top_count: DEFAULT_TOP_COUNT # @param entries [Array] An array of response hashes, each with :answer and :segment. # @param question [String, nil] Optional global question text or metadata for context. # @return [Hash] Summary of sentiment analysis (global, segment-wise, top comments, and annotated responses). - def analyze(entries, question: nil) + def analyze(entries, question: nil, prompt: nil, batch_size: 50) # Ensure entries is an array of hashes with required keys entries = entries.to_a # Get sentiment results for each entry from the provider client - results = @provider_client.analyze_entries(entries, question: question) + results = @provider_client.analyze_entries(entries, question: question, prompt: prompt, batch_size: batch_size) # Combine original entries with sentiment results annotated_responses = entries.each_with_index.map do |entry, idx| diff --git a/sentiment_insights.gemspec b/sentiment_insights.gemspec index 29495eb..22ce7f3 100644 --- a/sentiment_insights.gemspec +++ b/sentiment_insights.gemspec @@ -17,8 +17,6 @@ Gem::Specification.new do |spec| if spec.respond_to?(:metadata) spec.metadata["homepage_uri"] = spec.homepage spec.metadata["source_code_uri"] = "https://github.com/mathrailsAI/sentiment_insights" - spec.metadata["changelog_uri"] = "https://github.com/mathrailsAI/sentiment_insights/blob/main/CHANGELOG.md" - # Removed allowed_push_host — usually not needed unless you have a private server else raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
end @@ -32,7 +30,7 @@ Gem::Specification.new do |spec| # Runtime dependencies spec.add_dependency "sentimental", "~> 1.4.0" - spec.add_dependency "aws-sdk-comprehend" + spec.add_dependency "aws-sdk-comprehend", ">= 1.98.0" # Development dependencies spec.add_development_dependency "bundler", "~> 2.0" diff --git a/spec/insights/sentiment_insights_entities_spec.rb b/spec/insights/sentiment_insights_entities_spec.rb index 422d055..6b8065f 100644 --- a/spec/insights/sentiment_insights_entities_spec.rb +++ b/spec/insights/sentiment_insights_entities_spec.rb @@ -133,7 +133,7 @@ class AwsClient; end context 'with mocked OpenAI provider' do let(:mock_client) do double('mock_openai_client').tap do |client| - allow(client).to receive(:extract_batch) do |entries, question: nil| + allow(client).to receive(:extract_batch) do |entries, question: nil, prompt: nil| { responses: [ { @@ -232,7 +232,7 @@ class AwsClient; end allow_any_instance_of(described_class).to receive(:puts) # Modify the mock to have an entity appearing in multiple responses - allow(mock_client).to receive(:extract_batch) do |entries, question: nil| + allow(mock_client).to receive(:extract_batch) do |entries, question: nil, prompt: nil| { responses: [ { @@ -286,7 +286,7 @@ class AwsClient; end context 'with mocked AWS provider' do let(:mock_client) do double('mock_aws_client').tap do |client| - allow(client).to receive(:extract_batch) do |entries, question: nil| + allow(client).to receive(:extract_batch) do |entries, question: nil, prompt: nil| { responses: [ { @@ -353,7 +353,7 @@ class AwsClient; end let(:mock_client) do double('mock_client').tap do |client| - allow(client).to receive(:extract_batch) do |entries, question: nil| + allow(client).to receive(:extract_batch) do |entries, question: nil, prompt: nil| { responses: [ { @@ -405,7 +405,7 @@ class AwsClient; end context 'with provider returning incomplete data' do let(:mock_client_with_incomplete_data) do double('mock_client_incomplete').tap do |client| - 
allow(client).to receive(:extract_batch) do |entries, question: nil| + allow(client).to receive(:extract_batch) do |entries, question: nil, prompt: nil| { # Missing responses but has entities entities: [ @@ -442,7 +442,7 @@ class AwsClient; end context 'with provider returning null mention ids' do let(:mock_client_with_null_mentions) do double('mock_client_null_mentions').tap do |client| - allow(client).to receive(:extract_batch) do |entries, question: nil| + allow(client).to receive(:extract_batch) do |entries, question: nil, prompt: nil| { responses: [ { @@ -481,7 +481,7 @@ class AwsClient; end context 'with invalid response IDs in mentions' do let(:mock_client_with_invalid_ids) do double('mock_client_invalid_ids').tap do |client| - allow(client).to receive(:extract_batch) do |entries, question: nil| + allow(client).to receive(:extract_batch) do |entries, question: nil, prompt: nil| { responses: [ { @@ -522,7 +522,7 @@ class AwsClient; end context 'with entities having empty fields' do let(:mock_client_with_empty_fields) do double('mock_client_empty_fields').tap do |client| - allow(client).to receive(:extract_batch) do |entries, question: nil| + allow(client).to receive(:extract_batch) do |entries, question: nil, prompt: nil| { responses: [ { diff --git a/spec/insights/sentiment_insights_key_phrases_spec.rb b/spec/insights/sentiment_insights_key_phrases_spec.rb index 7e8185e..0c77508 100644 --- a/spec/insights/sentiment_insights_key_phrases_spec.rb +++ b/spec/insights/sentiment_insights_key_phrases_spec.rb @@ -119,7 +119,7 @@ context 'with mocked OpenAI provider' do let(:mock_client) do double('mock_openai_client').tap do |client| - allow(client).to receive(:extract_batch) do |entries, question: nil| + allow(client).to receive(:extract_batch) do |entries, question: nil, key_phrase_prompt: nil, sentiment_prompt: nil| { responses: [ { @@ -199,7 +199,7 @@ context 'with mocked AWS provider' do let(:mock_client) do double('mock_aws_client').tap do |client| - 
allow(client).to receive(:extract_batch) do |entries, question: nil| + allow(client).to receive(:extract_batch) do |entries, question: nil, key_phrase_prompt: nil, sentiment_prompt: nil| { responses: [ { @@ -256,7 +256,7 @@ let(:mock_client) do double('mock_client').tap do |client| - allow(client).to receive(:extract_batch) do |entries, question: nil| + allow(client).to receive(:extract_batch) do |entries, question: nil, key_phrase_prompt: nil, sentiment_prompt: nil| { responses: [ { @@ -302,7 +302,7 @@ context 'with provider returning incomplete data' do let(:mock_client_with_incomplete_data) do double('mock_client_incomplete').tap do |client| - allow(client).to receive(:extract_batch) do |entries, question: nil| + allow(client).to receive(:extract_batch) do |entries, question: nil, key_phrase_prompt: nil, sentiment_prompt: nil| { # Missing responses but has phrases phrases: [ @@ -335,7 +335,7 @@ context 'with provider returning null mention ids' do let(:mock_client_with_null_mentions) do double('mock_client_null_mentions').tap do |client| - allow(client).to receive(:extract_batch) do |entries, question: nil| + allow(client).to receive(:extract_batch) do |entries, question: nil, key_phrase_prompt: nil, sentiment_prompt: nil| { responses: [ { @@ -370,7 +370,7 @@ context 'with responses having no sentiment' do let(:mock_client_no_sentiment) do double('mock_client_no_sentiment').tap do |client| - allow(client).to receive(:extract_batch) do |entries, question: nil| + allow(client).to receive(:extract_batch) do |entries, question: nil, key_phrase_prompt: nil, sentiment_prompt: nil| { responses: [ { diff --git a/spec/insights/sentiment_spec.rb b/spec/insights/sentiment_spec.rb index 49a9895..a437f0f 100644 --- a/spec/insights/sentiment_spec.rb +++ b/spec/insights/sentiment_spec.rb @@ -136,7 +136,7 @@ context 'with mocked OpenAI provider' do let(:mock_client) do double('mock_openai_client').tap do |client| - allow(client).to receive(:analyze_entries) do |entries, 
question: nil| + allow(client).to receive(:analyze_entries) do |entries, question: nil, prompt: nil, batch_size: nil| entries.map.with_index do |_, i| { label: [:positive, :negative, :neutral][i % 3], score: [0.9, -0.8, 0.1][i % 3] } end @@ -215,7 +215,7 @@ ] custom_mock_client = double('custom_mock_client') - allow(custom_mock_client).to receive(:analyze_entries) do |entries, question: nil| + allow(custom_mock_client).to receive(:analyze_entries) do |entries, question: nil, prompt: nil, batch_size: nil| entries.map.with_index do |_, i| sentiment_type = i % 3 == 0 ? :positive : (i % 3 == 1 ? :negative : :neutral) score = sentiment_type == :positive ? 0.9 - (i * 0.1) : @@ -243,7 +243,7 @@ context 'with mocked AWS provider' do let(:mock_client) do double('mock_aws_client').tap do |client| - allow(client).to receive(:analyze_entries) do |entries, question: nil| + allow(client).to receive(:analyze_entries) do |entries, question: nil, prompt: nil, batch_size: nil| entries.map { { label: :positive, score: 0.9 } } end end @@ -303,7 +303,7 @@ let(:mock_client) do double('mock_client').tap do |client| - allow(client).to receive(:analyze_entries) do |entries, question: nil| + allow(client).to receive(:analyze_entries) do |entries, question: nil, prompt: nil, batch_size: nil| entries.map.with_index do |_, i| { label: i == 0 ? :positive : :negative, score: i == 0 ? 0.9 : -0.8 } end @@ -329,7 +329,7 @@ context 'with missing provider results' do let(:mock_client_with_missing_results) do double('mock_client_missing_results').tap do |client| - allow(client).to receive(:analyze_entries) do |entries, question: nil| + allow(client).to receive(:analyze_entries) do |entries, question: nil, prompt: nil, batch_size: nil| # Return fewer results than entries [{ label: :positive, score: 0.9 }] end