Add score calculation (#262)

tremend-cofe · Jul 22, 2023 · f6a1dca · f6a1dca
1 parent daec039
commit f6a1dca
Show file tree

Hide file tree

Showing 8 changed files with 99 additions and 28 deletions.
diff --git a/decidim-ai/lib/decidim/ai.rb b/decidim-ai/lib/decidim/ai.rb
@@ -16,6 +16,13 @@ module SpamContent
 
     include ActiveSupport::Configurable
 
+    # You can configure the spam threshold for the spam detection service.
+    # The threshold is a float value between 0 and 1.
+    # The default value is 0.5.
+    # Any value below the threshold will be considered spam.
+    config_accessor :spam_treshold do
+      0.5
+    end
     # Registered analyzers.
     # You can register your own analyzer by adding a new entry to this array.
     # The entry must be a hash with the following keys:

diff --git a/decidim-ai/lib/decidim/ai/spam_content/base_strategy.rb b/decidim-ai/lib/decidim/ai/spam_content/base_strategy.rb
@@ -4,7 +4,10 @@ module Decidim
   module Ai
     module SpamContent
       class BaseStrategy
+        attr_reader :name
+
         def initialize(options = {})
+          @name = options.delete(:name)
           @options = options
         end
 
@@ -13,6 +16,10 @@ def classify(_content); end
         def train(_classification, _content); end
 
         def untrain(_classification, _content); end
+
+        def log; end
+
+        def score; end
       end
     end
   end

diff --git a/decidim-ai/lib/decidim/ai/spam_content/bayes_strategy.rb b/decidim-ai/lib/decidim/ai/spam_content/bayes_strategy.rb
@@ -7,19 +7,41 @@ module Ai
     module SpamContent
       class BayesStrategy < BaseStrategy
         def initialize(options = {})
-          @options = { adapter: :memory, params: {} }.deep_merge(options)
-          @backend = ClassifierReborn::Bayes.new :spam, :ham, backend: configured_backend
+          super
+          @options = { adapter: :memory, categories: %w(ham spam), params: {} }.deep_merge(options)
+
+          @available_categories = options[:categories]
+          @backend = ClassifierReborn::Bayes.new(*available_categories, backend: configured_backend)
         end
 
-        delegate :train, :untrain, :classify, to: :backend
+        delegate :train, :untrain, to: :backend
 
         def log
-          "The Classification engine marked this as ..."
+          return unless category
+
+          "The Classification engine marked this as #{category}"
+        end
+
+        def classify(content)
+          @category, @internal_score = backend.classify_with_score(content)
+          category
+        end
+
+        # The Bayes classifier returns a raw score that can be lower than -1.
+        # As per the ClassifierReborn documentation, the category whose score is closest to 0 is picked as the dominant category.
+        #
+        # From original documentation:
+        #   Returns the scores in each category the provided +text+. E.g.,
+        #     b.classifications "I hate bad words and you"
+        #       =>  {"Uninteresting"=>-12.6997928013932, "Interesting"=>-18.4206807439524}
+        #   The largest of these scores (the one closest to 0) is the one picked out by #classify
+        def score
+          category.presence == "Spam" ? 1 : 0
         end
 
         private
 
-        attr_reader :backend, :options
+        attr_reader :backend, :options, :available_categories, :category, :internal_score
 
         def configured_backend
           if options[:adapter] == :redis

diff --git a/decidim-ai/lib/decidim/ai/spam_detection_service.rb b/decidim-ai/lib/decidim/ai/spam_detection_service.rb
@@ -8,26 +8,30 @@ def initialize
       end
 
       def train(category, text)
-        @registry.each do |_name, strategy|
+        @registry.each do |strategy|
           strategy.train(category, text)
         end
       end
 
       def classify(text)
-        @registry.each do |_name, strategy|
+        @registry.each do |strategy|
           strategy.classify(text)
         end
       end
 
       def untrain(category, text)
-        @registry.each do |_name, strategy|
+        @registry.each do |strategy|
           strategy.untrain(category, text)
         end
       end
 
+      def score
+        @registry.collect(&:score).inject(0.0, :+) / @registry.size
+      end
+
       def classification_log
         @classification_log = []
-        @registry.each do |_name, strategy|
+        @registry.each do |strategy|
           @classification_log << strategy.log
         end
         @classification_log.join("\n")

diff --git a/decidim-ai/lib/decidim/ai/strategy_registry.rb b/decidim-ai/lib/decidim/ai/strategy_registry.rb
@@ -5,31 +5,27 @@ module Ai
     class StrategyRegistry
       class StrategyAlreadyRegistered < StandardError; end
 
+      delegate :clear, :collect, :each, :size, to: :strategies
+      attr_reader :strategies
+
+      def initialize
+        @strategies = []
+      end
+
       def register_analyzer(name:, strategy:, options: {})
-        if strategies[name].present?
+        if self.for(name).present?
           raise(
             StrategyAlreadyRegistered,
             "There is a stategy already registered with the name `:#{name}`"
           )
         end
 
-        strategies[name] = strategy.new(options)
+        options = { name: }.merge(options)
+        strategies << strategy.new(options)
       end
 
       def for(name)
-        strategies[name]
-      end
-
-      def all
-        strategies
-      end
-
-      delegate :empty?, :size, :each, :clear, to: :strategies
-
-      private
-
-      def strategies
-        @strategies ||= {}
+        strategies.select { |k, _v| k.name == name }.first
       end
     end
   end

diff --git a/decidim-ai/spec/lib/decidim/ai/spam_content/bayes_strategy_spec.rb b/decidim-ai/spec/lib/decidim/ai/spam_content/bayes_strategy_spec.rb
@@ -25,15 +25,41 @@
 
   describe "classify" do
     it "calls backend.classify" do
-      expect(subject.send(:backend)).to receive(:classify).with("text")
+      expect(subject.send(:backend)).to receive(:classify_with_score).with("text")
 
       subject.classify("text")
     end
   end
 
   describe "log" do
     it "returns a log" do
-      expect(subject.log).to eq("The Classification engine marked this as ...")
+      expect(subject.log).to be_nil
+    end
+
+    context "when category is spam" do
+      it "returns a log" do
+        allow(subject.send(:backend)).to receive(:classify_with_score).with("text").and_return(["Spam", -12.6997])
+        subject.classify("text")
+        expect(subject.log).to eq("The Classification engine marked this as Spam")
+      end
+    end
+  end
+
+  describe "score" do
+    it "returns a score" do
+      expect(subject.score).to eq(0)
+    end
+
+    it "returns 0 when is ham" do
+      allow(subject.send(:backend)).to receive(:classify_with_score).with("text").and_return(["Ham", -12.6997])
+      subject.classify("text")
+      expect(subject.score).to eq(0)
+    end
+
+    it "returns 1 when is spam" do
+      allow(subject.send(:backend)).to receive(:classify_with_score).with("text").and_return(["Spam", -12.6997])
+      subject.classify("text")
+      expect(subject.score).to eq(1)
     end
   end
 end
diff --git a/decidim-ai/spec/lib/decidim/ai/spam_detection_service_spec.rb b/decidim-ai/spec/lib/decidim/ai/spam_detection_service_spec.rb
@@ -50,4 +50,13 @@
       expect(subject.classification_log).to eq("base log\ndummy log")
     end
   end
+
+  describe "score" do
+    it "returns the average score of all strategies" do
+      allow(registry.for(:base)).to receive(:score).and_return(1)
+      allow(registry.for(:dummy)).to receive(:score).and_return(0)
+
+      expect(subject.score).to eq(0.5)
+    end
+  end
 end
diff --git a/decidim-ai/spec/lib/decidim/ai/strategy_registry_spec.rb b/decidim-ai/spec/lib/decidim/ai/strategy_registry_spec.rb
@@ -36,8 +36,8 @@ module Ai
         it "returns all content blocks" do
           subject.register_analyzer(**analyzer)
 
-          expect(subject.all).to be_a(Hash)
-          expect(subject.all.size).to be(1)
+          expect(subject.strategies).to be_a(Array)
+          expect(subject.size).to be(1)
         end
       end
     end