Skip to content

Commit

Permalink
Add score calculation (#262)
Browse files Browse the repository at this point in the history
  • Loading branch information
alecslupu committed Jul 22, 2023
1 parent daec039 commit f6a1dca
Show file tree
Hide file tree
Showing 8 changed files with 99 additions and 28 deletions.
7 changes: 7 additions & 0 deletions decidim-ai/lib/decidim/ai.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@ module SpamContent

include ActiveSupport::Configurable

# You can configure the spam threshold for the spam detection service.
# The threshold is a float value between 0 and 1.
# The default value is 0.5.
# Any value below the threshold will be considered spam.
config_accessor :spam_treshold do
0.5
end
# Registered analyzers.
# You can register your own analyzer by adding a new entry to this array.
# The entry must be a hash with the following keys:
Expand Down
7 changes: 7 additions & 0 deletions decidim-ai/lib/decidim/ai/spam_content/base_strategy.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ module Decidim
module Ai
module SpamContent
class BaseStrategy
attr_reader :name

def initialize(options = {})
@name = options.delete(:name)
@options = options
end

Expand All @@ -13,6 +16,10 @@ def classify(_content); end
def train(_classification, _content); end

def untrain(_classification, _content); end

def log; end

def score; end
end
end
end
Expand Down
32 changes: 27 additions & 5 deletions decidim-ai/lib/decidim/ai/spam_content/bayes_strategy.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,41 @@ module Ai
module SpamContent
class BayesStrategy < BaseStrategy
def initialize(options = {})
@options = { adapter: :memory, params: {} }.deep_merge(options)
@backend = ClassifierReborn::Bayes.new :spam, :ham, backend: configured_backend
super
@options = { adapter: :memory, categories: %w(ham spam), params: {} }.deep_merge(options)

@available_categories = options[:categories]
@backend = ClassifierReborn::Bayes.new(*available_categories, backend: configured_backend)
end

delegate :train, :untrain, :classify, to: :backend
delegate :train, :untrain, to: :backend

def log
"The Classification engine marked this as ..."
return unless category

"The Classification engine marked this as #{category}"
end

# Runs the content through the Bayes backend and remembers the outcome.
#
# The backend's classify_with_score result is unpacked into the winning
# category label and its raw score; both are stored on the instance so that
# later calls to #log and #score can read them. Returns the category label.
def classify(content)
  label, raw_score = backend.classify_with_score(content)
  @category = label
  @internal_score = raw_score
  category
end

# Collapses the last classification into a binary score: 1 when the stored
# category is "Spam", 0 otherwise (including when nothing has been classified yet).
#
# The raw score produced by the Bayes backend is not used here because it is not
# normalized: it can be lower than -1.
# As per ClassifierReborn documentation, the score closest to 0 is picked as the dominant category.
#
# From original documentation:
# Returns the scores in each category the provided +text+. E.g.,
# b.classifications "I hate bad words and you"
# => {"Uninteresting"=>-12.6997928013932, "Interesting"=>-18.4206807439524}
# The largest of these scores (the one closest to 0) is the one picked out by #classify
def score
category.presence == "Spam" ? 1 : 0
end

private

attr_reader :backend, :options
attr_reader :backend, :options, :available_categories, :category, :internal_score

def configured_backend
if options[:adapter] == :redis
Expand Down
12 changes: 8 additions & 4 deletions decidim-ai/lib/decidim/ai/spam_detection_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,30 @@ def initialize
end

def train(category, text)
@registry.each do |_name, strategy|
@registry.each do |strategy|
strategy.train(category, text)
end
end

def classify(text)
@registry.each do |_name, strategy|
@registry.each do |strategy|
strategy.classify(text)
end
end

def untrain(category, text)
@registry.each do |_name, strategy|
@registry.each do |strategy|
strategy.untrain(category, text)
end
end

# Averages the scores reported by every registered strategy.
#
# Returns a Float: the arithmetic mean of each strategy's +score+.
# With no strategies registered it returns 0.0 instead of evaluating
# 0.0 / 0, which would yield Float::NAN and make any later comparison
# against a threshold silently false.
def score
  return 0.0 if @registry.size.zero?

  @registry.collect(&:score).inject(0.0, :+) / @registry.size
end

def classification_log
@classification_log = []
@registry.each do |_name, strategy|
@registry.each do |strategy|
@classification_log << strategy.log
end
@classification_log.join("\n")
Expand Down
26 changes: 11 additions & 15 deletions decidim-ai/lib/decidim/ai/strategy_registry.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,31 +5,27 @@ module Ai
class StrategyRegistry
class StrategyAlreadyRegistered < StandardError; end

delegate :clear, :collect, :each, :size, to: :strategies
attr_reader :strategies

def initialize
@strategies = []
end

def register_analyzer(name:, strategy:, options: {})
if strategies[name].present?
if self.for(name).present?
raise(
StrategyAlreadyRegistered,
"There is a stategy already registered with the name `:#{name}`"
)
end

strategies[name] = strategy.new(options)
options = { name: }.merge(options)
strategies << strategy.new(options)
end

def for(name)
strategies[name]
end

def all
strategies
end

delegate :empty?, :size, :each, :clear, to: :strategies

private

def strategies
@strategies ||= {}
strategies.select { |k, _v| k.name == name }.first
end
end
end
Expand Down
30 changes: 28 additions & 2 deletions decidim-ai/spec/lib/decidim/ai/spam_content/bayes_strategy_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,41 @@

describe "classify" do
it "calls backend.classify" do
expect(subject.send(:backend)).to receive(:classify).with("text")
expect(subject.send(:backend)).to receive(:classify_with_score).with("text")

subject.classify("text")
end
end

describe "log" do
it "returns a log" do
expect(subject.log).to eq("The Classification engine marked this as ...")
expect(subject.log).to be_nil
end

context "when category is spam" do
it "returns a log" do
allow(subject.send(:backend)).to receive(:classify_with_score).with("text").and_return(["Spam", -12.6997])
subject.classify("text")
expect(subject.log).to eq("The Classification engine marked this as Spam")
end
end
end

describe "score" do
it "returns a score" do
expect(subject.score).to eq(0)
end

it "returns 0 when is ham" do
allow(subject.send(:backend)).to receive(:classify_with_score).with("text").and_return(["Ham", -12.6997])
subject.classify("text")
expect(subject.score).to eq(0)
end

it "returns 1 when is spam" do
allow(subject.send(:backend)).to receive(:classify_with_score).with("text").and_return(["Spam", -12.6997])
subject.classify("text")
expect(subject.score).to eq(1)
end
end
end
9 changes: 9 additions & 0 deletions decidim-ai/spec/lib/decidim/ai/spam_detection_service_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,13 @@
expect(subject.classification_log).to eq("base log\ndummy log")
end
end

describe "score" do
it "returns the average score of all strategies" do
allow(registry.for(:base)).to receive(:score).and_return(1)
allow(registry.for(:dummy)).to receive(:score).and_return(0)

expect(subject.score).to eq(0.5)
end
end
end
4 changes: 2 additions & 2 deletions decidim-ai/spec/lib/decidim/ai/strategy_registry_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ module Ai
it "returns all content blocks" do
subject.register_analyzer(**analyzer)

expect(subject.all).to be_a(Hash)
expect(subject.all.size).to be(1)
expect(subject.strategies).to be_a(Array)
expect(subject.size).to be(1)
end
end
end
Expand Down

0 comments on commit f6a1dca

Please sign in to comment.