Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add score calculation #262

Merged
merged 1 commit into from
Jul 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions decidim-ai/lib/decidim/ai.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@ module SpamContent

include ActiveSupport::Configurable

# You can configure the spam threshold for the spam detection service.
# The threshold is a float value between 0 and 1.
# The default value is 0.5.
# Any value below the threshold will be considered spam.
config_accessor :spam_treshold do
0.5
end
# Registered analyzers.
# You can register your own analyzer by adding a new entry to this array.
# The entry must be a hash with the following keys:
Expand Down
7 changes: 7 additions & 0 deletions decidim-ai/lib/decidim/ai/spam_content/base_strategy.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ module Decidim
module Ai
module SpamContent
class BaseStrategy
attr_reader :name

def initialize(options = {})
@name = options.delete(:name)
@options = options
end

Expand All @@ -13,6 +16,10 @@ def classify(_content); end
def train(_classification, _content); end

def untrain(_classification, _content); end

def log; end

def score; end
end
end
end
Expand Down
32 changes: 27 additions & 5 deletions decidim-ai/lib/decidim/ai/spam_content/bayes_strategy.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,41 @@ module Ai
module SpamContent
class BayesStrategy < BaseStrategy
def initialize(options = {})
@options = { adapter: :memory, params: {} }.deep_merge(options)
@backend = ClassifierReborn::Bayes.new :spam, :ham, backend: configured_backend
super
@options = { adapter: :memory, categories: %w(ham spam), params: {} }.deep_merge(options)

@available_categories = options[:categories]
@backend = ClassifierReborn::Bayes.new(*available_categories, backend: configured_backend)
end

delegate :train, :untrain, :classify, to: :backend
delegate :train, :untrain, to: :backend

def log
"The Classification engine marked this as ..."
return unless category

"The Classification engine marked this as #{category}"
end

# Asks the backend to classify +content+, remembers the winning category and
# its raw score on the instance, and returns the category.
def classify(content)
  verdict = backend.classify_with_score(content)
  @category, @internal_score = verdict
  category
end

# The Bayes strategy returns an internal score that can be lower than -1.
# As per the ClassifierReborn documentation, the category whose score is closest to 0 is picked as the dominant category.
#
# From original documentation:
# Returns the scores in each category the provided +text+. E.g.,
# b.classifications "I hate bad words and you"
# => {"Uninteresting"=>-12.6997928013932, "Interesting"=>-18.4206807439524}
# The largest of these scores (the one closest to 0) is the one picked out by #classify
# Maps the last classification onto a binary score: 1 when the category is
# "Spam", 0 otherwise (including when nothing has been classified yet).
def score
  return 1 if category.presence == "Spam"

  0
end

private

attr_reader :backend, :options
attr_reader :backend, :options, :available_categories, :category, :internal_score

def configured_backend
if options[:adapter] == :redis
Expand Down
12 changes: 8 additions & 4 deletions decidim-ai/lib/decidim/ai/spam_detection_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,30 @@ def initialize
end

def train(category, text)
@registry.each do |_name, strategy|
@registry.each do |strategy|
strategy.train(category, text)
end
end

def classify(text)
@registry.each do |_name, strategy|
@registry.each do |strategy|
strategy.classify(text)
end
end

def untrain(category, text)
@registry.each do |_name, strategy|
@registry.each do |strategy|
strategy.untrain(category, text)
end
end

# Averages the scores reported by every registered strategy.
#
# Returns a Float. When no strategy is registered it returns 0.0, because the
# plain average would otherwise be 0.0 / 0, which is NaN and compares false
# against any threshold.
def score
  return 0.0 if @registry.size.zero?

  @registry.collect(&:score).inject(0.0, :+) / @registry.size
end

def classification_log
@classification_log = []
@registry.each do |_name, strategy|
@registry.each do |strategy|
@classification_log << strategy.log
end
@classification_log.join("\n")
Expand Down
26 changes: 11 additions & 15 deletions decidim-ai/lib/decidim/ai/strategy_registry.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,31 +5,27 @@ module Ai
class StrategyRegistry
class StrategyAlreadyRegistered < StandardError; end

delegate :clear, :collect, :each, :size, to: :strategies
attr_reader :strategies

def initialize
@strategies = []
end

def register_analyzer(name:, strategy:, options: {})
if strategies[name].present?
if self.for(name).present?
raise(
StrategyAlreadyRegistered,
"There is a stategy already registered with the name `:#{name}`"
)
end

strategies[name] = strategy.new(options)
options = { name: }.merge(options)
strategies << strategy.new(options)
end

def for(name)
strategies[name]
end

def all
strategies
end

delegate :empty?, :size, :each, :clear, to: :strategies

private

def strategies
@strategies ||= {}
strategies.select { |k, _v| k.name == name }.first
end
end
end
Expand Down
30 changes: 28 additions & 2 deletions decidim-ai/spec/lib/decidim/ai/spam_content/bayes_strategy_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,41 @@

describe "classify" do
it "calls backend.classify" do
expect(subject.send(:backend)).to receive(:classify).with("text")
expect(subject.send(:backend)).to receive(:classify_with_score).with("text")

subject.classify("text")
end
end

describe "log" do
it "returns a log" do
expect(subject.log).to eq("The Classification engine marked this as ...")
expect(subject.log).to be_nil
end

context "when category is spam" do
it "returns a log" do
allow(subject.send(:backend)).to receive(:classify_with_score).with("text").and_return(["Spam", -12.6997])
subject.classify("text")
expect(subject.log).to eq("The Classification engine marked this as Spam")
end
end
end

describe "score" do
it "returns a score" do
expect(subject.score).to eq(0)
end

it "returns 0 when is ham" do
allow(subject.send(:backend)).to receive(:classify_with_score).with("text").and_return(["Ham", -12.6997])
subject.classify("text")
expect(subject.score).to eq(0)
end

it "returns 1 when is spam" do
allow(subject.send(:backend)).to receive(:classify_with_score).with("text").and_return(["Spam", -12.6997])
subject.classify("text")
expect(subject.score).to eq(1)
end
end
end
9 changes: 9 additions & 0 deletions decidim-ai/spec/lib/decidim/ai/spam_detection_service_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,13 @@
expect(subject.classification_log).to eq("base log\ndummy log")
end
end

describe "score" do
it "returns the average score of all strategies" do
allow(registry.for(:base)).to receive(:score).and_return(1)
allow(registry.for(:dummy)).to receive(:score).and_return(0)

expect(subject.score).to eq(0.5)
end
end
end
4 changes: 2 additions & 2 deletions decidim-ai/spec/lib/decidim/ai/strategy_registry_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ module Ai
it "returns all content blocks" do
subject.register_analyzer(**analyzer)

expect(subject.all).to be_a(Hash)
expect(subject.all.size).to be(1)
expect(subject.strategies).to be_a(Array)
expect(subject.size).to be(1)
end
end
end
Expand Down
Loading