Skip to content

Commit

Permalink
Add all matched statuses into automatic report
Browse files Browse the repository at this point in the history
  • Loading branch information
Gargron committed Jul 12, 2019
1 parent f735bfe commit 182bb68
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 11 deletions.
48 changes: 39 additions & 9 deletions app/lib/spam_check.rb
Expand Up @@ -21,10 +21,9 @@ def spam?
if insufficient_data?
false
elsif nilsimsa?
other_digests = redis.zrange(redis_key, '0', '-1')
other_digests.select { |other_digest| other_digest.start_with?('nilsimsa') }.any? { |other_digest| nilsimsa_compare_value(digest, other_digest.split(':').last) >= NILSIMSA_COMPARE_THRESHOLD }
any_other_digest?('nilsimsa') { |_, other_digest| nilsimsa_compare_value(digest, other_digest) >= NILSIMSA_COMPARE_THRESHOLD }
else
!redis.zrank(redis_key, digest_with_algorithm).nil?
any_other_digest?('md5') { |_, other_digest| other_digest == digest }
end
end

Expand All @@ -34,12 +33,18 @@ def flag!
end

# Adds this status's digest to the account's sorted set, trims the set by
# rank, and refreshes the expiry on the key.
def remember!
  key = redis_key

  # Sorted-set scores do not have enough bits to represent our snowflake IDs
  # exactly, so the score is used purely for ordering. The exact status ID
  # is carried inside the member string instead.
  redis.zadd(key, @status.id, digest_with_algorithm)
  redis.zremrangebyrank(key, '0', '-10')
  redis.expire(key, EXPIRE_SET_AFTER)
end

private
# Deletes the Redis key returned by `redis_key`, discarding every digest
# stored under it.
def reset!
  key = redis_key
  redis.del(key)
end

def hashable_text
return @hashable_text if defined?(@hashable_text)
Expand Down Expand Up @@ -67,17 +72,19 @@ def digest

# Builds the member string stored in the Redis sorted set, in the form
# "<algorithm>:<digest>:<status id>". The algorithm is 'nilsimsa' when
# `nilsimsa?` is true and 'md5' otherwise.
#
# Previously each branch also built a two-part "<algorithm>:<digest>" string
# whose value was thrown away; only the three-part string was ever returned.
def digest_with_algorithm
  algorithm = nilsimsa? ? 'nilsimsa' : 'md5'

  [algorithm, digest, @status.id].join(':')
end

private

def remove_mentions(text)
return text.gsub(Account::MENTION_RE, '') if @status.local?

Nokogiri::HTML.fragment(text).tap do |html|
mentions = @status.mentions.map { |mention| TagManager.instance.url_for(mention.account) }
mentions = @status.mentions.map { |mention| ActivityPub::TagManager.instance.url_for(mention.account) }

html.traverse do |element|
element.unlink if element.name == 'a' && mentions.include?(element['href'])
Expand All @@ -90,15 +97,16 @@ def normalize_unicode(text)
end

# Collapses every run of whitespace in `text` into a single space and strips
# leading and trailing whitespace from the result.
#
# The substitution is performed once; an earlier duplicate `gsub` whose
# result was discarded has been removed.
def remove_whitespace(text)
  text.gsub(/\s+/, ' ').strip
end

# Calls `silence!` on @account.
def auto_silence_account!
@account.silence!
end

# Files exactly one report against @account, authored by
# `Account.representative`.
#
# When the status is distributable, the report lists the public/unlisted
# statuses whose IDs are returned by `matching_status_ids`, followed by the
# current status itself. Otherwise `status_ids` is nil.
#
# An earlier version of this method called ReportService twice (once with
# only the current status, once with all matches), producing two reports for
# a single detection.
def auto_report_status!
  status_ids = nil

  if @status.distributable?
    earlier_ids = Status.where(visibility: %i(public unlisted)).where(id: matching_status_ids).pluck(:id)
    status_ids  = earlier_ids + [@status.id]
  end

  ReportService.new.call(Account.representative, @account, status_ids: status_ids, comment: I18n.t('spam_check.spam_detected_and_silenced'))
end

def already_flagged?
Expand Down Expand Up @@ -133,6 +141,28 @@ def nilsimsa?
hashable_text.size > NILSIMSA_MIN_SIZE
end

# Reads every member (rank 0 through -1) of the sorted set stored under
# `redis_key` and returns them as Redis hands them back.
def other_digests
  key = redis_key
  redis.zrange(key, 0, -1)
end

# Returns true when the block is truthy for at least one stored record whose
# algorithm equals `filter_algorithm`.
#
# Each record is split on ':' into algorithm, digest and status id, and all
# three are yielded to the block. Records with a different algorithm are
# never yielded.
def any_other_digest?(filter_algorithm)
  other_digests.any? do |record|
    algorithm, other_digest, status_id = record.split(':')

    algorithm == filter_algorithm && yield(algorithm, other_digest, status_id)
  end
end

# Collects the status ids embedded in stored records that match the current
# digest.
#
# With nilsimsa, a record matches when it starts with 'nilsimsa' and its
# comparison value against `digest` reaches NILSIMSA_COMPARE_THRESHOLD.
# Otherwise a record matches when it starts with 'md5' and its digest equals
# `digest`. Records without a status id segment contribute nothing.
def matching_status_ids
  matcher =
    if nilsimsa?
      ->(record, other_digest) { record.start_with?('nilsimsa') && nilsimsa_compare_value(digest, other_digest) >= NILSIMSA_COMPARE_THRESHOLD }
    else
      ->(record, other_digest) { record.start_with?('md5') && other_digest == digest }
    end

  other_digests.each_with_object([]) do |record, status_ids|
    _algorithm, other_digest, status_id = record.split(':')

    status_ids << status_id if matcher.call(record, other_digest) && status_id
  end
end

# Returns the Redis key for this check, "spam_check:<account id>". The value
# is computed on first use and cached in @redis_key.
def redis_key
  return @redis_key if @redis_key

  @redis_key = "spam_check:#{@account.id}"
end
Expand Down
1 change: 1 addition & 0 deletions db/schema.rb
Expand Up @@ -148,6 +148,7 @@
t.string "also_known_as", array: true
t.datetime "silenced_at"
t.datetime "suspended_at"
t.integer "trust_level"
t.index "(((setweight(to_tsvector('simple'::regconfig, (display_name)::text), 'A'::\"char\") || setweight(to_tsvector('simple'::regconfig, (username)::text), 'B'::\"char\")) || setweight(to_tsvector('simple'::regconfig, (COALESCE(domain, ''::character varying))::text), 'C'::\"char\")))", name: "search_index", using: :gin
t.index "lower((username)::text), lower((domain)::text)", name: "index_accounts_on_username_and_domain_lower", unique: true
t.index ["moved_to_account_id"], name: "index_accounts_on_moved_to_account_id"
Expand Down
42 changes: 40 additions & 2 deletions spec/lib/spam_check_spec.rb
Expand Up @@ -11,6 +11,37 @@ def status_with_html(text, options = {})
status
end

describe '#hashable_text' do
  # Status built through the HTML helper: the mention must not appear in the
  # hashable text.
  it 'removes mentions from HTML for remote statuses' do
    spam_check = described_class.new(status_with_html('@alice Hello'))

    expect(spam_check.hashable_text).to eq 'hello'
  end

  # Status created through PostStatusService: the mention is removed from the
  # plain text, leaving the surrounding punctuation in place.
  it 'removes mentions from text for local statuses' do
    local_status = PostStatusService.new.call(alice, text: "Hey @#{sender.username}, how are you?")
    spam_check   = described_class.new(local_status)

    expect(spam_check.hashable_text).to eq 'hey , how are you?'
  end
end

describe '#insufficient_data?' do
  # A status consisting only of a mention.
  it 'returns true when there is no text' do
    spam_check = described_class.new(status_with_html('@alice'))

    expect(spam_check.insufficient_data?).to be true
  end

  # A single character besides the mention already counts as text.
  it 'returns false when there is text' do
    spam_check = described_class.new(status_with_html('@alice h'))

    expect(spam_check.insufficient_data?).to be false
  end
end

describe '#digest' do
  # Only the type of the digest is checked here, not its value.
  it 'returns a string' do
    spam_check = described_class.new(status_with_html('@alice Hello world'))

    expect(spam_check.digest).to be_a String
  end
end

describe '#spam?' do
it 'returns false for a unique status' do
status = status_with_html('@alice Hello')
Expand Down Expand Up @@ -106,9 +137,12 @@ def status_with_html(text, options = {})
end

describe '#flag!' do
let!(:status1) { status_with_html('@alice General Kenobi you are a bold one') }
let!(:status2) { status_with_html('@alice @bob General Kenobi, you are a bold one') }

# Remember status1 first, then flag status2, so that the flagging run has an
# earlier stored digest to compare against.
#
# The stale setup that also flagged a separate '@alice @bob Hello' status has
# been dropped: it triggered a second `flag!` call, which conflicts with the
# expectation of exactly one unresolved report.
before do
  described_class.new(status1).remember!
  described_class.new(status2).flag!
end

it 'silences the account' do
Expand All @@ -118,5 +152,9 @@ def status_with_html(text, options = {})
it 'creates a report about the account' do
expect(sender.targeted_reports.unresolved.count).to eq 1
end

it 'attaches both matching statuses to the report' do
expect(sender.targeted_reports.first.status_ids).to include(status1.id, status2.id)
end
end
end

0 comments on commit 182bb68

Please sign in to comment.