Skip to content

Commit

Permalink
Change account search tokenizer and queries
Browse files Browse the repository at this point in the history
  • Loading branch information
Gargron committed Aug 7, 2023
1 parent 30c64bf commit c0a33b9
Show file tree
Hide file tree
Showing 2 changed files with 144 additions and 89 deletions.
2 changes: 1 addition & 1 deletion app/chewy/accounts_index.rb
Expand Up @@ -33,7 +33,7 @@ class AccountsIndex < Chewy::Index
},

verbatim: {
tokenizer: 'whitespace',
tokenizer: 'standard',
filter: %w(lowercase asciifolding cjk_width),
},

Expand Down
231 changes: 143 additions & 88 deletions app/services/account_search_service.rb
Expand Up @@ -8,6 +8,143 @@ class AccountSearchService < BaseService
# Min. number of characters to look for non-exact matches
MIN_QUERY_LENGTH = 5

# Assembles the Elasticsearch account-search query: a text match wrapped in a
# function_score, plus optional clauses derived from who the searching
# account follows.
#
# This class does not define `core_query` (the text-matching clause used by
# `must_clauses`); subclasses are expected to provide it.
class QueryBuilder
  # query   - search terms, stored for use by the subclass-provided `core_query`
  # account - the searching account, or nil; when nil no following-based
  #           clauses are generated
  # options - hash of flags; only `:following` is read by this class
  def initialize(query, account, options = {})
    @query = query
    @account = account
    @options = options
  end

  # Builds the query and hands it to AccountsIndex.query, returning whatever
  # that call returns.
  def build
    AccountsIndex.query(
      bool: {
        must: {
          function_score: {
            query: {
              bool: {
                must: must_clauses,
              },
            },

            functions: [
              reputation_score_function,
              followers_score_function,
              time_distance_function,
            ],
          },
        },

        should: should_clauses,
      }
    )
  end

  private

  # Clauses every hit has to satisfy: always the core text match, plus the
  # followed-accounts filter when an account is present and `:following` is set.
  def must_clauses
    if @account && @options[:following]
      [core_query, only_following_query]
    else
      [core_query]
    end
  end

  # Optional clauses: the following boost is only added when an account is
  # present and the search is NOT already restricted to followed accounts.
  def should_clauses
    if @account && !@options[:following]
      [boost_following_query]
    else
      []
    end
  end

  # This function limits results to only the accounts the user is following
  def only_following_query
    {
      terms: {
        id: following_ids,
      },
    }
  end

  # This function promotes accounts the user is following
  def boost_following_query
    {
      terms: {
        id: following_ids,
        boost: 100,
      },
    }
  end

  # This function deranks accounts that follow more people than follow them
  def reputation_score_function
    {
      script_score: {
        script: {
          # `+ 0.0` promotes the numerator so the division is not truncated.
          source: "(Math.max(doc['followers_count'].value, 0) + 0.0) / (Math.max(doc['followers_count'].value, 0) + Math.max(doc['following_count'].value, 0) + 1)",
        },
      },
    }
  end

  # This function promotes accounts that have more followers
  def followers_score_function
    {
      script_score: {
        script: {
          # Without `+ 0.0`, Painless divides two integral values and the
          # result truncates to 0 for every account (assuming followers_count
          # is mapped as an integer type; the mapping is not visible here).
          # The numerator is promoted the same way reputation_score_function
          # does it, giving followers / (followers + 1).
          source: "(Math.max(doc['followers_count'].value, 0) + 0.0) / (Math.max(doc['followers_count'].value, 0) + 1)",
        },
      },
    }
  end

  # This function deranks accounts that haven't posted in a long time
  def time_distance_function
    {
      gauss: {
        last_status_at: {
          scale: '30d',
          offset: '30d',
          decay: 0.3,
        },
      },
    }
  end

  # Memoized ids of the accounts @account follows, with @account's own id
  # appended. Only called on paths that have already checked @account.
  def following_ids
    @following_ids ||= @account.active_relationships.pluck(:target_account_id) + [@account.id]
  end
end

# QueryBuilder variant whose text match is a `bool_prefix` multi_match over
# the username and display_name fields and anything matching `username.*` /
# `display_name.*`.
class AutocompleteQueryBuilder < QueryBuilder
  private

  # Text-matching clause consumed by the parent class when assembling the
  # final query.
  def core_query
    matched_fields = %w(username username.* display_name display_name.*)

    { multi_match: { query: @query, type: 'bool_prefix', fields: matched_fields } }
  end
end

# QueryBuilder variant whose text match is a `most_fields` multi_match with
# the `and` operator, covering username and display_name (both carrying a
# `^2` suffix) as well as `text` and `text.*`.
class FullQueryBuilder < QueryBuilder
  private

  # Text-matching clause consumed by the parent class when assembling the
  # final query.
  def core_query
    match_settings = {
      query: @query,
      type: 'most_fields',
      fields: %w(username^2 display_name^2 text text.*),
      operator: 'and',
    }

    { multi_match: match_settings }
  end
end

def call(query, account = nil, options = {})
@query = query&.strip&.gsub(/\A@/, '')
@limit = options[:limit].to_i
Expand Down Expand Up @@ -71,27 +208,15 @@ def simple_search_results
end

def from_elasticsearch
must_clauses = must_clause
should_clauses = should_clause

if account
return [] if options[:following] && following_ids.empty?

if options[:following]
must_clauses << { terms: { id: following_ids } }
elsif following_ids.any?
should_clauses << { terms: { id: following_ids, boost: 100 } }
query_builder = begin
if options[:use_searchable_text]
FullQueryBuilder.new(terms_for_query, account, options.slice(:following))
else
AutocompleteQueryBuilder.new(terms_for_query, account, options.slice(:following))
end
end

query = { bool: { must: must_clauses, should: should_clauses } }
functions = [reputation_score_function, followers_score_function, time_distance_function]

records = AccountsIndex.query(function_score: { query: query, functions: functions })
.limit(limit_for_non_exact_results)
.offset(offset)
.objects
.compact
records = query_builder.build.limit(limit_for_non_exact_results).offset(offset).objects.compact

ActiveRecord::Associations::Preloader.new(records: records, associations: :account_stat)

Expand All @@ -100,76 +225,6 @@ def from_elasticsearch
nil
end

# Script-score function computing followers / (followers + following + 1),
# with each count clamped to zero from below inside the script. The `+ 0.0`
# keeps the division from truncating.
def reputation_score_function
  painless_source = "(Math.max(doc['followers_count'].value, 0) + 0.0) / (Math.max(doc['followers_count'].value, 0) + Math.max(doc['following_count'].value, 0) + 1)"

  { script_score: { script: { source: painless_source } } }
end

# Script-score function computing log10(followers + 2), with the follower
# count clamped to zero from below inside the script.
def followers_score_function
  painless_source = "Math.log10(Math.max(doc['followers_count'].value, 0) + 2)"

  { script_score: { script: { source: painless_source } } }
end

# Gauss decay function over `last_status_at`, using a 30d scale, a 30d offset
# and a decay of 0.3.
def time_distance_function
  decay_settings = { scale: '30d', offset: '30d', decay: 0.3 }

  { gauss: { last_status_at: decay_settings } }
end

# Returns a one-element list holding a `best_fields` / `or` multi_match.
# Field selection:
#   - `:start_with_hashtag` set      -> only text fields
#   - otherwise                      -> username and display_name fields,
#     with the text fields appended when `:use_searchable_text` is set
def must_clause
  searched_fields =
    if options[:start_with_hashtag]
      %w(text text.*)
    elsif options[:use_searchable_text]
      %w(username username.* display_name display_name.* text text.*)
    else
      %w(username username.* display_name display_name.*)
    end

  match = {
    query: terms_for_query,
    fields: searched_fields,
    type: 'best_fields',
    operator: 'or',
  }

  [{ multi_match: match }]
end

# Returns a one-element list holding a `best_fields` / `and` multi_match over
# the username and display_name fields, boosted by 10.
def should_clause
  boosted_match = {
    query: terms_for_query,
    fields: %w(username username.* display_name display_name.*),
    type: 'best_fields',
    operator: 'and',
    boost: 10,
  }

  [{ multi_match: boosted_match }]
end

# Memoized ids of the accounts `account` follows, with `account`'s own id
# appended at the end.
def following_ids
  return @following_ids if @following_ids

  followed_ids = account.active_relationships.pluck(:target_account_id)
  @following_ids = followed_ids + [account.id]
end

def limit_for_non_exact_results
return 0 if @account.nil? && query.size < MIN_QUERY_LENGTH

Expand Down

0 comments on commit c0a33b9

Please sign in to comment.