Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Change account search tokenizer and queries #26378

Merged
merged 1 commit into from Aug 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion app/chewy/accounts_index.rb
Expand Up @@ -33,7 +33,7 @@ class AccountsIndex < Chewy::Index
},

verbatim: {
tokenizer: 'whitespace',
tokenizer: 'standard',
filter: %w(lowercase asciifolding cjk_width),
},

Expand Down
231 changes: 143 additions & 88 deletions app/services/account_search_service.rb
Expand Up @@ -8,6 +8,143 @@ class AccountSearchService < BaseService
# Min. number of characters to look for non-exact matches
MIN_QUERY_LENGTH = 5

# Abstract builder for the Elasticsearch account search query.
#
# The text-matching part of the query is supplied by subclasses through the
# private +core_query+ hook; this class wraps it in a function_score query
# and adds the "following" restriction or boost.
class QueryBuilder
  # @param query the search terms handed to +core_query+ implementations
  # @param account the searching account, or nil; when present it must
  #   respond to +id+ and +active_relationships+
  # @param options [Hash] only the +:following+ key is read by this class
  def initialize(query, account, options = {})
    @query = query
    @account = account
    @options = options
  end

  # Passes the assembled query hash to AccountsIndex.query and returns
  # whatever that call returns.
  def build
    AccountsIndex.query(
      bool: {
        must: {
          function_score: {
            query: {
              bool: {
                must: must_clauses,
              },
            },

            functions: [
              reputation_score_function,
              followers_score_function,
              time_distance_function,
            ],
          },
        },

        should: should_clauses,
      }
    )
  end

  private

  # Hook returning the clause that matches the search terms.
  # Subclasses must override it; the base class cannot build a query alone.
  def core_query
    raise NotImplementedError, "#{self.class} must implement #core_query"
  end

  # Clauses every result has to satisfy: the core query, plus the
  # following-only filter when an account asked for it.
  def must_clauses
    clauses = [core_query]
    clauses << only_following_query if @account && @options[:following]
    clauses
  end

  # Optional clauses: the following boost applies only when an account is
  # present and results are not already restricted to followed accounts.
  def should_clauses
    return [] unless @account && !@options[:following]

    [boost_following_query]
  end

  # This function limits results to only the accounts the user is following
  def only_following_query
    {
      terms: {
        id: following_ids,
      },
    }
  end

  # This function promotes accounts the user is following
  def boost_following_query
    {
      terms: {
        id: following_ids,
        boost: 100,
      },
    }
  end

  # This function deranks accounts that follow more people than follow them
  def reputation_score_function
    {
      script_score: {
        script: {
          source: "(Math.max(doc['followers_count'].value, 0) + 0.0) / (Math.max(doc['followers_count'].value, 0) + Math.max(doc['following_count'].value, 0) + 1)",
        },
      },
    }
  end

  # This function promotes accounts that have more followers
  def followers_score_function
    {
      script_score: {
        script: {
          source: "(Math.max(doc['followers_count'].value, 0) / (Math.max(doc['followers_count'].value, 0) + 1))",
        },
      },
    }
  end

  # This function deranks accounts that haven't posted in a long time
  def time_distance_function
    {
      gauss: {
        last_status_at: {
          scale: '30d',
          offset: '30d',
          decay: 0.3,
        },
      },
    }
  end

  # IDs of the accounts followed by @account, plus @account's own ID.
  # Memoized so the relationship lookup runs at most once per builder.
  def following_ids
    @following_ids ||= @account.active_relationships.pluck(:target_account_id) + [@account.id]
  end
end

# Query builder whose core clause is a 'bool_prefix' multi_match over the
# username and display name fields (and their sub-fields).
class AutocompleteQueryBuilder < QueryBuilder
  private

  # Returns the multi_match clause for the search terms held in @query.
  def core_query
    searched_fields = %w(username username.* display_name display_name.*)
    matcher = { query: @query, type: 'bool_prefix', fields: searched_fields }

    { multi_match: matcher }
  end
end

# Query builder whose core clause is a 'most_fields' multi_match requiring
# every term ('and' operator), with username and display name weighted ^2
# alongside the text fields.
class FullQueryBuilder < QueryBuilder
  private

  # Returns the multi_match clause for the search terms held in @query.
  def core_query
    searched_fields = %w(username^2 display_name^2 text text.*)
    matcher = {
      query: @query,
      type: 'most_fields',
      fields: searched_fields,
      operator: 'and',
    }

    { multi_match: matcher }
  end
end

def call(query, account = nil, options = {})
@query = query&.strip&.gsub(/\A@/, '')
@limit = options[:limit].to_i
Expand Down Expand Up @@ -71,27 +208,15 @@ def simple_search_results
end

def from_elasticsearch
must_clauses = must_clause
should_clauses = should_clause

if account
return [] if options[:following] && following_ids.empty?

if options[:following]
must_clauses << { terms: { id: following_ids } }
elsif following_ids.any?
should_clauses << { terms: { id: following_ids, boost: 100 } }
query_builder = begin
if options[:use_searchable_text]
FullQueryBuilder.new(terms_for_query, account, options.slice(:following))
else
AutocompleteQueryBuilder.new(terms_for_query, account, options.slice(:following))
end
end

query = { bool: { must: must_clauses, should: should_clauses } }
functions = [reputation_score_function, followers_score_function, time_distance_function]

records = AccountsIndex.query(function_score: { query: query, functions: functions })
.limit(limit_for_non_exact_results)
.offset(offset)
.objects
.compact
records = query_builder.build.limit(limit_for_non_exact_results).offset(offset).objects.compact

ActiveRecord::Associations::Preloader.new(records: records, associations: :account_stat)

Expand All @@ -100,76 +225,6 @@ def from_elasticsearch
nil
end

# Script score computing followers / (followers + following + 1), with both
# counts clamped to be non-negative by the script itself.
def reputation_score_function
  ratio_script = "(Math.max(doc['followers_count'].value, 0) + 0.0) / (Math.max(doc['followers_count'].value, 0) + Math.max(doc['following_count'].value, 0) + 1)"

  { script_score: { script: { source: ratio_script } } }
end

# Script score computing log10 of the non-negative follower count plus 2.
def followers_score_function
  log_script = "Math.log10(Math.max(doc['followers_count'].value, 0) + 2)"

  { script_score: { script: { source: log_script } } }
end

# Gauss decay function over last_status_at: 30 day offset, 30 day scale,
# decay factor 0.3.
def time_distance_function
  decay_settings = { scale: '30d', offset: '30d', decay: 0.3 }

  { gauss: { last_status_at: decay_settings } }
end

# Builds the single mandatory multi_match clause ('best_fields', 'or').
# Hashtag-style queries search only the text fields; otherwise username and
# display name are searched, plus the text fields when
# options[:use_searchable_text] is set.
def must_clause
  searched_fields =
    if options[:start_with_hashtag]
      %w(text text.*)
    elsif options[:use_searchable_text]
      %w(username username.* display_name display_name.* text text.*)
    else
      %w(username username.* display_name display_name.*)
    end

  matcher = {
    query: terms_for_query,
    fields: searched_fields,
    type: 'best_fields',
    operator: 'or',
  }

  [{ multi_match: matcher }]
end

# Builds the optional clause that boosts (x10) documents matching every
# term ('and' operator) in the username or display name fields.
def should_clause
  matcher = {
    query: terms_for_query,
    fields: %w(username username.* display_name display_name.*),
    type: 'best_fields',
    operator: 'and',
    boost: 10,
  }

  [{ multi_match: matcher }]
end

# IDs of the accounts `account` follows, followed by the account's own ID.
# The list is computed on first use and cached in @following_ids.
def following_ids
  return @following_ids if @following_ids

  followed = account.active_relationships.pluck(:target_account_id)
  @following_ids = followed + [account.id]
end

def limit_for_non_exact_results
return 0 if @account.nil? && query.size < MIN_QUERY_LENGTH

Expand Down