From 781e64355169396dc063b33a095881c1fa2e7278 Mon Sep 17 00:00:00 2001 From: Stan Lo Date: Sun, 12 Oct 2025 13:19:18 +0100 Subject: [PATCH 1/3] Prioritize exact matches in search results When searching for a class name like "String", the search should return exact matches first before returning classes that contain the search term in their name (like "Prism::StringNode"). This adds two new passes at the front of the search algorithm: pass 0 only matches when both the searchIndex and the longSearchIndex exactly equal the query, and pass 1 matches when the searchIndex, with any trailing "()" removed, exactly equals the query. The previous passes are shifted accordingly: - Pass 0: exact match (new) - Pass 1: exact method match (new) - Pass 2: searchIndex starts with query (was pass 0) - Pass 3: longSearchIndex starts with query (was pass 1) - Pass 4: searchIndex contains query (was pass 2) - Pass 5: regexp match (was pass 3) Fixes ruby/rdoc#1194 --- .../template/json_index/js/searcher.js | 39 ++++++++++++++++--- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/lib/rdoc/generator/template/json_index/js/searcher.js b/lib/rdoc/generator/template/json_index/js/searcher.js index f1ffe8cffe..dfec4493a7 100644 --- a/lib/rdoc/generator/template/json_index/js/searcher.js +++ b/lib/rdoc/generator/template/json_index/js/searcher.js @@ -29,7 +29,7 @@ Searcher.prototype = new function() { var results = performSearch(_this.data, regexps, queries, highlighters, state); - var hasMore = (state.limit > 0 && state.pass < 4); + var hasMore = (state.limit > 0 && state.pass < 6); triggerResults.call(_this, results, !hasMore); if (hasMore) { @@ -85,6 +85,29 @@ Searcher.prototype = new function() { /* ----- Mathchers ------ */ + /* + * This record matches if both the index and longIndex exactly equal queries[0]. + * The regexps are not consulted in this pass. This ensures top-level exact matches + * like "String" are prioritized over nested classes like "Gem::Module::String".
+ */ + function matchPassExact(index, longIndex, queries) { + return index == queries[0] && longIndex == queries[0]; + } + + /* + * This record matches if the index without "()" exactly equals queries[0]. + * This prioritizes methods like "attribute()" when searching for "attribute". + */ + function matchPassExactMethod(index, longIndex, queries, regexps) { + var indexWithoutParens = index.replace(/\(\)$/, ''); + if (indexWithoutParens != queries[0]) return false; + for (var i=1, l = regexps.length; i < l; i++) { + if (!index.match(regexps[i]) && !longIndex.match(regexps[i])) + return false; + }; + return true; + } + /* * This record matches if the index starts with queries[0] and the record * matches all of the regexps @@ -192,17 +215,23 @@ Searcher.prototype = new function() { var togo = CHUNK_SIZE; var matchFunc, hltFunc; - while (state.pass < 4 && state.limit > 0 && togo > 0) { + while (state.pass < 6 && state.limit > 0 && togo > 0) { if (state.pass == 0) { - matchFunc = matchPassBeginning; + matchFunc = matchPassExact; hltFunc = highlightQuery; } else if (state.pass == 1) { - matchFunc = matchPassLongIndex; + matchFunc = matchPassExactMethod; hltFunc = highlightQuery; } else if (state.pass == 2) { - matchFunc = matchPassContains; + matchFunc = matchPassBeginning; hltFunc = highlightQuery; } else if (state.pass == 3) { + matchFunc = matchPassLongIndex; + hltFunc = highlightQuery; + } else if (state.pass == 4) { + matchFunc = matchPassContains; + hltFunc = highlightQuery; + } else if (state.pass == 5) { matchFunc = matchPassRegexp; hltFunc = highlightRegexp; } From c74e998d9436967ecb118ba38c87cf59f42c4cda Mon Sep 17 00:00:00 2001 From: Stan Lo Date: Mon, 13 Oct 2025 23:26:34 +0100 Subject: [PATCH 2/3] Use mini_racer to test search.js --- Gemfile | 6 + ...rdoc_generator_json_index_searcher_test.rb | 319 ++++++++++++++++++ 2 files changed, 325 insertions(+) create mode 100644 test/rdoc/rdoc_generator_json_index_searcher_test.rb diff --git a/Gemfile b/Gemfile index 
2a54c09b14..957cbbd227 100644 --- a/Gemfile +++ b/Gemfile @@ -11,3 +11,9 @@ gem 'rubocop', '>= 1.31.0' gem 'gettext' gem 'prism', '>= 0.30.0' gem 'webrick' + +platforms :ruby do + if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new('3.2') + gem 'mini_racer' # For testing the searcher.js file + end +end diff --git a/test/rdoc/rdoc_generator_json_index_searcher_test.rb b/test/rdoc/rdoc_generator_json_index_searcher_test.rb new file mode 100644 index 0000000000..402652d921 --- /dev/null +++ b/test/rdoc/rdoc_generator_json_index_searcher_test.rb @@ -0,0 +1,319 @@ +# frozen_string_literal: true + +require_relative 'helper' + +return if RUBY_DESCRIPTION =~ /truffleruby/ || RUBY_DESCRIPTION =~ /jruby/ + +begin + require 'mini_racer' +rescue LoadError + return +end + +# This test is a simpler setup for testing the searcher.js file without pulling all the JS dependencies. +# If there are more JS functionalities to test in the future, we can move to use JS test frameworks. +class RDocGeneratorJsonIndexSearcherTest < Test::Unit::TestCase + def setup + @context = MiniRacer::Context.new + + # Add RegExp.escape polyfill to avoid `RegExp.escape is not a function` error + @context.eval(<<~JS) + RegExp.escape = function(string) { + return string.replace(/[.*+?^${}()|[\\]\\\\]/g, '\\\\$&'); + }; + JS + + searcher_js_path = File.expand_path( + '../../lib/rdoc/generator/template/json_index/js/searcher.js', + __dir__ + ) + searcher_js = File.read(searcher_js_path) + @context.eval(searcher_js) + end + + def teardown + @context.dispose + end + + def test_exact_match_prioritized + results = run_search( + query: 'string', + data: { + searchIndex: ['string', 'string', 'strings'], + longSearchIndex: ['gem::safemarshal::elements::string', 'string', 'strings'], + info: [ + ['String', 'Gem::SafeMarshal::Elements', 'Gem/SafeMarshal/Elements/String.html', '', 'Nested String class', '', 'class'], + ['String', '', 'String.html', '', 'Top-level String class', '', 'class'], + ['Strings', '', 
'Strings.html', '', 'Strings class', '', 'class'] + ] + } + ) + + assert_equal 3, results.length + # Top-level String should come first despite being second in the array + assert_equal 'String', strip_highlights(results[0]['title']) + assert_equal '', results[0]['namespace'], 'Top-level String should be prioritized over nested String' + assert_equal 'String.html', results[0]['path'] + + # Nested String should come second + assert_equal 'String', strip_highlights(results[1]['title']) + assert_equal 'Gem::SafeMarshal::Elements', strip_highlights(results[1]['namespace']) + end + + def test_exact_method_match + results = run_search( + query: 'attribute', + data: { + searchIndex: ['attributemanager', 'attributes', 'attribute()'], + longSearchIndex: ['rdoc::markup::attributemanager', 'rdoc::markup::attributes', 'rdoc::markup::attributemanager#attribute()'], + info: [ + ['AttributeManager', 'RDoc::Markup', 'RDoc/Markup/AttributeManager.html', '', 'AttributeManager class', '', 'class'], + ['Attributes', 'RDoc::Markup', 'RDoc/Markup/Attributes.html', '', 'Attributes class', '', 'class'], + ['attribute', 'RDoc::Markup::AttributeManager', 'RDoc/Markup/AttributeManager.html#method-i-attribute', '()', 'Attribute method', '', 'method'] + ] + } + ) + + assert_equal 3, results.length + # attribute() method should come first despite being last in the array + assert_equal 'attribute', strip_highlights(results[0]['title']) + assert_equal 'RDoc::Markup::AttributeManager', strip_highlights(results[0]['namespace']) + end + + def test_exact_class_beats_exact_method + results = run_search( + query: 'attribute', + data: { + searchIndex: ['attribute()', 'attribute'], + longSearchIndex: ['rdoc::markup#attribute()', 'attribute'], + info: [ + ['attribute', 'RDoc::Markup', 'RDoc/Markup.html#method-i-attribute', '()', 'Attribute method', '', 'method'], + ['Attribute', '', 'Attribute.html', '', 'Attribute class (hypothetical)', '', 'class'] + ] + } + ) + + assert_equal 2, results.length + # Exact 
class match (Pass 0) should beat exact method match (Pass 1) + assert_equal 'Attribute', strip_highlights(results[0]['title']) + assert_equal '', results[0]['namespace'] + assert_equal 'Attribute.html', results[0]['path'] + + # Method comes second + assert_equal 'attribute', strip_highlights(results[1]['title']) + assert_equal 'RDoc::Markup', strip_highlights(results[1]['namespace']) + end + + def test_beginning_match + results = run_search( + query: 'attr', + data: { + searchIndex: ['attribute()', 'attributemanager', 'generator'], + longSearchIndex: ['rdoc::markup#attribute()', 'rdoc::markup::attributemanager', 'rdoc::generator'], + info: [ + ['attribute', 'RDoc::Markup', 'RDoc/Markup.html#method-i-attribute', '()', 'Attribute method', '', 'method'], + ['AttributeManager', 'RDoc::Markup', 'RDoc/Markup/AttributeManager.html', '', 'Manager class', '', 'class'], + ['Generator', 'RDoc', 'RDoc/Generator.html', '', 'Generator class', '', 'class'] + ] + } + ) + + assert_equal 2, results.length + assert_equal 'attribute', strip_highlights(results[0]['title']) + assert_equal 'AttributeManager', strip_highlights(results[1]['title']) + end + + def test_long_index_match + results = run_search( + query: 'rdoc::markup', + data: { + searchIndex: ['attributes', 'parser'], + longSearchIndex: ['rdoc::markup::attributes', 'rdoc::parser'], + info: [ + ['Attributes', 'RDoc::Markup', 'RDoc/Markup/Attributes.html', '', 'Attributes class', '', 'class'], + ['Parser', 'RDoc', 'RDoc/Parser.html', '', 'Parser class', '', 'class'] + ] + } + ) + + assert_equal 1, results.length + assert_equal 'Attributes', strip_highlights(results[0]['title']) + assert_equal 'RDoc::Markup', strip_highlights(results[0]['namespace']) + end + + def test_contains_match + results = run_search( + query: 'manager', + data: { + searchIndex: ['attributemanager', 'parser'], + longSearchIndex: ['rdoc::markup::attributemanager', 'rdoc::parser'], + info: [ + ['AttributeManager', 'RDoc::Markup', 
'RDoc/Markup/AttributeManager.html', '', 'Manager class', '', 'class'], + ['Parser', 'RDoc', 'RDoc/Parser.html', '', 'Parser class', '', 'class'] + ] + } + ) + + assert_equal 1, results.length + assert_equal 'AttributeManager', strip_highlights(results[0]['title']) + end + + def test_regexp_match + results = run_search( + query: 'atrbt', + data: { + searchIndex: ['attribute()', 'generator'], + longSearchIndex: ['rdoc::markup#attribute()', 'rdoc::generator'], + info: [ + ['attribute', 'RDoc::Markup', 'RDoc/Markup.html#method-i-attribute', '()', 'Attribute method', '', 'method'], + ['Generator', 'RDoc', 'RDoc/Generator.html', '', 'Generator class', '', 'class'] + ] + } + ) + + assert_equal 1, results.length + assert_equal 'attribute', strip_highlights(results[0]['title']) + end + + def test_empty_query + results = run_search( + query: '', + data: { + searchIndex: ['string'], + longSearchIndex: ['string'], + info: [['String', '', 'String.html', '', 'String class', '', 'class']] + } + ) + + assert_equal 0, results.length + end + + def test_no_matches + results = run_search( + query: 'nonexistent', + data: { + searchIndex: ['string', 'attribute()'], + longSearchIndex: ['string', 'rdoc#attribute()'], + info: [ + ['String', '', 'String.html', '', 'String class', '', 'class'], + ['attribute', 'RDoc', 'RDoc.html#attribute', '()', 'Attribute method', '', 'method'] + ] + } + ) + + assert_equal 0, results.length + end + + def test_multiple_exact_matches + results = run_search( + query: 'test', + data: { + searchIndex: ['test', 'test', 'testing'], + longSearchIndex: ['test', 'rdoc::test', 'testing'], + info: [ + ['Test', '', 'Test.html', '', 'Top-level Test', '', 'class'], + ['Test', 'RDoc', 'RDoc/Test.html', '', 'RDoc Test', '', 'class'], + ['Testing', '', 'Testing.html', '', 'Testing class', '', 'class'] + ] + } + ) + + assert_equal 3, results.length + # First result should be the exact match with both indexes matching + assert_equal 'Test', 
strip_highlights(results[0]['title']) + assert_equal '', results[0]['namespace'] + end + + # Test case insensitive search + def test_case_insensitive + results = run_search( + query: 'STRING', + data: { + searchIndex: ['string'], + longSearchIndex: ['string'], + info: [['String', '', 'String.html', '', 'String class', '', 'class']] + } + ) + + assert_equal 1, results.length + assert_equal 'String', strip_highlights(results[0]['title']) + end + + def test_multi_word_query + results = run_search( + query: 'rdoc markup', + data: { + searchIndex: ['attributemanager'], + longSearchIndex: ['rdoc::markup::attributemanager'], + info: [['AttributeManager', 'RDoc::Markup', 'RDoc/Markup/AttributeManager.html', '', 'Manager', '', 'class']] + } + ) + + assert_equal 1, results.length + assert_equal 'AttributeManager', results[0]['title'] + end + + def test_highlighting + results = run_search( + query: 'string', + data: { + searchIndex: ['string'], + longSearchIndex: ['string'], + info: [['String', '', 'String.html', '', 'String class', '', 'class']] + } + ) + + assert_equal 1, results.length + # Check that highlighting markers (unicode \u0001 and \u0002) are present + assert_match(/[\u0001\u0002]/, results[0]['title']) + end + + def test_max_results_limit + # Create 150 entries (more than MAX_RESULTS = 100) + search_index = [] + long_search_index = [] + info = [] + + 150.times do |i| + search_index << "test#{i}" + long_search_index << "test#{i}" + info << ["Test#{i}", '', "Test#{i}.html", '', "Test class #{i}", '', 'class'] + end + + results = run_search( + query: 'test', + data: { + searchIndex: search_index, + longSearchIndex: long_search_index, + info: info + } + ) + + # Should return at most 100 results + assert_operator results.length, :<=, 100 + end + + private + + def run_search(query:, data:) + @context.eval("var testResults = [];") + @context.eval(<<~JS) + var data = #{data.to_json}; + var searcher = new Searcher(data); + searcher.ready(function(res, isLast) { + 
testResults = testResults.concat(res); + }); + searcher.find(#{query.to_json}); + JS + + # Give some time for async operations + sleep 0.01 + + @context.eval('testResults') + end + + # Helper to strip highlighting markers from a string + def strip_highlights(str) + str.gsub(/[\u0001\u0002]/, '') + end +end From b3dcaccd28c38b5121905e89c94334cbe12f11ab Mon Sep 17 00:00:00 2001 From: Stan Lo Date: Tue, 14 Oct 2025 16:31:39 +0100 Subject: [PATCH 3/3] Make searching more case sensitive --- .../template/json_index/js/searcher.js | 8 +++- ...rdoc_generator_json_index_searcher_test.rb | 46 ++++++++++++++----- 2 files changed, 41 insertions(+), 13 deletions(-) diff --git a/lib/rdoc/generator/template/json_index/js/searcher.js b/lib/rdoc/generator/template/json_index/js/searcher.js index dfec4493a7..8f03f67a93 100644 --- a/lib/rdoc/generator/template/json_index/js/searcher.js +++ b/lib/rdoc/generator/template/json_index/js/searcher.js @@ -101,6 +101,7 @@ Searcher.prototype = new function() { function matchPassExactMethod(index, longIndex, queries, regexps) { var indexWithoutParens = index.replace(/\(\)$/, ''); if (indexWithoutParens != queries[0]) return false; + if (index === indexWithoutParens) return false; // Not a method (no parens to remove) for (var i=1, l = regexps.length; i < l; i++) { if (!index.match(regexps[i]) && !longIndex.match(regexps[i])) return false; @@ -215,12 +216,15 @@ Searcher.prototype = new function() { var togo = CHUNK_SIZE; var matchFunc, hltFunc; + var isLowercaseQuery = queries[0] === queries[0].toLowerCase(); + while (state.pass < 6 && state.limit > 0 && togo > 0) { + // When query is lowercase, prioritize methods over classes if (state.pass == 0) { - matchFunc = matchPassExact; + matchFunc = isLowercaseQuery ? matchPassExactMethod : matchPassExact; hltFunc = highlightQuery; } else if (state.pass == 1) { - matchFunc = matchPassExactMethod; + matchFunc = isLowercaseQuery ?
matchPassExact : matchPassExactMethod; hltFunc = highlightQuery; } else if (state.pass == 2) { matchFunc = matchPassBeginning; diff --git a/test/rdoc/rdoc_generator_json_index_searcher_test.rb b/test/rdoc/rdoc_generator_json_index_searcher_test.rb index 402652d921..36613e57c4 100644 --- a/test/rdoc/rdoc_generator_json_index_searcher_test.rb +++ b/test/rdoc/rdoc_generator_json_index_searcher_test.rb @@ -80,28 +80,52 @@ def test_exact_method_match assert_equal 'RDoc::Markup::AttributeManager', strip_highlights(results[0]['namespace']) end - def test_exact_class_beats_exact_method + def test_capitalized_query_prioritizes_exact_class results = run_search( - query: 'attribute', + query: 'String', data: { - searchIndex: ['attribute()', 'attribute'], - longSearchIndex: ['rdoc::markup#attribute()', 'attribute'], + searchIndex: ['string', 'string()'], + longSearchIndex: ['string', 'object#string()'], info: [ - ['attribute', 'RDoc::Markup', 'RDoc/Markup.html#method-i-attribute', '()', 'Attribute method', '', 'method'], - ['Attribute', '', 'Attribute.html', '', 'Attribute class (hypothetical)', '', 'class'] + ['String', '', 'String.html', '', 'String class', '', 'class'], + ['string', 'Object', 'Object.html#method-i-string', '()', 'String method', '', 'method'] ] } ) assert_equal 2, results.length - # Exact class match (Pass 0) should beat exact method match (Pass 1) - assert_equal 'Attribute', strip_highlights(results[0]['title']) + # Capitalized query: exact class (Pass 0) beats exact method (Pass 1) + assert_equal 'String', strip_highlights(results[0]['title']) assert_equal '', results[0]['namespace'] - assert_equal 'Attribute.html', results[0]['path'] + assert_equal 'String.html', results[0]['path'] # Method comes second - assert_equal 'attribute', strip_highlights(results[1]['title']) - assert_equal 'RDoc::Markup', strip_highlights(results[1]['namespace']) + assert_equal 'string', strip_highlights(results[1]['title']) + assert_equal 'Object', 
strip_highlights(results[1]['namespace']) + end + + def test_lowercase_query_prioritizes_method + results = run_search( + query: 'options', + data: { + searchIndex: ['options', 'options()'], + longSearchIndex: ['rdoc::options', 'rdoc::codeobject#options()'], + info: [ + ['Options', 'RDoc', 'RDoc/Options.html', '', 'Options class', '', 'class'], + ['options', 'RDoc::CodeObject', 'RDoc/CodeObject.html#method-i-options', '()', 'Options method', '', 'method'] + ] + } + ) + + assert_equal 2, results.length + # Lowercase query should prioritize method over class + assert_equal 'options', strip_highlights(results[0]['title']) + assert_equal 'RDoc::CodeObject', strip_highlights(results[0]['namespace']) + assert_equal 'RDoc/CodeObject.html#method-i-options', results[0]['path'] + + # Class comes second + assert_equal 'Options', strip_highlights(results[1]['title']) + assert_equal 'RDoc', strip_highlights(results[1]['namespace']) end def test_beginning_match