From 125ae879f9e8153516e5e815ca9caff19dce72d6 Mon Sep 17 00:00:00 2001 From: Mike Goatly <4577868+mikegoatly@users.noreply.github.com> Date: Tue, 16 Jan 2024 22:43:44 +0000 Subject: [PATCH] V6.0.0 (#107) Among other things... * Use latest C# version * Added support for bracketed field names #76 * Added field score boosting #72 (#83) * Added field score boosting #72 * Added score boosting query syntax #72 * Add .NET 8 as a target * Item score boosting (#95) * Allow characters to be escaped in query syntax #85 * Removing ImmutableCollections (#97) * Speed up field collection prior to scoring (#102) * Added support for adding custom stemmers #82 (#103) * Apply field filters while collecting results * Filter documents at navigator level #105 * Added query part weight calculations #105 Refactor query match collection primitives --- .editorconfig | 6 +- Lifti.sln | 1 + azure-pipelines.yml | 18 +- docs/assets/icons/logo.svg | 5 + docs/content/en/_index.html | 2 +- .../content/en/docs/Custom stemmers/_index.md | 38 + .../content/en/docs/Getting started/_index.md | 16 +- .../docs/Getting started/indexing-objects.md | 10 +- .../WithDefaultThesaurus.md | 4 +- .../WithDefaultTokenization.md | 12 +- .../WithDuplicateItemBehavior.md | 20 - .../WithDuplicateKeyBehavior.md | 20 + .../WithIndexModificationAction.md | 10 +- .../WithObjectTokenization.md | 41 +- .../Index construction/WithQueryParser.md | 6 +- .../WithSimpleQueryParser.md | 6 +- .../Index construction/WithTextExtraction.md | 4 +- .../en/docs/Index construction/_index.md | 2 +- .../Indexing mutations/Batch Mutations.md | 2 +- .../en/docs/Indexing mutations/_index.md | 18 +- .../Serialization format/PreVersion6.md | 13 + .../Reference/Serialization format/_index.md | 12 +- .../docs/Searching/Search results/Scoring.md | 15 +- .../docs/Searching/Search results/_index.md | 22 +- .../extracting-matched-phrases.md | 8 +- docs/content/en/docs/Searching/_index.md | 4 - .../en/docs/Searching/field-information.md | 2 +- 
.../en/docs/Searching/lifti-query-syntax.md | 24 +- .../manually-constructing-queries.md | 14 +- .../en/docs/Searching/query-execution.md | 32 + docs/content/en/docs/Serialization/_index.md | 4 +- .../docs/Serialization/key-serialization.md | 2 +- docs/package-lock.json | 883 +++--------------- docs/package.json | 2 +- docs/static/images/v6-serialization.svg | 4 + global.json | 3 + samples/Blazor/Blazor.csproj | 8 +- .../Blazor/Services/WikipediaIndexService.cs | 2 +- samples/TestConsole/AutoCompleteSample.cs | 5 +- samples/TestConsole/Book.cs | 7 - samples/TestConsole/BookSample.cs | 42 +- samples/TestConsole/CompositeKey.cs | 14 - samples/TestConsole/CompositeKeySerializer.cs | 26 - samples/TestConsole/CustomStemmerSample.cs | 44 + samples/TestConsole/CustomerObjectSample.cs | 21 +- samples/TestConsole/FreshnessBoosting.cs | 43 + ...dexSerializationWithCustomKeySerializer.cs | 25 +- samples/TestConsole/MagnitudeBoosting.cs | 41 + samples/TestConsole/Program.cs | 13 +- samples/TestConsole/SampleBase.cs | 38 +- samples/TestConsole/ShardedIndexExample.cs | 2 +- samples/TestConsole/TestConsole.csproj | 5 +- samples/TestConsole/ThesaurusSample.cs | 6 +- samples/TestConsole/WikipediaSample.cs | 2 +- src/Lifti.Core/AssemblyInfo.cs | 1 + src/Lifti.Core/ChildNodeMap.cs | 186 ++++ src/Lifti.Core/ChildNodeMapMutation.cs | 133 +++ src/Lifti.Core/DocumentMatchCollector.cs | 95 ++ src/Lifti.Core/DocumentMetadata.cs | 93 ++ .../{ItemPhrases.cs => DocumentPhrases.cs} | 102 +- src/Lifti.Core/DocumentStatistics.cs | 26 +- src/Lifti.Core/DocumentTokenMatchMap.cs | 126 +++ .../DocumentTokenMatchMapMutation.cs | 93 ++ src/Lifti.Core/DuplicateItemBehavior.cs | 18 - src/Lifti.Core/DuplicateKeyBehavior.cs | 18 + src/Lifti.Core/ExceptionMessages.Designer.cs | 141 ++- src/Lifti.Core/ExceptionMessages.resx | 51 +- src/Lifti.Core/FullTextIndex.cs | 192 ++-- src/Lifti.Core/FullTextIndexBuilder.cs | 46 +- src/Lifti.Core/IFullTextIndex.cs | 51 +- src/Lifti.Core/IIdPool.cs | 33 - 
src/Lifti.Core/IIndexMetadata.cs | 86 ++ src/Lifti.Core/IIndexNodeFactory.cs | 9 +- src/Lifti.Core/IIndexSnapshot.cs | 20 +- src/Lifti.Core/IItemMetadata.cs | 30 - src/Lifti.Core/IItemStore.cs | 70 -- src/Lifti.Core/IObjectTokenization.cs | 16 - src/Lifti.Core/IObjectTypeConfiguration.cs | 26 + src/Lifti.Core/ISearchResults.cs | 69 +- src/Lifti.Core/ITokenLocation.cs | 23 + src/Lifti.Core/IdPool.cs | 88 +- src/Lifti.Core/IndexMetadata.cs | 245 +++++ src/Lifti.Core/IndexMutation.cs | 22 +- src/Lifti.Core/IndexNode.cs | 35 +- src/Lifti.Core/IndexNodeFactory.cs | 9 +- src/Lifti.Core/IndexNodeMutation.cs | 260 ++---- src/Lifti.Core/IndexOptions.cs | 16 +- src/Lifti.Core/IndexSnapshot.cs | 25 +- src/Lifti.Core/IndexStatistics.cs | 67 +- src/Lifti.Core/IndexedFieldDetails.cs | 61 +- src/Lifti.Core/IndexedFieldLookup.cs | 30 +- src/Lifti.Core/ItemMetadata.cs | 25 - src/Lifti.Core/ItemStore.cs | 92 -- src/Lifti.Core/Lifti.Core.csproj | 24 +- src/Lifti.Core/LiftiException.cs | 2 + src/Lifti.Core/PublicAPI.Unshipped.txt | 0 src/Lifti.Core/PublicApi/MarkShipped.cmd | 2 + src/Lifti.Core/PublicApi/MarkShipped.ps1 | 51 + .../PublicApi/PublicAPI.Shipped.txt | 730 +++++++++++++++ .../PublicApi/PublicAPI.Unshipped.txt | 1 + .../netstandard2}/PublicAPI.Shipped.txt | 526 ++++++----- .../netstandard2/PublicAPI.Unshipped.txt | 1 + .../CompositePositionalIntersectMerger.cs | 94 +- .../Querying/CompositeTokenLocation.cs | 108 +++ .../Querying/CompositeTokenMatchLocation.cs | 82 -- src/Lifti.Core/Querying/FieldMatch.cs | 88 -- .../Querying/FieldScoreBoostProvider.cs | 45 + .../Querying/IFieldScoreBoostProvider.cs | 13 + src/Lifti.Core/Querying/IIndexNavigator.cs | 82 +- .../Querying/IIndexNavigatorBookmark.cs | 8 +- src/Lifti.Core/Querying/IScorer.cs | 38 +- .../Querying/ITokenLocationMatch.cs | 26 - src/Lifti.Core/Querying/IndexNavigator.cs | 327 ++++--- src/Lifti.Core/Querying/IndexNavigatorPool.cs | 6 +- .../Querying/IntermediateQueryResult.cs | 138 ++- 
.../Querying/IntermediateQueryResultMerger.cs | 122 ++- src/Lifti.Core/Querying/IntersectMerger.cs | 52 +- src/Lifti.Core/Querying/OkapiBm25Scorer.cs | 123 +-- .../Querying/OkapiBm25ScorerFactory.cs | 24 +- .../Querying/PrecedingIntersectMerger.cs | 71 +- src/Lifti.Core/Querying/Query.cs | 20 +- src/Lifti.Core/Querying/QueryContext.cs | 23 + src/Lifti.Core/Querying/QueryParser.cs | 15 +- .../Querying/QueryParserException.cs | 4 +- .../QueryParts/AdjacentWordsQueryOperator.cs | 40 +- .../Querying/QueryParts/AndQueryOperator.cs | 11 +- .../QueryParts/BinaryQueryOperator.cs | 68 +- .../Querying/QueryParts/BracketedQueryPart.cs | 11 +- .../Querying/QueryParts/DoubleBufferedList.cs | 25 +- .../Querying/QueryParts/EmptyQueryPart.cs | 10 +- .../Querying/QueryParts/ExactWordQueryPart.cs | 32 +- .../QueryParts/FieldFilterQueryOperator.cs | 27 +- .../QueryParts/FuzzyMatchQueryPart.cs | 144 ++- .../Querying/QueryParts/IQueryContext.cs | 14 - .../Querying/QueryParts/IQueryPart.cs | 14 +- .../Querying/QueryParts/NearQueryOperator.cs | 17 +- .../Querying/QueryParts/OrQueryOperator.cs | 12 +- .../QueryParts/PrecedingNearQueryOperator.cs | 19 +- .../QueryParts/PrecedingQueryOperator.cs | 15 +- .../Querying/QueryParts/QueryContext.cs | 42 - .../QueryParts/ScoreBoostedQueryPart.cs | 56 ++ .../QueryParts/WildcardQueryFragment.cs | 5 +- .../Querying/QueryParts/WildcardQueryPart.cs | 155 ++- .../Querying/QueryParts/WordQueryPart.cs | 35 +- src/Lifti.Core/Querying/QueryToken.cs | 51 +- src/Lifti.Core/Querying/QueryTokenMatch.cs | 63 -- src/Lifti.Core/Querying/QueryTokenizer.cs | 205 +++- src/Lifti.Core/Querying/ScoredFieldMatch.cs | 196 +++- src/Lifti.Core/Querying/ScoredToken.cs | 46 +- .../Querying/SingleTokenLocationMatch.cs | 71 -- src/Lifti.Core/Querying/UnionMerger.cs | 66 +- .../Querying/WildcardQueryPartParser.cs | 10 +- src/Lifti.Core/ScoreBoostMetadata.cs | 86 ++ src/Lifti.Core/ScoreBoostValues.cs | 163 ++++ src/Lifti.Core/SearchResult.cs | 10 +- 
src/Lifti.Core/SearchResults.cs | 92 +- .../Serialization/Binary/BinarySerializer.cs | 18 +- .../Binary/DeserializationException.cs | 2 + .../Serialization/Binary/IIndexReader.cs | 23 - .../Serialization/Binary/IIndexSerializer.cs | 16 +- .../Serialization/Binary/IIndexWriter.cs | 10 - .../Serialization/Binary/IndexWriter.cs | 183 ++-- .../Serialization/Binary/V2IndexReader.cs | 71 +- .../Serialization/Binary/V5IndexReader.cs | 111 +-- .../Serialization/Binary/V6IndexReader.cs | 72 ++ .../DeserializedDataCollector.cs | 28 + .../DocumentMetadataCollector.cs | 29 + .../Serialization/IIndexDeserializer.cs | 23 + .../Serialization/IIndexSerializer.cs | 11 + .../Serialization/IndexDeserializerBase.cs | 122 +++ .../Serialization/IndexSerializerBase.cs | 133 +++ .../Serialization/SerializedFieldCollector.cs | 27 + .../Serialization/SerializedFieldIdMap.cs | 25 + .../Serialization/SerializedFieldInfo.cs | 7 + src/Lifti.Core/SerializedFieldInfo.cs | 7 - src/Lifti.Core/SharedPool.cs | 20 +- src/Lifti.Core/ThesaurusBuilder.cs | 4 +- src/Lifti.Core/TokenLocation.cs | 131 ++- src/Lifti.Core/Tokenization/IStemmer.cs | 25 +- src/Lifti.Core/Tokenization/IndexTokenizer.cs | 11 +- .../Objects/AsyncStringArrayFieldReader.cs | 17 +- .../Objects/AsyncStringFieldReader.cs | 17 +- .../Objects/ChildItemDynamicFieldReader.cs | 23 +- .../Objects/DictionaryDynamicFieldReader.cs | 19 +- .../Objects/DynamicFieldReader.cs | 24 +- .../Tokenization/Objects/FieldConfig.cs | 11 +- .../Tokenization/Objects/IFieldConfig.cs | 7 +- .../Objects/IStaticFieldReader.cs | 4 +- .../Objects/ObjectScoreBoostBuilder.cs | 76 ++ .../Objects/ObjectScoreBoostOptions.cs | 65 ++ .../Objects/ObjectTokenization.cs | 41 - .../Objects/ObjectTokenizationBuilder.cs | 238 +++-- .../Objects/ObjectTokenizationLookup.cs | 30 - .../Objects/ObjectTypeConfiguration.cs | 56 ++ .../Objects/ObjectTypeConfigurationLookup.cs | 32 + .../Tokenization/Objects/StaticFieldReader.cs | 8 +- ...ringArrayChildObjectDynamicFieldReader.cs} | 18 
+- ...StringArrayDictionaryDynamicFieldReader.cs | 9 +- .../Objects/StringArrayFieldReader.cs | 15 +- ...=> StringChildObjectDynamicFieldReader.cs} | 17 +- .../StringDictionaryDynamicFieldReader.cs | 9 +- .../Tokenization/Objects/StringFieldReader.cs | 19 +- .../InputPreprocessorPipeline.cs | 10 +- .../Tokenization/Stemming/PorterStemmer.cs | 192 ++-- .../Stemming/StringBuilderExtensions.cs | 54 +- src/Lifti.Core/Tokenization/Thesaurus.cs | 7 +- src/Lifti.Core/Tokenization/Token.cs | 32 +- .../Tokenization/TokenExtensions.cs | 18 + src/Lifti.Core/Tokenization/TokenStore.cs | 2 +- src/Lifti.Core/TokenizationOptions.cs | 6 +- src/Lifti.Core/TokenizerBuilder.cs | 44 +- src/Lifti.Core/VirtualString.cs | 5 +- test/Lifti.Tests/ChildNodeMapTests.cs | 83 ++ .../Fakes/FakeFieldScoreBoostProvider.cs | 26 + test/Lifti.Tests/Fakes/FakeIndexMetadata.cs | 61 ++ test/Lifti.Tests/Fakes/FakeIndexSnapshot.cs | 25 + test/Lifti.Tests/Fakes/FakeItemStore.cs | 26 - test/Lifti.Tests/Fakes/FakeQueryContext.cs | 20 - test/Lifti.Tests/Fakes/FakeQueryParser.cs | 2 +- test/Lifti.Tests/Fakes/FakeScorer.cs | 7 +- .../Fakes/FakeTokenLocationMatch.cs | 26 - test/Lifti.Tests/FullTextIndexBuilderTests.cs | 37 +- test/Lifti.Tests/FullTextIndexTests.cs | 174 ++-- test/Lifti.Tests/GlobalSuppressions.cs | 8 + test/Lifti.Tests/IdPoolTests.cs | 169 ---- .../IndexInsertionMutationTests.cs | 60 +- test/Lifti.Tests/IndexMetadataTests.cs | 197 ++++ test/Lifti.Tests/IndexRemovalMutationTests.cs | 22 +- test/Lifti.Tests/IndexedFieldLookupTests.cs | 29 +- test/Lifti.Tests/Lifti.Tests.csproj | 4 +- test/Lifti.Tests/MutationTestBase.cs | 23 +- .../ObjectScoreBoostMetadataTests.cs | 93 ++ .../Querying/CompositeTokenLocationTests.cs | 75 ++ .../CompositeTokenMatchLocationTests.cs | 62 -- .../Querying/DocumentMatchCollectorTests.cs | 71 ++ .../Querying/FakeIndexNavigator.cs | 82 +- test/Lifti.Tests/Querying/FakeQueryPart.cs | 26 +- test/Lifti.Tests/Querying/FieldMatchTests.cs | 27 - 
.../Querying/IndexNavigatorTests.cs | 142 ++- .../Querying/IntermediateQueryResultTests.cs | 35 + .../Querying/OkapiBm25ScorerTests.cs | 130 ++- .../Querying/PrecedingIntersectMergerTests.cs | 6 +- test/Lifti.Tests/Querying/QueryParserTests.cs | 16 +- .../AdjacentWordsQueryOperatorTests.cs | 14 +- .../QueryParts/AndQueryOperatorTests.cs | 10 +- .../QueryParts/ExactWordQueryPartTests.cs | 73 +- .../FieldFilterQueryOperatorTests.cs | 25 +- .../QueryParts/FuzzyWordQueryPartTests.cs | 74 +- .../QueryParts/NearQueryOperatorTests.cs | 15 +- .../QueryParts/OrQueryOperatorTests.cs | 14 +- .../PrecedingNearQueryOperatorTests.cs | 15 +- .../QueryParts/PrecedingQueryOperatorTests.cs | 14 +- .../QueryParts/WildcardQueryPartTests.cs | 120 ++- test/Lifti.Tests/Querying/QueryTestBase.cs | 65 +- .../Querying/QueryTokenizerTests.cs | 148 ++- .../Querying/ScoredFieldMatchTests.cs | 70 ++ .../Querying/WildcardQueryPartParserTests.cs | 19 +- test/Lifti.Tests/ScoreBoostValuesTests.cs | 143 +++ test/Lifti.Tests/SearchResultsTests.cs | 16 +- .../Serialization/BinarySerializerTests.cs | 137 ++- test/Lifti.Tests/SharedPoolTests.cs | 12 +- test/Lifti.Tests/TestResources.Designer.cs | 10 + test/Lifti.Tests/TestResources.resx | 3 + test/Lifti.Tests/ThesaurusBuilderTests.cs | 3 +- test/Lifti.Tests/ThesaurusTests.cs | 16 +- .../Tokenization/IndexTokenizerTests.cs | 8 +- .../DictionaryDynamicFieldReaderTests.cs | 11 +- .../InputPreprocessorPipelineTests.cs | 2 +- test/Lifti.Tests/TokenizerBuilderTests.cs | 74 +- test/Lifti.Tests/V6.dat | Bin 0 -> 376 bytes test/Lifti.Tests/WordLocationTests.cs | 7 +- .../ChildNodeMapBenchmarks.cs | 109 +++ .../IndexPopulationTests.cs | 2 +- .../IndexSearchingBenchmarks.cs | 5 +- .../PerformanceProfiling.csproj | 1 + .../RemovalFromIndexBenchmarks.cs | 3 +- .../SerializationBenchmarks.cs | 6 +- ...artsWithMultiCharacterWildcardBenchmark.cs | 4 +- 278 files changed, 9725 insertions(+), 4536 deletions(-) create mode 100644 docs/assets/icons/logo.svg create mode 
100644 docs/content/en/docs/Custom stemmers/_index.md delete mode 100644 docs/content/en/docs/Index construction/WithDuplicateItemBehavior.md create mode 100644 docs/content/en/docs/Index construction/WithDuplicateKeyBehavior.md create mode 100644 docs/content/en/docs/Reference/Serialization format/PreVersion6.md create mode 100644 docs/content/en/docs/Searching/query-execution.md create mode 100644 docs/static/images/v6-serialization.svg create mode 100644 global.json delete mode 100644 samples/TestConsole/Book.cs delete mode 100644 samples/TestConsole/CompositeKey.cs delete mode 100644 samples/TestConsole/CompositeKeySerializer.cs create mode 100644 samples/TestConsole/CustomStemmerSample.cs create mode 100644 samples/TestConsole/FreshnessBoosting.cs create mode 100644 samples/TestConsole/MagnitudeBoosting.cs create mode 100644 src/Lifti.Core/ChildNodeMap.cs create mode 100644 src/Lifti.Core/ChildNodeMapMutation.cs create mode 100644 src/Lifti.Core/DocumentMatchCollector.cs create mode 100644 src/Lifti.Core/DocumentMetadata.cs rename src/Lifti.Core/{ItemPhrases.cs => DocumentPhrases.cs} (58%) create mode 100644 src/Lifti.Core/DocumentTokenMatchMap.cs create mode 100644 src/Lifti.Core/DocumentTokenMatchMapMutation.cs delete mode 100644 src/Lifti.Core/DuplicateItemBehavior.cs create mode 100644 src/Lifti.Core/DuplicateKeyBehavior.cs delete mode 100644 src/Lifti.Core/IIdPool.cs create mode 100644 src/Lifti.Core/IIndexMetadata.cs delete mode 100644 src/Lifti.Core/IItemMetadata.cs delete mode 100644 src/Lifti.Core/IItemStore.cs delete mode 100644 src/Lifti.Core/IObjectTokenization.cs create mode 100644 src/Lifti.Core/IObjectTypeConfiguration.cs create mode 100644 src/Lifti.Core/ITokenLocation.cs create mode 100644 src/Lifti.Core/IndexMetadata.cs delete mode 100644 src/Lifti.Core/ItemMetadata.cs delete mode 100644 src/Lifti.Core/ItemStore.cs delete mode 100644 src/Lifti.Core/PublicAPI.Unshipped.txt create mode 100644 src/Lifti.Core/PublicApi/MarkShipped.cmd create mode 
100644 src/Lifti.Core/PublicApi/MarkShipped.ps1 create mode 100644 src/Lifti.Core/PublicApi/PublicAPI.Shipped.txt create mode 100644 src/Lifti.Core/PublicApi/PublicAPI.Unshipped.txt rename src/Lifti.Core/{ => PublicApi/netstandard2}/PublicAPI.Shipped.txt (53%) create mode 100644 src/Lifti.Core/PublicApi/netstandard2/PublicAPI.Unshipped.txt create mode 100644 src/Lifti.Core/Querying/CompositeTokenLocation.cs delete mode 100644 src/Lifti.Core/Querying/CompositeTokenMatchLocation.cs delete mode 100644 src/Lifti.Core/Querying/FieldMatch.cs create mode 100644 src/Lifti.Core/Querying/FieldScoreBoostProvider.cs create mode 100644 src/Lifti.Core/Querying/IFieldScoreBoostProvider.cs delete mode 100644 src/Lifti.Core/Querying/ITokenLocationMatch.cs create mode 100644 src/Lifti.Core/Querying/QueryContext.cs delete mode 100644 src/Lifti.Core/Querying/QueryParts/IQueryContext.cs delete mode 100644 src/Lifti.Core/Querying/QueryParts/QueryContext.cs create mode 100644 src/Lifti.Core/Querying/QueryParts/ScoreBoostedQueryPart.cs delete mode 100644 src/Lifti.Core/Querying/QueryTokenMatch.cs delete mode 100644 src/Lifti.Core/Querying/SingleTokenLocationMatch.cs create mode 100644 src/Lifti.Core/ScoreBoostMetadata.cs create mode 100644 src/Lifti.Core/ScoreBoostValues.cs delete mode 100644 src/Lifti.Core/Serialization/Binary/IIndexReader.cs delete mode 100644 src/Lifti.Core/Serialization/Binary/IIndexWriter.cs create mode 100644 src/Lifti.Core/Serialization/Binary/V6IndexReader.cs create mode 100644 src/Lifti.Core/Serialization/DeserializedDataCollector.cs create mode 100644 src/Lifti.Core/Serialization/DocumentMetadataCollector.cs create mode 100644 src/Lifti.Core/Serialization/IIndexDeserializer.cs create mode 100644 src/Lifti.Core/Serialization/IIndexSerializer.cs create mode 100644 src/Lifti.Core/Serialization/IndexDeserializerBase.cs create mode 100644 src/Lifti.Core/Serialization/IndexSerializerBase.cs create mode 100644 src/Lifti.Core/Serialization/SerializedFieldCollector.cs 
create mode 100644 src/Lifti.Core/Serialization/SerializedFieldIdMap.cs create mode 100644 src/Lifti.Core/Serialization/SerializedFieldInfo.cs delete mode 100644 src/Lifti.Core/SerializedFieldInfo.cs create mode 100644 src/Lifti.Core/Tokenization/Objects/ObjectScoreBoostBuilder.cs create mode 100644 src/Lifti.Core/Tokenization/Objects/ObjectScoreBoostOptions.cs delete mode 100644 src/Lifti.Core/Tokenization/Objects/ObjectTokenization.cs delete mode 100644 src/Lifti.Core/Tokenization/Objects/ObjectTokenizationLookup.cs create mode 100644 src/Lifti.Core/Tokenization/Objects/ObjectTypeConfiguration.cs create mode 100644 src/Lifti.Core/Tokenization/Objects/ObjectTypeConfigurationLookup.cs rename src/Lifti.Core/Tokenization/Objects/{StringArrayChildItemDynamicFieldReader.cs => StringArrayChildObjectDynamicFieldReader.cs} (53%) rename src/Lifti.Core/Tokenization/Objects/{StringChildItemDynamicFieldReader.cs => StringChildObjectDynamicFieldReader.cs} (53%) create mode 100644 src/Lifti.Core/Tokenization/TokenExtensions.cs create mode 100644 test/Lifti.Tests/ChildNodeMapTests.cs create mode 100644 test/Lifti.Tests/Fakes/FakeFieldScoreBoostProvider.cs create mode 100644 test/Lifti.Tests/Fakes/FakeIndexMetadata.cs create mode 100644 test/Lifti.Tests/Fakes/FakeIndexSnapshot.cs delete mode 100644 test/Lifti.Tests/Fakes/FakeItemStore.cs delete mode 100644 test/Lifti.Tests/Fakes/FakeQueryContext.cs delete mode 100644 test/Lifti.Tests/Fakes/FakeTokenLocationMatch.cs create mode 100644 test/Lifti.Tests/GlobalSuppressions.cs delete mode 100644 test/Lifti.Tests/IdPoolTests.cs create mode 100644 test/Lifti.Tests/IndexMetadataTests.cs create mode 100644 test/Lifti.Tests/ObjectScoreBoostMetadataTests.cs create mode 100644 test/Lifti.Tests/Querying/CompositeTokenLocationTests.cs delete mode 100644 test/Lifti.Tests/Querying/CompositeTokenMatchLocationTests.cs create mode 100644 test/Lifti.Tests/Querying/DocumentMatchCollectorTests.cs delete mode 100644 
test/Lifti.Tests/Querying/FieldMatchTests.cs create mode 100644 test/Lifti.Tests/Querying/IntermediateQueryResultTests.cs create mode 100644 test/Lifti.Tests/Querying/ScoredFieldMatchTests.cs create mode 100644 test/Lifti.Tests/ScoreBoostValuesTests.cs create mode 100644 test/Lifti.Tests/V6.dat create mode 100644 test/PerformanceProfiling/ChildNodeMapBenchmarks.cs diff --git a/.editorconfig b/.editorconfig index 13f0ad73..b3b9f81a 100644 --- a/.editorconfig +++ b/.editorconfig @@ -100,7 +100,7 @@ csharp_prefer_simple_using_statement = true:suggestion csharp_prefer_simple_default_expression = true:suggestion csharp_style_pattern_local_over_anonymous_function = true:suggestion csharp_style_prefer_index_operator = false:suggestion -csharp_style_prefer_range_operator = true:suggestion +csharp_style_prefer_range_operator = false:suggestion csharp_style_unused_value_assignment_preference = discard_variable:none csharp_style_unused_value_expression_statement_preference = discard_variable:none @@ -213,6 +213,7 @@ csharp_style_allow_blank_line_after_token_in_arrow_expression_clause_experimenta csharp_style_prefer_pattern_matching = true:silent csharp_style_prefer_not_pattern = true:suggestion csharp_style_prefer_extended_property_pattern = true:suggestion +csharp_style_prefer_primary_constructors = false:suggestion [*.{cs,vb}] dotnet_style_operator_placement_when_wrapping = beginning_of_line @@ -248,4 +249,5 @@ dotnet_style_parentheses_in_other_operators = never_if_unnecessary:silent dotnet_style_qualification_for_field = true:silent dotnet_style_qualification_for_property = true:silent dotnet_style_qualification_for_method = true:silent -dotnet_style_qualification_for_event = true:silent \ No newline at end of file +dotnet_style_qualification_for_event = true:silent +dotnet_style_prefer_collection_expression = true:suggestion \ No newline at end of file diff --git a/Lifti.sln b/Lifti.sln index dcefc82e..f294c7fa 100644 --- a/Lifti.sln +++ b/Lifti.sln @@ -13,6 +13,7 @@ 
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution .gitattributes = .gitattributes .gitignore = .gitignore azure-pipelines.yml = azure-pipelines.yml + global.json = global.json LICENSE = LICENSE README.md = README.md EndProjectSection diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 3df1ed19..9a142408 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -8,9 +8,9 @@ trigger: - test variables: - majorVersion: 5 + majorVersion: 6 minorVersion: 0 - patchVersion: 0 + patchVersion: 1 project: src/Lifti.Core/Lifti.Core.csproj testProject: test/Lifti.Tests/Lifti.Tests.csproj buildConfiguration: 'Release' @@ -36,6 +36,11 @@ stages: inputs: packageType: 'sdk' version: '7.0.x' + - task: UseDotNet@2 + displayName: "Use .NET 8" + inputs: + packageType: 'sdk' + version: '8.0.x' - task: DotNetCoreCLI@2 displayName: "NuGet Restore" inputs: @@ -82,7 +87,7 @@ stages: - stage: PublishCINugetPackage displayName: Publish to CI feed dependsOn: Build - condition: succeeded() + condition: and(succeeded(), ne(variables['Build.SourceBranch'], 'refs/heads/master')) jobs: - job: PublishCI @@ -95,12 +100,13 @@ stages: - download: current artifact: 'packages' - - task: DotNetCoreCLI@2 + - task: NuGetCommand@2 + displayName: 'Push NuGet Package' inputs: command: 'push' packagesToPush: '$(Pipeline.Workspace)/packages/ci/*.nupkg' - nuGetFeedType: 'internal' - publishVstsFeed: '21c23043-21b0-4e5a-8557-00b88fc52fd4/9f4e269d-a35a-4657-b2a3-b56b01c01f8c' + nuGetFeedType: 'external' + publishFeedCredentials: 'NuGet' - stage: 'PublishReleaseNuGetPackage' displayName: 'Publish Release NuGet Package' diff --git a/docs/assets/icons/logo.svg b/docs/assets/icons/logo.svg new file mode 100644 index 00000000..9260d197 --- /dev/null +++ b/docs/assets/icons/logo.svg @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file diff --git a/docs/content/en/_index.html b/docs/content/en/_index.html index f8de2607..07cde55b 100644 --- a/docs/content/en/_index.html +++ 
b/docs/content/en/_index.html @@ -3,7 +3,7 @@ linkTitle = "LIFTI" +++ -{{< blocks/cover title="LIFTI" image_anchor="top" height="full" color="orange" >}} +{{< blocks/cover title="LIFTI" image_anchor="top" height="full" >}}
}}"> Learn More diff --git a/docs/content/en/docs/Custom stemmers/_index.md b/docs/content/en/docs/Custom stemmers/_index.md new file mode 100644 index 00000000..9f185f72 --- /dev/null +++ b/docs/content/en/docs/Custom stemmers/_index.md @@ -0,0 +1,38 @@ +--- +title: "Custom stemmers" +linkTitle: "Custom stemmers" +weight: 7 +description: > + You can implement a custom stemmer if the default English Porter stemmer doesn't meet your needs. +--- + +Let's say that for some reason you needed to stem every indexed token so that it was at most 3 characters long: + +```csharp +public class FirstThreeLettersStemmer : IStemmer +{ + public bool RequiresCaseInsensitivity => false; + + public bool RequiresAccentInsensitivity => false; + + public void Stem(StringBuilder builder) + { + if (builder.Length > 3) + { + builder.Length = 3; + } + } +} +``` + +`RequiresCaseInsensitivity` and `RequiresAccentInsensitivity` are hints used by the index at creation time that force it to enable +case/accent sensitivity. Case insensitivity means that any text passed to your stemmer will already be uppercase. Accent insensitivity means +that accents will automatically be stripped prior to being sent to the stemmer. 
+ +Once you've got your stemmer implemented, you just need to give it to the `FullTextIndexBuilder`: + +``` csharp +var index = new FullTextIndexBuilder() + .WithDefaultTokenization(o => o.WithStemming(new FirstThreeLettersStemmer())) + .Build(); +``` diff --git a/docs/content/en/docs/Getting started/_index.md b/docs/content/en/docs/Getting started/_index.md index 521e930e..ecfe2961 100644 --- a/docs/content/en/docs/Getting started/_index.md +++ b/docs/content/en/docs/Getting started/_index.md @@ -11,10 +11,10 @@ Perhaps the simplest way to work with LIFTI is to index text against a key that In this example, we're just indexing three pieces of text against 3 integer keys: ``` c# -// Create a full text index with default settings and integer item keys +// Create a full text index with default settings and integer keys var index = new FullTextIndexBuilder().Build(); -// Index item keys with their associated text +// Index keys with their associated text await index.AddAsync(1, "This is some text associated with A: fizz"); await index.AddAsync(2, "Some buzz text for B"); await index.AddAsync(3, "Text associated with C is both fizz and buzz"); @@ -24,17 +24,17 @@ await index.AddAsync(3, "Text associated with C is both fizz and buzz"); You can search in this index using: ``` c# -// Search for text containing both Fizz *and* Buzz +// Search for documents containing both Fizz *and* Buzz var results = index.Search("Fizz Buzz").ToList(); -// Output: Items with both Fizz and Buzz: 1 -Console.WriteLine($"Items with both Fizz and Buzz: {results.Count}"); +// Output: Documents with both Fizz and Buzz: 1 +Console.WriteLine($"Documents with both Fizz and Buzz: {results.Count}"); -// Search for text containing both Fizz *or* Buzz +// Search for documents containing both Fizz *or* Buzz results = index.Search("Fizz | Buzz").ToList(); -// Outputs: Items with Fizz or Buzz: 3 -Console.WriteLine($"Items with Fizz or Buzz: {results.Count}"); +// Outputs: Documents with Fizz or Buzz: 3 
+Console.WriteLine($"Documents with Fizz or Buzz: {results.Count}"); ``` Each set of results returns the keys that the text was indexed against. For example, the first set of results will return a key of 3, diff --git a/docs/content/en/docs/Getting started/indexing-objects.md b/docs/content/en/docs/Getting started/indexing-objects.md index 8fa34f7a..e090ec56 100644 --- a/docs/content/en/docs/Getting started/indexing-objects.md +++ b/docs/content/en/docs/Getting started/indexing-objects.md @@ -26,7 +26,7 @@ Where you want users to be able to search for text in all three Title, Abstract // Books are indexed by their BookId property, which is an int. var bookIndex = new FullTextIndexBuilder() .WithObjectTokenization( - itemOptions => itemOptions + options => options .WithKey(b => b.BookId) .WithField("Title", b => b.Title, tokenOptions => tokenOptions.WithStemming()) @@ -63,15 +63,15 @@ await bookIndex.AddRangeAsync(books); When you get search results back, they will be against the key stored in the index, i.e. 
the book's id: ``` csharp -// Both books contain "first" - prints "Matched items: 1, 2 with respective scores 0.274884808704732, 0.265418822719626" +// Both books contain "first" - prints "Matched documents: 1, 2 with respective scores 0.274884808704732, 0.265418822719626" var results = bookIndex.Search("first"); Console.WriteLine( - "Matched items: " + + "Matched documents: " + string.Join(", ", results.Select(i => i.Key)) + " with respective scores: " + string.Join(", ", results.Select(i => i.Score))); -// Only first book contains "the" in the title - prints "Matched items: 1" +// Only first book contains "the" in the title - prints "Matched documents: 1" results = bookIndex.Search("title=the"); -Console.WriteLine("Matched items: " + string.Join(", ", results.Select(i => i.Key))); +Console.WriteLine("Matched documents: " + string.Join(", ", results.Select(i => i.Key))); ``` diff --git a/docs/content/en/docs/Index construction/WithDefaultThesaurus.md b/docs/content/en/docs/Index construction/WithDefaultThesaurus.md index 9cbef9d7..44aa07a6 100644 --- a/docs/content/en/docs/Index construction/WithDefaultThesaurus.md +++ b/docs/content/en/docs/Index construction/WithDefaultThesaurus.md @@ -1,6 +1,6 @@ --- -title: "WithDefaultThesaurus" -linkTitle: "WithDefaultThesaurus" +title: "Thesaurus synonyms" +linkTitle: "Thesaurus synonyms" weight: 5 description: > Prescribes how the index should treat terms as synonymous when they are being added to the index. 
diff --git a/docs/content/en/docs/Index construction/WithDefaultTokenization.md b/docs/content/en/docs/Index construction/WithDefaultTokenization.md index fe63c151..6f7e79bb 100644 --- a/docs/content/en/docs/Index construction/WithDefaultTokenization.md +++ b/docs/content/en/docs/Index construction/WithDefaultTokenization.md @@ -1,9 +1,9 @@ --- -title: "WithDefaultTokenization" -linkTitle: "WithDefaultTokenization" +title: "Default tokenization" +linkTitle: "Default tokenization" weight: 2 description: > - Specifies the default tokenization options that should be used when searching or indexing when no other options are provided. + Specifies the default tokenization options that should be used when searching or indexing when tokenization options are not explicitly specified for an object type. --- ## Example usage @@ -47,12 +47,12 @@ Additionally, characters that can be logically expressed as two characters are e `false`: The tokenizer will be case sensitive. Searching for `Cat` will match `Cat` but not `cat`. -#### `WithStemming(bool)` +#### `WithStemming()` -`true`: Words will be stemmed using an implementation of the Porter Stemmer algorithm. For example, `ABANDON`, `ABANDONED` and `ABANDONING` will all +Words will be stemmed using an implementation of the Porter Stemmer algorithm. For example, `ABANDON`, `ABANDONED` and `ABANDONING` will all be treated as `ABANDON`. Currently only English is supported. -`false`: **Default** No stemming will be performed on words. +A [custom stemmer](../../custom-stemmers/) can be used by implementing an `IStemmer` and using `WithStemming(new YourStemmerImplementation())`. 
### Word break modifiers diff --git a/docs/content/en/docs/Index construction/WithDuplicateItemBehavior.md b/docs/content/en/docs/Index construction/WithDuplicateItemBehavior.md deleted file mode 100644 index bc22cb63..00000000 --- a/docs/content/en/docs/Index construction/WithDuplicateItemBehavior.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -title: "WithDuplicateItemBehavior" -linkTitle: "WithDuplicateItemBehavior" -weight: 5 -description: > - Prescribes how the index should behave when indexing an item that is already present in the index. ---- - -`FullTextIndexBuilder WithDuplicateItemBehavior(DuplicateItemBehavior duplicateItemBehavior)` - -* `DuplicateItemBehavior.ReplaceItem`: **Default** The item will first be removed from the index, then indexed -* `DuplicateItemBehavior.ThrowException`: An exception will be thrown. You can use this if you're not expecting items to be re-indexed and want some indication that your code isn't behaving correctly. - -## Example usage - -``` csharp -var index = new FullTextIndexBuilder() - .WithDuplicateItemBehavior(DuplicateItemBehavior.ThrowException) - .Build(); -``` \ No newline at end of file diff --git a/docs/content/en/docs/Index construction/WithDuplicateKeyBehavior.md b/docs/content/en/docs/Index construction/WithDuplicateKeyBehavior.md new file mode 100644 index 00000000..5ce6beef --- /dev/null +++ b/docs/content/en/docs/Index construction/WithDuplicateKeyBehavior.md @@ -0,0 +1,20 @@ +--- +title: "Managing duplicate keys" +linkTitle: "Managing duplicate keys" +weight: 5 +description: > + Configure how the index should behave when indexing an item that is already present in the index. +--- + +`FullTextIndexBuilder WithDuplicateKeyBehavior(DuplicateKeyBehavior duplicateKeyBehavior)` + +* `DuplicateKeyBehavior.Replace`: **Default** The document associated to the key will first be removed from the index, then indexed +* `DuplicateKeyBehavior.ThrowException`: An exception will be thrown. 
You can use this if you're not expecting keys to be re-indexed and want some indication that your code isn't behaving correctly. + +## Example usage + +``` csharp +var index = new FullTextIndexBuilder() + .WithDuplicateKeyBehavior(DuplicateKeyBehavior.ThrowException) + .Build(); +``` \ No newline at end of file diff --git a/docs/content/en/docs/Index construction/WithIndexModificationAction.md b/docs/content/en/docs/Index construction/WithIndexModificationAction.md index eef77b67..e34fd574 100644 --- a/docs/content/en/docs/Index construction/WithIndexModificationAction.md +++ b/docs/content/en/docs/Index construction/WithIndexModificationAction.md @@ -1,9 +1,9 @@ --- -title: "WithIndexModificationAction" -linkTitle: "WithIndexModificationAction" +title: "Adding index modification actions" +linkTitle: "Adding index modification actions" weight: 10 description: > - Registers an async action that needs to occur when mutations to the index are committed and a new snapshot is generated. + You can register an async action that needs to occur when mutations to the index are committed and a new snapshot is generated. --- Every time an index is modified, either with a single document being added or a batch change being completed, a new immutable snapshot is created. @@ -11,13 +11,13 @@ This is part of LIFTI's thread safety mechanism. You can hook into this process by registering an action using the `FullTextIndexBuilder.WithIndexModificationAction` method. -This trivial example just logs to the console the number of items in the index whenever a new snapshot is created. +This trivial example just logs to the console the number of documents in the index whenever a new snapshot is created. 
``` csharp var index = new FullTextIndexBuilder() .WithIndexModificationAction(async (idx) => { - Console.WriteLine($"Index now contains {idx.IdLookup.Count} items"); + Console.WriteLine($"Index now contains {idx.IdLookup.Count} documents"); }) .Build(); ``` diff --git a/docs/content/en/docs/Index construction/WithObjectTokenization.md b/docs/content/en/docs/Index construction/WithObjectTokenization.md index 0c2e7522..80b7d90f 100644 --- a/docs/content/en/docs/Index construction/WithObjectTokenization.md +++ b/docs/content/en/docs/Index construction/WithObjectTokenization.md @@ -1,6 +1,6 @@ --- -title: "WithObjectTokenization" -linkTitle: "WithObjectTokenization" +title: "Object tokenization" +linkTitle: "Object tokenization" weight: 2 description: > Configure the index to accept a strongly typed object when indexing content. @@ -19,7 +19,7 @@ public class Customer var index = new FullTextIndexBuilder() .WithObjectTokenization(o => o .WithKey(c => c.Id) - .WithField("Name", c => c.Name) + .WithField("Name", c => c.Name, scoreBoost: 1.5D) .WithField("Profile", c => c.ProfileHtml, textExtractor: new XmlTextExtractor()) .WithDynamicFields("Tags", c => c.TagDictionary, "Tag_") .WithDynamicFields( @@ -27,18 +27,23 @@ var index = new FullTextIndexBuilder() c => c.Questions, q => q.QuestionName, q => q.QuestionResponse, - "Question_") + "Question_", + scoreBoost: 1.8D) + .WithScoreBoosting( + boost => boost + .Freshness(c => c.UpdatedDate, 2D) + .Magnitude(c => c.Rating, 2D)) ) .Build(); await index.AddAsync(new Customer { ... }); ``` -## WithKey +## `WithKey` Each object configured against the index must have a key of the same type as the index's key. `WithKey` defines how this key is read from the object. -## WithField +## `WithField` An object can be configured with one *static* fields that are known at compile time. The `WithField` method overloads allow for static fields to be defined. 
@@ -73,7 +78,11 @@ Equivalent to [WithTextExtraction](./WithTextExtraction) but for use exclusively Equivalent to [WithDefaultThesaurus](./WithDefaultThesaurus) but for use exclusively with this field. Left null, the default thesaurus builder for the index will be used. -## WithDynamicFields +### `scoreBoost` + +The multiplier to apply to the score of this field when ranking results. The default value of 1 is equivalent to no boosting. + +## `WithDynamicFields` In addition to the static fields configured using `WithField`, it is possible to configure dynamic fields that are not known at compile time. The `WithDynamicFields` overloads allow for dynamic field readers to be defined, each of which will be invoked to retrieve the field names for the object being indexed. @@ -105,14 +114,28 @@ Or you can provice a function that returns a collection of *child objects*: These last two overloads also require you provide two more delegates via the `getFieldName` and `getFieldText` parameters. These delegates are used to extract the field name and text from each child object. -### fieldNamePrefix +### `fieldNamePrefix` The prefix to use when constructing the field name. This is useful when the dynamic fields can produce the same field name as a static field, or a dynamic field from another dynamic field reader. ### Other `WithDynamicFields` parameters -The `tokenizationOptions`, `textExtractor` and `thesaurusOptions` parameters are equivalent to their `WithField` counterparts. +The `tokenizationOptions`, `textExtractor`, `thesaurusOptions` and `scoreBoost` parameters are equivalent to their `WithField` counterparts. + +## `WithScoreBoosting` + +Configures the score boosting options for the object type. These allow you to promote documents associated to objects based on related data. + +### `Freshness` + +Freshness boosting allows you to boost results based on a date associated to the object. 
For example, assuming all the documents have exactly the same text +and a multiplier of 3 is specified, then the score of the newest document will be 3 times higher than the oldest. + +### `Magnitude` + +Magnitude boosting allows you to boost results based on a numeric value associated to the object. For example, if you used this with a "star rating" property, +documents with a higher rating will be more likely to appear nearer the top of search results. ## Indexing multiple object types diff --git a/docs/content/en/docs/Index construction/WithQueryParser.md b/docs/content/en/docs/Index construction/WithQueryParser.md index f7cf2883..725025de 100644 --- a/docs/content/en/docs/Index construction/WithQueryParser.md +++ b/docs/content/en/docs/Index construction/WithQueryParser.md @@ -1,9 +1,9 @@ --- -title: "WithQueryParser" -linkTitle: "WithQueryParser" +title: "Query parser configuration" +linkTitle: "Query parser configuration" weight: 5 description: > - Prescribes how the QueryParser implementation should be configured for the index. + Configure how the LIFTI query parser should operate for the index. --- ## Configuring the default LIFTI `QueryParser` diff --git a/docs/content/en/docs/Index construction/WithSimpleQueryParser.md b/docs/content/en/docs/Index construction/WithSimpleQueryParser.md index b77c129c..479eac4a 100644 --- a/docs/content/en/docs/Index construction/WithSimpleQueryParser.md +++ b/docs/content/en/docs/Index construction/WithSimpleQueryParser.md @@ -1,9 +1,9 @@ --- -title: "WithSimpleQueryParser" -linkTitle: "WithSimpleQueryParser" +title: "Using the simple query parser" +linkTitle: "Using the simple query parser" weight: 6 description: > - Prescribes the use of a simple query parser that you can use when you don't want queries to make use of the full LIFTI query syntax. + You can use a simple query parser when you don't want queries to make use of the full LIFTI query syntax. 
--- ## Configuring the `SimpleQueryParser` diff --git a/docs/content/en/docs/Index construction/WithTextExtraction.md b/docs/content/en/docs/Index construction/WithTextExtraction.md index 755bb7d9..faea7528 100644 --- a/docs/content/en/docs/Index construction/WithTextExtraction.md +++ b/docs/content/en/docs/Index construction/WithTextExtraction.md @@ -1,6 +1,6 @@ --- -title: "WithTextExtraction" -linkTitle: "WithTextExtraction" +title: "Document text extraction" +linkTitle: "Document text extraction" weight: 6 description: > Text extraction is the process by which fragments of text are lifted from a larger body of text prior to tokenization. diff --git a/docs/content/en/docs/Index construction/_index.md b/docs/content/en/docs/Index construction/_index.md index 312df832..5e18e141 100644 --- a/docs/content/en/docs/Index construction/_index.md +++ b/docs/content/en/docs/Index construction/_index.md @@ -9,7 +9,7 @@ description: > ## `FullTextIndexBuilder` `FullTextIndexBuilder` requires a single generic type provided to it. -This defines the type of the key that items will be indexed against. +This defines the type of the key that documents will be indexed against. In simple cases this will just be a `string`, `Guid`, `Int32` or `UInt32`. Indexes can be built with other key types, including composite types, but special care needs diff --git a/docs/content/en/docs/Indexing mutations/Batch Mutations.md b/docs/content/en/docs/Indexing mutations/Batch Mutations.md index be22dd67..dec11faf 100644 --- a/docs/content/en/docs/Indexing mutations/Batch Mutations.md +++ b/docs/content/en/docs/Indexing mutations/Batch Mutations.md @@ -14,4 +14,4 @@ await index.AddAsync(3, "Text associated with C is both fizz and buzz"); await index.CommitBatchChangeAsync(); ``` -Only once `CommitBatchChangeAsync` has been called will the new data be available in the index for searching. 
If a search operation was to have been performed between any of the calls to `AddAsync` then the previous snapshot would have been used and it would be as if the new items had not been added yet. +Only once `CommitBatchChangeAsync` has been called will the new data be available in the index for searching. If a search operation was to have been performed between any of the calls to `AddAsync` then the previous snapshot would have been used and it would be as if the new documents had not been added yet. diff --git a/docs/content/en/docs/Indexing mutations/_index.md b/docs/content/en/docs/Indexing mutations/_index.md index 4da52edd..09748f18 100644 --- a/docs/content/en/docs/Indexing mutations/_index.md +++ b/docs/content/en/docs/Indexing mutations/_index.md @@ -8,17 +8,17 @@ description: > ## Indexing text -As shown in [this example](../Getting%20started), if all you have is a key and text to be indexed for it, you can just make use one of the `AddAsync` overloads that operate on `string`s - there is no need to construct any artificial objects in order to populate the index. +As shown in [this example](../getting-started), if all you have is a key and text to be indexed for it, you can just make use of one of the `AddAsync` overloads that operate on `string`s - there is no need to construct any artificial objects in order to populate the index. Each of these methods will use the default text extraction and tokenization configured for the index and the default field id, `IFullTextIndex.FieldLookup.DefaultField`. -### `Task AddAsync(TKey itemKey, string text)` +### `Task AddAsync(TKey key, string text)` -Adds an item to the index treating the single `string` as the complete document text. +Adds a document to the index treating the single `string` as the complete document text. -### `Task AddAsync(TKey itemKey, IEnumerable text)` +### `Task AddAsync(TKey key, IEnumerable text)` -Adds an item to the index with multiple text fragments.
Each fragment is considered to be fragments of the same text, i.e. the index and offset of tokens increments across the elements in the `IEnumerable`. +Adds a document to the index with multiple text fragments. Each fragment is considered to be fragments of the same text, i.e. the index and offset of tokens increments across the elements in the `IEnumerable`. It's worth noting that each fragment is processed independently, so an implicit word break exists between element. This means that `AddAsync(1, new[ "a", "b" ])` will index two words (`a` and `b`) whereas if the two strings had been naively concatenated together, only one (`ab`) would, as there was no word break between them. @@ -28,10 +28,10 @@ In order to index an object against the index it needs to have first been config See the [Indexing objects](../getting-started/indexing-objects) quick start for an example of this in action. -### `Task AddAsync(TItem item)` +### `Task AddAsync(TObject item)` -Adds a single item to the index. +Adds a single document to the index. -### `Task AddRangeAsync(IEnumerable items)` +### `Task AddRangeAsync(IEnumerable items)` -Adds a set of items to the index in a single mutation. This is more efficient than making multiple calls to `AddAsync` unless a batch mutation has already been manually started. +Adds a set of documents to the index in a single mutation. This is more efficient than making multiple calls to `AddAsync` unless a [batch mutation](./batch-mutations) has already been manually started. diff --git a/docs/content/en/docs/Reference/Serialization format/PreVersion6.md b/docs/content/en/docs/Reference/Serialization format/PreVersion6.md new file mode 100644 index 00000000..8eaaa382 --- /dev/null +++ b/docs/content/en/docs/Reference/Serialization format/PreVersion6.md @@ -0,0 +1,13 @@ +--- +title: "V5 Serialization File Format" +linkTitle: "V5 Serialization File Format" +date: 2024-01-14 +description: > + Documentation for older serialization formats. 
+--- + +## Version 5 (v5.0.0) + +![LIFTI Serialization Format](../../../../images/v5-serialization.svg) + +- New in version 5 is the list of fields in the index. This is used upon deserialization to rehydrate the dynamic fields and ensure that the field names in the index being deserialized into are mapped correctly to fields in the serialized index. \ No newline at end of file diff --git a/docs/content/en/docs/Reference/Serialization format/_index.md b/docs/content/en/docs/Reference/Serialization format/_index.md index a1862b91..64bcc86a 100644 --- a/docs/content/en/docs/Reference/Serialization format/_index.md +++ b/docs/content/en/docs/Reference/Serialization format/_index.md @@ -1,17 +1,17 @@ --- title: "Serialization File Format" linkTitle: "Serialization File Format" -date: 2023-07-04 +date: 2024-01-14 description: > - The current serialization format is version 5. + The current serialization format is version 6. --- -## Version 5 (v5.0.0) +## Version 6 (v6.0.0) -![LIFTI Serialization Format](../../../images/v5-serialization.svg) +![LIFTI Serialization Format](../../../images/v6-serialization.svg) Notes: -- Versions 2 to 4 are readable as a one-time conversion but always written back as version 5. +- Versions 2 to 5 are readable as a one-time conversion but always written back as version 6. - Int32s are written as *positive* values using 7-bit encoding. This means that the maximum value is 2,147,483,647, apart from Int32s written by the `IntFormatterKeySerializer` which can't make the assumption that the value is always positive. For these, values are written using zig-zag encoding. -- New in version 5 is the list of fields in the index. This is used upon deserialization to rehydrate the dynamic fields and ensure that the field names in the index being deserialized into are mapped correctly to fields in the serialized index. 
\ No newline at end of file +- New in version 6 is the storage of object type and scoring metadata information for a document in the index, including an internal object type id it was extracted for, freshness date and scoring magnitude, if applicable. \ No newline at end of file diff --git a/docs/content/en/docs/Searching/Search results/Scoring.md b/docs/content/en/docs/Searching/Search results/Scoring.md index ea1e6095..c187a0aa 100644 --- a/docs/content/en/docs/Searching/Search results/Scoring.md +++ b/docs/content/en/docs/Searching/Search results/Scoring.md @@ -1,6 +1,6 @@ --- -title: "Search Result Scoring" -linkTitle: "Result Scoring" +title: "Search result scoring" +linkTitle: "Scoring" weight: 2 description: > Explains how the results provided in `ISearchResults` are ordered, and how LIFTI calculates its scores. @@ -43,4 +43,13 @@ PrintSearchResults(results); // Output // 1 // 2 -``` \ No newline at end of file +``` + +## Score boosting + +You can boost scores in a number of ways: + +* Boosting search terms as [part of the query](../../lifti-query-syntax#score-boosting): `thanks^1.5`. +* Boosting [fields](../../../index-construction/withobjecttokenization#scoreboost) associated to an object: `.WithField("Name", c => c.Name, scoreBoost: 1.5D)`. +* Boosting objects based on a [freshness date](../../../index-construction/withobjecttokenization#withscoreboosting), e.g. the date it was last updated. +* Boosting objects based on a [magnitude value](../../../index-construction/withobjecttokenization#withscoreboosting), e.g. a star rating. 
\ No newline at end of file diff --git a/docs/content/en/docs/Searching/Search results/_index.md b/docs/content/en/docs/Searching/Search results/_index.md index c92c93d5..dcda7d90 100644 --- a/docs/content/en/docs/Searching/Search results/_index.md +++ b/docs/content/en/docs/Searching/Search results/_index.md @@ -9,31 +9,31 @@ description: "`FullTextIndex.Search` returns `ISearchResults`, which imple Search results are returned sorted according to the total document score, in descending order. See [scoring](./scoring) for more information. -## SearchResult<T> +## `SearchResult<TKey>` -### T Item { get; } +### `TKey Key { get; }` -The item that matched the search criteria. +The key for the document that matched the search criteria. -### IReadOnlyList<FieldSearchResult> FieldMatches { get; } +### `IReadOnlyList<FieldSearchResult> FieldMatches { get; }` -The fields that were matched for the item. Each of these is scored independently and provides detailed information +The fields that were matched for the document. Each of these is scored independently and provides detailed information about the location of the words that were matched. -### double Score { get; } +### `double Score { get; }` The overall score for this match. This is a sum of the scores for this instance's FieldMatches. -## FieldSearchResult +## `FieldSearchResult` -### string FoundIn { get; } +### `string FoundIn { get; }` The name of the field that the search results were found in. This will be one of the field names configured when the index was built, or `Unspecified` if no fields were configured. -### double Score { get; } +### `double Score { get; }` The score for this particular field. -### IReadOnlyList<WordLocation> Locations { get; } +### `IReadOnlyList<TokenLocation> Locations { get; }` -The `WordLocation` instances for the locations of the matched words in the field. +The `TokenLocation` instances for the locations of the matched tokens in the field. 
diff --git a/docs/content/en/docs/Searching/Search results/extracting-matched-phrases.md b/docs/content/en/docs/Searching/Search results/extracting-matched-phrases.md index b5bfa444..2a347fd9 100644 --- a/docs/content/en/docs/Searching/Search results/extracting-matched-phrases.md +++ b/docs/content/en/docs/Searching/Search results/extracting-matched-phrases.md @@ -8,18 +8,18 @@ description: "`ISearchResults` provides methods that allow you to combine the ## `CreateMatchPhrasesAsync` and `CreateMatchPhrases` The intent of the `ISearchResults.CreateMatchPhrasesAsync` methods is to allow you to provide a mechanism by which -LIFTI can retrieve the original source text, from items or loosely indexed text, and automatically +LIFTI can retrieve the original source text, from objects or loosely indexed text, and automatically extract phrases for the matched token locations. Where a multiple tokens are matched in a sequence, they will be combined into a single phrase. -For the `CreateMatchPhrasesAsync` overloads that create the matched phrases from an indexed item of type `TItem`, -you can either fetch the items one at a time, or be provided with a list of item ids and fetch all the relevant items +For the `CreateMatchPhrasesAsync` overloads that create the matched phrases from an indexed object of type `TObject`, +you can either fetch the objects one at a time, or be provided with a list of keys and fetch all the relevant objects in bulk. The latter is more efficient if you are having to go to an external source for the data, e.g. using a database query. 
### Example -This example demonstrates searching against an index of `Book` items: +This example demonstrates searching against an index of `Book` objects: ``` csharp public class Book diff --git a/docs/content/en/docs/Searching/_index.md b/docs/content/en/docs/Searching/_index.md index 9bfb832b..01fd674a 100644 --- a/docs/content/en/docs/Searching/_index.md +++ b/docs/content/en/docs/Searching/_index.md @@ -29,7 +29,3 @@ var query = new Query( new ExactWordQueryPart(tokenizer.Normalize("hello")), new ExactWordQueryPart(tokenizer.Normalize("there")))); ``` - - - - diff --git a/docs/content/en/docs/Searching/field-information.md b/docs/content/en/docs/Searching/field-information.md index 8c47ec5d..768b75e2 100644 --- a/docs/content/en/docs/Searching/field-information.md +++ b/docs/content/en/docs/Searching/field-information.md @@ -40,7 +40,7 @@ This abstract class contains information about a field that has been configured #### Methods -- **ReadAsync(object item, CancellationToken cancellationToken)**: Reads the text for the field from the specified item. The item must be of the type specified by the `ObjectType` property. +- **ReadAsync(object item, CancellationToken cancellationToken)**: Reads the text for the field from the specified object. The object must be of the type specified by the `ObjectType` property. ## `IsKnownField(Type objectType, string fieldName)` diff --git a/docs/content/en/docs/Searching/lifti-query-syntax.md b/docs/content/en/docs/Searching/lifti-query-syntax.md index db8ba011..e5708eb4 100644 --- a/docs/content/en/docs/Searching/lifti-query-syntax.md +++ b/docs/content/en/docs/Searching/lifti-query-syntax.md @@ -13,6 +13,7 @@ description: > Example|Meaning -|- West|**West** must appear in the text [exactly](#exact-word-matches). +West\|Wing^2|**West** or **Wing** must appear in the text [exactly](#exact-word-matches), where matches on **Wing** will have a [score boost](#score-boosting) of 2. 
?Wst|Words that [fuzzy match](#fuzzy-match-) with **wst** must appear in the text. ?3,2?Wst|Words that [fuzzy match](#fuzzy-match-) with **wst** must appear in the text, with a specified max edit distance and max sequential edits. title=West|A [field restricted](#field-restrictions-field) search. **West** must appear in the ***title*** field of an indexed object. @@ -86,7 +87,7 @@ Examples: The and operator (`&`) Performs an intersection of two intermediate query results, combining word positions for successful matches. -`Food & Burger` searches for items containing both `"food"` and `"burger"` at any position, and in any field. +`Food & Burger` searches for documents containing both `"food"` and `"burger"` at any position, and in any field. (Alternatively `Food Burger` will have the same effect as the default operator between query parts is an `&`.) @@ -94,7 +95,7 @@ The and operator (`&`) Performs an intersection of two intermediate query result ### Or (`|`) -Performs a union of two intermediate query results. Where an items appears in both sets, word positions are combined into one list. +Performs a union of two intermediate query results. Where a document appears in both sets, word positions are combined into one list. Restricts results to same field by default: **false** @@ -112,9 +113,11 @@ e.g. `(food & cake) | (cheese & biscuit)` These allow for restricting searches within a given field. -`title=analysis | body=(chocolate & cake)` Searches for items with `"analysis"` in the title field *or both* `"chocolate"` and `"cake"` in the body field. +`title=analysis | body=(chocolate & cake)` Searches for documents with `"analysis"` in the title field *or both* `"chocolate"` and `"cake"` in the body field. -`title=analysis food` Searches for items with `"analysis"` in the title field *and* `"food"` in *any* field. +`title=analysis food` Searches for documents with `"analysis"` in the title field *and* `"food"` in *any* field. 
+ +If your field name contains spaces or other special characters, you can escape it using square brackets `[` and `]`, e.g. `[my field]=chocolate`. --- @@ -123,7 +126,7 @@ These allow for restricting searches within a given field. Placing quotes around a search phrase will enforce that the words all appear immediately next to each other in the source text. -`"cheese burger"` will only match items that have text containing `"cheese"` followed immediately by `"burger"`. +`"cheese burger"` will only match documents that have text containing `"cheese"` followed immediately by `"burger"`. --- @@ -133,7 +136,7 @@ The near operator performs a positional intersection of two results based on the The `~` operator requires that words must be within 5 words of one another. This can value can be controlled by specifying a number, e.g. `~4` to restrict to only returning results within 4 words of one another. -`cheese ~ cake` will return items containing the words `"cheese"` and `"cake"` in either order, up to 5 words apart, e.g. `"the cake was made with cheese"` and `"I like cheese and cake"` would both match, but `"cake is never to be considered a substitute for cheese"` would not. +`cheese ~ cake` will return documents containing the words `"cheese"` and `"cake"` in either order, up to 5 words apart, e.g. `"the cake was made with cheese"` and `"I like cheese and cake"` would both match, but `"cake is never to be considered a substitute for cheese"` would not. --- @@ -150,3 +153,12 @@ Same as Near (`~`) except that order is important in the positional intersection Same as Near Following (`~>`) except there are no constraints on how far apart the words can be. `cheese > cake` will match any text where `"cheese"` precedes `"cake"` in a given field. + +## Score boosting + +Wildcard, fuzzy match and exact match search terms can have their resulting scores boosted by adding `^n` after them. For example, `wild^2` will boost matches of "wild" by 2x. 
+ +## Escaping search text + +Use a backslash `\` when you want to explicitly search for a character that clashes with the query syntax. For example, `A\=B` will search for a single token containing +exactly "A=B", rather than attempting to perform a field restricted search. \ No newline at end of file diff --git a/docs/content/en/docs/Searching/manually-constructing-queries.md b/docs/content/en/docs/Searching/manually-constructing-queries.md index e361bf92..4235c0c7 100644 --- a/docs/content/en/docs/Searching/manually-constructing-queries.md +++ b/docs/content/en/docs/Searching/manually-constructing-queries.md @@ -91,28 +91,28 @@ A query part requiring that a series of matches must appear in a document in seq > `field=` in LIFTI query syntax -Restricts the resulting item matches to only those that include matching tokens in a specific field. +Restricts the resulting document matches to only those that include matching tokens in a specific field. #### `NearQueryOperator(IQueryPart left, IQueryPart right, int tolerance = 5)` > `~n` in LIFTI query syntax -Produces an intersection of two `IQueryPart`s, restricting an item's field matches such that the locations are close to one another. +Produces an intersection of two `IQueryPart`s, restricting a document's field matches such that the locations are close to one another. -Items that result in no field matches are filtered out. +Documents that result in no field matches are filtered out. #### `PrecedingNearQueryOperator(IQueryPart left, IQueryPart right, int tolerance = 5)` > `~n>` in LIFTI query syntax -Produces an intersection of two `IQueryPart`s, restricting an item's field matches such that the locations of the first appear before the locations of the second and within a specified tolerance. +Produces an intersection of two `IQueryPart`s, restricting a document's field matches such that the locations of the first appear before the locations of the second and within a specified tolerance.
-Items that result in no field matches are filtered out. +Documents that result in no field matches are filtered out. #### `PrecedingQueryOperator(IQueryPart left, IQueryPart right)` > `>` in LIFTI query syntax -Produces an intersection of two `IQueryPart`s, restricting an item's field matches such that the locations of the first appear before the locations of the second. -Items that result in no field matches are filtered out. \ No newline at end of file +Produces an intersection of two `IQueryPart`s, restricting a document's field matches such that the locations of the first appear before the locations of the second. + +Documents that result in no field matches are filtered out. \ No newline at end of file diff --git a/docs/content/en/docs/Searching/query-execution.md b/docs/content/en/docs/Searching/query-execution.md new file mode 100644 index 00000000..95eace38 --- /dev/null +++ b/docs/content/en/docs/Searching/query-execution.md @@ -0,0 +1,32 @@ +--- +title: "Query execution" +linkTitle: "Query execution" +weight: 2 +description: > + LIFTI attempts to optimize the order that query parts are executed to make queries as efficient as possible. +--- + +LIFTI's query execution logic employs a strategic approach to optimize query performance, prioritizing the execution of query parts based on their calculated weights. +This weight represents the relative cost and effectiveness of executing a particular query part in relation to the overall document set. By assigning these weights, +LIFTI aims to minimize the number of documents involved early in the query execution, thereby reducing the computational load and improving efficiency. + +### Scoring Mechanism Overview + +1. **General Principle**: Each query part is assigned a "weight" based on its expected execution cost and the number of documents it is likely to involve. +The objective is to execute less costly parts first, reducing the document set size early in the process. + +2.
**Weighting Calculations for Different Query Parts**: + - **ExactWordQueryPart**: Uses inverse document count. Weight = `Matching Document Count / Total Document Count`. Lower weights will result for words bringing in fewer documents. + - **WildcardQueryPart**: A complex calculation involving the count of text matches, multi-character, and single-character wildcard matches, with different multipliers. Weight is reduced if the first query part is a text match, and increased if it has a leading multi-character match. + - **FuzzyMatchQueryPart**: Based on the ExactWordQueryPart's weighting, adding a factor for the number of edits allowed. Weight increases with more permissible edits. + - **AdjacentWordsQueryPart**: Assumes the score of the first part of the query, adjusted by the inverse of the number of parts. + - **Intersection Query Parts** (AndQueryPart, PrecedingNearQueryPart, NearQueryPart, PrecedingQueryPart): Promotes parts based on the lowest scoring part of the intersection, encouraging execution of the cheapest part first. + - **OrQueryOperator**: A union operation always requires both sides to be evaluated, so weight = `Left Score + Right Score`. + - **FieldFilterQueryOperator**: Applies additional filtering, thus promoted. Weight = `Child Part Score * 0.5`. + - **BracketedQueryPart**: Reflects the score of the child part. + +### Execution Strategy + +The execution order is determined by these weightings, with lower weights prioritized. This approach ensures that the query processor can efficiently +filter and score documents, avoiding unnecessary computations on documents that would later be excluded. This system is especially beneficial in queries with multiple parts, +where early reduction in document set size can lead to significant performance improvements.
diff --git a/docs/content/en/docs/Serialization/_index.md b/docs/content/en/docs/Serialization/_index.md index 2c5acc8c..73479c49 100644 --- a/docs/content/en/docs/Serialization/_index.md +++ b/docs/content/en/docs/Serialization/_index.md @@ -40,9 +40,9 @@ var newIndex = new FullTextIndexBuilder().Build(); await serializer.DeserializeAsync(newIndex, stream, disposeStream: false); // Prove that the new index has the same contents -// Emits: 3 items contain text in the new index +// Emits: 3 documents contain text in the new index var matches = newIndex.Search("text"); -Console.WriteLine($"{matches.Count()} items contain text in the new index"); +Console.WriteLine($"{matches.Count()} documents contain text in the new index"); ``` If you want to understand how the binary data is laid out, you can have a look at the [Serialization Format](../reference/serialization-format) reference page. diff --git a/docs/content/en/docs/Serialization/key-serialization.md b/docs/content/en/docs/Serialization/key-serialization.md index 8b0ab553..d253f360 100644 --- a/docs/content/en/docs/Serialization/key-serialization.md +++ b/docs/content/en/docs/Serialization/key-serialization.md @@ -40,7 +40,7 @@ await index.AddAsync(new CompositeKey(3, 11), "Text associated with C is both fi ``` > `CompositeKey` could also be the key of an object that you want to index. You'd just need to use the -> configure the `FullTextIndexBuilder` with the appropriate `WithKey` call when setting up the item tokenization options. +> configure the `FullTextIndexBuilder` with the appropriate `WithKey` call when setting up the object tokenization options. 
The only additional work comes when constructing the BinarySerializer - here you need to pass a custom `IKeySerializer` implementation so that the serializer knows how to read and write the data in your custom key: diff --git a/docs/package-lock.json b/docs/package-lock.json index a5bd1b7f..27413b6b 100644 --- a/docs/package-lock.json +++ b/docs/package-lock.json @@ -1,7 +1,7 @@ { "name": "lifti-doc", "version": "0.0.1", - "lockfileVersion": 2, + "lockfileVersion": 3, "requires": true, "packages": { "": { @@ -10,7 +10,7 @@ "license": "ISC", "devDependencies": { "autoprefixer": "^10.4.16", - "postcss-cli": "^10.1.0" + "postcss-cli": "^11.0.0" } }, "node_modules/@nodelib/fs.scandir": { @@ -48,6 +48,18 @@ "node": ">= 8" } }, + "node_modules/@sindresorhus/merge-streams": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@sindresorhus/merge-streams/-/merge-streams-1.0.0.tgz", + "integrity": "sha512-rUV5WyJrJLoloD4NDN1V1+LDMDWOa4OTsT4yYJwQNpTU6FWxkxHpL7eu4w+DmiH8x/EAM1otkPE1+LaspIbplw==", + "dev": true, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/ansi-regex": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", @@ -73,9 +85,9 @@ } }, "node_modules/anymatch": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.2.tgz", - "integrity": "sha512-P43ePfOAIupkguHUycrc4qJ9kz8ZiuOUijaETwX7THt0Y/GNK7v0aa8rY816xWjZ7rJdA5XdMcpVFTKMq+RvWg==", + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.3.tgz", + "integrity": "sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==", "dev": true, "dependencies": { "normalize-path": "^3.0.0", @@ -144,9 +156,9 @@ } }, "node_modules/browserslist": { - "version": "4.22.1", - "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.22.1.tgz", - "integrity": 
"sha512-FEVc202+2iuClEhZhrWy6ZiAcRLvNMyYcxZ8raemul1DYVOVdFsbqckWLdsixQZCpJlwe77Z3UTalE7jsjnKfQ==", + "version": "4.22.2", + "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.22.2.tgz", + "integrity": "sha512-0UgcrvQmBDvZHFGdYUehrCNIazki7/lUP3kkoi/r3YB2amZbFM9J43ZRkJTXBUZK4gmx56+Sqk9+Vs9mwZx9+A==", "dev": true, "funding": [ { @@ -163,9 +175,9 @@ } ], "dependencies": { - "caniuse-lite": "^1.0.30001541", - "electron-to-chromium": "^1.4.535", - "node-releases": "^2.0.13", + "caniuse-lite": "^1.0.30001565", + "electron-to-chromium": "^1.4.601", + "node-releases": "^2.0.14", "update-browserslist-db": "^1.0.13" }, "bin": { @@ -176,9 +188,9 @@ } }, "node_modules/caniuse-lite": { - "version": "1.0.30001557", - "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001557.tgz", - "integrity": "sha512-91oR7hLNUP3gG6MLU+n96em322a8Xzes8wWdBKhLgUoiJsAF5irZnxSUCbc+qUZXNnPCfUwLOi9ZCZpkvjQajw==", + "version": "1.0.30001576", + "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001576.tgz", + "integrity": "sha512-ff5BdakGe2P3SQsMsiqmt1Lc8221NR1VzHj5jXN5vBny9A6fpze94HiVV/n7XRosOlsShJcvMv5mdnpjOGCEgg==", "dev": true, "funding": [ { @@ -223,14 +235,17 @@ } }, "node_modules/cliui": { - "version": "7.0.4", - "resolved": "https://registry.npmjs.org/cliui/-/cliui-7.0.4.tgz", - "integrity": "sha512-OcRE68cOsVMXp1Yvonl/fzkQOyjLSu/8bhPDfQt0e0/Eb283TKP20Fs2MqoPsr9SwA595rRCA+QMzYc9nBP+JQ==", + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", + "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", "dev": true, "dependencies": { "string-width": "^4.2.0", - "strip-ansi": "^6.0.0", + "strip-ansi": "^6.0.1", "wrap-ansi": "^7.0.0" + }, + "engines": { + "node": ">=12" } }, "node_modules/color-convert": { @@ -260,22 +275,10 @@ "node": ">= 0.6.0" } }, - "node_modules/dir-glob": { - "version": "3.0.1", - "resolved": 
"https://registry.npmjs.org/dir-glob/-/dir-glob-3.0.1.tgz", - "integrity": "sha512-WkrWp9GR4KXfKGYzOLmTuGVi1UWFfws377n9cc55/tb6DuqyF6pcQ5AbiHEshaDpY9v6oaSr2XCDidGmMwdzIA==", - "dev": true, - "dependencies": { - "path-type": "^4.0.0" - }, - "engines": { - "node": ">=8" - } - }, "node_modules/electron-to-chromium": { - "version": "1.4.569", - "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.569.tgz", - "integrity": "sha512-LsrJjZ0IbVy12ApW3gpYpcmHS3iRxH4bkKOW98y1/D+3cvDUWGcbzbsFinfUS8knpcZk/PG/2p/RnkMCYN7PVg==", + "version": "1.4.630", + "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.630.tgz", + "integrity": "sha512-osHqhtjojpCsACVnuD11xO5g9xaCyw7Qqn/C2KParkMv42i8jrJJgx3g7mkHfpxwhy9MnOJr8+pKOdZ7qzgizg==", "dev": true }, "node_modules/emoji-regex": { @@ -294,9 +297,9 @@ } }, "node_modules/fast-glob": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.1.tgz", - "integrity": "sha512-kNFPyjhh5cKjrUltxs+wFx+ZkbRaxxmZ+X0ZU31SOsxCEtP9VPgtq2teZw1DebupL5GmDaNQ6yKMMVcM41iqDg==", + "version": "3.3.2", + "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.2.tgz", + "integrity": "sha512-oX2ruAFQwf/Orj8m737Y5adxDQO0LAB7/S5MnxCdTNDd4p6BsyIVsv9JQsATbTSq8KHRpLwIHbVlUNatxd+1Ow==", "dev": true, "dependencies": { "@nodelib/fs.stat": "^2.0.2", @@ -310,9 +313,9 @@ } }, "node_modules/fastq": { - "version": "1.15.0", - "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.15.0.tgz", - "integrity": "sha512-wBrocU2LCXXa+lWBt8RoIRD89Fi8OdABODa/kEnyeyjS5aZO5/GNvI5sEINADqP/h8M29UHTHUb53sUu5Ihqdw==", + "version": "1.16.0", + "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.16.0.tgz", + "integrity": "sha512-ifCoaXsDrsdkWTtiNJX5uzHDsrck5TzfKKDcuFFTIrrc/BS076qgEIfoIy1VeZqViznfKiysPYTh/QeHtnIsYA==", "dev": true, "dependencies": { "reusify": "^1.0.4" @@ -344,9 +347,9 @@ } }, "node_modules/fs-extra": { - "version": "11.1.1", - "resolved": 
"https://registry.npmjs.org/fs-extra/-/fs-extra-11.1.1.tgz", - "integrity": "sha512-MGIE4HOvQCeUCzmlHs0vXpih4ysz4wg9qiSAu6cd42lVwPbTM1TjV7RusoyQqMmk/95gdQZX72u+YW+c3eEpFQ==", + "version": "11.2.0", + "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.2.0.tgz", + "integrity": "sha512-PmDi3uwK5nFuXh7XDTlVnS17xJS7vW36is2+w3xcv8SVxiB4NyATf4ctkVY5bkSjX0Y4nbvZCq1/EjtEyr9ktw==", "dev": true, "dependencies": { "graceful-fs": "^4.2.0", @@ -358,9 +361,9 @@ } }, "node_modules/fsevents": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", - "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", "dev": true, "hasInstallScript": true, "optional": true, @@ -405,31 +408,20 @@ } }, "node_modules/globby": { - "version": "13.2.2", - "resolved": "https://registry.npmjs.org/globby/-/globby-13.2.2.tgz", - "integrity": "sha512-Y1zNGV+pzQdh7H39l9zgB4PJqjRNqydvdYCDG4HFXM4XuvSaQQlEc91IU1yALL8gUTDomgBAfz3XJdmUS+oo0w==", + "version": "14.0.0", + "resolved": "https://registry.npmjs.org/globby/-/globby-14.0.0.tgz", + "integrity": "sha512-/1WM/LNHRAOH9lZta77uGbq0dAEQM+XjNesWwhlERDVenqothRbnzTrL3/LrIoEPPjeUHC3vrS6TwoyxeHs7MQ==", "dev": true, "dependencies": { - "dir-glob": "^3.0.1", - "fast-glob": "^3.3.0", + "@sindresorhus/merge-streams": "^1.0.0", + "fast-glob": "^3.3.2", "ignore": "^5.2.4", - "merge2": "^1.4.1", - "slash": "^4.0.0" + "path-type": "^5.0.0", + "slash": "^5.1.0", + "unicorn-magic": "^0.1.0" }, "engines": { - "node": "^12.20.0 || ^14.13.1 || >=16.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/globby/node_modules/slash": { - "version": "4.0.0", - "resolved": 
"https://registry.npmjs.org/slash/-/slash-4.0.0.tgz", - "integrity": "sha512-3dOsAHXXUkQTpOYcoAxLIorMTp4gIQr5IW3iVb7A7lFIp0VHhnynm9izx6TssdrIcVIESAlVjtnO2K8bg+Coew==", - "dev": true, - "engines": { - "node": ">=12" + "node": ">=18" }, "funding": { "url": "https://github.com/sponsors/sindresorhus" @@ -442,9 +434,9 @@ "dev": true }, "node_modules/ignore": { - "version": "5.2.4", - "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.2.4.tgz", - "integrity": "sha512-MAb38BcSbH0eHNBxn7ql2NH/kX33OkB3lZ1BNdh7ENeRChHTYsTvWrMubiIAMNS2llXEEgZ1MUOBtXChP3kaFQ==", + "version": "5.3.0", + "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.0.tgz", + "integrity": "sha512-g7dmpshy+gD7mh88OC9NwSGTKoc3kyLAZQRU1mt53Aw/vnvfXnbC+F/7F7QoYVKbV+KNvJx8wArewKy1vXMtlg==", "dev": true, "engines": { "node": ">= 4" @@ -465,7 +457,7 @@ "node_modules/is-extglob": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", - "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", + "integrity": "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==", "dev": true, "engines": { "node": ">=0.10.0" @@ -514,12 +506,12 @@ } }, "node_modules/lilconfig": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-2.1.0.tgz", - "integrity": "sha512-utWOt/GHzuUxnLKxB6dk81RoOeoNeHgbrXiuGk4yyF5qlRz+iIVWu56E2fqGHFrXz0QNUhLB/8nKqvRH66JKGQ==", + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.0.0.tgz", + "integrity": "sha512-K2U4W2Ff5ibV7j7ydLr+zLAkIg5JJ4lPn1Ltsdt+Tz/IjQ8buJ55pZAxoP34lqIiwtF9iAvtLv3JGv7CAyAg+g==", "dev": true, "engines": { - "node": ">=10" + "node": ">=14" } }, "node_modules/merge2": { @@ -545,9 +537,9 @@ } }, "node_modules/nanoid": { - "version": "3.3.6", - "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.6.tgz", - "integrity": "sha512-BGcqMMJuToF7i1rt+2PWSNVnWIkGCU78jBG3RxO/bZlnZPK2Cmi2QaffxGO/2RvWi9sL+FAiRiXMgsyxQ1DIDA==", + 
"version": "3.3.7", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.7.tgz", + "integrity": "sha512-eSRppjcPIatRIMC1U6UngP8XFcz8MQWGQdt1MTBQ7NaAmvXDfvNxbvWV3x2y6CdEUciCSsDHDQZbhYaB8QEo2g==", "dev": true, "funding": [ { @@ -564,9 +556,9 @@ } }, "node_modules/node-releases": { - "version": "2.0.13", - "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.13.tgz", - "integrity": "sha512-uYr7J37ae/ORWdZeQ1xxMJe3NtdmqMC/JZK+geofDrkLUApKRHPd18/TxtBOJ4A0/+uUIliorNrfYV6s1b02eQ==", + "version": "2.0.14", + "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.14.tgz", + "integrity": "sha512-y10wOWt8yZpqXmOgRo77WaHEmhYQYGNA6y421PKsKYWEK8aW+cqAphborZDhqfyKrbZEN92CN1X2KbafY2s7Yw==", "dev": true }, "node_modules/normalize-path": { @@ -581,19 +573,22 @@ "node_modules/normalize-range": { "version": "0.1.2", "resolved": "https://registry.npmjs.org/normalize-range/-/normalize-range-0.1.2.tgz", - "integrity": "sha1-LRDAa9/TEuqXd2laTShDlFa3WUI=", + "integrity": "sha512-bdok/XvKII3nUpklnV6P2hxtMNrCboOjAcyBuQnWEhO665FwrSNRxU+AqpsyvO6LgGYPspN+lu5CLtw4jPRKNA==", "dev": true, "engines": { "node": ">=0.10.0" } }, "node_modules/path-type": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/path-type/-/path-type-4.0.0.tgz", - "integrity": "sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw==", + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/path-type/-/path-type-5.0.0.tgz", + "integrity": "sha512-5HviZNaZcfqP95rwpv+1HDgUamezbqdSYTyzjTvwtJSnIH+3vnbmWsItli8OFEndS984VT55M3jduxZbX351gg==", "dev": true, "engines": { - "node": ">=8" + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" } }, "node_modules/picocolors": { @@ -617,16 +612,16 @@ "node_modules/pify": { "version": "2.3.0", "resolved": "https://registry.npmjs.org/pify/-/pify-2.3.0.tgz", - "integrity": "sha1-7RQaasBDqEnqWISY59yosVMw6Qw=", + "integrity": 
"sha512-udgsAY+fTnvv7kI7aaxbqwWNb0AHiB0qBO89PZKPkoTmGOgdbrHDKD+0B2X4uTfJ/FT1R09r9gTsjUjNJotuog==", "dev": true, "engines": { "node": ">=0.10.0" } }, "node_modules/postcss": { - "version": "8.4.31", - "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.31.tgz", - "integrity": "sha512-PS08Iboia9mts/2ygV3eLpY5ghnUcfLV/EXTOW1E2qYxJKGGBUtNjN76FYHnMs36RmARn41bC0AZmn+rR0OVpQ==", + "version": "8.4.33", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.33.tgz", + "integrity": "sha512-Kkpbhhdjw2qQs2O2DGX+8m5OVqEcbB9HRBvuYM9pgrjEFUg30A9LmXNlTAUj4S9kgtGyrMbTzVjH7E+s5Re2yg==", "dev": true, "funding": [ { @@ -644,7 +639,7 @@ ], "peer": true, "dependencies": { - "nanoid": "^3.3.6", + "nanoid": "^3.3.7", "picocolors": "^1.0.0", "source-map-js": "^1.0.2" }, @@ -653,18 +648,18 @@ } }, "node_modules/postcss-cli": { - "version": "10.1.0", - "resolved": "https://registry.npmjs.org/postcss-cli/-/postcss-cli-10.1.0.tgz", - "integrity": "sha512-Zu7PLORkE9YwNdvOeOVKPmWghprOtjFQU3srMUGbdz3pHJiFh7yZ4geiZFMkjMfB0mtTFR3h8RemR62rPkbOPA==", + "version": "11.0.0", + "resolved": "https://registry.npmjs.org/postcss-cli/-/postcss-cli-11.0.0.tgz", + "integrity": "sha512-xMITAI7M0u1yolVcXJ9XTZiO9aO49mcoKQy6pCDFdMh9kGqhzLVpWxeD/32M/QBmkhcGypZFFOLNLmIW4Pg4RA==", "dev": true, "dependencies": { "chokidar": "^3.3.0", "dependency-graph": "^0.11.0", "fs-extra": "^11.0.0", "get-stdin": "^9.0.0", - "globby": "^13.0.0", + "globby": "^14.0.0", "picocolors": "^1.0.0", - "postcss-load-config": "^4.0.0", + "postcss-load-config": "^5.0.0", "postcss-reporter": "^7.0.0", "pretty-hrtime": "^1.0.3", "read-cache": "^1.0.0", @@ -675,37 +670,43 @@ "postcss": "index.js" }, "engines": { - "node": ">=14" + "node": ">=18" }, "peerDependencies": { "postcss": "^8.0.0" } }, "node_modules/postcss-load-config": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/postcss-load-config/-/postcss-load-config-4.0.1.tgz", - "integrity": 
"sha512-vEJIc8RdiBRu3oRAI0ymerOn+7rPuMvRXslTvZUKZonDHFIczxztIyJ1urxM1x9JXEikvpWWTUUqal5j/8QgvA==", + "version": "5.0.2", + "resolved": "https://registry.npmjs.org/postcss-load-config/-/postcss-load-config-5.0.2.tgz", + "integrity": "sha512-Q8QR3FYbqOKa0bnC1UQ2bFq9/ulHX5Bi34muzitMr8aDtUelO5xKeJEYC/5smE0jNE9zdB/NBnOwXKexELbRlw==", "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], "dependencies": { - "lilconfig": "^2.0.5", - "yaml": "^2.1.1" + "lilconfig": "^3.0.0", + "yaml": "^2.3.4" }, "engines": { - "node": ">= 14" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/postcss/" + "node": ">= 18" }, "peerDependencies": { - "postcss": ">=8.0.9", - "ts-node": ">=9.0.0" + "jiti": ">=1.21.0", + "postcss": ">=8.0.9" }, "peerDependenciesMeta": { - "postcss": { + "jiti": { "optional": true }, - "ts-node": { + "postcss": { "optional": true } } @@ -739,7 +740,7 @@ "node_modules/pretty-hrtime": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/pretty-hrtime/-/pretty-hrtime-1.0.3.tgz", - "integrity": "sha1-t+PqQkNaTJsnWdmeDyAesZWALuE=", + "integrity": "sha512-66hKPCr+72mlfiSjlEB1+45IjXSqvVAIy6mocupoww4tBFE9R9IhwwUGoI4G++Tc9Aq+2rxOt0RFU6gPcrte0A==", "dev": true, "engines": { "node": ">= 0.8" @@ -768,7 +769,7 @@ "node_modules/read-cache": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/read-cache/-/read-cache-1.0.0.tgz", - "integrity": "sha1-5mTvMRYRZsl1HNvo28+GtftY93Q=", + "integrity": "sha512-Owdv/Ft7IjOgm/i0xvNDZ1LrRANRfew4b2prF3OWMQLxLfu3bS8FVhCsrSCMK4lR56Y9ya+AThoTpDCTxCmpRA==", "dev": true, "dependencies": { "pify": "^2.3.0" @@ -789,7 +790,7 @@ "node_modules/require-directory": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", - "integrity": "sha1-jGStX9MNqxyXbiNE/+f3kqam30I=", + "integrity": 
"sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", "dev": true, "engines": { "node": ">=0.10.0" @@ -894,10 +895,22 @@ "node": ">=8.0" } }, + "node_modules/unicorn-magic": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/unicorn-magic/-/unicorn-magic-0.1.0.tgz", + "integrity": "sha512-lRfVq8fE8gz6QMBuDM6a+LO3IAzTi05H6gCVaUpir2E1Rwpo4ZUog45KpNXKC/Mn3Yb9UDuHumeFTo9iV/D9FQ==", + "dev": true, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/universalify": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.0.tgz", - "integrity": "sha512-hAZsKq7Yy11Zu1DE0OzWjw7nnLZmJZYTDZZyEFHZdUhV8FkH5MCfoU1XMaxXovpyW5nq5scPqq0ZDP9Zyl04oQ==", + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz", + "integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==", "dev": true, "engines": { "node": ">= 10.0.0" @@ -960,658 +973,40 @@ } }, "node_modules/yaml": { - "version": "2.3.3", - "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.3.3.tgz", - "integrity": "sha512-zw0VAJxgeZ6+++/su5AFoqBbZbrEakwu+X0M5HmcwUiBL7AzcuPKjj5we4xfQLp78LkEMpD0cOnUhmgOVy3KdQ==", + "version": "2.3.4", + "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.3.4.tgz", + "integrity": "sha512-8aAvwVUSHpfEqTQ4w/KMlf3HcRdt50E5ODIQJBw1fQ5RL34xabzxtUlzTXVqc4rkZsPbvrXKWnABCD7kWSmocA==", "dev": true, "engines": { "node": ">= 14" } }, "node_modules/yargs": { - "version": "17.3.1", - "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.3.1.tgz", - "integrity": "sha512-WUANQeVgjLbNsEmGk20f+nlHgOqzRFpiGWVaBrYGYIGANIIu3lWjoyi0fNlFmJkvfhCZ6BXINe7/W2O2bV4iaA==", + "version": "17.7.2", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", + "integrity": 
"sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", "dev": true, "dependencies": { - "cliui": "^7.0.2", + "cliui": "^8.0.1", "escalade": "^3.1.1", "get-caller-file": "^2.0.5", "require-directory": "^2.1.1", "string-width": "^4.2.3", "y18n": "^5.0.5", - "yargs-parser": "^21.0.0" + "yargs-parser": "^21.1.1" }, "engines": { "node": ">=12" } }, "node_modules/yargs-parser": { - "version": "21.0.0", - "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.0.0.tgz", - "integrity": "sha512-z9kApYUOCwoeZ78rfRYYWdiU/iNL6mwwYlkkZfJoyMR1xps+NEBX5X7XmRpxkZHhXJ6+Ey00IwKxBBSW9FIjyA==", + "version": "21.1.1", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", + "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", "dev": true, "engines": { "node": ">=12" } } - }, - "dependencies": { - "@nodelib/fs.scandir": { - "version": "2.1.5", - "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", - "integrity": "sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==", - "dev": true, - "requires": { - "@nodelib/fs.stat": "2.0.5", - "run-parallel": "^1.1.9" - } - }, - "@nodelib/fs.stat": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz", - "integrity": "sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==", - "dev": true - }, - "@nodelib/fs.walk": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz", - "integrity": "sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==", - "dev": true, - "requires": { - "@nodelib/fs.scandir": "2.1.5", - "fastq": "^1.6.0" - } - }, - "ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - 
"integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "dev": true - }, - "ansi-styles": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", - "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "dev": true, - "requires": { - "color-convert": "^2.0.1" - } - }, - "anymatch": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.2.tgz", - "integrity": "sha512-P43ePfOAIupkguHUycrc4qJ9kz8ZiuOUijaETwX7THt0Y/GNK7v0aa8rY816xWjZ7rJdA5XdMcpVFTKMq+RvWg==", - "dev": true, - "requires": { - "normalize-path": "^3.0.0", - "picomatch": "^2.0.4" - } - }, - "autoprefixer": { - "version": "10.4.16", - "resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.4.16.tgz", - "integrity": "sha512-7vd3UC6xKp0HLfua5IjZlcXvGAGy7cBAXTg2lyQ/8WpNhd6SiZ8Be+xm3FyBSYJx5GKcpRCzBh7RH4/0dnY+uQ==", - "dev": true, - "requires": { - "browserslist": "^4.21.10", - "caniuse-lite": "^1.0.30001538", - "fraction.js": "^4.3.6", - "normalize-range": "^0.1.2", - "picocolors": "^1.0.0", - "postcss-value-parser": "^4.2.0" - } - }, - "binary-extensions": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.2.0.tgz", - "integrity": "sha512-jDctJ/IVQbZoJykoeHbhXpOlNBqGNcwXJKJog42E5HDPUwQTSdjCHdihjj0DlnheQ7blbT6dHOafNAiS8ooQKA==", - "dev": true - }, - "braces": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz", - "integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==", - "dev": true, - "requires": { - "fill-range": "^7.0.1" - } - }, - "browserslist": { - "version": "4.22.1", - "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.22.1.tgz", - "integrity": 
"sha512-FEVc202+2iuClEhZhrWy6ZiAcRLvNMyYcxZ8raemul1DYVOVdFsbqckWLdsixQZCpJlwe77Z3UTalE7jsjnKfQ==", - "dev": true, - "requires": { - "caniuse-lite": "^1.0.30001541", - "electron-to-chromium": "^1.4.535", - "node-releases": "^2.0.13", - "update-browserslist-db": "^1.0.13" - } - }, - "caniuse-lite": { - "version": "1.0.30001557", - "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001557.tgz", - "integrity": "sha512-91oR7hLNUP3gG6MLU+n96em322a8Xzes8wWdBKhLgUoiJsAF5irZnxSUCbc+qUZXNnPCfUwLOi9ZCZpkvjQajw==", - "dev": true - }, - "chokidar": { - "version": "3.5.3", - "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.5.3.tgz", - "integrity": "sha512-Dr3sfKRP6oTcjf2JmUmFJfeVMvXBdegxB0iVQ5eb2V10uFJUCAS8OByZdVAyVb8xXNz3GjjTgj9kLWsZTqE6kw==", - "dev": true, - "requires": { - "anymatch": "~3.1.2", - "braces": "~3.0.2", - "fsevents": "~2.3.2", - "glob-parent": "~5.1.2", - "is-binary-path": "~2.1.0", - "is-glob": "~4.0.1", - "normalize-path": "~3.0.0", - "readdirp": "~3.6.0" - } - }, - "cliui": { - "version": "7.0.4", - "resolved": "https://registry.npmjs.org/cliui/-/cliui-7.0.4.tgz", - "integrity": "sha512-OcRE68cOsVMXp1Yvonl/fzkQOyjLSu/8bhPDfQt0e0/Eb283TKP20Fs2MqoPsr9SwA595rRCA+QMzYc9nBP+JQ==", - "dev": true, - "requires": { - "string-width": "^4.2.0", - "strip-ansi": "^6.0.0", - "wrap-ansi": "^7.0.0" - } - }, - "color-convert": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", - "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", - "dev": true, - "requires": { - "color-name": "~1.1.4" - } - }, - "color-name": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", - "dev": true - }, - "dependency-graph": { - "version": "0.11.0", - "resolved": 
"https://registry.npmjs.org/dependency-graph/-/dependency-graph-0.11.0.tgz", - "integrity": "sha512-JeMq7fEshyepOWDfcfHK06N3MhyPhz++vtqWhMT5O9A3K42rdsEDpfdVqjaqaAhsw6a+ZqeDvQVtD0hFHQWrzg==", - "dev": true - }, - "dir-glob": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/dir-glob/-/dir-glob-3.0.1.tgz", - "integrity": "sha512-WkrWp9GR4KXfKGYzOLmTuGVi1UWFfws377n9cc55/tb6DuqyF6pcQ5AbiHEshaDpY9v6oaSr2XCDidGmMwdzIA==", - "dev": true, - "requires": { - "path-type": "^4.0.0" - } - }, - "electron-to-chromium": { - "version": "1.4.569", - "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.569.tgz", - "integrity": "sha512-LsrJjZ0IbVy12ApW3gpYpcmHS3iRxH4bkKOW98y1/D+3cvDUWGcbzbsFinfUS8knpcZk/PG/2p/RnkMCYN7PVg==", - "dev": true - }, - "emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "dev": true - }, - "escalade": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz", - "integrity": "sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==", - "dev": true - }, - "fast-glob": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.1.tgz", - "integrity": "sha512-kNFPyjhh5cKjrUltxs+wFx+ZkbRaxxmZ+X0ZU31SOsxCEtP9VPgtq2teZw1DebupL5GmDaNQ6yKMMVcM41iqDg==", - "dev": true, - "requires": { - "@nodelib/fs.stat": "^2.0.2", - "@nodelib/fs.walk": "^1.2.3", - "glob-parent": "^5.1.2", - "merge2": "^1.3.0", - "micromatch": "^4.0.4" - } - }, - "fastq": { - "version": "1.15.0", - "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.15.0.tgz", - "integrity": "sha512-wBrocU2LCXXa+lWBt8RoIRD89Fi8OdABODa/kEnyeyjS5aZO5/GNvI5sEINADqP/h8M29UHTHUb53sUu5Ihqdw==", - "dev": true, - "requires": { - "reusify": "^1.0.4" - } - }, - "fill-range": { - 
"version": "7.0.1", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", - "integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==", - "dev": true, - "requires": { - "to-regex-range": "^5.0.1" - } - }, - "fraction.js": { - "version": "4.3.7", - "resolved": "https://registry.npmjs.org/fraction.js/-/fraction.js-4.3.7.tgz", - "integrity": "sha512-ZsDfxO51wGAXREY55a7la9LScWpwv9RxIrYABrlvOFBlH/ShPnrtsXeuUIfXKKOVicNxQ+o8JTbJvjS4M89yew==", - "dev": true - }, - "fs-extra": { - "version": "11.1.1", - "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.1.1.tgz", - "integrity": "sha512-MGIE4HOvQCeUCzmlHs0vXpih4ysz4wg9qiSAu6cd42lVwPbTM1TjV7RusoyQqMmk/95gdQZX72u+YW+c3eEpFQ==", - "dev": true, - "requires": { - "graceful-fs": "^4.2.0", - "jsonfile": "^6.0.1", - "universalify": "^2.0.0" - } - }, - "fsevents": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", - "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", - "dev": true, - "optional": true - }, - "get-caller-file": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", - "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", - "dev": true - }, - "get-stdin": { - "version": "9.0.0", - "resolved": "https://registry.npmjs.org/get-stdin/-/get-stdin-9.0.0.tgz", - "integrity": "sha512-dVKBjfWisLAicarI2Sf+JuBE/DghV4UzNAVe9yhEJuzeREd3JhOTE9cUaJTeSa77fsbQUK3pcOpJfM59+VKZaA==", - "dev": true - }, - "glob-parent": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", - "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", - "dev": true, - "requires": { - "is-glob": "^4.0.1" - } - }, - "globby": { - "version": "13.2.2", - 
"resolved": "https://registry.npmjs.org/globby/-/globby-13.2.2.tgz", - "integrity": "sha512-Y1zNGV+pzQdh7H39l9zgB4PJqjRNqydvdYCDG4HFXM4XuvSaQQlEc91IU1yALL8gUTDomgBAfz3XJdmUS+oo0w==", - "dev": true, - "requires": { - "dir-glob": "^3.0.1", - "fast-glob": "^3.3.0", - "ignore": "^5.2.4", - "merge2": "^1.4.1", - "slash": "^4.0.0" - }, - "dependencies": { - "slash": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/slash/-/slash-4.0.0.tgz", - "integrity": "sha512-3dOsAHXXUkQTpOYcoAxLIorMTp4gIQr5IW3iVb7A7lFIp0VHhnynm9izx6TssdrIcVIESAlVjtnO2K8bg+Coew==", - "dev": true - } - } - }, - "graceful-fs": { - "version": "4.2.11", - "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", - "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", - "dev": true - }, - "ignore": { - "version": "5.2.4", - "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.2.4.tgz", - "integrity": "sha512-MAb38BcSbH0eHNBxn7ql2NH/kX33OkB3lZ1BNdh7ENeRChHTYsTvWrMubiIAMNS2llXEEgZ1MUOBtXChP3kaFQ==", - "dev": true - }, - "is-binary-path": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz", - "integrity": "sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw==", - "dev": true, - "requires": { - "binary-extensions": "^2.0.0" - } - }, - "is-extglob": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", - "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", - "dev": true - }, - "is-fullwidth-code-point": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", - "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", - "dev": true - }, - "is-glob": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz", - 
"integrity": "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==", - "dev": true, - "requires": { - "is-extglob": "^2.1.1" - } - }, - "is-number": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", - "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", - "dev": true - }, - "jsonfile": { - "version": "6.1.0", - "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.1.0.tgz", - "integrity": "sha512-5dgndWOriYSm5cnYaJNhalLNDKOqFwyDB/rr1E9ZsGciGvKPs8R2xYGCacuf3z6K1YKDz182fd+fY3cn3pMqXQ==", - "dev": true, - "requires": { - "graceful-fs": "^4.1.6", - "universalify": "^2.0.0" - } - }, - "lilconfig": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-2.1.0.tgz", - "integrity": "sha512-utWOt/GHzuUxnLKxB6dk81RoOeoNeHgbrXiuGk4yyF5qlRz+iIVWu56E2fqGHFrXz0QNUhLB/8nKqvRH66JKGQ==", - "dev": true - }, - "merge2": { - "version": "1.4.1", - "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", - "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==", - "dev": true - }, - "micromatch": { - "version": "4.0.5", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.5.tgz", - "integrity": "sha512-DMy+ERcEW2q8Z2Po+WNXuw3c5YaUSFjAO5GsJqfEl7UjvtIuFKO6ZrKvcItdy98dwFI2N1tg3zNIdKaQT+aNdA==", - "dev": true, - "requires": { - "braces": "^3.0.2", - "picomatch": "^2.3.1" - } - }, - "nanoid": { - "version": "3.3.6", - "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.6.tgz", - "integrity": "sha512-BGcqMMJuToF7i1rt+2PWSNVnWIkGCU78jBG3RxO/bZlnZPK2Cmi2QaffxGO/2RvWi9sL+FAiRiXMgsyxQ1DIDA==", - "dev": true, - "peer": true - }, - "node-releases": { - "version": "2.0.13", - "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.13.tgz", - "integrity": 
"sha512-uYr7J37ae/ORWdZeQ1xxMJe3NtdmqMC/JZK+geofDrkLUApKRHPd18/TxtBOJ4A0/+uUIliorNrfYV6s1b02eQ==", - "dev": true - }, - "normalize-path": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", - "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", - "dev": true - }, - "normalize-range": { - "version": "0.1.2", - "resolved": "https://registry.npmjs.org/normalize-range/-/normalize-range-0.1.2.tgz", - "integrity": "sha1-LRDAa9/TEuqXd2laTShDlFa3WUI=", - "dev": true - }, - "path-type": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/path-type/-/path-type-4.0.0.tgz", - "integrity": "sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw==", - "dev": true - }, - "picocolors": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.0.0.tgz", - "integrity": "sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ==", - "dev": true - }, - "picomatch": { - "version": "2.3.1", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", - "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", - "dev": true - }, - "pify": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/pify/-/pify-2.3.0.tgz", - "integrity": "sha1-7RQaasBDqEnqWISY59yosVMw6Qw=", - "dev": true - }, - "postcss": { - "version": "8.4.31", - "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.31.tgz", - "integrity": "sha512-PS08Iboia9mts/2ygV3eLpY5ghnUcfLV/EXTOW1E2qYxJKGGBUtNjN76FYHnMs36RmARn41bC0AZmn+rR0OVpQ==", - "dev": true, - "peer": true, - "requires": { - "nanoid": "^3.3.6", - "picocolors": "^1.0.0", - "source-map-js": "^1.0.2" - } - }, - "postcss-cli": { - "version": "10.1.0", - "resolved": "https://registry.npmjs.org/postcss-cli/-/postcss-cli-10.1.0.tgz", - "integrity": 
"sha512-Zu7PLORkE9YwNdvOeOVKPmWghprOtjFQU3srMUGbdz3pHJiFh7yZ4geiZFMkjMfB0mtTFR3h8RemR62rPkbOPA==", - "dev": true, - "requires": { - "chokidar": "^3.3.0", - "dependency-graph": "^0.11.0", - "fs-extra": "^11.0.0", - "get-stdin": "^9.0.0", - "globby": "^13.0.0", - "picocolors": "^1.0.0", - "postcss-load-config": "^4.0.0", - "postcss-reporter": "^7.0.0", - "pretty-hrtime": "^1.0.3", - "read-cache": "^1.0.0", - "slash": "^5.0.0", - "yargs": "^17.0.0" - } - }, - "postcss-load-config": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/postcss-load-config/-/postcss-load-config-4.0.1.tgz", - "integrity": "sha512-vEJIc8RdiBRu3oRAI0ymerOn+7rPuMvRXslTvZUKZonDHFIczxztIyJ1urxM1x9JXEikvpWWTUUqal5j/8QgvA==", - "dev": true, - "requires": { - "lilconfig": "^2.0.5", - "yaml": "^2.1.1" - } - }, - "postcss-reporter": { - "version": "7.0.5", - "resolved": "https://registry.npmjs.org/postcss-reporter/-/postcss-reporter-7.0.5.tgz", - "integrity": "sha512-glWg7VZBilooZGOFPhN9msJ3FQs19Hie7l5a/eE6WglzYqVeH3ong3ShFcp9kDWJT1g2Y/wd59cocf9XxBtkWA==", - "dev": true, - "requires": { - "picocolors": "^1.0.0", - "thenby": "^1.3.4" - } - }, - "postcss-value-parser": { - "version": "4.2.0", - "resolved": "https://registry.npmjs.org/postcss-value-parser/-/postcss-value-parser-4.2.0.tgz", - "integrity": "sha512-1NNCs6uurfkVbeXG4S8JFT9t19m45ICnif8zWLd5oPSZ50QnwMfK+H3jv408d4jw/7Bttv5axS5IiHoLaVNHeQ==", - "dev": true - }, - "pretty-hrtime": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/pretty-hrtime/-/pretty-hrtime-1.0.3.tgz", - "integrity": "sha1-t+PqQkNaTJsnWdmeDyAesZWALuE=", - "dev": true - }, - "queue-microtask": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz", - "integrity": "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==", - "dev": true - }, - "read-cache": { - "version": "1.0.0", - "resolved": 
"https://registry.npmjs.org/read-cache/-/read-cache-1.0.0.tgz", - "integrity": "sha1-5mTvMRYRZsl1HNvo28+GtftY93Q=", - "dev": true, - "requires": { - "pify": "^2.3.0" - } - }, - "readdirp": { - "version": "3.6.0", - "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz", - "integrity": "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==", - "dev": true, - "requires": { - "picomatch": "^2.2.1" - } - }, - "require-directory": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", - "integrity": "sha1-jGStX9MNqxyXbiNE/+f3kqam30I=", - "dev": true - }, - "reusify": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.0.4.tgz", - "integrity": "sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw==", - "dev": true - }, - "run-parallel": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz", - "integrity": "sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==", - "dev": true, - "requires": { - "queue-microtask": "^1.2.2" - } - }, - "slash": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/slash/-/slash-5.1.0.tgz", - "integrity": "sha512-ZA6oR3T/pEyuqwMgAKT0/hAv8oAXckzbkmR0UkUosQ+Mc4RxGoJkRmwHgHufaenlyAgE1Mxgpdcrf75y6XcnDg==", - "dev": true - }, - "source-map-js": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.0.2.tgz", - "integrity": "sha512-R0XvVJ9WusLiqTCEiGCmICCMplcCkIwwR11mOSD9CR5u+IXYdiseeEuXCVAjS54zqwkLcPNnmU4OeJ6tUrWhDw==", - "dev": true, - "peer": true - }, - "string-width": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "dev": true, - 
"requires": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - } - }, - "strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dev": true, - "requires": { - "ansi-regex": "^5.0.1" - } - }, - "thenby": { - "version": "1.3.4", - "resolved": "https://registry.npmjs.org/thenby/-/thenby-1.3.4.tgz", - "integrity": "sha512-89Gi5raiWA3QZ4b2ePcEwswC3me9JIg+ToSgtE0JWeCynLnLxNr/f9G+xfo9K+Oj4AFdom8YNJjibIARTJmapQ==", - "dev": true - }, - "to-regex-range": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", - "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", - "dev": true, - "requires": { - "is-number": "^7.0.0" - } - }, - "universalify": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.0.tgz", - "integrity": "sha512-hAZsKq7Yy11Zu1DE0OzWjw7nnLZmJZYTDZZyEFHZdUhV8FkH5MCfoU1XMaxXovpyW5nq5scPqq0ZDP9Zyl04oQ==", - "dev": true - }, - "update-browserslist-db": { - "version": "1.0.13", - "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.0.13.tgz", - "integrity": "sha512-xebP81SNcPuNpPP3uzeW1NYXxI3rxyJzF3pD6sH4jE7o/IX+WtSpwnVU+qIsDPyk0d3hmFQ7mjqc6AtV604hbg==", - "dev": true, - "requires": { - "escalade": "^3.1.1", - "picocolors": "^1.0.0" - } - }, - "wrap-ansi": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", - "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", - "dev": true, - "requires": { - "ansi-styles": "^4.0.0", - "string-width": "^4.1.0", - "strip-ansi": "^6.0.0" - } - }, - "y18n": { - "version": "5.0.8", - "resolved": 
"https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", - "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", - "dev": true - }, - "yaml": { - "version": "2.3.3", - "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.3.3.tgz", - "integrity": "sha512-zw0VAJxgeZ6+++/su5AFoqBbZbrEakwu+X0M5HmcwUiBL7AzcuPKjj5we4xfQLp78LkEMpD0cOnUhmgOVy3KdQ==", - "dev": true - }, - "yargs": { - "version": "17.3.1", - "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.3.1.tgz", - "integrity": "sha512-WUANQeVgjLbNsEmGk20f+nlHgOqzRFpiGWVaBrYGYIGANIIu3lWjoyi0fNlFmJkvfhCZ6BXINe7/W2O2bV4iaA==", - "dev": true, - "requires": { - "cliui": "^7.0.2", - "escalade": "^3.1.1", - "get-caller-file": "^2.0.5", - "require-directory": "^2.1.1", - "string-width": "^4.2.3", - "y18n": "^5.0.5", - "yargs-parser": "^21.0.0" - } - }, - "yargs-parser": { - "version": "21.0.0", - "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.0.0.tgz", - "integrity": "sha512-z9kApYUOCwoeZ78rfRYYWdiU/iNL6mwwYlkkZfJoyMR1xps+NEBX5X7XmRpxkZHhXJ6+Ey00IwKxBBSW9FIjyA==", - "dev": true - } } } diff --git a/docs/package.json b/docs/package.json index 76434b00..03aca1fd 100644 --- a/docs/package.json +++ b/docs/package.json @@ -18,6 +18,6 @@ "homepage": "https://github.com/mikegoatly/lifti#readme", "devDependencies": { "autoprefixer": "^10.4.16", - "postcss-cli": "^10.1.0" + "postcss-cli": "^11.0.0" } } diff --git a/docs/static/images/v6-serialization.svg b/docs/static/images/v6-serialization.svg new file mode 100644 index 00000000..fd22b78d --- /dev/null +++ b/docs/static/images/v6-serialization.svg @@ -0,0 +1,4 @@ + + + +
Header

2 bytes: 0x4C, 0x49
Int16: Version

Header...
Items

VarInt32: Item count
Items...
Item n

VarInt32: Item ID
[Key Size]: Item Key

Byte: Number of fields with statistics for item








Byte: Object type information as a bit mask where bits
         0-4: The object type id
         5: 1 - the object has a scoring freshness date
         6: 1 - the object has a scoring magnitude
         7: RESERVED
[Int64: Freshness date - only if indicated in object type bit mask]
[Double
: Scoring magnitude - only if indicated in object type bit mask]
Item n...
1..Item Count
1..Item Count
Field statistics for item n

Byte: Field ID
VarInt32: Token count for field
Field statistics for item n...
1..Field Count
1..Field Count
Index nodes


Index nodes...
Item keys are serialized using the IKeySerializer implementation for the index allowing for arbitrary data types to be used.
Item keys are serialized using the IKeySerializer im...
Serialized Node

VarInt32: Length of intra-node text
VarInt32: Number of matches
VarInt32: Number of child nodes

if intra-node text length > 0:
n VarUInt16: Intra node text

If child node count > 0:








If match count > 0:

Serialized Node...
Node Link

VarUInt16: Link character
Serialized Node structure for child node
Node Link...
1..Field Count
1..Field Count
Item Match

VarInt32: Matched item ID
VarInt32: Number of matched fields
Item Match...
1..Match Count
1..Match Count
Field Match

Byte: Field ID
VarInt32: Number of locations in the source text at which the field was matched
Field Match...
1..Matched Fields
1..Matched Fields
Field Match Location

The token index and start offsets are written relative to the previously serialized location entry. For the first entry in any given list the previous value is assumed to be zero.

Byte: Serialization optimization flags:
0: Token index and start offsets are VarInt32 and the token length is serialized
1: Token index is a byte
2: Token index is an Int16
4: Token start offset is a byte
8: Token start offset is an Int16
16: The length of the match is the same as the previously serialized entry and is not present in this entry.
Byte|Int16|Int32|VarInt32: Token index (relative to previous entry)
Byte|Int16|Int32|VarInt32: Token start offset (relative to previous entry)

if token length is different to previous entry:
VarUInt16: Token length
Field Match Location...
1..Locations
1..Locations
Terminator

4 bytes: 0xFF, 0xFF, 0xFF, 0xFF


Terminator...
Fields

VarInt32: Field count
Fields...
Field n

Byte: Field id
Byte: Field kind (1: Static, 2: Dynamic)
String: Field name

If field kind == Dynamic:
String: Dynamic field reader name
Field n...
1..Field Count
1..Field Count
Note: VarInt32s in this scheme will always have positive values, so are written with 7-bit encoding, without zigzag encoding
Note: VarInt32s in this scheme will always have positive values, so are written with 7-bit encoding, without zi...
Text is not SVG - cannot display
\ No newline at end of file diff --git a/global.json b/global.json new file mode 100644 index 00000000..2cf7f2ec --- /dev/null +++ b/global.json @@ -0,0 +1,3 @@ +{ + "sdk": {"version": "8.0.0", "rollForward": "latestMinor"} +} \ No newline at end of file diff --git a/samples/Blazor/Blazor.csproj b/samples/Blazor/Blazor.csproj index 1ce4c5e0..cb437f3a 100644 --- a/samples/Blazor/Blazor.csproj +++ b/samples/Blazor/Blazor.csproj @@ -1,9 +1,11 @@  - net7.0 + net8.0 enable enable + latest + enable @@ -25,8 +27,8 @@ - - + + diff --git a/samples/Blazor/Services/WikipediaIndexService.cs b/samples/Blazor/Services/WikipediaIndexService.cs index 65beb14e..e7bb2bb3 100644 --- a/samples/Blazor/Services/WikipediaIndexService.cs +++ b/samples/Blazor/Services/WikipediaIndexService.cs @@ -84,7 +84,7 @@ public IList> Search(string query) public IEnumerable GetIndexedKeys() { - return this.Index.Items.GetIndexedItems().Select(i => i.Item); + return this.Index.Metadata.GetIndexedDocuments().Select(i => i.Key); } public string GetIndexTextualRepresentation() diff --git a/samples/TestConsole/AutoCompleteSample.cs b/samples/TestConsole/AutoCompleteSample.cs index 11b061c5..3465df4d 100644 --- a/samples/TestConsole/AutoCompleteSample.cs +++ b/samples/TestConsole/AutoCompleteSample.cs @@ -1,6 +1,5 @@ using Lifti; using System; -using System.Collections; using System.Collections.Generic; using System.Drawing; using System.Linq; @@ -12,7 +11,7 @@ namespace TestConsole { public class AutoCompleteHelper { - private FullTextIndex index; + private FullTextIndex? 
index; public async Task InitializeAsync() { @@ -21,7 +20,7 @@ public async Task InitializeAsync() public IEnumerable GetSuggestions(string input) { - using var navigator = index.CreateNavigator(); + using var navigator = this.index!.CreateNavigator(); navigator.Process(input.AsSpan()); return navigator.EnumerateIndexedTokens().ToList(); } diff --git a/samples/TestConsole/Book.cs b/samples/TestConsole/Book.cs deleted file mode 100644 index 8502226a..00000000 --- a/samples/TestConsole/Book.cs +++ /dev/null @@ -1,7 +0,0 @@ -public class Book -{ - public int BookId { get; set; } - public string Title { get; set; } - public string[] Authors { get; set; } - public string Synopsis { get; set; } -} \ No newline at end of file diff --git a/samples/TestConsole/BookSample.cs b/samples/TestConsole/BookSample.cs index b2455e1e..ae277faa 100644 --- a/samples/TestConsole/BookSample.cs +++ b/samples/TestConsole/BookSample.cs @@ -4,26 +4,26 @@ using System.Threading.Tasks; namespace TestConsole -{ +{ + public record Book(int BookId, string Title, string[] Authors, string Synopsis); + public class BookSample : SampleBase { - private static readonly Book[] books = new[] - { - new Book - { - BookId = 1, - Title = "The Three Body Problem", - Authors = new[] { "Liu Cixin" }, - Synopsis = "The Three-Body Problem (Chinese: 三体; literally: 'Three-Body'; pinyin: sān tǐ) is a hard science fiction novel by the Chinese writer Liu Cixin. It is the first novel of the Remembrance of Earth's Past (Chinese: 地球往事) trilogy, but Chinese readers generally refer to the whole series by the title of this first novel.[1] The second and third novels in the trilogy are titled The Dark Forest and Death's End. The title of the first novel refers to the three-body problem in orbital mechanics." 
- }, - new Book - { - BookId = 2, - Title = "First on Mars", - Authors = new[] { "Cecil Warwick" }, - Synopsis = "This novel, which was first published in 1934, tells the story of a group of astronauts who become the first humans to land on the planet Mars." - }, - }; + private static readonly Book[] books = + [ + new Book( + BookId: 1, + Title: "The Three Body Problem", + Authors: ["Liu Cixin"], + Synopsis: "The Three-Body Problem (Chinese: 三体; literally: 'Three-Body'; pinyin: sān tǐ) is a hard science fiction novel by the Chinese writer Liu Cixin. It is the first novel of the Remembrance of Earth's Past (Chinese: 地球往事) trilogy, but Chinese readers generally refer to the whole series by the title of this first novel.[1] The second and third novels in the trilogy are titled The Dark Forest and Death's End. The title of the first novel refers to the three-body problem in orbital mechanics." + ), + new Book( + BookId: 2, + Title: "First on Mars", + Authors: ["Cecil Warwick"], + Synopsis: "This novel, which was first published in 1934, tells the story of a group of astronauts who become the first humans to land on the planet Mars." 
+ ), + ]; public override async Task RunAsync() { @@ -36,7 +36,7 @@ public override async Task RunAsync() .WithField("Synopsis", b => b.Synopsis, tokenOptions => tokenOptions.WithStemming())) .Build(); - Console.WriteLine(@$"Indexing two sample books with 3 different fields, Title, Authors and Synposis:{Environment.NewLine}{string.Join(Environment.NewLine, books.Select(b => b.Title))}"); + Console.WriteLine(@$"Indexing two sample books with 3 different fields, Title, Authors and Synopsis:{Environment.NewLine}{string.Join(Environment.NewLine, books.Select(b => b.Title))}"); await bookIndex.AddRangeAsync(books); Console.WriteLine(); await RunSearchAsync( @@ -49,8 +49,8 @@ public override async Task RunAsync() bookIndex, "title=the", i => books.First(x => x.BookId == i), - "Only the first book contains 'the' in the title field"); - + "Only the first book contains 'the' in the title field"); + WaitForEnterToReturnToMenu(); } } diff --git a/samples/TestConsole/CompositeKey.cs b/samples/TestConsole/CompositeKey.cs deleted file mode 100644 index 57d1f262..00000000 --- a/samples/TestConsole/CompositeKey.cs +++ /dev/null @@ -1,14 +0,0 @@ -namespace TestConsole -{ - public readonly struct CompositeKey - { - public CompositeKey(int userId, short companyId) - { - this.UserId = userId; - this.CompanyId = companyId; - } - - public int UserId { get; } - public short CompanyId { get; } - } -} diff --git a/samples/TestConsole/CompositeKeySerializer.cs b/samples/TestConsole/CompositeKeySerializer.cs deleted file mode 100644 index 9b4c6976..00000000 --- a/samples/TestConsole/CompositeKeySerializer.cs +++ /dev/null @@ -1,26 +0,0 @@ -using Lifti.Serialization.Binary; -using System.IO; - -namespace TestConsole -{ - public class CompositeKeySerializer : IKeySerializer - { - public void Write(BinaryWriter writer, CompositeKey key) - { - writer.Write(key.UserId); // Int32 - writer.Write(key.CompanyId); // Int16 - } - - public CompositeKey Read(BinaryReader reader) - { - // The 
serialization framework will make sure this method is only - // ever called when a key is ready to be read. - // Ensure the data is read is read out in exactly the same order and with the same - // data types it was written. - var userId = reader.ReadInt32(); - var companyId = reader.ReadInt16(); - - return new CompositeKey(userId, companyId); - } - } -} diff --git a/samples/TestConsole/CustomStemmerSample.cs b/samples/TestConsole/CustomStemmerSample.cs new file mode 100644 index 00000000..6e6c9d80 --- /dev/null +++ b/samples/TestConsole/CustomStemmerSample.cs @@ -0,0 +1,44 @@ +using Lifti; +using Lifti.Tokenization; +using System.Text; +using System.Threading.Tasks; + +namespace TestConsole +{ + public class FirstThreeLettersStemmer : IStemmer + { + public bool RequiresCaseInsensitivity => false; + + public bool RequiresAccentInsensitivity => false; + + public void Stem(StringBuilder builder) + { + if (builder.Length > 3) + { + builder.Length = 3; + } + } + } + + public class CustomStemmerSample : SampleBase + { + public override async Task RunAsync() + { + var index = new FullTextIndexBuilder() + .WithDefaultTokenization(o => o.WithStemming(new FirstThreeLettersStemmer())) + .Build(); + + await index.AddAsync(1, "Some words"); + await index.AddAsync(2, "Wordy text"); + + var results = index.Search("word"); + + RunSearch( + index, + "word", + "Searching for 'word' will get stemmed to just the first three characters, so will match both items"); + + WaitForEnterToReturnToMenu(); + } + } +} diff --git a/samples/TestConsole/CustomerObjectSample.cs b/samples/TestConsole/CustomerObjectSample.cs index 9fba4e98..c6e49309 100644 --- a/samples/TestConsole/CustomerObjectSample.cs +++ b/samples/TestConsole/CustomerObjectSample.cs @@ -6,14 +6,9 @@ namespace TestConsole { public class CustomerObjectSample : SampleBase - { - public class Customer - { - public int Id { get; set; } - public string Name { get; set; } - public string ProfileHtml { get; set; } - } - + { + public 
record Customer(int Id, string Name, string ProfileHtml); + public override async Task RunAsync() { Console.WriteLine("Creating an index for a Customer object, with two fields, Name and Profile"); @@ -26,12 +21,12 @@ public override async Task RunAsync() ) .Build(); - await index.AddAsync(new Customer { Id = 1, Name = "Joe Bloggs", ProfileHtml = "
Something else something" }); - await index.AddAsync(new Customer { Id = 2, Name = "Joe Something", ProfileHtml = "Something else" }); + await index.AddAsync(new Customer(1, "Joe Bloggs", "Something else something")); + await index.AddAsync(new Customer(2, "Joe Something", "Something else")); var results = RunSearch( - index, - "something", + index, + "something", @"Searching for 'Something' will result in ID 2 being ordered before ID 1. 'Something' appears twice in each document overall, however document 2 has fewer words, therefore the matches are more statistically significant"); @@ -42,5 +37,5 @@ public override async Task RunAsync() WaitForEnterToReturnToMenu(); } - } + } } diff --git a/samples/TestConsole/FreshnessBoosting.cs b/samples/TestConsole/FreshnessBoosting.cs new file mode 100644 index 00000000..0bce75e7 --- /dev/null +++ b/samples/TestConsole/FreshnessBoosting.cs @@ -0,0 +1,43 @@ +using Lifti; +using System; +using System.Threading.Tasks; + +namespace TestConsole +{ + public class FreshnessBoosting : SampleBase + { + public record Document(int Id, string Content, DateTime UpdatedDate); + + public override async Task RunAsync() + { + var documents = new[] + { + new Document(1, "This is a document that was updated 5 day ago", DateTime.UtcNow.AddDays(-5)), + new Document(2, "This is a document that was updated 4 days ago", DateTime.UtcNow.AddDays(-4)), + new Document(3, "This is a document that was updated 3 days ago", DateTime.UtcNow.AddDays(-3)), + new Document(4, "This is a document that was updated 2 days ago", DateTime.UtcNow.AddDays(-2)), + new Document(5, "This is a document that was updated 1 days ago", DateTime.UtcNow.AddDays(-1)) + }; + + var index = new FullTextIndexBuilder() + .WithObjectTokenization(o => o + .WithKey(d => d.Id) + .WithField("Content", d => d.Content) + // Boost the score of documents that have been updated most recently, multiplying the score on a range of 1 to 2 depending + // the date of the document relative to the 
other documents. + .WithScoreBoosting(o => o + .Freshness(d => d.UpdatedDate, 2D))) + .Build(); + + await index.AddRangeAsync(documents); + + RunSearch( + index, + "document", + @"All documents contain the word 'document', but the results will be ordered by their freshness, with the most recently updated first", + id => $"Updated on: {documents[id - 1].UpdatedDate:d}"); + + WaitForEnterToReturnToMenu(); + } + } +} diff --git a/samples/TestConsole/IndexSerializationWithCustomKeySerializer.cs b/samples/TestConsole/IndexSerializationWithCustomKeySerializer.cs index b41ac560..6132f4a1 100644 --- a/samples/TestConsole/IndexSerializationWithCustomKeySerializer.cs +++ b/samples/TestConsole/IndexSerializationWithCustomKeySerializer.cs @@ -6,7 +6,30 @@ using System.Threading.Tasks; namespace TestConsole -{ +{ + public readonly record struct CompositeKey(int UserId, short CompanyId); + + public class CompositeKeySerializer : IKeySerializer + { + public void Write(BinaryWriter writer, CompositeKey key) + { + writer.Write(key.UserId); // Int32 + writer.Write(key.CompanyId); // Int16 + } + + public CompositeKey Read(BinaryReader reader) + { + // The serialization framework will make sure this method is only + // ever called when a key is ready to be read. + // Ensure the data is read is read out in exactly the same order and with the same + // data types it was written. 
+ var userId = reader.ReadInt32(); + var companyId = reader.ReadInt16(); + + return new CompositeKey(userId, companyId); + } + } + public class IndexSerializationWithCustomKeySerializer : SampleBase { public override async Task RunAsync() diff --git a/samples/TestConsole/MagnitudeBoosting.cs b/samples/TestConsole/MagnitudeBoosting.cs new file mode 100644 index 00000000..f2d0460e --- /dev/null +++ b/samples/TestConsole/MagnitudeBoosting.cs @@ -0,0 +1,41 @@ +using Lifti; +using System.Threading.Tasks; + +namespace TestConsole +{ + public class MagnitudeBoosting : SampleBase + { + public record Document(int Id, string Content, int Rating); + + public override async Task RunAsync() + { + var documents = new[] + { + new Document(1, "This is a document with a rating of 1", 1), + new Document(2, "This is a document with a rating of 2", 2), + new Document(3, "This is a document with a rating of 3", 3), + new Document(4, "This is a document with a rating of 4", 4), + new Document(5, "This is a document with a rating of 5", 5) + }; + + var index = new FullTextIndexBuilder() + .WithObjectTokenization(o => o + .WithKey(d => d.Id) + .WithField("Content", d => d.Content) + // Boost the score of documents with a higher rating multiplying the score on a range of 1 to 2 depending + // on the rating. 
+ .WithScoreBoosting(o => o.Magnitude(d => d.Rating, 2D))) + .Build(); + + await index.AddRangeAsync(documents); + + RunSearch( + index, + "document", + @"All documents contain the word 'document', but the results will be ordered by their rating, with the highest rating first", + id => $"Star rating: {documents[id - 1].Rating}"); + + WaitForEnterToReturnToMenu(); + } + } +} diff --git a/samples/TestConsole/Program.cs b/samples/TestConsole/Program.cs index 08b7d444..588f1446 100644 --- a/samples/TestConsole/Program.cs +++ b/samples/TestConsole/Program.cs @@ -49,15 +49,22 @@ public static async Task Main() Console.Write(' '); Console.CursorLeft -= 1; } - } while (key < firstLetter|| key > lastLetter); + } while (key < firstLetter || key > lastLetter); var selectedSample = samples[key - 'a']; Console.Clear(); Console.WriteLine($"Running {selectedSample.Name}"); Console.WriteLine(); - - await ((ISample)Activator.CreateInstance(selectedSample)).RunAsync(); + + if (Activator.CreateInstance(selectedSample) is ISample sampleInstance) + { + await sampleInstance.RunAsync(); + } + else + { + throw new Exception($"Unable to create sample {selectedSample.Name}!"); + } } while (true); } } diff --git a/samples/TestConsole/SampleBase.cs b/samples/TestConsole/SampleBase.cs index 172b17e9..445d925a 100644 --- a/samples/TestConsole/SampleBase.cs +++ b/samples/TestConsole/SampleBase.cs @@ -1,8 +1,8 @@ using Lifti; -using System.Collections.Generic; using System; -using System.Threading.Tasks; +using System.Collections.Generic; using System.Linq; +using System.Threading.Tasks; namespace TestConsole { @@ -10,7 +10,12 @@ public abstract class SampleBase : ISample { public abstract Task RunAsync(); - protected static ISearchResults RunSearch(FullTextIndex index, string query, string message = null) + protected static ISearchResults RunSearch( + FullTextIndex index, + string query, + string? message = null, + Func? 
objectResultText = null) + where TKey : notnull { if (message != null) { @@ -19,30 +24,43 @@ protected static ISearchResults RunSearch(FullTextIndex index, string q } Console.ForegroundColor = ConsoleColor.DarkCyan; - Console.WriteLine($"Executing query: {query}"); + Console.WriteLine($"Executing query: {query}"); + Console.WriteLine($"(Query parsed as: {index.ParseQuery(query)})"); Console.ResetColor(); var results = index.Search(query); - PrintSearchResults(results); + PrintSearchResults(results, objectResultText); return results; } - protected static void PrintSearchResults(IEnumerable> results) + protected static void PrintSearchResults( + IEnumerable> results, + Func? objectResultText = null) { - Console.WriteLine("Matched items total score:"); + Console.WriteLine("Matched documents total score:"); foreach (var result in results) { Console.ForegroundColor = ConsoleColor.Cyan; Console.Write($"{result.Key} "); Console.ForegroundColor = ConsoleColor.Green; - Console.WriteLine($"({result.Score})"); + Console.Write($"({result.Score})"); + + if (objectResultText != null) + { + Console.ForegroundColor = ConsoleColor.Yellow; + Console.Write($" - {objectResultText(result.Key)}"); + } + + Console.WriteLine(); } Console.ResetColor(); Console.WriteLine(); } - protected static async Task> RunSearchAsync(FullTextIndex index, string query, Func readItem, string message = null) + protected static async Task> RunSearchAsync(FullTextIndex index, string query, Func readItem, string? 
message = null) + where TObject : class + where TKey : notnull { if (message != null) { @@ -59,7 +77,7 @@ protected static void PrintSearchResults(IEnumerable> results return results; } - protected static async Task PrintSearchResultsAsync(ISearchResults results, Func readItem) + protected static async Task PrintSearchResultsAsync(ISearchResults results, Func readItem) { Console.WriteLine("Matched items, total score and matched phrases:"); foreach (var result in await results.CreateMatchPhrasesAsync(readItem)) diff --git a/samples/TestConsole/ShardedIndexExample.cs b/samples/TestConsole/ShardedIndexExample.cs index 78db99d0..c520e2c1 100644 --- a/samples/TestConsole/ShardedIndexExample.cs +++ b/samples/TestConsole/ShardedIndexExample.cs @@ -13,7 +13,7 @@ public static class ShardedIndexExample public class ShardedIndex { private static readonly BinarySerializer serializer = new BinarySerializer(); - private readonly Dictionary> indexShards = new Dictionary>(); + private readonly Dictionary> indexShards = []; private readonly SemaphoreSlim syncObject = new SemaphoreSlim(1); public async Task> GetIndexAsync(string partitionKey, CancellationToken cancellationToken = default) diff --git a/samples/TestConsole/TestConsole.csproj b/samples/TestConsole/TestConsole.csproj index f308c48c..db814bfc 100644 --- a/samples/TestConsole/TestConsole.csproj +++ b/samples/TestConsole/TestConsole.csproj @@ -2,8 +2,9 @@ Exe - net6.0 - 11 + net8.0 + latest + enable diff --git a/samples/TestConsole/ThesaurusSample.cs b/samples/TestConsole/ThesaurusSample.cs index 48199583..bb8900ac 100644 --- a/samples/TestConsole/ThesaurusSample.cs +++ b/samples/TestConsole/ThesaurusSample.cs @@ -10,8 +10,8 @@ public class ThesaurusSample : SampleBase { private record Animal(int Id, string Name, string Description); - private static readonly Animal[] animals = new[] -{ + private static readonly Animal[] animals = +[ new Animal(1, "cat", "A domestic mammal, also known as a feline." 
), new Animal(2, "dog", "A domesticated carnivorous mammal." ), new Animal(3, "rabbit", "A small, fluffy, hoofed mammal with long ears." ), @@ -22,7 +22,7 @@ private record Animal(int Id, string Name, string Description); new Animal(8, "hamster", "A tiny, stout-bodied rodent with a short, furry tail." ), new Animal(9, "turtle", "A reptile with a hard, protective shell." ), new Animal(10, "snake", "A long, slender reptile with scales and no legs.") - }; + ]; public override async Task RunAsync() { diff --git a/samples/TestConsole/WikipediaSample.cs b/samples/TestConsole/WikipediaSample.cs index 668e29c4..a129aa19 100644 --- a/samples/TestConsole/WikipediaSample.cs +++ b/samples/TestConsole/WikipediaSample.cs @@ -33,7 +33,7 @@ public override async Task RunAsync() do { var query = Console.ReadLine(); - if (query.Length == 0) + if (string.IsNullOrWhiteSpace(query)) { return; } diff --git a/src/Lifti.Core/AssemblyInfo.cs b/src/Lifti.Core/AssemblyInfo.cs index a9d9f658..ee4a7ba1 100644 --- a/src/Lifti.Core/AssemblyInfo.cs +++ b/src/Lifti.Core/AssemblyInfo.cs @@ -1,6 +1,7 @@ using System.Runtime.CompilerServices; [assembly: InternalsVisibleTo("Lifti.Tests")] +[assembly: InternalsVisibleTo("PerformanceProfiling")] namespace System.Runtime.CompilerServices { diff --git a/src/Lifti.Core/ChildNodeMap.cs b/src/Lifti.Core/ChildNodeMap.cs new file mode 100644 index 00000000..ce0481e3 --- /dev/null +++ b/src/Lifti.Core/ChildNodeMap.cs @@ -0,0 +1,186 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; +using System.Text; + +namespace Lifti +{ + /// + /// An entry in . + /// + public record struct ChildNodeMapEntry(char ChildChar, IndexNode ChildNode); + + /// + /// An immutable map of child nodes. 
+ /// + public readonly struct ChildNodeMap : IEquatable + { + private readonly ChildNodeMapEntry[] childNodes; + + /// + /// Initializes a new empty instance of + /// + public ChildNodeMap() + { + this.childNodes = []; + } + + /// + /// Initializes a new instance of . + /// + /// + /// The child nodes to initialize the map with. + /// + public ChildNodeMap(ChildNodeMapEntry[] map) + { + if (map is null) + { + throw new ArgumentNullException(nameof(map)); + } + + // Verify that the map is sorted +#if DEBUG + for (var i = 1; i < map.Length; i++) + { + Debug.Assert(map[i - 1].ChildChar < map[i].ChildChar); + } +#endif + + this.childNodes = map; + } + + /// + /// Gets an empty instance of . + /// + public static ChildNodeMap Empty { get; } = new ChildNodeMap(); + + /// + /// Gets the number of child nodes in the map. + /// + public int Count => this.childNodes.Length; + + /// + /// Gets the set of characters that link from this instance to the child nodes. + /// + public IReadOnlyList CharacterMap => this.childNodes; + + internal ChildNodeMapMutation StartMutation() + { + return new ChildNodeMapMutation(this); + } + + /// + /// Tries to get the child node for the specified character. + /// + public bool TryGetValue(char value, [NotNullWhen(true)] out IndexNode? 
nextNode) + { + char character; + var length = this.childNodes.Length; + switch (length) + { + case 0: + nextNode = null; + return false; + + case 1: + (character, nextNode) = this.childNodes[0]; + if (character == value) + { + return true; + } + + return false; + + case 2: + (character, nextNode) = this.childNodes[0]; + if (character == value) + { + return true; + } + + (character, nextNode) = this.childNodes[1]; + if (character == value) + { + return true; + } + + return false; + + default: + // General case - check bounds, then do a binary search if we're in range + if (value < this.childNodes[0].ChildChar || value > this.childNodes[length - 1].ChildChar) + { + nextNode = null; + return false; + } + + nextNode = BinarySearchChildNodes(value); + return nextNode is not null; + } + } + + private IndexNode? BinarySearchChildNodes(char value) + { + // We don't want to use Array.BinarySearch here because of the need to use a custom comparer. + // This custom implementation is significantly faster because we don't get involved in + // any boxing/unboxing of the value types. + var left = 0; + var right = this.childNodes.Length - 1; + + while (left <= right) + { + var middle = left + (right - left) / 2; + var middleChar = this.childNodes[middle].ChildChar; + + if (middleChar == value) + { + return this.childNodes[middle].ChildNode; + } + + if (middleChar < value) + { + left = middle + 1; + } + else + { + right = middle - 1; + } + } + + return null; + } + + /// + public override bool Equals(object? 
obj) + { + // Because we're immutable, we can use reference equality + return obj is ChildNodeMap other + && this.Equals(other); + } + + /// + public bool Equals(ChildNodeMap other) + { + return other.childNodes == this.childNodes; + } + + /// + public override int GetHashCode() + { + return HashCode.Combine(this.childNodes); + } + + /// + public static bool operator ==(ChildNodeMap left, ChildNodeMap right) + { + return left.Equals(right); + } + + /// + public static bool operator !=(ChildNodeMap left, ChildNodeMap right) + { + return !(left == right); + } + } +} diff --git a/src/Lifti.Core/ChildNodeMapMutation.cs b/src/Lifti.Core/ChildNodeMapMutation.cs new file mode 100644 index 00000000..90f3bcf9 --- /dev/null +++ b/src/Lifti.Core/ChildNodeMapMutation.cs @@ -0,0 +1,133 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Text; + +namespace Lifti +{ + internal sealed class ChildNodeMapMutation + { + private readonly ChildNodeMap? original; + private readonly Dictionary mutated; + private int newChildNodeCount; + + public ChildNodeMapMutation(char splitChar, IndexNodeMutation splitChildNode) + { + this.mutated = new() + { + { splitChar, splitChildNode } + }; + + this.newChildNodeCount = 1; + } + + internal ChildNodeMapMutation(ChildNodeMap original) + { + this.original = original; + this.mutated = []; + } + + public IEnumerable<(char childCharacter, IndexNodeMutation childNode)> GetMutated() + { + foreach (var child in this.mutated) + { + yield return (child.Key, child.Value); + } + } + + public IEnumerable<(char childCharacter, IndexNode childNode)> GetUnmutated() + { + if (this.original is { } originalChildNodeMap) + { + foreach (var (childCharacter, childNode) in originalChildNodeMap.CharacterMap) + { + if (!this.mutated.ContainsKey(childCharacter)) + { + yield return (childCharacter, childNode); + } + } + } + } + + internal ChildNodeMap Apply() + { + // Combine the original and mutated children. 
+ // We need to ensure: + // 1. mutated children in the original list are replaced with the mutated version + // 2. mutated children not in the original list are added to the list + // 3. the resulting list is sorted in ascending order + ChildNodeMapEntry[] newChildNodes; + + // TODO - this could be parallelised now we're setting elements into a fixed array (using Interlocked.Increment for i) + var i = 0; + if (this.original is { } originalChildNodeMap) + { + newChildNodes = new ChildNodeMapEntry[this.newChildNodeCount + originalChildNodeMap.Count]; + + foreach (var (childChar, childNode) in originalChildNodeMap.CharacterMap) + { + if (this.mutated.ContainsKey(childChar) == false) + { + // This child node is not mutated, so add it to the list + newChildNodes[i++] = new(childChar, childNode); + } + } + } + else + { + Debug.Assert(this.newChildNodeCount == this.mutated.Count); + newChildNodes = new ChildNodeMapEntry[this.mutated.Count]; + } + + // Add the mutated children to the list + foreach (var mutation in this.mutated) + { + Debug.Assert(i < newChildNodes.Length); + newChildNodes[i++] = new(mutation.Key, mutation.Value.Apply()); + } + + Debug.Assert(i == newChildNodes.Length, "Expected all elements to have been populated"); + + // Sort the list in-place + Array.Sort(newChildNodes, (x, y) => x.ChildChar.CompareTo(y.ChildChar)); + + return new ChildNodeMap(newChildNodes); + } + + internal IndexNodeMutation GetOrCreateMutation(char indexChar, Func createMutatedNode) + { + if (!this.mutated.TryGetValue(indexChar, out var mutation)) + { + mutation = createMutatedNode(); + this.Mutate(indexChar, mutation); + + if (this.original?.TryGetValue(indexChar, out var _) != true) + { + this.newChildNodeCount++; + } + } + + return mutation; + } + + internal void Mutate(char childChar, IndexNodeMutation mutatedChild) + { + this.mutated[childChar] = mutatedChild; + } + + internal void ToString(StringBuilder builder, int depth) + { + foreach (var (character, childNode) in 
this.GetUnmutated()) + { + builder.AppendLine(); + childNode.ToString(builder, character, depth); + } + + foreach (var (character, childNode) in this.GetMutated()) + { + builder.AppendLine(); + childNode.ToString(builder, character, depth); + } + } + } +} diff --git a/src/Lifti.Core/DocumentMatchCollector.cs b/src/Lifti.Core/DocumentMatchCollector.cs new file mode 100644 index 00000000..b975ab64 --- /dev/null +++ b/src/Lifti.Core/DocumentMatchCollector.cs @@ -0,0 +1,95 @@ +using System.Collections.Generic; +using System.Linq; + +namespace Lifti.Querying +{ + /// + /// A helper class that allows matches to be collected together efficiently prior to + /// creating a instance. + /// + public sealed class DocumentMatchCollector + { + private readonly Dictionary documentMatches = []; + + internal void Add(int documentId, byte fieldId, IReadOnlyList tokenLocations, double score) + { + if (!this.documentMatches.TryGetValue(documentId, out var fieldMatches)) + { + fieldMatches = new(); + this.documentMatches.Add(documentId, fieldMatches); + } + + fieldMatches.Add(fieldId, score, tokenLocations); + } + + /// + /// Completes the match collection process and converts this instance into an . + /// Once this method has been called, this instance should no longer be used. 
+ /// + public IntermediateQueryResult ToIntermediateQueryResult() + { + var results = new List(this.documentMatches.Count); + + results.AddRange( + this.documentMatches.Select( + d => new ScoredToken(d.Key, d.Value.ToScoredFieldMatches()))); + + return new IntermediateQueryResult(results, false); + } + } + + internal class FieldMatches + { + private readonly Dictionary fieldLookup = []; + + public void Add(byte fieldId, double score, IReadOnlyList tokenLocations) + { + if (!this.fieldLookup.TryGetValue(fieldId, out var fieldMatchCollector)) + { + fieldMatchCollector = new(); + this.fieldLookup.Add(fieldId, fieldMatchCollector); + } + + fieldMatchCollector.Add(score, tokenLocations); + } + + public ScoredFieldMatch[] ToScoredFieldMatches() + { + var results = new ScoredFieldMatch[this.fieldLookup.Count]; + + var i = 0; + foreach (var fieldMatch in this.fieldLookup) + { + results[i++] = fieldMatch.Value.ToScoredToken(fieldMatch.Key); + } + + return results; + } + } + + internal class FieldMatchCollector + { + private int additionCount; + private readonly List fieldLocations = []; + public double Score { get; private set; } + + public void Add(double score, IReadOnlyList tokenLocations) + { + this.Score += score; + + this.fieldLocations.AddRange(tokenLocations); + this.additionCount++; + } + + internal ScoredFieldMatch ToScoredToken(byte fieldId) + { + if (this.additionCount > 1) + { + // Ensure the locations are sorted + this.fieldLocations.Sort(); + } + + return ScoredFieldMatch.CreateFromPresorted(this.Score, fieldId, this.fieldLocations); + } + } +} \ No newline at end of file diff --git a/src/Lifti.Core/DocumentMetadata.cs b/src/Lifti.Core/DocumentMetadata.cs new file mode 100644 index 00000000..2f1a48db --- /dev/null +++ b/src/Lifti.Core/DocumentMetadata.cs @@ -0,0 +1,93 @@ +using System; + +namespace Lifti +{ + /// + /// Describes metadata for an indexed document. + /// + public abstract class DocumentMetadata( + byte? 
objectTypeId, + int documentId, + DocumentStatistics documentStatistics, + DateTime? scoringFreshnessDate, + double? scoringMagnitude) + { + /// + /// Gets the id of the object type configured for the indexed document. This will be null if the document source was loose + /// indexed text, or the index was deserialized from an older version without object type id awareness. + /// + public byte? ObjectTypeId { get; } = objectTypeId; + + /// + /// Gets the document ID of the document used internally in the index. + /// + public int Id { get; } = documentId; + + /// + /// Gets the statistics for the indexed document, including token count. + /// + public DocumentStatistics DocumentStatistics { get; } = documentStatistics; + + /// + /// Gets the freshness date of the indexed document for scoring purposes, if one was specified. + /// + public DateTime? ScoringFreshnessDate { get; } = scoringFreshnessDate; + + /// + /// Gets the magnitude weighting for the indexed document, if one was specified. + /// + public double? ScoringMagnitude { get; } = scoringMagnitude; + + /// + /// Creates a new instance of the class for the given document id and key. + /// This should be used when the text is not associated with an object. + /// + public static DocumentMetadata ForLooseText(int documentId, TKey key, DocumentStatistics documentStatistics) + { + return new DocumentMetadata(documentId, key, documentStatistics); + } + + /// + /// Creates a new instance of the class for the given document id and key. + /// This should be used when the text is associated with an object. + /// + public static DocumentMetadata ForObject( + byte objectTypeId, + int documentId, + TKey key, + DocumentStatistics documentStatistics, + DateTime? scoringFreshnessDate, + double? scoringMagnitude) + { + return new DocumentMetadata(documentId, key, documentStatistics, objectTypeId, scoringFreshnessDate, scoringMagnitude); + } + } + + /// + /// The type of the key in the index. 
+ public class DocumentMetadata : DocumentMetadata + { + /// + /// Gets the key of the indexed document. + /// + [Obsolete("Use Key property instead")] + public TKey Item => this.Key; + + /// + /// Gets the key of the indexed document. + /// + public TKey Key { get; } + + internal DocumentMetadata( + int documentId, + TKey key, + DocumentStatistics documentStatistics, + byte? objectTypeId = null, + DateTime? scoringFreshnessDate = null, + double? scoringMagnitude = null) + : base(objectTypeId, documentId, documentStatistics, scoringFreshnessDate, scoringMagnitude) + { + this.Key = key; + } + } +} \ No newline at end of file diff --git a/src/Lifti.Core/ItemPhrases.cs b/src/Lifti.Core/DocumentPhrases.cs similarity index 58% rename from src/Lifti.Core/ItemPhrases.cs rename to src/Lifti.Core/DocumentPhrases.cs index 6c7b1277..59aff1f3 100644 --- a/src/Lifti.Core/ItemPhrases.cs +++ b/src/Lifti.Core/DocumentPhrases.cs @@ -1,54 +1,54 @@ using System.Collections.Generic; -using System.Linq; - +using System.Linq; + namespace Lifti -{ - /// - /// Extracted phrases for a field within an item. - /// - public record FieldPhrases(string FoundIn, IReadOnlyList Phrases) - { - /// - /// Creates a new instance of . - /// - public FieldPhrases(string foundIn, params string[] phrases) - : this(foundIn, phrases as IReadOnlyList) - { - } - } - - /// - /// Extracted phrases matched for the given item. - /// - public record ItemPhrases(SearchResult SearchResult, IReadOnlyList> FieldPhrases) - { - /// - /// Enumerates all the matched phrases found within this item regardless of the field they were found in. - /// - /// - public IEnumerable EnumeratePhrases() - { - return this.FieldPhrases.SelectMany(x => x.Phrases); - } - } - - /// - /// Extracted phrases matched for the given item. - /// - public record ItemPhrases : ItemPhrases - { - /// - /// Initializes a new instance of . 
- /// - public ItemPhrases(TItem item, SearchResult SearchResult, IReadOnlyList> phrases) - : base(SearchResult, phrases) - { - this.Item = item; - } - - /// - /// Gets the item that the matched phrases were returned for. - /// - public TItem Item { get; init; } - } +{ + /// + /// Extracted phrases for a field within an document. + /// + public record FieldPhrases(string FoundIn, IReadOnlyList Phrases) + { + /// + /// Creates a new instance of . + /// + public FieldPhrases(string foundIn, params string[] phrases) + : this(foundIn, phrases as IReadOnlyList) + { + } + } + + /// + /// Extracted phrases matched for the given document. + /// + public record DocumentPhrases(SearchResult SearchResult, IReadOnlyList> FieldPhrases) + { + /// + /// Enumerates all the matched phrases found within this document regardless of the field they were found in. + /// + /// + public IEnumerable EnumeratePhrases() + { + return this.FieldPhrases.SelectMany(x => x.Phrases); + } + } + + /// + /// Extracted phrases matched for the given document. + /// + public record DocumentPhrases : DocumentPhrases + { + /// + /// Initializes a new instance of . + /// + public DocumentPhrases(TObject item, SearchResult SearchResult, IReadOnlyList> phrases) + : base(SearchResult, phrases) + { + this.Item = item; + } + + /// + /// Gets the object that the matched phrases were returned for. + /// + public TObject Item { get; init; } + } } diff --git a/src/Lifti.Core/DocumentStatistics.cs b/src/Lifti.Core/DocumentStatistics.cs index eb8c59cf..4bdc19ce 100644 --- a/src/Lifti.Core/DocumentStatistics.cs +++ b/src/Lifti.Core/DocumentStatistics.cs @@ -1,6 +1,5 @@ using System; using System.Collections.Generic; -using System.Collections.Immutable; using System.Linq; namespace Lifti @@ -8,20 +7,39 @@ namespace Lifti /// /// Statistics derived from an indexed document. 
/// - public class DocumentStatistics + public readonly record struct DocumentStatistics { internal DocumentStatistics(byte fieldId, int tokenCount) { - this.TokenCountByField = ImmutableDictionary.Empty.Add(fieldId, tokenCount); + this.TokenCountByField = new Dictionary() { { fieldId, tokenCount } }; this.TotalTokenCount = tokenCount; } + /// + /// Creates a new instance of the class. + /// + /// + /// The token count for each field indexed in the document. The total token count is + /// calculated as the sum of all values in the dictionary. + /// internal DocumentStatistics(IReadOnlyDictionary tokenCountByField) : this(tokenCountByField, tokenCountByField.Values.Sum()) { } - internal DocumentStatistics(IReadOnlyDictionary tokenCountByField, int totalTokenCount) + /// + /// Creates a new instance of the class. + /// + /// + /// The token count for each field indexed in the document. + /// + /// + /// The total token count for the document in all indexed fields. + /// + /// + /// Thrown if is null. + /// + public DocumentStatistics(IReadOnlyDictionary tokenCountByField, int totalTokenCount) { this.TokenCountByField = tokenCountByField ?? throw new ArgumentNullException(nameof(tokenCountByField)); this.TotalTokenCount = totalTokenCount; diff --git a/src/Lifti.Core/DocumentTokenMatchMap.cs b/src/Lifti.Core/DocumentTokenMatchMap.cs new file mode 100644 index 00000000..a708f21a --- /dev/null +++ b/src/Lifti.Core/DocumentTokenMatchMap.cs @@ -0,0 +1,126 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; + +#if NETSTANDARD +using System.Linq; +#endif + +namespace Lifti +{ + /// + /// A read only map of s keyed by the internal document id. 
+ /// + public readonly struct DocumentTokenMatchMap : IEquatable + { + internal DocumentTokenMatchMap(IEnumerable>> data) + { +#if NETSTANDARD + this.DocumentTokenLookup = data.ToDictionary(x => x.Key, x => x.Value); +#else + this.DocumentTokenLookup = new(data); +#endif + } + + /// + /// Constructs a new instance of . + /// + /// + /// A dictionary of document id to indexed tokens to initialize the map with. + /// + public DocumentTokenMatchMap(Dictionary> data) + { + this.DocumentTokenLookup = data; + } + + /// + /// Gets an empty . + /// + public static DocumentTokenMatchMap Empty { get; } = new DocumentTokenMatchMap(Array.Empty>>()); + + internal Dictionary> DocumentTokenLookup { get; } + + /// + /// Gets the number of documents in the map. + /// + public int Count => this.DocumentTokenLookup.Count; + + /// + /// Enumerates all the document matches in the map. + /// + public IEnumerable<(int documentId, IReadOnlyList indexedTokens)> Enumerate() + { + foreach (var document in this.DocumentTokenLookup) + { + yield return (document.Key, document.Value); + } + } + + /// + /// Tries to get the list of indexed tokens for the specified document. + /// + public bool TryGetValue(int documentId, [NotNullWhen(true)] out IReadOnlyList? tokens) + { + return this.DocumentTokenLookup.TryGetValue(documentId, out tokens); + } + + /// + /// Begins mutation the list of indexed tokens for the specified document. If the document is not already + /// indexed at this node, a new empty list will be created. If the document is already indexed at this node, + /// a clone of the list will be created and returned, so is safe to be mutated. + /// + /// + /// + internal List StartMutation(int documentId) + { + if (this.DocumentTokenLookup.TryGetValue(documentId, out var indexedTokens)) + { + return new List(indexedTokens); + } + else + { + return []; + } + } + + /// + /// Gets a value indicating whether the map contains any matches for the specified document. 
+ /// + public bool HasDocument(int documentId) + { + return this.DocumentTokenLookup.ContainsKey(documentId); + } + + /// + public override bool Equals(object? obj) + { + return obj is DocumentTokenMatchMap other + && this.Equals(other); + } + + /// + public bool Equals(DocumentTokenMatchMap other) + { + // Because we're immutable, we can just compare the references + return this.DocumentTokenLookup == other.DocumentTokenLookup; + } + + /// + public override int GetHashCode() + { + return this.DocumentTokenLookup.GetHashCode(); + } + + /// + public static bool operator ==(DocumentTokenMatchMap left, DocumentTokenMatchMap right) + { + return left.Equals(right); + } + + /// + public static bool operator !=(DocumentTokenMatchMap left, DocumentTokenMatchMap right) + { + return !(left == right); + } + } +} \ No newline at end of file diff --git a/src/Lifti.Core/DocumentTokenMatchMapMutation.cs b/src/Lifti.Core/DocumentTokenMatchMapMutation.cs new file mode 100644 index 00000000..51d6c63b --- /dev/null +++ b/src/Lifti.Core/DocumentTokenMatchMapMutation.cs @@ -0,0 +1,93 @@ +using System.Collections.Generic; + +#if NETSTANDARD +using System.Linq; +#endif + +namespace Lifti +{ + internal sealed class DocumentTokenMatchMapMutation + { + private readonly DocumentTokenMatchMap original; + private HashSet? removed; + private Dictionary>? 
mutated; + + public DocumentTokenMatchMapMutation(DocumentTokenMatchMap original) + { + this.original = original; + } + + public DocumentTokenMatchMap Apply() + { + if (this.mutated == null && this.removed == null) + { + return this.original; + } + + // Copy the original matches except any that have been expressly removed + Dictionary> mutatedMatches = new(this.original.DocumentTokenLookup); + if (this.removed != null) + { + foreach (var documentId in this.removed) + { + mutatedMatches.Remove(documentId); + } + } + + if (this.mutated != null) + { +#if !NETSTANDARD + // Avoid re-allocations by ensuring the dictionary has enough capacity to hold all the new items + // In some situations this may actually be more than we need (A large number of documents have + // been reindexed), but this is better in most cases. We could track the number of "new" documents + // and only increase the capacity by that amount... + mutatedMatches.EnsureCapacity(mutatedMatches.Count + this.mutated.Count); +#endif + + foreach (var documentMutation in this.mutated) + { + mutatedMatches[documentMutation.Key] = documentMutation.Value; + } + } + + return new DocumentTokenMatchMap(mutatedMatches); + } + + public int MutationCount => this.mutated?.Count ?? 0; + + public void Remove(int documentId) + { + if (this.removed == null) + { + this.removed = [documentId]; + } + else + { + this.removed.Add(documentId); + } + + // It's technically possible for a document to be added to the index, and in the same mutation removed + // again. In this case, we can just remove it from the mutations dictionary as if it was never + // added to it. 
+ this.mutated?.Remove(documentId); + } + + internal void Add(int documentId, IndexedToken indexedToken) + { + this.mutated ??= []; + + if (this.mutated.TryGetValue(documentId, out var documentFieldLocations)) + { + // The field locations list will already have been cloned when it was added to the mutations dictionary + // so it's safe to just add to it here + documentFieldLocations.Add(indexedToken); + } + else + { + documentFieldLocations = this.original.StartMutation(documentId); + documentFieldLocations.Add(indexedToken); + this.mutated.Add(documentId, documentFieldLocations); + } + } + } +} \ No newline at end of file diff --git a/src/Lifti.Core/DuplicateItemBehavior.cs b/src/Lifti.Core/DuplicateItemBehavior.cs deleted file mode 100644 index 86095e20..00000000 --- a/src/Lifti.Core/DuplicateItemBehavior.cs +++ /dev/null @@ -1,18 +0,0 @@ -namespace Lifti -{ - /// - /// Describes the behavior of the index when indexing an item that is already present in the index. - /// - public enum DuplicateItemBehavior - { - /// - /// When an item is indexed and it already exists in the index, the text associated to the new item should replace the old. - /// - ReplaceItem = 0, - - /// - /// When an item is indexed and it already exists in the index, a should be thrown. - /// - ThrowException = 1 - } -} \ No newline at end of file diff --git a/src/Lifti.Core/DuplicateKeyBehavior.cs b/src/Lifti.Core/DuplicateKeyBehavior.cs new file mode 100644 index 00000000..23216160 --- /dev/null +++ b/src/Lifti.Core/DuplicateKeyBehavior.cs @@ -0,0 +1,18 @@ +namespace Lifti +{ + /// + /// How an index should behave when adding a document for which the key is already present. + /// + public enum DuplicateKeyBehavior + { + /// + /// When an document is added with a key already present in the index, the new document will replace the old. + /// + Replace = 0, + + /// + /// If when adding a document to the index its key is already present, a will be thrown. 
+ /// + ThrowException = 1 + } +} \ No newline at end of file diff --git a/src/Lifti.Core/ExceptionMessages.Designer.cs b/src/Lifti.Core/ExceptionMessages.Designer.cs index ac563cd4..3484da12 100644 --- a/src/Lifti.Core/ExceptionMessages.Designer.cs +++ b/src/Lifti.Core/ExceptionMessages.Designer.cs @@ -70,7 +70,7 @@ internal class ExceptionMessages { } /// - /// Looks up a localized string similar to At least one field must be configured for an item. + /// Looks up a localized string similar to At least one field must be configured for an object type. /// internal static string AtLeastOneFieldMustBeIndexed { get { @@ -105,6 +105,15 @@ internal class ExceptionMessages { } } + /// + /// Looks up a localized string similar to Cannot apply a disposed bookmark. + /// + internal static string BookmarkDisposed { + get { + return ResourceManager.GetString("BookmarkDisposed", resourceCulture); + } + } + /// /// Looks up a localized string similar to Cannot combine an empty set of query parts. /// @@ -142,7 +151,7 @@ internal class ExceptionMessages { } /// - /// Looks up a localized string similar to A duplicate field "{0}" was encountered while indexing item {1}. Most likely multiple dynamic field providers have been configured and the same field was produced by more than one of them. Consider using a field prefix when configuring the dynamic fields.. + /// Looks up a localized string similar to A duplicate field "{0}" was encountered while indexing the object with key {1}. Most likely multiple dynamic field providers have been configured and the same field was produced by more than one of them. Consider using a field prefix when configuring the dynamic fields.. /// internal static string DuplicateFieldEncounteredOnObject { get { @@ -168,6 +177,15 @@ internal class ExceptionMessages { } } + /// + /// Looks up a localized string similar to Cannot create an empty adjacent words query part. 
+ /// + internal static string EmptyAdjacentWordsQueryPart { + get { + return ResourceManager.GetString("EmptyAdjacentWordsQueryPart", resourceCulture); + } + } + /// /// Looks up a localized string similar to Bracketed query parts cannot be empty. /// @@ -177,6 +195,15 @@ internal class ExceptionMessages { } } + /// + /// Looks up a localized string similar to Empty field name encountered. + /// + internal static string EmptyFieldNameEncountered { + get { + return ResourceManager.GetString("EmptyFieldNameEncountered", resourceCulture); + } + } + /// /// Looks up a localized string similar to Internal error - missing text value. /// @@ -186,6 +213,15 @@ internal class ExceptionMessages { } } + /// + /// Looks up a localized string similar to Cannot create an empty wildcard query part. + /// + internal static string EmptyWildcardQuery { + get { + return ResourceManager.GetString("EmptyWildcardQuery", resourceCulture); + } + } + /// /// Looks up a localized string similar to Expected at least one query part to be parsed. /// @@ -195,6 +231,15 @@ internal class ExceptionMessages { } } + /// + /// Looks up a localized string similar to Expected = after bracketed field name. + /// + internal static string ExpectedEqualsAfterFieldName { + get { + return ResourceManager.GetString("ExpectedEqualsAfterFieldName", resourceCulture); + } + } + /// /// Looks up a localized string similar to Expected a fuzzy match token - got {0}. /// @@ -295,7 +340,25 @@ internal class ExceptionMessages { } /// - /// Looks up a localized string similar to Item already indexed. + /// Looks up a localized string similar to Invalid score boost number encountered: {0}. + /// + internal static string InvalidScoreBoost { + get { + return ResourceManager.GetString("InvalidScoreBoost", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Expected a number to follow a score boost, e.g. "Term^3" would indicate that the search term "Term" should have a score boost of 3.. 
+ /// + internal static string InvalidScoreBoostExpectedNumber { + get { + return ResourceManager.GetString("InvalidScoreBoostExpectedNumber", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Document already indexed. /// internal static string ItemAlreadyIndexed { get { @@ -348,6 +411,15 @@ internal class ExceptionMessages { } } + /// + /// Looks up a localized string similar to An index can only have a maximum of 31 unique object types configured against it. + /// + internal static string MaximumNumberOfConfiguredObjectTypesReached { + get { + return ResourceManager.GetString("MaximumNumberOfConfiguredObjectTypesReached", resourceCulture); + } + } + /// /// Looks up a localized string similar to Only tokens up to {0} characters long can be indexed.. /// @@ -375,6 +447,15 @@ internal class ExceptionMessages { } } + /// + /// Looks up a localized string similar to Multiplier values must be greater than one. + /// + internal static string MultiplierValueMustBeGreaterThanOne { + get { + return ResourceManager.GetString("MultiplierValueMustBeGreaterThanOne", resourceCulture); + } + } + /// /// Looks up a localized string similar to No batch change in progress.. /// @@ -429,6 +510,15 @@ internal class ExceptionMessages { } } + /// + /// Looks up a localized string similar to Internal error: no snapshot initialized in navigator. + /// + internal static string NoSnapshotInitialized { + get { + return ResourceManager.GetString("NoSnapshotInitialized", resourceCulture); + } + } + /// /// Looks up a localized string similar to Not all requested items were returned. Tme missing ids were: {0}. /// @@ -447,6 +537,15 @@ internal class ExceptionMessages { } } + /// + /// Looks up a localized string similar to Internal error - attempted to calculate an item score boost without initialized score boost stats. 
+ /// + internal static string ScoreBoostsNotCalculated { + get { + return ResourceManager.GetString("ScoreBoostsNotCalculated", resourceCulture); + } + } + /// /// Looks up a localized string similar to Single character wildcards (%) following a multi-character wildcard (*) are not currently supported.. /// @@ -510,6 +609,15 @@ internal class ExceptionMessages { } } + /// + /// Looks up a localized string similar to Unclosed [ encountered. + /// + internal static string UnclosedSquareBracket { + get { + return ResourceManager.GetString("UnclosedSquareBracket", resourceCulture); + } + } + /// /// Looks up a localized string similar to Unexpected close bracket encountered in query. /// @@ -555,6 +663,15 @@ internal class ExceptionMessages { } } + /// + /// Looks up a localized string similar to Internal error - unexpected value removal from score boost metadata. + /// + internal static string UnexpectedScoreBoostValueRemoval { + get { + return ResourceManager.GetString("UnexpectedScoreBoostValueRemoval", resourceCulture); + } + } + /// /// Looks up a localized string similar to Unexpected token encountered: {0}. /// @@ -600,6 +717,15 @@ internal class ExceptionMessages { } } + /// + /// Looks up a localized string similar to Unknown object type id {0}. + /// + internal static string UnknownObjectTypeId { + get { + return ResourceManager.GetString("UnknownObjectTypeId", resourceCulture); + } + } + /// /// Looks up a localized string similar to Unknown operator encountered: {0}. /// @@ -635,5 +761,14 @@ internal class ExceptionMessages { return ResourceManager.GetString("ValueMustNotBeLessThanZero", resourceCulture); } } + + /// + /// Looks up a localized string similar to WithScoreBoosting can only be called once per object builder. 
+ /// + internal static string WithScoreBoostingCanOnlyBeCalledOncePerObjectDefinition { + get { + return ResourceManager.GetString("WithScoreBoostingCanOnlyBeCalledOncePerObjectDefinition", resourceCulture); + } + } } } diff --git a/src/Lifti.Core/ExceptionMessages.resx b/src/Lifti.Core/ExceptionMessages.resx index bc1bd62d..74b21746 100644 --- a/src/Lifti.Core/ExceptionMessages.resx +++ b/src/Lifti.Core/ExceptionMessages.resx @@ -121,7 +121,7 @@ When fields are configured with async access methods the async Add methods must be used on the index - At least one field must be configured for an item + At least one field must be configured for an object type An attempt was made to read a dynamic field ("{0}") from a provider that did not produce it. @@ -132,6 +132,9 @@ Cannot start a new batch change while another is already in progress. + + Cannot apply a disposed bookmark + Cannot combine an empty set of query parts @@ -145,7 +148,7 @@ Duplicate dynamic field reader name encountered: {0}. Dynamic field readers must have unique names. - A duplicate field "{0}" was encountered while indexing item {1}. Most likely multiple dynamic field providers have been configured and the same field was produced by more than one of them. Consider using a field prefix when configuring the dynamic fields. + A duplicate field "{0}" was encountered while indexing the object with key {1}. Most likely multiple dynamic field providers have been configured and the same field was produced by more than one of them. Consider using a field prefix when configuring the dynamic fields. The index was serialized with binary serialization version {0} and is incompatible with this release of LIFTI. 
@@ -153,15 +156,27 @@ Adjacent text query parts cannot be empty + + Cannot create an empty adjacent words query part + Bracketed query parts cannot be empty + + Empty field name encountered + Internal error - missing text value + + Cannot create an empty wildcard query part + Expected at least one query part to be parsed + + Expected = after bracketed field name + Expected a fuzzy match token - got {0} @@ -195,8 +210,14 @@ An index must be empty when attempting to deserialize its contents. + + Invalid score boost number encountered: {0} + + + Expected a number to follow a score boost, e.g. "Term^3" would indicate that the search term "Term" should have a score boost of 3. + - Item already indexed + Document already indexed Item not found @@ -213,6 +234,9 @@ Only 255 distinct fields can currently be indexed + + An index can only have a maximum of 31 unique object types configured against it + Only tokens up to {0} characters long can be indexed. @@ -222,6 +246,9 @@ The expected header bytes could not be found in the stream - this is probably not a serialized index. + + Multiplier values must be greater than one + No batch change in progress. @@ -240,12 +267,18 @@ Attempting to score a query result with no scorer initialized + + Internal error: no snapshot initialized in navigator + Not all requested items were returned. Tme missing ids were: {0} No tokenization options have been provided for type {0} + + Internal error - attempted to calculate an item score boost without initialized score boost stats + Single character wildcards (%) following a multi-character wildcard (*) are not currently supported. @@ -267,6 +300,9 @@ Unable to read header data from serialized index content. 
+ + Unclosed [ encountered + Unexpected close bracket encountered in query @@ -282,6 +318,9 @@ Unexpected OperatorParseState {0} encountered while tokenizing a query + + Internal error - unexpected value removal from score boost metadata + Unexpected token encountered: {0} @@ -297,6 +336,9 @@ Serialized index contains unknown field ids. Fields have most likely been removed from the FullTextIndexBuilder configuration. + + Unknown object type id {0} + Unknown operator encountered: {0} @@ -309,4 +351,7 @@ Value must not be less than zero + + WithScoreBoosting can only be called once per object builder + \ No newline at end of file diff --git a/src/Lifti.Core/FullTextIndex.cs b/src/Lifti.Core/FullTextIndex.cs index fa88716c..e7833e7f 100644 --- a/src/Lifti.Core/FullTextIndex.cs +++ b/src/Lifti.Core/FullTextIndex.cs @@ -1,4 +1,5 @@ using Lifti.Querying; +using Lifti.Serialization; using Lifti.Tokenization; using Lifti.Tokenization.Objects; using Lifti.Tokenization.TextExtraction; @@ -16,11 +17,11 @@ public class FullTextIndex : IFullTextIndex, IDisposable { private readonly Func, CancellationToken, Task>[]? indexModifiedActions; private readonly IndexOptions indexOptions; - private readonly IdPool idPool; private readonly IIndexNavigatorPool indexNavigatorPool; private readonly SemaphoreSlim writeLock = new(1); private readonly TimeSpan writeLockTimeout = TimeSpan.FromSeconds(10); private readonly IndexedFieldLookup fieldLookup; + private IndexMetadata metadata; private bool isDisposed; /// @@ -29,11 +30,11 @@ public class FullTextIndex : IFullTextIndex, IDisposable private IndexSnapshot currentSnapshot = null!; private IndexNode root = null!; - private IndexMutation? batchMutation; + private IndexMutation? 
batchMutation; internal FullTextIndex( IndexOptions indexOptions, - ObjectTokenizationLookup itemTokenizationOptions, + ObjectTypeConfigurationLookup objectTypeConfiguration, IndexedFieldLookup fieldLookup, IIndexNodeFactory indexNodeFactory, IQueryParser queryParser, @@ -44,15 +45,16 @@ public class FullTextIndex : IFullTextIndex, IDisposable Func, CancellationToken, Task>[]? indexModifiedActions) { this.indexNavigatorPool = new IndexNavigatorPool(scorer); + this.metadata = new IndexMetadata(objectTypeConfiguration.AllConfigurations); + this.indexOptions = indexOptions; - this.ItemTokenization = itemTokenizationOptions ?? throw new ArgumentNullException(nameof(itemTokenizationOptions)); + this.ObjectTypeConfiguration = objectTypeConfiguration ?? throw new ArgumentNullException(nameof(objectTypeConfiguration)); this.IndexNodeFactory = indexNodeFactory ?? throw new ArgumentNullException(nameof(indexNodeFactory)); this.QueryParser = queryParser ?? throw new ArgumentNullException(nameof(queryParser)); this.DefaultTextExtractor = defaultTextExtractor; this.DefaultTokenizer = defaultTokenizer ?? 
throw new ArgumentNullException(nameof(defaultTokenizer)); this.DefaultThesaurus = defaultThesaurus; this.indexModifiedActions = indexModifiedActions; - this.idPool = new IdPool(); this.fieldLookup = fieldLookup; this.Root = this.IndexNodeFactory.CreateRootNode(); @@ -65,20 +67,26 @@ public IndexNode Root private set { this.root = value; - this.currentSnapshot = new IndexSnapshot(this.indexNavigatorPool, this); + this.currentSnapshot = new IndexSnapshot( + this.indexNavigatorPool, + this.fieldLookup, + value, + this.metadata); } } /// - public IItemStore Items => this.idPool; + [Obsolete("Use the Metadata property instead")] + public IIndexMetadata Items => this.Metadata; - internal IIdPool IdPool => this.idPool; + /// + public IIndexMetadata Metadata => this.metadata; /// public IIndexedFieldLookup FieldLookup => this.fieldLookup; /// - public int Count => this.currentSnapshot.Items.Count; + public int Count => this.currentSnapshot.Metadata.DocumentCount; internal IIndexNodeFactory IndexNodeFactory { get; } @@ -97,7 +105,30 @@ private set /// public IThesaurus DefaultThesaurus { get; } - internal ObjectTokenizationLookup ItemTokenization { get; } + internal ObjectTypeConfigurationLookup ObjectTypeConfiguration { get; } + + /// + /// Restores the index from a previously serialized state. + /// + /// + /// The root node of the index. + /// + /// + /// The metadata for the index. 
+ /// + internal void RestoreIndex(IndexNode rootNode, DocumentMetadataCollector collectedMetadata) + { + // Set the root node and metadata in a write lock to ensure that no other operations are happening + this.PerformWriteLockedAction(() => + { + foreach (var metadata in collectedMetadata.Collected) + { + this.metadata.Add(metadata); + } + + this.Root = rootNode; + }); + } /// public IIndexTokenizer GetTokenizerForField(string fieldName) @@ -121,7 +152,7 @@ public void BeginBatchChange() throw new LiftiException(ExceptionMessages.BatchChangeAlreadyStarted); } - this.batchMutation = new IndexMutation(this.Root, this.IndexNodeFactory); + this.batchMutation = new IndexMutation(this.Root, this.metadata, this.IndexNodeFactory); }); } @@ -145,7 +176,7 @@ public async Task CommitBatchChangeAsync(CancellationToken cancellationToken = d } /// - public async Task AddAsync(TKey itemKey, IEnumerable text, CancellationToken cancellationToken = default) + public async Task AddAsync(TKey key, IEnumerable text, CancellationToken cancellationToken = default) { await this.PerformWriteLockedActionAsync( async () => @@ -154,7 +185,7 @@ public async Task AddAsync(TKey itemKey, IEnumerable text, CancellationT m => { var tokens = ExtractDocumentTokens(text, this.DefaultTextExtractor, this.DefaultTokenizer, this.DefaultThesaurus); - this.AddForDefaultField(m, itemKey, tokens); + this.AddForDefaultField(m, key, tokens); }, cancellationToken) .ConfigureAwait(false); @@ -164,7 +195,7 @@ public async Task AddAsync(TKey itemKey, IEnumerable text, CancellationT } /// - public async Task AddAsync(TKey itemKey, string text, CancellationToken cancellationToken = default) + public async Task AddAsync(TKey key, string text, CancellationToken cancellationToken = default) { await this.PerformWriteLockedActionAsync( async () => @@ -173,7 +204,7 @@ public async Task AddAsync(TKey itemKey, string text, CancellationToken cancella m => { var tokens = ExtractDocumentTokens(text, this.DefaultTextExtractor, 
this.DefaultTokenizer, this.DefaultThesaurus); - this.AddForDefaultField(m, itemKey, tokens); + this.AddForDefaultField(m, key, tokens); }, cancellationToken) .ConfigureAwait(false); @@ -183,14 +214,14 @@ public async Task AddAsync(TKey itemKey, string text, CancellationToken cancella } /// - public async Task AddRangeAsync(IEnumerable items, CancellationToken cancellationToken = default) + public async Task AddRangeAsync(IEnumerable items, CancellationToken cancellationToken = default) { if (items is null) { throw new ArgumentNullException(nameof(items)); } - var options = this.ItemTokenization.Get(); + var options = this.ObjectTypeConfiguration.Get(); await this.PerformWriteLockedActionAsync( async () => { @@ -209,9 +240,9 @@ public async Task AddRangeAsync(IEnumerable items, CancellationTok } /// - public async Task AddAsync(TItem item, CancellationToken cancellationToken = default) + public async Task AddAsync(TObject item, CancellationToken cancellationToken = default) { - var options = this.ItemTokenization.Get(); + var options = this.ObjectTypeConfiguration.Get(); await this.PerformWriteLockedActionAsync( async () => await this.MutateAsync( async m => await this.AddAsync(item, options, m, cancellationToken).ConfigureAwait(false), @@ -221,29 +252,28 @@ public async Task AddAsync(TItem item, CancellationToken cancellationToke } /// - public async Task RemoveAsync(TKey itemKey, CancellationToken cancellationToken = default) + public async Task RemoveAsync(TKey key, CancellationToken cancellationToken = default) { - var result = false; + var documentRemoved = false; await this.PerformWriteLockedActionAsync( async () => { - if (!this.idPool.Contains(itemKey)) - { - result = false; - return; - } - await this.MutateAsync( - m => this.RemoveKeyFromIndex(itemKey, m), + m => + { + documentRemoved = m.Metadata.Contains(key); + if (documentRemoved) + { + RemoveKeyFromIndex(key, m); + } + }, cancellationToken) .ConfigureAwait(false); - - result = true; }, 
cancellationToken) .ConfigureAwait(false); - return result; + return documentRemoved; } /// @@ -270,11 +300,6 @@ public override string ToString() return this.Root.ToString(); } - internal void SetRootWithLock(IndexNode indexNode) - { - this.PerformWriteLockedAction(() => this.Root = indexNode); - } - private static List ExtractDocumentTokens( IEnumerable documentTextFragments, ITextExtractor textExtractor, @@ -304,18 +329,18 @@ internal void SetRootWithLock(IndexNode indexNode) private static List TokenizeFragments(IIndexTokenizer tokenizer, IThesaurus thesaurus, IEnumerable fragments) { - return tokenizer.Process(fragments).SelectMany(x => thesaurus.Process(x)).ToList(); + return tokenizer.Process(fragments).SelectMany(thesaurus.Process).ToList(); } - private void AddForDefaultField(IndexMutation mutation, TKey itemKey, List tokens) + private void AddForDefaultField(IndexMutation mutation, TKey key, List tokens) { var fieldId = this.FieldLookup.DefaultField; - var itemId = this.GetUniqueIdForItem( - itemKey, - new DocumentStatistics(fieldId, CalculateTotalTokenCount(tokens)), + var documentId = this.GetUniqueIdForDocument( + key, + new DocumentStatistics(fieldId, tokens.CalculateTotalTokenCount()), mutation); - IndexTokens(mutation, itemId, fieldId, tokens); + IndexTokens(mutation, documentId, fieldId, tokens); } private void PerformWriteLockedAction(Action action) @@ -352,7 +377,7 @@ private async Task PerformWriteLockedActionAsync(Func asyncAction, Cancell } } - private async Task MutateAsync(Action mutationAction, CancellationToken cancellationToken) + private async Task MutateAsync(Action> mutationAction, CancellationToken cancellationToken) { var indexMutation = this.GetCurrentMutationOrCreateTransient(); @@ -364,8 +389,9 @@ private async Task MutateAsync(Action mutationAction, Cancellatio } } - private async Task ApplyMutationsAsync(IndexMutation indexMutation, CancellationToken cancellationToken) + private async Task ApplyMutationsAsync(IndexMutation 
indexMutation, CancellationToken cancellationToken) { + this.metadata = indexMutation.Metadata; this.Root = indexMutation.Apply(); if (this.indexModifiedActions != null) { @@ -376,12 +402,12 @@ private async Task ApplyMutationsAsync(IndexMutation indexMutation, Cancellation } } - private IndexMutation GetCurrentMutationOrCreateTransient() + private IndexMutation GetCurrentMutationOrCreateTransient() { - return this.batchMutation ?? new IndexMutation(this.Root, this.IndexNodeFactory); + return this.batchMutation ?? new IndexMutation(this.Root, this.metadata, this.IndexNodeFactory); } - private async Task MutateAsync(Func asyncMutationAction, CancellationToken cancellationToken) + private async Task MutateAsync(Func, Task> asyncMutationAction, CancellationToken cancellationToken) { var indexMutation = this.GetCurrentMutationOrCreateTransient(); @@ -393,35 +419,25 @@ private async Task MutateAsync(Func asyncMutationAction, Ca } } - private static void IndexTokens(IndexMutation indexMutation, int itemId, byte fieldId, IEnumerable tokens) + private static void IndexTokens(IndexMutation indexMutation, int documentId, byte fieldId, IEnumerable tokens) { foreach (var token in tokens) { - indexMutation.Add(itemId, fieldId, token); + indexMutation.Add(documentId, fieldId, token); } } - private static int CalculateTotalTokenCount(List tokens) + private static void RemoveKeyFromIndex(TKey key, IndexMutation mutation) { - var totalCount = 0; - for (var i = 0; i < tokens.Count; i++) - { - totalCount += tokens[i].Locations.Count; - } - return totalCount; - - } + var documentId = mutation.Metadata.Remove(key); - private void RemoveKeyFromIndex(TKey itemKey, IndexMutation mutation) - { - var id = this.idPool.ReleaseItem(itemKey); - mutation.Remove(id); + mutation.Remove(documentId); } /// This method is thread safe as we only allow one mutation operation at a time. 
- private async Task AddAsync(TItem item, ObjectTokenization options, IndexMutation indexMutation, CancellationToken cancellationToken) + private async Task AddAsync(TObject item, ObjectTypeConfiguration options, IndexMutation indexMutation, CancellationToken cancellationToken) { - var itemKey = options.KeyReader(item); + var key = options.KeyReader(item); var fieldTokens = new Dictionary>(); @@ -436,11 +452,11 @@ private async Task AddAsync(TItem item, ObjectTokenization o tokenizer, thesaurus); - MergeFieldTokens(fieldTokens, itemKey, field.Name, fieldId, tokens); + MergeFieldTokens(fieldTokens, key, field.Name, fieldId, tokens); } // Next process any dynamic field readers - var itemType = typeof(TItem); + var objectType = typeof(TObject); foreach (var dynamicFieldReader in options.DynamicFieldReaders) { var dynamicFields = await dynamicFieldReader.ReadAsync(item, cancellationToken).ConfigureAwait(false); @@ -451,20 +467,20 @@ private async Task AddAsync(TItem item, ObjectTokenization o var tokens = ExtractDocumentTokens(rawText, textExtractor, tokenizer, thesaurus); - MergeFieldTokens(fieldTokens, itemKey, name, fieldId, tokens); + MergeFieldTokens(fieldTokens, key, name, fieldId, tokens); } } var documentStatistics = new DocumentStatistics( fieldTokens.ToDictionary( t => t.Key, - t => CalculateTotalTokenCount(t.Value))); + t => t.Value.CalculateTotalTokenCount())); - var itemId = this.GetUniqueIdForItem(itemKey, documentStatistics, indexMutation); + var documentId = this.GetUniqueIdForDocument(item, key, documentStatistics, options, indexMutation); foreach (var fieldTokenList in fieldTokens) { - IndexTokens(indexMutation, itemId, fieldTokenList.Key, fieldTokenList.Value); + IndexTokens(indexMutation, documentId, fieldTokenList.Key, fieldTokenList.Value); } } @@ -478,17 +494,29 @@ private static void MergeFieldTokens(Dictionary> fieldTokens, fieldTokens.Add(fieldId, tokens); } - private int GetUniqueIdForItem(TKey itemKey, DocumentStatistics documentStatistics, 
IndexMutation mutation) + private int GetUniqueIdForDocument(TKey key, DocumentStatistics documentStatistics, IndexMutation mutation) + { + this.EnforceDuplicateKeyBehavior(key, mutation); + + return mutation.Metadata.Add(key, documentStatistics); + } + + private int GetUniqueIdForDocument(TObject item, TKey key, DocumentStatistics documentStatistics, ObjectTypeConfiguration options, IndexMutation mutation) { - if (this.indexOptions.DuplicateItemBehavior == DuplicateItemBehavior.ReplaceItem) + this.EnforceDuplicateKeyBehavior(key, mutation); + + return mutation.Metadata.Add(key, item, documentStatistics, options); + } + + private void EnforceDuplicateKeyBehavior(TKey key, IndexMutation mutation) + { + if (this.indexOptions.DuplicateKeyBehavior == DuplicateKeyBehavior.Replace) { - if (this.idPool.Contains(itemKey)) + if (mutation.Metadata.Contains(key)) { - this.RemoveKeyFromIndex(itemKey, mutation); + RemoveKeyFromIndex(key, mutation); } } - - return this.idPool.Add(itemKey, documentStatistics); } /// @@ -522,7 +550,7 @@ public void Dispose() /// and the ids in the new index. Assuming the index has been rebuilt with exactly the same configuration, the ids /// will match on each side of the map. 
/// - internal Dictionary RehydrateSerializedFields(List serializedFields) + internal SerializedFieldIdMap MapSerializedFieldIds(List serializedFields) { var fieldMap = new Dictionary { @@ -532,7 +560,7 @@ public void Dispose() foreach (var field in serializedFields) { - byte newId; + byte newFieldId; switch (field.Kind) { case FieldKind.Dynamic: @@ -542,21 +570,21 @@ public void Dispose() } var newDynamicField = this.fieldLookup.GetOrCreateDynamicFieldInfo(field.DynamicFieldReaderName, field.Name); - newId = newDynamicField.Id; + newFieldId = newDynamicField.Id; break; case FieldKind.Static: var fieldInfo = this.fieldLookup.GetFieldInfo(field.Name); - newId = fieldInfo.Id; + newFieldId = fieldInfo.Id; break; default: throw new LiftiException(ExceptionMessages.UnknownFieldKind, field.Kind); } - fieldMap[field.FieldId] = newId; + fieldMap[field.FieldId] = newFieldId; } - return fieldMap; + return new SerializedFieldIdMap(fieldMap); } } } diff --git a/src/Lifti.Core/FullTextIndexBuilder.cs b/src/Lifti.Core/FullTextIndexBuilder.cs index 8c7e04ca..59064996 100644 --- a/src/Lifti.Core/FullTextIndexBuilder.cs +++ b/src/Lifti.Core/FullTextIndexBuilder.cs @@ -16,7 +16,7 @@ namespace Lifti public class FullTextIndexBuilder where TKey : notnull { - private readonly List objectTokenizationBuilders = new(); + private readonly List objectTokenizationBuilders = []; private readonly IndexOptions advancedOptions = new(); private ThesaurusBuilder? defaultThesaurusBuilder; private IIndexScorerFactory? scorerFactory; @@ -46,12 +46,12 @@ public FullTextIndexBuilder WithTextExtractor(ITextExtractor textExtractor } /// - /// Configures the behavior the index should exhibit when an item that already exists in the index is indexed again. - /// The default value is . + /// Configures the behavior of the index when an key that has already been added to the index is indexed again. + /// The default value is . 
/// - public FullTextIndexBuilder WithDuplicateItemBehavior(DuplicateItemBehavior duplicateItemBehavior) + public FullTextIndexBuilder WithDuplicateKeyBehavior(DuplicateKeyBehavior duplicateKeyBehavior) { - this.advancedOptions.DuplicateItemBehavior = duplicateItemBehavior; + this.advancedOptions.DuplicateKeyBehavior = duplicateKeyBehavior; return this; } @@ -79,7 +79,7 @@ public FullTextIndexBuilder WithIndexModificationAction(Func, CancellationToken, Task>>(); + this.indexModifiedActions ??= []; this.indexModifiedActions.Add(asyncAction); @@ -123,20 +123,20 @@ public FullTextIndexBuilder WithIndexModificationAction(Action - /// Creates an configuration entry for an item of type + /// Configures the index to support tokenizing text from an object of type /// in the index. /// /// - /// A delegate capable of specifying all the required options for the item tokenization options. + /// A delegate capable of configuring an instance. /// - public FullTextIndexBuilder WithObjectTokenization(Func, ObjectTokenizationBuilder> optionsBuilder) + public FullTextIndexBuilder WithObjectTokenization(Func, ObjectTokenizationBuilder> optionsBuilder) { if (optionsBuilder is null) { throw new ArgumentNullException(nameof(optionsBuilder)); } - var builder = new ObjectTokenizationBuilder(); + var builder = new ObjectTokenizationBuilder(); this.objectTokenizationBuilders.Add(optionsBuilder(builder)); return this; @@ -256,16 +256,36 @@ public FullTextIndex Build() // Building the object tokenizers also populates the index's field lookup with // any static fields that have been defined. var fieldLookup = new IndexedFieldLookup(); - var objectTokenizers = new List(); + var objectTokenizers = new List(); + + // Start object type IDs at 1 - 0 is reserved for special use where the indexed document is + // not associated with a specific object. 
+ byte objectTypeId = 1; foreach (var objectTokenizationBuilder in this.objectTokenizationBuilders) { - var objectTokenizer = objectTokenizationBuilder.Build(this.defaultTokenizer, thesaurusBuilder, textExtractor, fieldLookup); + // This is a limitation of the current binary serialization implementation. We are reserving + // 3 bits of the object type ID for whether the object has various scoring metadata associated + // to it. That leaves us with 5 bits for the object type ID. + // Having more that 31 different *object types* (not fields) seems a bit of a stretch, so this + // feels ok as a design constraint for now. + if (objectTypeId > 31) + { + throw new LiftiException(ExceptionMessages.MaximumNumberOfConfiguredObjectTypesReached); + } + + var objectTokenizer = objectTokenizationBuilder.Build( + objectTypeId++, + this.defaultTokenizer, + thesaurusBuilder, + textExtractor, + fieldLookup); + objectTokenizers.Add(objectTokenizer); } return new FullTextIndex( this.advancedOptions, - new ObjectTokenizationLookup(objectTokenizers), + new ObjectTypeConfigurationLookup(objectTokenizers), fieldLookup, new IndexNodeFactory(this.advancedOptions), this.queryParser ?? new QueryParser(new QueryParserOptions()), diff --git a/src/Lifti.Core/IFullTextIndex.cs b/src/Lifti.Core/IFullTextIndex.cs index 51e476e3..73888435 100644 --- a/src/Lifti.Core/IFullTextIndex.cs +++ b/src/Lifti.Core/IFullTextIndex.cs @@ -1,6 +1,7 @@ using Lifti.Querying; using Lifti.Tokenization; using Lifti.Tokenization.TextExtraction; +using System; using System.Collections.Generic; using System.Threading; using System.Threading.Tasks; @@ -11,11 +12,15 @@ namespace Lifti /// public interface IFullTextIndex : IIndexTokenizerProvider { + /// + [Obsolete("Use the Metadata property instead")] + IIndexMetadata Items { get; } + /// - /// Internally an index keeps track of items and their metadata. Can be used get ids for items and - /// visa-versa, along with other derived metadata such as token counts. 
+ /// The keeps track index metadata, including maps between internal document ids and keys, + /// statistics about token counts and score boost aggregates. /// - IItemStore Items { get; } + IIndexMetadata Metadata { get; } /// /// Fields are tracked internally as a id of type . This lookup can @@ -24,7 +29,7 @@ public interface IFullTextIndex : IIndexTokenizerProvider IIndexedFieldLookup FieldLookup { get; } /// - /// Gets the number of items contained in the index. This will not reflect any new items + /// Gets the number of documents contained in the index. This will not reflect any new documents /// that are currently being inserted in a batch until the batch completes. /// int Count { get; } @@ -63,57 +68,57 @@ public interface IFullTextIndex : IIndexTokenizerProvider /// /// Indexes some text against a given key. /// - /// The key of the item being indexed. - /// The text to index against the item. + /// The key of the document being indexed. + /// The text to index against the document. /// The optional for the operation. - Task AddAsync(TKey itemKey, string text, CancellationToken cancellationToken = default); + Task AddAsync(TKey key, string text, CancellationToken cancellationToken = default); /// /// Indexes some text against a given key. /// - /// The key of the item being indexed. - /// The text to index against the item. + /// The key of the document being indexed. + /// The text to index against the document. /// The optional for the operation. - Task AddAsync(TKey itemKey, IEnumerable text, CancellationToken cancellationToken = default); + Task AddAsync(TKey key, IEnumerable text, CancellationToken cancellationToken = default); /// - /// Indexes a single item of type . This type must have been + /// Indexes a single document extracted from type . This type must have been /// configured when the index was built. /// - /// - /// The type of the item being indexed. + /// + /// The type of the object being indexed. /// /// /// The item to index. 
/// /// The optional for the operation. - Task AddAsync(TItem item, CancellationToken cancellationToken = default); + Task AddAsync(TObject item, CancellationToken cancellationToken = default); /// - /// Indexes a set of items of type . This type must have been + /// Indexes a set of documents extracted from type . This type must have been /// configured when the index was built. /// - /// - /// The type of the item being indexed. + /// + /// The type of the object being indexed. /// /// /// The items to index. /// /// The optional for the operation. - Task AddRangeAsync(IEnumerable items, CancellationToken cancellationToken = default); + Task AddRangeAsync(IEnumerable items, CancellationToken cancellationToken = default); /// - /// Removes the item with the given key from this index. If the key is not indexed then + /// Removes the document with the given key from this index. If the key is not indexed then /// this operation is a no-op and false is returned. /// - /// - /// The key of the item to remove. + /// + /// The key of the document to remove. /// /// - /// true if the item was in the index, false if it was not. + /// true if the document was in the index, false if it was not. /// /// The optional for the operation. - Task RemoveAsync(TKey itemKey, CancellationToken cancellationToken = default); + Task RemoveAsync(TKey key, CancellationToken cancellationToken = default); /// /// Performs a search against this index. diff --git a/src/Lifti.Core/IIdPool.cs b/src/Lifti.Core/IIdPool.cs deleted file mode 100644 index a2d78116..00000000 --- a/src/Lifti.Core/IIdPool.cs +++ /dev/null @@ -1,33 +0,0 @@ -namespace Lifti -{ - internal interface IIdPool : IItemStore - { - /// - /// Returns the id associated to the given item back to the pool. - /// - /// - /// The id that was associated to the item. - /// - int ReleaseItem(T item); - - /// - /// Adds the given item with the pre-determined id. This is used when - /// de-serializing an index and the ids are already known. 
- /// - /// - /// Thrown when the id is already used or the item is already indexed. - /// - void Add(int id, T item, DocumentStatistics documentStatistics); - - /// - /// Adds the given item, generating a new id for it as it is stored. - /// - /// - /// The id for the item. - /// - /// - /// Thrown when the item is already indexed. - /// - int Add(T item, DocumentStatistics documentStatistics); - } -} \ No newline at end of file diff --git a/src/Lifti.Core/IIndexMetadata.cs b/src/Lifti.Core/IIndexMetadata.cs new file mode 100644 index 00000000..025e98d4 --- /dev/null +++ b/src/Lifti.Core/IIndexMetadata.cs @@ -0,0 +1,86 @@ +using System; +using System.Collections.Generic; + +namespace Lifti +{ + /// + /// Describes methods for accessing metadata information about an index. + /// + public interface IIndexMetadata + { + /// + /// Gets the number of documents in the index. + /// + [Obsolete("Use DocumentCount property instead")] + int Count { get; } + + /// + /// Gets the number of documents in the index. + /// + int DocumentCount { get; } + + /// + /// Gets the for the given internal document id. + /// + /// + /// Thrown when the id is not known. + /// + DocumentMetadata GetDocumentMetadata(int documentId); + + /// + [Obsolete("Use GetDocumentMetadata(int) instead")] + DocumentMetadata GetMetadata(int documentId); + + /// + /// Gets the calculated for the given object type. This can be used + /// to determine the score boost for an instance of . + /// + ScoreBoostMetadata GetObjectTypeScoreBoostMetadata(byte objectTypeId); + + /// + /// Gets the aggregated statistics for all the indexed documents, including total token count. + /// + IndexStatistics IndexStatistics { get; } + } + + /// + /// Describes methods for accessing metadata information about an index. + /// + /// + /// The type of the key in the index. + /// + public interface IIndexMetadata : IIndexMetadata + { + /// + /// Enumerates each in the index. 
+ /// + IEnumerable> GetIndexedDocuments(); + + /// + /// Gets a value indicating whether the given key has been added to the index. + /// + bool Contains(TKey key); + +#pragma warning disable CS0108 // Member hides inherited member; missing new keyword + /// + /// Gets the for the given document id. + /// + /// + /// Thrown when the id is not known. + /// + DocumentMetadata GetDocumentMetadata(int documentId); + + /// + [Obsolete("Use GetDocumentMetadata(int) instead")] + DocumentMetadata GetMetadata(int documentId); +#pragma warning restore CS0108 // Member hides inherited member; missing new keyword + + /// + /// Gets the for the given key. + /// + /// + /// Thrown when the key is not known. + /// + DocumentMetadata GetMetadata(TKey key); + } +} \ No newline at end of file diff --git a/src/Lifti.Core/IIndexNodeFactory.cs b/src/Lifti.Core/IIndexNodeFactory.cs index ba15601b..b278fd06 100644 --- a/src/Lifti.Core/IIndexNodeFactory.cs +++ b/src/Lifti.Core/IIndexNodeFactory.cs @@ -1,5 +1,4 @@ using System; -using System.Collections.Immutable; namespace Lifti { @@ -23,15 +22,15 @@ public interface IIndexNodeFactory /// text has been completely processed. /// /// - /// The set of child nodes at this instance, keyed by matching character. + /// The at this instance. /// /// - /// The tokens that are matched at this instance, keyed by the internal item id. + /// The , providing access to the tokens that are matched against documents at this point in the index. /// IndexNode CreateNode( ReadOnlyMemory intraNodeText, - ImmutableDictionary childNodes, - ImmutableDictionary> matches); + ChildNodeMap childNodes, + DocumentTokenMatchMap matches); /// /// Gets the for the given into the index. 
diff --git a/src/Lifti.Core/IIndexSnapshot.cs b/src/Lifti.Core/IIndexSnapshot.cs index d9f1cb97..191460a3 100644 --- a/src/Lifti.Core/IIndexSnapshot.cs +++ b/src/Lifti.Core/IIndexSnapshot.cs @@ -1,4 +1,5 @@ using Lifti.Querying; +using System; namespace Lifti { @@ -23,10 +24,14 @@ public interface IIndexSnapshot /// IIndexNavigator CreateNavigator(); + /// + [Obsolete("Use Metadata property instead")] + IIndexMetadata Items { get; } + /// - /// Gets the in the state it was in when the snapshot was taken. + /// Gets the in the state it was in when the snapshot was taken. /// - IItemStore Items { get; } + IIndexMetadata Metadata { get; } } /// @@ -34,11 +39,16 @@ public interface IIndexSnapshot /// public interface IIndexSnapshot : IIndexSnapshot { + +#pragma warning disable CS0108 // Member hides inherited member; missing new keyword + /// + [Obsolete("Use Metadata property instead")] + IIndexMetadata Items { get; } + /// - /// Gets the in the state it was in when the snapshot was taken. + /// Gets the in the state it was in when the snapshot was taken. /// -#pragma warning disable CS0108 // Member hides inherited member; missing new keyword - IItemStore Items { get; } + IIndexMetadata Metadata { get; } #pragma warning restore CS0108 // Member hides inherited member; missing new keyword } } \ No newline at end of file diff --git a/src/Lifti.Core/IItemMetadata.cs b/src/Lifti.Core/IItemMetadata.cs deleted file mode 100644 index 457d3cb9..00000000 --- a/src/Lifti.Core/IItemMetadata.cs +++ /dev/null @@ -1,30 +0,0 @@ -namespace Lifti -{ - /// - /// Describes metadata for an indexed item. - /// - /// The type of the key in the index. - public interface IItemMetadata : IItemMetadata - { - /// - /// Gets the indexed item. - /// - public TKey Item { get; } - } - - /// - /// Describes metadata for an indexed item. - /// - public interface IItemMetadata - { - /// - /// Gets the reference ID of the indexed item used internally in the index. 
- /// - public int Id { get; } - - /// - /// Gets the statistics for the indexed document, including token count. - /// - public DocumentStatistics DocumentStatistics { get; } - } -} \ No newline at end of file diff --git a/src/Lifti.Core/IItemStore.cs b/src/Lifti.Core/IItemStore.cs deleted file mode 100644 index 17c2b633..00000000 --- a/src/Lifti.Core/IItemStore.cs +++ /dev/null @@ -1,70 +0,0 @@ -using System.Collections.Generic; - -namespace Lifti -{ - /// - /// Describes methods for accessing information about items stored in an index. - /// - public interface IItemStore - { - /// - /// Gets the number of items managed by this instance. - /// - int Count { get; } - - /// - /// Gets the item metadata for the given id. - /// - /// - /// Thrown when the id is not known. - /// - IItemMetadata GetMetadata(int id); - - /// - /// Gets the aggregated statistics for all the indexed documents, including total token count. - /// - IndexStatistics IndexStatistics { get; } - } - - /// - /// Describes methods for accessing information about items stored in an index. - /// - /// - /// The type of the key in the index. - /// - public interface IItemStore : IItemStore - { - /// - /// Gets each of the items and their associated ids managed by this instance. - /// - IEnumerable> GetIndexedItems(); - - /// - /// Gets a value indicating whether the given item is managed by this instance. - /// - bool Contains(TKey item); - - /// - /// Gets the item metadata for the given id. - /// - /// - /// Thrown when the id is not known. - /// -#pragma warning disable CS0108 // Member hides inherited member; missing new keyword - IItemMetadata GetMetadata(int id); -#pragma warning restore CS0108 // Member hides inherited member; missing new keyword - - /// - /// Gets the item metadata for the given item. - /// - /// - /// Thrown when the item is not known. 
- /// - IItemMetadata GetMetadata(TKey item); - - /// - /// Creates a snapshot of this instance that can be used even if the index is subsequently mutated. - /// - IItemStore Snapshot(); - } -} \ No newline at end of file diff --git a/src/Lifti.Core/IObjectTokenization.cs b/src/Lifti.Core/IObjectTokenization.cs deleted file mode 100644 index 3ac24283..00000000 --- a/src/Lifti.Core/IObjectTokenization.cs +++ /dev/null @@ -1,16 +0,0 @@ -using System; - -namespace Lifti -{ - /// - /// Describes the configuration that should be used when indexing - /// a strongly typed item against an index. - /// - internal interface IObjectTokenization - { - /// - /// Gets the type of the item this instance represents. - /// - Type ItemType { get; } - } -} diff --git a/src/Lifti.Core/IObjectTypeConfiguration.cs b/src/Lifti.Core/IObjectTypeConfiguration.cs new file mode 100644 index 00000000..6ba9f193 --- /dev/null +++ b/src/Lifti.Core/IObjectTypeConfiguration.cs @@ -0,0 +1,26 @@ +using Lifti.Tokenization.Objects; +using System; + +namespace Lifti +{ + /// + /// Describes the configuration that should be used when indexing text from an object type. + /// + internal interface IObjectTypeConfiguration + { + /// + /// Gets the type of the object this configuration is for. + /// + Type ObjectType { get; } + + /// + /// Gets the unique id for the object type. + /// + byte Id { get; } + + /// + /// Gets the non-type specific score boost options. + /// + ObjectScoreBoostOptions ScoreBoostOptions { get; } + } +} diff --git a/src/Lifti.Core/ISearchResults.cs b/src/Lifti.Core/ISearchResults.cs index 1c740cc0..afa11fcf 100644 --- a/src/Lifti.Core/ISearchResults.cs +++ b/src/Lifti.Core/ISearchResults.cs @@ -11,86 +11,97 @@ namespace Lifti /// public interface ISearchResults : IEnumerable> { - /// + /// + /// Gets the number of results in the set. + /// + int Count { get; } + + /// /// - /// A function capable of retrieving all the original items that were indexed. 
- /// The loaded text will be assumed to be unchanged since the item was indexed and all items must be returned, - /// though the order is unimportant. An exception will be thrown if requested items are missing from the returned list. + /// A function capable of retrieving all the original objects that were indexed. + /// The loaded text will be assumed to be unchanged since the object was indexed and all items must be returned, + /// though the order is unimportant. An exception will be thrown if requested objects are missing from the returned list. /// /// /// The optional to use. /// - Task>> CreateMatchPhrasesAsync( - Func, IReadOnlyList> loadItems, + /// + /// Thrown if the function does not return all the requested objects. + /// + Task>> CreateMatchPhrasesAsync( + Func, IReadOnlyList> loadItems, CancellationToken cancellationToken = default); /// - /// Creates phrases extracted from the source item's text. Where words appear in sequence they will be combined as a single string. + /// Creates phrases extracted from the source object's text. Where words appear in sequence they will be combined as a single string. /// Each result has its phrases ordered by the those with most words, then by the most frequent phrases. /// /// - /// A function capable of retrieving the original item that was indexed. The loaded text will be assumed to be unchanged since the item was indexed. + /// A function capable of retrieving the original object that was indexed. The loaded text will be assumed to be unchanged since the object was indexed. /// /// /// The optional to use. /// - Task>> CreateMatchPhrasesAsync( - Func loadItem, + Task>> CreateMatchPhrasesAsync( + Func loadItem, CancellationToken cancellationToken = default); - /// + /// /// /// A function capable of retrieving all the original items that were indexed. - /// The loaded text will be assumed to be unchanged since the item was indexed and all items must be returned, - /// though the order is unimportant. 
An exception will be thrown if requested items are missing from the returned list. + /// The loaded text will be assumed to be unchanged since the object was indexed and all objects must be returned, + /// though the order is unimportant. An exception will be thrown if requested objects are missing from the returned list. /// /// /// The optional to use. /// - Task>> CreateMatchPhrasesAsync( - Func, CancellationToken, ValueTask>> loadItemsAsync, + /// + /// Thrown if the function does not return all the requested objects. + /// + Task>> CreateMatchPhrasesAsync( + Func, CancellationToken, ValueTask>> loadItemsAsync, CancellationToken cancellationToken = default); - /// - Task>> CreateMatchPhrasesAsync( - Func, ValueTask>> loadItemsAsync, + /// + Task>> CreateMatchPhrasesAsync( + Func, ValueTask>> loadItemsAsync, CancellationToken cancellationToken = default); - /// + /// /// - /// A function capable of asynchronously retrieving the original item that was indexed. The loaded text will be assumed to be unchanged since the item was indexed. + /// A function capable of asynchronously retrieving the original item that was indexed. The loaded text will be assumed to be unchanged since the object was indexed. /// /// /// The optional to use. /// - Task>> CreateMatchPhrasesAsync( - Func> loadItemAsync, + Task>> CreateMatchPhrasesAsync( + Func> loadItemAsync, CancellationToken cancellationToken = default); /// - Task>> CreateMatchPhrasesAsync( - Func> loadItemAsync, + Task>> CreateMatchPhrasesAsync( + Func> loadItemAsync, CancellationToken cancellationToken = default); - /// + /// /// A function capable of retrieving the original text that was indexed against the key. The loaded text will be assumed to be /// unchanged since it was indexed. - IEnumerable> CreateMatchPhrases( + IEnumerable> CreateMatchPhrases( Func loadText); - /// + /// /// A function capable of asynchronously retrieving the original text that was indexed against the key. 
The loaded text will be /// assumed to be unchanged since it was indexed. /// /// The optional to use. /// - Task>> CreateMatchPhrasesAsync( + Task>> CreateMatchPhrasesAsync( Func> loadTextAsync, CancellationToken cancellationToken = default); /// - Task>> CreateMatchPhrasesAsync( + Task>> CreateMatchPhrasesAsync( Func> loadTextAsync, CancellationToken cancellationToken = default); diff --git a/src/Lifti.Core/ITokenLocation.cs b/src/Lifti.Core/ITokenLocation.cs new file mode 100644 index 00000000..09e6f9db --- /dev/null +++ b/src/Lifti.Core/ITokenLocation.cs @@ -0,0 +1,23 @@ +using Lifti.Querying; +using System; +using System.Collections.Generic; + +namespace Lifti +{ + internal interface ITokenLocation : IComparable, IEquatable + { + /// + /// Gets the maximum index in the field that the token matched. + /// + int MaxTokenIndex { get; } + + /// + /// Gets the minimum index in the field that the token matched. + /// + int MinTokenIndex { get; } + + void AddTo(HashSet collector); + + CompositeTokenLocation ComposeWith(ITokenLocation other); + } +} diff --git a/src/Lifti.Core/IdPool.cs b/src/Lifti.Core/IdPool.cs index f086a607..62c61a40 100644 --- a/src/Lifti.Core/IdPool.cs +++ b/src/Lifti.Core/IdPool.cs @@ -4,64 +4,38 @@ namespace Lifti { /// - /// Extends by adding additional methods for controlling - /// the addition and removal of items, caching and reusing the item ids. - /// - public class IdPool : ItemStore, IIdPool - where T : notnull + /// Provides methods for generating unique internal ids for documents. + /// + /// The type of key in the index. + internal class IdPool + where TKey : notnull { - private readonly Queue reusableIds = new Queue(); - private int nextId; - - /// - public int Add(T item, DocumentStatistics documentStatistics) - { - if (this.ItemLookup.ContainsKey(item)) - { - throw new LiftiException(ExceptionMessages.ItemAlreadyIndexed); - } - - var id = this.reusableIds.Count == 0 ? 
this.nextId++ : this.reusableIds.Dequeue(); - this.Add(id, item, new ItemMetadata(id, item, documentStatistics)); - return id; - } - - /// - public int ReleaseItem(T item) - { - var itemMetadata = this.ItemLookup[item]; - var id = itemMetadata.Id; - - this.ItemLookup = this.ItemLookup.Remove(item); - this.ItemIdLookup = this.ItemIdLookup.Remove(id); - this.IndexStatistics = this.IndexStatistics.Remove(itemMetadata.DocumentStatistics); - - this.reusableIds.Enqueue(id); - return id; - } - - /// - public void Add(int id, T item, DocumentStatistics documentStatistics) - { - if (this.ItemLookup.ContainsKey(item)) - { - throw new LiftiException(ExceptionMessages.ItemAlreadyIndexed); - } - - if (this.ItemIdLookup.ContainsKey(id)) - { - throw new LiftiException(ExceptionMessages.IdAlreadyUsed, id); - } - - this.Add(id, item, new ItemMetadata(id, item, documentStatistics)); - this.nextId = Math.Max(this.nextId, id + 1); - } - - private void Add(int id, T item, ItemMetadata itemMetadata) - { - this.ItemLookup = this.ItemLookup.Add(item, itemMetadata); - this.ItemIdLookup = this.ItemIdLookup.Add(id, itemMetadata); - this.IndexStatistics = this.IndexStatistics.Add(itemMetadata.DocumentStatistics); + private readonly Queue reusableIds = new(); + private int nextId; + + /// + /// Gets the next available id from the pool. + /// + public int Next() + { + return this.reusableIds.Count == 0 ? this.nextId++ : this.reusableIds.Dequeue(); + } + + /// + /// Returns the given id to the pool. + /// + public void Return(int id) + { + this.reusableIds.Enqueue(id); + } + + /// + /// Used during index deserialization to ensure that the next id generated is greater than any id used in + /// the index. 
+ /// + internal void RegisterUsedId(int id) + { + this.nextId = Math.Max(this.nextId, id + 1); } } } diff --git a/src/Lifti.Core/IndexMetadata.cs b/src/Lifti.Core/IndexMetadata.cs new file mode 100644 index 00000000..02557845 --- /dev/null +++ b/src/Lifti.Core/IndexMetadata.cs @@ -0,0 +1,245 @@ +using Lifti.Tokenization.Objects; +using System; +using System.Collections.Generic; +using System.Linq; + +namespace Lifti +{ + /// + public sealed class IndexMetadata : IIndexMetadata + where TKey : notnull + { + private readonly Dictionary scoreBoostMetadata; + private readonly IdPool idPool; + + internal IndexMetadata(IEnumerable configureObjectTypes) + { + this.idPool = new IdPool(); + this.DocumentKeyLookup = []; + this.DocumentIdLookup = []; + this.IndexStatistics = new(); + this.scoreBoostMetadata = configureObjectTypes.ToDictionary(o => o.Id, o => new ScoreBoostMetadata(o.ScoreBoostOptions)); + } + + /// + /// Creates a new instance that is a copy of the given instance and is safe to mutate. + /// + /// + internal IndexMetadata(IndexMetadata original) + { + this.idPool = original.idPool; + this.DocumentKeyLookup = new(original.DocumentKeyLookup); + this.DocumentIdLookup = new(original.DocumentIdLookup); + this.IndexStatistics = new(original.IndexStatistics); + this.scoreBoostMetadata = original.scoreBoostMetadata; + } + + /// + [Obsolete("Use DocumentCount property instead")] + public int Count => this.DocumentCount; + + /// + public int DocumentCount => this.DocumentKeyLookup.Count; + + /// + public IndexStatistics IndexStatistics { get; } + + /// + /// Gets or sets the lookup of document key to information. + /// + private Dictionary> DocumentKeyLookup { get; set; } + + /// + /// Gets or sets the lookup of internal document id to information. + /// + private Dictionary> DocumentIdLookup { get; set; } + + /// \ + public IEnumerable> GetIndexedDocuments() + { + return this.DocumentKeyLookup.Values; + } + + /// + /// Adds the given document key and associated . 
Used when indexing loose text not associated with an object. + /// + /// + /// The key to add. + /// + /// + /// The document statistics for the document. + /// + /// + /// The internal document id. + /// + public int Add(TKey key, DocumentStatistics documentStatistics) + { + return this.Add( + id => DocumentMetadata.ForLooseText(id, key, documentStatistics)); + } + + /// + public void Add(DocumentMetadata documentMetadata) + { + if (documentMetadata is null) + { + throw new ArgumentNullException(nameof(documentMetadata)); + } + + // Make the ID pool aware of the ID we are using + this.idPool.RegisterUsedId(documentMetadata.Id); + + if (documentMetadata.ObjectTypeId is byte objectTypeId) + { + // Add the document to the overall score boost metadata for the object type + this.GetObjectTypeScoreBoostMetadata(objectTypeId) + .Add(documentMetadata); + } + + this.UpdateLookups(documentMetadata); + } + + /// + [Obsolete("Use GetDocumentMetadata instead")] + public DocumentMetadata GetMetadata(int documentId) + { + return this.GetDocumentMetadata(documentId); + } + + /// + public DocumentMetadata GetDocumentMetadata(int documentId) + { + if (!this.DocumentIdLookup.TryGetValue(documentId, out var documentMetadata)) + { + throw new LiftiException(ExceptionMessages.ItemNotFound); + } + + return documentMetadata; + } + + /// + public DocumentMetadata GetMetadata(TKey key) + { + if (!this.DocumentKeyLookup.TryGetValue(key, out var documentMetadata)) + { + throw new LiftiException(ExceptionMessages.ItemNotFound); + } + + return documentMetadata; + } + + /// + public bool Contains(TKey key) + { + return this.DocumentKeyLookup.ContainsKey(key); + } + + /// + public ScoreBoostMetadata GetObjectTypeScoreBoostMetadata(byte objectTypeId) + { + if (!this.scoreBoostMetadata.TryGetValue(objectTypeId, out var scoreBoostMetadata)) + { + throw new LiftiException(ExceptionMessages.UnknownObjectTypeId, objectTypeId); + } + + return scoreBoostMetadata; + } + + /// + DocumentMetadata 
IIndexMetadata.GetDocumentMetadata(int documentId) + { + return this.GetDocumentMetadata(documentId); + } + + /// + [Obsolete("Use GetDocumentMetadata instead")] + DocumentMetadata IIndexMetadata.GetMetadata(int documentId) + { + return this.GetDocumentMetadata(documentId); + } + + /// + /// Removes information about a document from this instance. + /// + /// + /// The internal document id of the removed document. + /// + internal int Remove(TKey key) + { + var documentInfo = this.DocumentKeyLookup[key]; + var documentId = documentInfo.Id; + this.DocumentKeyLookup.Remove(key); + this.DocumentIdLookup.Remove(documentId); + this.IndexStatistics.Remove(documentInfo.DocumentStatistics); + + if (documentInfo.ObjectTypeId is byte objectTypeId) + { + // Remove the document from the overall score boost metadata for the object type + this.GetObjectTypeScoreBoostMetadata(objectTypeId) + .Remove(documentInfo); + } + + this.idPool.Return(documentId); + + return documentId; + } + + /// + /// Adds the given document key associated to the given object. 
+ /// + /// + internal int Add(TKey key, TObject item, DocumentStatistics documentStatistics, ObjectTypeConfiguration objectConfiguration) + { + // Get the score boosts for the item + var scoreBoostOptions = objectConfiguration.ScoreBoostOptions; + var freshnessDate = scoreBoostOptions.FreshnessProvider?.Invoke(item); + var scoringMagnitude = scoreBoostOptions.MagnitudeProvider?.Invoke(item); + + return this.Add( + documentId => + { + var documentMetadata = DocumentMetadata.ForObject( + objectTypeId: objectConfiguration.Id, + documentId: documentId, + key, + documentStatistics, + freshnessDate, + scoringMagnitude); + + this.GetObjectTypeScoreBoostMetadata(objectConfiguration.Id) + .Add(documentMetadata); + + return documentMetadata; + }); + } + + private int Add(Func> createDocumentMetadata) + { + var documentId = this.idPool.Next(); + var documentMetadata = createDocumentMetadata(documentId); + + this.Add(documentMetadata); + + return documentId; + } + + private void UpdateLookups(DocumentMetadata documentMetadata) + { + var key = documentMetadata.Key; + var documentId = documentMetadata.Id; + if (this.DocumentKeyLookup.ContainsKey(key)) + { + throw new LiftiException(ExceptionMessages.ItemAlreadyIndexed); + } + + if (this.DocumentIdLookup.ContainsKey(documentId)) + { + throw new LiftiException(ExceptionMessages.IdAlreadyUsed, documentId); + } + + this.DocumentKeyLookup.Add(key, documentMetadata); + this.DocumentIdLookup.Add(documentId, documentMetadata); + this.IndexStatistics.Add(documentMetadata.DocumentStatistics); + } + } +} diff --git a/src/Lifti.Core/IndexMutation.cs b/src/Lifti.Core/IndexMutation.cs index 4c820ccf..7689a045 100644 --- a/src/Lifti.Core/IndexMutation.cs +++ b/src/Lifti.Core/IndexMutation.cs @@ -3,28 +3,38 @@ namespace Lifti { - internal class IndexMutation + internal class IndexMutation + where TKey : notnull { private readonly IndexNodeMutation root; - public IndexMutation(IndexNode root, IIndexNodeFactory indexNodeFactory) + public 
IndexMutation( + IndexNode root, + IndexMetadata originalMetadata, + IIndexNodeFactory indexNodeFactory) { this.root = new IndexNodeMutation(0, root, indexNodeFactory); + this.Metadata = new(originalMetadata); } - internal void Add(int itemId, byte fieldId, Token token) + /// + /// A mutating copy of the index metadata. + /// + public IndexMetadata Metadata { get; } + + internal void Add(int documentId, byte fieldId, Token token) { if (token is null) { throw new ArgumentNullException(nameof(token)); } - this.root.Index(itemId, fieldId, token.Locations, token.Value.AsMemory()); + this.root.Index(documentId, fieldId, token.Locations, token.Value.AsMemory()); } - internal void Remove(int itemId) + internal void Remove(int documentId) { - this.root.Remove(itemId); + this.root.Remove(documentId); } public IndexNode Apply() diff --git a/src/Lifti.Core/IndexNode.cs b/src/Lifti.Core/IndexNode.cs index 8a951c01..160b06b2 100644 --- a/src/Lifti.Core/IndexNode.cs +++ b/src/Lifti.Core/IndexNode.cs @@ -1,5 +1,4 @@ using System; -using System.Collections.Immutable; using System.Diagnostics.Contracts; using System.Text; @@ -12,8 +11,8 @@ public class IndexNode { internal IndexNode( ReadOnlyMemory intraNodeText, - ImmutableDictionary childNodes, - ImmutableDictionary> matches) + ChildNodeMap childNodes, + DocumentTokenMatchMap matches) { this.IntraNodeText = intraNodeText; this.ChildNodes = childNodes; @@ -26,18 +25,18 @@ public class IndexNode /// text has been completely processed. /// public ReadOnlyMemory IntraNodeText { get; } - + /// /// Gets any child nodes that can be navigated to from this instance, once the intra-node text has /// been processed. /// - public ImmutableDictionary ChildNodes { get; } + public ChildNodeMap ChildNodes { get; } /// /// Gets the set of matches that are found at this location in the index (once all the /// has been processed.) 
/// - public ImmutableDictionary> Matches { get; } + public DocumentTokenMatchMap Matches { get; } /// /// Gets a value indicating whether this node is empty. A node is considered empty if it doesn't have @@ -51,7 +50,7 @@ public class IndexNode public bool HasChildNodes => this.ChildNodes.Count > 0; /// - /// Gets a value indicating whether this instance has any items matched at it. + /// Gets a value indicating whether this instance has any documents matched at it. /// public bool HasMatches => this.Matches.Count > 0; @@ -71,6 +70,9 @@ public override string ToString() return builder.ToString(); } + /// + /// Formats a single child node linked from this instance to the given . + /// internal void ToString(StringBuilder builder, char linkChar, int currentDepth) { builder.Append(' ', currentDepth * 2) @@ -82,15 +84,30 @@ internal void ToString(StringBuilder builder, char linkChar, int currentDepth) this.FormatChildNodeText(builder, currentDepth); } + /// + /// Formats all the child nodes of this instance to the given . 
+ /// + /// + /// + internal void ToString(StringBuilder builder, int nextDepth) + { + foreach (var (character, childNode) in this.ChildNodes.CharacterMap) + { + builder.AppendLine(); + childNode.ToString(builder, character, nextDepth); + } + } + private void FormatChildNodeText(StringBuilder builder, int currentDepth) { if (this.HasChildNodes) { var nextDepth = currentDepth + 1; - foreach (var item in this.ChildNodes) + + foreach (var (character, childNode) in this.ChildNodes.CharacterMap) { builder.AppendLine(); - item.Value.ToString(builder, item.Key, nextDepth); + childNode.ToString(builder, character, nextDepth); } } } diff --git a/src/Lifti.Core/IndexNodeFactory.cs b/src/Lifti.Core/IndexNodeFactory.cs index 12d41d33..cad55b4d 100644 --- a/src/Lifti.Core/IndexNodeFactory.cs +++ b/src/Lifti.Core/IndexNodeFactory.cs @@ -1,5 +1,4 @@ using System; -using System.Collections.Immutable; namespace Lifti { @@ -31,8 +30,8 @@ public IndexNode CreateRootNode() { return new IndexNode( null, - ImmutableDictionary.Empty, - ImmutableDictionary>.Empty); + ChildNodeMap.Empty, + DocumentTokenMatchMap.Empty); } /// @@ -46,8 +45,8 @@ public IndexSupportLevelKind GetIndexSupportLevelForDepth(int depth) /// public IndexNode CreateNode( ReadOnlyMemory intraNodeText, - ImmutableDictionary childNodes, - ImmutableDictionary> matches) + ChildNodeMap childNodes, + DocumentTokenMatchMap matches) { return new IndexNode(intraNodeText, childNodes, matches); } diff --git a/src/Lifti.Core/IndexNodeMutation.cs b/src/Lifti.Core/IndexNodeMutation.cs index 18135c05..c8de9260 100644 --- a/src/Lifti.Core/IndexNodeMutation.cs +++ b/src/Lifti.Core/IndexNodeMutation.cs @@ -1,9 +1,7 @@ using System; using System.Collections.Generic; -using System.Collections.Immutable; using System.Diagnostics.CodeAnalysis; using System.Diagnostics.Contracts; -using System.Linq; using System.Text; namespace Lifti @@ -45,30 +43,12 @@ public IndexNodeMutation(int depth, IndexNode node, IIndexNodeFactory indexNodeF public 
bool HasChildNodes { get; private set; } public bool HasMatches { get; private set; } public ReadOnlyMemory IntraNodeText { get; private set; } - public Dictionary? MutatedChildNodes { get; private set; } + public ChildNodeMapMutation? ChildNodeMapMutation { get; private set; } - public IEnumerable> UnmutatedChildNodes - { - get - { - if (this.original == null) - { - return Array.Empty>(); - } - - if (this.MutatedChildNodes == null) - { - return this.original.ChildNodes; - } - - return this.original.ChildNodes.Where(n => !this.MutatedChildNodes.ContainsKey(n.Key)); - } - } - - public Dictionary>? MutatedMatches { get; private set; } + public DocumentTokenMatchMapMutation? DocumentTokenMatchMapMutation { get; private set; } internal void Index( - int itemId, + int documentId, byte fieldId, IReadOnlyList locations, ReadOnlyMemory remainingTokenText) @@ -77,10 +57,10 @@ public IndexNodeMutation(int depth, IndexNode node, IIndexNodeFactory indexNodeF switch (indexSupportLevel) { case IndexSupportLevelKind.CharacterByCharacter: - this.IndexFromCharacter(itemId, fieldId, locations, remainingTokenText); + this.IndexFromCharacter(documentId, fieldId, locations, remainingTokenText); break; case IndexSupportLevelKind.IntraNodeText: - this.IndexWithIntraNodeTextSupport(itemId, fieldId, locations, remainingTokenText); + this.IndexWithIntraNodeTextSupport(documentId, fieldId, locations, remainingTokenText); break; default: throw new LiftiException(ExceptionMessages.UnsupportedIndexSupportLevel, indexSupportLevel); @@ -89,78 +69,77 @@ public IndexNodeMutation(int depth, IndexNode node, IIndexNodeFactory indexNodeF internal IndexNode Apply() { - ImmutableDictionary childNodes; - ImmutableDictionary> matches; - - IEnumerable> mapNodeMutations() - { - return this.MutatedChildNodes.Select(p => new KeyValuePair(p.Key, p.Value.Apply())); - } + ChildNodeMap childNodes; + DocumentTokenMatchMap matches; if (this.original == null) { - childNodes = this.MutatedChildNodes == null ? 
ImmutableDictionary.Empty : mapNodeMutations().ToImmutableDictionary(); - matches = this.MutatedMatches == null ? ImmutableDictionary>.Empty : this.MutatedMatches.ToImmutableDictionary(); + childNodes = this.ChildNodeMapMutation?.Apply() ?? ChildNodeMap.Empty; + matches = this.DocumentTokenMatchMapMutation?.Apply() ?? DocumentTokenMatchMap.Empty; } else { - childNodes = this.original.ChildNodes; - if (this.MutatedChildNodes?.Count > 0) - { - childNodes = childNodes.SetItems(mapNodeMutations()); - } - - matches = this.MutatedMatches == null - ? this.original.Matches - : this.MutatedMatches.ToImmutableDictionary(); + childNodes = this.ChildNodeMapMutation?.Apply() ?? this.original.ChildNodes; + matches = this.DocumentTokenMatchMapMutation?.Apply() ?? this.original.Matches; } return this.indexNodeFactory.CreateNode(this.IntraNodeText, childNodes, matches); } - internal void Remove(int itemId) + internal void Remove(int documentId) { if (this.HasChildNodes) { - // First look through any already mutated child nodes - if (this.MutatedChildNodes != null) + if (this.ChildNodeMapMutation != null) { - foreach (var child in this.MutatedChildNodes) + // First look through any already mutated child nodes + foreach (var (_, mutatedChild) in this.ChildNodeMapMutation.GetMutated()) { - child.Value.Remove(itemId); + mutatedChild.Remove(documentId); } - } - // Then any unmutated children - foreach (var child in this.UnmutatedChildNodes) + // Then any unmutated children + foreach (var (childChar, childNode) in this.ChildNodeMapMutation.GetUnmutated()) + { + if (this.TryRemove(childNode, documentId, this.depth + 1, out var mutatedChild)) + { + this.ChildNodeMapMutation.Mutate(childChar, mutatedChild); + } + } + } + else if (this.original != null) { - if (this.TryRemove(child.Value, itemId, this.depth + 1, out var mutatedChild)) + // Then any unmutated children + foreach (var (childChar, childNode) in this.original.ChildNodes.CharacterMap) { - this.EnsureMutatedChildNodesCreated(); - 
this.MutatedChildNodes!.Add(child.Key, mutatedChild); + if (this.TryRemove(childNode, documentId, this.depth + 1, out var mutatedChild)) + { + var childNodeMapMutation = this.EnsureMutatedChildNodesCreated(); + childNodeMapMutation.Mutate(childChar, mutatedChild); + } } } } if (this.HasMatches) { - if (this.MutatedMatches != null) + if (this.DocumentTokenMatchMapMutation != null) { - this.MutatedMatches.Remove(itemId); + this.DocumentTokenMatchMapMutation.Remove(documentId); } else { - if (this.original != null && this.original.Matches.ContainsKey(itemId)) + if (this.original != null && this.original.Matches.HasDocument(documentId)) { // Mutate and remove - this.EnsureMutatedMatchesCreated(); - this.MutatedMatches!.Remove(itemId); + var matchMutation = this.EnsureMutatedMatchesCreated(); + matchMutation.Remove(documentId); } } } } - private bool TryRemove(IndexNode node, int itemId, int nodeDepth, [NotNullWhen(true)] out IndexNodeMutation? mutatedNode) + private bool TryRemove(IndexNode node, int documentId, int nodeDepth, [NotNullWhen(true)] out IndexNodeMutation? mutatedNode) { mutatedNode = null; @@ -168,9 +147,9 @@ private bool TryRemove(IndexNode node, int itemId, int nodeDepth, [NotNullWhen(t { // Work through the child nodes and recursively determine whether removals are needed from // them. If they are, then this instance will also become mutated. 
- foreach (var child in node.ChildNodes) + foreach (var (character, childNode) in node.ChildNodes.CharacterMap) { - if (this.TryRemove(child.Value, itemId, nodeDepth + 1, out var mutatedChild)) + if (this.TryRemove(childNode, documentId, nodeDepth + 1, out var mutatedChild)) { if (mutatedNode == null) { @@ -178,22 +157,19 @@ private bool TryRemove(IndexNode node, int itemId, int nodeDepth, [NotNullWhen(t mutatedNode.EnsureMutatedChildNodesCreated(); } - mutatedNode.MutatedChildNodes!.Add(child.Key, mutatedChild); + mutatedNode.ChildNodeMapMutation!.Mutate(character, mutatedChild); } } } if (node.HasMatches) { - // Removing an item from the nodes current matches will return the same dictionary - // if the item didn't exist - this removes the need for an extra Exists check - var mutatedMatches = node.Matches.Remove(itemId); - if (mutatedMatches != node.Matches) + if (node.Matches.HasDocument(documentId)) { mutatedNode ??= new IndexNodeMutation(nodeDepth, node, this.indexNodeFactory); - mutatedNode.EnsureMutatedMatchesCreated(); - mutatedNode.MutatedMatches!.Remove(itemId); + var matchMutation = mutatedNode.EnsureMutatedMatchesCreated(); + matchMutation.Remove(documentId); } } @@ -201,7 +177,7 @@ private bool TryRemove(IndexNode node, int itemId, int nodeDepth, [NotNullWhen(t } private void IndexFromCharacter( - int itemId, + int documentId, byte fieldId, IReadOnlyList locations, ReadOnlyMemory remainingTokenText, @@ -209,17 +185,17 @@ private bool TryRemove(IndexNode node, int itemId, int nodeDepth, [NotNullWhen(t { if (remainingTokenText.Length > testLength) { - this.ContinueIndexingAtChild(itemId, fieldId, locations, remainingTokenText, testLength); + this.ContinueIndexingAtChild(documentId, fieldId, locations, remainingTokenText, testLength); } else { // Remaining text == intraNodeText - this.AddMatchedItem(itemId, fieldId, locations); + this.AddMatchedDocument(documentId, fieldId, locations); } } private void ContinueIndexingAtChild( - int itemId, + int 
documentId, byte fieldId, IReadOnlyList locations, ReadOnlyMemory remainingTokenText, @@ -227,38 +203,42 @@ private bool TryRemove(IndexNode node, int itemId, int nodeDepth, [NotNullWhen(t { var indexChar = remainingTokenText.Span[remainingTextSplitPosition]; - this.EnsureMutatedChildNodesCreated(); - if (!this.MutatedChildNodes!.TryGetValue(indexChar, out var childNode)) - { - if (this.original != null && this.original.ChildNodes.TryGetValue(indexChar, out var originalChildNode)) - { + var childNodeMutation = this.EnsureMutatedChildNodesCreated(); + var childNode = childNodeMutation.GetOrCreateMutation( + indexChar, + () => this.original?.ChildNodes.TryGetValue(indexChar, out var originalChildNode) == true // the original had an unmutated child node that matched the index character - mutate it now - childNode = new IndexNodeMutation(this.depth + 1, originalChildNode, this.indexNodeFactory); - } - else - { + ? new IndexNodeMutation(this.depth + 1, originalChildNode, this.indexNodeFactory) // This is a novel branch in the index - childNode = new IndexNodeMutation(this); - } + : new IndexNodeMutation(this)); - // Track the mutated node - this.MutatedChildNodes.Add(indexChar, childNode); + childNode.Index(documentId, fieldId, locations, remainingTokenText.Slice(remainingTextSplitPosition + 1)); + } + + private ChildNodeMapMutation EnsureMutatedChildNodesCreated() + { + if (this.ChildNodeMapMutation == null) + { + this.HasChildNodes = true; + this.ChildNodeMapMutation = new ChildNodeMapMutation(this.original?.ChildNodes ?? 
ChildNodeMap.Empty); } - childNode.Index(itemId, fieldId, locations, remainingTokenText.Slice(remainingTextSplitPosition + 1)); + return this.ChildNodeMapMutation; } - private void EnsureMutatedChildNodesCreated() + private DocumentTokenMatchMapMutation EnsureMutatedMatchesCreated() { - if (this.MutatedChildNodes == null) + if (this.DocumentTokenMatchMapMutation == null) { - this.HasChildNodes = true; - this.MutatedChildNodes = new Dictionary(); + this.HasMatches = true; + this.DocumentTokenMatchMapMutation = new DocumentTokenMatchMapMutation(this.original?.Matches ?? DocumentTokenMatchMap.Empty); } + + return this.DocumentTokenMatchMapMutation; } private void IndexWithIntraNodeTextSupport( - int itemId, + int documentId, byte fieldId, IReadOnlyList locations, ReadOnlyMemory remainingTokenText) @@ -269,11 +249,11 @@ private void EnsureMutatedChildNodesCreated() { // Currently a leaf node this.IntraNodeText = remainingTokenText.Length == 0 ? null : remainingTokenText; - this.AddMatchedItem(itemId, fieldId, locations); + this.AddMatchedDocument(documentId, fieldId, locations); } else { - this.IndexFromCharacter(itemId, fieldId, locations, remainingTokenText); + this.IndexFromCharacter(documentId, fieldId, locations, remainingTokenText); } } else @@ -282,10 +262,12 @@ private void EnsureMutatedChildNodesCreated() { // The indexing ends before the start of the intranode text so we need to split this.SplitIntraNodeText(0); - this.AddMatchedItem(itemId, fieldId, locations); + this.AddMatchedDocument(documentId, fieldId, locations); return; } + // Test the current intra-node text against the remaining token text to see if + // we can index here or need to split var testLength = Math.Min(remainingTokenText.Length, this.IntraNodeText.Length); var intraNodeSpan = this.IntraNodeText.Span; var tokenSpan = remainingTokenText.Span; @@ -294,7 +276,7 @@ private void EnsureMutatedChildNodesCreated() if (tokenSpan[i] != intraNodeSpan[i]) { this.SplitIntraNodeText(i); - 
this.ContinueIndexingAtChild(itemId, fieldId, locations, remainingTokenText, i); + this.ContinueIndexingAtChild(documentId, fieldId, locations, remainingTokenText, i); return; } } @@ -305,52 +287,15 @@ private void EnsureMutatedChildNodesCreated() this.SplitIntraNodeText(testLength); } - this.IndexFromCharacter(itemId, fieldId, locations, remainingTokenText, testLength); + this.IndexFromCharacter(documentId, fieldId, locations, remainingTokenText, testLength); } } - private void AddMatchedItem(int itemId, byte fieldId, IReadOnlyList locations) + private void AddMatchedDocument(int documentId, byte fieldId, IReadOnlyList locations) { - this.EnsureMutatedMatchesCreated(); - var indexedToken = new IndexedToken(fieldId, locations); - if (this.MutatedMatches!.TryGetValue(itemId, out var itemFieldLocations)) - { - this.MutatedMatches[itemId] = itemFieldLocations.Add(new IndexedToken(fieldId, locations)); - } - else - { - if (this.MutatedMatches.TryGetValue(itemId, out var originalItemFieldLocations)) - { - this.MutatedMatches[itemId] = originalItemFieldLocations.Add(indexedToken); - } - else - { - // This item has not been indexed at this location previously - var builder = ImmutableList.CreateBuilder(); - builder.Add(indexedToken); - this.MutatedMatches.Add(itemId, builder.ToImmutable()); - } - } - } - - private void EnsureMutatedMatchesCreated() - { - if (this.MutatedMatches == null) - { - this.HasMatches = true; - - if (this.original?.HasMatches ?? 
false) - { - // Once we're mutating matches, copy everything across - this.MutatedMatches = new Dictionary>( - this.original.Matches); - } - else - { - this.MutatedMatches = new Dictionary>(); - } - } + var documentTokenMatchMutations = this.EnsureMutatedMatchesCreated(); + documentTokenMatchMutations.Add(documentId, indexedToken); } private void SplitIntraNodeText(int splitIndex) @@ -359,8 +304,8 @@ private void SplitIntraNodeText(int splitIndex) { HasMatches = this.HasMatches, HasChildNodes = this.HasChildNodes, - MutatedChildNodes = this.MutatedChildNodes, - MutatedMatches = this.MutatedMatches, + ChildNodeMapMutation = this.ChildNodeMapMutation, + DocumentTokenMatchMapMutation = this.DocumentTokenMatchMapMutation, IntraNodeText = splitIndex + 1 == this.IntraNodeText.Length ? null : this.IntraNodeText.Slice(splitIndex + 1), // Pass the original down to the child node - the only state that matters there is any unmutated child nodes/matches @@ -372,17 +317,14 @@ private void SplitIntraNodeText(int splitIndex) var splitChar = this.IntraNodeText.Span[splitIndex]; // Reset the matches at this node - this.MutatedMatches = null; + this.DocumentTokenMatchMapMutation = null; this.HasMatches = false; // Replace any remaining intra node text this.IntraNodeText = splitIndex == 0 ? 
null : this.IntraNodeText.Slice(0, splitIndex); this.HasChildNodes = true; - this.MutatedChildNodes = new Dictionary - { - { splitChar, splitChildNode } - }; + this.ChildNodeMapMutation = new(splitChar, splitChildNode); } [Pure] @@ -400,7 +342,7 @@ public override string ToString() return builder.ToString(); } - private void ToString(StringBuilder builder, char linkChar, int currentDepth) + internal void ToString(StringBuilder builder, char linkChar, int currentDepth) { builder.Append(' ', currentDepth * 2) .Append(linkChar) @@ -417,23 +359,15 @@ private void FormatChildNodeText(StringBuilder builder, int currentDepth) if (this.HasChildNodes) { var nextDepth = currentDepth + 1; - if (this.original != null) - { - foreach (var item in this.original.ChildNodes.Where(e => this.MutatedChildNodes == null || !this.MutatedChildNodes.ContainsKey(e.Key))) - { - builder.AppendLine(); - item.Value.ToString(builder, item.Key, nextDepth); - } + + if (this.ChildNodeMapMutation is { } childNodeMutations) + { + childNodeMutations.ToString(builder, currentDepth); } - - if (this.MutatedChildNodes != null) + else { - foreach (var item in this.MutatedChildNodes) - { - builder.AppendLine(); - item.Value.ToString(builder, item.Key, nextDepth); - } - } + this.original?.ToString(builder, nextDepth); + } } } @@ -446,7 +380,11 @@ private void FormatNodeText(StringBuilder builder) if (this.HasMatches) { - builder.Append($" [{this.original?.Matches.Count ?? 0} original matche(s) - {this.MutatedMatches?.Count ?? 0} mutated]"); + builder.Append( +#if !NETSTANDARD + System.Globalization.CultureInfo.InvariantCulture, +#endif + $" [{this.original?.Matches.Count ?? 0} original match(es) - {this.DocumentTokenMatchMapMutation?.MutationCount ?? 
0} mutated]"); } } } diff --git a/src/Lifti.Core/IndexOptions.cs b/src/Lifti.Core/IndexOptions.cs index 416cb198..83118a3d 100644 --- a/src/Lifti.Core/IndexOptions.cs +++ b/src/Lifti.Core/IndexOptions.cs @@ -1,4 +1,6 @@ -namespace Lifti +using System; + +namespace Lifti { /// /// Options that are passed to the index at construction time. @@ -12,15 +14,19 @@ internal IndexOptions() /// /// Gets the depth of the index tree after which intra-node text is supported. /// A value of zero indicates that intra-node text is always supported. To disable - /// intra-node text completely, set this to an arbitrarily large value, e.g. . + /// intra-node text completely, set this to an arbitrarily large value, e.g. . /// The default value is 4. /// public int SupportIntraNodeTextAfterIndexDepth { get; internal set; } = 4; + /// + [Obsolete("Use DuplicateKeyBehavior property instead")] + public DuplicateKeyBehavior DuplicateItemBehavior => this.DuplicateKeyBehavior; + /// - /// Gets the behavior the index should exhibit when an item that already exists in the index is indexed again. - /// The default value is . + /// Gets the behavior the index should exhibit when key that already exists in the index is added again. + /// The default value is . 
/// - public DuplicateItemBehavior DuplicateItemBehavior { get; internal set; } = DuplicateItemBehavior.ReplaceItem; + public DuplicateKeyBehavior DuplicateKeyBehavior { get; internal set; } = DuplicateKeyBehavior.Replace; } } diff --git a/src/Lifti.Core/IndexSnapshot.cs b/src/Lifti.Core/IndexSnapshot.cs index b5ec45f8..827f19ec 100644 --- a/src/Lifti.Core/IndexSnapshot.cs +++ b/src/Lifti.Core/IndexSnapshot.cs @@ -1,4 +1,5 @@ using Lifti.Querying; +using System; namespace Lifti { @@ -8,18 +9,24 @@ public class IndexSnapshot : IIndexSnapshot, IIndexSnapshot { private readonly IIndexNavigatorPool indexNavigatorPool; - internal IndexSnapshot(IIndexNavigatorPool indexNavigatorPool, FullTextIndex index) + internal IndexSnapshot( + IIndexNavigatorPool indexNavigatorPool, + IIndexedFieldLookup fieldLookup, + IndexNode rootNode, + IIndexMetadata indexMetadata) { - this.Items = index.Items.Snapshot(); - this.Root = index.Root; + this.Metadata = indexMetadata; + this.Root = rootNode; this.indexNavigatorPool = indexNavigatorPool; - - // Field lookup is read-only once the index is constructed - this.FieldLookup = index.FieldLookup; + this.FieldLookup = fieldLookup; } + + /// + [Obsolete("Use Metadata property instead")] + public IIndexMetadata Items => this.Metadata; /// - public IItemStore Items { get; } + public IIndexMetadata Metadata { get; } /// public IndexNode Root { get; } @@ -27,7 +34,9 @@ internal IndexSnapshot(IIndexNavigatorPool indexNavigatorPool, FullTextIndex public IIndexedFieldLookup FieldLookup { get; } - IItemStore IIndexSnapshot.Items => this.Items; + IIndexMetadata IIndexSnapshot.Metadata => this.Metadata; + + IIndexMetadata IIndexSnapshot.Items => this.Metadata; /// public IIndexNavigator CreateNavigator() diff --git a/src/Lifti.Core/IndexStatistics.cs b/src/Lifti.Core/IndexStatistics.cs index fec4c069..5719b519 100644 --- a/src/Lifti.Core/IndexStatistics.cs +++ b/src/Lifti.Core/IndexStatistics.cs @@ -1,5 +1,4 @@ -using System; -using 
System.Collections.Immutable; +using System.Collections.Generic; namespace Lifti { @@ -8,56 +7,70 @@ namespace Lifti /// public class IndexStatistics { - private IndexStatistics() + private readonly Dictionary tokenCountByField; + + internal IndexStatistics() { - this.TokenCountByField = ImmutableDictionary.Empty; + this.tokenCountByField = []; } - internal IndexStatistics(ImmutableDictionary tokenCountByField, long totalTokenCount) + /// + /// Creates a copy of the specified instance and safe to mutate. + /// + internal IndexStatistics(IndexStatistics original) { - this.TokenCountByField = tokenCountByField; - this.TotalTokenCount = totalTokenCount; + this.tokenCountByField = new(original.tokenCountByField); + this.TotalTokenCount = original.TotalTokenCount; } - internal static IndexStatistics Empty { get; } = new IndexStatistics(); + internal IndexStatistics(Dictionary tokenCountByField, long totalTokenCount) + { + this.tokenCountByField = tokenCountByField; + this.TotalTokenCount = totalTokenCount; + } /// - /// Gets a dictionary containing the token count for each field indexed in the index. + /// Gets the token count for the specified field. /// - public ImmutableDictionary TokenCountByField { get; } + public long GetFieldTokenCount(byte fieldId) + { + if (!this.tokenCountByField.TryGetValue(fieldId, out var tokenCount)) + { + throw new LiftiException(ExceptionMessages.UnknownField, fieldId); + } + + return tokenCount; + } /// /// Gets the total token count for all documents in the index. /// - public long TotalTokenCount { get; } + public long TotalTokenCount { get; private set; } - internal IndexStatistics Remove(DocumentStatistics documentStatistics) + /// + /// Gets the total number of tokens stored each field in the index. 
+ /// + public IReadOnlyDictionary TokenCountByField => this.tokenCountByField; + + internal void Remove(DocumentStatistics documentStatistics) { - return Adjust(documentStatistics, -1); + this.Adjust(documentStatistics, -1); } - internal IndexStatistics Add(DocumentStatistics documentStatistics) + internal void Add(DocumentStatistics documentStatistics) { - return Adjust(documentStatistics, 1); + this.Adjust(documentStatistics, 1); } - private IndexStatistics Adjust(DocumentStatistics documentStatistics, int direction) + private void Adjust(DocumentStatistics documentStatistics, int direction) { - if (documentStatistics is null) - { - throw new ArgumentNullException(nameof(documentStatistics)); - } - - var updatedFieldTokenCount = this.TokenCountByField; foreach (var fieldTokenCount in documentStatistics.TokenCountByField) { - updatedFieldTokenCount.TryGetValue(fieldTokenCount.Key, out var previousCount); - updatedFieldTokenCount = updatedFieldTokenCount.SetItem(fieldTokenCount.Key, previousCount + (fieldTokenCount.Value * direction)); + this.tokenCountByField.TryGetValue(fieldTokenCount.Key, out var previousCount); + this.tokenCountByField[fieldTokenCount.Key] = previousCount + (fieldTokenCount.Value * direction); } - return new IndexStatistics( - updatedFieldTokenCount, - this.TotalTokenCount + (documentStatistics.TotalTokenCount * direction)); + this.TotalTokenCount += documentStatistics.TotalTokenCount * direction; } } } diff --git a/src/Lifti.Core/IndexedFieldDetails.cs b/src/Lifti.Core/IndexedFieldDetails.cs index 12a9f6ed..6f5ba2d2 100644 --- a/src/Lifti.Core/IndexedFieldDetails.cs +++ b/src/Lifti.Core/IndexedFieldDetails.cs @@ -21,7 +21,8 @@ public abstract class IndexedFieldDetails ITextExtractor textExtractor, IIndexTokenizer tokenizer, IThesaurus thesaurus, - string? dynamicFieldReaderName) + string? 
dynamicFieldReaderName, + double scoreBoost) { this.Id = id; this.Name = name; @@ -30,7 +31,8 @@ public abstract class IndexedFieldDetails this.TextExtractor = textExtractor; this.Tokenizer = tokenizer; this.Thesaurus = thesaurus; - this.DynamicFieldReaderName = dynamicFieldReaderName; + this.DynamicFieldReaderName = dynamicFieldReaderName; + this.ScoreBoost = scoreBoost; } /// @@ -71,11 +73,19 @@ public abstract class IndexedFieldDetails /// /// Gets the name of the dynamic field reader that generated this field. If this field is not a dynamic field, this will be null. /// - public string? DynamicFieldReaderName { get; } - + public string? DynamicFieldReaderName { get; } + + /// + /// Gets the score boost to apply to this field. + /// + public double ScoreBoost { get; } + /// - /// Reads the text for the field from the specified item. The item must be of the type specified by the property. - /// + /// Reads the text for the field from the specified object. The object must be of the type specified by the property. + /// + /// + /// Thrown when the is not of the expected type. + /// public abstract ValueTask> ReadAsync(object item, CancellationToken cancellationToken); internal void Deconstruct(out byte fieldId, out ITextExtractor textExtractor, out IIndexTokenizer tokenizer, out IThesaurus thesaurus) @@ -88,32 +98,34 @@ internal void Deconstruct(out byte fieldId, out ITextExtractor textExtractor, ou } /// - public class IndexedFieldDetails : IndexedFieldDetails + public class IndexedFieldDetails : IndexedFieldDetails { - private readonly Func>> fieldReader; + private readonly Func>> fieldReader; private IndexedFieldDetails( byte id, string name, - Func>> fieldReader, + Func>> fieldReader, FieldKind fieldKind, ITextExtractor textExtractor, IIndexTokenizer tokenizer, IThesaurus thesaurus, - string? dynamicFieldReaderName) - : base(id, name, typeof(TItem), fieldKind, textExtractor, tokenizer, thesaurus, dynamicFieldReaderName) + string? 
dynamicFieldReaderName, + double scoreBoost) + : base(id, name, typeof(TObject), fieldKind, textExtractor, tokenizer, thesaurus, dynamicFieldReaderName, scoreBoost) { this.fieldReader = fieldReader; } - internal static IndexedFieldDetails Static(byte id, + internal static IndexedFieldDetails Static(byte id, string name, - Func>> fieldReader, + Func>> fieldReader, ITextExtractor textExtractor, IIndexTokenizer tokenizer, - IThesaurus thesaurus) + IThesaurus thesaurus, + double scoreBoost) { - return new IndexedFieldDetails( + return new IndexedFieldDetails( id, name, fieldReader, @@ -121,18 +133,20 @@ public class IndexedFieldDetails : IndexedFieldDetails textExtractor, tokenizer, thesaurus, - null); + null, + scoreBoost); } - internal static IndexedFieldDetails Dynamic(byte id, + internal static IndexedFieldDetails Dynamic(byte id, string name, string dynamicFieldReaderName, - Func>> fieldReader, + Func>> fieldReader, ITextExtractor textExtractor, IIndexTokenizer tokenizer, - IThesaurus thesaurus) + IThesaurus thesaurus, + double scoreBoost) { - return new IndexedFieldDetails( + return new IndexedFieldDetails( id, name, fieldReader, @@ -140,7 +154,8 @@ public class IndexedFieldDetails : IndexedFieldDetails textExtractor, tokenizer, thesaurus, - dynamicFieldReaderName); + dynamicFieldReaderName, + scoreBoost); } /// @@ -151,12 +166,12 @@ public override ValueTask> ReadAsync(object item, Cancellati throw new ArgumentNullException(nameof(item)); } - if (item is TItem typedItem) + if (item is TObject typedItem) { return this.fieldReader(typedItem, cancellationToken); } - throw new ArgumentException($"Item type {item.GetType().Name} is not expected type {this.ObjectType.Name}"); + throw new ArgumentException($"Object type {item.GetType().Name} is not expected type {this.ObjectType.Name}"); } } diff --git a/src/Lifti.Core/IndexedFieldLookup.cs b/src/Lifti.Core/IndexedFieldLookup.cs index 853771f9..4b5579df 100644 --- a/src/Lifti.Core/IndexedFieldLookup.cs +++ 
b/src/Lifti.Core/IndexedFieldLookup.cs @@ -12,12 +12,12 @@ internal class IndexedFieldLookup : IIndexedFieldLookup private readonly Dictionary fieldToDetailsLookup = new(StringComparer.OrdinalIgnoreCase); /// - /// This allows us to create a dynamic field at runtime while only knowing the name of the dynamic field reader. - /// In this situation we won't know the associated item type and we can avoid runtime reflection. + /// This allows us to create a dynamic field at runtime while only knowing the name of the dynamic field reader, + /// because in this situation we won't know the associated object type and we want to avoid runtime reflection. /// - private readonly Dictionary> dynamicFieldFactoryLookup = new(); + private readonly Dictionary> dynamicFieldFactoryLookup = []; - private readonly Dictionary idToFieldLookup = new(); + private readonly Dictionary idToFieldLookup = []; private int nextId; /// @@ -59,7 +59,7 @@ public bool IsKnownField(Type objectType, string fieldName) return this.fieldToDetailsLookup.TryGetValue(fieldName, out var fieldDetails) && fieldDetails.ObjectType == objectType; } - internal void RegisterDynamicFieldReader(DynamicFieldReader reader) + internal void RegisterDynamicFieldReader(DynamicFieldReader reader) { if (this.dynamicFieldFactoryLookup.ContainsKey(reader.Name)) { @@ -69,17 +69,18 @@ internal void RegisterDynamicFieldReader(DynamicFieldReader reader this.dynamicFieldFactoryLookup[reader.Name] = fieldName => this.GetOrCreateDynamicFieldInfo(reader, fieldName); } - internal void RegisterStaticField(IStaticFieldReader reader) + internal void RegisterStaticField(IStaticFieldReader reader) { this.RegisterField( reader.Name, - (name, id) => IndexedFieldDetails.Static( + (name, id) => IndexedFieldDetails.Static( id, name, reader.ReadAsync, reader.TextExtractor, reader.Tokenizer, - reader.Thesaurus)); + reader.Thesaurus, + reader.ScoreBoost)); } internal IndexedFieldDetails GetOrCreateDynamicFieldInfo(string dynamicFieldReaderName, 
string fieldName) @@ -92,20 +93,21 @@ internal IndexedFieldDetails GetOrCreateDynamicFieldInfo(string dynamicFieldRead throw new LiftiException(ExceptionMessages.UnknownDynamicFieldReaderNameEncountered, dynamicFieldReaderName); } - internal IndexedFieldDetails GetOrCreateDynamicFieldInfo(DynamicFieldReader fieldReader, string fieldName) + internal IndexedFieldDetails GetOrCreateDynamicFieldInfo(DynamicFieldReader fieldReader, string fieldName) { if (!this.fieldToDetailsLookup.TryGetValue(fieldName, out var details)) { details = this.RegisterField( fieldName, - (name, id) => IndexedFieldDetails.Dynamic( + (name, id) => IndexedFieldDetails.Dynamic( id, name, fieldReader.Name, (item, cancellationToken) => fieldReader.ReadAsync(item, fieldName, cancellationToken), fieldReader.TextExtractor, fieldReader.Tokenizer, - fieldReader.Thesaurus)); + fieldReader.Thesaurus, + fieldReader.ScoreBoost)); } else { @@ -115,7 +117,7 @@ internal IndexedFieldDetails GetOrCreateDynamicFieldInfo(DynamicFieldRead throw new LiftiException(ExceptionMessages.CannotRegisterDynamicFieldWithSameNameAsStaticField, fieldName); } - if (details.ObjectType != typeof(TItem)) + if (details.ObjectType != typeof(TObject)) { // Field was previously registered with throw new LiftiException(ExceptionMessages.CannotRegisterDynamicFieldWithSameNameForTwoDifferentObjectTypes, fieldName); @@ -125,9 +127,9 @@ internal IndexedFieldDetails GetOrCreateDynamicFieldInfo(DynamicFieldRead return details; } - private IndexedFieldDetails RegisterField( + private IndexedFieldDetails RegisterField( string fieldName, - Func> createFieldDetails) + Func> createFieldDetails) { if (this.fieldToDetailsLookup.ContainsKey(fieldName)) { diff --git a/src/Lifti.Core/ItemMetadata.cs b/src/Lifti.Core/ItemMetadata.cs deleted file mode 100644 index 9f62aca1..00000000 --- a/src/Lifti.Core/ItemMetadata.cs +++ /dev/null @@ -1,25 +0,0 @@ -namespace Lifti -{ - /// - public class ItemMetadata : IItemMetadata - { - /// - /// Constructs a new 
instance of . - /// - public ItemMetadata(int id, T item, DocumentStatistics documentStatistics) - { - this.Id = id; - this.Item = item; - this.DocumentStatistics = documentStatistics; - } - - /// - public T Item { get; } - - /// - public int Id { get; } - - /// - public DocumentStatistics DocumentStatistics { get; } - } -} \ No newline at end of file diff --git a/src/Lifti.Core/ItemStore.cs b/src/Lifti.Core/ItemStore.cs deleted file mode 100644 index 5f568fec..00000000 --- a/src/Lifti.Core/ItemStore.cs +++ /dev/null @@ -1,92 +0,0 @@ -using System.Collections.Generic; -using System.Collections.Immutable; - -namespace Lifti -{ - /// - public class ItemStore : IItemStore - where T : notnull - { - internal ItemStore() - : this( - ImmutableDictionary>.Empty, - ImmutableDictionary>.Empty, - IndexStatistics.Empty) - { - } - - private ItemStore( - ImmutableDictionary> itemLookup, - ImmutableDictionary> itemIdLookup, - IndexStatistics indexStatistics) - { - this.ItemLookup = itemLookup; - this.ItemIdLookup = itemIdLookup; - this.IndexStatistics = indexStatistics; - } - - /// - public int Count => this.ItemLookup.Count; - - /// - public IndexStatistics IndexStatistics { get; protected set; } = IndexStatistics.Empty; - - /// - /// Gets or sets the lookup of item key to information. - /// - protected ImmutableDictionary> ItemLookup { get; set; } - - /// - /// Gets or sets the lookup of internal item id to information. 
- /// - protected ImmutableDictionary> ItemIdLookup { get; set; } - - /// \ - public IEnumerable> GetIndexedItems() - { - return this.ItemLookup.Values; - } - - /// - public IItemMetadata GetMetadata(int id) - { - if (!this.ItemIdLookup.TryGetValue(id, out var itemMetadata)) - { - throw new LiftiException(ExceptionMessages.ItemNotFound); - } - - return itemMetadata; - } - - /// - public IItemMetadata GetMetadata(T item) - { - if (!this.ItemLookup.TryGetValue(item, out var itemMetadata)) - { - throw new LiftiException(ExceptionMessages.ItemNotFound); - } - - return itemMetadata; - } - - /// - public bool Contains(T item) - { - return this.ItemLookup.ContainsKey(item); - } - - /// - public IItemStore Snapshot() - { - return new ItemStore(this.ItemLookup, - this.ItemIdLookup, - this.IndexStatistics - ); - } - - IItemMetadata IItemStore.GetMetadata(int id) - { - return this.GetMetadata(id); - } - } -} diff --git a/src/Lifti.Core/Lifti.Core.csproj b/src/Lifti.Core/Lifti.Core.csproj index b6cfee87..1adcfde9 100644 --- a/src/Lifti.Core/Lifti.Core.csproj +++ b/src/Lifti.Core/Lifti.Core.csproj @@ -2,7 +2,7 @@ Library - netstandard2;net6.0;net7.0 + netstandard2;net6.0;net7.0;net8.0 Lifti Mike Goatly LIFTI @@ -15,7 +15,7 @@ Lifti.Core Mike Goatly Lifti.Core - 9.0 + latest enable LIFTI favicon-256.png @@ -23,6 +23,7 @@ True latest-all True + RS0026 @@ -78,13 +79,10 @@ - - all - runtime; build; native; contentfiles; analyzers; buildtransitive + + all + runtime; build; native; contentfiles; analyzers; buildtransitive - - - @@ -104,4 +102,14 @@ + + + + + + + + + + diff --git a/src/Lifti.Core/LiftiException.cs b/src/Lifti.Core/LiftiException.cs index 3bf53340..3d0c642b 100644 --- a/src/Lifti.Core/LiftiException.cs +++ b/src/Lifti.Core/LiftiException.cs @@ -22,9 +22,11 @@ public class LiftiException : Exception /// public LiftiException(string message, System.Exception inner) : base(message, inner) { } +#if NETSTANDARD /// protected LiftiException( SerializationInfo info, 
StreamingContext context) : base(info, context) { } +#endif } } diff --git a/src/Lifti.Core/PublicAPI.Unshipped.txt b/src/Lifti.Core/PublicAPI.Unshipped.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/src/Lifti.Core/PublicApi/MarkShipped.cmd b/src/Lifti.Core/PublicApi/MarkShipped.cmd new file mode 100644 index 00000000..98670b30 --- /dev/null +++ b/src/Lifti.Core/PublicApi/MarkShipped.cmd @@ -0,0 +1,2 @@ +@echo off +powershell -noprofile -executionPolicy RemoteSigned -file "%~dp0\markshipped.ps1" \ No newline at end of file diff --git a/src/Lifti.Core/PublicApi/MarkShipped.ps1 b/src/Lifti.Core/PublicApi/MarkShipped.ps1 new file mode 100644 index 00000000..2bf168d1 --- /dev/null +++ b/src/Lifti.Core/PublicApi/MarkShipped.ps1 @@ -0,0 +1,51 @@ +[CmdletBinding(PositionalBinding=$false)] +param () + +Set-StrictMode -version 2.0 +$ErrorActionPreference = "Stop" + +function MarkShipped([string]$dir) { + $shippedFilePath = Join-Path $dir "PublicAPI.Shipped.txt" + $shipped = Get-Content $shippedFilePath + if ($null -eq $shipped) { + $shipped = @() + } + + $unshippedFilePath = Join-Path $dir "PublicAPI.Unshipped.txt" + $unshipped = Get-Content $unshippedFilePath + $removed = @() + $removedPrefix = "*REMOVED*"; + Write-Host "Processing $dir" + + foreach ($item in $unshipped) { + if ($item.Length -gt 0) { + if ($item.StartsWith($removedPrefix)) { + $item = $item.Substring($removedPrefix.Length) + $removed += $item + } + else { + $shipped += $item + } + } + } + + $shipped | Sort-Object | ?{ -not $removed.Contains($_) } | Out-File $shippedFilePath -Encoding Ascii + "" | Out-File $unshippedFilePath -Encoding Ascii +} + +try { + Push-Location $PSScriptRoot + + foreach ($file in Get-ChildItem -re -in "PublicApi.Shipped.txt") { + $dir = Split-Path -parent $file + MarkShipped $dir + } +} +catch { + Write-Host $_ + Write-Host $_.Exception + exit 1 +} +finally { + Pop-Location +} diff --git a/src/Lifti.Core/PublicApi/PublicAPI.Shipped.txt 
b/src/Lifti.Core/PublicApi/PublicAPI.Shipped.txt new file mode 100644 index 00000000..75b2cece --- /dev/null +++ b/src/Lifti.Core/PublicApi/PublicAPI.Shipped.txt @@ -0,0 +1,730 @@ +#nullable enable +abstract Lifti.IndexedFieldDetails.ReadAsync(object! item, System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask!> +abstract Lifti.Querying.QueryParts.BinaryQueryOperator.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +abstract Lifti.Querying.QueryParts.BinaryQueryOperator.Precedence.get -> Lifti.Querying.OperatorPrecedence +abstract Lifti.Querying.QueryParts.ScoreBoostedQueryPart.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +abstract Lifti.Querying.QueryParts.ScoreBoostedQueryPart.RunWeightingCalculation(System.Func! navigatorCreator) -> double +abstract Lifti.Serialization.IndexDeserializerBase.DeserializeDocumentMetadataAsync(System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask!> +abstract Lifti.Serialization.IndexDeserializerBase.DeserializeIndexNodeHierarchyAsync(Lifti.Serialization.SerializedFieldIdMap serializedFieldIdMap, Lifti.IIndexNodeFactory! indexNodeFactory, System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask +abstract Lifti.Serialization.IndexDeserializerBase.DeserializeKnownFieldsAsync(System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask +abstract Lifti.Serialization.IndexSerializerBase.WriteFieldsAsync(System.Collections.Generic.IReadOnlyList! fields, System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask +abstract Lifti.Serialization.IndexSerializerBase.WriteIndexMetadataAsync(Lifti.IIndexSnapshot! 
index, System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask +abstract Lifti.Serialization.IndexSerializerBase.WriteNodesAsync(Lifti.IndexNode! rootNode, System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask +Lifti.ChildNodeMap +Lifti.ChildNodeMap.CharacterMap.get -> System.Collections.Generic.IReadOnlyList! +Lifti.ChildNodeMap.ChildNodeMap() -> void +Lifti.ChildNodeMap.ChildNodeMap(Lifti.ChildNodeMapEntry[]! map) -> void +Lifti.ChildNodeMap.Count.get -> int +Lifti.ChildNodeMap.Equals(Lifti.ChildNodeMap other) -> bool +Lifti.ChildNodeMap.TryGetValue(char value, out Lifti.IndexNode? nextNode) -> bool +Lifti.ChildNodeMapEntry +Lifti.ChildNodeMapEntry.ChildChar.get -> char +Lifti.ChildNodeMapEntry.ChildChar.set -> void +Lifti.ChildNodeMapEntry.ChildNode.get -> Lifti.IndexNode! +Lifti.ChildNodeMapEntry.ChildNode.set -> void +Lifti.ChildNodeMapEntry.ChildNodeMapEntry() -> void +Lifti.ChildNodeMapEntry.ChildNodeMapEntry(char ChildChar, Lifti.IndexNode! ChildNode) -> void +Lifti.DocumentMetadata +Lifti.DocumentMetadata.DocumentMetadata(byte? objectTypeId, int documentId, Lifti.DocumentStatistics documentStatistics, System.DateTime? scoringFreshnessDate, double? scoringMagnitude) -> void +Lifti.DocumentMetadata.DocumentStatistics.get -> Lifti.DocumentStatistics +Lifti.DocumentMetadata.Id.get -> int +Lifti.DocumentMetadata.ObjectTypeId.get -> byte? +Lifti.DocumentMetadata.ScoringFreshnessDate.get -> System.DateTime? +Lifti.DocumentMetadata.ScoringMagnitude.get -> double? +Lifti.DocumentMetadata +Lifti.DocumentMetadata.Item.get -> TKey +Lifti.DocumentMetadata.Key.get -> TKey +Lifti.DocumentPhrases +Lifti.DocumentPhrases.DocumentPhrases(TObject item, Lifti.SearchResult! SearchResult, System.Collections.Generic.IReadOnlyList!>! 
phrases) -> void +Lifti.DocumentPhrases.Item.get -> TObject +Lifti.DocumentPhrases.Item.init -> void +Lifti.DocumentPhrases +Lifti.DocumentPhrases.DocumentPhrases(Lifti.SearchResult! SearchResult, System.Collections.Generic.IReadOnlyList!>! FieldPhrases) -> void +Lifti.DocumentPhrases.EnumeratePhrases() -> System.Collections.Generic.IEnumerable! +Lifti.DocumentPhrases.FieldPhrases.get -> System.Collections.Generic.IReadOnlyList!>! +Lifti.DocumentPhrases.FieldPhrases.init -> void +Lifti.DocumentPhrases.SearchResult.get -> Lifti.SearchResult! +Lifti.DocumentPhrases.SearchResult.init -> void +Lifti.DocumentStatistics +Lifti.DocumentStatistics.DocumentStatistics() -> void +Lifti.DocumentStatistics.DocumentStatistics(System.Collections.Generic.IReadOnlyDictionary! tokenCountByField, int totalTokenCount) -> void +Lifti.DocumentStatistics.TokenCountByField.get -> System.Collections.Generic.IReadOnlyDictionary! +Lifti.DocumentStatistics.TotalTokenCount.get -> int +Lifti.DocumentTokenMatchMap +Lifti.DocumentTokenMatchMap.Count.get -> int +Lifti.DocumentTokenMatchMap.DocumentTokenMatchMap() -> void +Lifti.DocumentTokenMatchMap.DocumentTokenMatchMap(System.Collections.Generic.Dictionary!>! data) -> void +Lifti.DocumentTokenMatchMap.Enumerate() -> System.Collections.Generic.IEnumerable<(int documentId, System.Collections.Generic.IReadOnlyList! indexedTokens)>! +Lifti.DocumentTokenMatchMap.Equals(Lifti.DocumentTokenMatchMap other) -> bool +Lifti.DocumentTokenMatchMap.HasDocument(int documentId) -> bool +Lifti.DocumentTokenMatchMap.TryGetValue(int documentId, out System.Collections.Generic.IReadOnlyList? 
tokens) -> bool +Lifti.DuplicateKeyBehavior +Lifti.DuplicateKeyBehavior.Replace = 0 -> Lifti.DuplicateKeyBehavior +Lifti.DuplicateKeyBehavior.ThrowException = 1 -> Lifti.DuplicateKeyBehavior +Lifti.FieldKind +Lifti.FieldKind.Dynamic = 2 -> Lifti.FieldKind +Lifti.FieldKind.Static = 1 -> Lifti.FieldKind +Lifti.FieldKind.Unknown = 0 -> Lifti.FieldKind +Lifti.FieldPhrases +Lifti.FieldPhrases.FieldPhrases(string! foundIn, params string![]! phrases) -> void +Lifti.FieldPhrases.FieldPhrases(string! FoundIn, System.Collections.Generic.IReadOnlyList! Phrases) -> void +Lifti.FieldPhrases.FoundIn.get -> string! +Lifti.FieldPhrases.FoundIn.init -> void +Lifti.FieldPhrases.Phrases.get -> System.Collections.Generic.IReadOnlyList! +Lifti.FieldPhrases.Phrases.init -> void +Lifti.FieldSearchResult +Lifti.FieldSearchResult.FieldSearchResult(string! foundIn, double score, System.Collections.Generic.IReadOnlyList! locations) -> void +Lifti.FieldSearchResult.FoundIn.get -> string! +Lifti.FieldSearchResult.Locations.get -> System.Collections.Generic.IReadOnlyList! +Lifti.FieldSearchResult.Score.get -> double +Lifti.FullTextIndex +Lifti.FullTextIndex.AddAsync(TKey key, string! text, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.FullTextIndex.AddAsync(TKey key, System.Collections.Generic.IEnumerable! text, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.FullTextIndex.AddAsync(TObject item, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.FullTextIndex.AddRangeAsync(System.Collections.Generic.IEnumerable! items, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! 
+Lifti.FullTextIndex.BeginBatchChange() -> void +Lifti.FullTextIndex.CommitBatchChangeAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.FullTextIndex.Count.get -> int +Lifti.FullTextIndex.CreateNavigator() -> Lifti.Querying.IIndexNavigator! +Lifti.FullTextIndex.DefaultTextExtractor.get -> Lifti.Tokenization.TextExtraction.ITextExtractor! +Lifti.FullTextIndex.DefaultThesaurus.get -> Lifti.Tokenization.IThesaurus! +Lifti.FullTextIndex.DefaultTokenizer.get -> Lifti.Tokenization.IIndexTokenizer! +Lifti.FullTextIndex.Dispose() -> void +Lifti.FullTextIndex.FieldLookup.get -> Lifti.IIndexedFieldLookup! +Lifti.FullTextIndex.GetTokenizerForField(string! fieldName) -> Lifti.Tokenization.IIndexTokenizer! +Lifti.FullTextIndex.Items.get -> Lifti.IIndexMetadata! +Lifti.FullTextIndex.Metadata.get -> Lifti.IIndexMetadata! +Lifti.FullTextIndex.QueryParser.get -> Lifti.Querying.IQueryParser! +Lifti.FullTextIndex.RemoveAsync(TKey key, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.FullTextIndex.Root.get -> Lifti.IndexNode! +Lifti.FullTextIndex.Search(Lifti.Querying.IQuery! query) -> Lifti.ISearchResults! +Lifti.FullTextIndex.Search(string! searchText) -> Lifti.ISearchResults! +Lifti.FullTextIndex.Snapshot.get -> Lifti.IIndexSnapshot! +Lifti.FullTextIndexBuilder +Lifti.FullTextIndexBuilder.Build() -> Lifti.FullTextIndex! +Lifti.FullTextIndexBuilder.FullTextIndexBuilder() -> void +Lifti.FullTextIndexBuilder.WithDefaultThesaurus(System.Func! thesaurusBuilder) -> Lifti.FullTextIndexBuilder! +Lifti.FullTextIndexBuilder.WithDefaultTokenization(System.Func! optionsBuilder) -> Lifti.FullTextIndexBuilder! +Lifti.FullTextIndexBuilder.WithDuplicateKeyBehavior(Lifti.DuplicateKeyBehavior duplicateKeyBehavior) -> Lifti.FullTextIndexBuilder! +Lifti.FullTextIndexBuilder.WithIndexModificationAction(System.Action!>! 
action) -> Lifti.FullTextIndexBuilder! +Lifti.FullTextIndexBuilder.WithIndexModificationAction(System.Func!, System.Threading.CancellationToken, System.Threading.Tasks.Task!>! asyncAction) -> Lifti.FullTextIndexBuilder! +Lifti.FullTextIndexBuilder.WithIndexModificationAction(System.Func!, System.Threading.Tasks.Task!>! asyncAction) -> Lifti.FullTextIndexBuilder! +Lifti.FullTextIndexBuilder.WithIntraNodeTextSupportedAfterIndexDepth(int depth) -> Lifti.FullTextIndexBuilder! +Lifti.FullTextIndexBuilder.WithObjectTokenization(System.Func!, Lifti.Tokenization.Objects.ObjectTokenizationBuilder!>! optionsBuilder) -> Lifti.FullTextIndexBuilder! +Lifti.FullTextIndexBuilder.WithQueryParser(Lifti.Querying.IQueryParser! queryParser) -> Lifti.FullTextIndexBuilder! +Lifti.FullTextIndexBuilder.WithQueryParser(System.Func! optionsBuilder) -> Lifti.FullTextIndexBuilder! +Lifti.FullTextIndexBuilder.WithScorerFactory(Lifti.Querying.IIndexScorerFactory! scorerFactory) -> Lifti.FullTextIndexBuilder! +Lifti.FullTextIndexBuilder.WithSimpleQueryParser() -> Lifti.FullTextIndexBuilder! +Lifti.FullTextIndexBuilder.WithSimpleQueryParser(System.Func! optionsBuilder) -> Lifti.FullTextIndexBuilder! +Lifti.FullTextIndexBuilder.WithTextExtractor(Lifti.Tokenization.TextExtraction.ITextExtractor! textExtractor) -> Lifti.FullTextIndexBuilder! +Lifti.FullTextIndexBuilder.WithTextExtractor() -> Lifti.FullTextIndexBuilder! +Lifti.FullTextIndexExtensions +Lifti.IFullTextIndex +Lifti.IFullTextIndex.AddAsync(TKey key, string! text, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.IFullTextIndex.AddAsync(TKey key, System.Collections.Generic.IEnumerable! text, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! 
+Lifti.IFullTextIndex.AddAsync(TObject item, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.IFullTextIndex.AddRangeAsync(System.Collections.Generic.IEnumerable! items, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.IFullTextIndex.BeginBatchChange() -> void +Lifti.IFullTextIndex.CommitBatchChangeAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.IFullTextIndex.Count.get -> int +Lifti.IFullTextIndex.CreateNavigator() -> Lifti.Querying.IIndexNavigator! +Lifti.IFullTextIndex.DefaultTextExtractor.get -> Lifti.Tokenization.TextExtraction.ITextExtractor! +Lifti.IFullTextIndex.DefaultThesaurus.get -> Lifti.Tokenization.IThesaurus! +Lifti.IFullTextIndex.FieldLookup.get -> Lifti.IIndexedFieldLookup! +Lifti.IFullTextIndex.Items.get -> Lifti.IIndexMetadata! +Lifti.IFullTextIndex.Metadata.get -> Lifti.IIndexMetadata! +Lifti.IFullTextIndex.QueryParser.get -> Lifti.Querying.IQueryParser! +Lifti.IFullTextIndex.RemoveAsync(TKey key, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.IFullTextIndex.Search(Lifti.Querying.IQuery! query) -> Lifti.ISearchResults! +Lifti.IFullTextIndex.Search(string! searchText) -> Lifti.ISearchResults! +Lifti.IFullTextIndex.Snapshot.get -> Lifti.IIndexSnapshot! +Lifti.IIndexedFieldLookup +Lifti.IIndexedFieldLookup.AllFieldNames.get -> System.Collections.Generic.IReadOnlyCollection! +Lifti.IIndexedFieldLookup.DefaultField.get -> byte +Lifti.IIndexedFieldLookup.GetFieldForId(byte id) -> string! +Lifti.IIndexedFieldLookup.GetFieldInfo(string! fieldName) -> Lifti.IndexedFieldDetails! +Lifti.IIndexedFieldLookup.IsKnownField(System.Type! objectType, string! 
fieldName) -> bool +Lifti.IIndexMetadata +Lifti.IIndexMetadata.Count.get -> int +Lifti.IIndexMetadata.DocumentCount.get -> int +Lifti.IIndexMetadata.GetDocumentMetadata(int documentId) -> Lifti.DocumentMetadata! +Lifti.IIndexMetadata.GetMetadata(int documentId) -> Lifti.DocumentMetadata! +Lifti.IIndexMetadata.GetObjectTypeScoreBoostMetadata(byte objectTypeId) -> Lifti.ScoreBoostMetadata! +Lifti.IIndexMetadata.IndexStatistics.get -> Lifti.IndexStatistics! +Lifti.IIndexMetadata +Lifti.IIndexMetadata.Contains(TKey key) -> bool +Lifti.IIndexMetadata.GetDocumentMetadata(int documentId) -> Lifti.DocumentMetadata! +Lifti.IIndexMetadata.GetIndexedDocuments() -> System.Collections.Generic.IEnumerable!>! +Lifti.IIndexMetadata.GetMetadata(int documentId) -> Lifti.DocumentMetadata! +Lifti.IIndexMetadata.GetMetadata(TKey key) -> Lifti.DocumentMetadata! +Lifti.IIndexNodeFactory +Lifti.IIndexNodeFactory.CreateNode(System.ReadOnlyMemory intraNodeText, Lifti.ChildNodeMap childNodes, Lifti.DocumentTokenMatchMap matches) -> Lifti.IndexNode! +Lifti.IIndexNodeFactory.CreateRootNode() -> Lifti.IndexNode! +Lifti.IIndexNodeFactory.GetIndexSupportLevelForDepth(int depth) -> Lifti.IndexSupportLevelKind +Lifti.IIndexSnapshot +Lifti.IIndexSnapshot.CreateNavigator() -> Lifti.Querying.IIndexNavigator! +Lifti.IIndexSnapshot.FieldLookup.get -> Lifti.IIndexedFieldLookup! +Lifti.IIndexSnapshot.Items.get -> Lifti.IIndexMetadata! +Lifti.IIndexSnapshot.Metadata.get -> Lifti.IIndexMetadata! +Lifti.IIndexSnapshot.Root.get -> Lifti.IndexNode! +Lifti.IIndexSnapshot +Lifti.IIndexSnapshot.Items.get -> Lifti.IIndexMetadata! +Lifti.IIndexSnapshot.Metadata.get -> Lifti.IIndexMetadata! +Lifti.IIndexTokenizerProvider +Lifti.IIndexTokenizerProvider.DefaultTokenizer.get -> Lifti.Tokenization.IIndexTokenizer! +Lifti.IIndexTokenizerProvider.GetTokenizerForField(string! fieldName) -> Lifti.Tokenization.IIndexTokenizer! +Lifti.IndexedFieldDetails +Lifti.IndexedFieldDetails.DynamicFieldReaderName.get -> string? 
+Lifti.IndexedFieldDetails.FieldKind.get -> Lifti.FieldKind +Lifti.IndexedFieldDetails.Id.get -> byte +Lifti.IndexedFieldDetails.Name.get -> string! +Lifti.IndexedFieldDetails.ObjectType.get -> System.Type! +Lifti.IndexedFieldDetails.ScoreBoost.get -> double +Lifti.IndexedFieldDetails.TextExtractor.get -> Lifti.Tokenization.TextExtraction.ITextExtractor! +Lifti.IndexedFieldDetails.Thesaurus.get -> Lifti.Tokenization.IThesaurus! +Lifti.IndexedFieldDetails.Tokenizer.get -> Lifti.Tokenization.IIndexTokenizer! +Lifti.IndexedFieldDetails +Lifti.IndexedToken +Lifti.IndexedToken.Equals(Lifti.IndexedToken other) -> bool +Lifti.IndexedToken.FieldId.get -> byte +Lifti.IndexedToken.IndexedToken() -> void +Lifti.IndexedToken.IndexedToken(byte fieldId, params Lifti.TokenLocation![]! locations) -> void +Lifti.IndexedToken.IndexedToken(byte fieldId, System.Collections.Generic.IReadOnlyList! locations) -> void +Lifti.IndexedToken.Locations.get -> System.Collections.Generic.IReadOnlyList! +Lifti.IndexMetadata +Lifti.IndexMetadata.Add(Lifti.DocumentMetadata! documentMetadata) -> void +Lifti.IndexMetadata.Add(TKey key, Lifti.DocumentStatistics documentStatistics) -> int +Lifti.IndexMetadata.Contains(TKey key) -> bool +Lifti.IndexMetadata.Count.get -> int +Lifti.IndexMetadata.DocumentCount.get -> int +Lifti.IndexMetadata.GetDocumentMetadata(int documentId) -> Lifti.DocumentMetadata! +Lifti.IndexMetadata.GetIndexedDocuments() -> System.Collections.Generic.IEnumerable!>! +Lifti.IndexMetadata.GetMetadata(int documentId) -> Lifti.DocumentMetadata! +Lifti.IndexMetadata.GetMetadata(TKey key) -> Lifti.DocumentMetadata! +Lifti.IndexMetadata.GetObjectTypeScoreBoostMetadata(byte objectTypeId) -> Lifti.ScoreBoostMetadata! +Lifti.IndexMetadata.IndexStatistics.get -> Lifti.IndexStatistics! 
+Lifti.IndexNode +Lifti.IndexNode.ChildNodes.get -> Lifti.ChildNodeMap +Lifti.IndexNode.HasChildNodes.get -> bool +Lifti.IndexNode.HasMatches.get -> bool +Lifti.IndexNode.IntraNodeText.get -> System.ReadOnlyMemory +Lifti.IndexNode.IsEmpty.get -> bool +Lifti.IndexNode.Matches.get -> Lifti.DocumentTokenMatchMap +Lifti.IndexNodeFactory +Lifti.IndexNodeFactory.CreateNode(System.ReadOnlyMemory intraNodeText, Lifti.ChildNodeMap childNodes, Lifti.DocumentTokenMatchMap matches) -> Lifti.IndexNode! +Lifti.IndexNodeFactory.CreateRootNode() -> Lifti.IndexNode! +Lifti.IndexNodeFactory.GetIndexSupportLevelForDepth(int depth) -> Lifti.IndexSupportLevelKind +Lifti.IndexNodeFactory.IndexNodeFactory(Lifti.IndexOptions! options) -> void +Lifti.IndexOptions +Lifti.IndexOptions.DuplicateItemBehavior.get -> Lifti.DuplicateKeyBehavior +Lifti.IndexOptions.DuplicateKeyBehavior.get -> Lifti.DuplicateKeyBehavior +Lifti.IndexOptions.SupportIntraNodeTextAfterIndexDepth.get -> int +Lifti.IndexSnapshot +Lifti.IndexSnapshot.CreateNavigator() -> Lifti.Querying.IIndexNavigator! +Lifti.IndexSnapshot.FieldLookup.get -> Lifti.IIndexedFieldLookup! +Lifti.IndexSnapshot.Items.get -> Lifti.IIndexMetadata! +Lifti.IndexSnapshot.Metadata.get -> Lifti.IIndexMetadata! +Lifti.IndexSnapshot.Root.get -> Lifti.IndexNode! +Lifti.IndexStatistics +Lifti.IndexStatistics.GetFieldTokenCount(byte fieldId) -> long +Lifti.IndexStatistics.TokenCountByField.get -> System.Collections.Generic.IReadOnlyDictionary! +Lifti.IndexStatistics.TotalTokenCount.get -> long +Lifti.IndexSupportLevelKind +Lifti.IndexSupportLevelKind.CharacterByCharacter = 0 -> Lifti.IndexSupportLevelKind +Lifti.IndexSupportLevelKind.IntraNodeText = 1 -> Lifti.IndexSupportLevelKind +Lifti.ISearchResults +Lifti.ISearchResults.Count.get -> int +Lifti.ISearchResults.CreateMatchPhrases(System.Func! loadText) -> System.Collections.Generic.IEnumerable!>! +Lifti.ISearchResults.CreateMatchPhrasesAsync(System.Func>! 
loadTextAsync, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task!>!>! +Lifti.ISearchResults.CreateMatchPhrasesAsync(System.Func>! loadTextAsync, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task!>!>! +Lifti.ISearchResults.CreateMatchPhrasesAsync(System.Func!, System.Collections.Generic.IReadOnlyList!>! loadItems, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task!>!>! +Lifti.ISearchResults.CreateMatchPhrasesAsync(System.Func!, System.Threading.CancellationToken, System.Threading.Tasks.ValueTask!>>! loadItemsAsync, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task!>!>! +Lifti.ISearchResults.CreateMatchPhrasesAsync(System.Func!, System.Threading.Tasks.ValueTask!>>! loadItemsAsync, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task!>!>! +Lifti.ISearchResults.CreateMatchPhrasesAsync(System.Func>! loadItemAsync, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task!>!>! +Lifti.ISearchResults.CreateMatchPhrasesAsync(System.Func>! loadItemAsync, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task!>!>! +Lifti.ISearchResults.CreateMatchPhrasesAsync(System.Func! loadItem, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task!>!>! +Lifti.ISearchResults.OrderByField(string! fieldName) -> Lifti.ISearchResults! +Lifti.LiftiException +Lifti.LiftiException.LiftiException() -> void +Lifti.LiftiException.LiftiException(string! 
message) -> void +Lifti.LiftiException.LiftiException(string! message, params object![]! args) -> void +Lifti.LiftiException.LiftiException(string! message, System.Exception! inner) -> void +Lifti.PreprocessedInput +Lifti.PreprocessedInput.Equals(Lifti.PreprocessedInput other) -> bool +Lifti.PreprocessedInput.PreprocessedInput() -> void +Lifti.PreprocessedInput.PreprocessedInput(char value) -> void +Lifti.PreprocessedInput.PreprocessedInput(string! replacement) -> void +Lifti.PreprocessedInput.Replacement.get -> string? +Lifti.PreprocessedInput.Value.get -> char +Lifti.Querying.FieldScoreBoostProvider +Lifti.Querying.FieldScoreBoostProvider.FieldScoreBoostProvider(Lifti.IIndexedFieldLookup! fieldLookup) -> void +Lifti.Querying.FieldScoreBoostProvider.GetScoreBoost(byte fieldId) -> double +Lifti.Querying.IFieldScoreBoostProvider +Lifti.Querying.IFieldScoreBoostProvider.GetScoreBoost(byte fieldId) -> double +Lifti.Querying.IIndexNavigator +Lifti.Querying.IIndexNavigator.AddExactAndChildMatches(Lifti.Querying.QueryContext! queryContext, Lifti.Querying.DocumentMatchCollector! documentMatchCollector, double weighting = 1) -> void +Lifti.Querying.IIndexNavigator.AddExactMatches(Lifti.Querying.QueryContext! queryContext, Lifti.Querying.DocumentMatchCollector! documentMatchCollector, double weighting = 1) -> void +Lifti.Querying.IIndexNavigator.CreateBookmark() -> Lifti.Querying.IIndexNavigatorBookmark! +Lifti.Querying.IIndexNavigator.EnumerateIndexedTokens() -> System.Collections.Generic.IEnumerable! +Lifti.Querying.IIndexNavigator.EnumerateNextCharacters() -> System.Collections.Generic.IEnumerable! +Lifti.Querying.IIndexNavigator.ExactMatchCount() -> int +Lifti.Querying.IIndexNavigator.GetExactAndChildMatches(double weighting = 1) -> Lifti.Querying.IntermediateQueryResult +Lifti.Querying.IIndexNavigator.GetExactAndChildMatches(Lifti.Querying.QueryContext! 
queryContext, double weighting = 1) -> Lifti.Querying.IntermediateQueryResult +Lifti.Querying.IIndexNavigator.GetExactMatches(double weighting = 1) -> Lifti.Querying.IntermediateQueryResult +Lifti.Querying.IIndexNavigator.GetExactMatches(Lifti.Querying.QueryContext! queryContext, double weighting = 1) -> Lifti.Querying.IntermediateQueryResult +Lifti.Querying.IIndexNavigator.HasExactMatches.get -> bool +Lifti.Querying.IIndexNavigator.Process(char value) -> bool +Lifti.Querying.IIndexNavigator.Process(string! text) -> bool +Lifti.Querying.IIndexNavigator.Process(System.ReadOnlySpan text) -> bool +Lifti.Querying.IIndexNavigator.Snapshot.get -> Lifti.IIndexSnapshot! +Lifti.Querying.IIndexNavigatorBookmark +Lifti.Querying.IIndexNavigatorBookmark.Apply() -> void +Lifti.Querying.IIndexScorerFactory +Lifti.Querying.IIndexScorerFactory.CreateIndexScorer(Lifti.IIndexSnapshot! indexSnapshot) -> Lifti.Querying.IScorer! +Lifti.Querying.IntermediateQueryResult +Lifti.Querying.IntermediateQueryResult.CompositePositionalIntersect(Lifti.Querying.IntermediateQueryResult results, int leftTolerance, int rightTolerance) -> Lifti.Querying.IntermediateQueryResult +Lifti.Querying.IntermediateQueryResult.Equals(Lifti.Querying.IntermediateQueryResult other) -> bool +Lifti.Querying.IntermediateQueryResult.IntermediateQueryResult() -> void +Lifti.Querying.IntermediateQueryResult.Intersect(Lifti.Querying.IntermediateQueryResult results) -> Lifti.Querying.IntermediateQueryResult +Lifti.Querying.IntermediateQueryResult.Matches.get -> System.Collections.Generic.IReadOnlyList! +Lifti.Querying.IntermediateQueryResult.PrecedingIntersect(Lifti.Querying.IntermediateQueryResult results) -> Lifti.Querying.IntermediateQueryResult +Lifti.Querying.IntermediateQueryResult.Union(Lifti.Querying.IntermediateQueryResult results) -> Lifti.Querying.IntermediateQueryResult +Lifti.Querying.IQuery +Lifti.Querying.IQuery.Execute(Lifti.IIndexSnapshot! index) -> System.Collections.Generic.IEnumerable!>! 
+Lifti.Querying.IQuery.Root.get -> Lifti.Querying.QueryParts.IQueryPart! +Lifti.Querying.IQueryParser +Lifti.Querying.IQueryParser.Parse(Lifti.IIndexedFieldLookup! fieldLookup, string! queryText, Lifti.IIndexTokenizerProvider! tokenizerProvider) -> Lifti.Querying.IQuery! +Lifti.Querying.IScorer +Lifti.Querying.IScorer.CalculateScore(int totalMatchedDocuments, int documentId, byte fieldId, System.Collections.Generic.IReadOnlyList! tokenLocations, double weighting) -> double +Lifti.Querying.DocumentMatchCollector +Lifti.Querying.DocumentMatchCollector.DocumentMatchCollector() -> void +Lifti.Querying.DocumentMatchCollector.ToIntermediateQueryResult() -> Lifti.Querying.IntermediateQueryResult +Lifti.Querying.OkapiBm25ScorerFactory +Lifti.Querying.OkapiBm25ScorerFactory.CreateIndexScorer(Lifti.IIndexSnapshot! indexSnapshot) -> Lifti.Querying.IScorer! +Lifti.Querying.OkapiBm25ScorerFactory.OkapiBm25ScorerFactory(double k1 = 1.2, double b = 0.75) -> void +Lifti.Querying.OperatorPrecedence +Lifti.Querying.OperatorPrecedence.And = 1 -> Lifti.Querying.OperatorPrecedence +Lifti.Querying.OperatorPrecedence.Or = 2 -> Lifti.Querying.OperatorPrecedence +Lifti.Querying.OperatorPrecedence.Positional = 0 -> Lifti.Querying.OperatorPrecedence +Lifti.Querying.Query +Lifti.Querying.Query.Execute(Lifti.IIndexSnapshot! index) -> System.Collections.Generic.IEnumerable!>! +Lifti.Querying.Query.Query(Lifti.Querying.QueryParts.IQueryPart! root) -> void +Lifti.Querying.Query.Root.get -> Lifti.Querying.QueryParts.IQueryPart! +Lifti.Querying.QueryContext +Lifti.Querying.QueryContext.FilterToDocumentIds.get -> System.Collections.Generic.IReadOnlySet? +Lifti.Querying.QueryContext.FilterToDocumentIds.init -> void +Lifti.Querying.QueryContext.FilterToFieldId.get -> byte? +Lifti.Querying.QueryContext.FilterToFieldId.init -> void +Lifti.Querying.QueryContext.QueryContext(byte? FilterToFieldId = null, System.Collections.Generic.IReadOnlySet? 
FilterToDocumentIds = null) -> void +Lifti.Querying.QueryParserBuilder +Lifti.Querying.QueryParserBuilder.AssumeFuzzySearchTerms(bool fuzzySearchByDefault = true) -> Lifti.Querying.QueryParserBuilder! +Lifti.Querying.QueryParserBuilder.Build() -> Lifti.Querying.IQueryParser! +Lifti.Querying.QueryParserBuilder.WithDefaultJoiningOperator(Lifti.Querying.QueryTermJoinOperatorKind joiningOperator = Lifti.Querying.QueryTermJoinOperatorKind.And) -> Lifti.Querying.QueryParserBuilder! +Lifti.Querying.QueryParserBuilder.WithFuzzySearchDefaults(System.Func! maxEditDistance, System.Func! maxSequentialEdits) -> Lifti.Querying.QueryParserBuilder! +Lifti.Querying.QueryParserBuilder.WithFuzzySearchDefaults(ushort maxEditDistance = 4, ushort maxSequentialEdits = 1) -> Lifti.Querying.QueryParserBuilder! +Lifti.Querying.QueryParserBuilder.WithQueryParserFactory(System.Func! factory) -> Lifti.Querying.QueryParserBuilder! +Lifti.Querying.QueryParserException +Lifti.Querying.QueryParserException.QueryParserException() -> void +Lifti.Querying.QueryParserException.QueryParserException(string! message) -> void +Lifti.Querying.QueryParserException.QueryParserException(string! message, params object![]! args) -> void +Lifti.Querying.QueryParserException.QueryParserException(string! message, System.Exception! inner) -> void +Lifti.Querying.QueryParserOptions +Lifti.Querying.QueryParserOptions.AssumeFuzzySearchTerms.get -> bool +Lifti.Querying.QueryParserOptions.DefaultJoiningOperator.get -> Lifti.Querying.QueryTermJoinOperatorKind +Lifti.Querying.QueryParserOptions.FuzzySearchMaxEditDistance.get -> System.Func! +Lifti.Querying.QueryParserOptions.FuzzySearchMaxSequentialEdits.get -> System.Func! +Lifti.Querying.QueryParts.AdjacentWordsQueryOperator +Lifti.Querying.QueryParts.AdjacentWordsQueryOperator.AdjacentWordsQueryOperator(System.Collections.Generic.IReadOnlyList! words) -> void +Lifti.Querying.QueryParts.AdjacentWordsQueryOperator.CalculateWeighting(System.Func! 
navigatorCreator) -> double +Lifti.Querying.QueryParts.AdjacentWordsQueryOperator.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +Lifti.Querying.QueryParts.AdjacentWordsQueryOperator.Words.get -> System.Collections.Generic.IReadOnlyList! +Lifti.Querying.QueryParts.AndQueryOperator +Lifti.Querying.QueryParts.AndQueryOperator.AndQueryOperator(Lifti.Querying.QueryParts.IQueryPart! left, Lifti.Querying.QueryParts.IQueryPart! right) -> void +Lifti.Querying.QueryParts.BinaryQueryOperator +Lifti.Querying.QueryParts.BinaryQueryOperator.BinaryQueryOperator(Lifti.Querying.QueryParts.IQueryPart! left, Lifti.Querying.QueryParts.IQueryPart! right) -> void +Lifti.Querying.QueryParts.BinaryQueryOperator.CalculateWeighting(System.Func! navigatorCreator) -> double +Lifti.Querying.QueryParts.BinaryQueryOperator.EvaluateWithDocumentIntersection(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> (Lifti.Querying.IntermediateQueryResult leftResults, Lifti.Querying.IntermediateQueryResult rightResults) +Lifti.Querying.QueryParts.BinaryQueryOperator.Left.get -> Lifti.Querying.QueryParts.IQueryPart! +Lifti.Querying.QueryParts.BinaryQueryOperator.Left.set -> void +Lifti.Querying.QueryParts.BinaryQueryOperator.Right.get -> Lifti.Querying.QueryParts.IQueryPart! +Lifti.Querying.QueryParts.BinaryQueryOperator.Right.set -> void +Lifti.Querying.QueryParts.BracketedQueryPart +Lifti.Querying.QueryParts.BracketedQueryPart.BracketedQueryPart(Lifti.Querying.QueryParts.IQueryPart! statement) -> void +Lifti.Querying.QueryParts.BracketedQueryPart.CalculateWeighting(System.Func! navigatorCreator) -> double +Lifti.Querying.QueryParts.BracketedQueryPart.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +Lifti.Querying.QueryParts.BracketedQueryPart.Statement.get -> Lifti.Querying.QueryParts.IQueryPart! 
+Lifti.Querying.QueryParts.EmptyQueryPart +Lifti.Querying.QueryParts.EmptyQueryPart.CalculateWeighting(System.Func! navigatorCreator) -> double +Lifti.Querying.QueryParts.EmptyQueryPart.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +Lifti.Querying.QueryParts.ExactWordQueryPart +Lifti.Querying.QueryParts.ExactWordQueryPart.ExactWordQueryPart(string! word, double? scoreBoost = null) -> void +Lifti.Querying.QueryParts.FieldFilterQueryOperator +Lifti.Querying.QueryParts.FieldFilterQueryOperator.CalculateWeighting(System.Func! navigatorCreator) -> double +Lifti.Querying.QueryParts.FieldFilterQueryOperator.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +Lifti.Querying.QueryParts.FieldFilterQueryOperator.FieldFilterQueryOperator(string! fieldName, byte fieldId, Lifti.Querying.QueryParts.IQueryPart! statement) -> void +Lifti.Querying.QueryParts.FieldFilterQueryOperator.FieldId.get -> byte +Lifti.Querying.QueryParts.FieldFilterQueryOperator.FieldName.get -> string! +Lifti.Querying.QueryParts.FieldFilterQueryOperator.Statement.get -> Lifti.Querying.QueryParts.IQueryPart! +Lifti.Querying.QueryParts.FuzzyMatchQueryPart +Lifti.Querying.QueryParts.FuzzyMatchQueryPart.FuzzyMatchQueryPart(string! word, ushort maxEditDistance = 4, ushort maxSequentialEdits = 1, double? scoreBoost = null) -> void +Lifti.Querying.QueryParts.IBinaryQueryOperator +Lifti.Querying.QueryParts.IBinaryQueryOperator.Left.get -> Lifti.Querying.QueryParts.IQueryPart! +Lifti.Querying.QueryParts.IBinaryQueryOperator.Left.set -> void +Lifti.Querying.QueryParts.IBinaryQueryOperator.Precedence.get -> Lifti.Querying.OperatorPrecedence +Lifti.Querying.QueryParts.IBinaryQueryOperator.Right.get -> Lifti.Querying.QueryParts.IQueryPart! 
+Lifti.Querying.QueryParts.IBinaryQueryOperator.Right.set -> void +Lifti.Querying.QueryParts.IQueryPart +Lifti.Querying.QueryParts.IQueryPart.CalculateWeighting(System.Func! navigatorCreator) -> double +Lifti.Querying.QueryParts.IQueryPart.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +Lifti.Querying.QueryParts.NearQueryOperator +Lifti.Querying.QueryParts.NearQueryOperator.NearQueryOperator(Lifti.Querying.QueryParts.IQueryPart! left, Lifti.Querying.QueryParts.IQueryPart! right, int tolerance = 5) -> void +Lifti.Querying.QueryParts.NearQueryOperator.Tolerance.get -> int +Lifti.Querying.QueryParts.OrQueryOperator +Lifti.Querying.QueryParts.OrQueryOperator.OrQueryOperator(Lifti.Querying.QueryParts.IQueryPart! left, Lifti.Querying.QueryParts.IQueryPart! right) -> void +Lifti.Querying.QueryParts.PrecedingNearQueryOperator +Lifti.Querying.QueryParts.PrecedingNearQueryOperator.PrecedingNearQueryOperator(Lifti.Querying.QueryParts.IQueryPart! left, Lifti.Querying.QueryParts.IQueryPart! right, int tolerance = 5) -> void +Lifti.Querying.QueryParts.PrecedingNearQueryOperator.Tolerance.get -> int +Lifti.Querying.QueryParts.PrecedingQueryOperator +Lifti.Querying.QueryParts.PrecedingQueryOperator.PrecedingQueryOperator(Lifti.Querying.QueryParts.IQueryPart! left, Lifti.Querying.QueryParts.IQueryPart! right) -> void +Lifti.Querying.QueryParts.ScoreBoostedQueryPart +Lifti.Querying.QueryParts.ScoreBoostedQueryPart.CalculateWeighting(System.Func! navigatorCreator) -> double +Lifti.Querying.QueryParts.ScoreBoostedQueryPart.ScoreBoost.get -> double? +Lifti.Querying.QueryParts.ScoreBoostedQueryPart.ScoreBoostedQueryPart(double? 
scoreBoost) -> void +Lifti.Querying.QueryParts.WildcardQueryFragment +Lifti.Querying.QueryParts.WildcardQueryFragment.Equals(Lifti.Querying.QueryParts.WildcardQueryFragment other) -> bool +Lifti.Querying.QueryParts.WildcardQueryFragment.Kind.get -> Lifti.Querying.QueryParts.WildcardQueryFragmentKind +Lifti.Querying.QueryParts.WildcardQueryFragment.Text.get -> string? +Lifti.Querying.QueryParts.WildcardQueryFragment.WildcardQueryFragment() -> void +Lifti.Querying.QueryParts.WildcardQueryFragmentKind +Lifti.Querying.QueryParts.WildcardQueryFragmentKind.MultiCharacter = 2 -> Lifti.Querying.QueryParts.WildcardQueryFragmentKind +Lifti.Querying.QueryParts.WildcardQueryFragmentKind.SingleCharacter = 1 -> Lifti.Querying.QueryParts.WildcardQueryFragmentKind +Lifti.Querying.QueryParts.WildcardQueryFragmentKind.Text = 0 -> Lifti.Querying.QueryParts.WildcardQueryFragmentKind +Lifti.Querying.QueryParts.WildcardQueryPart +Lifti.Querying.QueryParts.WildcardQueryPart.WildcardQueryPart(params Lifti.Querying.QueryParts.WildcardQueryFragment[]! fragments) -> void +Lifti.Querying.QueryParts.WildcardQueryPart.WildcardQueryPart(System.Collections.Generic.IEnumerable! fragments, double? scoreBoost = null) -> void +Lifti.Querying.QueryParts.WordQueryPart +Lifti.Querying.QueryParts.WordQueryPart.Word.get -> string! +Lifti.Querying.QueryParts.WordQueryPart.WordQueryPart(string! word, double? 
scoreBoost) -> void +Lifti.Querying.QueryTermJoinOperatorKind +Lifti.Querying.QueryTermJoinOperatorKind.And = 0 -> Lifti.Querying.QueryTermJoinOperatorKind +Lifti.Querying.QueryTermJoinOperatorKind.Or = 1 -> Lifti.Querying.QueryTermJoinOperatorKind +Lifti.Querying.QueryTokenType +Lifti.Querying.QueryTokenType.AndOperator = 1 -> Lifti.Querying.QueryTokenType +Lifti.Querying.QueryTokenType.BeginAdjacentTextOperator = 5 -> Lifti.Querying.QueryTokenType +Lifti.Querying.QueryTokenType.CloseBracket = 4 -> Lifti.Querying.QueryTokenType +Lifti.Querying.QueryTokenType.EndAdjacentTextOperator = 6 -> Lifti.Querying.QueryTokenType +Lifti.Querying.QueryTokenType.FieldFilter = 10 -> Lifti.Querying.QueryTokenType +Lifti.Querying.QueryTokenType.NearOperator = 7 -> Lifti.Querying.QueryTokenType +Lifti.Querying.QueryTokenType.OpenBracket = 3 -> Lifti.Querying.QueryTokenType +Lifti.Querying.QueryTokenType.OrOperator = 2 -> Lifti.Querying.QueryTokenType +Lifti.Querying.QueryTokenType.PrecedingNearOperator = 8 -> Lifti.Querying.QueryTokenType +Lifti.Querying.QueryTokenType.PrecedingOperator = 9 -> Lifti.Querying.QueryTokenType +Lifti.Querying.QueryTokenType.Text = 0 -> Lifti.Querying.QueryTokenType +Lifti.Querying.ScoredFieldMatch +Lifti.Querying.ScoredFieldMatch.Equals(Lifti.Querying.ScoredFieldMatch? other) -> bool +Lifti.Querying.ScoredFieldMatch.FieldId.get -> byte +Lifti.Querying.ScoredFieldMatch.GetTokenLocations() -> System.Collections.Generic.IReadOnlyList! +Lifti.Querying.ScoredFieldMatch.Score.get -> double +Lifti.Querying.ScoredToken +Lifti.Querying.ScoredToken.DocumentId.get -> int +Lifti.Querying.ScoredToken.Equals(Lifti.Querying.ScoredToken other) -> bool +Lifti.Querying.ScoredToken.FieldMatches.get -> System.Collections.Generic.IReadOnlyList! +Lifti.Querying.ScoredToken.ItemId.get -> int +Lifti.Querying.ScoredToken.ScoredToken() -> void +Lifti.Querying.ScoredToken.ScoredToken(int documentId, System.Collections.Generic.IReadOnlyList! 
fieldMatches) -> void +Lifti.Querying.SimpleQueryParser +Lifti.Querying.SimpleQueryParser.Parse(Lifti.IIndexedFieldLookup! fieldLookup, string! queryText, Lifti.IIndexTokenizerProvider! tokenizerProvider) -> Lifti.Querying.IQuery! +Lifti.Querying.SimpleQueryParser.SimpleQueryParser(Lifti.Querying.QueryParserOptions! options) -> void +Lifti.ScoreBoostMetadata +Lifti.SearchResult +Lifti.SearchResult.FieldMatches.get -> System.Collections.Generic.IReadOnlyList! +Lifti.SearchResult.Key.get -> TKey +Lifti.SearchResult.Score.get -> double +Lifti.SearchResult.SearchResult(TKey key, System.Collections.Generic.IReadOnlyList! locations) -> void +Lifti.Serialization.Binary.BinarySerializer +Lifti.Serialization.Binary.BinarySerializer.BinarySerializer() -> void +Lifti.Serialization.Binary.BinarySerializer.BinarySerializer(Lifti.Serialization.Binary.IKeySerializer! keySerializer) -> void +Lifti.Serialization.Binary.BinarySerializer.DeserializeAsync(Lifti.FullTextIndex! index, System.IO.Stream! stream, bool disposeStream = true, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.Serialization.Binary.BinarySerializer.SerializeAsync(Lifti.FullTextIndex! index, System.IO.Stream! stream, bool disposeStream = true, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.Serialization.Binary.BinarySerializer.SerializeAsync(Lifti.IIndexSnapshot! snapshot, System.IO.Stream! stream, bool disposeStream = true, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.Serialization.Binary.DeserializationException +Lifti.Serialization.Binary.DeserializationException.DeserializationException() -> void +Lifti.Serialization.Binary.DeserializationException.DeserializationException(string! 
message) -> void +Lifti.Serialization.Binary.DeserializationException.DeserializationException(string! message, params object![]! args) -> void +Lifti.Serialization.Binary.DeserializationException.DeserializationException(string! message, System.Exception! inner) -> void +Lifti.Serialization.Binary.GuidFormatterKeySerializer +Lifti.Serialization.Binary.GuidFormatterKeySerializer.GuidFormatterKeySerializer() -> void +Lifti.Serialization.Binary.IIndexSerializer +Lifti.Serialization.Binary.IIndexSerializer.DeserializeAsync(Lifti.FullTextIndex! index, System.IO.Stream! stream, bool disposeStream = true, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.Serialization.Binary.IIndexSerializer.SerializeAsync(Lifti.FullTextIndex! index, System.IO.Stream! stream, bool disposeStream = true, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.Serialization.Binary.IIndexSerializer.SerializeAsync(Lifti.IIndexSnapshot! snapshot, System.IO.Stream! stream, bool disposeStream = true, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.Serialization.Binary.IKeySerializer +Lifti.Serialization.Binary.IKeySerializer.Read(System.IO.BinaryReader! reader) -> TKey +Lifti.Serialization.Binary.IKeySerializer.Write(System.IO.BinaryWriter! writer, TKey key) -> void +Lifti.Serialization.Binary.IntFormatterKeySerializer +Lifti.Serialization.Binary.IntFormatterKeySerializer.IntFormatterKeySerializer() -> void +Lifti.Serialization.Binary.KeySerializerBase +Lifti.Serialization.Binary.KeySerializerBase.KeySerializerBase(System.Action! dataWriter, System.Func! dataReader) -> void +Lifti.Serialization.Binary.KeySerializerBase.Read(System.IO.BinaryReader! reader) -> TKey +Lifti.Serialization.Binary.KeySerializerBase.Write(System.IO.BinaryWriter! 
writer, TKey key) -> void +Lifti.Serialization.Binary.StringFormatterKeySerializer +Lifti.Serialization.Binary.StringFormatterKeySerializer.StringFormatterKeySerializer() -> void +Lifti.Serialization.Binary.UIntFormatterKeySerializer +Lifti.Serialization.Binary.UIntFormatterKeySerializer.UIntFormatterKeySerializer() -> void +Lifti.Serialization.DeserializedDataCollector +Lifti.Serialization.DeserializedDataCollector.Add(T item) -> void +Lifti.Serialization.DeserializedDataCollector.DeserializedDataCollector(int expectedCount) -> void +Lifti.Serialization.DocumentMetadataCollector +Lifti.Serialization.DocumentMetadataCollector.DocumentMetadataCollector() -> void +Lifti.Serialization.DocumentMetadataCollector.DocumentMetadataCollector(int expectedCount) -> void +Lifti.Serialization.IIndexDeserializer +Lifti.Serialization.IIndexDeserializer.ReadAsync(Lifti.FullTextIndex! index, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.ValueTask +Lifti.Serialization.IndexDeserializerBase +Lifti.Serialization.IndexDeserializerBase.Dispose() -> void +Lifti.Serialization.IndexDeserializerBase.IndexDeserializerBase() -> void +Lifti.Serialization.IndexDeserializerBase.ReadAsync(Lifti.FullTextIndex! index, System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask +Lifti.Serialization.IndexSerializerBase +Lifti.Serialization.IndexSerializerBase.Dispose() -> void +Lifti.Serialization.IndexSerializerBase.IndexSerializerBase() -> void +Lifti.Serialization.IndexSerializerBase.WriteAsync(Lifti.IIndexSnapshot! 
snapshot, System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask +Lifti.Serialization.SerializedFieldCollector +Lifti.Serialization.SerializedFieldCollector.SerializedFieldCollector() -> void +Lifti.Serialization.SerializedFieldCollector.SerializedFieldCollector(int expectedCount) -> void +Lifti.Serialization.SerializedFieldIdMap +Lifti.Serialization.SerializedFieldIdMap.Map(byte serializedFieldId) -> byte +Lifti.Serialization.SerializedFieldIdMap.SerializedFieldIdMap() -> void +Lifti.Serialization.SerializedFieldInfo +Lifti.Serialization.SerializedFieldInfo.DynamicFieldReaderName.get -> string? +Lifti.Serialization.SerializedFieldInfo.DynamicFieldReaderName.set -> void +Lifti.Serialization.SerializedFieldInfo.FieldId.get -> byte +Lifti.Serialization.SerializedFieldInfo.FieldId.set -> void +Lifti.Serialization.SerializedFieldInfo.Kind.get -> Lifti.FieldKind +Lifti.Serialization.SerializedFieldInfo.Kind.set -> void +Lifti.Serialization.SerializedFieldInfo.Name.get -> string! +Lifti.Serialization.SerializedFieldInfo.Name.set -> void +Lifti.Serialization.SerializedFieldInfo.SerializedFieldInfo() -> void +Lifti.Serialization.SerializedFieldInfo.SerializedFieldInfo(byte FieldId, string! Name, Lifti.FieldKind Kind, string? DynamicFieldReaderName) -> void +Lifti.ThesaurusBuilder +Lifti.ThesaurusBuilder.WithHypernyms(string! word, params string![]! hypernyms) -> Lifti.ThesaurusBuilder! +Lifti.ThesaurusBuilder.WithHypernyms(string! word, System.Collections.Generic.IEnumerable! hypernyms) -> Lifti.ThesaurusBuilder! +Lifti.ThesaurusBuilder.WithHyponyms(string! word, params string![]! hyponyms) -> Lifti.ThesaurusBuilder! +Lifti.ThesaurusBuilder.WithHyponyms(string! word, System.Collections.Generic.IEnumerable! hyponyms) -> Lifti.ThesaurusBuilder! +Lifti.ThesaurusBuilder.WithSynonyms(params string![]! synonyms) -> Lifti.ThesaurusBuilder! +Lifti.ThesaurusBuilder.WithSynonyms(System.Collections.Generic.ICollection! 
synonyms) -> Lifti.ThesaurusBuilder! +Lifti.Tokenization.IIndexTokenizer +Lifti.Tokenization.IIndexTokenizer.IsSplitCharacter(char character) -> bool +Lifti.Tokenization.IIndexTokenizer.Normalize(System.ReadOnlySpan tokenText) -> string! +Lifti.Tokenization.IIndexTokenizer.Process(System.Collections.Generic.IEnumerable! input) -> System.Collections.Generic.IReadOnlyCollection! +Lifti.Tokenization.IIndexTokenizer.Process(System.ReadOnlySpan tokenText) -> System.Collections.Generic.IReadOnlyCollection! +Lifti.Tokenization.IndexTokenizer +Lifti.Tokenization.IndexTokenizer.IndexTokenizer(Lifti.TokenizationOptions! tokenizationOptions) -> void +Lifti.Tokenization.IndexTokenizer.Normalize(System.ReadOnlySpan tokenText) -> string! +Lifti.Tokenization.IndexTokenizer.Options.get -> Lifti.TokenizationOptions! +Lifti.Tokenization.IndexTokenizer.Process(System.Collections.Generic.IEnumerable! input) -> System.Collections.Generic.IReadOnlyCollection! +Lifti.Tokenization.IndexTokenizer.Process(System.ReadOnlySpan input) -> System.Collections.Generic.IReadOnlyCollection! +Lifti.Tokenization.IStemmer +Lifti.Tokenization.IStemmer.RequiresAccentInsensitivity.get -> bool +Lifti.Tokenization.IStemmer.RequiresCaseInsensitivity.get -> bool +Lifti.Tokenization.IStemmer.Stem(System.Text.StringBuilder! builder) -> void +Lifti.Tokenization.IThesaurus +Lifti.Tokenization.IThesaurus.Process(Lifti.Tokenization.Token! token) -> System.Collections.Generic.IEnumerable! +Lifti.Tokenization.Objects.ObjectScoreBoostBuilder +Lifti.Tokenization.Objects.ObjectScoreBoostBuilder.Freshness(System.Func! freshnessProvider, double multiplier) -> Lifti.Tokenization.Objects.ObjectScoreBoostBuilder! +Lifti.Tokenization.Objects.ObjectScoreBoostBuilder.Magnitude(System.Func! magnitudeProvider, double multiplier) -> Lifti.Tokenization.Objects.ObjectScoreBoostBuilder! 
+Lifti.Tokenization.Objects.ObjectScoreBoostBuilder.ObjectScoreBoostBuilder() -> void +Lifti.Tokenization.Objects.ObjectScoreBoostOptions +Lifti.Tokenization.Objects.ObjectScoreBoostOptions.FreshnessMultiplier.get -> double +Lifti.Tokenization.Objects.ObjectScoreBoostOptions.MagnitudeMultiplier.get -> double +Lifti.Tokenization.Objects.ObjectScoreBoostOptions.ObjectScoreBoostOptions(double magnitudeMultiplier, double freshnessMultiplier) -> void +Lifti.Tokenization.Objects.ObjectScoreBoostOptions +Lifti.Tokenization.Objects.ObjectScoreBoostOptions.FreshnessProvider.get -> System.Func? +Lifti.Tokenization.Objects.ObjectScoreBoostOptions.MagnitudeProvider.get -> System.Func? +Lifti.Tokenization.Objects.ObjectScoreBoostOptions.ObjectScoreBoostOptions(double magnitudeMultiplier, System.Func? magnitudeProvider, double freshnessMultiplier, System.Func? freshnessProvider) -> void +Lifti.Tokenization.Objects.ObjectTokenizationBuilder +Lifti.Tokenization.Objects.ObjectTokenizationBuilder.ObjectTokenizationBuilder() -> void +Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithDynamicFields(string! dynamicFieldReaderName, System.Func?>! dynamicFieldReader, string? fieldNamePrefix = null, System.Func? tokenizationOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? textExtractor = null, System.Func? thesaurusOptions = null, double scoreBoost = 1) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! +Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithDynamicFields(string! dynamicFieldReaderName, System.Func!>!>! dynamicFieldReader, string? fieldNamePrefix = null, System.Func? tokenizationOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? textExtractor = null, System.Func? thesaurusOptions = null, double scoreBoost = 1) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! +Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithDynamicFields(string! dynamicFieldReaderName, System.Func?>! dynamicFieldReader, System.Func! 
getFieldName, System.Func! getFieldText, string? fieldNamePrefix = null, System.Func? tokenizationOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? textExtractor = null, System.Func? thesaurusOptions = null, double scoreBoost = 1) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! +Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithDynamicFields(string! dynamicFieldReaderName, System.Func?>! dynamicFieldReader, System.Func! getFieldName, System.Func!>! getFieldText, string? fieldNamePrefix = null, System.Func? tokenizationOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? textExtractor = null, System.Func? thesaurusOptions = null, double scoreBoost = 1) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! +Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithField(string! name, System.Func! fieldTextReader, System.Func? tokenizationOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? textExtractor = null, System.Func? thesaurusOptions = null, double scoreBoost = 1) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! +Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithField(string! name, System.Func!>! reader, System.Func? tokenizationOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? textExtractor = null, System.Func? thesaurusOptions = null, double scoreBoost = 1) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! +Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithField(string! name, System.Func!>! fieldTextReader, System.Func? tokenizationOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? textExtractor = null, System.Func? thesaurusOptions = null, double scoreBoost = 1) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! +Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithField(string! name, System.Func!>!>! fieldTextReader, System.Func? tokenizationOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? 
textExtractor = null, System.Func? thesaurusOptions = null, double scoreBoost = 1) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! +Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithField(string! name, System.Func!>! fieldTextReader, System.Func? tokenizationOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? textExtractor = null, System.Func? thesaurusOptions = null, double scoreBoost = 1) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! +Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithField(string! name, System.Func!>!>! fieldTextReader, System.Func? tokenizationOptions = null, System.Func? thesaurusOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? textExtractor = null, double scoreBoost = 1) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! +Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithKey(System.Func! keyReader) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! +Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithScoreBoosting(System.Action!>! scoreBoostingOptions) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! +Lifti.Tokenization.Preprocessing.CaseInsensitiveNormalizer +Lifti.Tokenization.Preprocessing.CaseInsensitiveNormalizer.CaseInsensitiveNormalizer() -> void +Lifti.Tokenization.Preprocessing.CaseInsensitiveNormalizer.Preprocess(char input) -> Lifti.PreprocessedInput +Lifti.Tokenization.Preprocessing.IgnoredCharacterPreprocessor +Lifti.Tokenization.Preprocessing.IgnoredCharacterPreprocessor.IgnoredCharacterPreprocessor(System.Collections.Generic.IReadOnlyList! 
ignoreCharacters) -> void +Lifti.Tokenization.Preprocessing.IgnoredCharacterPreprocessor.Preprocess(char input) -> Lifti.PreprocessedInput +Lifti.Tokenization.Preprocessing.IInputPreprocessor +Lifti.Tokenization.Preprocessing.IInputPreprocessor.Preprocess(char input) -> Lifti.PreprocessedInput +Lifti.Tokenization.Preprocessing.IInputPreprocessorPipeline +Lifti.Tokenization.Preprocessing.IInputPreprocessorPipeline.Process(char input) -> System.Collections.Generic.IEnumerable! +Lifti.Tokenization.Preprocessing.InputPreprocessorPipeline +Lifti.Tokenization.Preprocessing.InputPreprocessorPipeline.InputPreprocessorPipeline(Lifti.TokenizationOptions! options) -> void +Lifti.Tokenization.Preprocessing.InputPreprocessorPipeline.Process(char input) -> System.Collections.Generic.IEnumerable! +Lifti.Tokenization.Preprocessing.LatinCharacterNormalizer +Lifti.Tokenization.Preprocessing.LatinCharacterNormalizer.LatinCharacterNormalizer() -> void +Lifti.Tokenization.Preprocessing.LatinCharacterNormalizer.Preprocess(char input) -> Lifti.PreprocessedInput +Lifti.Tokenization.TextExtraction.DocumentTextFragment +Lifti.Tokenization.TextExtraction.DocumentTextFragment.DocumentTextFragment() -> void +Lifti.Tokenization.TextExtraction.DocumentTextFragment.DocumentTextFragment(int offset, System.ReadOnlyMemory text) -> void +Lifti.Tokenization.TextExtraction.DocumentTextFragment.Equals(Lifti.Tokenization.TextExtraction.DocumentTextFragment other) -> bool +Lifti.Tokenization.TextExtraction.DocumentTextFragment.Offset.get -> int +Lifti.Tokenization.TextExtraction.DocumentTextFragment.Text.get -> System.ReadOnlyMemory +Lifti.Tokenization.TextExtraction.ITextExtractor +Lifti.Tokenization.TextExtraction.ITextExtractor.Extract(System.ReadOnlyMemory document, int startOffset = 0) -> System.Collections.Generic.IEnumerable! 
+Lifti.Tokenization.TextExtraction.PlainTextExtractor +Lifti.Tokenization.TextExtraction.PlainTextExtractor.Extract(System.ReadOnlyMemory document, int startOffset) -> System.Collections.Generic.IEnumerable! +Lifti.Tokenization.TextExtraction.PlainTextExtractor.PlainTextExtractor() -> void +Lifti.Tokenization.TextExtraction.XmlTextExtractor +Lifti.Tokenization.TextExtraction.XmlTextExtractor.Extract(System.ReadOnlyMemory document, int startOffset) -> System.Collections.Generic.IEnumerable! +Lifti.Tokenization.TextExtraction.XmlTextExtractor.XmlTextExtractor() -> void +Lifti.Tokenization.Token +Lifti.Tokenization.Token.AddLocation(Lifti.TokenLocation! location) -> void +Lifti.Tokenization.Token.Locations.get -> System.Collections.Generic.IReadOnlyList! +Lifti.Tokenization.Token.Token(string! token, Lifti.TokenLocation! location) -> void +Lifti.Tokenization.Token.Token(string! token, params Lifti.TokenLocation![]! locations) -> void +Lifti.Tokenization.Token.Token(string! token, System.Collections.Generic.IReadOnlyList! locations) -> void +Lifti.Tokenization.Token.Value.get -> string! +Lifti.TokenizationOptions +Lifti.TokenizationOptions.AccentInsensitive.get -> bool +Lifti.TokenizationOptions.AdditionalSplitCharacters.get -> System.Collections.Generic.IReadOnlyList! +Lifti.TokenizationOptions.CaseInsensitive.get -> bool +Lifti.TokenizationOptions.IgnoreCharacters.get -> System.Collections.Generic.IReadOnlyList! +Lifti.TokenizationOptions.SplitOnPunctuation.get -> bool +Lifti.TokenizationOptions.Stemmer.get -> Lifti.Tokenization.IStemmer? +Lifti.TokenizerBuilder +Lifti.TokenizerBuilder.AccentInsensitive(bool accentInsensitive = true) -> Lifti.TokenizerBuilder! +Lifti.TokenizerBuilder.Build() -> Lifti.Tokenization.IIndexTokenizer! +Lifti.TokenizerBuilder.CaseInsensitive(bool caseInsensitive = true) -> Lifti.TokenizerBuilder! +Lifti.TokenizerBuilder.IgnoreCharacters(params char[]! ignoreCharacters) -> Lifti.TokenizerBuilder! 
+Lifti.TokenizerBuilder.SplitOnCharacters(params char[]! additionalSplitCharacters) -> Lifti.TokenizerBuilder! +Lifti.TokenizerBuilder.SplitOnPunctuation(bool splitOnPunctuation = true) -> Lifti.TokenizerBuilder! +Lifti.TokenizerBuilder.TokenizerBuilder() -> void +Lifti.TokenizerBuilder.WithFactory(System.Func! tokenizerFactory) -> Lifti.TokenizerBuilder! +Lifti.TokenizerBuilder.WithStemming() -> Lifti.TokenizerBuilder! +Lifti.TokenizerBuilder.WithStemming(bool stemming = true) -> Lifti.TokenizerBuilder! +Lifti.TokenizerBuilder.WithStemming(Lifti.Tokenization.IStemmer! stemmer) -> Lifti.TokenizerBuilder! +Lifti.TokenLocation +Lifti.TokenLocation.Length.get -> ushort +Lifti.TokenLocation.Start.get -> int +Lifti.TokenLocation.TokenIndex.get -> int +Lifti.TokenLocation.TokenLocation(int tokenIndex, int start, ushort length) -> void +override Lifti.ChildNodeMap.Equals(object? obj) -> bool +override Lifti.ChildNodeMap.GetHashCode() -> int +override Lifti.DocumentTokenMatchMap.Equals(object? obj) -> bool +override Lifti.DocumentTokenMatchMap.GetHashCode() -> int +override Lifti.FieldSearchResult.ToString() -> string! +override Lifti.FullTextIndex.ToString() -> string! +override Lifti.IndexedFieldDetails.ReadAsync(object! item, System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask!> +override Lifti.IndexedToken.Equals(object? obj) -> bool +override Lifti.IndexedToken.GetHashCode() -> int +override Lifti.IndexNode.ToString() -> string! +override Lifti.PreprocessedInput.Equals(object? obj) -> bool +override Lifti.PreprocessedInput.GetHashCode() -> int +override Lifti.Querying.IntermediateQueryResult.Equals(object? obj) -> bool +override Lifti.Querying.IntermediateQueryResult.GetHashCode() -> int +override Lifti.Querying.IntermediateQueryResult.ToString() -> string! +override Lifti.Querying.Query.ToString() -> string? +override Lifti.Querying.QueryParts.AdjacentWordsQueryOperator.ToString() -> string! 
+override Lifti.Querying.QueryParts.AndQueryOperator.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +override Lifti.Querying.QueryParts.AndQueryOperator.Precedence.get -> Lifti.Querying.OperatorPrecedence +override Lifti.Querying.QueryParts.AndQueryOperator.ToString() -> string! +override Lifti.Querying.QueryParts.BracketedQueryPart.ToString() -> string! +override Lifti.Querying.QueryParts.ExactWordQueryPart.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +override Lifti.Querying.QueryParts.ExactWordQueryPart.ToString() -> string! +override Lifti.Querying.QueryParts.FieldFilterQueryOperator.ToString() -> string! +override Lifti.Querying.QueryParts.FuzzyMatchQueryPart.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +override Lifti.Querying.QueryParts.FuzzyMatchQueryPart.ToString() -> string! +override Lifti.Querying.QueryParts.NearQueryOperator.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +override Lifti.Querying.QueryParts.NearQueryOperator.Precedence.get -> Lifti.Querying.OperatorPrecedence +override Lifti.Querying.QueryParts.NearQueryOperator.ToString() -> string! +override Lifti.Querying.QueryParts.OrQueryOperator.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +override Lifti.Querying.QueryParts.OrQueryOperator.Precedence.get -> Lifti.Querying.OperatorPrecedence +override Lifti.Querying.QueryParts.OrQueryOperator.ToString() -> string! +override Lifti.Querying.QueryParts.PrecedingNearQueryOperator.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! 
queryContext) -> Lifti.Querying.IntermediateQueryResult +override Lifti.Querying.QueryParts.PrecedingNearQueryOperator.Precedence.get -> Lifti.Querying.OperatorPrecedence +override Lifti.Querying.QueryParts.PrecedingNearQueryOperator.ToString() -> string! +override Lifti.Querying.QueryParts.PrecedingQueryOperator.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +override Lifti.Querying.QueryParts.PrecedingQueryOperator.Precedence.get -> Lifti.Querying.OperatorPrecedence +override Lifti.Querying.QueryParts.PrecedingQueryOperator.ToString() -> string! +override Lifti.Querying.QueryParts.WildcardQueryFragment.Equals(object? obj) -> bool +override Lifti.Querying.QueryParts.WildcardQueryFragment.GetHashCode() -> int +override Lifti.Querying.QueryParts.WildcardQueryPart.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +override Lifti.Querying.QueryParts.WildcardQueryPart.ToString() -> string! +override Lifti.Querying.QueryParts.WordQueryPart.RunWeightingCalculation(System.Func! navigatorCreator) -> double +override Lifti.Querying.ScoredFieldMatch.Equals(object? obj) -> bool +override Lifti.Querying.ScoredFieldMatch.GetHashCode() -> int +override Lifti.Querying.ScoredToken.Equals(object? obj) -> bool +override Lifti.Querying.ScoredToken.GetHashCode() -> int +override Lifti.SearchResult.ToString() -> string! +override Lifti.Tokenization.TextExtraction.DocumentTextFragment.Equals(object? obj) -> bool +override Lifti.Tokenization.TextExtraction.DocumentTextFragment.GetHashCode() -> int +override Lifti.TokenLocation.Equals(object? obj) -> bool +override Lifti.TokenLocation.GetHashCode() -> int +override Lifti.TokenLocation.ToString() -> string! 
+static Lifti.ChildNodeMap.Empty.get -> Lifti.ChildNodeMap +static Lifti.ChildNodeMap.operator !=(Lifti.ChildNodeMap left, Lifti.ChildNodeMap right) -> bool +static Lifti.ChildNodeMap.operator ==(Lifti.ChildNodeMap left, Lifti.ChildNodeMap right) -> bool +static Lifti.DocumentMetadata.ForLooseText(int documentId, TKey key, Lifti.DocumentStatistics documentStatistics) -> Lifti.DocumentMetadata! +static Lifti.DocumentMetadata.ForObject(byte objectTypeId, int documentId, TKey key, Lifti.DocumentStatistics documentStatistics, System.DateTime? scoringFreshnessDate, double? scoringMagnitude) -> Lifti.DocumentMetadata! +static Lifti.DocumentTokenMatchMap.Empty.get -> Lifti.DocumentTokenMatchMap +static Lifti.DocumentTokenMatchMap.operator !=(Lifti.DocumentTokenMatchMap left, Lifti.DocumentTokenMatchMap right) -> bool +static Lifti.DocumentTokenMatchMap.operator ==(Lifti.DocumentTokenMatchMap left, Lifti.DocumentTokenMatchMap right) -> bool +static Lifti.FullTextIndexExtensions.ParseQuery(this Lifti.IFullTextIndex! index, string! queryText) -> Lifti.Querying.IQuery! +static Lifti.IndexedToken.operator !=(Lifti.IndexedToken left, Lifti.IndexedToken right) -> bool +static Lifti.IndexedToken.operator ==(Lifti.IndexedToken left, Lifti.IndexedToken right) -> bool +static Lifti.PreprocessedInput.Empty.get -> Lifti.PreprocessedInput +static Lifti.PreprocessedInput.implicit operator Lifti.PreprocessedInput(char value) -> Lifti.PreprocessedInput +static Lifti.PreprocessedInput.implicit operator Lifti.PreprocessedInput(string! replacement) -> Lifti.PreprocessedInput +static Lifti.PreprocessedInput.operator !=(Lifti.PreprocessedInput left, Lifti.PreprocessedInput right) -> bool +static Lifti.PreprocessedInput.operator ==(Lifti.PreprocessedInput left, Lifti.PreprocessedInput right) -> bool +static Lifti.PreprocessedInput.ToPreprocessedInput(char value) -> Lifti.PreprocessedInput +static Lifti.PreprocessedInput.ToPreprocessedInput(string! 
value) -> Lifti.PreprocessedInput +static Lifti.Querying.IntermediateQueryResult.Empty.get -> Lifti.Querying.IntermediateQueryResult +static Lifti.Querying.IntermediateQueryResult.operator !=(Lifti.Querying.IntermediateQueryResult left, Lifti.Querying.IntermediateQueryResult right) -> bool +static Lifti.Querying.IntermediateQueryResult.operator ==(Lifti.Querying.IntermediateQueryResult left, Lifti.Querying.IntermediateQueryResult right) -> bool +static Lifti.Querying.Query.Empty.get -> Lifti.Querying.IQuery! +static Lifti.Querying.QueryContext.Empty.get -> Lifti.Querying.QueryContext! +static Lifti.Querying.QueryParts.AndQueryOperator.CombineAll(System.Collections.Generic.IEnumerable! queryParts) -> Lifti.Querying.QueryParts.IQueryPart! +static Lifti.Querying.QueryParts.EmptyQueryPart.Instance.get -> Lifti.Querying.QueryParts.EmptyQueryPart! +static Lifti.Querying.QueryParts.FieldFilterQueryOperator.CreateForField(Lifti.IIndexedFieldLookup! fieldLookup, string! fieldName, Lifti.Querying.QueryParts.IQueryPart! statement) -> Lifti.Querying.QueryParts.FieldFilterQueryOperator! +static Lifti.Querying.QueryParts.OrQueryOperator.CombineAll(System.Collections.Generic.IEnumerable! queryParts) -> Lifti.Querying.QueryParts.IQueryPart! +static Lifti.Querying.QueryParts.WildcardQueryFragment.CreateText(string! 
text) -> Lifti.Querying.QueryParts.WildcardQueryFragment +static Lifti.Querying.QueryParts.WildcardQueryFragment.MultiCharacter.get -> Lifti.Querying.QueryParts.WildcardQueryFragment +static Lifti.Querying.QueryParts.WildcardQueryFragment.operator !=(Lifti.Querying.QueryParts.WildcardQueryFragment left, Lifti.Querying.QueryParts.WildcardQueryFragment right) -> bool +static Lifti.Querying.QueryParts.WildcardQueryFragment.operator ==(Lifti.Querying.QueryParts.WildcardQueryFragment left, Lifti.Querying.QueryParts.WildcardQueryFragment right) -> bool +static Lifti.Querying.QueryParts.WildcardQueryFragment.SingleCharacter.get -> Lifti.Querying.QueryParts.WildcardQueryFragment +static Lifti.Querying.ScoredFieldMatch.operator !=(Lifti.Querying.ScoredFieldMatch? left, Lifti.Querying.ScoredFieldMatch? right) -> bool +static Lifti.Querying.ScoredFieldMatch.operator ==(Lifti.Querying.ScoredFieldMatch? left, Lifti.Querying.ScoredFieldMatch? right) -> bool +static Lifti.Querying.ScoredToken.operator !=(Lifti.Querying.ScoredToken left, Lifti.Querying.ScoredToken right) -> bool +static Lifti.Querying.ScoredToken.operator ==(Lifti.Querying.ScoredToken left, Lifti.Querying.ScoredToken right) -> bool +static Lifti.Tokenization.IndexTokenizer.Default.get -> Lifti.Tokenization.IndexTokenizer! +static Lifti.Tokenization.TextExtraction.DocumentTextFragment.operator !=(Lifti.Tokenization.TextExtraction.DocumentTextFragment left, Lifti.Tokenization.TextExtraction.DocumentTextFragment right) -> bool +static Lifti.Tokenization.TextExtraction.DocumentTextFragment.operator ==(Lifti.Tokenization.TextExtraction.DocumentTextFragment left, Lifti.Tokenization.TextExtraction.DocumentTextFragment right) -> bool +static Lifti.TokenizationOptions.Default.get -> Lifti.TokenizationOptions! +static Lifti.TokenLocation.operator !=(Lifti.TokenLocation? left, Lifti.TokenLocation? right) -> bool +static Lifti.TokenLocation.operator <(Lifti.TokenLocation? left, Lifti.TokenLocation? 
right) -> bool +static Lifti.TokenLocation.operator <=(Lifti.TokenLocation? left, Lifti.TokenLocation? right) -> bool +static Lifti.TokenLocation.operator ==(Lifti.TokenLocation? left, Lifti.TokenLocation? right) -> bool +static Lifti.TokenLocation.operator >(Lifti.TokenLocation? left, Lifti.TokenLocation? right) -> bool +static Lifti.TokenLocation.operator >=(Lifti.TokenLocation? left, Lifti.TokenLocation? right) -> bool +System.Runtime.CompilerServices.IsExternalInit +System.Runtime.CompilerServices.IsExternalInit.IsExternalInit() -> void +virtual Lifti.Querying.QueryParts.BinaryQueryOperator.RunWeightingCalculation(System.Func! navigatorCreator) -> double +virtual Lifti.Querying.QueryParts.ScoreBoostedQueryPart.ToString(string! searchTerm) -> string! +virtual Lifti.ScoreBoostMetadata.CalculateScoreBoost(Lifti.DocumentMetadata! documentMetadata) -> double +virtual Lifti.Serialization.IndexDeserializerBase.Dispose(bool disposing) -> void +virtual Lifti.Serialization.IndexDeserializerBase.OnDeserializationCompleteAsync(Lifti.FullTextIndex! index, System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask +virtual Lifti.Serialization.IndexDeserializerBase.OnDeserializationStartingAsync(System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask +virtual Lifti.Serialization.IndexSerializerBase.Dispose(bool disposing) -> void +virtual Lifti.Serialization.IndexSerializerBase.OnSerializationComplete(Lifti.IIndexSnapshot! snapshot, System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask +virtual Lifti.Serialization.IndexSerializerBase.OnSerializationStart(Lifti.IIndexSnapshot! snapshot, System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask +virtual Lifti.Tokenization.IndexTokenizer.IsSplitCharacter(char character) -> bool +virtual Lifti.TokenLocation.CompareTo(Lifti.TokenLocation? 
other) -> int +virtual Lifti.TokenLocation.Equals(Lifti.TokenLocation? other) -> bool diff --git a/src/Lifti.Core/PublicApi/PublicAPI.Unshipped.txt b/src/Lifti.Core/PublicApi/PublicAPI.Unshipped.txt new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/src/Lifti.Core/PublicApi/PublicAPI.Unshipped.txt @@ -0,0 +1 @@ + diff --git a/src/Lifti.Core/PublicAPI.Shipped.txt b/src/Lifti.Core/PublicApi/netstandard2/PublicAPI.Shipped.txt similarity index 53% rename from src/Lifti.Core/PublicAPI.Shipped.txt rename to src/Lifti.Core/PublicApi/netstandard2/PublicAPI.Shipped.txt index d8b917c9..69797c29 100644 --- a/src/Lifti.Core/PublicAPI.Shipped.txt +++ b/src/Lifti.Core/PublicApi/netstandard2/PublicAPI.Shipped.txt @@ -1,14 +1,66 @@ #nullable enable abstract Lifti.IndexedFieldDetails.ReadAsync(object! item, System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask!> -abstract Lifti.Querying.QueryParts.BinaryQueryOperator.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryParts.IQueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +abstract Lifti.Querying.QueryParts.BinaryQueryOperator.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult abstract Lifti.Querying.QueryParts.BinaryQueryOperator.Precedence.get -> Lifti.Querying.OperatorPrecedence -abstract Lifti.Querying.QueryParts.WordQueryPart.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryParts.IQueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +abstract Lifti.Querying.QueryParts.ScoreBoostedQueryPart.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +abstract Lifti.Querying.QueryParts.ScoreBoostedQueryPart.RunWeightingCalculation(System.Func! 
navigatorCreator) -> double +abstract Lifti.Serialization.IndexDeserializerBase.DeserializeDocumentMetadataAsync(System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask!> +abstract Lifti.Serialization.IndexDeserializerBase.DeserializeIndexNodeHierarchyAsync(Lifti.Serialization.SerializedFieldIdMap serializedFieldIdMap, Lifti.IIndexNodeFactory! indexNodeFactory, System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask +abstract Lifti.Serialization.IndexDeserializerBase.DeserializeKnownFieldsAsync(System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask +abstract Lifti.Serialization.IndexSerializerBase.WriteFieldsAsync(System.Collections.Generic.IReadOnlyList! fields, System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask +abstract Lifti.Serialization.IndexSerializerBase.WriteIndexMetadataAsync(Lifti.IIndexSnapshot! index, System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask +abstract Lifti.Serialization.IndexSerializerBase.WriteNodesAsync(Lifti.IndexNode! rootNode, System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask +Lifti.ChildNodeMap +Lifti.ChildNodeMap.CharacterMap.get -> System.Collections.Generic.IReadOnlyList! +Lifti.ChildNodeMap.ChildNodeMap() -> void +Lifti.ChildNodeMap.ChildNodeMap(Lifti.ChildNodeMapEntry[]! map) -> void +Lifti.ChildNodeMap.Count.get -> int +Lifti.ChildNodeMap.Equals(Lifti.ChildNodeMap other) -> bool +Lifti.ChildNodeMap.TryGetValue(char value, out Lifti.IndexNode? nextNode) -> bool +Lifti.ChildNodeMapEntry +Lifti.ChildNodeMapEntry.ChildChar.get -> char +Lifti.ChildNodeMapEntry.ChildChar.set -> void +Lifti.ChildNodeMapEntry.ChildNode.get -> Lifti.IndexNode! +Lifti.ChildNodeMapEntry.ChildNode.set -> void +Lifti.ChildNodeMapEntry.ChildNodeMapEntry() -> void +Lifti.ChildNodeMapEntry.ChildNodeMapEntry(char ChildChar, Lifti.IndexNode! 
ChildNode) -> void +Lifti.DocumentMetadata +Lifti.DocumentMetadata.DocumentMetadata(byte? objectTypeId, int documentId, Lifti.DocumentStatistics documentStatistics, System.DateTime? scoringFreshnessDate, double? scoringMagnitude) -> void +Lifti.DocumentMetadata.DocumentStatistics.get -> Lifti.DocumentStatistics +Lifti.DocumentMetadata.Id.get -> int +Lifti.DocumentMetadata.ObjectTypeId.get -> byte? +Lifti.DocumentMetadata.ScoringFreshnessDate.get -> System.DateTime? +Lifti.DocumentMetadata.ScoringMagnitude.get -> double? +Lifti.DocumentMetadata +Lifti.DocumentMetadata.Item.get -> TKey +Lifti.DocumentMetadata.Key.get -> TKey +Lifti.DocumentPhrases +Lifti.DocumentPhrases.DocumentPhrases(TObject item, Lifti.SearchResult! SearchResult, System.Collections.Generic.IReadOnlyList!>! phrases) -> void +Lifti.DocumentPhrases.Item.get -> TObject +Lifti.DocumentPhrases.Item.init -> void +Lifti.DocumentPhrases +Lifti.DocumentPhrases.DocumentPhrases(Lifti.SearchResult! SearchResult, System.Collections.Generic.IReadOnlyList!>! FieldPhrases) -> void +Lifti.DocumentPhrases.EnumeratePhrases() -> System.Collections.Generic.IEnumerable! +Lifti.DocumentPhrases.FieldPhrases.get -> System.Collections.Generic.IReadOnlyList!>! +Lifti.DocumentPhrases.FieldPhrases.init -> void +Lifti.DocumentPhrases.SearchResult.get -> Lifti.SearchResult! +Lifti.DocumentPhrases.SearchResult.init -> void Lifti.DocumentStatistics +Lifti.DocumentStatistics.DocumentStatistics() -> void +Lifti.DocumentStatistics.DocumentStatistics(System.Collections.Generic.IReadOnlyDictionary! tokenCountByField, int totalTokenCount) -> void Lifti.DocumentStatistics.TokenCountByField.get -> System.Collections.Generic.IReadOnlyDictionary! 
Lifti.DocumentStatistics.TotalTokenCount.get -> int -Lifti.DuplicateItemBehavior -Lifti.DuplicateItemBehavior.ReplaceItem = 0 -> Lifti.DuplicateItemBehavior -Lifti.DuplicateItemBehavior.ThrowException = 1 -> Lifti.DuplicateItemBehavior +Lifti.DocumentTokenMatchMap +Lifti.DocumentTokenMatchMap.Count.get -> int +Lifti.DocumentTokenMatchMap.DocumentTokenMatchMap() -> void +Lifti.DocumentTokenMatchMap.DocumentTokenMatchMap(System.Collections.Generic.Dictionary!>! data) -> void +Lifti.DocumentTokenMatchMap.Enumerate() -> System.Collections.Generic.IEnumerable<(int documentId, System.Collections.Generic.IReadOnlyList! indexedTokens)>! +Lifti.DocumentTokenMatchMap.Equals(Lifti.DocumentTokenMatchMap other) -> bool +Lifti.DocumentTokenMatchMap.HasDocument(int documentId) -> bool +Lifti.DocumentTokenMatchMap.TryGetValue(int documentId, out System.Collections.Generic.IReadOnlyList? tokens) -> bool +Lifti.DuplicateKeyBehavior +Lifti.DuplicateKeyBehavior.Replace = 0 -> Lifti.DuplicateKeyBehavior +Lifti.DuplicateKeyBehavior.ThrowException = 1 -> Lifti.DuplicateKeyBehavior Lifti.FieldKind Lifti.FieldKind.Dynamic = 2 -> Lifti.FieldKind Lifti.FieldKind.Static = 1 -> Lifti.FieldKind @@ -21,15 +73,15 @@ Lifti.FieldPhrases.FoundIn.init -> void Lifti.FieldPhrases.Phrases.get -> System.Collections.Generic.IReadOnlyList! Lifti.FieldPhrases.Phrases.init -> void Lifti.FieldSearchResult -Lifti.FieldSearchResult.FieldSearchResult(string! foundIn, double score, System.Collections.Generic.IReadOnlyList! locations) -> void +Lifti.FieldSearchResult.FieldSearchResult(string! foundIn, double score, System.Collections.Generic.IReadOnlyList! locations) -> void Lifti.FieldSearchResult.FoundIn.get -> string! -Lifti.FieldSearchResult.Locations.get -> System.Collections.Generic.IReadOnlyList! +Lifti.FieldSearchResult.Locations.get -> System.Collections.Generic.IReadOnlyList! Lifti.FieldSearchResult.Score.get -> double Lifti.FullTextIndex -Lifti.FullTextIndex.AddAsync(TKey itemKey, string! 
text, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! -Lifti.FullTextIndex.AddAsync(TKey itemKey, System.Collections.Generic.IEnumerable! text, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! -Lifti.FullTextIndex.AddAsync(TItem item, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! -Lifti.FullTextIndex.AddRangeAsync(System.Collections.Generic.IEnumerable! items, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.FullTextIndex.AddAsync(TKey key, string! text, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.FullTextIndex.AddAsync(TKey key, System.Collections.Generic.IEnumerable! text, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.FullTextIndex.AddAsync(TObject item, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.FullTextIndex.AddRangeAsync(System.Collections.Generic.IEnumerable! items, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! Lifti.FullTextIndex.BeginBatchChange() -> void Lifti.FullTextIndex.CommitBatchChangeAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! Lifti.FullTextIndex.Count.get -> int @@ -40,9 +92,10 @@ Lifti.FullTextIndex.DefaultTokenizer.get -> Lifti.Tokenization.IIndexToken Lifti.FullTextIndex.Dispose() -> void Lifti.FullTextIndex.FieldLookup.get -> Lifti.IIndexedFieldLookup! 
Lifti.FullTextIndex.GetTokenizerForField(string! fieldName) -> Lifti.Tokenization.IIndexTokenizer! -Lifti.FullTextIndex.Items.get -> Lifti.IItemStore! +Lifti.FullTextIndex.Items.get -> Lifti.IIndexMetadata! +Lifti.FullTextIndex.Metadata.get -> Lifti.IIndexMetadata! Lifti.FullTextIndex.QueryParser.get -> Lifti.Querying.IQueryParser! -Lifti.FullTextIndex.RemoveAsync(TKey itemKey, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.FullTextIndex.RemoveAsync(TKey key, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! Lifti.FullTextIndex.Root.get -> Lifti.IndexNode! Lifti.FullTextIndex.Search(Lifti.Querying.IQuery! query) -> Lifti.ISearchResults! Lifti.FullTextIndex.Search(string! searchText) -> Lifti.ISearchResults! @@ -52,12 +105,12 @@ Lifti.FullTextIndexBuilder.Build() -> Lifti.FullTextIndex! Lifti.FullTextIndexBuilder.FullTextIndexBuilder() -> void Lifti.FullTextIndexBuilder.WithDefaultThesaurus(System.Func! thesaurusBuilder) -> Lifti.FullTextIndexBuilder! Lifti.FullTextIndexBuilder.WithDefaultTokenization(System.Func! optionsBuilder) -> Lifti.FullTextIndexBuilder! -Lifti.FullTextIndexBuilder.WithDuplicateItemBehavior(Lifti.DuplicateItemBehavior duplicateItemBehavior) -> Lifti.FullTextIndexBuilder! +Lifti.FullTextIndexBuilder.WithDuplicateKeyBehavior(Lifti.DuplicateKeyBehavior duplicateKeyBehavior) -> Lifti.FullTextIndexBuilder! Lifti.FullTextIndexBuilder.WithIndexModificationAction(System.Action!>! action) -> Lifti.FullTextIndexBuilder! Lifti.FullTextIndexBuilder.WithIndexModificationAction(System.Func!, System.Threading.CancellationToken, System.Threading.Tasks.Task!>! asyncAction) -> Lifti.FullTextIndexBuilder! Lifti.FullTextIndexBuilder.WithIndexModificationAction(System.Func!, System.Threading.Tasks.Task!>! asyncAction) -> Lifti.FullTextIndexBuilder! 
Lifti.FullTextIndexBuilder.WithIntraNodeTextSupportedAfterIndexDepth(int depth) -> Lifti.FullTextIndexBuilder! -Lifti.FullTextIndexBuilder.WithObjectTokenization(System.Func!, Lifti.Tokenization.Objects.ObjectTokenizationBuilder!>! optionsBuilder) -> Lifti.FullTextIndexBuilder! +Lifti.FullTextIndexBuilder.WithObjectTokenization(System.Func!, Lifti.Tokenization.Objects.ObjectTokenizationBuilder!>! optionsBuilder) -> Lifti.FullTextIndexBuilder! Lifti.FullTextIndexBuilder.WithQueryParser(Lifti.Querying.IQueryParser! queryParser) -> Lifti.FullTextIndexBuilder! Lifti.FullTextIndexBuilder.WithQueryParser(System.Func! optionsBuilder) -> Lifti.FullTextIndexBuilder! Lifti.FullTextIndexBuilder.WithScorerFactory(Lifti.Querying.IIndexScorerFactory! scorerFactory) -> Lifti.FullTextIndexBuilder! @@ -66,16 +119,11 @@ Lifti.FullTextIndexBuilder.WithSimpleQueryParser(System.Func.WithTextExtractor(Lifti.Tokenization.TextExtraction.ITextExtractor! textExtractor) -> Lifti.FullTextIndexBuilder! Lifti.FullTextIndexBuilder.WithTextExtractor() -> Lifti.FullTextIndexBuilder! Lifti.FullTextIndexExtensions -Lifti.IdPool -Lifti.IdPool.Add(int id, T item, Lifti.DocumentStatistics! documentStatistics) -> void -Lifti.IdPool.Add(T item, Lifti.DocumentStatistics! documentStatistics) -> int -Lifti.IdPool.IdPool() -> void -Lifti.IdPool.ReleaseItem(T item) -> int Lifti.IFullTextIndex -Lifti.IFullTextIndex.AddAsync(TKey itemKey, string! text, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! -Lifti.IFullTextIndex.AddAsync(TKey itemKey, System.Collections.Generic.IEnumerable! text, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! -Lifti.IFullTextIndex.AddAsync(TItem item, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! 
-Lifti.IFullTextIndex.AddRangeAsync(System.Collections.Generic.IEnumerable! items, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.IFullTextIndex.AddAsync(TKey key, string! text, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.IFullTextIndex.AddAsync(TKey key, System.Collections.Generic.IEnumerable! text, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.IFullTextIndex.AddAsync(TObject item, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.IFullTextIndex.AddRangeAsync(System.Collections.Generic.IEnumerable! items, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! Lifti.IFullTextIndex.BeginBatchChange() -> void Lifti.IFullTextIndex.CommitBatchChangeAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! Lifti.IFullTextIndex.Count.get -> int @@ -83,9 +131,10 @@ Lifti.IFullTextIndex.CreateNavigator() -> Lifti.Querying.IIndexNavigator! Lifti.IFullTextIndex.DefaultTextExtractor.get -> Lifti.Tokenization.TextExtraction.ITextExtractor! Lifti.IFullTextIndex.DefaultThesaurus.get -> Lifti.Tokenization.IThesaurus! Lifti.IFullTextIndex.FieldLookup.get -> Lifti.IIndexedFieldLookup! -Lifti.IFullTextIndex.Items.get -> Lifti.IItemStore! +Lifti.IFullTextIndex.Items.get -> Lifti.IIndexMetadata! +Lifti.IFullTextIndex.Metadata.get -> Lifti.IIndexMetadata! Lifti.IFullTextIndex.QueryParser.get -> Lifti.Querying.IQueryParser! 
-Lifti.IFullTextIndex.RemoveAsync(TKey itemKey, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.IFullTextIndex.RemoveAsync(TKey key, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! Lifti.IFullTextIndex.Search(Lifti.Querying.IQuery! query) -> Lifti.ISearchResults! Lifti.IFullTextIndex.Search(string! searchText) -> Lifti.ISearchResults! Lifti.IFullTextIndex.Snapshot.get -> Lifti.IIndexSnapshot! @@ -95,113 +144,106 @@ Lifti.IIndexedFieldLookup.DefaultField.get -> byte Lifti.IIndexedFieldLookup.GetFieldForId(byte id) -> string! Lifti.IIndexedFieldLookup.GetFieldInfo(string! fieldName) -> Lifti.IndexedFieldDetails! Lifti.IIndexedFieldLookup.IsKnownField(System.Type! objectType, string! fieldName) -> bool +Lifti.IIndexMetadata +Lifti.IIndexMetadata.Count.get -> int +Lifti.IIndexMetadata.DocumentCount.get -> int +Lifti.IIndexMetadata.GetDocumentMetadata(int documentId) -> Lifti.DocumentMetadata! +Lifti.IIndexMetadata.GetMetadata(int documentId) -> Lifti.DocumentMetadata! +Lifti.IIndexMetadata.GetObjectTypeScoreBoostMetadata(byte objectTypeId) -> Lifti.ScoreBoostMetadata! +Lifti.IIndexMetadata.IndexStatistics.get -> Lifti.IndexStatistics! +Lifti.IIndexMetadata +Lifti.IIndexMetadata.Contains(TKey key) -> bool +Lifti.IIndexMetadata.GetDocumentMetadata(int documentId) -> Lifti.DocumentMetadata! +Lifti.IIndexMetadata.GetIndexedDocuments() -> System.Collections.Generic.IEnumerable!>! +Lifti.IIndexMetadata.GetMetadata(int documentId) -> Lifti.DocumentMetadata! +Lifti.IIndexMetadata.GetMetadata(TKey key) -> Lifti.DocumentMetadata! Lifti.IIndexNodeFactory -Lifti.IIndexNodeFactory.CreateNode(System.ReadOnlyMemory intraNodeText, System.Collections.Immutable.ImmutableDictionary! childNodes, System.Collections.Immutable.ImmutableDictionary!>! matches) -> Lifti.IndexNode! 
+Lifti.IIndexNodeFactory.CreateNode(System.ReadOnlyMemory intraNodeText, Lifti.ChildNodeMap childNodes, Lifti.DocumentTokenMatchMap matches) -> Lifti.IndexNode! Lifti.IIndexNodeFactory.CreateRootNode() -> Lifti.IndexNode! Lifti.IIndexNodeFactory.GetIndexSupportLevelForDepth(int depth) -> Lifti.IndexSupportLevelKind Lifti.IIndexSnapshot Lifti.IIndexSnapshot.CreateNavigator() -> Lifti.Querying.IIndexNavigator! Lifti.IIndexSnapshot.FieldLookup.get -> Lifti.IIndexedFieldLookup! -Lifti.IIndexSnapshot.Items.get -> Lifti.IItemStore! +Lifti.IIndexSnapshot.Items.get -> Lifti.IIndexMetadata! +Lifti.IIndexSnapshot.Metadata.get -> Lifti.IIndexMetadata! Lifti.IIndexSnapshot.Root.get -> Lifti.IndexNode! Lifti.IIndexSnapshot -Lifti.IIndexSnapshot.Items.get -> Lifti.IItemStore! +Lifti.IIndexSnapshot.Items.get -> Lifti.IIndexMetadata! +Lifti.IIndexSnapshot.Metadata.get -> Lifti.IIndexMetadata! Lifti.IIndexTokenizerProvider Lifti.IIndexTokenizerProvider.DefaultTokenizer.get -> Lifti.Tokenization.IIndexTokenizer! Lifti.IIndexTokenizerProvider.GetTokenizerForField(string! fieldName) -> Lifti.Tokenization.IIndexTokenizer! -Lifti.IItemMetadata -Lifti.IItemMetadata.DocumentStatistics.get -> Lifti.DocumentStatistics! -Lifti.IItemMetadata.Id.get -> int -Lifti.IItemMetadata -Lifti.IItemMetadata.Item.get -> TKey -Lifti.IItemStore -Lifti.IItemStore.Count.get -> int -Lifti.IItemStore.GetMetadata(int id) -> Lifti.IItemMetadata! -Lifti.IItemStore.IndexStatistics.get -> Lifti.IndexStatistics! -Lifti.IItemStore -Lifti.IItemStore.Contains(TKey item) -> bool -Lifti.IItemStore.GetIndexedItems() -> System.Collections.Generic.IEnumerable!>! -Lifti.IItemStore.GetMetadata(int id) -> Lifti.IItemMetadata! -Lifti.IItemStore.GetMetadata(TKey item) -> Lifti.IItemMetadata! -Lifti.IItemStore.Snapshot() -> Lifti.IItemStore! Lifti.IndexedFieldDetails Lifti.IndexedFieldDetails.DynamicFieldReaderName.get -> string? 
Lifti.IndexedFieldDetails.FieldKind.get -> Lifti.FieldKind Lifti.IndexedFieldDetails.Id.get -> byte Lifti.IndexedFieldDetails.Name.get -> string! Lifti.IndexedFieldDetails.ObjectType.get -> System.Type! +Lifti.IndexedFieldDetails.ScoreBoost.get -> double Lifti.IndexedFieldDetails.TextExtractor.get -> Lifti.Tokenization.TextExtraction.ITextExtractor! Lifti.IndexedFieldDetails.Thesaurus.get -> Lifti.Tokenization.IThesaurus! Lifti.IndexedFieldDetails.Tokenizer.get -> Lifti.Tokenization.IIndexTokenizer! -Lifti.IndexedFieldDetails +Lifti.IndexedFieldDetails Lifti.IndexedToken Lifti.IndexedToken.Equals(Lifti.IndexedToken other) -> bool Lifti.IndexedToken.FieldId.get -> byte Lifti.IndexedToken.IndexedToken() -> void -Lifti.IndexedToken.IndexedToken(byte fieldId, params Lifti.TokenLocation[]! locations) -> void -Lifti.IndexedToken.IndexedToken(byte fieldId, System.Collections.Generic.IReadOnlyList! locations) -> void -Lifti.IndexedToken.Locations.get -> System.Collections.Generic.IReadOnlyList! +Lifti.IndexedToken.IndexedToken(byte fieldId, params Lifti.TokenLocation![]! locations) -> void +Lifti.IndexedToken.IndexedToken(byte fieldId, System.Collections.Generic.IReadOnlyList! locations) -> void +Lifti.IndexedToken.Locations.get -> System.Collections.Generic.IReadOnlyList! +Lifti.IndexMetadata +Lifti.IndexMetadata.Add(Lifti.DocumentMetadata! documentMetadata) -> void +Lifti.IndexMetadata.Add(TKey key, Lifti.DocumentStatistics documentStatistics) -> int +Lifti.IndexMetadata.Contains(TKey key) -> bool +Lifti.IndexMetadata.Count.get -> int +Lifti.IndexMetadata.DocumentCount.get -> int +Lifti.IndexMetadata.GetDocumentMetadata(int documentId) -> Lifti.DocumentMetadata! +Lifti.IndexMetadata.GetIndexedDocuments() -> System.Collections.Generic.IEnumerable!>! +Lifti.IndexMetadata.GetMetadata(int documentId) -> Lifti.DocumentMetadata! +Lifti.IndexMetadata.GetMetadata(TKey key) -> Lifti.DocumentMetadata! 
+Lifti.IndexMetadata.GetObjectTypeScoreBoostMetadata(byte objectTypeId) -> Lifti.ScoreBoostMetadata! +Lifti.IndexMetadata.IndexStatistics.get -> Lifti.IndexStatistics! Lifti.IndexNode -Lifti.IndexNode.ChildNodes.get -> System.Collections.Immutable.ImmutableDictionary! +Lifti.IndexNode.ChildNodes.get -> Lifti.ChildNodeMap Lifti.IndexNode.HasChildNodes.get -> bool Lifti.IndexNode.HasMatches.get -> bool Lifti.IndexNode.IntraNodeText.get -> System.ReadOnlyMemory Lifti.IndexNode.IsEmpty.get -> bool -Lifti.IndexNode.Matches.get -> System.Collections.Immutable.ImmutableDictionary!>! +Lifti.IndexNode.Matches.get -> Lifti.DocumentTokenMatchMap Lifti.IndexNodeFactory -Lifti.IndexNodeFactory.CreateNode(System.ReadOnlyMemory intraNodeText, System.Collections.Immutable.ImmutableDictionary! childNodes, System.Collections.Immutable.ImmutableDictionary!>! matches) -> Lifti.IndexNode! +Lifti.IndexNodeFactory.CreateNode(System.ReadOnlyMemory intraNodeText, Lifti.ChildNodeMap childNodes, Lifti.DocumentTokenMatchMap matches) -> Lifti.IndexNode! Lifti.IndexNodeFactory.CreateRootNode() -> Lifti.IndexNode! Lifti.IndexNodeFactory.GetIndexSupportLevelForDepth(int depth) -> Lifti.IndexSupportLevelKind Lifti.IndexNodeFactory.IndexNodeFactory(Lifti.IndexOptions! options) -> void Lifti.IndexOptions -Lifti.IndexOptions.DuplicateItemBehavior.get -> Lifti.DuplicateItemBehavior +Lifti.IndexOptions.DuplicateItemBehavior.get -> Lifti.DuplicateKeyBehavior +Lifti.IndexOptions.DuplicateKeyBehavior.get -> Lifti.DuplicateKeyBehavior Lifti.IndexOptions.SupportIntraNodeTextAfterIndexDepth.get -> int Lifti.IndexSnapshot Lifti.IndexSnapshot.CreateNavigator() -> Lifti.Querying.IIndexNavigator! Lifti.IndexSnapshot.FieldLookup.get -> Lifti.IIndexedFieldLookup! -Lifti.IndexSnapshot.Items.get -> Lifti.IItemStore! +Lifti.IndexSnapshot.Items.get -> Lifti.IIndexMetadata! +Lifti.IndexSnapshot.Metadata.get -> Lifti.IIndexMetadata! Lifti.IndexSnapshot.Root.get -> Lifti.IndexNode! 
Lifti.IndexStatistics -Lifti.IndexStatistics.TokenCountByField.get -> System.Collections.Immutable.ImmutableDictionary! +Lifti.IndexStatistics.GetFieldTokenCount(byte fieldId) -> long +Lifti.IndexStatistics.TokenCountByField.get -> System.Collections.Generic.IReadOnlyDictionary! Lifti.IndexStatistics.TotalTokenCount.get -> long Lifti.IndexSupportLevelKind Lifti.IndexSupportLevelKind.CharacterByCharacter = 0 -> Lifti.IndexSupportLevelKind Lifti.IndexSupportLevelKind.IntraNodeText = 1 -> Lifti.IndexSupportLevelKind Lifti.ISearchResults -Lifti.ISearchResults.CreateMatchPhrases(System.Func! loadText) -> System.Collections.Generic.IEnumerable!>! -Lifti.ISearchResults.CreateMatchPhrasesAsync(System.Func>! loadTextAsync, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task!>!>! -Lifti.ISearchResults.CreateMatchPhrasesAsync(System.Func>! loadTextAsync, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task!>!>! -Lifti.ISearchResults.CreateMatchPhrasesAsync(System.Func!, System.Collections.Generic.IReadOnlyList!>! loadItems, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task!>!>! -Lifti.ISearchResults.CreateMatchPhrasesAsync(System.Func!, System.Threading.CancellationToken, System.Threading.Tasks.ValueTask!>>! loadItemsAsync, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task!>!>! -Lifti.ISearchResults.CreateMatchPhrasesAsync(System.Func!, System.Threading.Tasks.ValueTask!>>! loadItemsAsync, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task!>!>! -Lifti.ISearchResults.CreateMatchPhrasesAsync(System.Func>! 
loadItemAsync, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task!>!>! -Lifti.ISearchResults.CreateMatchPhrasesAsync(System.Func>! loadItemAsync, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task!>!>! -Lifti.ISearchResults.CreateMatchPhrasesAsync(System.Func! loadItem, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task!>!>! +Lifti.ISearchResults.Count.get -> int +Lifti.ISearchResults.CreateMatchPhrases(System.Func! loadText) -> System.Collections.Generic.IEnumerable!>! +Lifti.ISearchResults.CreateMatchPhrasesAsync(System.Func>! loadTextAsync, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task!>!>! +Lifti.ISearchResults.CreateMatchPhrasesAsync(System.Func>! loadTextAsync, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task!>!>! +Lifti.ISearchResults.CreateMatchPhrasesAsync(System.Func!, System.Collections.Generic.IReadOnlyList!>! loadItems, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task!>!>! +Lifti.ISearchResults.CreateMatchPhrasesAsync(System.Func!, System.Threading.CancellationToken, System.Threading.Tasks.ValueTask!>>! loadItemsAsync, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task!>!>! +Lifti.ISearchResults.CreateMatchPhrasesAsync(System.Func!, System.Threading.Tasks.ValueTask!>>! loadItemsAsync, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task!>!>! +Lifti.ISearchResults.CreateMatchPhrasesAsync(System.Func>! 
loadItemAsync, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task!>!>! +Lifti.ISearchResults.CreateMatchPhrasesAsync(System.Func>! loadItemAsync, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task!>!>! +Lifti.ISearchResults.CreateMatchPhrasesAsync(System.Func! loadItem, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task!>!>! Lifti.ISearchResults.OrderByField(string! fieldName) -> Lifti.ISearchResults! -Lifti.ItemMetadata -Lifti.ItemMetadata.DocumentStatistics.get -> Lifti.DocumentStatistics! -Lifti.ItemMetadata.Id.get -> int -Lifti.ItemMetadata.Item.get -> T -Lifti.ItemMetadata.ItemMetadata(int id, T item, Lifti.DocumentStatistics! documentStatistics) -> void -Lifti.ItemPhrases -Lifti.ItemPhrases.Item.get -> TItem -Lifti.ItemPhrases.Item.init -> void -Lifti.ItemPhrases.ItemPhrases(TItem item, Lifti.SearchResult! SearchResult, System.Collections.Generic.IReadOnlyList!>! phrases) -> void -Lifti.ItemPhrases -Lifti.ItemPhrases.EnumeratePhrases() -> System.Collections.Generic.IEnumerable! -Lifti.ItemPhrases.FieldPhrases.get -> System.Collections.Generic.IReadOnlyList!>! -Lifti.ItemPhrases.FieldPhrases.init -> void -Lifti.ItemPhrases.ItemPhrases(Lifti.SearchResult! SearchResult, System.Collections.Generic.IReadOnlyList!>! FieldPhrases) -> void -Lifti.ItemPhrases.SearchResult.get -> Lifti.SearchResult! -Lifti.ItemPhrases.SearchResult.init -> void -Lifti.ItemStore -Lifti.ItemStore.Contains(T item) -> bool -Lifti.ItemStore.Count.get -> int -Lifti.ItemStore.GetIndexedItems() -> System.Collections.Generic.IEnumerable!>! -Lifti.ItemStore.GetMetadata(int id) -> Lifti.IItemMetadata! -Lifti.ItemStore.GetMetadata(T item) -> Lifti.IItemMetadata! -Lifti.ItemStore.IndexStatistics.get -> Lifti.IndexStatistics! 
-Lifti.ItemStore.Snapshot() -> Lifti.IItemStore! Lifti.LiftiException Lifti.LiftiException.LiftiException() -> void Lifti.LiftiException.LiftiException(string! message) -> void @@ -215,60 +257,49 @@ Lifti.PreprocessedInput.PreprocessedInput(char value) -> void Lifti.PreprocessedInput.PreprocessedInput(string! replacement) -> void Lifti.PreprocessedInput.Replacement.get -> string? Lifti.PreprocessedInput.Value.get -> char -Lifti.Querying.CompositePositionalIntersectMerger -Lifti.Querying.CompositePositionalIntersectMerger.CompositePositionalIntersectMerger() -> void -Lifti.Querying.CompositeTokenMatchLocation -Lifti.Querying.CompositeTokenMatchLocation.CompositeTokenMatchLocation() -> void -Lifti.Querying.CompositeTokenMatchLocation.CompositeTokenMatchLocation(Lifti.Querying.ITokenLocationMatch! leftToken, Lifti.Querying.ITokenLocationMatch! rightToken) -> void -Lifti.Querying.CompositeTokenMatchLocation.Equals(Lifti.Querying.CompositeTokenMatchLocation other) -> bool -Lifti.Querying.CompositeTokenMatchLocation.GetLocations() -> System.Collections.Generic.IEnumerable! -Lifti.Querying.CompositeTokenMatchLocation.MaxTokenIndex.get -> int -Lifti.Querying.CompositeTokenMatchLocation.MinTokenIndex.get -> int -Lifti.Querying.FieldMatch -Lifti.Querying.FieldMatch.Equals(Lifti.Querying.FieldMatch other) -> bool -Lifti.Querying.FieldMatch.FieldId.get -> byte -Lifti.Querying.FieldMatch.FieldMatch() -> void -Lifti.Querying.FieldMatch.FieldMatch(byte fieldId, System.Collections.Generic.IEnumerable! locations) -> void -Lifti.Querying.FieldMatch.FieldMatch(Lifti.IndexedToken token) -> void -Lifti.Querying.FieldMatch.GetTokenLocations() -> System.Collections.Generic.IReadOnlyList! -Lifti.Querying.FieldMatch.Locations.get -> System.Collections.Generic.IReadOnlyList! +Lifti.Querying.FieldScoreBoostProvider +Lifti.Querying.FieldScoreBoostProvider.FieldScoreBoostProvider(Lifti.IIndexedFieldLookup! 
fieldLookup) -> void +Lifti.Querying.FieldScoreBoostProvider.GetScoreBoost(byte fieldId) -> double +Lifti.Querying.IFieldScoreBoostProvider +Lifti.Querying.IFieldScoreBoostProvider.GetScoreBoost(byte fieldId) -> double Lifti.Querying.IIndexNavigator +Lifti.Querying.IIndexNavigator.AddExactAndChildMatches(Lifti.Querying.QueryContext! queryContext, Lifti.Querying.DocumentMatchCollector! documentMatchCollector, double weighting = 1) -> void +Lifti.Querying.IIndexNavigator.AddExactMatches(Lifti.Querying.QueryContext! queryContext, Lifti.Querying.DocumentMatchCollector! documentMatchCollector, double weighting = 1) -> void Lifti.Querying.IIndexNavigator.CreateBookmark() -> Lifti.Querying.IIndexNavigatorBookmark! Lifti.Querying.IIndexNavigator.EnumerateIndexedTokens() -> System.Collections.Generic.IEnumerable! Lifti.Querying.IIndexNavigator.EnumerateNextCharacters() -> System.Collections.Generic.IEnumerable! +Lifti.Querying.IIndexNavigator.ExactMatchCount() -> int Lifti.Querying.IIndexNavigator.GetExactAndChildMatches(double weighting = 1) -> Lifti.Querying.IntermediateQueryResult +Lifti.Querying.IIndexNavigator.GetExactAndChildMatches(Lifti.Querying.QueryContext! queryContext, double weighting = 1) -> Lifti.Querying.IntermediateQueryResult Lifti.Querying.IIndexNavigator.GetExactMatches(double weighting = 1) -> Lifti.Querying.IntermediateQueryResult +Lifti.Querying.IIndexNavigator.GetExactMatches(Lifti.Querying.QueryContext! queryContext, double weighting = 1) -> Lifti.Querying.IntermediateQueryResult Lifti.Querying.IIndexNavigator.HasExactMatches.get -> bool Lifti.Querying.IIndexNavigator.Process(char value) -> bool Lifti.Querying.IIndexNavigator.Process(string! text) -> bool Lifti.Querying.IIndexNavigator.Process(System.ReadOnlySpan text) -> bool +Lifti.Querying.IIndexNavigator.Snapshot.get -> Lifti.IIndexSnapshot! 
Lifti.Querying.IIndexNavigatorBookmark Lifti.Querying.IIndexNavigatorBookmark.Apply() -> void Lifti.Querying.IIndexScorerFactory Lifti.Querying.IIndexScorerFactory.CreateIndexScorer(Lifti.IIndexSnapshot! indexSnapshot) -> Lifti.Querying.IScorer! Lifti.Querying.IntermediateQueryResult Lifti.Querying.IntermediateQueryResult.CompositePositionalIntersect(Lifti.Querying.IntermediateQueryResult results, int leftTolerance, int rightTolerance) -> Lifti.Querying.IntermediateQueryResult +Lifti.Querying.IntermediateQueryResult.Equals(Lifti.Querying.IntermediateQueryResult other) -> bool Lifti.Querying.IntermediateQueryResult.IntermediateQueryResult() -> void -Lifti.Querying.IntermediateQueryResult.IntermediateQueryResult(System.Collections.Generic.IEnumerable! matches) -> void Lifti.Querying.IntermediateQueryResult.Intersect(Lifti.Querying.IntermediateQueryResult results) -> Lifti.Querying.IntermediateQueryResult Lifti.Querying.IntermediateQueryResult.Matches.get -> System.Collections.Generic.IReadOnlyList! Lifti.Querying.IntermediateQueryResult.PrecedingIntersect(Lifti.Querying.IntermediateQueryResult results) -> Lifti.Querying.IntermediateQueryResult Lifti.Querying.IntermediateQueryResult.Union(Lifti.Querying.IntermediateQueryResult results) -> Lifti.Querying.IntermediateQueryResult -Lifti.Querying.IntermediateQueryResultMerger -Lifti.Querying.IntermediateQueryResultMerger.IntermediateQueryResultMerger() -> void -Lifti.Querying.IntersectMerger -Lifti.Querying.IntersectMerger.IntersectMerger() -> void Lifti.Querying.IQuery Lifti.Querying.IQuery.Execute(Lifti.IIndexSnapshot! index) -> System.Collections.Generic.IEnumerable!>! Lifti.Querying.IQuery.Root.get -> Lifti.Querying.QueryParts.IQueryPart! Lifti.Querying.IQueryParser Lifti.Querying.IQueryParser.Parse(Lifti.IIndexedFieldLookup! fieldLookup, string! queryText, Lifti.IIndexTokenizerProvider! tokenizerProvider) -> Lifti.Querying.IQuery! 
Lifti.Querying.IScorer -Lifti.Querying.IScorer.Score(System.Collections.Generic.IReadOnlyList! tokens, double weighting) -> System.Collections.Generic.IReadOnlyList! -Lifti.Querying.ITokenLocationMatch -Lifti.Querying.ITokenLocationMatch.GetLocations() -> System.Collections.Generic.IEnumerable! -Lifti.Querying.ITokenLocationMatch.MaxTokenIndex.get -> int -Lifti.Querying.ITokenLocationMatch.MinTokenIndex.get -> int +Lifti.Querying.IScorer.CalculateScore(int totalMatchedDocuments, int documentId, byte fieldId, System.Collections.Generic.IReadOnlyList! tokenLocations, double weighting) -> double +Lifti.Querying.DocumentMatchCollector +Lifti.Querying.DocumentMatchCollector.DocumentMatchCollector() -> void +Lifti.Querying.DocumentMatchCollector.ToIntermediateQueryResult() -> Lifti.Querying.IntermediateQueryResult Lifti.Querying.OkapiBm25ScorerFactory Lifti.Querying.OkapiBm25ScorerFactory.CreateIndexScorer(Lifti.IIndexSnapshot! indexSnapshot) -> Lifti.Querying.IScorer! Lifti.Querying.OkapiBm25ScorerFactory.OkapiBm25ScorerFactory(double k1 = 1.2, double b = 0.75) -> void @@ -276,12 +307,16 @@ Lifti.Querying.OperatorPrecedence Lifti.Querying.OperatorPrecedence.And = 1 -> Lifti.Querying.OperatorPrecedence Lifti.Querying.OperatorPrecedence.Or = 2 -> Lifti.Querying.OperatorPrecedence Lifti.Querying.OperatorPrecedence.Positional = 0 -> Lifti.Querying.OperatorPrecedence -Lifti.Querying.PrecedingIntersectMerger -Lifti.Querying.PrecedingIntersectMerger.PrecedingIntersectMerger() -> void Lifti.Querying.Query Lifti.Querying.Query.Execute(Lifti.IIndexSnapshot! index) -> System.Collections.Generic.IEnumerable!>! Lifti.Querying.Query.Query(Lifti.Querying.QueryParts.IQueryPart! root) -> void Lifti.Querying.Query.Root.get -> Lifti.Querying.QueryParts.IQueryPart! +Lifti.Querying.QueryContext +Lifti.Querying.QueryContext.FilterToDocumentIds.get -> System.Collections.Generic.ISet? 
+Lifti.Querying.QueryContext.FilterToDocumentIds.init -> void +Lifti.Querying.QueryContext.FilterToFieldId.get -> byte? +Lifti.Querying.QueryContext.FilterToFieldId.init -> void +Lifti.Querying.QueryContext.QueryContext(byte? FilterToFieldId = null, System.Collections.Generic.ISet? FilterToDocumentIds = null) -> void Lifti.Querying.QueryParserBuilder Lifti.Querying.QueryParserBuilder.AssumeFuzzySearchTerms(bool fuzzySearchByDefault = true) -> Lifti.Querying.QueryParserBuilder! Lifti.Querying.QueryParserBuilder.Build() -> Lifti.Querying.IQueryParser! @@ -302,42 +337,47 @@ Lifti.Querying.QueryParserOptions.FuzzySearchMaxEditDistance.get -> System.Func< Lifti.Querying.QueryParserOptions.FuzzySearchMaxSequentialEdits.get -> System.Func! Lifti.Querying.QueryParts.AdjacentWordsQueryOperator Lifti.Querying.QueryParts.AdjacentWordsQueryOperator.AdjacentWordsQueryOperator(System.Collections.Generic.IReadOnlyList! words) -> void -Lifti.Querying.QueryParts.AdjacentWordsQueryOperator.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryParts.IQueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +Lifti.Querying.QueryParts.AdjacentWordsQueryOperator.CalculateWeighting(System.Func! navigatorCreator) -> double +Lifti.Querying.QueryParts.AdjacentWordsQueryOperator.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult Lifti.Querying.QueryParts.AdjacentWordsQueryOperator.Words.get -> System.Collections.Generic.IReadOnlyList! Lifti.Querying.QueryParts.AndQueryOperator Lifti.Querying.QueryParts.AndQueryOperator.AndQueryOperator(Lifti.Querying.QueryParts.IQueryPart! left, Lifti.Querying.QueryParts.IQueryPart! right) -> void Lifti.Querying.QueryParts.BinaryQueryOperator Lifti.Querying.QueryParts.BinaryQueryOperator.BinaryQueryOperator(Lifti.Querying.QueryParts.IQueryPart! left, Lifti.Querying.QueryParts.IQueryPart! 
right) -> void +Lifti.Querying.QueryParts.BinaryQueryOperator.CalculateWeighting(System.Func! navigatorCreator) -> double +Lifti.Querying.QueryParts.BinaryQueryOperator.EvaluateWithDocumentIntersection(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> (Lifti.Querying.IntermediateQueryResult leftResults, Lifti.Querying.IntermediateQueryResult rightResults) Lifti.Querying.QueryParts.BinaryQueryOperator.Left.get -> Lifti.Querying.QueryParts.IQueryPart! Lifti.Querying.QueryParts.BinaryQueryOperator.Left.set -> void Lifti.Querying.QueryParts.BinaryQueryOperator.Right.get -> Lifti.Querying.QueryParts.IQueryPart! Lifti.Querying.QueryParts.BinaryQueryOperator.Right.set -> void Lifti.Querying.QueryParts.BracketedQueryPart Lifti.Querying.QueryParts.BracketedQueryPart.BracketedQueryPart(Lifti.Querying.QueryParts.IQueryPart! statement) -> void -Lifti.Querying.QueryParts.BracketedQueryPart.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryParts.IQueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +Lifti.Querying.QueryParts.BracketedQueryPart.CalculateWeighting(System.Func! navigatorCreator) -> double +Lifti.Querying.QueryParts.BracketedQueryPart.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult Lifti.Querying.QueryParts.BracketedQueryPart.Statement.get -> Lifti.Querying.QueryParts.IQueryPart! Lifti.Querying.QueryParts.EmptyQueryPart -Lifti.Querying.QueryParts.EmptyQueryPart.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryParts.IQueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +Lifti.Querying.QueryParts.EmptyQueryPart.CalculateWeighting(System.Func! navigatorCreator) -> double +Lifti.Querying.QueryParts.EmptyQueryPart.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! 
queryContext) -> Lifti.Querying.IntermediateQueryResult Lifti.Querying.QueryParts.ExactWordQueryPart -Lifti.Querying.QueryParts.ExactWordQueryPart.ExactWordQueryPart(string! word) -> void +Lifti.Querying.QueryParts.ExactWordQueryPart.ExactWordQueryPart(string! word, double? scoreBoost = null) -> void Lifti.Querying.QueryParts.FieldFilterQueryOperator -Lifti.Querying.QueryParts.FieldFilterQueryOperator.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryParts.IQueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +Lifti.Querying.QueryParts.FieldFilterQueryOperator.CalculateWeighting(System.Func! navigatorCreator) -> double +Lifti.Querying.QueryParts.FieldFilterQueryOperator.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult Lifti.Querying.QueryParts.FieldFilterQueryOperator.FieldFilterQueryOperator(string! fieldName, byte fieldId, Lifti.Querying.QueryParts.IQueryPart! statement) -> void Lifti.Querying.QueryParts.FieldFilterQueryOperator.FieldId.get -> byte Lifti.Querying.QueryParts.FieldFilterQueryOperator.FieldName.get -> string! Lifti.Querying.QueryParts.FieldFilterQueryOperator.Statement.get -> Lifti.Querying.QueryParts.IQueryPart! Lifti.Querying.QueryParts.FuzzyMatchQueryPart -Lifti.Querying.QueryParts.FuzzyMatchQueryPart.FuzzyMatchQueryPart(string! word, ushort maxEditDistance = 4, ushort maxSequentialEdits = 1) -> void +Lifti.Querying.QueryParts.FuzzyMatchQueryPart.FuzzyMatchQueryPart(string! word, ushort maxEditDistance = 4, ushort maxSequentialEdits = 1, double? scoreBoost = null) -> void Lifti.Querying.QueryParts.IBinaryQueryOperator Lifti.Querying.QueryParts.IBinaryQueryOperator.Left.get -> Lifti.Querying.QueryParts.IQueryPart! 
Lifti.Querying.QueryParts.IBinaryQueryOperator.Left.set -> void Lifti.Querying.QueryParts.IBinaryQueryOperator.Precedence.get -> Lifti.Querying.OperatorPrecedence Lifti.Querying.QueryParts.IBinaryQueryOperator.Right.get -> Lifti.Querying.QueryParts.IQueryPart! Lifti.Querying.QueryParts.IBinaryQueryOperator.Right.set -> void -Lifti.Querying.QueryParts.IQueryContext -Lifti.Querying.QueryParts.IQueryContext.ApplyTo(Lifti.Querying.IntermediateQueryResult intermediateQueryResult) -> Lifti.Querying.IntermediateQueryResult Lifti.Querying.QueryParts.IQueryPart -Lifti.Querying.QueryParts.IQueryPart.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryParts.IQueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +Lifti.Querying.QueryParts.IQueryPart.CalculateWeighting(System.Func! navigatorCreator) -> double +Lifti.Querying.QueryParts.IQueryPart.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult Lifti.Querying.QueryParts.NearQueryOperator Lifti.Querying.QueryParts.NearQueryOperator.NearQueryOperator(Lifti.Querying.QueryParts.IQueryPart! left, Lifti.Querying.QueryParts.IQueryPart! right, int tolerance = 5) -> void Lifti.Querying.QueryParts.NearQueryOperator.Tolerance.get -> int @@ -348,6 +388,10 @@ Lifti.Querying.QueryParts.PrecedingNearQueryOperator.PrecedingNearQueryOperator( Lifti.Querying.QueryParts.PrecedingNearQueryOperator.Tolerance.get -> int Lifti.Querying.QueryParts.PrecedingQueryOperator Lifti.Querying.QueryParts.PrecedingQueryOperator.PrecedingQueryOperator(Lifti.Querying.QueryParts.IQueryPart! left, Lifti.Querying.QueryParts.IQueryPart! right) -> void +Lifti.Querying.QueryParts.ScoreBoostedQueryPart +Lifti.Querying.QueryParts.ScoreBoostedQueryPart.CalculateWeighting(System.Func! navigatorCreator) -> double +Lifti.Querying.QueryParts.ScoreBoostedQueryPart.ScoreBoost.get -> double? +Lifti.Querying.QueryParts.ScoreBoostedQueryPart.ScoreBoostedQueryPart(double? 
scoreBoost) -> void Lifti.Querying.QueryParts.WildcardQueryFragment Lifti.Querying.QueryParts.WildcardQueryFragment.Equals(Lifti.Querying.QueryParts.WildcardQueryFragment other) -> bool Lifti.Querying.QueryParts.WildcardQueryFragment.Kind.get -> Lifti.Querying.QueryParts.WildcardQueryFragmentKind @@ -358,21 +402,14 @@ Lifti.Querying.QueryParts.WildcardQueryFragmentKind.MultiCharacter = 2 -> Lifti. Lifti.Querying.QueryParts.WildcardQueryFragmentKind.SingleCharacter = 1 -> Lifti.Querying.QueryParts.WildcardQueryFragmentKind Lifti.Querying.QueryParts.WildcardQueryFragmentKind.Text = 0 -> Lifti.Querying.QueryParts.WildcardQueryFragmentKind Lifti.Querying.QueryParts.WildcardQueryPart -Lifti.Querying.QueryParts.WildcardQueryPart.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryParts.IQueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult Lifti.Querying.QueryParts.WildcardQueryPart.WildcardQueryPart(params Lifti.Querying.QueryParts.WildcardQueryFragment[]! fragments) -> void -Lifti.Querying.QueryParts.WildcardQueryPart.WildcardQueryPart(System.Collections.Generic.IEnumerable! fragments) -> void +Lifti.Querying.QueryParts.WildcardQueryPart.WildcardQueryPart(System.Collections.Generic.IEnumerable! fragments, double? scoreBoost = null) -> void Lifti.Querying.QueryParts.WordQueryPart Lifti.Querying.QueryParts.WordQueryPart.Word.get -> string! -Lifti.Querying.QueryParts.WordQueryPart.WordQueryPart(string! word) -> void +Lifti.Querying.QueryParts.WordQueryPart.WordQueryPart(string! word, double? scoreBoost) -> void Lifti.Querying.QueryTermJoinOperatorKind Lifti.Querying.QueryTermJoinOperatorKind.And = 0 -> Lifti.Querying.QueryTermJoinOperatorKind Lifti.Querying.QueryTermJoinOperatorKind.Or = 1 -> Lifti.Querying.QueryTermJoinOperatorKind -Lifti.Querying.QueryTokenMatch -Lifti.Querying.QueryTokenMatch.Equals(Lifti.Querying.QueryTokenMatch other) -> bool -Lifti.Querying.QueryTokenMatch.FieldMatches.get -> System.Collections.Generic.IReadOnlyList! 
-Lifti.Querying.QueryTokenMatch.ItemId.get -> int -Lifti.Querying.QueryTokenMatch.QueryTokenMatch() -> void -Lifti.Querying.QueryTokenMatch.QueryTokenMatch(int itemId, System.Collections.Generic.IReadOnlyList! fieldMatches) -> void Lifti.Querying.QueryTokenType Lifti.Querying.QueryTokenType.AndOperator = 1 -> Lifti.Querying.QueryTokenType Lifti.Querying.QueryTokenType.BeginAdjacentTextOperator = 5 -> Lifti.Querying.QueryTokenType @@ -386,42 +423,32 @@ Lifti.Querying.QueryTokenType.PrecedingNearOperator = 8 -> Lifti.Querying.QueryT Lifti.Querying.QueryTokenType.PrecedingOperator = 9 -> Lifti.Querying.QueryTokenType Lifti.Querying.QueryTokenType.Text = 0 -> Lifti.Querying.QueryTokenType Lifti.Querying.ScoredFieldMatch -Lifti.Querying.ScoredFieldMatch.Equals(Lifti.Querying.ScoredFieldMatch other) -> bool +Lifti.Querying.ScoredFieldMatch.Equals(Lifti.Querying.ScoredFieldMatch? other) -> bool Lifti.Querying.ScoredFieldMatch.FieldId.get -> byte -Lifti.Querying.ScoredFieldMatch.FieldMatch.get -> Lifti.Querying.FieldMatch -Lifti.Querying.ScoredFieldMatch.Locations.get -> System.Collections.Generic.IReadOnlyList! +Lifti.Querying.ScoredFieldMatch.GetTokenLocations() -> System.Collections.Generic.IReadOnlyList! Lifti.Querying.ScoredFieldMatch.Score.get -> double -Lifti.Querying.ScoredFieldMatch.ScoredFieldMatch() -> void -Lifti.Querying.ScoredFieldMatch.ScoredFieldMatch(double score, Lifti.Querying.FieldMatch fieldMatch) -> void Lifti.Querying.ScoredToken +Lifti.Querying.ScoredToken.DocumentId.get -> int Lifti.Querying.ScoredToken.Equals(Lifti.Querying.ScoredToken other) -> bool -Lifti.Querying.ScoredToken.FieldMatches.get -> System.Collections.Generic.IReadOnlyList! +Lifti.Querying.ScoredToken.FieldMatches.get -> System.Collections.Generic.IReadOnlyList! Lifti.Querying.ScoredToken.ItemId.get -> int Lifti.Querying.ScoredToken.ScoredToken() -> void -Lifti.Querying.ScoredToken.ScoredToken(int itemId, System.Collections.Generic.IReadOnlyList! 
fieldMatches) -> void +Lifti.Querying.ScoredToken.ScoredToken(int documentId, System.Collections.Generic.IReadOnlyList! fieldMatches) -> void Lifti.Querying.SimpleQueryParser Lifti.Querying.SimpleQueryParser.Parse(Lifti.IIndexedFieldLookup! fieldLookup, string! queryText, Lifti.IIndexTokenizerProvider! tokenizerProvider) -> Lifti.Querying.IQuery! Lifti.Querying.SimpleQueryParser.SimpleQueryParser(Lifti.Querying.QueryParserOptions! options) -> void -Lifti.Querying.SingleTokenLocationMatch -Lifti.Querying.SingleTokenLocationMatch.Equals(Lifti.Querying.SingleTokenLocationMatch other) -> bool -Lifti.Querying.SingleTokenLocationMatch.GetLocations() -> System.Collections.Generic.IEnumerable! -Lifti.Querying.SingleTokenLocationMatch.MaxTokenIndex.get -> int -Lifti.Querying.SingleTokenLocationMatch.MinTokenIndex.get -> int -Lifti.Querying.SingleTokenLocationMatch.SingleTokenLocationMatch() -> void -Lifti.Querying.SingleTokenLocationMatch.SingleTokenLocationMatch(Lifti.TokenLocation original) -> void -Lifti.Querying.UnionMerger -Lifti.Querying.UnionMerger.UnionMerger() -> void +Lifti.ScoreBoostMetadata Lifti.SearchResult Lifti.SearchResult.FieldMatches.get -> System.Collections.Generic.IReadOnlyList! Lifti.SearchResult.Key.get -> TKey Lifti.SearchResult.Score.get -> double -Lifti.SearchResult.SearchResult(TKey item, System.Collections.Generic.IReadOnlyList! locations) -> void +Lifti.SearchResult.SearchResult(TKey key, System.Collections.Generic.IReadOnlyList! locations) -> void Lifti.Serialization.Binary.BinarySerializer Lifti.Serialization.Binary.BinarySerializer.BinarySerializer() -> void Lifti.Serialization.Binary.BinarySerializer.BinarySerializer(Lifti.Serialization.Binary.IKeySerializer! keySerializer) -> void -Lifti.Serialization.Binary.BinarySerializer.DeserializeAsync(Lifti.FullTextIndex! index, System.IO.Stream! stream, bool disposeStream = true) -> System.Threading.Tasks.Task! -Lifti.Serialization.Binary.BinarySerializer.SerializeAsync(Lifti.FullTextIndex! 
index, System.IO.Stream! stream, bool disposeStream = true) -> System.Threading.Tasks.Task! -Lifti.Serialization.Binary.BinarySerializer.SerializeAsync(Lifti.IIndexSnapshot! snapshot, System.IO.Stream! stream, bool disposeStream = true) -> System.Threading.Tasks.Task! +Lifti.Serialization.Binary.BinarySerializer.DeserializeAsync(Lifti.FullTextIndex! index, System.IO.Stream! stream, bool disposeStream = true, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.Serialization.Binary.BinarySerializer.SerializeAsync(Lifti.FullTextIndex! index, System.IO.Stream! stream, bool disposeStream = true, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.Serialization.Binary.BinarySerializer.SerializeAsync(Lifti.IIndexSnapshot! snapshot, System.IO.Stream! stream, bool disposeStream = true, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! Lifti.Serialization.Binary.DeserializationException Lifti.Serialization.Binary.DeserializationException.DeserializationException() -> void Lifti.Serialization.Binary.DeserializationException.DeserializationException(string! message) -> void @@ -429,12 +456,10 @@ Lifti.Serialization.Binary.DeserializationException.DeserializationException(str Lifti.Serialization.Binary.DeserializationException.DeserializationException(string! message, System.Exception! inner) -> void Lifti.Serialization.Binary.GuidFormatterKeySerializer Lifti.Serialization.Binary.GuidFormatterKeySerializer.GuidFormatterKeySerializer() -> void -Lifti.Serialization.Binary.IIndexReader -Lifti.Serialization.Binary.IIndexReader.ReadIntoAsync(Lifti.FullTextIndex! index) -> System.Threading.Tasks.Task! Lifti.Serialization.Binary.IIndexSerializer -Lifti.Serialization.Binary.IIndexSerializer.DeserializeAsync(Lifti.FullTextIndex! 
index, System.IO.Stream! stream, bool disposeStream = true) -> System.Threading.Tasks.Task! -Lifti.Serialization.Binary.IIndexSerializer.SerializeAsync(Lifti.FullTextIndex! index, System.IO.Stream! stream, bool disposeStream = true) -> System.Threading.Tasks.Task! -Lifti.Serialization.Binary.IIndexSerializer.SerializeAsync(Lifti.IIndexSnapshot! snapshot, System.IO.Stream! stream, bool disposeStream = true) -> System.Threading.Tasks.Task! +Lifti.Serialization.Binary.IIndexSerializer.DeserializeAsync(Lifti.FullTextIndex! index, System.IO.Stream! stream, bool disposeStream = true, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.Serialization.Binary.IIndexSerializer.SerializeAsync(Lifti.FullTextIndex! index, System.IO.Stream! stream, bool disposeStream = true, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! +Lifti.Serialization.Binary.IIndexSerializer.SerializeAsync(Lifti.IIndexSnapshot! snapshot, System.IO.Stream! stream, bool disposeStream = true, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.Task! Lifti.Serialization.Binary.IKeySerializer Lifti.Serialization.Binary.IKeySerializer.Read(System.IO.BinaryReader! reader) -> TKey Lifti.Serialization.Binary.IKeySerializer.Write(System.IO.BinaryWriter! 
writer, TKey key) -> void @@ -448,6 +473,39 @@ Lifti.Serialization.Binary.StringFormatterKeySerializer Lifti.Serialization.Binary.StringFormatterKeySerializer.StringFormatterKeySerializer() -> void Lifti.Serialization.Binary.UIntFormatterKeySerializer Lifti.Serialization.Binary.UIntFormatterKeySerializer.UIntFormatterKeySerializer() -> void +Lifti.Serialization.DeserializedDataCollector +Lifti.Serialization.DeserializedDataCollector.Add(T item) -> void +Lifti.Serialization.DeserializedDataCollector.DeserializedDataCollector(int expectedCount) -> void +Lifti.Serialization.DocumentMetadataCollector +Lifti.Serialization.DocumentMetadataCollector.DocumentMetadataCollector() -> void +Lifti.Serialization.DocumentMetadataCollector.DocumentMetadataCollector(int expectedCount) -> void +Lifti.Serialization.IIndexDeserializer +Lifti.Serialization.IIndexDeserializer.ReadAsync(Lifti.FullTextIndex! index, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) -> System.Threading.Tasks.ValueTask +Lifti.Serialization.IndexDeserializerBase +Lifti.Serialization.IndexDeserializerBase.Dispose() -> void +Lifti.Serialization.IndexDeserializerBase.IndexDeserializerBase() -> void +Lifti.Serialization.IndexDeserializerBase.ReadAsync(Lifti.FullTextIndex! index, System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask +Lifti.Serialization.IndexSerializerBase +Lifti.Serialization.IndexSerializerBase.Dispose() -> void +Lifti.Serialization.IndexSerializerBase.IndexSerializerBase() -> void +Lifti.Serialization.IndexSerializerBase.WriteAsync(Lifti.IIndexSnapshot! 
snapshot, System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask +Lifti.Serialization.SerializedFieldCollector +Lifti.Serialization.SerializedFieldCollector.SerializedFieldCollector() -> void +Lifti.Serialization.SerializedFieldCollector.SerializedFieldCollector(int expectedCount) -> void +Lifti.Serialization.SerializedFieldIdMap +Lifti.Serialization.SerializedFieldIdMap.Map(byte serializedFieldId) -> byte +Lifti.Serialization.SerializedFieldIdMap.SerializedFieldIdMap() -> void +Lifti.Serialization.SerializedFieldInfo +Lifti.Serialization.SerializedFieldInfo.DynamicFieldReaderName.get -> string? +Lifti.Serialization.SerializedFieldInfo.DynamicFieldReaderName.set -> void +Lifti.Serialization.SerializedFieldInfo.FieldId.get -> byte +Lifti.Serialization.SerializedFieldInfo.FieldId.set -> void +Lifti.Serialization.SerializedFieldInfo.Kind.get -> Lifti.FieldKind +Lifti.Serialization.SerializedFieldInfo.Kind.set -> void +Lifti.Serialization.SerializedFieldInfo.Name.get -> string! +Lifti.Serialization.SerializedFieldInfo.Name.set -> void +Lifti.Serialization.SerializedFieldInfo.SerializedFieldInfo() -> void +Lifti.Serialization.SerializedFieldInfo.SerializedFieldInfo(byte FieldId, string! Name, Lifti.FieldKind Kind, string? DynamicFieldReaderName) -> void Lifti.ThesaurusBuilder Lifti.ThesaurusBuilder.WithHypernyms(string! word, params string![]! hypernyms) -> Lifti.ThesaurusBuilder! Lifti.ThesaurusBuilder.WithHypernyms(string! word, System.Collections.Generic.IEnumerable! hypernyms) -> Lifti.ThesaurusBuilder! @@ -466,21 +524,38 @@ Lifti.Tokenization.IndexTokenizer.Normalize(System.ReadOnlySpan tokenText) Lifti.Tokenization.IndexTokenizer.Options.get -> Lifti.TokenizationOptions! Lifti.Tokenization.IndexTokenizer.Process(System.Collections.Generic.IEnumerable! input) -> System.Collections.Generic.IReadOnlyCollection! Lifti.Tokenization.IndexTokenizer.Process(System.ReadOnlySpan input) -> System.Collections.Generic.IReadOnlyCollection! 
+Lifti.Tokenization.IStemmer +Lifti.Tokenization.IStemmer.RequiresAccentInsensitivity.get -> bool +Lifti.Tokenization.IStemmer.RequiresCaseInsensitivity.get -> bool +Lifti.Tokenization.IStemmer.Stem(System.Text.StringBuilder! builder) -> void Lifti.Tokenization.IThesaurus Lifti.Tokenization.IThesaurus.Process(Lifti.Tokenization.Token! token) -> System.Collections.Generic.IEnumerable! -Lifti.Tokenization.Objects.ObjectTokenizationBuilder -Lifti.Tokenization.Objects.ObjectTokenizationBuilder.ObjectTokenizationBuilder() -> void -Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithDynamicFields(string! dynamicFieldReaderName, System.Func?>! dynamicFieldReader, string? fieldNamePrefix = null, System.Func? tokenizationOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? textExtractor = null, System.Func? thesaurusOptions = null) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! -Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithDynamicFields(string! dynamicFieldReaderName, System.Func!>!>! dynamicFieldReader, string? fieldNamePrefix = null, System.Func? tokenizationOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? textExtractor = null, System.Func? thesaurusOptions = null) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! -Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithDynamicFields(string! dynamicFieldReaderName, System.Func?>! dynamicFieldReader, System.Func! getFieldName, System.Func! getFieldText, string? fieldNamePrefix = null, System.Func? tokenizationOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? textExtractor = null, System.Func? thesaurusOptions = null) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! -Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithDynamicFields(string! dynamicFieldReaderName, System.Func?>! dynamicFieldReader, System.Func! getFieldName, System.Func!>! getFieldText, string? fieldNamePrefix = null, System.Func? 
tokenizationOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? textExtractor = null, System.Func? thesaurusOptions = null) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! -Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithField(string! name, System.Func! fieldTextReader, System.Func? tokenizationOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? textExtractor = null, System.Func? thesaurusOptions = null) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! -Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithField(string! name, System.Func!>! reader, System.Func? tokenizationOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? textExtractor = null, System.Func? thesaurusOptions = null) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! -Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithField(string! name, System.Func!>! fieldTextReader, System.Func? tokenizationOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? textExtractor = null, System.Func? thesaurusOptions = null) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! -Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithField(string! name, System.Func!>!>! fieldTextReader, System.Func? tokenizationOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? textExtractor = null, System.Func? thesaurusOptions = null) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! -Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithField(string! name, System.Func!>! fieldTextReader, System.Func? tokenizationOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? textExtractor = null, System.Func? thesaurusOptions = null) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! -Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithField(string! name, System.Func!>!>! fieldTextReader, System.Func? tokenizationOptions = null, System.Func? 
thesaurusOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? textExtractor = null) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! -Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithKey(System.Func! keyReader) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! +Lifti.Tokenization.Objects.ObjectScoreBoostBuilder +Lifti.Tokenization.Objects.ObjectScoreBoostBuilder.Freshness(System.Func! freshnessProvider, double multiplier) -> Lifti.Tokenization.Objects.ObjectScoreBoostBuilder! +Lifti.Tokenization.Objects.ObjectScoreBoostBuilder.Magnitude(System.Func! magnitudeProvider, double multiplier) -> Lifti.Tokenization.Objects.ObjectScoreBoostBuilder! +Lifti.Tokenization.Objects.ObjectScoreBoostBuilder.ObjectScoreBoostBuilder() -> void +Lifti.Tokenization.Objects.ObjectScoreBoostOptions +Lifti.Tokenization.Objects.ObjectScoreBoostOptions.FreshnessMultiplier.get -> double +Lifti.Tokenization.Objects.ObjectScoreBoostOptions.MagnitudeMultiplier.get -> double +Lifti.Tokenization.Objects.ObjectScoreBoostOptions.ObjectScoreBoostOptions(double magnitudeMultiplier, double freshnessMultiplier) -> void +Lifti.Tokenization.Objects.ObjectScoreBoostOptions +Lifti.Tokenization.Objects.ObjectScoreBoostOptions.FreshnessProvider.get -> System.Func? +Lifti.Tokenization.Objects.ObjectScoreBoostOptions.MagnitudeProvider.get -> System.Func? +Lifti.Tokenization.Objects.ObjectScoreBoostOptions.ObjectScoreBoostOptions(double magnitudeMultiplier, System.Func? magnitudeProvider, double freshnessMultiplier, System.Func? freshnessProvider) -> void +Lifti.Tokenization.Objects.ObjectTokenizationBuilder +Lifti.Tokenization.Objects.ObjectTokenizationBuilder.ObjectTokenizationBuilder() -> void +Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithDynamicFields(string! dynamicFieldReaderName, System.Func?>! dynamicFieldReader, string? fieldNamePrefix = null, System.Func? tokenizationOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? 
textExtractor = null, System.Func? thesaurusOptions = null, double scoreBoost = 1) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! +Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithDynamicFields(string! dynamicFieldReaderName, System.Func!>!>! dynamicFieldReader, string? fieldNamePrefix = null, System.Func? tokenizationOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? textExtractor = null, System.Func? thesaurusOptions = null, double scoreBoost = 1) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! +Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithDynamicFields(string! dynamicFieldReaderName, System.Func?>! dynamicFieldReader, System.Func! getFieldName, System.Func! getFieldText, string? fieldNamePrefix = null, System.Func? tokenizationOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? textExtractor = null, System.Func? thesaurusOptions = null, double scoreBoost = 1) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! +Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithDynamicFields(string! dynamicFieldReaderName, System.Func?>! dynamicFieldReader, System.Func! getFieldName, System.Func!>! getFieldText, string? fieldNamePrefix = null, System.Func? tokenizationOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? textExtractor = null, System.Func? thesaurusOptions = null, double scoreBoost = 1) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! +Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithField(string! name, System.Func! fieldTextReader, System.Func? tokenizationOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? textExtractor = null, System.Func? thesaurusOptions = null, double scoreBoost = 1) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! +Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithField(string! name, System.Func!>! reader, System.Func? tokenizationOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? 
textExtractor = null, System.Func? thesaurusOptions = null, double scoreBoost = 1) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! +Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithField(string! name, System.Func!>! fieldTextReader, System.Func? tokenizationOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? textExtractor = null, System.Func? thesaurusOptions = null, double scoreBoost = 1) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! +Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithField(string! name, System.Func!>!>! fieldTextReader, System.Func? tokenizationOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? textExtractor = null, System.Func? thesaurusOptions = null, double scoreBoost = 1) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! +Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithField(string! name, System.Func!>! fieldTextReader, System.Func? tokenizationOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? textExtractor = null, System.Func? thesaurusOptions = null, double scoreBoost = 1) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! +Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithField(string! name, System.Func!>!>! fieldTextReader, System.Func? tokenizationOptions = null, System.Func? thesaurusOptions = null, Lifti.Tokenization.TextExtraction.ITextExtractor? textExtractor = null, double scoreBoost = 1) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! +Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithKey(System.Func! keyReader) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! +Lifti.Tokenization.Objects.ObjectTokenizationBuilder.WithScoreBoosting(System.Action!>! scoreBoostingOptions) -> Lifti.Tokenization.Objects.ObjectTokenizationBuilder! 
Lifti.Tokenization.Preprocessing.CaseInsensitiveNormalizer Lifti.Tokenization.Preprocessing.CaseInsensitiveNormalizer.CaseInsensitiveNormalizer() -> void Lifti.Tokenization.Preprocessing.CaseInsensitiveNormalizer.Preprocess(char input) -> Lifti.PreprocessedInput @@ -512,11 +587,11 @@ Lifti.Tokenization.TextExtraction.XmlTextExtractor Lifti.Tokenization.TextExtraction.XmlTextExtractor.Extract(System.ReadOnlyMemory document, int startOffset) -> System.Collections.Generic.IEnumerable! Lifti.Tokenization.TextExtraction.XmlTextExtractor.XmlTextExtractor() -> void Lifti.Tokenization.Token -Lifti.Tokenization.Token.AddLocation(Lifti.TokenLocation location) -> void -Lifti.Tokenization.Token.Locations.get -> System.Collections.Generic.IReadOnlyList! -Lifti.Tokenization.Token.Token(string! token, Lifti.TokenLocation location) -> void -Lifti.Tokenization.Token.Token(string! token, params Lifti.TokenLocation[]! locations) -> void -Lifti.Tokenization.Token.Token(string! token, System.Collections.Generic.IReadOnlyList! locations) -> void +Lifti.Tokenization.Token.AddLocation(Lifti.TokenLocation! location) -> void +Lifti.Tokenization.Token.Locations.get -> System.Collections.Generic.IReadOnlyList! +Lifti.Tokenization.Token.Token(string! token, Lifti.TokenLocation! location) -> void +Lifti.Tokenization.Token.Token(string! token, params Lifti.TokenLocation![]! locations) -> void +Lifti.Tokenization.Token.Token(string! token, System.Collections.Generic.IReadOnlyList! locations) -> void Lifti.Tokenization.Token.Value.get -> string! Lifti.TokenizationOptions Lifti.TokenizationOptions.AccentInsensitive.get -> bool @@ -524,79 +599,85 @@ Lifti.TokenizationOptions.AdditionalSplitCharacters.get -> System.Collections.Ge Lifti.TokenizationOptions.CaseInsensitive.get -> bool Lifti.TokenizationOptions.IgnoreCharacters.get -> System.Collections.Generic.IReadOnlyList! 
Lifti.TokenizationOptions.SplitOnPunctuation.get -> bool -Lifti.TokenizationOptions.Stemming.get -> bool +Lifti.TokenizationOptions.Stemmer.get -> Lifti.Tokenization.IStemmer? Lifti.TokenizerBuilder Lifti.TokenizerBuilder.AccentInsensitive(bool accentInsensitive = true) -> Lifti.TokenizerBuilder! Lifti.TokenizerBuilder.Build() -> Lifti.Tokenization.IIndexTokenizer! Lifti.TokenizerBuilder.CaseInsensitive(bool caseInsensitive = true) -> Lifti.TokenizerBuilder! Lifti.TokenizerBuilder.IgnoreCharacters(params char[]! ignoreCharacters) -> Lifti.TokenizerBuilder! Lifti.TokenizerBuilder.SplitOnCharacters(params char[]! additionalSplitCharacters) -> Lifti.TokenizerBuilder! -Lifti.TokenizerBuilder.SplitOnPunctuation(bool splitOnPunctionation = true) -> Lifti.TokenizerBuilder! +Lifti.TokenizerBuilder.SplitOnPunctuation(bool splitOnPunctuation = true) -> Lifti.TokenizerBuilder! Lifti.TokenizerBuilder.TokenizerBuilder() -> void Lifti.TokenizerBuilder.WithFactory(System.Func! tokenizerFactory) -> Lifti.TokenizerBuilder! +Lifti.TokenizerBuilder.WithStemming() -> Lifti.TokenizerBuilder! Lifti.TokenizerBuilder.WithStemming(bool stemming = true) -> Lifti.TokenizerBuilder! +Lifti.TokenizerBuilder.WithStemming(Lifti.Tokenization.IStemmer! stemmer) -> Lifti.TokenizerBuilder! Lifti.TokenLocation -Lifti.TokenLocation.CompareTo(Lifti.TokenLocation other) -> int Lifti.TokenLocation.Length.get -> ushort Lifti.TokenLocation.Start.get -> int Lifti.TokenLocation.TokenIndex.get -> int -Lifti.TokenLocation.TokenLocation() -> void Lifti.TokenLocation.TokenLocation(int tokenIndex, int start, ushort length) -> void +override Lifti.ChildNodeMap.Equals(object? obj) -> bool +override Lifti.ChildNodeMap.GetHashCode() -> int +override Lifti.DocumentTokenMatchMap.Equals(object? obj) -> bool +override Lifti.DocumentTokenMatchMap.GetHashCode() -> int override Lifti.FieldSearchResult.ToString() -> string! override Lifti.FullTextIndex.ToString() -> string! 
-override Lifti.IndexedFieldDetails.ReadAsync(object! item, System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask!> +override Lifti.IndexedFieldDetails.ReadAsync(object! item, System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask!> override Lifti.IndexedToken.Equals(object? obj) -> bool override Lifti.IndexedToken.GetHashCode() -> int override Lifti.IndexNode.ToString() -> string! override Lifti.PreprocessedInput.Equals(object? obj) -> bool override Lifti.PreprocessedInput.GetHashCode() -> int -override Lifti.Querying.CompositeTokenMatchLocation.Equals(object? obj) -> bool -override Lifti.Querying.CompositeTokenMatchLocation.GetHashCode() -> int -override Lifti.Querying.CompositeTokenMatchLocation.ToString() -> string! -override Lifti.Querying.FieldMatch.Equals(object? obj) -> bool -override Lifti.Querying.FieldMatch.GetHashCode() -> int +override Lifti.Querying.IntermediateQueryResult.Equals(object? obj) -> bool +override Lifti.Querying.IntermediateQueryResult.GetHashCode() -> int override Lifti.Querying.IntermediateQueryResult.ToString() -> string! override Lifti.Querying.Query.ToString() -> string? override Lifti.Querying.QueryParts.AdjacentWordsQueryOperator.ToString() -> string! -override Lifti.Querying.QueryParts.AndQueryOperator.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryParts.IQueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +override Lifti.Querying.QueryParts.AndQueryOperator.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult override Lifti.Querying.QueryParts.AndQueryOperator.Precedence.get -> Lifti.Querying.OperatorPrecedence override Lifti.Querying.QueryParts.AndQueryOperator.ToString() -> string! override Lifti.Querying.QueryParts.BracketedQueryPart.ToString() -> string! -override Lifti.Querying.QueryParts.ExactWordQueryPart.Evaluate(System.Func! 
navigatorCreator, Lifti.Querying.QueryParts.IQueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +override Lifti.Querying.QueryParts.ExactWordQueryPart.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult override Lifti.Querying.QueryParts.ExactWordQueryPart.ToString() -> string! override Lifti.Querying.QueryParts.FieldFilterQueryOperator.ToString() -> string! -override Lifti.Querying.QueryParts.FuzzyMatchQueryPart.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryParts.IQueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +override Lifti.Querying.QueryParts.FuzzyMatchQueryPart.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult override Lifti.Querying.QueryParts.FuzzyMatchQueryPart.ToString() -> string! -override Lifti.Querying.QueryParts.NearQueryOperator.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryParts.IQueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +override Lifti.Querying.QueryParts.NearQueryOperator.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult override Lifti.Querying.QueryParts.NearQueryOperator.Precedence.get -> Lifti.Querying.OperatorPrecedence override Lifti.Querying.QueryParts.NearQueryOperator.ToString() -> string! -override Lifti.Querying.QueryParts.OrQueryOperator.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryParts.IQueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +override Lifti.Querying.QueryParts.OrQueryOperator.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult override Lifti.Querying.QueryParts.OrQueryOperator.Precedence.get -> Lifti.Querying.OperatorPrecedence override Lifti.Querying.QueryParts.OrQueryOperator.ToString() -> string! 
-override Lifti.Querying.QueryParts.PrecedingNearQueryOperator.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryParts.IQueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +override Lifti.Querying.QueryParts.PrecedingNearQueryOperator.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult override Lifti.Querying.QueryParts.PrecedingNearQueryOperator.Precedence.get -> Lifti.Querying.OperatorPrecedence override Lifti.Querying.QueryParts.PrecedingNearQueryOperator.ToString() -> string! -override Lifti.Querying.QueryParts.PrecedingQueryOperator.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryParts.IQueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult +override Lifti.Querying.QueryParts.PrecedingQueryOperator.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult override Lifti.Querying.QueryParts.PrecedingQueryOperator.Precedence.get -> Lifti.Querying.OperatorPrecedence override Lifti.Querying.QueryParts.PrecedingQueryOperator.ToString() -> string! override Lifti.Querying.QueryParts.WildcardQueryFragment.Equals(object? obj) -> bool override Lifti.Querying.QueryParts.WildcardQueryFragment.GetHashCode() -> int +override Lifti.Querying.QueryParts.WildcardQueryPart.Evaluate(System.Func! navigatorCreator, Lifti.Querying.QueryContext! queryContext) -> Lifti.Querying.IntermediateQueryResult override Lifti.Querying.QueryParts.WildcardQueryPart.ToString() -> string! -override Lifti.Querying.QueryTokenMatch.Equals(object? obj) -> bool -override Lifti.Querying.QueryTokenMatch.GetHashCode() -> int +override Lifti.Querying.QueryParts.WordQueryPart.RunWeightingCalculation(System.Func! navigatorCreator) -> double override Lifti.Querying.ScoredFieldMatch.Equals(object? 
obj) -> bool override Lifti.Querying.ScoredFieldMatch.GetHashCode() -> int override Lifti.Querying.ScoredToken.Equals(object? obj) -> bool override Lifti.Querying.ScoredToken.GetHashCode() -> int -override Lifti.Querying.SingleTokenLocationMatch.Equals(object? obj) -> bool -override Lifti.Querying.SingleTokenLocationMatch.GetHashCode() -> int -override Lifti.Querying.SingleTokenLocationMatch.ToString() -> string! override Lifti.SearchResult.ToString() -> string! override Lifti.Tokenization.TextExtraction.DocumentTextFragment.Equals(object? obj) -> bool override Lifti.Tokenization.TextExtraction.DocumentTextFragment.GetHashCode() -> int override Lifti.TokenLocation.Equals(object? obj) -> bool override Lifti.TokenLocation.GetHashCode() -> int override Lifti.TokenLocation.ToString() -> string! +static Lifti.ChildNodeMap.Empty.get -> Lifti.ChildNodeMap +static Lifti.ChildNodeMap.operator !=(Lifti.ChildNodeMap left, Lifti.ChildNodeMap right) -> bool +static Lifti.ChildNodeMap.operator ==(Lifti.ChildNodeMap left, Lifti.ChildNodeMap right) -> bool +static Lifti.DocumentMetadata.ForLooseText(int documentId, TKey key, Lifti.DocumentStatistics documentStatistics) -> Lifti.DocumentMetadata! +static Lifti.DocumentMetadata.ForObject(byte objectTypeId, int documentId, TKey key, Lifti.DocumentStatistics documentStatistics, System.DateTime? scoringFreshnessDate, double? scoringMagnitude) -> Lifti.DocumentMetadata! +static Lifti.DocumentTokenMatchMap.Empty.get -> Lifti.DocumentTokenMatchMap +static Lifti.DocumentTokenMatchMap.operator !=(Lifti.DocumentTokenMatchMap left, Lifti.DocumentTokenMatchMap right) -> bool +static Lifti.DocumentTokenMatchMap.operator ==(Lifti.DocumentTokenMatchMap left, Lifti.DocumentTokenMatchMap right) -> bool static Lifti.FullTextIndexExtensions.ParseQuery(this Lifti.IFullTextIndex! index, string! queryText) -> Lifti.Querying.IQuery! 
static Lifti.IndexedToken.operator !=(Lifti.IndexedToken left, Lifti.IndexedToken right) -> bool static Lifti.IndexedToken.operator ==(Lifti.IndexedToken left, Lifti.IndexedToken right) -> bool @@ -607,18 +688,11 @@ static Lifti.PreprocessedInput.operator !=(Lifti.PreprocessedInput left, Lifti.P static Lifti.PreprocessedInput.operator ==(Lifti.PreprocessedInput left, Lifti.PreprocessedInput right) -> bool static Lifti.PreprocessedInput.ToPreprocessedInput(char value) -> Lifti.PreprocessedInput static Lifti.PreprocessedInput.ToPreprocessedInput(string! value) -> Lifti.PreprocessedInput -static Lifti.Querying.CompositePositionalIntersectMerger.Apply(Lifti.Querying.IntermediateQueryResult left, Lifti.Querying.IntermediateQueryResult right, int leftTolerance, int rightTolerance) -> System.Collections.Generic.IEnumerable! -static Lifti.Querying.CompositeTokenMatchLocation.operator !=(Lifti.Querying.CompositeTokenMatchLocation left, Lifti.Querying.CompositeTokenMatchLocation right) -> bool -static Lifti.Querying.CompositeTokenMatchLocation.operator ==(Lifti.Querying.CompositeTokenMatchLocation left, Lifti.Querying.CompositeTokenMatchLocation right) -> bool -static Lifti.Querying.FieldMatch.operator !=(Lifti.Querying.FieldMatch left, Lifti.Querying.FieldMatch right) -> bool -static Lifti.Querying.FieldMatch.operator ==(Lifti.Querying.FieldMatch left, Lifti.Querying.FieldMatch right) -> bool static Lifti.Querying.IntermediateQueryResult.Empty.get -> Lifti.Querying.IntermediateQueryResult -static Lifti.Querying.IntermediateQueryResultMerger.JoinFields(System.Collections.Generic.IEnumerable! leftFields, System.Collections.Generic.IEnumerable! rightFields) -> System.Collections.Generic.IList<(byte fieldId, double score, System.Collections.Generic.IReadOnlyList! leftLocations, System.Collections.Generic.IReadOnlyList! rightLocations)>! 
-static Lifti.Querying.IntermediateQueryResultMerger.MergeFields(Lifti.Querying.ScoredToken leftMatch, Lifti.Querying.ScoredToken rightMatch) -> System.Collections.Generic.IEnumerable! -static Lifti.Querying.IntermediateQueryResultMerger.SwapIf(bool condition, ref T left, ref T right) -> void -static Lifti.Querying.IntersectMerger.Apply(Lifti.Querying.IntermediateQueryResult left, Lifti.Querying.IntermediateQueryResult right) -> System.Collections.Generic.IEnumerable! -static Lifti.Querying.PrecedingIntersectMerger.Apply(Lifti.Querying.IntermediateQueryResult left, Lifti.Querying.IntermediateQueryResult right) -> System.Collections.Generic.IEnumerable! +static Lifti.Querying.IntermediateQueryResult.operator !=(Lifti.Querying.IntermediateQueryResult left, Lifti.Querying.IntermediateQueryResult right) -> bool +static Lifti.Querying.IntermediateQueryResult.operator ==(Lifti.Querying.IntermediateQueryResult left, Lifti.Querying.IntermediateQueryResult right) -> bool static Lifti.Querying.Query.Empty.get -> Lifti.Querying.IQuery! +static Lifti.Querying.QueryContext.Empty.get -> Lifti.Querying.QueryContext! static Lifti.Querying.QueryParts.AndQueryOperator.CombineAll(System.Collections.Generic.IEnumerable! queryParts) -> Lifti.Querying.QueryParts.IQueryPart! static Lifti.Querying.QueryParts.EmptyQueryPart.Instance.get -> Lifti.Querying.QueryParts.EmptyQueryPart! static Lifti.Querying.QueryParts.FieldFilterQueryOperator.CreateForField(Lifti.IIndexedFieldLookup! fieldLookup, string! fieldName, Lifti.Querying.QueryParts.IQueryPart! statement) -> Lifti.Querying.QueryParts.FieldFilterQueryOperator! 
@@ -628,25 +702,31 @@ static Lifti.Querying.QueryParts.WildcardQueryFragment.MultiCharacter.get -> Lif static Lifti.Querying.QueryParts.WildcardQueryFragment.operator !=(Lifti.Querying.QueryParts.WildcardQueryFragment left, Lifti.Querying.QueryParts.WildcardQueryFragment right) -> bool static Lifti.Querying.QueryParts.WildcardQueryFragment.operator ==(Lifti.Querying.QueryParts.WildcardQueryFragment left, Lifti.Querying.QueryParts.WildcardQueryFragment right) -> bool static Lifti.Querying.QueryParts.WildcardQueryFragment.SingleCharacter.get -> Lifti.Querying.QueryParts.WildcardQueryFragment -static Lifti.Querying.QueryTokenMatch.operator !=(Lifti.Querying.QueryTokenMatch left, Lifti.Querying.QueryTokenMatch right) -> bool -static Lifti.Querying.QueryTokenMatch.operator ==(Lifti.Querying.QueryTokenMatch left, Lifti.Querying.QueryTokenMatch right) -> bool -static Lifti.Querying.ScoredFieldMatch.operator !=(Lifti.Querying.ScoredFieldMatch left, Lifti.Querying.ScoredFieldMatch right) -> bool -static Lifti.Querying.ScoredFieldMatch.operator ==(Lifti.Querying.ScoredFieldMatch left, Lifti.Querying.ScoredFieldMatch right) -> bool +static Lifti.Querying.ScoredFieldMatch.operator !=(Lifti.Querying.ScoredFieldMatch? left, Lifti.Querying.ScoredFieldMatch? right) -> bool +static Lifti.Querying.ScoredFieldMatch.operator ==(Lifti.Querying.ScoredFieldMatch? left, Lifti.Querying.ScoredFieldMatch? 
right) -> bool static Lifti.Querying.ScoredToken.operator !=(Lifti.Querying.ScoredToken left, Lifti.Querying.ScoredToken right) -> bool static Lifti.Querying.ScoredToken.operator ==(Lifti.Querying.ScoredToken left, Lifti.Querying.ScoredToken right) -> bool -static Lifti.Querying.SingleTokenLocationMatch.operator !=(Lifti.Querying.SingleTokenLocationMatch left, Lifti.Querying.SingleTokenLocationMatch right) -> bool -static Lifti.Querying.SingleTokenLocationMatch.operator ==(Lifti.Querying.SingleTokenLocationMatch left, Lifti.Querying.SingleTokenLocationMatch right) -> bool -static Lifti.Querying.UnionMerger.Apply(Lifti.Querying.IntermediateQueryResult left, Lifti.Querying.IntermediateQueryResult right) -> System.Collections.Generic.IEnumerable! static Lifti.Tokenization.IndexTokenizer.Default.get -> Lifti.Tokenization.IndexTokenizer! static Lifti.Tokenization.TextExtraction.DocumentTextFragment.operator !=(Lifti.Tokenization.TextExtraction.DocumentTextFragment left, Lifti.Tokenization.TextExtraction.DocumentTextFragment right) -> bool static Lifti.Tokenization.TextExtraction.DocumentTextFragment.operator ==(Lifti.Tokenization.TextExtraction.DocumentTextFragment left, Lifti.Tokenization.TextExtraction.DocumentTextFragment right) -> bool static Lifti.TokenizationOptions.Default.get -> Lifti.TokenizationOptions! -static Lifti.TokenLocation.operator !=(Lifti.TokenLocation left, Lifti.TokenLocation right) -> bool -static Lifti.TokenLocation.operator <(Lifti.TokenLocation left, Lifti.TokenLocation right) -> bool -static Lifti.TokenLocation.operator <=(Lifti.TokenLocation left, Lifti.TokenLocation right) -> bool -static Lifti.TokenLocation.operator ==(Lifti.TokenLocation left, Lifti.TokenLocation right) -> bool -static Lifti.TokenLocation.operator >(Lifti.TokenLocation left, Lifti.TokenLocation right) -> bool -static Lifti.TokenLocation.operator >=(Lifti.TokenLocation left, Lifti.TokenLocation right) -> bool +static Lifti.TokenLocation.operator !=(Lifti.TokenLocation? 
left, Lifti.TokenLocation? right) -> bool +static Lifti.TokenLocation.operator <(Lifti.TokenLocation? left, Lifti.TokenLocation? right) -> bool +static Lifti.TokenLocation.operator <=(Lifti.TokenLocation? left, Lifti.TokenLocation? right) -> bool +static Lifti.TokenLocation.operator ==(Lifti.TokenLocation? left, Lifti.TokenLocation? right) -> bool +static Lifti.TokenLocation.operator >(Lifti.TokenLocation? left, Lifti.TokenLocation? right) -> bool +static Lifti.TokenLocation.operator >=(Lifti.TokenLocation? left, Lifti.TokenLocation? right) -> bool System.Runtime.CompilerServices.IsExternalInit System.Runtime.CompilerServices.IsExternalInit.IsExternalInit() -> void -virtual Lifti.Tokenization.IndexTokenizer.IsSplitCharacter(char character) -> bool \ No newline at end of file +virtual Lifti.Querying.QueryParts.BinaryQueryOperator.RunWeightingCalculation(System.Func! navigatorCreator) -> double +virtual Lifti.Querying.QueryParts.ScoreBoostedQueryPart.ToString(string! searchTerm) -> string! +virtual Lifti.ScoreBoostMetadata.CalculateScoreBoost(Lifti.DocumentMetadata! documentMetadata) -> double +virtual Lifti.Serialization.IndexDeserializerBase.Dispose(bool disposing) -> void +virtual Lifti.Serialization.IndexDeserializerBase.OnDeserializationCompleteAsync(Lifti.FullTextIndex! index, System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask +virtual Lifti.Serialization.IndexDeserializerBase.OnDeserializationStartingAsync(System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask +virtual Lifti.Serialization.IndexSerializerBase.Dispose(bool disposing) -> void +virtual Lifti.Serialization.IndexSerializerBase.OnSerializationComplete(Lifti.IIndexSnapshot! snapshot, System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask +virtual Lifti.Serialization.IndexSerializerBase.OnSerializationStart(Lifti.IIndexSnapshot! 
snapshot, System.Threading.CancellationToken cancellationToken) -> System.Threading.Tasks.ValueTask +virtual Lifti.Tokenization.IndexTokenizer.IsSplitCharacter(char character) -> bool +virtual Lifti.TokenLocation.CompareTo(Lifti.TokenLocation? other) -> int +virtual Lifti.TokenLocation.Equals(Lifti.TokenLocation? other) -> bool diff --git a/src/Lifti.Core/PublicApi/netstandard2/PublicAPI.Unshipped.txt b/src/Lifti.Core/PublicApi/netstandard2/PublicAPI.Unshipped.txt new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/src/Lifti.Core/PublicApi/netstandard2/PublicAPI.Unshipped.txt @@ -0,0 +1 @@ + diff --git a/src/Lifti.Core/Querying/CompositePositionalIntersectMerger.cs b/src/Lifti.Core/Querying/CompositePositionalIntersectMerger.cs index 333c3976..3b89e07b 100644 --- a/src/Lifti.Core/Querying/CompositePositionalIntersectMerger.cs +++ b/src/Lifti.Core/Querying/CompositePositionalIntersectMerger.cs @@ -1,4 +1,5 @@ -using System.Collections.Generic; +using System; +using System.Collections.Generic; using System.Linq; namespace Lifti.Querying @@ -7,27 +8,36 @@ namespace Lifti.Querying /// Provides logic for intersecting the results in two s where the fields /// locations on the left must be within a specified positional tolerance of the the matching field locations on the right. /// - public class CompositePositionalIntersectMerger : IntermediateQueryResultMerger + internal sealed class CompositePositionalIntersectMerger : IntermediateQueryResultMerger { /// /// Applies the intersection to the instances. 
/// - public static IEnumerable Apply(IntermediateQueryResult left, IntermediateQueryResult right, int leftTolerance, int rightTolerance) + public static List Apply(IntermediateQueryResult left, IntermediateQueryResult right, int leftTolerance, int rightTolerance) { - // Swap over the variables to ensure we're performing as few iterations as possible in the intersection - // Also swap the tolerance values around, otherwise we reverse the tolerance directionality. - var swapLeftAndRight = left.Matches.Count > right.Matches.Count; - SwapIf(swapLeftAndRight, ref left, ref right); - SwapIf(swapLeftAndRight, ref leftTolerance, ref rightTolerance); + // track two pointers through the lists on each side. The document ids are ordered on both sides, so we can + // move through the lists in a single pass - var rightItems = right.Matches.ToDictionary(m => m.ItemId); + var leftIndex = 0; + var rightIndex = 0; - foreach (var leftMatch in left.Matches) + var leftMatches = left.Matches; + var rightMatches = right.Matches; + var leftCount = leftMatches.Count; + var rightCount = rightMatches.Count; + + List results = new(Math.Min(leftCount, rightCount)); + + List positionalMatches = []; + while (leftIndex < leftCount && rightIndex < rightCount) { - if (rightItems.TryGetValue(leftMatch.ItemId, out var rightMatch)) + var leftMatch = leftMatches[leftIndex]; + var rightMatch = rightMatches[rightIndex]; + + if (leftMatch.DocumentId == rightMatch.DocumentId) { - var positionalMatches = PositionallyMatchAndCombineTokens( - swapLeftAndRight, + PositionallyMatchAndCombineTokens( + positionalMatches, leftMatch.FieldMatches, rightMatch.FieldMatches, leftTolerance, @@ -35,37 +45,38 @@ public static IEnumerable Apply(IntermediateQueryResult left, Inter if (positionalMatches.Count > 0) { - yield return new ScoredToken(leftMatch.ItemId, positionalMatches); + results.Add(new ScoredToken(leftMatch.DocumentId, positionalMatches.ToList())); + positionalMatches.Clear(); } + + leftIndex++; + 
rightIndex++; } - } + else if (leftMatch.DocumentId < rightMatch.DocumentId) + { + leftIndex++; + } + else + { + rightIndex++; + } + } + + return results; } - private static List PositionallyMatchAndCombineTokens( - bool leftAndRightSwapped, - IEnumerable leftFields, - IEnumerable rightFields, + private static void PositionallyMatchAndCombineTokens( + List positionalMatches, + IReadOnlyList leftFields, + IReadOnlyList rightFields, int leftTolerance, int rightTolerance) { var matchedFields = JoinFields(leftFields, rightFields); - var fieldResults = new List(matchedFields.Count); - var fieldTokenMatches = new List(); + var fieldTokenMatches = new List(); foreach (var (fieldId, score, leftLocations, rightLocations) in matchedFields) { - fieldTokenMatches.Clear(); - - static CompositeTokenMatchLocation CreateCompositeTokenMatchLocation(bool swapTokens, ITokenLocationMatch currentToken, ITokenLocationMatch nextToken) - { - if (swapTokens) - { - return new CompositeTokenMatchLocation(nextToken, currentToken); - } - - return new CompositeTokenMatchLocation(currentToken, nextToken); - } - var leftIndex = 0; var rightIndex = 0; @@ -78,7 +89,7 @@ static CompositeTokenMatchLocation CreateCompositeTokenMatchLocation(bool swapTo { if ((currentToken.MinTokenIndex - nextToken.MaxTokenIndex).IsPositiveAndLessThanOrEqualTo(leftTolerance)) { - fieldTokenMatches.Add(CreateCompositeTokenMatchLocation(leftAndRightSwapped, currentToken, nextToken)); + fieldTokenMatches.Add(currentToken.ComposeWith(nextToken)); } } @@ -86,7 +97,7 @@ static CompositeTokenMatchLocation CreateCompositeTokenMatchLocation(bool swapTo { if ((nextToken.MinTokenIndex - currentToken.MaxTokenIndex).IsPositiveAndLessThanOrEqualTo(rightTolerance)) { - fieldTokenMatches.Add(CreateCompositeTokenMatchLocation(leftAndRightSwapped, currentToken, nextToken)); + fieldTokenMatches.Add(currentToken.ComposeWith(nextToken)); } } @@ -102,14 +113,15 @@ static CompositeTokenMatchLocation CreateCompositeTokenMatchLocation(bool 
swapTo if (fieldTokenMatches.Count > 0) { - fieldResults.Add( - new ScoredFieldMatch( - score, - new FieldMatch(fieldId, fieldTokenMatches))); + positionalMatches.Add( + ScoredFieldMatch.CreateFromPresorted( + score, + fieldId, + fieldTokenMatches)); + + fieldTokenMatches = []; } } - - return fieldResults; } } } diff --git a/src/Lifti.Core/Querying/CompositeTokenLocation.cs b/src/Lifti.Core/Querying/CompositeTokenLocation.cs new file mode 100644 index 00000000..d17137d9 --- /dev/null +++ b/src/Lifti.Core/Querying/CompositeTokenLocation.cs @@ -0,0 +1,108 @@ +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Linq; + +namespace Lifti.Querying +{ + /// + /// Represents the result of a positional query intersection. This keeps track of all the locations + /// that matched positionally, giving quick access to the max and min locations for reference + /// in further positional intersections if needed. + /// + internal sealed class CompositeTokenLocation : ITokenLocation + { + private readonly TokenLocation[] locations; + + /// + /// Constructs a new instance of . 
+ /// + internal CompositeTokenLocation(TokenLocation[] locations, int minTokenIndex, int maxTokenIndex) + { + this.locations = locations; + this.MinTokenIndex = minTokenIndex; + this.MaxTokenIndex = maxTokenIndex; + } + + public int MaxTokenIndex { get; } + + public int MinTokenIndex { get; } + + public void AddTo(HashSet collector) + { + for (var i = 0; i < this.locations.Length; i++) + { + collector.Add(this.locations[i]); + } + } + + public CompositeTokenLocation ComposeWith(ITokenLocation other) + { + var currentLength = this.locations.Length; + switch (other) + { + case CompositeTokenLocation composite: + // We need to build a new array capable of storing both sets of locations + var additionLength = composite.locations.Length; + var newLocations = new TokenLocation[currentLength + additionLength]; + Array.Copy(this.locations, newLocations, currentLength); + Array.Copy(composite.locations, 0, newLocations, currentLength, additionLength); + + return new CompositeTokenLocation( + newLocations, + Math.Min(this.MinTokenIndex, composite.MinTokenIndex), + Math.Max(this.MaxTokenIndex, composite.MaxTokenIndex)); + + + case TokenLocation location: + // Just one more element to add + newLocations = new TokenLocation[currentLength + 1]; + Array.Copy(this.locations, newLocations, currentLength); + newLocations[currentLength] = location; + + var newTokenIndex = location.TokenIndex; + return new CompositeTokenLocation( + newLocations, + Math.Min(this.MinTokenIndex, newTokenIndex), + Math.Max(this.MaxTokenIndex, newTokenIndex)); + + default: + throw new InvalidOperationException($"Cannot compose a {nameof(TokenLocation)} with a {other.GetType().Name}"); + } + } + + /// + public bool Equals(ITokenLocation? other) + { + return other switch + { + CompositeTokenLocation composite => this.locations.SequenceEqual(composite.locations), + _ => false, + }; + } + + /// + public int CompareTo(ITokenLocation? 
other) + { + if (other is { } ITokenLocation) + { + var result = this.MinTokenIndex.CompareTo(other.MinTokenIndex); + if (result == 0) + { + result = this.MaxTokenIndex.CompareTo(other.MaxTokenIndex); + } + + return result; + } + + return -1; + } + + /// + public override string ToString() + { + return "Composite location - min:" + this.MinTokenIndex.ToString(CultureInfo.InvariantCulture) + + " max: " + this.MaxTokenIndex.ToString(CultureInfo.InvariantCulture); + } + } +} \ No newline at end of file diff --git a/src/Lifti.Core/Querying/CompositeTokenMatchLocation.cs b/src/Lifti.Core/Querying/CompositeTokenMatchLocation.cs deleted file mode 100644 index d05d91fd..00000000 --- a/src/Lifti.Core/Querying/CompositeTokenMatchLocation.cs +++ /dev/null @@ -1,82 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Globalization; -using System.Linq; - -namespace Lifti.Querying -{ - /// - /// Represents the result of a positional query intersection. This keeps track of all the locations - /// that matched positionally, giving quick access to the max and min locations for reference - /// in further positional intersections if needed. - /// - public readonly struct CompositeTokenMatchLocation : ITokenLocationMatch, IEquatable - { - private readonly ITokenLocationMatch leftToken; - private readonly ITokenLocationMatch rightToken; - private readonly Lazy minTokenIndex; - private readonly Lazy maxTokenIndex; - - /// - /// Constructs a new instance of . 
- /// - public CompositeTokenMatchLocation(ITokenLocationMatch leftToken, ITokenLocationMatch rightToken) - { - this.leftToken = leftToken; - this.rightToken = rightToken; - this.minTokenIndex = new Lazy(() => Math.Min(leftToken.MinTokenIndex, rightToken.MinTokenIndex)); - this.maxTokenIndex = new Lazy(() => Math.Max(leftToken.MaxTokenIndex, rightToken.MaxTokenIndex)); - } - - /// - public int MaxTokenIndex => this.maxTokenIndex.Value; - - /// - public int MinTokenIndex => this.minTokenIndex.Value; - - /// - public override bool Equals(object? obj) - { - return obj is CompositeTokenMatchLocation location && - this.Equals(location); - } - - /// - public override int GetHashCode() - { - return HashCode.Combine(this.leftToken, this.rightToken); - } - - /// - public IEnumerable GetLocations() - { - return this.leftToken.GetLocations().Concat(this.rightToken.GetLocations()); - } - - /// - public bool Equals(CompositeTokenMatchLocation other) - { - return this.leftToken.Equals(other.leftToken) && - this.rightToken.Equals(other.rightToken); - } - - /// - public static bool operator ==(CompositeTokenMatchLocation left, CompositeTokenMatchLocation right) - { - return left.Equals(right); - } - - /// - public static bool operator !=(CompositeTokenMatchLocation left, CompositeTokenMatchLocation right) - { - return !(left == right); - } - - /// - public override string ToString() - { - return "Composite location - min:" + this.MinTokenIndex.ToString(CultureInfo.InvariantCulture) + - " max: " + this.MaxTokenIndex.ToString(CultureInfo.InvariantCulture); - } - } -} \ No newline at end of file diff --git a/src/Lifti.Core/Querying/FieldMatch.cs b/src/Lifti.Core/Querying/FieldMatch.cs deleted file mode 100644 index d80988b5..00000000 --- a/src/Lifti.Core/Querying/FieldMatch.cs +++ /dev/null @@ -1,88 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; - -namespace Lifti.Querying -{ - /// - /// Describes a match that occurred for a query within a field. 
- /// - public readonly struct FieldMatch : IEquatable - { - /// - /// Constructs a new instance of from a . - /// - public FieldMatch(IndexedToken token) - { - this.FieldId = token.FieldId; - this.Locations = token.Locations.Select(l => (ITokenLocationMatch)new SingleTokenLocationMatch(l)).ToList(); - } - - /// - /// Constructs a new instance of . - /// - public FieldMatch(byte fieldId, IEnumerable locations) - { - this.FieldId = fieldId; - this.Locations = CreateLocationsList(locations); - } - - /// - /// Gets the id of the matched field. - /// - public byte FieldId { get; } - - /// - /// Gets the set of that describe where in the document the matches occurred. - /// - public IReadOnlyList Locations { get; } - - /// - /// Enumerates through all the and expands them to a set of s. - /// - public IReadOnlyList GetTokenLocations() - { - return this.Locations.SelectMany(l => l.GetLocations()) - .Distinct() - .OrderBy(l => l.TokenIndex) - .ToList(); - } - - /// - public override bool Equals(object? 
obj) - { - return obj is FieldMatch match && - this.Equals(match); - } - - /// - public override int GetHashCode() - { - return HashCode.Combine(this.FieldId, this.Locations); - } - - /// - public bool Equals(FieldMatch other) - { - return this.FieldId == other.FieldId && - this.Locations.SequenceEqual(other.Locations); - } - - /// - public static bool operator ==(FieldMatch left, FieldMatch right) - { - return left.Equals(right); - } - - /// - public static bool operator !=(FieldMatch left, FieldMatch right) - { - return !(left == right); - } - - private static List CreateLocationsList(IEnumerable matches) - { - return matches.OrderBy(x => x.MinTokenIndex).ToList(); - } - } -} diff --git a/src/Lifti.Core/Querying/FieldScoreBoostProvider.cs b/src/Lifti.Core/Querying/FieldScoreBoostProvider.cs new file mode 100644 index 00000000..919ed6ea --- /dev/null +++ b/src/Lifti.Core/Querying/FieldScoreBoostProvider.cs @@ -0,0 +1,45 @@ +using System; +using System.Collections.Generic; +using System.Linq; + +namespace Lifti.Querying +{ + /// + /// The default implementation of the interface. + /// + public class FieldScoreBoostProvider : IFieldScoreBoostProvider + { + private readonly Dictionary fieldBoosts; + + /// + /// Constructs a new instance of the class. + /// + /// + /// The to load the defined fields from. 
+ /// + public FieldScoreBoostProvider(IIndexedFieldLookup fieldLookup) + { + if (fieldLookup is null) + { + throw new ArgumentNullException(nameof(fieldLookup)); + } + + this.fieldBoosts = fieldLookup.AllFieldNames + .Select(fieldName => fieldLookup.GetFieldInfo(fieldName)) + .ToDictionary(f => f.Id, f => f.ScoreBoost); + + this.fieldBoosts.Add(fieldLookup.DefaultField, 1D); + } + + /// + public double GetScoreBoost(byte fieldId) + { + if (!this.fieldBoosts.TryGetValue(fieldId, out var boost)) + { + throw new LiftiException(ExceptionMessages.UnknownField, fieldId); + } + + return boost; + } + } +} diff --git a/src/Lifti.Core/Querying/IFieldScoreBoostProvider.cs b/src/Lifti.Core/Querying/IFieldScoreBoostProvider.cs new file mode 100644 index 00000000..c3667a87 --- /dev/null +++ b/src/Lifti.Core/Querying/IFieldScoreBoostProvider.cs @@ -0,0 +1,13 @@ +namespace Lifti.Querying +{ + /// + /// Implemented by classes capable of providing the score boost for a given field. + /// + public interface IFieldScoreBoostProvider + { + /// + /// Gets the boost for the specified field. + /// + double GetScoreBoost(byte fieldId); + } +} diff --git a/src/Lifti.Core/Querying/IIndexNavigator.cs b/src/Lifti.Core/Querying/IIndexNavigator.cs index 7a2cbccd..f142fe40 100644 --- a/src/Lifti.Core/Querying/IIndexNavigator.cs +++ b/src/Lifti.Core/Querying/IIndexNavigator.cs @@ -11,30 +11,85 @@ public interface IIndexNavigator : IDisposable /// /// Gets a value indicating whether the navigator has matches exactly at its current position. /// - bool HasExactMatches { get; } - + bool HasExactMatches { get; } + + /// + /// Gets the index snapshot the navigator is currently navigating. + /// + IIndexSnapshot Snapshot { get; } + /// /// Enumerates all the tokens that are indexed under the current position in the navigator. This method can be used /// to reverse-engineer the words (tokens) that have been indexed. 
Note that this method will throw a /// if called after a bookmark obtained by is applied. /// - IEnumerable EnumerateIndexedTokens(); - + IEnumerable EnumerateIndexedTokens(); + + /// + /// Gets all the matches that are indexed under from where the navigator is located. + /// + /// + /// The current query context. + /// + /// + /// The document match collector to add the matches to. + /// + /// + /// The weighting to apply to the matched tokens. This can be used to adjust the resulting score for the match. + /// + void AddExactAndChildMatches(QueryContext queryContext, DocumentMatchCollector documentMatchCollector, double weighting = 1D); + /// - /// Gets all the items that are indexed under from where the navigator is located. + /// Gets all the matches that are indexed exactly at the point of the navigators current location. + /// + /// + /// The current query context. + /// + /// + /// The document match collector to add the matches to. + /// + /// + /// The weighting to apply to the matched tokens. This can be used to adjust the resulting score for the match. + /// + void AddExactMatches(QueryContext queryContext, DocumentMatchCollector documentMatchCollector, double weighting = 1D); + + /// + /// Gets all the matches that are indexed under from where the navigator is located. /// /// /// The weighting to apply to the matched tokens. This can be used to adjust the resulting score for the match. /// - IntermediateQueryResult GetExactAndChildMatches(double weighting = 1D); - + IntermediateQueryResult GetExactAndChildMatches(double weighting = 1D); + /// - /// Gets all the items that are indexed exactly at the point of the navigators current location. + /// Gets all the matches that are indexed exactly at the point of the navigators current location. /// /// /// The weighting to apply to the matched tokens. This can be used to adjust the resulting score for the match. 
/// - IntermediateQueryResult GetExactMatches(double weighting = 1D); + IntermediateQueryResult GetExactMatches(double weighting = 1D); + + /// + /// Gets all the matches that are indexed under from where the navigator is located. + /// + /// + /// The current query context. + /// + /// + /// The weighting to apply to the matched tokens. This can be used to adjust the resulting score for the match. + /// + IntermediateQueryResult GetExactAndChildMatches(QueryContext queryContext, double weighting = 1D); + + /// + /// Gets all the matches that are indexed exactly at the point of the navigators current location. + /// + /// + /// The current query context. + /// + /// + /// The weighting to apply to the matched tokens. This can be used to adjust the resulting score for the match. + /// + IntermediateQueryResult GetExactMatches(QueryContext queryContext, double weighting = 1D); /// /// Processes a single character, moving the navigator along the index. @@ -59,10 +114,15 @@ public interface IIndexNavigator : IDisposable /// Creates an for the current state of this instance. /// IIndexNavigatorBookmark CreateBookmark(); - + /// /// Enumerates all the characters that are available as options to process from the navigators current location. /// - IEnumerable EnumerateNextCharacters(); + IEnumerable EnumerateNextCharacters(); + + /// + /// Gets the number of exact matches that are indexed at the current location. + /// + int ExactMatchCount(); } } \ No newline at end of file diff --git a/src/Lifti.Core/Querying/IIndexNavigatorBookmark.cs b/src/Lifti.Core/Querying/IIndexNavigatorBookmark.cs index 3ee035e6..929aec2f 100644 --- a/src/Lifti.Core/Querying/IIndexNavigatorBookmark.cs +++ b/src/Lifti.Core/Querying/IIndexNavigatorBookmark.cs @@ -1,15 +1,17 @@ -namespace Lifti.Querying +using System; + +namespace Lifti.Querying { /// /// A bookmark for the state of an , allowing for subsequent query state /// to be reset to a certain point in time. 
/// - public interface IIndexNavigatorBookmark + public interface IIndexNavigatorBookmark : IDisposable { /// /// Resets the that this bookmark was created for to the navigation /// state it was in when this instance was created. /// - void Apply(); + void Apply(); } } \ No newline at end of file diff --git a/src/Lifti.Core/Querying/IScorer.cs b/src/Lifti.Core/Querying/IScorer.cs index b1332b35..6e4d8f97 100644 --- a/src/Lifti.Core/Querying/IScorer.cs +++ b/src/Lifti.Core/Querying/IScorer.cs @@ -6,20 +6,28 @@ namespace Lifti.Querying /// Provides methods for scoring search results. /// public interface IScorer - { - /// - /// Scores the set of that have been matched in the document. - /// - /// - /// The instances to score. - /// - /// - /// The weighting multiplier to apply to the score. - /// - /// - /// The represenations of the input . There will be a 1:1 - /// mapping of input -> output and the order will be preserved. - /// - IReadOnlyList Score(IReadOnlyList tokens, double weighting); + { + /// + /// Calculates the score for a match in a single document's field. + /// + /// + /// The total number of documents that matched the query. + /// + /// + /// The id of the document that the match is in. + /// + /// + /// The id of the field that the match is in. + /// + /// + /// The complete set of locations that the token was found in the document. + /// + /// + /// The weighting multiplier to apply to the score. + /// + /// + /// The score for the match. 
+ /// + double CalculateScore(int totalMatchedDocuments, int documentId, byte fieldId, IReadOnlyList tokenLocations, double weighting); } } diff --git a/src/Lifti.Core/Querying/ITokenLocationMatch.cs b/src/Lifti.Core/Querying/ITokenLocationMatch.cs deleted file mode 100644 index 522f916c..00000000 --- a/src/Lifti.Core/Querying/ITokenLocationMatch.cs +++ /dev/null @@ -1,26 +0,0 @@ -using System.Collections.Generic; - -namespace Lifti.Querying -{ - /// - /// Describes information about a token matched whilst executing a query. - /// - public interface ITokenLocationMatch - { - /// - /// Gets the maximum index in the field that the token matched. - /// - int MaxTokenIndex { get; } - - /// - /// Gets the minimum index in the field that the token matched. - /// - int MinTokenIndex { get; } - - /// - /// Gets all the s at which this token matched. - /// - /// - IEnumerable GetLocations(); - } -} diff --git a/src/Lifti.Core/Querying/IndexNavigator.cs b/src/Lifti.Core/Querying/IndexNavigator.cs index 219c9d2c..66aeaeb7 100644 --- a/src/Lifti.Core/Querying/IndexNavigator.cs +++ b/src/Lifti.Core/Querying/IndexNavigator.cs @@ -1,39 +1,58 @@ using System; -using System.Buffers; using System.Collections.Generic; -using System.Collections.Immutable; using System.Diagnostics; -using System.Linq; +using System.Linq; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using System.Text; namespace Lifti.Querying { internal sealed class IndexNavigator : IIndexNavigator - { - private readonly StringBuilder navigatedWith = new StringBuilder(16); + { + private readonly Queue bookmarkPool = new(10); + private readonly StringBuilder navigatedWith = new(16); private IIndexNavigatorPool? pool; - private IScorer? scorer; + private IScorer? scorer; + private IIndexSnapshot? snapshot; + private IndexNode? 
currentNode; private int intraNodeTextPosition; private bool bookmarkApplied; - internal void Initialize(IndexNode node, IIndexNavigatorPool pool, IScorer scorer) + internal void Initialize(IIndexSnapshot indexSnapshot, IIndexNavigatorPool pool, IScorer scorer) { this.pool = pool; - this.scorer = scorer; - this.currentNode = node; + this.scorer = scorer; + this.snapshot = indexSnapshot; + this.currentNode = indexSnapshot.Root; this.intraNodeTextPosition = 0; this.navigatedWith.Length = 0; this.bookmarkApplied = false; } - - private bool HasIntraNodeTextLeftToProcess => this.currentNode != null && this.intraNodeTextPosition < this.currentNode.IntraNodeText.Length; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool HasIntraNodeTextLeftToProcess(int intraNodeTextPosition, IndexNode node) => intraNodeTextPosition < node.IntraNodeText.Length; + + public int ExactMatchCount() + { + return this.HasExactMatches ? this.currentNode!.Matches.Count : 0; + } + + /// + public IIndexSnapshot Snapshot + { + get + { + return this.snapshot ?? 
throw new LiftiException(ExceptionMessages.NoSnapshotInitialized); + } + } public bool HasExactMatches { get { - if (this.currentNode == null || this.HasIntraNodeTextLeftToProcess || !this.currentNode.HasMatches) + if (this.currentNode == null || HasIntraNodeTextLeftToProcess(this.intraNodeTextPosition, this.currentNode) || !this.currentNode.HasMatches) { return false; } @@ -41,64 +60,61 @@ public bool HasExactMatches return this.currentNode.HasMatches; } } + + public IntermediateQueryResult GetExactMatches(double weighting = 1D) + { + return this.GetExactMatches(QueryContext.Empty, weighting); + } - public IntermediateQueryResult GetExactMatches(double weighting = 1D) + public IntermediateQueryResult GetExactMatches(QueryContext queryContext, double weighting = 1D) { - if (this.currentNode == null || this.HasIntraNodeTextLeftToProcess || !this.currentNode.HasMatches) + if (this.currentNode == null || HasIntraNodeTextLeftToProcess(this.intraNodeTextPosition, this.currentNode) || !this.currentNode.HasMatches) { return IntermediateQueryResult.Empty; } - - var matches = this.currentNode.Matches.Select(CreateQueryTokenMatch); - - return CreateIntermediateQueryResult(matches, weighting); - } - - public IntermediateQueryResult GetExactAndChildMatches(double weighting = 1D) - { + + var collector = new DocumentMatchCollector(); + + this.AddExactMatches(this.currentNode, queryContext, collector, weighting); + + return collector.ToIntermediateQueryResult(); + } + + public void AddExactMatches(QueryContext queryContext, DocumentMatchCollector documentMatchCollector, double weighting = 1D) + { + if (this.currentNode == null || HasIntraNodeTextLeftToProcess(this.intraNodeTextPosition, this.currentNode) || !this.currentNode.HasMatches) + { + return; + } + + this.AddExactMatches(this.currentNode, queryContext, documentMatchCollector, weighting); + } + + public IntermediateQueryResult GetExactAndChildMatches(double weighting = 1D) + { + return 
this.GetExactAndChildMatches(QueryContext.Empty, weighting); + } + + public IntermediateQueryResult GetExactAndChildMatches(QueryContext queryContext, double weighting = 1D) + { if (this.currentNode == null) { return IntermediateQueryResult.Empty; } - var matches = new Dictionary>(); - var childNodeStack = new Queue(); - childNodeStack.Enqueue(this.currentNode); - - while (childNodeStack.Count > 0) - { - var node = childNodeStack.Dequeue(); - if (node.HasMatches) - { - foreach (var match in node.Matches) - { - var fieldMatches = match.Value.Select(v => new FieldMatch(v)); - if (!matches.TryGetValue(match.Key, out var mergedItemResults)) - { - mergedItemResults = new List(fieldMatches); - matches[match.Key] = mergedItemResults; - } - else - { - mergedItemResults.AddRange(fieldMatches); - } - } - } - - if (node.HasChildNodes) - { - foreach (var childNode in node.ChildNodes.Values) - { - childNodeStack.Enqueue(childNode); - } - } - } - - var queryTokenMatches = matches.Select(m => new QueryTokenMatch( - m.Key, - MergeItemMatches(m.Value).ToList())); - - return CreateIntermediateQueryResult(queryTokenMatches, weighting); + var collector = new DocumentMatchCollector(); + + this.AddExactAndChildMatches(this.currentNode, queryContext, collector, weighting); + + return collector.ToIntermediateQueryResult(); + } + + public void AddExactAndChildMatches(QueryContext queryContext, DocumentMatchCollector documentMatchCollector, double weighting = 1) + { + if (this.currentNode != null) + { + this.AddExactAndChildMatches(this.currentNode, queryContext, documentMatchCollector, weighting); + } } public bool Process(string text) @@ -131,7 +147,7 @@ public bool Process(char value) this.navigatedWith.Append(value); } - if (this.HasIntraNodeTextLeftToProcess) + if (HasIntraNodeTextLeftToProcess(this.intraNodeTextPosition, this.currentNode)) { if (value == this.currentNode.IntraNodeText.Span[this.intraNodeTextPosition]) { @@ -188,24 +204,32 @@ public IEnumerable EnumerateIndexedTokens() 
public IEnumerable EnumerateNextCharacters() { if (this.currentNode != null) - { - if (this.HasIntraNodeTextLeftToProcess) - { - yield return this.currentNode.IntraNodeText.Span[this.intraNodeTextPosition]; - } - else if (this.currentNode.HasChildNodes) - { - foreach (var character in this.currentNode.ChildNodes.Keys) - { - yield return character; - } - } - } - } - + { + if (HasIntraNodeTextLeftToProcess(this.intraNodeTextPosition, this.currentNode)) + { + return MemoryMarshal.ToEnumerable(this.currentNode.IntraNodeText.Slice(this.intraNodeTextPosition, 1)); + } + else if (this.currentNode.HasChildNodes) + { + return this.currentNode.ChildNodes.CharacterMap.Select(static x => x.ChildChar); + } + } + + return Array.Empty(); + } + public IIndexNavigatorBookmark CreateBookmark() - { - return new IndexNavigatorBookmark(this); + { + var bookmark = this.GetCachedBookmarkOrCreate(); + bookmark.Capture(); + return bookmark; + } + + private IndexNavigatorBookmark GetCachedBookmarkOrCreate() + { + return this.bookmarkPool.Count == 0 + ? new IndexNavigatorBookmark(this) + : this.bookmarkPool.Dequeue(); } public void Dispose() @@ -219,18 +243,6 @@ public void Dispose() this.pool.Return(this); } - private IntermediateQueryResult CreateIntermediateQueryResult(IEnumerable matches, double weighting) - { - if (this.scorer == null) - { - throw new InvalidOperationException(ExceptionMessages.NoScorerInitialized); - } - - var matchList = matches as IReadOnlyList ?? 
matches.ToList(); - var scoredMatches = this.scorer.Score(matchList, weighting); - return new IntermediateQueryResult(scoredMatches); - } - private IEnumerable EnumerateIndexedTokens(IndexNode node) { if (node.IntraNodeText.Length > 0) @@ -245,10 +257,10 @@ private IEnumerable EnumerateIndexedTokens(IndexNode node) if (node.HasChildNodes) { - foreach (var childNode in node.ChildNodes) + foreach (var (character, childNode) in node.ChildNodes.CharacterMap) { - this.navigatedWith.Append(childNode.Key); - foreach (var result in this.EnumerateIndexedTokens(childNode.Value)) + this.navigatedWith.Append(character); + foreach (var result in this.EnumerateIndexedTokens(childNode)) { yield return result; } @@ -261,45 +273,112 @@ private IEnumerable EnumerateIndexedTokens(IndexNode node) { this.navigatedWith.Length -= node.IntraNodeText.Length; } - } - - private static IEnumerable MergeItemMatches(List fieldMatches) - { - return fieldMatches.ToLookup(m => m.FieldId) - .Select(m => new FieldMatch( - m.Key, - m.SelectMany(w => w.Locations))); - } + } + + private void AddExactAndChildMatches(IndexNode startNode, QueryContext queryContext, DocumentMatchCollector documentMatchCollector, double weighting) + { + var childNodeStack = new Queue(); + childNodeStack.Enqueue(startNode); - private static QueryTokenMatch CreateQueryTokenMatch( - KeyValuePair> match) - { - return new QueryTokenMatch( - match.Key, - match.Value.Select(v => new FieldMatch(v)).ToList()); - } + while (childNodeStack.Count > 0) + { + var node = childNodeStack.Dequeue(); + if (node.HasMatches) + { + AddExactMatches(node, queryContext, documentMatchCollector, weighting); + } - internal readonly struct IndexNavigatorBookmark : IIndexNavigatorBookmark, IEquatable + if (node.HasChildNodes) + { + foreach (var (_, childNode) in node.ChildNodes.CharacterMap) + { + childNodeStack.Enqueue(childNode); + } + } + } + } + + private void AddExactMatches(IndexNode node, QueryContext queryContext, DocumentMatchCollector 
documentMatchCollector, double weighting) + { + if (this.scorer == null) + { + throw new InvalidOperationException(ExceptionMessages.NoScorerInitialized); + } + + var documentMatches = node.Matches.Enumerate(); + if (queryContext.FilterToDocumentIds != null) + { + documentMatches = documentMatches.Where(m => queryContext.FilterToDocumentIds.Contains(m.documentId)); + } + + var filterToFieldId = queryContext.FilterToFieldId; + var matchedDocumentCount = node.Matches.Count; + var scorer = this.scorer; + foreach (var (documentId, indexedTokens) in documentMatches) + { + foreach (var indexedToken in indexedTokens) + { + var fieldId = indexedToken.FieldId; + if (filterToFieldId.HasValue && filterToFieldId.GetValueOrDefault() != fieldId) + { + continue; + } + + var score = scorer.CalculateScore( + matchedDocumentCount, + documentId, + fieldId, + indexedToken.Locations, + weighting); + + documentMatchCollector.Add(documentId, fieldId, indexedToken.Locations, score); + } + } + } + + internal sealed class IndexNavigatorBookmark : IIndexNavigatorBookmark, IEquatable { private readonly IndexNavigator indexNavigator; - private readonly IndexNode? currentNode; - private readonly int intraNodeTextPosition; + private IndexNode? 
currentNode; + private int intraNodeTextPosition; + private bool disposed; public IndexNavigatorBookmark(IndexNavigator indexNavigator) { - this.currentNode = indexNavigator.currentNode; - this.intraNodeTextPosition = indexNavigator.intraNodeTextPosition; this.indexNavigator = indexNavigator; + } + + public void Capture() + { + this.currentNode = indexNavigator.currentNode; + this.intraNodeTextPosition = indexNavigator.intraNodeTextPosition; + this.disposed = false; } /// public void Apply() - { - this.indexNavigator.bookmarkApplied = true; - this.indexNavigator.currentNode = this.currentNode; - this.indexNavigator.intraNodeTextPosition = this.intraNodeTextPosition; - } - + { + if (this.disposed) + { + throw new LiftiException(ExceptionMessages.BookmarkDisposed); + } + + var indexNavigator = this.indexNavigator; + indexNavigator.bookmarkApplied = true; + indexNavigator.currentNode = this.currentNode; + indexNavigator.intraNodeTextPosition = this.intraNodeTextPosition; + } + + public void Dispose() + { + if (this.indexNavigator.bookmarkPool.Count < 10) + { + this.indexNavigator.bookmarkPool.Enqueue(this); + } + + this.disposed = true; + } + public override bool Equals(object? obj) { if (obj is IndexNavigatorBookmark other) @@ -312,15 +391,15 @@ public override bool Equals(object? obj) public override int GetHashCode() { - return HashCode.Combine(this.indexNavigator, this.currentNode, this.intraNodeTextPosition); + return HashCode.Combine(this.currentNode, this.intraNodeTextPosition); } - public bool Equals(IndexNavigatorBookmark bookmark) + public bool Equals(IndexNavigatorBookmark? 
bookmark) { - return this.indexNavigator == bookmark.indexNavigator && + return bookmark != null && this.currentNode == bookmark.currentNode && this.intraNodeTextPosition == bookmark.intraNodeTextPosition; } - } + } } } diff --git a/src/Lifti.Core/Querying/IndexNavigatorPool.cs b/src/Lifti.Core/Querying/IndexNavigatorPool.cs index 631f7c45..d31a2cdd 100644 --- a/src/Lifti.Core/Querying/IndexNavigatorPool.cs +++ b/src/Lifti.Core/Querying/IndexNavigatorPool.cs @@ -4,7 +4,7 @@ namespace Lifti.Querying { internal sealed class IndexNavigatorPool : IIndexNavigatorPool { - private readonly ConcurrentBag pool = new ConcurrentBag(); + private readonly ConcurrentBag pool = []; private readonly IIndexScorerFactory scorer; public IndexNavigatorPool(IIndexScorerFactory scorer) @@ -14,14 +14,12 @@ public IndexNavigatorPool(IIndexScorerFactory scorer) public IIndexNavigator Create(IIndexSnapshot indexSnapshot) { - var node = indexSnapshot.Root; - if (!this.pool.TryTake(out var navigator)) { navigator = new IndexNavigator(); } - navigator.Initialize(node, this, this.scorer.CreateIndexScorer(indexSnapshot)); + navigator.Initialize(indexSnapshot, this, this.scorer.CreateIndexScorer(indexSnapshot)); return navigator; } diff --git a/src/Lifti.Core/Querying/IntermediateQueryResult.cs b/src/Lifti.Core/Querying/IntermediateQueryResult.cs index cf39b4d7..888cd45c 100644 --- a/src/Lifti.Core/Querying/IntermediateQueryResult.cs +++ b/src/Lifti.Core/Querying/IntermediateQueryResult.cs @@ -1,5 +1,6 @@ using System; using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; using System.Linq; using System.Text; @@ -9,42 +10,81 @@ namespace Lifti.Querying /// A partial search result that can subsequently be combined with other instances /// materialized as part of a query. 
/// - [System.Diagnostics.CodeAnalysis.SuppressMessage("Performance", "CA1815:Override equals and operator equals on value types", Justification = "Should not be compared")] - public readonly struct IntermediateQueryResult + public readonly struct IntermediateQueryResult : IEquatable { - /// - /// Creates a new instance of . - /// - public IntermediateQueryResult(IEnumerable matches) + internal IntermediateQueryResult(List matches, bool assumeSorted) { - this.Matches = matches as IReadOnlyList ?? matches.ToList(); + if (!assumeSorted) + { + matches.Sort((x, y) => x.DocumentId.CompareTo(y.DocumentId)); + } + + this.Matches = matches; + +#if DEBUG + // Verify that we are in document id order, and that there are no duplicates + for (var i = 0; i < this.Matches.Count; i++) + { + if (i > 0) + { + var previous = this.Matches[i - 1].DocumentId; + var next = this.Matches[i].DocumentId; + if (previous > next) + { + System.Diagnostics.Debug.Fail("Intermediate query results must be in document id order"); + } + else if (previous == next) + { + System.Diagnostics.Debug.Fail("Duplicate document id encountered in intermediate query results"); + } + } + } +#endif } /// /// Gets an with no matches. /// - public static IntermediateQueryResult Empty { get; } = new IntermediateQueryResult(Array.Empty()); + public static IntermediateQueryResult Empty { get; } = new IntermediateQueryResult([], true); /// /// Gets the set of matches that this instance captured. /// - public IReadOnlyList Matches { get; } + public IReadOnlyList Matches { get; } /// /// Intersects this and the specified instance, but only when the positions of the matched tokens on the left are preceding the tokens on the right. 
/// public IntermediateQueryResult PrecedingIntersect(IntermediateQueryResult results) { - return new IntermediateQueryResult(PrecedingIntersectMerger.Apply(this, results)); + // If either of the two results sets involved are empty, then there is no intersection, so + // we can just return an empty result set + if (this.Matches.Count == 0 || results.Matches.Count == 0) + { + return Empty; + } + + return new IntermediateQueryResult( + PrecedingIntersectMerger.Apply(this, results), + true); } /// /// Intersects this and the specified instance, but only when the positions of the matched tokens are within a given tolerance. Matching tokens are combined - /// into instances. + /// into instances. /// public IntermediateQueryResult CompositePositionalIntersect(IntermediateQueryResult results, int leftTolerance, int rightTolerance) { - return new IntermediateQueryResult(CompositePositionalIntersectMerger.Apply(this, results, leftTolerance, rightTolerance)); + // If either of the two results sets involved are empty, then there is no intersection, so + // we can just return an empty result set + if (this.Matches.Count == 0 || results.Matches.Count == 0) + { + return Empty; + } + + return new IntermediateQueryResult( + CompositePositionalIntersectMerger.Apply(this, results, leftTolerance, rightTolerance), + true); } /// @@ -52,7 +92,16 @@ public IntermediateQueryResult CompositePositionalIntersect(IntermediateQueryRes /// public IntermediateQueryResult Intersect(IntermediateQueryResult results) { - return new IntermediateQueryResult(IntersectMerger.Apply(this, results)); + // If either of the two results sets involved are empty, then there is no intersection, so + // we can just return an empty result set + if (this.Matches.Count == 0 || results.Matches.Count == 0) + { + return Empty; + } + + return new IntermediateQueryResult( + IntersectMerger.Apply(this, results), + true); } /// @@ -60,7 +109,21 @@ public IntermediateQueryResult Intersect(IntermediateQueryResult results) 
/// public IntermediateQueryResult Union(IntermediateQueryResult results) { - return new IntermediateQueryResult(UnionMerger.Apply(this, results)); + // We can shortcut the unioning logic if either of the two results sets involved are empty + // In this case we can just return the other result set + if (this.Matches.Count == 0) + { + return results; + } + + if (results.Matches.Count == 0) + { + return this; + } + + return new IntermediateQueryResult( + UnionMerger.Apply(this, results), + true); } /// @@ -73,6 +136,51 @@ public override string ToString() } return builder.ToString(); - } + } + + internal HashSet ToDocumentIdLookup() + { + var lookup = new HashSet(); +#if !NETSTANDARD + lookup.EnsureCapacity(this.Matches.Count); +#endif + for (var i = 0; i < this.Matches.Count; i++) + { + lookup.Add(this.Matches[i].DocumentId); + } + + return lookup; + } + + /// + public override int GetHashCode() + { + return HashCode.Combine(this.Matches); + } + + /// + public override bool Equals([NotNullWhen(true)] object? obj) + { + return obj is IntermediateQueryResult result && + this.Equals(result); + } + + /// + public bool Equals(IntermediateQueryResult other) + { + return this.Matches.SequenceEqual(other.Matches); + } + + /// + public static bool operator ==(IntermediateQueryResult left, IntermediateQueryResult right) + { + return left.Equals(right); + } + + /// + public static bool operator !=(IntermediateQueryResult left, IntermediateQueryResult right) + { + return !(left == right); + } } } diff --git a/src/Lifti.Core/Querying/IntermediateQueryResultMerger.cs b/src/Lifti.Core/Querying/IntermediateQueryResultMerger.cs index 3271d3e1..38102acb 100644 --- a/src/Lifti.Core/Querying/IntermediateQueryResultMerger.cs +++ b/src/Lifti.Core/Querying/IntermediateQueryResultMerger.cs @@ -1,12 +1,11 @@ using System.Collections.Generic; -using System.Linq; namespace Lifti.Querying { /// /// A base helper class for classes capable of merging two instances. 
/// - public abstract class IntermediateQueryResultMerger + internal abstract class IntermediateQueryResultMerger { /// /// Performs an inner join on two sets of field results. @@ -18,74 +17,105 @@ public abstract class IntermediateQueryResultMerger /// * leftLocations: The locations from the left match /// * rightLocations: The locations from the right match /// - protected static IList<( + internal static IEnumerable<( byte fieldId, double score, - IReadOnlyList leftLocations, - IReadOnlyList rightLocations + IReadOnlyList leftLocations, + IReadOnlyList rightLocations )> JoinFields( - IEnumerable leftFields, - IEnumerable rightFields) => leftFields.Join( - rightFields, - o => o.FieldId, - o => o.FieldId, - (inner, outer) => ( - fieldId: inner.FieldId, - score: inner.Score + outer.Score, - leftLocations: inner.Locations, - rightLocations: outer.Locations)) - .ToList(); + IReadOnlyList leftFields, + IReadOnlyList rightFields) + { + var leftIndex = 0; + var rightIndex = 0; + + var leftCount = leftFields.Count; + var rightCount = rightFields.Count; + + while (leftIndex < leftCount && rightIndex < rightCount) + { + var leftField = leftFields[leftIndex]; + var rightField = rightFields[rightIndex]; + + if (leftField.FieldId == rightField.FieldId) + { + yield return ( + leftField.FieldId, + leftField.Score + rightField.Score, + leftField.Locations, + rightField.Locations); + + leftIndex++; + rightIndex++; + } + else if (leftField.FieldId < rightField.FieldId) + { + leftIndex++; + } + else + { + rightIndex++; + } + } + } /// /// Merges the matched locations in two s. Field matches that only appear in one or the other /// are included but unaltered, field matches appearing in both s /// are unioned. 
/// - protected static IEnumerable MergeFields(ScoredToken leftMatch, ScoredToken rightMatch) + protected static IReadOnlyList MergeFields(ScoredToken left, ScoredToken right) { - // We will always iterate through the total number of merged field records, so we want to optimise - // for the smallest number of fields on the right to keep the dictionary as small as possible - SwapIf(leftMatch.FieldMatches.Count < rightMatch.FieldMatches.Count, ref leftMatch, ref rightMatch); + var leftIndex = 0; + var rightIndex = 0; - var rightFields = rightMatch.FieldMatches.ToDictionary(m => m.FieldId); + var leftMatches = left.FieldMatches; + var rightMatches = right.FieldMatches; + var leftCount = leftMatches.Count; + var rightCount = rightMatches.Count; + + List results = new(leftCount + rightCount); - foreach (var leftField in leftMatch.FieldMatches) + while (leftIndex < leftCount && rightIndex < rightCount) { - if (rightFields.TryGetValue(leftField.FieldId, out var rightField)) - { - yield return new ScoredFieldMatch( - leftField.Score + rightField.Score, - new FieldMatch( - leftField.FieldId, - leftField.Locations.Concat(rightField.Locations))); + var leftField = leftMatches[leftIndex]; + var rightField = rightMatches[rightIndex]; - rightFields.Remove(leftField.FieldId); + if (leftField.FieldId == rightField.FieldId) + { + results.Add(ScoredFieldMatch.Merge(leftField, rightField)); + + leftIndex++; + rightIndex++; + } + else if (leftField.FieldId < rightField.FieldId) + { + results.Add(leftField); + leftIndex++; } else { - yield return leftField; + results.Add(rightField); + rightIndex++; } } - // Return any remaining right fields - foreach (var rightField in rightFields.Values) + // Add any remaining matches from the left + while (leftIndex < leftCount) { - yield return rightField; + results.Add(leftMatches[leftIndex]); + leftIndex++; } - } - /// - /// A helper method to swap two fields when is true. 
- /// - protected static void SwapIf(bool condition, ref T left, ref T right) - { - if (condition) + // Add any remaining matches from the right + while (rightIndex < rightCount) { - var temp = left; - left = right; - right = temp; - } - } + results.Add(rightMatches[rightIndex]); + rightIndex++; + } + + return results; + } } } diff --git a/src/Lifti.Core/Querying/IntersectMerger.cs b/src/Lifti.Core/Querying/IntersectMerger.cs index 3680e248..f7738591 100644 --- a/src/Lifti.Core/Querying/IntersectMerger.cs +++ b/src/Lifti.Core/Querying/IntersectMerger.cs @@ -1,4 +1,5 @@ -using System.Collections.Generic; +using System; +using System.Collections.Generic; using System.Linq; namespace Lifti.Querying @@ -6,28 +7,51 @@ namespace Lifti.Querying /// /// Provides logic for intersecting the results in two s. /// - public class IntersectMerger : IntermediateQueryResultMerger + internal sealed class IntersectMerger : IntermediateQueryResultMerger { /// /// Applies the intersection to the two s. /// - public static IEnumerable Apply(IntermediateQueryResult left, IntermediateQueryResult right) + public static List Apply(IntermediateQueryResult left, IntermediateQueryResult right) { - // Swap over left and right to ensure we're performing as few iterations as possible in the intersection - // The trade-off here is that we're building a larger dictionary - SwapIf(left.Matches.Count > right.Matches.Count, ref left, ref right); + // track two pointers through the lists on each side. 
The document ids are ordered on both sides, so we can + // move through the lists in a single pass - var rightItems = right.Matches.ToDictionary(m => m.ItemId); + var leftIndex = 0; + var rightIndex = 0; - foreach (var leftMatch in left.Matches) + var leftMatches = left.Matches; + var rightMatches = right.Matches; + var leftCount = leftMatches.Count; + var rightCount = rightMatches.Count; + + var results = new List(Math.Min(leftCount, rightCount)); + + while (leftIndex < leftCount && rightIndex < rightCount) { - if (rightItems.TryGetValue(leftMatch.ItemId, out var rightMatch)) + var leftMatch = leftMatches[leftIndex]; + var rightMatch = rightMatches[rightIndex]; + + if (leftMatch.DocumentId == rightMatch.DocumentId) + { + results.Add(new ScoredToken( + leftMatch.DocumentId, + MergeFields(leftMatch, rightMatch))); + + leftIndex++; + rightIndex++; + } + else if (leftMatch.DocumentId < rightMatch.DocumentId) + { + leftIndex++; + } + else { - yield return new ScoredToken( - leftMatch.ItemId, - MergeFields(leftMatch, rightMatch).ToList()); + rightIndex++; } - } + } + + return results; } } -} +} \ No newline at end of file diff --git a/src/Lifti.Core/Querying/OkapiBm25Scorer.cs b/src/Lifti.Core/Querying/OkapiBm25Scorer.cs index c4b5edc4..a93efffa 100644 --- a/src/Lifti.Core/Querying/OkapiBm25Scorer.cs +++ b/src/Lifti.Core/Querying/OkapiBm25Scorer.cs @@ -1,87 +1,102 @@ using System; +using System.Collections.Concurrent; using System.Collections.Generic; using System.Linq; namespace Lifti.Querying -{ +{ /// /// An implementation of the Okapi BM-25 scorer. 
/// internal class OkapiBm25Scorer : IScorer { - private readonly Dictionary averageTokenCountByField; + private readonly Dictionary averageTokenCountByField; + private readonly ConcurrentDictionary<(int documentId, byte fieldId), (double scoreBoost, double tokensInDocumentWeighting)> documentFieldCache = new(); + private readonly ConcurrentDictionary idfCache = new(); private readonly double documentCount; private readonly double k1; private readonly double k1PlusOne; private readonly double b; - private readonly IItemStore snapshot; - + private readonly IIndexMetadata indexMetadata; + private readonly IFieldScoreBoostProvider fieldScoreBoosts; + /// /// Constructs a new instance of the . /// /// The "k1" parameter for the scorer. /// The "b" parameter for the scorer. - /// - /// The of the index snapshot being queried. + /// + /// The of the index snapshot being queried. + /// + /// + /// The to use to get the score boost for a field. /// - internal OkapiBm25Scorer(double k1, double b, IItemStore snapshot) + internal OkapiBm25Scorer(double k1, double b, IIndexMetadata indexMetadata, IFieldScoreBoostProvider fieldScoreBoosts) { - if (snapshot is null) + if (indexMetadata is null) { - throw new ArgumentNullException(nameof(snapshot)); + throw new ArgumentNullException(nameof(indexMetadata)); } - var documentCount = (double)snapshot.Count; - this.averageTokenCountByField = snapshot.IndexStatistics.TokenCountByField.ToDictionary(k => k.Key, k => k.Value / documentCount); + var documentCount = (double)indexMetadata.DocumentCount; + this.averageTokenCountByField = indexMetadata.IndexStatistics.TokenCountByField.ToDictionary(k => k.Key, k => k.Value / documentCount); this.documentCount = documentCount; this.k1 = k1; this.k1PlusOne = k1 + 1D; this.b = b; - this.snapshot = snapshot; - } - - /// - public IReadOnlyList Score(IReadOnlyList tokenMatches, double weighting) - { - if (tokenMatches is null) - { - throw new ArgumentNullException(nameof(tokenMatches)); - } - - var 
idf = CalculateInverseDocumentFrequency(tokenMatches); - - return tokenMatches.Select(t => - { - var itemTokenCounts = this.snapshot.GetMetadata(t.ItemId).DocumentStatistics.TokenCountByField; - var scoredFieldMatches = new List(t.FieldMatches.Count); - foreach (var fieldMatch in t.FieldMatches) - { - var frequencyInDocument = fieldMatch.Locations.Count; - var fieldId = fieldMatch.FieldId; - var tokensInDocument = itemTokenCounts[fieldId]; - var tokensInDocumentWeighting = tokensInDocument / this.averageTokenCountByField[fieldId]; - - var numerator = frequencyInDocument * this.k1PlusOne; - var denominator = frequencyInDocument + this.k1 * (1 - this.b + this.b * tokensInDocumentWeighting); - - var fieldScore = idf * (numerator / denominator); - - var weightedScore = fieldScore * weighting; - - scoredFieldMatches.Add(new ScoredFieldMatch(weightedScore, fieldMatch)); - } - - return new ScoredToken(t.ItemId, scoredFieldMatches); - }).ToList(); + this.indexMetadata = indexMetadata; + this.fieldScoreBoosts = fieldScoreBoosts; } + + public double CalculateScore(int totalMatchedDocuments, int documentId, byte fieldId, IReadOnlyList tokenLocations, double weighting) + { + var idf = this.CalculateInverseDocumentFrequency(totalMatchedDocuments); + + double scoreBoost; + double tokensInDocumentWeighting; + if (this.documentFieldCache.TryGetValue((documentId, fieldId), out var cacheEntry)) + { + (scoreBoost, tokensInDocumentWeighting) = cacheEntry; + } + else + { + var documentMetadata = this.indexMetadata.GetDocumentMetadata(documentId); + var documentTokenCounts = documentMetadata.DocumentStatistics.TokenCountByField; + var tokensInDocument = documentTokenCounts[fieldId]; + tokensInDocumentWeighting = tokensInDocument / this.averageTokenCountByField[fieldId]; + + // We can cache the score boost for the field and object type (if applicable) because it won't + // change for the lifetime of the associated index snapshot. 
+ scoreBoost = this.fieldScoreBoosts.GetScoreBoost(fieldId); + if (documentMetadata.ObjectTypeId is { } objectTypeId) + { + var objectScoreBoostMetadata = this.indexMetadata.GetObjectTypeScoreBoostMetadata(objectTypeId); + scoreBoost *= objectScoreBoostMetadata.CalculateScoreBoost(documentMetadata); + } + + this.documentFieldCache.TryAdd((documentId, fieldId), (scoreBoost, tokensInDocumentWeighting)); + } + + var frequencyInDocument = tokenLocations.Count; + var numerator = frequencyInDocument * this.k1PlusOne; + var denominator = frequencyInDocument + (this.k1 * (1 - this.b + (this.b * tokensInDocumentWeighting))); + + var fieldScore = idf * (numerator / denominator); + + return fieldScore * weighting * scoreBoost; + } - private double CalculateInverseDocumentFrequency(IReadOnlyList tokens) - { - var tokenCount = tokens.Count; - var idf = (this.documentCount - tokenCount + 0.5D) - / (tokenCount + 0.5D); - - idf = Math.Log(1D + idf); + private double CalculateInverseDocumentFrequency(int matchedDocumentCount) + { + if (!this.idfCache.TryGetValue(matchedDocumentCount, out var idf)) + { + idf = (this.documentCount - matchedDocumentCount + 0.5D) + / (matchedDocumentCount + 0.5D); + + idf = Math.Log(1D + idf); + + this.idfCache.TryAdd(matchedDocumentCount, idf); + } return idf; } diff --git a/src/Lifti.Core/Querying/OkapiBm25ScorerFactory.cs b/src/Lifti.Core/Querying/OkapiBm25ScorerFactory.cs index 0bc3f23b..c95981e0 100644 --- a/src/Lifti.Core/Querying/OkapiBm25ScorerFactory.cs +++ b/src/Lifti.Core/Querying/OkapiBm25ScorerFactory.cs @@ -1,5 +1,5 @@ using System; - + namespace Lifti.Querying { /// @@ -8,7 +8,9 @@ namespace Lifti.Querying public class OkapiBm25ScorerFactory : IIndexScorerFactory { private readonly double k1; - private readonly double b; + private readonly double b; + + private (IIndexSnapshot snapshot, IScorer scorer)? cachedScorer; /// /// Constructs a new instance of the class. 
@@ -27,9 +29,23 @@ public IScorer CreateIndexScorer(IIndexSnapshot indexSnapshot) if (indexSnapshot is null) { throw new ArgumentNullException(nameof(indexSnapshot)); + } + + var cached = this.cachedScorer.GetValueOrDefault(); + if (cached.snapshot == indexSnapshot) + { + return cached.scorer; } + + var scorer = new OkapiBm25Scorer( + this.k1, + this.b, + indexSnapshot.Metadata, + new FieldScoreBoostProvider(indexSnapshot.FieldLookup)); - return new OkapiBm25Scorer(this.k1, this.b, indexSnapshot.Items); + this.cachedScorer = (indexSnapshot, scorer); + + return scorer; } - } + } } diff --git a/src/Lifti.Core/Querying/PrecedingIntersectMerger.cs b/src/Lifti.Core/Querying/PrecedingIntersectMerger.cs index 4e19fc89..ea660dc4 100644 --- a/src/Lifti.Core/Querying/PrecedingIntersectMerger.cs +++ b/src/Lifti.Core/Querying/PrecedingIntersectMerger.cs @@ -1,4 +1,5 @@ -using System.Collections.Generic; +using System; +using System.Collections.Generic; using System.Linq; namespace Lifti.Querying @@ -7,46 +8,64 @@ namespace Lifti.Querying /// Provides logic for intersecting the results in two s where the fields /// locations on the left must precede the matching field locations on the right. /// - public class PrecedingIntersectMerger : IntermediateQueryResultMerger + internal sealed class PrecedingIntersectMerger : IntermediateQueryResultMerger { /// /// Applies the intersection logic. /// - public static IEnumerable Apply(IntermediateQueryResult left, IntermediateQueryResult right) + public static List Apply(IntermediateQueryResult left, IntermediateQueryResult right) { - // Swap over the variables to ensure we're performing as few iterations as possible in the intersection - // "left" and "right" have no special meaning when performing an intersection - var swapLeftAndRight = left.Matches.Count > right.Matches.Count; - SwapIf(swapLeftAndRight, ref left, ref right); + // track two pointers through the lists on each side. 
The document ids are ordered on both sides, so we can + // move through the lists in a single pass - var rightItems = right.Matches.ToDictionary(m => m.ItemId); + var leftIndex = 0; + var rightIndex = 0; - foreach (var leftMatch in left.Matches) + var leftMatches = left.Matches; + var rightMatches = right.Matches; + var leftCount = leftMatches.Count; + var rightCount = rightMatches.Count; + + var results = new List(Math.Min(leftCount, rightCount)); + + List positionalMatches = []; + while (leftIndex < leftCount && rightIndex < rightCount) { - if (rightItems.TryGetValue(leftMatch.ItemId, out var rightMatch)) - { - var positionalMatches = EnumerateFieldMatches( - (swapLeftAndRight ? rightMatch : leftMatch).FieldMatches, - (swapLeftAndRight ? leftMatch : rightMatch).FieldMatches); + var leftMatch = leftMatches[leftIndex]; + var rightMatch = rightMatches[rightIndex]; + if (leftMatch.DocumentId == rightMatch.DocumentId) + { + EnumerateFieldMatches(positionalMatches, leftMatch.FieldMatches, rightMatch.FieldMatches); if (positionalMatches.Count > 0) { - yield return new ScoredToken(leftMatch.ItemId, positionalMatches); + results.Add(new ScoredToken(leftMatch.DocumentId, positionalMatches.ToList())); + positionalMatches.Clear(); } + + leftIndex++; + rightIndex++; } - } + else if (leftMatch.DocumentId < rightMatch.DocumentId) + { + leftIndex++; + } + else + { + rightIndex++; + } + } + + return results; } - private static List EnumerateFieldMatches(IReadOnlyList leftFields, IReadOnlyList rightFields) + private static void EnumerateFieldMatches(List fieldResults, IReadOnlyList leftFields, IReadOnlyList rightFields) { var matchedFields = JoinFields(leftFields, rightFields); - var fieldResults = new List(matchedFields.Count); - var fieldTokenMatches = new List(); + var fieldTokenMatches = new List(); foreach (var (fieldId, score, leftLocations, rightLocations) in matchedFields) { - fieldTokenMatches.Clear(); - var furthestRightTokenStart = rightLocations[rightLocations.Count - 
1].MinTokenIndex; var earliestLeftTokenStart = leftLocations[0].MinTokenIndex; @@ -67,13 +86,15 @@ private static List EnumerateFieldMatches(IReadOnlyList 0) { fieldResults.Add( - new ScoredFieldMatch( + ScoredFieldMatch.CreateFromUnsorted( score, - new FieldMatch(fieldId, fieldTokenMatches))); + fieldId, + // We need to copy the list here as we're going to reuse it + fieldTokenMatches)); + + fieldTokenMatches = []; } } - - return fieldResults; } } } diff --git a/src/Lifti.Core/Querying/Query.cs b/src/Lifti.Core/Querying/Query.cs index 0d330f5c..6542d9f9 100644 --- a/src/Lifti.Core/Querying/Query.cs +++ b/src/Lifti.Core/Querying/Query.cs @@ -41,7 +41,7 @@ public IEnumerable> Execute(IIndexSnapshot index) return Enumerable.Empty>(); } - var idLookup = index.Items; + var indexMetadata = index.Metadata; var fieldLookup = index.FieldLookup; var evaluationResult = this.Root.Evaluate(index.CreateNavigator, QueryContext.Empty); var matches = evaluationResult.Matches; @@ -49,27 +49,27 @@ public IEnumerable> Execute(IIndexSnapshot index) foreach (var match in matches) { - if (!results.TryGetValue(match.ItemId, out var itemResults)) + if (!results.TryGetValue(match.DocumentId, out var documentResults)) { - itemResults = new List(); - results[match.ItemId] = itemResults; + documentResults = []; + results[match.DocumentId] = documentResults; } - itemResults.AddRange(match.FieldMatches); + documentResults.AddRange(match.FieldMatches); } var searchResults = new List>(matches.Count); - foreach (var itemResults in matches) + foreach (var documentResults in matches) { - var item = idLookup.GetMetadata(itemResults.ItemId); + var documentMetadata = indexMetadata.GetDocumentMetadata(documentResults.DocumentId); searchResults.Add( new SearchResult( - item.Item, - itemResults.FieldMatches.Select(m => new FieldSearchResult( + documentMetadata.Key, + documentResults.FieldMatches.Select(m => new FieldSearchResult( fieldLookup.GetFieldForId(m.FieldId), m.Score, - 
m.FieldMatch.GetTokenLocations())) + m.GetTokenLocations())) .ToList())); } diff --git a/src/Lifti.Core/Querying/QueryContext.cs b/src/Lifti.Core/Querying/QueryContext.cs new file mode 100644 index 00000000..6af29644 --- /dev/null +++ b/src/Lifti.Core/Querying/QueryContext.cs @@ -0,0 +1,23 @@ +using System; +using System.Collections.Generic; +using System.Linq; + +namespace Lifti.Querying +{ +#if NETSTANDARD + using DocumentIdSet = ISet; +#else + using DocumentIdSet = IReadOnlySet; +#endif + + /// + /// Manages context during the execution of a query, allowing for aspects like field filters to be applied. + /// + public sealed record QueryContext(byte? FilterToFieldId = null, DocumentIdSet? FilterToDocumentIds = null) + { + /// + /// Gets an empty query context. + /// + public static QueryContext Empty { get; } = new(); + } +} diff --git a/src/Lifti.Core/Querying/QueryParser.cs b/src/Lifti.Core/Querying/QueryParser.cs index 0a41aec2..5ae32afd 100644 --- a/src/Lifti.Core/Querying/QueryParser.cs +++ b/src/Lifti.Core/Querying/QueryParser.cs @@ -108,16 +108,20 @@ private IQueryPart CreateWordQueryPart(QueryToken queryToken) var indexTokenizer = queryToken.IndexTokenizer ?? 
throw new InvalidOperationException(ExceptionMessages.TextTokensMustHaveIndexTokenizers); var tokenText = queryToken.TokenText.AsSpan(); - + var scoreBoost = queryToken.ScoreBoost; var fuzzyMatchInfo = ExplicitFuzzySearchTerm.Parse(tokenText); - if (!fuzzyMatchInfo.IsFuzzyMatch && WildcardQueryPartParser.TryParse(tokenText, indexTokenizer, out var wildcardQueryPart)) + if (!fuzzyMatchInfo.IsFuzzyMatch && WildcardQueryPartParser.TryParse( + tokenText, + indexTokenizer, + scoreBoost, + out var wildcardQueryPart)) { return wildcardQueryPart; } // We hand off any matched text in the query to the tokenizer (either the index default, - // or the one associated to the specific field being queryied) because we need to ensure + // or the one associated to the specific field being queried) because we need to ensure // that it is: // a) Normalized in the same way as the tokens as they were added to the index // b) Any additional processing, e.g. stemming is applied to them @@ -129,12 +133,13 @@ private IQueryPart CreateWordQueryPart(QueryToken queryToken) token => new FuzzyMatchQueryPart( token.Value, fuzzyMatchInfo.MaxEditDistance ?? this.options.FuzzySearchMaxEditDistance(token.Value.Length), - fuzzyMatchInfo.MaxSequentialEdits ?? this.options.FuzzySearchMaxSequentialEdits(token.Value.Length))); + fuzzyMatchInfo.MaxSequentialEdits ?? 
this.options.FuzzySearchMaxSequentialEdits(token.Value.Length), + scoreBoost)); } else { result = indexTokenizer.Process(tokenText) - .Select(token => new ExactWordQueryPart(token.Value)); + .Select(token => new ExactWordQueryPart(token.Value, scoreBoost)); } return ComposeParts(result); diff --git a/src/Lifti.Core/Querying/QueryParserException.cs b/src/Lifti.Core/Querying/QueryParserException.cs index 1a07f087..bb65bc80 100644 --- a/src/Lifti.Core/Querying/QueryParserException.cs +++ b/src/Lifti.Core/Querying/QueryParserException.cs @@ -17,13 +17,15 @@ public class QueryParserException : LiftiException /// public QueryParserException(string message, params object[] args) : base(message, args) { } - + /// public QueryParserException(string message, Exception inner) : base(message, inner) { } +#if NETSTANDARD /// protected QueryParserException( SerializationInfo info, StreamingContext context) : base(info, context) { } +#endif } } diff --git a/src/Lifti.Core/Querying/QueryParts/AdjacentWordsQueryOperator.cs b/src/Lifti.Core/Querying/QueryParts/AdjacentWordsQueryOperator.cs index 3c08a257..edf04c3e 100644 --- a/src/Lifti.Core/Querying/QueryParts/AdjacentWordsQueryOperator.cs +++ b/src/Lifti.Core/Querying/QueryParts/AdjacentWordsQueryOperator.cs @@ -6,8 +6,10 @@ namespace Lifti.Querying.QueryParts /// /// A query part requiring that a series of words must appear in a document in sequence. /// - public class AdjacentWordsQueryOperator : IQueryPart - { + public sealed class AdjacentWordsQueryOperator : IQueryPart + { + private double? weighting; + /// /// Constructs a new instance. /// @@ -15,7 +17,17 @@ public class AdjacentWordsQueryOperator : IQueryPart /// The s that must appear in sequence. 
/// public AdjacentWordsQueryOperator(IReadOnlyList words) - { + { + if (words is null) + { + throw new ArgumentNullException(nameof(words)); + } + + if (words.Count == 0) + { + throw new LiftiException(ExceptionMessages.EmptyAdjacentWordsQueryPart); + } + this.Words = words; } @@ -25,8 +37,13 @@ public AdjacentWordsQueryOperator(IReadOnlyList words) public IReadOnlyList Words { get; } /// - public IntermediateQueryResult Evaluate(Func navigatorCreator, IQueryContext queryContext) - { + public IntermediateQueryResult Evaluate(Func navigatorCreator, QueryContext queryContext) + { + if (queryContext is null) + { + throw new ArgumentNullException(nameof(queryContext)); + } + var i = 0; var results = IntermediateQueryResult.Empty; do @@ -34,18 +51,31 @@ public IntermediateQueryResult Evaluate(Func navigatorCreator, var nextResults = this.Words[i].Evaluate(navigatorCreator, queryContext); if (results.Matches.Count == 0) { + // Special case the first word, as we don't want to intersect with the initial empty set results = nextResults; } else { results = results.CompositePositionalIntersect(nextResults, 0, 1); } + + // Filter subsequent words to only those that match the document ids we have so far + queryContext = queryContext with { FilterToDocumentIds = results.ToDocumentIdLookup() }; i++; } while (i < this.Words.Count && results.Matches.Count > 0); return results; + } + + /// + public double CalculateWeighting(Func navigatorCreator) + { + // Each result is intersected in sequence, so we approximate the total weighting by calculating + // the first and dividing by the number of words + this.weighting ??= this.Words[0].CalculateWeighting(navigatorCreator) / this.Words.Count; + return this.weighting.GetValueOrDefault(); } /// diff --git a/src/Lifti.Core/Querying/QueryParts/AndQueryOperator.cs b/src/Lifti.Core/Querying/QueryParts/AndQueryOperator.cs index a66f6978..11fa1a5e 100644 --- a/src/Lifti.Core/Querying/QueryParts/AndQueryOperator.cs +++ 
b/src/Lifti.Core/Querying/QueryParts/AndQueryOperator.cs @@ -4,9 +4,9 @@ namespace Lifti.Querying.QueryParts { /// - /// A query part that filters matched items to only those matched as an intersection of two s. + /// A query part that filters matched documents to only those matched as an intersection of two s. /// - public class AndQueryOperator : BinaryQueryOperator + public sealed class AndQueryOperator : BinaryQueryOperator { /// /// Constructs a new instance of . @@ -20,10 +20,11 @@ public AndQueryOperator(IQueryPart left, IQueryPart right) public override OperatorPrecedence Precedence => OperatorPrecedence.And; /// - public override IntermediateQueryResult Evaluate(Func navigatorCreator, IQueryContext queryContext) + public override IntermediateQueryResult Evaluate(Func navigatorCreator, QueryContext queryContext) { - return this.Left.Evaluate(navigatorCreator, queryContext) - .Intersect(this.Right.Evaluate(navigatorCreator, queryContext)); + var (leftResults, rightResults) = this.EvaluateWithDocumentIntersection(navigatorCreator, queryContext); + + return leftResults.Intersect(rightResults); } /// diff --git a/src/Lifti.Core/Querying/QueryParts/BinaryQueryOperator.cs b/src/Lifti.Core/Querying/QueryParts/BinaryQueryOperator.cs index 077940d3..24fa436a 100644 --- a/src/Lifti.Core/Querying/QueryParts/BinaryQueryOperator.cs +++ b/src/Lifti.Core/Querying/QueryParts/BinaryQueryOperator.cs @@ -1,4 +1,7 @@ -using System; +using Lifti.Querying.QueryParts; +using System; +using System.Collections.Generic; +using System.Linq; namespace Lifti.Querying.QueryParts { @@ -6,7 +9,9 @@ namespace Lifti.Querying.QueryParts /// An abstract representing a binary query operator, e.g. an AND or an OR operator. /// public abstract class BinaryQueryOperator : IBinaryQueryOperator - { + { + private double? weighting; + /// /// Constructs a new instance of . 
/// @@ -37,7 +42,64 @@ public IQueryPart Right } /// - public abstract IntermediateQueryResult Evaluate(Func navigatorCreator, IQueryContext queryContext); + public abstract IntermediateQueryResult Evaluate(Func navigatorCreator, QueryContext queryContext); + + /// + public double CalculateWeighting(Func navigatorCreator) + { + this.weighting ??= this.RunWeightingCalculation(navigatorCreator); + return this.weighting.GetValueOrDefault(); + } + + /// + /// Runs the weighting calculation for this query part. + /// + protected virtual double RunWeightingCalculation(Func navigatorCreator) + { + // Most binary operators are intersections, so we will use the weighting of the cheapest side + return Math.Min(this.Left.CalculateWeighting(navigatorCreator), this.Right.CalculateWeighting(navigatorCreator)); + } + + /// + /// Evaluates the left and right sides of the query, attempting to optimise the results so that the second evaluation + /// is filtered to only those documents that matched the first. 
+ /// + protected (IntermediateQueryResult leftResults, IntermediateQueryResult rightResults) EvaluateWithDocumentIntersection( + Func navigatorCreator, + QueryContext queryContext) + { + if (queryContext is null) + { + throw new ArgumentNullException(nameof(queryContext)); + } + + // Work out which side is cheapest to evaluate first + var leftWeighting = this.Left.CalculateWeighting(navigatorCreator); + var rightWeighting = this.Right.CalculateWeighting(navigatorCreator); + + IntermediateQueryResult leftResults; + IntermediateQueryResult rightResults; + + if (leftWeighting <= rightWeighting) + { + leftResults = this.Left.Evaluate(navigatorCreator, queryContext); + rightResults = this.Right.Evaluate( + navigatorCreator, + // Filter the right side to only those documents that matched the left side + queryContext with { FilterToDocumentIds = leftResults.ToDocumentIdLookup() }); + } + else + { + rightResults = this.Right.Evaluate(navigatorCreator, queryContext); + leftResults = this.Left.Evaluate( + navigatorCreator, + // Filter the left side to only those documents that matched the right side + queryContext with { FilterToDocumentIds = rightResults.ToDocumentIdLookup() }); + } + + + return (leftResults, rightResults); + } } } diff --git a/src/Lifti.Core/Querying/QueryParts/BracketedQueryPart.cs b/src/Lifti.Core/Querying/QueryParts/BracketedQueryPart.cs index 5fce235d..62315ac0 100644 --- a/src/Lifti.Core/Querying/QueryParts/BracketedQueryPart.cs +++ b/src/Lifti.Core/Querying/QueryParts/BracketedQueryPart.cs @@ -5,7 +5,7 @@ namespace Lifti.Querying.QueryParts /// /// An that groups other statements together. /// - public class BracketedQueryPart : IQueryPart + public sealed class BracketedQueryPart : IQueryPart { /// /// Constructs a new instance of . 
@@ -25,9 +25,16 @@ public IQueryPart Statement } /// - public IntermediateQueryResult Evaluate(Func navigatorCreator, IQueryContext queryContext) + public IntermediateQueryResult Evaluate(Func navigatorCreator, QueryContext queryContext) { return this.Statement.Evaluate(navigatorCreator, queryContext); + } + + /// + public double CalculateWeighting(Func navigatorCreator) + { + // Just defer to the weighting of the statement + return this.Statement.CalculateWeighting(navigatorCreator); } /// diff --git a/src/Lifti.Core/Querying/QueryParts/DoubleBufferedList.cs b/src/Lifti.Core/Querying/QueryParts/DoubleBufferedList.cs index c3c1c46d..e63a53d0 100644 --- a/src/Lifti.Core/Querying/QueryParts/DoubleBufferedList.cs +++ b/src/Lifti.Core/Querying/QueryParts/DoubleBufferedList.cs @@ -1,13 +1,15 @@ -using System; -using System.Collections; +using System.Collections; using System.Collections.Generic; namespace Lifti.Querying.QueryParts { + /// + /// A list that allows for mutations to take place on a separate list to the one being enumerated, and then swapped. 
+ /// internal class DoubleBufferedList : IEnumerable { - private List current = new List(); - private List swap = new List(); + private List current = []; + private List swap = []; public DoubleBufferedList() { @@ -16,6 +18,11 @@ public DoubleBufferedList() public DoubleBufferedList(params T[] initialData) { this.current.AddRange(initialData); + } + + public void AddToCurrent(T item) + { + this.current.Add(item); } public void Add(T item) @@ -28,14 +35,20 @@ public void AddRange(IEnumerable items) this.swap.AddRange(items); } - public int Count => this.current.Count; - + public int Count => this.current.Count; + public void Swap() { var tempStack = this.current; tempStack.Clear(); this.current = this.swap; this.swap = tempStack; + } + + public void Clear() + { + this.current.Clear(); + this.swap.Clear(); } public IEnumerator GetEnumerator() diff --git a/src/Lifti.Core/Querying/QueryParts/EmptyQueryPart.cs b/src/Lifti.Core/Querying/QueryParts/EmptyQueryPart.cs index 389f40c3..f4f46392 100644 --- a/src/Lifti.Core/Querying/QueryParts/EmptyQueryPart.cs +++ b/src/Lifti.Core/Querying/QueryParts/EmptyQueryPart.cs @@ -5,7 +5,7 @@ namespace Lifti.Querying.QueryParts /// /// A special case used to represent an empty query. 
/// - public class EmptyQueryPart : IQueryPart + public sealed class EmptyQueryPart : IQueryPart { private EmptyQueryPart() { @@ -17,9 +17,15 @@ private EmptyQueryPart() public static EmptyQueryPart Instance { get; } = new EmptyQueryPart(); /// - public IntermediateQueryResult Evaluate(Func navigatorCreator, IQueryContext queryContext) + public IntermediateQueryResult Evaluate(Func navigatorCreator, QueryContext queryContext) { return IntermediateQueryResult.Empty; + } + + /// + public double CalculateWeighting(Func navigatorCreator) + { + return 0; } } } diff --git a/src/Lifti.Core/Querying/QueryParts/ExactWordQueryPart.cs b/src/Lifti.Core/Querying/QueryParts/ExactWordQueryPart.cs index d2d3a740..561deece 100644 --- a/src/Lifti.Core/Querying/QueryParts/ExactWordQueryPart.cs +++ b/src/Lifti.Core/Querying/QueryParts/ExactWordQueryPart.cs @@ -3,40 +3,40 @@ namespace Lifti.Querying.QueryParts { /// - /// An that only matches items that contain an exact match for the given text. + /// An that only matches documents that contain an exact match for the given text. /// - public class ExactWordQueryPart : WordQueryPart + public sealed class ExactWordQueryPart : WordQueryPart { /// /// Constructs a new instance of . /// - public ExactWordQueryPart(string word) - : base(word) + public ExactWordQueryPart(string word, double? 
scoreBoost = null) + : base(word, scoreBoost) { - } + } /// - public override IntermediateQueryResult Evaluate(Func navigatorCreator, IQueryContext queryContext) + public override IntermediateQueryResult Evaluate(Func navigatorCreator, QueryContext queryContext) { if (navigatorCreator == null) { throw new ArgumentNullException(nameof(navigatorCreator)); - } - - if (queryContext is null) - { - throw new ArgumentNullException(nameof(queryContext)); - } + } + + if (queryContext is null) + { + throw new ArgumentNullException(nameof(queryContext)); + } using var navigator = navigatorCreator(); navigator.Process(this.Word.AsSpan()); - return queryContext.ApplyTo(navigator.GetExactMatches()); - } - + return navigator.GetExactMatches(queryContext, this.ScoreBoost ?? 1D); + } + /// public override string ToString() { - return this.Word; + return base.ToString(this.Word); } } } diff --git a/src/Lifti.Core/Querying/QueryParts/FieldFilterQueryOperator.cs b/src/Lifti.Core/Querying/QueryParts/FieldFilterQueryOperator.cs index c00587dc..1044be89 100644 --- a/src/Lifti.Core/Querying/QueryParts/FieldFilterQueryOperator.cs +++ b/src/Lifti.Core/Querying/QueryParts/FieldFilterQueryOperator.cs @@ -3,10 +3,10 @@ namespace Lifti.Querying.QueryParts { /// - /// An that restricts the resulting item matches to only those + /// An that restricts the resulting matches to only those /// that include matching tokens in a specific field. /// - public class FieldFilterQueryOperator : IQueryPart + public sealed class FieldFilterQueryOperator : IQueryPart { /// /// Constructs a new instance of . 
@@ -34,11 +34,24 @@ public FieldFilterQueryOperator(string fieldName, byte fieldId, IQueryPart state public IQueryPart Statement { get; } /// - public IntermediateQueryResult Evaluate(Func navigatorCreator, IQueryContext queryContext) - { + public IntermediateQueryResult Evaluate(Func navigatorCreator, QueryContext queryContext) + { + if (queryContext is null) + { + throw new ArgumentNullException(nameof(queryContext)); + } + return this.Statement.Evaluate( navigatorCreator, - QueryContext.Create(queryContext, this.FieldId)); + queryContext with { FilterToFieldId = this.FieldId }); + } + + /// + public double CalculateWeighting(Func navigatorCreator) + { + // We're applying an additional level of filtering here, so reduce the weighting of the + // child statement by 50% to reflect this. + return this.Statement.CalculateWeighting(navigatorCreator) * 0.5D; } /// @@ -57,8 +70,8 @@ public override string ToString() public static FieldFilterQueryOperator CreateForField(IIndexedFieldLookup fieldLookup, string fieldName, IQueryPart statement) { return new FieldFilterQueryOperator( - fieldName, - fieldLookup?.GetFieldInfo(fieldName).Id ?? throw new ArgumentNullException(nameof(fieldLookup)), + fieldName, + fieldLookup?.GetFieldInfo(fieldName).Id ?? throw new ArgumentNullException(nameof(fieldLookup)), statement); } } diff --git a/src/Lifti.Core/Querying/QueryParts/FuzzyMatchQueryPart.cs b/src/Lifti.Core/Querying/QueryParts/FuzzyMatchQueryPart.cs index 3fb87778..08eb0986 100644 --- a/src/Lifti.Core/Querying/QueryParts/FuzzyMatchQueryPart.cs +++ b/src/Lifti.Core/Querying/QueryParts/FuzzyMatchQueryPart.cs @@ -4,6 +4,7 @@ using System; using System.Collections.Generic; +using System.Runtime.CompilerServices; namespace Lifti.Querying.QueryParts { @@ -25,10 +26,15 @@ internal bool IsTransposition(SubstitutedCharacters substituted) } /// - /// An that matches items that contain an fuzzy match for the given text. 
+ /// An that matches documents that contain a fuzzy match for the given text. /// - public class FuzzyMatchQueryPart : WordQueryPart + public sealed class FuzzyMatchQueryPart : WordQueryPart { + private static readonly SharedPool fuzzyMatchStateStorePool = new( + static () => new(), + static s => s.Clear(), + 3); + internal const ushort DefaultMaxEditDistance = 4; internal const ushort DefaultMaxSequentialEdits = 1; @@ -37,31 +43,53 @@ public class FuzzyMatchQueryPart : WordQueryPart private class FuzzyMatchStateStore { - private readonly ushort maxEditDistance; - private readonly ushort maxSequentialEdits; - private readonly DoubleBufferedList state; + private ushort maxEditDistance; + private ushort maxSequentialEdits; + private readonly DoubleBufferedList state = []; // It's very likely that we'll encounter the same point in the index multiple times while processing a fuzzy match. // There's no point in traversing the same part multiple times for a given point in the search term, so this hashset keeps track of each logical // location that has been reached at each index within the search term. - private readonly HashSet<(int wordIndex, IIndexNavigatorBookmark bookmark)> processedBookmarks = new(); + // Note that because we're reusing bookmarks, we can't use the bookmark itself as the hashcode as it will change as the bookmark is reused, + // so instead we use the hashcode of the bookmark directly. Although this is not guaranteed to be unique, it's good enough for our purposes and + // saves thousands of allocations per query. 
+ private readonly HashSet<(int wordIndex, int bookmarkHash)> processedBookmarks = []; + + public FuzzyMatchStateStore() + { + } - public FuzzyMatchStateStore(IIndexNavigator navigator, ushort maxEditDistance, ushort maxSequentialEdits) + public void Initialize(IIndexNavigator navigator, ushort maxEditDistance, ushort maxSequentialEdits) { - this.state = new DoubleBufferedList(new FuzzyMatchState(navigator.CreateBookmark())); + this.state.AddToCurrent(new FuzzyMatchState(navigator.CreateBookmark())); this.maxEditDistance = maxEditDistance; this.maxSequentialEdits = maxSequentialEdits; } - public bool HasEntries => this.state.Count > 0; + public bool HasEntries + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get { return this.state.Count > 0; } + } - public void Add(FuzzyMatchState state) + public bool Add(FuzzyMatchState state, bool disposeIfNotUsed) { if (state.TotalEditCount <= this.maxEditDistance && state.SequentialEdits <= this.maxSequentialEdits && - this.processedBookmarks.Add((state.WordIndex, state.Bookmark))) + this.processedBookmarks.Add((state.WordIndex, state.Bookmark.GetHashCode()))) { this.state.Add(state); + return true; + } + else + { + if (disposeIfNotUsed) + { + // Dispose of the bookmark as we're not going to use it + state.Dispose(); + } + + return false; } } @@ -74,16 +102,22 @@ public void PrepareNextEntries() { this.state.Swap(); } + + public void Clear() + { + this.state.Clear(); + this.processedBookmarks.Clear(); + } } - private readonly struct FuzzyMatchState + private readonly struct FuzzyMatchState : IDisposable { /// /// Creates a new instance. /// /// The for the state of the index that this instance is for. /// The current number number of edits this required to reach this point in the match. - /// The Levelshtein distance accumulated so far. This will differ from + /// The Levenshtein distance accumulated so far. 
This will differ from /// only when substitutions are encountered, in which case an extra 1 per substitution will be accumulated here. /// The number of sequential edits that have accumulated so far to reach this point. When /// an exact match is processed, this will be reset to zero. @@ -221,6 +255,11 @@ IIndexNavigatorBookmark nextBookmark #endif ); } + + public void Dispose() + { + this.Bookmark.Dispose(); + } } /// @@ -231,18 +270,30 @@ IIndexNavigatorBookmark nextBookmark /// matches will be. /// The maximum number of edits that are allowed to appear sequentially. By default this is 1, /// which forces matches to be more similar to the search criteria. + /// + /// The score boost to apply to any matches of the search term. This is multiplied with any score boosts + /// applied to matching fields. A null value indicates that no additional score boost should be applied. + /// /// - /// A transposition of neighboring characters is considered as single edit, not two distinct substitutions. + /// A transposition of neighbouring characters is considered as single edit, not two distinct substitutions. /// - public FuzzyMatchQueryPart(string word, ushort maxEditDistance = DefaultMaxEditDistance, ushort maxSequentialEdits = DefaultMaxSequentialEdits) - : base(word) + public FuzzyMatchQueryPart(string word, ushort maxEditDistance = DefaultMaxEditDistance, ushort maxSequentialEdits = DefaultMaxSequentialEdits, double? 
scoreBoost = null) + : base(word, scoreBoost) { this.maxEditDistance = maxEditDistance; this.maxSequentialEdits = maxSequentialEdits; } /// - public override IntermediateQueryResult Evaluate(Func navigatorCreator, IQueryContext queryContext) + protected override double RunWeightingCalculation(Func navigatorCreator) + { + return base.RunWeightingCalculation(navigatorCreator) + + this.maxEditDistance + + ((this.maxSequentialEdits - 1) << 1); + } + + /// + public override IntermediateQueryResult Evaluate(Func navigatorCreator, QueryContext queryContext) { if (navigatorCreator == null) { @@ -255,18 +306,21 @@ public override IntermediateQueryResult Evaluate(Func navigator } using var navigator = navigatorCreator(); - var results = IntermediateQueryResult.Empty; - var stateStore = new FuzzyMatchStateStore(navigator, this.maxEditDistance, this.maxSequentialEdits); + var resultCollector = new DocumentMatchCollector(); + var stateStore = fuzzyMatchStateStorePool.Take(); + stateStore.Initialize(navigator, this.maxEditDistance, this.maxSequentialEdits); var characterCount = 0; var searchTermLength = this.Word.Length; + var scoreBoost = this.ScoreBoost ?? 
1D; + do { foreach (var state in stateStore.GetNextStateEntries()) { var wordIndex = state.WordIndex; - var bookmark = state.Bookmark; - bookmark.Apply(); + state.Bookmark.Apply(); + var disposeBookmark = true; if (state.WordIndex == searchTermLength) { @@ -285,7 +339,9 @@ public override IntermediateQueryResult Evaluate(Func navigator // All other weightings will be less than 1, with more edits drawing the weighting towards zero var lengthTotal = searchTermLength + characterCount; var weighting = (double)(lengthTotal - state.LevenshteinDistance) / lengthTotal; - results = results.Union(navigator.GetExactMatches(weighting)); + weighting *= scoreBoost; + + navigator.AddExactMatches(queryContext, resultCollector, weighting); } // Always assume there could be missing characters at the end @@ -303,16 +359,18 @@ public override IntermediateQueryResult Evaluate(Func navigator #if DEBUG && TRACK_MATCH_STATE_TEXT , currentCharacter #endif - )); + ), true); } else { // First skip this character (assume extra character inserted), but don't move the navigator on - stateStore.Add(state.ApplyInsertion( + // We'll handle this case specially if the state wasn't added as this will allow us to dispose of the + // current bookmark after it has been used to add new deletion bookmarks + disposeBookmark = !stateStore.Add(state.ApplyInsertion( #if DEBUG && TRACK_MATCH_STATE_TEXT currentCharacter #endif - )); + ), false); // Also try skipping this character (assume omission) by just moving on in the navigator AddDeletionBookmarks(navigator, stateStore, state); @@ -321,6 +379,13 @@ public override IntermediateQueryResult Evaluate(Func navigator // Always assume this could be a substituted character AddSubstitutionBookmarks(navigator, stateStore, currentCharacter, state); } + + // We're done with this entry now. Disposing it causes the bookmark to get disposed and returned + // to the pool in the index navigator for reuse. 
+ if (disposeBookmark) + { + state.Dispose(); + } } stateStore.PrepareNextEntries(); @@ -329,7 +394,9 @@ public override IntermediateQueryResult Evaluate(Func navigator } while (stateStore.HasEntries); - return queryContext.ApplyTo(results); + fuzzyMatchStateStorePool.Return(stateStore); + + return resultCollector.ToIntermediateQueryResult(); } private static void AddSubstitutionBookmarks(IIndexNavigator navigator, FuzzyMatchStateStore stateStore, char currentCharacter, FuzzyMatchState currentState) @@ -347,7 +414,11 @@ private static void AddSubstitutionBookmarks(IIndexNavigator navigator, FuzzyMat bookmark.Apply(); navigator.Process(c); - stateStore.Add(currentState.ApplySubstitution(navigator.CreateBookmark(), new SubstitutedCharacters(currentCharacter, c))); + stateStore.Add( + currentState.ApplySubstitution( + navigator.CreateBookmark(), + new SubstitutedCharacters(currentCharacter, c)), + true); } } @@ -360,12 +431,14 @@ private static void AddDeletionBookmarks(IIndexNavigator navigator, FuzzyMatchSt { bookmark.Apply(); navigator.Process(c); - stateStore.Add(currentState.ApplyDeletion( - navigator.CreateBookmark() + stateStore.Add( + currentState.ApplyDeletion( + navigator.CreateBookmark() #if DEBUG && TRACK_MATCH_STATE_TEXT - , c + , c #endif - )); + ), + true); } } @@ -373,12 +446,17 @@ private static void AddDeletionBookmarks(IIndexNavigator navigator, FuzzyMatchSt /// public override string ToString() { + string searchTerm; if (this.maxEditDistance != DefaultMaxEditDistance || this.maxSequentialEdits != DefaultMaxSequentialEdits) { - return $"?{(this.maxEditDistance != DefaultMaxEditDistance ? this.maxEditDistance : "")},{(this.maxSequentialEdits != DefaultMaxSequentialEdits ? this.maxSequentialEdits : "")}?{this.Word}"; + searchTerm = $"?{(this.maxEditDistance != DefaultMaxEditDistance ? this.maxEditDistance : "")},{(this.maxSequentialEdits != DefaultMaxSequentialEdits ? this.maxSequentialEdits : "")}?{this.Word}"; + } + else + { + searchTerm = "?" 
+ this.Word; } - return "?" + this.Word; + return base.ToString(searchTerm); } } } diff --git a/src/Lifti.Core/Querying/QueryParts/IQueryContext.cs b/src/Lifti.Core/Querying/QueryParts/IQueryContext.cs deleted file mode 100644 index 5b4e2f45..00000000 --- a/src/Lifti.Core/Querying/QueryParts/IQueryContext.cs +++ /dev/null @@ -1,14 +0,0 @@ -namespace Lifti.Querying.QueryParts -{ - /// - /// Manages context during the execution of a query, allowing for aspects like field filters to be appled. - /// - public interface IQueryContext - { - /// - /// Applies any additional filters present in the current query context, e.g. field filters, - /// to the given , returning a new instance. - /// - IntermediateQueryResult ApplyTo(IntermediateQueryResult intermediateQueryResult); - } -} diff --git a/src/Lifti.Core/Querying/QueryParts/IQueryPart.cs b/src/Lifti.Core/Querying/QueryParts/IQueryPart.cs index 8252f729..3e90a3b0 100644 --- a/src/Lifti.Core/Querying/QueryParts/IQueryPart.cs +++ b/src/Lifti.Core/Querying/QueryParts/IQueryPart.cs @@ -15,11 +15,21 @@ public interface IQueryPart /// being queried. /// /// - /// The current . + /// The current . /// /// /// The that contains the matches. /// - IntermediateQueryResult Evaluate(Func navigatorCreator, IQueryContext queryContext); + IntermediateQueryResult Evaluate(Func navigatorCreator, QueryContext queryContext); + + /// + /// Calculates the execution cost weighting for this query part. The lower the weighting the + /// cheaper the query part is to execute. + /// + /// + /// A delegate capable of creating an for the index + /// being queried. 
+ /// + double CalculateWeighting(Func navigatorCreator); } } diff --git a/src/Lifti.Core/Querying/QueryParts/NearQueryOperator.cs b/src/Lifti.Core/Querying/QueryParts/NearQueryOperator.cs index af12e4b3..4927f285 100644 --- a/src/Lifti.Core/Querying/QueryParts/NearQueryOperator.cs +++ b/src/Lifti.Core/Querying/QueryParts/NearQueryOperator.cs @@ -5,10 +5,10 @@ namespace Lifti.Querying.QueryParts { /// /// An that produces an intersection of two s, restricting - /// an item's field matches such that the locations are close to one another. Items that result in no field matches + /// a document's field matches such that the locations are close to one another. Documents that result in no field matches /// are filtered out. /// - public class NearQueryOperator : BinaryQueryOperator + public sealed class NearQueryOperator : BinaryQueryOperator { /// /// Constructs a new instance of . @@ -28,13 +28,12 @@ public NearQueryOperator(IQueryPart left, IQueryPart right, int tolerance = 5) public int Tolerance { get; } /// - public override IntermediateQueryResult Evaluate(Func navigatorCreator, IQueryContext queryContext) - { - return this.Left.Evaluate(navigatorCreator, queryContext) - .CompositePositionalIntersect( - this.Right.Evaluate(navigatorCreator, queryContext), - this.Tolerance, - this.Tolerance); + public override IntermediateQueryResult Evaluate(Func navigatorCreator, QueryContext queryContext) + { + var (leftResults, rightResults) = this.EvaluateWithDocumentIntersection(navigatorCreator, queryContext); + + return leftResults + .CompositePositionalIntersect(rightResults, this.Tolerance, this.Tolerance); } /// diff --git a/src/Lifti.Core/Querying/QueryParts/OrQueryOperator.cs b/src/Lifti.Core/Querying/QueryParts/OrQueryOperator.cs index c9920b96..47e84a3a 100644 --- a/src/Lifti.Core/Querying/QueryParts/OrQueryOperator.cs +++ b/src/Lifti.Core/Querying/QueryParts/OrQueryOperator.cs @@ -1,5 +1,4 @@ using System; -using System.Collections; using 
System.Collections.Generic; namespace Lifti.Querying.QueryParts @@ -7,7 +6,7 @@ namespace Lifti.Querying.QueryParts /// /// An that produces a union of the results in two other s. /// - public class OrQueryOperator : BinaryQueryOperator + public sealed class OrQueryOperator : BinaryQueryOperator { /// /// Constructs a new . @@ -21,7 +20,7 @@ public OrQueryOperator(IQueryPart left, IQueryPart right) public override OperatorPrecedence Precedence => OperatorPrecedence.And; /// - public override IntermediateQueryResult Evaluate(Func navigatorCreator, IQueryContext queryContext) + public override IntermediateQueryResult Evaluate(Func navigatorCreator, QueryContext queryContext) { return this.Left.Evaluate(navigatorCreator, queryContext) .Union(this.Right.Evaluate(navigatorCreator, queryContext)); @@ -33,6 +32,13 @@ public override string ToString() return $"{this.Left} | {this.Right}"; } + /// + protected override double RunWeightingCalculation(Func navigatorCreator) + { + // Both sides of the OR operator are evaluated, so the weighting is the sum of the two + return this.Left.CalculateWeighting(navigatorCreator) + this.Right.CalculateWeighting(navigatorCreator); + } + /// /// Combines all the given query parts with s. If contains a single element, then /// that query part is returned unaltered, making this effectively a no-op. 
diff --git a/src/Lifti.Core/Querying/QueryParts/PrecedingNearQueryOperator.cs b/src/Lifti.Core/Querying/QueryParts/PrecedingNearQueryOperator.cs index d24c225c..182c014f 100644 --- a/src/Lifti.Core/Querying/QueryParts/PrecedingNearQueryOperator.cs +++ b/src/Lifti.Core/Querying/QueryParts/PrecedingNearQueryOperator.cs @@ -5,11 +5,11 @@ namespace Lifti.Querying.QueryParts { /// /// An that produces an intersection of two s, restricting - /// an item's field matches such that the locations of the first appear before the locations of the second and + /// a document's field matches such that the locations of the first appear before the locations of the second and /// within a specified tolerance. - /// Items that result in no field matches are filtered out. + /// Documents that result in no field matches are filtered out. /// - public class PrecedingNearQueryOperator : BinaryQueryOperator + public sealed class PrecedingNearQueryOperator : BinaryQueryOperator { /// /// Constructs a new instance of . 
@@ -29,13 +29,12 @@ public PrecedingNearQueryOperator(IQueryPart left, IQueryPart right, int toleran public int Tolerance { get; } /// - public override IntermediateQueryResult Evaluate(Func navigatorCreator, IQueryContext queryContext) - { - return this.Left.Evaluate(navigatorCreator, queryContext) - .CompositePositionalIntersect( - this.Right.Evaluate(navigatorCreator, queryContext), - 0, - this.Tolerance); + public override IntermediateQueryResult Evaluate(Func navigatorCreator, QueryContext queryContext) + { + var (leftResults, rightResults) = this.EvaluateWithDocumentIntersection(navigatorCreator, queryContext); + + return leftResults + .CompositePositionalIntersect(rightResults, 0, this.Tolerance); } /// diff --git a/src/Lifti.Core/Querying/QueryParts/PrecedingQueryOperator.cs b/src/Lifti.Core/Querying/QueryParts/PrecedingQueryOperator.cs index 63790fa3..95ff4a5e 100644 --- a/src/Lifti.Core/Querying/QueryParts/PrecedingQueryOperator.cs +++ b/src/Lifti.Core/Querying/QueryParts/PrecedingQueryOperator.cs @@ -4,10 +4,10 @@ namespace Lifti.Querying.QueryParts { /// /// An that produces an intersection of two s, restricting - /// an item's field matches such that the locations of the first appear before the locations of the second. - /// Items that result in no field matches are filtered out. + /// a document's field matches such that the locations of the first appear before the locations of the second. + /// Documents that result in no field matches are filtered out. /// - public class PrecedingQueryOperator : BinaryQueryOperator + public sealed class PrecedingQueryOperator : BinaryQueryOperator { /// /// Constructs a new instance of . 
@@ -21,10 +21,11 @@ public PrecedingQueryOperator(IQueryPart left, IQueryPart right) public override OperatorPrecedence Precedence => OperatorPrecedence.Positional; /// - public override IntermediateQueryResult Evaluate(Func navigatorCreator, IQueryContext queryContext) - { - return this.Left.Evaluate(navigatorCreator, queryContext) - .PrecedingIntersect(this.Right.Evaluate(navigatorCreator, queryContext)); + public override IntermediateQueryResult Evaluate(Func navigatorCreator, QueryContext queryContext) + { + var (leftResults, rightResults) = this.EvaluateWithDocumentIntersection(navigatorCreator, queryContext); + + return leftResults.PrecedingIntersect(rightResults); } /// diff --git a/src/Lifti.Core/Querying/QueryParts/QueryContext.cs b/src/Lifti.Core/Querying/QueryParts/QueryContext.cs deleted file mode 100644 index 488a3dfb..00000000 --- a/src/Lifti.Core/Querying/QueryParts/QueryContext.cs +++ /dev/null @@ -1,42 +0,0 @@ -using System.Linq; - -namespace Lifti.Querying.QueryParts -{ - internal class QueryContext : IQueryContext - { - private readonly byte? filterToFieldId; - - public static IQueryContext Empty { get; } = new QueryContext(null); - - private QueryContext(byte? filterToFieldId) - { - this.filterToFieldId = filterToFieldId; - } - - /// - public IntermediateQueryResult ApplyTo(IntermediateQueryResult intermediateQueryResult) - { - if (this.filterToFieldId == null) - { - return intermediateQueryResult; - } - - return new IntermediateQueryResult( - intermediateQueryResult.Matches - .Select(m => new ScoredToken( - m.ItemId, - m.FieldMatches.Where(fm => fm.FieldId == this.filterToFieldId).ToList())) - .Where(m => m.FieldMatches.Count > 0)); - } - - public static IQueryContext Create(IQueryContext currentContext, byte? 
filterToFieldId = null) - { - if (filterToFieldId == null) - { - return currentContext; - } - - return new QueryContext(filterToFieldId); - } - } -} diff --git a/src/Lifti.Core/Querying/QueryParts/ScoreBoostedQueryPart.cs b/src/Lifti.Core/Querying/QueryParts/ScoreBoostedQueryPart.cs new file mode 100644 index 00000000..4207af74 --- /dev/null +++ b/src/Lifti.Core/Querying/QueryParts/ScoreBoostedQueryPart.cs @@ -0,0 +1,56 @@ +using System; + +namespace Lifti.Querying.QueryParts +{ + /// + public abstract class ScoreBoostedQueryPart : IQueryPart + { + private double? weighting; + + /// + /// Constructs a new instance of . + /// + /// + /// The score boost to apply to any matches that this query part finds. This is multiplied with any score boosts + /// applied to matching fields. A null value indicates that no additional score boost should be applied. + /// + protected ScoreBoostedQueryPart(double? scoreBoost) + { + this.ScoreBoost = scoreBoost; + } + + /// + /// The score boost to apply to any matches that this query part finds. This is multiplied with any score boosts + /// applied to matching fields. A null value indicates that no additional score boost should be applied. + /// + public double? ScoreBoost { get; } + + /// + public double CalculateWeighting(Func navigatorCreator) + { + this.weighting ??= this.RunWeightingCalculation(navigatorCreator); + return this.weighting.GetValueOrDefault(); + } + + /// + public abstract IntermediateQueryResult Evaluate(Func navigatorCreator, QueryContext queryContext); + + /// + /// Runs the weighting calculation for this query part. + /// + protected abstract double RunWeightingCalculation(Func navigatorCreator); + + /// + /// Returns a string representation of this query part. 
+ /// + protected virtual string ToString(string searchTerm) + { + if (this.ScoreBoost.HasValue) + { + return $"{searchTerm}^{this.ScoreBoost.Value}"; + } + + return searchTerm; + } + } +} diff --git a/src/Lifti.Core/Querying/QueryParts/WildcardQueryFragment.cs b/src/Lifti.Core/Querying/QueryParts/WildcardQueryFragment.cs index 8855aba6..9b25d1a2 100644 --- a/src/Lifti.Core/Querying/QueryParts/WildcardQueryFragment.cs +++ b/src/Lifti.Core/Querying/QueryParts/WildcardQueryFragment.cs @@ -37,7 +37,10 @@ private WildcardQueryFragment(WildcardQueryFragmentKind kind, string? text) /// Creates a new instance of representing a textual part of a wildcard query. /// /// The text that must be explicitly matched. - public static WildcardQueryFragment CreateText(string text) => new WildcardQueryFragment(WildcardQueryFragmentKind.Text, text); + public static WildcardQueryFragment CreateText(string text) + { + return new(WildcardQueryFragmentKind.Text, text); + } /// public override bool Equals(object? 
obj) diff --git a/src/Lifti.Core/Querying/QueryParts/WildcardQueryPart.cs b/src/Lifti.Core/Querying/QueryParts/WildcardQueryPart.cs index 2244a0ef..e2ed44cc 100644 --- a/src/Lifti.Core/Querying/QueryParts/WildcardQueryPart.cs +++ b/src/Lifti.Core/Querying/QueryParts/WildcardQueryPart.cs @@ -2,18 +2,17 @@ using System.Collections.Generic; using System.Linq; using System.Text; -using System.Threading; namespace Lifti.Querying.QueryParts { /// - /// An that matches items following wildcard rules: - /// "*" matches any number of characters - /// "%" matches a single character + /// An that matches text in a document using the following wildcard rules: + /// "*" matches any number of characters () + /// "%" matches a single character () /// - public class WildcardQueryPart : IQueryPart + public sealed class WildcardQueryPart : ScoreBoostedQueryPart { - private static readonly IIndexNavigatorBookmark[] QueryCompleted = Array.Empty(); + private static readonly IIndexNavigatorBookmark[] QueryCompleted = []; internal IReadOnlyList Fragments { get; } /// @@ -28,18 +27,24 @@ public WildcardQueryPart(params WildcardQueryFragment[] fragments) /// /// Creates a new instance of . /// - public WildcardQueryPart(IEnumerable fragments) + public WildcardQueryPart(IEnumerable fragments, double? 
scoreBoost = null) + : base(scoreBoost) { if (fragments is null) { throw new ArgumentNullException(nameof(fragments)); } - this.Fragments = NormalizeFragmentSequence(fragments).ToList(); + this.Fragments = NormalizeFragmentSequence(fragments).ToList(); + + if (this.Fragments.Count == 0) + { + throw new QueryParserException(ExceptionMessages.EmptyWildcardQuery); + } } /// - public IntermediateQueryResult Evaluate(Func navigatorCreator, IQueryContext queryContext) + public override IntermediateQueryResult Evaluate(Func navigatorCreator, QueryContext queryContext) { if (navigatorCreator is null) { @@ -52,31 +57,74 @@ public IntermediateQueryResult Evaluate(Func navigatorCreator, } using var navigator = navigatorCreator(); - var results = IntermediateQueryResult.Empty; + var resultCollector = new DocumentMatchCollector(); var bookmarks = new DoubleBufferedList(navigator.CreateBookmark()); - - for (var i = 0; i < Fragments.Count && bookmarks.Count > 0; i++) + var scoreBoost = this.ScoreBoost ?? 1D; + for (var i = 0; i < this.Fragments.Count && bookmarks.Count > 0; i++) { - var nextFragment = i == Fragments.Count - 1 ? (WildcardQueryFragment?)null : Fragments[i + 1]; + var nextFragment = i == this.Fragments.Count - 1 ? 
(WildcardQueryFragment?)null : this.Fragments[i + 1]; foreach (var bookmark in bookmarks) { bookmark.Apply(); var nextBookmarks = ProcessFragment( - navigator, - Fragments[i], - nextFragment, - ref results); - - bookmarks.AddRange(nextBookmarks); + navigator, + resultCollector, + scoreBoost, + queryContext, + this.Fragments[i], + nextFragment); + + bookmarks.AddRange(nextBookmarks); + + bookmark.Dispose(); } bookmarks.Swap(); } - return queryContext.ApplyTo(results); + return resultCollector.ToIntermediateQueryResult(); } + + /// + protected override double RunWeightingCalculation(Func navigatorCreator) + { + var firstFragment = this.Fragments[0]; + if (this.Fragments.Count == 1 && firstFragment.Kind == WildcardQueryFragmentKind.MultiCharacter) + { + // Penalise the use of a full document search + return 1000D; + } + + int weight = 0; + + for (var i = 0; i < this.Fragments.Count; i++) + { + switch (this.Fragments[i].Kind) + { + case WildcardQueryFragmentKind.MultiCharacter: + weight += 4; + break; + case WildcardQueryFragmentKind.SingleCharacter: + weight += 1; + break; + case WildcardQueryFragmentKind.Text: + weight += 1; + break; + + } + } + + return firstFragment.Kind switch + { + // Don't penalise wildcard searches that start with a text fragment as badly + WildcardQueryFragmentKind.Text => weight * 0.8D, + // Penalise the use of leading multi-character wildcards + WildcardQueryFragmentKind.MultiCharacter => weight * 1.2D, + _ => weight, + }; + } /// public override string ToString() @@ -94,14 +142,16 @@ public override string ToString() }); } - return builder.ToString(); + return base.ToString(builder.ToString()); } private static IEnumerable ProcessFragment( - IIndexNavigator navigator, + IIndexNavigator navigator, + DocumentMatchCollector resultCollector, + double scoreBoost, + QueryContext queryContext, WildcardQueryFragment fragment, - WildcardQueryFragment? nextFragment, - ref IntermediateQueryResult results) + WildcardQueryFragment? 
nextFragment) { switch (fragment.Kind) { @@ -115,7 +165,7 @@ public override string ToString() if (nextFragment == null) { // This is the end of the query and we've ended up on some exact matches - results = results.Union(navigator.GetExactMatches()); + navigator.AddExactMatches(queryContext, resultCollector, scoreBoost); return QueryCompleted; } @@ -127,7 +177,7 @@ public override string ToString() { // This wildcard is the last in the pattern - just return any exact and child matches under the current position // I.e. as per the classic "starts with" operator - results = results.Union(navigator.GetExactAndChildMatches()); + navigator.AddExactAndChildMatches(queryContext, resultCollector, scoreBoost); // No other work to process - no more bookmarks required. return QueryCompleted; @@ -156,11 +206,11 @@ public override string ToString() if (nextFragment == null) { // Add all exact matches for every character under the current position - var bookmark = navigator.CreateBookmark(); + using var bookmark = navigator.CreateBookmark(); foreach (var character in navigator.EnumerateNextCharacters()) { navigator.Process(character); - results = results.Union(navigator.GetExactMatches()); + navigator.AddExactMatches(queryContext, resultCollector, scoreBoost); bookmark.Apply(); } @@ -178,7 +228,7 @@ public override string ToString() private static IEnumerable CreateBookmarksForAllChildCharacters(IIndexNavigator navigator) { - var bookmark = navigator.CreateBookmark(); + using var bookmark = navigator.CreateBookmark(); foreach (var character in navigator.EnumerateNextCharacters()) { navigator.Process(character); @@ -188,23 +238,36 @@ private static IEnumerable CreateBookmarksForAllChildCh } private static IEnumerable RecursivelyCreateBookmarksAtMatchingCharacter(IIndexNavigator navigator, char terminatingCharacter) - { - var bookmark = navigator.CreateBookmark(); - foreach (var character in navigator.EnumerateNextCharacters()) - { - if (character == terminatingCharacter) - { - 
yield return navigator.CreateBookmark(); - } - - navigator.Process(character); - - foreach (var recursedBookmark in RecursivelyCreateBookmarksAtMatchingCharacter(navigator, terminatingCharacter)) - { - yield return recursedBookmark; - } - - bookmark.Apply(); + { + var bookmarkStack = new Stack(); + bookmarkStack.Push(navigator.CreateBookmark()); + + while (bookmarkStack.Count > 0) + { + using var bookmark = bookmarkStack.Pop(); + bookmark.Apply(); + + foreach (var character in navigator.EnumerateNextCharacters()) + { + if (character == terminatingCharacter) + { + // This node has a child node that matches the terminating character - return a bookmark at this point + // so the next text fragment can be processed from here + yield return navigator.CreateBookmark(); + } + + // Even if the character matches the terminating character, we still need to process it, as it may not be + // the start of a successfully matched sequence. E.g. if the query is "*ERS" and we're in a hierarchy for the token "CENTERS" + // the "E" would be the terminating character, but we need to keep going to find the second "E" for "ERS" to match. + // Process the character from this node + navigator.Process(character); + + // Push a bookmark so we can return to this point after processing all the characters from this node + bookmarkStack.Push(navigator.CreateBookmark()); + + // Return to the node we just processed the character from + bookmark.Apply(); + } } } diff --git a/src/Lifti.Core/Querying/QueryParts/WordQueryPart.cs b/src/Lifti.Core/Querying/QueryParts/WordQueryPart.cs index e2b2a238..5c86fe78 100644 --- a/src/Lifti.Core/Querying/QueryParts/WordQueryPart.cs +++ b/src/Lifti.Core/Querying/QueryParts/WordQueryPart.cs @@ -1,14 +1,16 @@ using System; namespace Lifti.Querying.QueryParts -{ +{ + /// - public abstract class WordQueryPart : IQueryPart + public abstract class WordQueryPart : ScoreBoostedQueryPart { /// /// Constructs a new instance of . 
/// - protected WordQueryPart(string word) + protected WordQueryPart(string word, double? scoreBoost) + : base(scoreBoost) { this.Word = word; } @@ -17,12 +19,27 @@ protected WordQueryPart(string word) public string Word { get; + } + + /// + protected override double RunWeightingCalculation(Func navigatorCreator) + { + if (navigatorCreator is null) + { + throw new ArgumentNullException(nameof(navigatorCreator)); + } + + using var navigator = navigatorCreator(); + navigator.Process(this.Word.AsSpan()); + + var totalDocumentCount = navigator.Snapshot.Metadata.DocumentCount; + if (totalDocumentCount == 0) + { + // Edge case for an empty index + return 0; + } + + return navigator.ExactMatchCount() / (double)totalDocumentCount; } - - /// - /// Evaluates this instance against the index within the given , returning an - /// that contains the matches. - /// - public abstract IntermediateQueryResult Evaluate(Func navigatorCreator, IQueryContext queryContext); } } diff --git a/src/Lifti.Core/Querying/QueryToken.cs b/src/Lifti.Core/Querying/QueryToken.cs index ea450231..869658f5 100644 --- a/src/Lifti.Core/Querying/QueryToken.cs +++ b/src/Lifti.Core/Querying/QueryToken.cs @@ -8,11 +8,17 @@ namespace Lifti.Querying /// internal class QueryToken : IEquatable { - private QueryToken(string tokenText, QueryTokenType tokenType, int tolerance, IIndexTokenizer? indexTokenizer) + private QueryToken( + string tokenText, + QueryTokenType tokenType, + int tolerance, + IIndexTokenizer? indexTokenizer, + double? scoreBoost = null) { this.TokenText = tokenText; this.TokenType = tokenType; this.Tolerance = tolerance; + this.ScoreBoost = scoreBoost; this.IndexTokenizer = indexTokenizer; } @@ -30,6 +36,12 @@ private QueryToken(string tokenText, QueryTokenType tokenType, int tolerance, II /// public int Tolerance { get; } + /// + /// The score boost to apply to any documents matching the search term. This is multiplied with any score boosts + /// applied to matching fields. 
A null value indicates that no additional score boost should be applied. + /// + public double? ScoreBoost { get; } + /// /// The to use when further tokenizing the text in this instance. /// @@ -38,8 +50,8 @@ private QueryToken(string tokenText, QueryTokenType tokenType, int tolerance, II /// /// Gets the that this instance represents. /// - public QueryTokenType TokenType { get; } - + public QueryTokenType TokenType { get; } + /// /// Creates a new instance representing a textual part of the query. /// @@ -48,25 +60,38 @@ private QueryToken(string tokenText, QueryTokenType tokenType, int tolerance, II /// /// /// The to use when further tokenizing the captured text. + /// + /// + /// The score boost to apply to any documents matching the search term. This is multiplied with any score boosts + /// applied to matching fields. A null value indicates that no additional score boost should be applied. /// - public static QueryToken ForText(string text, IIndexTokenizer indexTokenizer) => new QueryToken(text, QueryTokenType.Text, 0, indexTokenizer); - + public static QueryToken ForText(string text, IIndexTokenizer indexTokenizer, double? scoreBoost) + { + return new(text, QueryTokenType.Text, 0, indexTokenizer, scoreBoost); + } + /// /// Creates a new instance representing a field filter. /// /// /// The name of the field to match. /// - public static QueryToken ForFieldFilter(string fieldName) => new QueryToken(fieldName, QueryTokenType.FieldFilter, 0, null); - + public static QueryToken ForFieldFilter(string fieldName) + { + return new(fieldName, QueryTokenType.FieldFilter, 0, null); + } + /// /// Creates a new instance representing a query operator. /// /// /// The type of operator the token should represent. 
/// - public static QueryToken ForOperator(QueryTokenType operatorType) => new QueryToken(string.Empty, operatorType, 0, null); - + public static QueryToken ForOperator(QueryTokenType operatorType) + { + return new(string.Empty, operatorType, 0, null); + } + /// /// Creates a new instance representing a query operator that has additional positional constraints. /// @@ -76,9 +101,11 @@ private QueryToken(string tokenText, QueryTokenType tokenType, int tolerance, II /// /// The number of tokens to use as the tolerance for the operator. /// - public static QueryToken ForOperatorWithTolerance(QueryTokenType operatorType, int tolerance) => - new QueryToken(string.Empty, operatorType, tolerance == 0 ? 5 : tolerance, null); - + public static QueryToken ForOperatorWithTolerance(QueryTokenType operatorType, int tolerance) + { + return new(string.Empty, operatorType, tolerance == 0 ? 5 : tolerance, null, null); + } + /// public override bool Equals(object? obj) { diff --git a/src/Lifti.Core/Querying/QueryTokenMatch.cs b/src/Lifti.Core/Querying/QueryTokenMatch.cs deleted file mode 100644 index 6e762875..00000000 --- a/src/Lifti.Core/Querying/QueryTokenMatch.cs +++ /dev/null @@ -1,63 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; - -namespace Lifti.Querying -{ - /// - /// Provides information about an item that was matched whilst executing a query. - /// - public readonly struct QueryTokenMatch : IEquatable - { - /// - /// Constructs a new instance. - /// - public QueryTokenMatch(int itemId, IReadOnlyList fieldMatches) - { - this.ItemId = itemId; - this.FieldMatches = fieldMatches; - } - - /// - /// Gets the id of the item that was matched. - /// - public int ItemId { get; } - - /// - /// Gets the fields in which the tokens were matched. - /// - public IReadOnlyList FieldMatches { get; } - - /// - public override bool Equals(object? 
obj) - { - return obj is QueryTokenMatch match && - this.Equals(match); - } - - /// - public override int GetHashCode() - { - return HashCode.Combine(this.ItemId, this.FieldMatches); - } - - /// - public static bool operator ==(QueryTokenMatch left, QueryTokenMatch right) - { - return left.Equals(right); - } - - /// - public static bool operator !=(QueryTokenMatch left, QueryTokenMatch right) - { - return !(left == right); - } - - /// - public bool Equals(QueryTokenMatch other) - { - return this.ItemId == other.ItemId && - this.FieldMatches.SequenceEqual(other.FieldMatches); - } - } -} diff --git a/src/Lifti.Core/Querying/QueryTokenizer.cs b/src/Lifti.Core/Querying/QueryTokenizer.cs index be705d8d..40dca949 100644 --- a/src/Lifti.Core/Querying/QueryTokenizer.cs +++ b/src/Lifti.Core/Querying/QueryTokenizer.cs @@ -1,7 +1,7 @@ using Lifti.Tokenization; using System; using System.Collections.Generic; -using System.Collections.Immutable; +using System.Text.RegularExpressions; namespace Lifti.Querying { @@ -10,25 +10,31 @@ namespace Lifti.Querying /// internal class QueryTokenizer : IQueryTokenizer { - private static readonly HashSet wildcardPunctuation = new() - { + private static readonly Regex escapeCharacterReplacer = new(@"\\(.)", RegexOptions.Compiled); + + private static readonly HashSet wildcardPunctuation = + [ '*', '?', '%' - }; + ]; // Punctuation characters that shouldn't cause a token to be automatically split - these // are part of the LIFTI query syntax and processed on a case by case basis. 
private static readonly HashSet generalNonSplitPunctuation = new(wildcardPunctuation) { - '&', - '|', - '>', - '=', - '(', - ')', - '~', - '"' + '&', // And operator + '|', // Or operator + '>', // Preceding operator + '=', // Field filter + '(', // Open expression group + ')', // Close expression group + '~', // Near operator + '"', // Quoted sections + '[', // Field filter open + ']', // Field filter close + '^', // Score boost + '\\' // Escaped characters }; // Punctuation characters that shouldn't cause a token to be automatically split when processing @@ -42,25 +48,28 @@ private enum OperatorParseState { None = 0, ProcessingString = 1, - ProcessingNearOperator = 2 + ProcessingNearOperator = 2, + ProcessingEscapedCharacter = 3, } - private enum TokenParseState + private enum FuzzyMatchParseState { None = 0, ProcessingFuzzyMatch = 1, ProcessingFuzzyMatchTerm = 2, } - private record IndexTokenizerStackState(int BracketCaptureDepth, IIndexTokenizer IndexTokenizer); + private record QueryTokenizerStackState(int BracketCaptureDepth, IIndexTokenizer IndexTokenizer); private record QueryTokenizerState(IIndexTokenizer IndexTokenizer) { public OperatorParseState OperatorState { get; init; } = OperatorParseState.None; - public TokenParseState TokenState { get; init; } = TokenParseState.None; + public FuzzyMatchParseState FuzzyMatchState { get; init; } = FuzzyMatchParseState.None; public int BracketDepth { get; init; } + public int ScoreBoostStartIndex { get; init; } + public double? ScoreBoost { get; init; } - private ImmutableStack TokenizerStack { get; init; } = ImmutableStack.Empty; + private Stack? 
SharedTokenizerStack { get; set; } public QueryTokenizerState OpenBracket() { @@ -82,36 +91,42 @@ public QueryTokenizerState CloseBracket() public QueryTokenizerState PushTokenizer(IIndexTokenizer tokenizer) { + this.SharedTokenizerStack ??= new(); + this.SharedTokenizerStack.Push(new QueryTokenizerStackState(this.BracketDepth, this.IndexTokenizer)); + return this with { - IndexTokenizer = tokenizer, - TokenizerStack = this.TokenizerStack.Push(new IndexTokenizerStackState(this.BracketDepth, this.IndexTokenizer)) + IndexTokenizer = tokenizer }; } public QueryTokenizerState UpdateForYieldedToken() { - if (this.OperatorState != OperatorParseState.ProcessingString && this.TokenizerStack.IsEmpty == false) + if (this.OperatorState != OperatorParseState.ProcessingString && this.SharedTokenizerStack?.Count > 0) { - var peeked = this.TokenizerStack.Peek(); + var peeked = this.SharedTokenizerStack.Peek(); if (peeked.BracketCaptureDepth >= this.BracketDepth) { // We've reached the same bracket depth now that the tokenizer was captured at, so revert to the // previous one on the stack. 
- var poppedStack = this.TokenizerStack.Pop(out var previousState); + var previousState = this.SharedTokenizerStack.Pop(); return this with { IndexTokenizer = previousState.IndexTokenizer, - TokenizerStack = poppedStack + ScoreBoost = null }; } } // Once token processing complete, reset any token state flags - if (this.TokenState != TokenParseState.None) + if (this.FuzzyMatchState != FuzzyMatchParseState.None || this.ScoreBoost != null) { - return this with { TokenState = TokenParseState.None }; + return this with + { + FuzzyMatchState = FuzzyMatchParseState.None, + ScoreBoost = null + }; } // We're still deeper in brackets than when we first captured the tokenizer, so don't revert @@ -135,8 +150,21 @@ public IEnumerable ParseQueryTokens(string queryText, IIndexTokenize { if (tokenStart != null) { - var tokenText = queryText.Substring(tokenStart.Value, endIndex - tokenStart.Value); - var token = QueryToken.ForText(tokenText, state.IndexTokenizer); + + string tokenText; + if (state.ScoreBoost == null) + { + tokenText = queryText.Substring(tokenStart.Value, endIndex - tokenStart.Value); + } + else + { + // Don't return the score boost information as part of the token text + tokenText = queryText.Substring(tokenStart.Value, state.ScoreBoostStartIndex - tokenStart.Value); + } + + tokenText = StripEscapeIndicators(tokenText); + + var token = QueryToken.ForText(tokenText, state.IndexTokenizer, state.ScoreBoost); tokenStart = null; state = state.UpdateForYieldedToken(); @@ -159,14 +187,14 @@ public IEnumerable ParseQueryTokens(string queryText, IIndexTokenize for (var i = 0; i < queryText.Length; i++) { var current = queryText[i]; - if (state.TokenState == TokenParseState.ProcessingFuzzyMatch + if (state.FuzzyMatchState == FuzzyMatchParseState.ProcessingFuzzyMatch && current != ',' && char.IsDigit(current) == false) { // As soon as we encounter a non digit or comma when processing a fuzzy match, // assume that we're not processing the fuzzy match parameters - this way a 
comma can // subsequently be treated as a split character. - state = state with { TokenState = TokenParseState.ProcessingFuzzyMatchTerm }; + state = state with { FuzzyMatchState = FuzzyMatchParseState.ProcessingFuzzyMatchTerm }; } if (IsSplitChar(current, state)) @@ -184,6 +212,15 @@ public IEnumerable ParseQueryTokens(string queryText, IIndexTokenize case OperatorParseState.None: switch (current) { + case '\\': + tokenStart ??= i; + state = state with { OperatorState = OperatorParseState.ProcessingEscapedCharacter }; + break; + case '^': + var scoreBoostStart = i; + (var scoreBoost, i) = ConsumeNumber(i + 1, queryText); + state = state with { ScoreBoost = scoreBoost, ScoreBoostStartIndex = scoreBoostStart }; + break; case '&': yield return QueryToken.ForOperator(QueryTokenType.AndOperator); break; @@ -195,20 +232,53 @@ public IEnumerable ParseQueryTokens(string queryText, IIndexTokenize break; case '?': // Possibly a wildcard token character, or part of a fuzzy match token - switch (state.TokenState) + switch (state.FuzzyMatchState) { - case TokenParseState.None when tokenStart is null: + case FuzzyMatchParseState.None when tokenStart is null: // Start processing a fuzzy match operator - state = state with { TokenState = TokenParseState.ProcessingFuzzyMatch }; + state = state with { FuzzyMatchState = FuzzyMatchParseState.ProcessingFuzzyMatch }; break; - case TokenParseState.ProcessingFuzzyMatch: - // We're already procssing a fuzzy match, the second ? indicates the end of parameters and start of the search term - state = state with { TokenState = TokenParseState.ProcessingFuzzyMatchTerm }; + case FuzzyMatchParseState.ProcessingFuzzyMatch: + // We're already processing a fuzzy match, the second ? 
indicates the end of parameters and start of the search term + state = state with { FuzzyMatchState = FuzzyMatchParseState.ProcessingFuzzyMatchTerm }; break; } tokenStart ??= i; + break; + case '[': + tokenStart = i; + + // Keep processing characters until we reach a closing bracket. Characters escaped with a backslash are skipped + var foundCloseBracket = false; + for (i++; i < queryText.Length; i++) + { + current = queryText[i]; + if (current == ']') + { + foundCloseBracket = true; + break; + } + + if (current == '\\') + { + // Skip the next character + i++; + } + } + + if (foundCloseBracket == false) + { + throw new QueryParserException(ExceptionMessages.UnclosedSquareBracket); + } + + // Verify that the next character is an = + if (i + 1 == queryText.Length || queryText[i + 1] != '=') + { + throw new QueryParserException(ExceptionMessages.ExpectedEqualsAfterFieldName); + } + break; case '=': @@ -218,12 +288,27 @@ public IEnumerable ParseQueryTokens(string queryText, IIndexTokenize } var fieldName = queryText.Substring(tokenStart.Value, i - tokenStart.Value); + if (fieldName.Length > 1 && fieldName[0] == '[') + { + // Strip the square brackets + fieldName = fieldName.Substring(1, fieldName.Length - 2); + + // Replace any substituted characters + fieldName = Regex.Replace(fieldName, @"\\(.)", "$1"); + + if (fieldName.Length == 0) + { + throw new QueryParserException(ExceptionMessages.EmptyFieldNameEncountered); + } + } + yield return QueryToken.ForFieldFilter(fieldName); state = state.PushTokenizer(tokenizerProvider.GetTokenizerForField(fieldName)); tokenStart = null; break; + case ')': state = state.CloseBracket(); token = CreateTokenForYielding(i); @@ -253,6 +338,10 @@ public IEnumerable ParseQueryTokens(string queryText, IIndexTokenize break; + case OperatorParseState.ProcessingEscapedCharacter: + state = state with { OperatorState = OperatorParseState.None }; + break; + case OperatorParseState.ProcessingString: switch (current) { @@ -316,6 +405,46 @@ public 
IEnumerable ParseQueryTokens(string queryText, IIndexTokenize } } + private static string StripEscapeIndicators(string tokenText) + { +#if NETSTANDARD + if (tokenText.IndexOf('\\') >= 0) +#else + if (tokenText.Contains('\\', StringComparison.Ordinal)) +#endif + { + return escapeCharacterReplacer.Replace(tokenText, "$1"); + } + + return tokenText; + } + + private static (double scoreBoost, int endIndex) ConsumeNumber(int index, string queryText) + { + var startIndex = index; + for (; index < queryText.Length; index++) + { + var current = queryText[index]; + if (char.IsDigit(current) == false && current != '.') + { + break; + } + } + + if (index == startIndex) + { + throw new QueryParserException(ExceptionMessages.InvalidScoreBoostExpectedNumber); + } + + var numberText = queryText.Substring(startIndex, index - startIndex); + if (double.TryParse(numberText, out var scoreBoost) == false) + { + throw new QueryParserException(ExceptionMessages.InvalidScoreBoost, numberText); + } + + return (scoreBoost, index - 1); + } + private static bool IsSplitChar(char current, QueryTokenizerState state) { var isWhitespace = char.IsWhiteSpace(current); @@ -325,13 +454,13 @@ private static bool IsSplitChar(char current, QueryTokenizerState state) isWhitespace || // Whitespace is always a split character (!generalNonSplitPunctuation.Contains(current) && state.IndexTokenizer.IsSplitCharacter(current) // Defer to the tokenizer for the field as to whether this is a split character, - && !(state.TokenState == TokenParseState.ProcessingFuzzyMatch && current == ',')), // ..unless it's a comma appearing in the first part of a fuzzy match + && !(state.FuzzyMatchState == FuzzyMatchParseState.ProcessingFuzzyMatch && current == ',')), // ..unless it's a comma appearing in the first part of a fuzzy match OperatorParseState.ProcessingString => isWhitespace || (!quotedSectionNonSplitPunctuation.Contains(current) && state.IndexTokenizer.IsSplitCharacter(current)), - // When processing a near 
operator, no splitting is possible until the operator processing is complete - OperatorParseState.ProcessingNearOperator => false, + // When processing a near operator or escaped character, no splitting is possible until the operator processing is complete + OperatorParseState.ProcessingNearOperator or OperatorParseState.ProcessingEscapedCharacter => false, _ => throw new QueryParserException(ExceptionMessages.UnexpectedOperatorParseStateEncountered, state.OperatorState) }; } } diff --git a/src/Lifti.Core/Querying/ScoredFieldMatch.cs b/src/Lifti.Core/Querying/ScoredFieldMatch.cs index b00ccdde..444e0a5e 100644 --- a/src/Lifti.Core/Querying/ScoredFieldMatch.cs +++ b/src/Lifti.Core/Querying/ScoredFieldMatch.cs @@ -1,20 +1,68 @@ using System; using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; namespace Lifti.Querying { /// - /// Information about an item's field that was matched and scored during the execution of a query. + /// Information about a document's field that was matched and scored during the execution of a query. /// - public readonly struct ScoredFieldMatch : IEquatable - { - /// - /// Constructs a new instance of . - /// - public ScoredFieldMatch(double score, FieldMatch fieldMatch) + public class ScoredFieldMatch : IEquatable + { + private readonly IReadOnlyList? rawOrderedLocations; + private readonly IReadOnlyList?
interfacedLocations; + private ScoredFieldMatch(double score, byte fieldId, IReadOnlyList tokenLocations) { - this.Score = score; - this.FieldMatch = fieldMatch; + this.Score = score; + this.FieldId = fieldId; + this.rawOrderedLocations = tokenLocations; + this.Locations = tokenLocations; + } + + private ScoredFieldMatch(double score, byte fieldId, IReadOnlyList tokenLocations) + { + this.Score = score; + this.FieldId = fieldId; + this.interfacedLocations = tokenLocations; + this.Locations = tokenLocations; + } + + /// + /// Creates a new instance of the class where the caller is guaranteeing that + /// the set of token locations is already sorted. + /// + internal static ScoredFieldMatch CreateFromPresorted(double score, byte fieldId, IReadOnlyList tokenLocations) + { + EnsureTokenLocationOrder(tokenLocations); + return new ScoredFieldMatch(score, fieldId, tokenLocations); + } + + internal static ScoredFieldMatch CreateFromPresorted(double score, byte fieldId, IReadOnlyList tokenLocations) + { + EnsureTokenLocationOrder(tokenLocations); + return new ScoredFieldMatch(score, fieldId, tokenLocations); + } + + [Conditional("DEBUG")] + private static void EnsureTokenLocationOrder(IReadOnlyList tokenLocations) + { +#if DEBUG + // Verify that the tokens locations are in token index order + for (var i = 1; i < tokenLocations.Count; i++) + { + if (tokenLocations[i - 1].MinTokenIndex > tokenLocations[i].MinTokenIndex) + { + Debug.Fail("Token locations must be in token index order"); + } + } +#endif + } + + internal static ScoredFieldMatch CreateFromUnsorted(double score, byte fieldId, List tokenLocations) + { + tokenLocations.Sort((x, y) => x.MinTokenIndex.CompareTo(y.MinTokenIndex)); + return new ScoredFieldMatch(score, fieldId, tokenLocations); } /// @@ -25,18 +73,41 @@ public ScoredFieldMatch(double score, FieldMatch fieldMatch) /// /// Gets the id of the matched field. 
/// - public byte FieldId => this.FieldMatch.FieldId; + public byte FieldId { get; } /// /// Gets the locations in the field text at which the token was matched. /// - public IReadOnlyList Locations => this.FieldMatch.Locations; - + internal IReadOnlyList Locations { get; } + /// - /// Gets the details for this instance. + /// Collects, deduplicates and sorts all the s for this instance. + /// This method is only expected to be called once per instance, so the result of this is not cached. + /// Multiple calls to this method will result in multiple enumerations of the locations. /// - public FieldMatch FieldMatch { get; } + public IReadOnlyList GetTokenLocations() + { + if (this.rawOrderedLocations != null) + { + return this.rawOrderedLocations; + } + + var results = new HashSet(); + +#if !NETSTANDARD + results.EnsureCapacity(this.interfacedLocations!.Count); +#endif + + foreach (var location in this.interfacedLocations!) + { + location.AddTo(results); + } + return results + .OrderBy(l => l.TokenIndex) + .ToList(); + } + /// public override bool Equals(object? obj) { @@ -47,30 +118,107 @@ public override bool Equals(object? obj) /// public override int GetHashCode() { - return HashCode.Combine(this.Score, this.FieldMatch); + var hashCode = HashCode.Combine(this.Score, this.FieldId); + + foreach (var location in this.Locations) + { + hashCode = HashCode.Combine(hashCode, location); + } + + return hashCode; } /// - public bool Equals(ScoredFieldMatch other) + public bool Equals(ScoredFieldMatch? 
other) { - return this.Score == other.Score && - this.FieldMatch.Equals(other.FieldMatch); - } - + return other is not null && + this.Score == other.Score && + this.FieldId == other.FieldId && + this.Locations.SequenceEqual(other.Locations); + } + + internal static ScoredFieldMatch Merge(ScoredFieldMatch leftField, ScoredFieldMatch rightField) + { + if (leftField.rawOrderedLocations != null && rightField.rawOrderedLocations != null) + { + return CreateFromPresorted( + leftField.Score + rightField.Score, + leftField.FieldId, + MergeSort(leftField.rawOrderedLocations, rightField.rawOrderedLocations)); + } + + return CreateFromPresorted( + leftField.Score + rightField.Score, + leftField.FieldId, + MergeSort(leftField.Locations, rightField.Locations)); + } + /// /// Implements the operator ==. /// - public static bool operator ==(ScoredFieldMatch left, ScoredFieldMatch right) - { - return left.Equals(right); + public static bool operator ==(ScoredFieldMatch? left, ScoredFieldMatch? right) + { + return left?.Equals(right) ?? right is null; } /// /// Implements the operator !=. /// - public static bool operator !=(ScoredFieldMatch left, ScoredFieldMatch right) + public static bool operator !=(ScoredFieldMatch? left, ScoredFieldMatch? 
right) { return !(left == right); + } + + private static List MergeSort(IReadOnlyList left, IReadOnlyList right) + where T : IComparable + { + // When merging we'll compare the values by MinTokenIndex + var leftCount = left.Count; + var rightCount = right.Count; + var results = new List(leftCount + rightCount); + + var leftIndex = 0; + var rightIndex = 0; + + while (leftIndex < leftCount && rightIndex < rightCount) + { + var leftMatch = left[leftIndex]; + var rightMatch = right[rightIndex]; + + switch (leftMatch.CompareTo(rightMatch)) + { + case -1: + results.Add(leftMatch); + leftIndex++; + break; + case 1: + results.Add(rightMatch); + rightIndex++; + break; + default: + // They're equal, so we deduplicate and just add one + results.Add(leftMatch); + leftIndex++; + rightIndex++; + break; + } + } + + // Add any remaining matches from the left + while (leftIndex < leftCount) + { + results.Add(left[leftIndex]); + leftIndex++; + } + + // Add any remaining matches from the right + while (rightIndex < rightCount) + { + results.Add(right[rightIndex]); + rightIndex++; + } + + return results; } } } diff --git a/src/Lifti.Core/Querying/ScoredToken.cs b/src/Lifti.Core/Querying/ScoredToken.cs index 927f20bd..8498c10a 100644 --- a/src/Lifti.Core/Querying/ScoredToken.cs +++ b/src/Lifti.Core/Querying/ScoredToken.cs @@ -7,23 +7,51 @@ namespace Lifti.Querying { /// - /// Provides information about an item that was matched and scored whilst executing a query. + /// Provides information about a document's tokens that were matched and scored whilst executing a query. /// public readonly struct ScoredToken : IEquatable { /// /// Constructs a new instance of . 
/// - public ScoredToken(int itemId, IReadOnlyList fieldMatches) + public ScoredToken(int documentId, IReadOnlyList fieldMatches) { - this.ItemId = itemId; + this.DocumentId = documentId; this.FieldMatches = fieldMatches; + +#if DEBUG + // Verify that we are in field id order, and that there are no duplicates + // This is fairly safe to assume as the fields are indexed in order for any + // given document. +#pragma warning disable CA1062 // Validate arguments of public methods + for (var i = 0; i < fieldMatches.Count; i++) +#pragma warning restore CA1062 // Validate arguments of public methods + { + if (i > 0) + { + var previous = this.FieldMatches[i - 1].FieldId; + var next = this.FieldMatches[i].FieldId; + if (previous > next) + { + System.Diagnostics.Debug.Fail("Intermediate query results must be in field id order"); + } + else if (previous == next) + { + System.Diagnostics.Debug.Fail("Duplicate field id encountered in intermediate query results"); + } + } + } +#endif } + /// + [Obsolete("Use DocumentId property instead")] + public int ItemId => this.DocumentId; + /// - /// Gets the id of the item that was matched. + /// Gets the id of the document that was matched. /// - public int ItemId { get; } + public int DocumentId { get; } /// /// Gets the fields in which the tokens were matched. @@ -40,7 +68,7 @@ public override bool Equals(object? 
obj) /// public override int GetHashCode() { - return HashCode.Combine(this.ItemId, this.FieldMatches); + return HashCode.Combine(this.DocumentId, this.FieldMatches); } /// @@ -58,9 +86,9 @@ public override int GetHashCode() /// public bool Equals(ScoredToken other) { - return this.ItemId == other.ItemId && + return this.DocumentId == other.DocumentId && this.FieldMatches.SequenceEqual(other.FieldMatches); - } + } internal void ToString(StringBuilder builder) { @@ -69,7 +97,7 @@ internal void ToString(StringBuilder builder) throw new ArgumentNullException(nameof(builder)); } - builder.Append("Item: ").AppendLine(this.ItemId.ToString(CultureInfo.InvariantCulture)); + builder.Append("Document: ").AppendLine(this.DocumentId.ToString(CultureInfo.InvariantCulture)); builder.AppendLine("Field matches:"); foreach (var fieldMatch in this.FieldMatches) { diff --git a/src/Lifti.Core/Querying/SingleTokenLocationMatch.cs b/src/Lifti.Core/Querying/SingleTokenLocationMatch.cs deleted file mode 100644 index ce335474..00000000 --- a/src/Lifti.Core/Querying/SingleTokenLocationMatch.cs +++ /dev/null @@ -1,71 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Globalization; - -namespace Lifti.Querying -{ - /// - /// Represents the location of a single token manifested during the execution of a query. - /// - public readonly struct SingleTokenLocationMatch : ITokenLocationMatch, IEquatable - { - private readonly TokenLocation original; - - /// - /// Constructs a new instance of . - /// - public SingleTokenLocationMatch(TokenLocation original) - { - this.original = original; - } - - /// - public int MaxTokenIndex => this.original.TokenIndex; - - /// - public int MinTokenIndex => this.original.TokenIndex; - - /// - public override bool Equals(object? 
obj) - { - return obj is SingleTokenLocationMatch match && - this.Equals(match); - } - - /// - public override int GetHashCode() - { - return HashCode.Combine(this.original); - } - - /// - public IEnumerable GetLocations() - { - return new[] { original }; - } - - /// - public bool Equals(SingleTokenLocationMatch other) - { - return this.original.Equals(other.original); - } - - /// - public static bool operator ==(SingleTokenLocationMatch left, SingleTokenLocationMatch right) - { - return left.Equals(right); - } - - /// - public static bool operator !=(SingleTokenLocationMatch left, SingleTokenLocationMatch right) - { - return !(left == right); - } - - /// - public override string ToString() - { - return "Single index: " + this.original.TokenIndex.ToString(CultureInfo.InvariantCulture); - } - } -} diff --git a/src/Lifti.Core/Querying/UnionMerger.cs b/src/Lifti.Core/Querying/UnionMerger.cs index 19b6f943..3c523f5c 100644 --- a/src/Lifti.Core/Querying/UnionMerger.cs +++ b/src/Lifti.Core/Querying/UnionMerger.cs @@ -1,46 +1,76 @@ using System.Collections.Generic; -using System.Linq; namespace Lifti.Querying { /// /// Provides logic for unioning the results in two s. The results from - /// both parts of the query will be combined into one and field match locations combined where items appear on both sides. + /// both parts of the query will be combined into one and field match locations combined where documents appear on both sides. /// - public class UnionMerger : IntermediateQueryResultMerger + internal sealed class UnionMerger : IntermediateQueryResultMerger { /// /// Applies the union operation to the instances. 
/// - public static IEnumerable Apply(IntermediateQueryResult left, IntermediateQueryResult right) + public static List Apply(IntermediateQueryResult left, IntermediateQueryResult right) { - // Swap over the variables to ensure we're performing as few iterations as possible in the intersection - // "left" and "right" have no special meaning when performing an intersection - var rightDictionary = right.Matches.ToDictionary(i => i.ItemId); + // track two pointers through the lists on each side. The document ids are ordered on both sides, so we can + // move through the lists in a single pass - foreach (var leftMatch in left.Matches) + var leftIndex = 0; + var rightIndex = 0; + + var leftMatches = left.Matches; + var rightMatches = right.Matches; + var leftCount = leftMatches.Count; + var rightCount = rightMatches.Count; + + List result = new(leftCount + rightCount); + + List positionalMatches = []; + while (leftIndex < leftCount && rightIndex < rightCount) { - if (rightDictionary.TryGetValue(leftMatch.ItemId, out var rightMatch)) + var leftMatch = leftMatches[leftIndex]; + var rightMatch = rightMatches[rightIndex]; + + if (leftMatch.DocumentId == rightMatch.DocumentId) { // Exists in both - yield return new ScoredToken( - leftMatch.ItemId, - MergeFields(leftMatch, rightMatch).ToList()); + result.Add(new ScoredToken( + leftMatch.DocumentId, + MergeFields(leftMatch, rightMatch))); - rightDictionary.Remove(leftMatch.ItemId); + leftIndex++; + rightIndex++; } - else + else if (leftMatch.DocumentId < rightMatch.DocumentId) { // Exists only in current - yield return leftMatch; + result.Add(leftMatch); + leftIndex++; + } + else + { + // Exists only in next + result.Add(rightMatch); + rightIndex++; } } - // Any items still remaining in nextDictionary exist only in the new results so can just be yielded - foreach (var rightMatch in rightDictionary.Values) + // Add any remaining matches from the left + while (leftIndex < leftCount) { - yield return rightMatch; + 
result.Add(leftMatches[leftIndex]); + leftIndex++; } + + // Add any remaining matches from the right + while (rightIndex < rightCount) + { + result.Add(rightMatches[rightIndex]); + rightIndex++; + } + + return result; } } } diff --git a/src/Lifti.Core/Querying/WildcardQueryPartParser.cs b/src/Lifti.Core/Querying/WildcardQueryPartParser.cs index 04f5f932..7d2a6335 100644 --- a/src/Lifti.Core/Querying/WildcardQueryPartParser.cs +++ b/src/Lifti.Core/Querying/WildcardQueryPartParser.cs @@ -8,12 +8,16 @@ namespace Lifti.Querying { internal static class WildcardQueryPartParser { - public static bool TryParse(ReadOnlySpan token, IIndexTokenizer tokenizer, [NotNullWhen(true)] out WildcardQueryPart? part) + public static bool TryParse( + ReadOnlySpan token, + IIndexTokenizer tokenizer, + double? scoreBoost, + [NotNullWhen(true)] out WildcardQueryPart? part) { List? fragments = null; void AddFragment(WildcardQueryFragment fragment) { - fragments ??= new List(); + fragments ??= []; fragments.Add(fragment); } @@ -61,7 +65,7 @@ void AddPrecedingTextFragment(ReadOnlySpan token, int currentIndex) // wildcard fragment AddPrecedingTextFragment(token, token.Length); - part = new WildcardQueryPart(fragments); + part = new WildcardQueryPart(fragments, scoreBoost); return true; } diff --git a/src/Lifti.Core/ScoreBoostMetadata.cs b/src/Lifti.Core/ScoreBoostMetadata.cs new file mode 100644 index 00000000..0f6d18c3 --- /dev/null +++ b/src/Lifti.Core/ScoreBoostMetadata.cs @@ -0,0 +1,86 @@ +using Lifti.Tokenization.Objects; +using System; + +namespace Lifti +{ + /// + /// Maintains information about all the scoring metadata, e.g. freshness date, magnitude values, encountered + /// for a single object type. 
+ /// + public class ScoreBoostMetadata + { + private readonly DateTimeScoreBoostValues freshnessBoost = new(); + private readonly DoubleScoreBoostValues magnitudeBoost = new(); + private readonly ObjectScoreBoostOptions scoreBoostOptions; + + internal ScoreBoostMetadata(ObjectScoreBoostOptions scoreBoostOptions) + { + this.scoreBoostOptions = scoreBoostOptions; + } + + /// + /// Calculates the score boost for the given . + /// + /// + /// This is virtual to allow for unit tests to override its behavior. + /// + public virtual double CalculateScoreBoost(DocumentMetadata documentMetadata) + { + if (documentMetadata is null) + { + throw new ArgumentNullException(nameof(documentMetadata)); + } + + if (documentMetadata.ScoringFreshnessDate is null && documentMetadata.ScoringMagnitude is null) + { + return 1.0D; + } + + if (this.freshnessBoost is null || this.magnitudeBoost is null) + { + throw new LiftiException(ExceptionMessages.ScoreBoostsNotCalculated); + } + + return this.freshnessBoost.CalculateBoost(this.scoreBoostOptions.FreshnessMultiplier, documentMetadata.ScoringFreshnessDate) + + this.magnitudeBoost.CalculateBoost(this.scoreBoostOptions.MagnitudeMultiplier, documentMetadata.ScoringMagnitude); + } + + internal void Add(DocumentMetadata documentMetadata) + { + AddToBoostValues(this.freshnessBoost, documentMetadata.ScoringFreshnessDate); + AddToBoostValues(this.magnitudeBoost, documentMetadata.ScoringMagnitude); + } + + internal void Remove(DocumentMetadata documentMetadata) + { + RemoveFromBoostValues(this.freshnessBoost, documentMetadata.ScoringFreshnessDate); + RemoveFromBoostValues(this.magnitudeBoost, documentMetadata.ScoringMagnitude); + } + + private static void RemoveFromBoostValues(ScoreBoostValues boostValues, T? 
newValue) + where T : struct, IComparable + { + if (newValue is null) + { + // Nothing to do + return; + } + + var value = newValue.GetValueOrDefault(); + boostValues.Remove(value); + } + + private static void AddToBoostValues(ScoreBoostValues boostValues, T? newValue) + where T : struct, IComparable + { + if (newValue is null) + { + // Nothing to do + return; + } + + var value = newValue.GetValueOrDefault(); + boostValues.Add(value); + } + } +} diff --git a/src/Lifti.Core/ScoreBoostValues.cs b/src/Lifti.Core/ScoreBoostValues.cs new file mode 100644 index 00000000..a18ccf1c --- /dev/null +++ b/src/Lifti.Core/ScoreBoostValues.cs @@ -0,0 +1,163 @@ +using System; +using System.Collections.Generic; +using System.Linq; + +namespace Lifti +{ + internal class DoubleScoreBoostValues : ScoreBoostValues + { + protected override double ValueAsDouble(double value) + { + return value; + } + } + + internal class DateTimeScoreBoostValues : ScoreBoostValues + { + protected override double ValueAsDouble(DateTime value) + { + return value.Ticks; + } + } + + /// + /// Maintains the set of values that have been encountered for a score boost property of a given object type, allowing for + /// a normalized score to be calculated for any given value. + /// + internal abstract class ScoreBoostValues + where T : struct, IComparable + { + private readonly Dictionary valueRefCount = []; + + /// + /// Calculating a normalized score is calculated as (value - min) / (max - min). We can pre-calculate two parts of this: + /// * A value that can be applied to the score to adjust it to the baseline. This is derived from the minimum value in the set of values. + /// * The denominator of the normalization calculation. 
+ /// + private (double baselineAdjustment, double normalizationDenominator) normalizationPrecalculations; + private bool normalizationPrecalculationsValid; + + protected ScoreBoostValues() + { + } + + public T Minimum { get; private set; } + public T Maximum { get; private set; } + + public void Add(T value) + { + if (this.valueRefCount.Count == 0) + { + // This is the first value, so set the min/max to this value + this.Minimum = value; + this.Maximum = value; + + // Add the ref count for the value + this.valueRefCount.Add(value, 1); + } + else + { + // Adjust the ref count for the value + if (this.valueRefCount.TryGetValue(value, out var count)) + { + this.valueRefCount[value] = count + 1; + } + else + { + this.valueRefCount.Add(value, 1); + + // Adjust the min/max values if necessary + if (this.Minimum.CompareTo(value) > 0) + { + this.Minimum = value; + var minValue = this.ValueAsDouble(value); + this.ResetNormalizationPrecalculations(); + } + else if (this.Maximum.CompareTo(value) < 0) + { + this.Maximum = value; + this.ResetNormalizationPrecalculations(); + } + } + } + } + + private void ResetNormalizationPrecalculations() + { + this.normalizationPrecalculationsValid = false; + } + + public void Remove(T value) + { + // Adjust the ref count for the value + if (!this.valueRefCount.TryGetValue(value, out var count)) + { + throw new LiftiException(ExceptionMessages.UnexpectedScoreBoostValueRemoval); + } + + if (count == 1) + { + this.valueRefCount.Remove(value); + + if (this.valueRefCount.Count > 0) + { + // Because the value has been removed entirely, if it was the current minimum or maximum, we need to recalculate + if (this.Minimum.CompareTo(value) >= 0) + { + this.Minimum = this.valueRefCount.Keys.Min(); + this.ResetNormalizationPrecalculations(); + } + + if (this.Maximum.CompareTo(value) <= 0) + { + this.Maximum = this.valueRefCount.Keys.Max(); + this.ResetNormalizationPrecalculations(); + } + } + } + else + { + this.valueRefCount[value] = count - 1; + } + 
} + + protected abstract double ValueAsDouble(T value); + + internal double CalculateBoost(double multiplier, T? value) + { + if (value is null) + { + // No value, so no boost + return 0D; + } + + var baseline = this.ValueAsDouble((T)value); + + var (baselineAdjustment, normalizationDenominator) = this.GetNormalizationPrecalculations(); + + if (normalizationDenominator == 0D) + { + // In this case the max and min are the same, so just return the multiplier + return multiplier; + } + + // Standard normalization between 0 and 1 would be + // (value - min) / (max - min) + // But we want the value to range between 1..multiplier, so the formula we're using here is: + // 1 + ((value - min) / (max - min)) * (multiplier - 1) + // The multiplier values are guarded at index creation time to ensure they are greater than 1 + return 1D + ((baseline - baselineAdjustment) / normalizationDenominator * (multiplier - 1D)); + } + + private (double baselineAdjustment, double normalizationDenominator) GetNormalizationPrecalculations() + { + if (this.normalizationPrecalculationsValid == false) + { + var minValue = this.ValueAsDouble(this.Minimum); + this.normalizationPrecalculations = (minValue, this.ValueAsDouble(this.Maximum) - minValue); + } + + return this.normalizationPrecalculations; + } + } +} diff --git a/src/Lifti.Core/SearchResult.cs b/src/Lifti.Core/SearchResult.cs index 952fa430..4111154e 100644 --- a/src/Lifti.Core/SearchResult.cs +++ b/src/Lifti.Core/SearchResult.cs @@ -15,11 +15,9 @@ public class SearchResult /// /// Constructs a new instance. /// - /// - /// - public SearchResult(TKey item, IReadOnlyList locations) + public SearchResult(TKey key, IReadOnlyList locations) { - this.Key = item; + this.Key = key; this.FieldMatches = locations ?? throw new ArgumentNullException(nameof(locations)); var score = 0D; @@ -32,12 +30,12 @@ public SearchResult(TKey item, IReadOnlyList locations) } /// - /// Gets the item that matched the search criteria. 
+ /// Gets the key of the document that matched the search criteria. /// public TKey Key { get; } /// - /// Gets the fields that were matched for the item. Each of these is scored independently and provides detailed information + /// Gets the fields that were matched for the document. Each of these is scored independently and provides detailed information /// about the location of the tokens that were matched. /// public IReadOnlyList FieldMatches { get; } diff --git a/src/Lifti.Core/SearchResults.cs b/src/Lifti.Core/SearchResults.cs index 47155427..c98e8590 100644 --- a/src/Lifti.Core/SearchResults.cs +++ b/src/Lifti.Core/SearchResults.cs @@ -24,6 +24,9 @@ internal SearchResults(FullTextIndex index, IEnumerable this.index = index; } + /// + public int Count => this.searchResults.Count; + /// public ISearchResults OrderByField(string fieldName) { @@ -33,19 +36,19 @@ public ISearchResults OrderByField(string fieldName) } /// - public async Task>> CreateMatchPhrasesAsync( - Func, IReadOnlyList> loadItems, + public async Task>> CreateMatchPhrasesAsync( + Func, IReadOnlyList> loadItems, CancellationToken cancellationToken = default) { return await this.CreateMatchPhrasesAsync( - (keys, ct) => new ValueTask>(loadItems(keys)), + (keys, ct) => new ValueTask>(loadItems(keys)), cancellationToken) .ConfigureAwait(false); } /// - public async Task>> CreateMatchPhrasesAsync( - Func, CancellationToken, ValueTask>> loadItemsAsync, + public async Task>> CreateMatchPhrasesAsync( + Func, CancellationToken, ValueTask>> loadItemsAsync, CancellationToken cancellationToken = default) { if (loadItemsAsync is null) @@ -53,47 +56,46 @@ public ISearchResults OrderByField(string fieldName) throw new ArgumentNullException(nameof(loadItemsAsync)); } - var itemType = typeof(TItem); - var itemTokenization = this.index.ItemTokenization.Get(); - var itemResults = this.FilterFieldMatches(); + var itemTokenization = this.index.ObjectTypeConfiguration.Get(); + var filteredResults = 
this.FilterFieldMatches(); - var items = (await loadItemsAsync(itemResults.Select(x => x.searchResult.Key).ToList(), cancellationToken).ConfigureAwait(false)) + var objectResults = (await loadItemsAsync(filteredResults.Select(x => x.searchResult.Key).ToList(), cancellationToken).ConfigureAwait(false)) .ToDictionary(x => itemTokenization.KeyReader(x)); - var missingIds = itemResults.Where(x => !items.ContainsKey(x.searchResult.Key)).ToList(); + var missingIds = filteredResults.Where(x => !objectResults.ContainsKey(x.searchResult.Key)).ToList(); return missingIds.Count > 0 ? throw new LiftiException(ExceptionMessages.NotAllRequestedItemsReturned, string.Join(",", missingIds)) : await this.CreateMatchPhrasesAsync( - (x, ct) => new ValueTask(items[x]), - itemResults, + (x, ct) => new ValueTask(objectResults[x]), + filteredResults, cancellationToken).ConfigureAwait(false); } /// - public Task>> CreateMatchPhrasesAsync( - Func, ValueTask>> loadItemsAsync, + public Task>> CreateMatchPhrasesAsync( + Func, ValueTask>> loadItemsAsync, CancellationToken cancellationToken = default) { return this.CreateMatchPhrasesAsync((key, ct) => loadItemsAsync(key), cancellationToken); } /// - public async Task>> CreateMatchPhrasesAsync( - Func loadItem, - CancellationToken cancellationToken = default) + public async Task>> CreateMatchPhrasesAsync( + Func loadItem, + CancellationToken cancellationToken = default) { return await this.CreateMatchPhrasesAsync( - (keys, ct) => new ValueTask(loadItem(keys)), + (keys, ct) => new ValueTask(loadItem(keys)), cancellationToken) .ConfigureAwait(false); } /// - public async Task>> CreateMatchPhrasesAsync( - Func> loadItemAsync, + public async Task>> CreateMatchPhrasesAsync( + Func> loadItemAsync, CancellationToken cancellationToken = default) { - var itemResults = this.FilterFieldMatches(); + var itemResults = this.FilterFieldMatches(); return await this.CreateMatchPhrasesAsync( loadItemAsync, @@ -102,33 +104,33 @@ public ISearchResults 
OrderByField(string fieldName) } /// - public Task>> CreateMatchPhrasesAsync( - Func> loadItemAsync, + public Task>> CreateMatchPhrasesAsync( + Func> loadItemAsync, CancellationToken cancellationToken = default) { return this.CreateMatchPhrasesAsync((key, ct) => loadItemAsync(key), cancellationToken); } /// - public IEnumerable> CreateMatchPhrases(Func loadText) + public IEnumerable> CreateMatchPhrases(Func loadText) { return this.CreateMatchPhrasesAsync((key, ct) => new ValueTask(loadText(key))) .GetAwaiter().GetResult(); } /// - public async Task>> CreateMatchPhrasesAsync( + public async Task>> CreateMatchPhrasesAsync( Func> loadTextAsync, CancellationToken cancellationToken = default) { - var itemTokenization = this.index.DefaultTokenizer; - var itemResults = this.FilterFieldMatches(field => field == IndexedFieldLookup.DefaultFieldName); + var objectTokenization = this.index.DefaultTokenizer; + var objectResults = this.FilterFieldMatches(field => field == IndexedFieldLookup.DefaultFieldName); - return await this.CreateMatchPhrasesAsync(loadTextAsync, itemResults, cancellationToken).ConfigureAwait(false); + return await this.CreateMatchPhrasesAsync(loadTextAsync, objectResults, cancellationToken).ConfigureAwait(false); } /// - public Task>> CreateMatchPhrasesAsync( + public Task>> CreateMatchPhrasesAsync( Func> loadTextAsync, CancellationToken cancellationToken = default) { @@ -137,17 +139,17 @@ public IEnumerable> CreateMatchPhrases(Func load cancellationToken); } - private async Task>> CreateMatchPhrasesAsync( + private async Task>> CreateMatchPhrasesAsync( Func> loadTextAsync, - List<(SearchResult searchResult, List fieldMatches)> itemResults, + List<(SearchResult searchResult, List fieldMatches)> objectResults, CancellationToken cancellationToken) { var phraseBuilder = new StringBuilder(); - var matchedPhrases = new List>(this.searchResults.Count); + var matchedPhrases = new List>(this.searchResults.Count); // Create an array that can be used on each call to 
VirtualString var textArray = new string[1]; - foreach (var (searchResult, fieldMatches) in itemResults) + foreach (var (searchResult, fieldMatches) in objectResults) { textArray[0] = await loadTextAsync(searchResult.Key, cancellationToken).ConfigureAwait(false); var text = new VirtualString(textArray); @@ -158,20 +160,20 @@ public IEnumerable> CreateMatchPhrases(Func load fieldPhrases.Add(CreatePhrases(fieldMatch, text, phraseBuilder)); } - matchedPhrases.Add(new ItemPhrases(searchResult, fieldPhrases)); + matchedPhrases.Add(new DocumentPhrases(searchResult, fieldPhrases)); } return matchedPhrases; } - private async Task>> CreateMatchPhrasesAsync( - Func> loadItemAsync, - List<(SearchResult searchResult, List fieldMatches)> itemResults, + private async Task>> CreateMatchPhrasesAsync( + Func> loadItemAsync, + List<(SearchResult searchResult, List fieldMatches)> objectResults, CancellationToken cancellationToken) { var phraseBuilder = new StringBuilder(); - var matchedPhrases = new List>(this.searchResults.Count); - foreach (var (searchResult, fieldMatches) in itemResults) + var matchedPhrases = new List>(this.searchResults.Count); + foreach (var (searchResult, fieldMatches) in objectResults) { var item = await loadItemAsync(searchResult.Key, cancellationToken).ConfigureAwait(false); if (item == null) @@ -187,26 +189,26 @@ public IEnumerable> CreateMatchPhrases(Func load fieldPhrases.Add(CreatePhrases(fieldMatch, text, phraseBuilder)); } - matchedPhrases.Add(new ItemPhrases(item, searchResult, fieldPhrases)); + matchedPhrases.Add(new DocumentPhrases(item, searchResult, fieldPhrases)); } return matchedPhrases; } - private List<(SearchResult searchResult, List fieldMatches)> FilterFieldMatches() + private List<(SearchResult searchResult, List fieldMatches)> FilterFieldMatches() { - return this.FilterFieldMatches(fieldName => this.index.FieldLookup.IsKnownField(typeof(TItem), fieldName)); + return this.FilterFieldMatches(fieldName => 
this.index.FieldLookup.IsKnownField(typeof(TObject), fieldName)); } private List<(SearchResult searchResult, List fieldMatches)> FilterFieldMatches( Func useFieldForMatch) { - // Technically an index can contain fields from multiple item sources, so not all results may - // be appropriate for the requested item type. + // Technically an index can contain fields from multiple object types, so not all results may + // be appropriate for the requested object type. return this.searchResults .Select(x => ( - ItemKey: x, + SearchResult: x, FieldMatches: x.FieldMatches .Where(match => useFieldForMatch(match.FoundIn)) .ToList()) diff --git a/src/Lifti.Core/Serialization/Binary/BinarySerializer.cs b/src/Lifti.Core/Serialization/Binary/BinarySerializer.cs index 78b1b002..033eb0b6 100644 --- a/src/Lifti.Core/Serialization/Binary/BinarySerializer.cs +++ b/src/Lifti.Core/Serialization/Binary/BinarySerializer.cs @@ -1,5 +1,6 @@ using System; using System.IO; +using System.Threading; using System.Threading.Tasks; namespace Lifti.Serialization.Binary @@ -35,7 +36,7 @@ public BinarySerializer(IKeySerializer keySerializer) } /// - public async Task SerializeAsync(IIndexSnapshot snapshot, Stream stream, bool disposeStream = true) + public async Task SerializeAsync(IIndexSnapshot snapshot, Stream stream, bool disposeStream = true, CancellationToken cancellationToken = default) { if (snapshot is null) { @@ -43,22 +44,22 @@ public async Task SerializeAsync(IIndexSnapshot snapshot, Stream stream, b } using var writer = new IndexWriter(stream, disposeStream, this.keySerializer); - await writer.WriteAsync(snapshot).ConfigureAwait(false); + await writer.WriteAsync(snapshot, cancellationToken).ConfigureAwait(false); } /// - public async Task SerializeAsync(FullTextIndex index, Stream stream, bool disposeStream = true) + public async Task SerializeAsync(FullTextIndex index, Stream stream, bool disposeStream = true, CancellationToken cancellationToken = default) { if (index is null) { throw 
new ArgumentNullException(nameof(index)); } - await this.SerializeAsync(index.Snapshot, stream, disposeStream).ConfigureAwait(false); + await this.SerializeAsync(index.Snapshot, stream, disposeStream, cancellationToken).ConfigureAwait(false); } /// - public async Task DeserializeAsync(FullTextIndex index, Stream stream, bool disposeStream = true) + public async Task DeserializeAsync(FullTextIndex index, Stream stream, bool disposeStream = true, CancellationToken cancellationToken = default) { if (index is null) { @@ -71,10 +72,10 @@ public async Task DeserializeAsync(FullTextIndex index, Stream stream, boo } using var reader = await this.CreateVersionedIndexReaderAsync(stream, disposeStream).ConfigureAwait(false); - await reader.ReadIntoAsync(index).ConfigureAwait(false); + await reader.ReadAsync(index, cancellationToken).ConfigureAwait(false); } - private async Task> CreateVersionedIndexReaderAsync(Stream stream, bool disposeStream) + private async Task> CreateVersionedIndexReaderAsync(Stream stream, bool disposeStream) { var version = await ReadFileVersionAsync(stream).ConfigureAwait(false); @@ -85,6 +86,7 @@ private async Task> CreateVersionedIndexReaderAsync(Stream st 3 => new V3IndexReader(stream, disposeStream, this.keySerializer), 4 => new V4IndexReader(stream, disposeStream, this.keySerializer), 5 => new V5IndexReader(stream, disposeStream, this.keySerializer), + 6 => new V6IndexReader(stream, disposeStream, this.keySerializer), _ => throw new DeserializationException(ExceptionMessages.NoDeserializerAvailableForIndexVersion, version), }; } @@ -107,6 +109,6 @@ private static async Task ReadFileVersionAsync(Stream stream) } throw new DeserializationException(ExceptionMessages.MissingLiftiHeaderIndicatorBytes); - } + } } } diff --git a/src/Lifti.Core/Serialization/Binary/DeserializationException.cs b/src/Lifti.Core/Serialization/Binary/DeserializationException.cs index ef2652aa..9d353bc3 100644 --- 
a/src/Lifti.Core/Serialization/Binary/DeserializationException.cs +++ b/src/Lifti.Core/Serialization/Binary/DeserializationException.cs @@ -21,8 +21,10 @@ public sealed class DeserializationException : LiftiException /// public DeserializationException(string message, Exception inner) : base(message, inner) { } +#if NETSTANDARD private DeserializationException( SerializationInfo info, StreamingContext context) : base(info, context) { } +#endif } } diff --git a/src/Lifti.Core/Serialization/Binary/IIndexReader.cs b/src/Lifti.Core/Serialization/Binary/IIndexReader.cs deleted file mode 100644 index b5b414b5..00000000 --- a/src/Lifti.Core/Serialization/Binary/IIndexReader.cs +++ /dev/null @@ -1,23 +0,0 @@ -using System; -using System.Threading.Tasks; - -namespace Lifti.Serialization.Binary -{ - /// - /// Defines methods for loading an index from a source. - /// - /// - /// The type of the key in the index. - /// - public interface IIndexReader : IDisposable - where TKey : notnull - { - /// - /// Populates the given . - /// - /// - /// The index to populate. This should be an empty state. - /// - Task ReadIntoAsync(FullTextIndex index); - } -} \ No newline at end of file diff --git a/src/Lifti.Core/Serialization/Binary/IIndexSerializer.cs b/src/Lifti.Core/Serialization/Binary/IIndexSerializer.cs index e81e38b3..9c83ffe3 100644 --- a/src/Lifti.Core/Serialization/Binary/IIndexSerializer.cs +++ b/src/Lifti.Core/Serialization/Binary/IIndexSerializer.cs @@ -1,4 +1,5 @@ using System.IO; +using System.Threading; using System.Threading.Tasks; namespace Lifti.Serialization.Binary @@ -23,8 +24,11 @@ public interface IIndexSerializer /// /// /// Whether the stream should be disposed of after serialization. + /// + /// + /// A cancellation token that can be used to cancel the operation. 
/// - Task SerializeAsync(FullTextIndex index, Stream stream, bool disposeStream = true); + Task SerializeAsync(FullTextIndex index, Stream stream, bool disposeStream = true, CancellationToken cancellationToken = default); /// /// Serializes an index into a binary format. @@ -37,8 +41,11 @@ public interface IIndexSerializer /// /// /// Whether the stream should be disposed of after serialization. + /// + /// + /// A cancellation token that can be used to cancel the operation. /// - Task SerializeAsync(IIndexSnapshot snapshot, Stream stream, bool disposeStream = true); + Task SerializeAsync(IIndexSnapshot snapshot, Stream stream, bool disposeStream = true, CancellationToken cancellationToken = default); /// /// Deserializes an index from a binary format into an index. @@ -51,7 +58,10 @@ public interface IIndexSerializer /// /// /// Whether the stream should be disposed of after deserialization. + /// + /// + /// A cancellation token that can be used to cancel the operation. /// - Task DeserializeAsync(FullTextIndex index, Stream stream, bool disposeStream = true); + Task DeserializeAsync(FullTextIndex index, Stream stream, bool disposeStream = true, CancellationToken cancellationToken = default); } } \ No newline at end of file diff --git a/src/Lifti.Core/Serialization/Binary/IIndexWriter.cs b/src/Lifti.Core/Serialization/Binary/IIndexWriter.cs deleted file mode 100644 index 0463ccf9..00000000 --- a/src/Lifti.Core/Serialization/Binary/IIndexWriter.cs +++ /dev/null @@ -1,10 +0,0 @@ -using System; -using System.Threading.Tasks; - -namespace Lifti.Serialization.Binary -{ - internal interface IIndexWriter : IDisposable - { - Task WriteAsync(IIndexSnapshot snapshot); - } -} \ No newline at end of file diff --git a/src/Lifti.Core/Serialization/Binary/IndexWriter.cs b/src/Lifti.Core/Serialization/Binary/IndexWriter.cs index 4e30b9be..d962d579 100644 --- a/src/Lifti.Core/Serialization/Binary/IndexWriter.cs +++ b/src/Lifti.Core/Serialization/Binary/IndexWriter.cs @@ -1,13 
+1,16 @@ -using System.Diagnostics; +using System; +using System.Collections.Generic; +using System.Diagnostics; using System.IO; using System.Text; +using System.Threading; using System.Threading.Tasks; namespace Lifti.Serialization.Binary { - internal class IndexWriter : IIndexWriter + internal class IndexWriter : IndexSerializerBase { - private const ushort Version = 5; + private const ushort Version = 6; private readonly Stream underlyingStream; private readonly bool disposeStream; private readonly IKeySerializer keySerializer; @@ -23,37 +26,38 @@ public IndexWriter(Stream stream, bool disposeStream, IKeySerializer keySe this.writer = new BinaryWriter(this.buffer, Encoding.UTF8); } - public async Task WriteAsync(IIndexSnapshot snapshot) + protected override void Dispose(bool disposing) { - await this.WriteHeaderAsync(snapshot).ConfigureAwait(false); + base.Dispose(disposing); - await this.WriteFieldsAsync(snapshot).ConfigureAwait(false); - - await this.WriteItemsAsync(snapshot).ConfigureAwait(false); - - await this.WriteNodeAsync(snapshot.Root).ConfigureAwait(false); + if (disposing) + { + this.writer.Dispose(); + this.buffer.Dispose(); - await this.WriteTerminatorAsync().ConfigureAwait(false); + if (this.disposeStream) + { + this.underlyingStream.Dispose(); + } + } } - private async Task WriteFieldsAsync(IIndexSnapshot snapshot) + protected override ValueTask OnSerializationStart(IIndexSnapshot snapshot, CancellationToken cancellationToken) { - // We need to write information for all the fields in the index so that when - // we deserialize them to a new index we can ensure that the field ids are - // mapped correctly to a new index structure as new static fields may be registered - // in a new version of the index. 
- var fieldNames = snapshot.FieldLookup.AllFieldNames; + return this.WriteHeaderAsync(cancellationToken); + } - this.writer.Write((byte)fieldNames.Count); + protected override async ValueTask WriteFieldsAsync(IReadOnlyList fields, CancellationToken cancellationToken) + { + this.writer.Write((byte)fields.Count); - foreach (var fieldName in fieldNames) + foreach (var field in fields) { - var field = snapshot.FieldLookup.GetFieldInfo(fieldName); - this.writer.Write(field.Id); - this.writer.Write((byte)field.FieldKind); + this.writer.Write(field.FieldId); + this.writer.Write((byte)field.Kind); this.writer.Write(field.Name); - if (field.FieldKind == FieldKind.Dynamic) + if (field.Kind == FieldKind.Dynamic) { if (field.DynamicFieldReaderName == null) { @@ -64,10 +68,53 @@ private async Task WriteFieldsAsync(IIndexSnapshot snapshot) } } - await this.FlushAsync().ConfigureAwait(false); + await this.FlushAsync(cancellationToken).ConfigureAwait(false); + } + + protected override ValueTask OnSerializationComplete(IIndexSnapshot snapshot, CancellationToken cancellationToken) + { + return this.WriteTerminatorAsync(cancellationToken); } - private async Task WriteNodeAsync(IndexNode node) + protected override async ValueTask WriteIndexMetadataAsync(IIndexSnapshot index, CancellationToken cancellationToken) + { + this.writer.WriteNonNegativeVarInt32(index.Metadata.DocumentCount); + + foreach (var documentMetadata in index.Metadata.GetIndexedDocuments()) + { + // Write the standard information for the document, regardless of whether is was + // read from an object + this.writer.WriteNonNegativeVarInt32(documentMetadata.Id); + this.keySerializer.Write(this.writer, documentMetadata.Key); + this.writer.WriteNonNegativeVarInt32(documentMetadata.DocumentStatistics.TokenCountByField.Count); + foreach (var fieldTokenCount in documentMetadata.DocumentStatistics.TokenCountByField) + { + this.writer.Write(fieldTokenCount.Key); + this.writer.WriteNonNegativeVarInt32(fieldTokenCount.Value); + 
} + + // If the object is associated to an object type, write the object type id and any + // associated freshness info + if (documentMetadata.ObjectTypeId is byte objectTypeId) + { + this.WriteDocumentObjectMetadata(objectTypeId, documentMetadata); + } + else + { + // Write a zero byte to indicate that there is no object type id or metadata + this.writer.Write((byte)0); + } + } + + await this.FlushAsync(cancellationToken).ConfigureAwait(false); + } + + protected override async ValueTask WriteNodesAsync(IndexNode rootNode, CancellationToken cancellationToken) + { + await this.WriteNodeAsync(rootNode, cancellationToken).ConfigureAwait(false); + } + + private async ValueTask WriteNodeAsync(IndexNode node, CancellationToken cancellationToken) { var matchCount = node.Matches.Count; var childNodeCount = node.ChildNodes.Count; @@ -83,10 +130,10 @@ private async Task WriteNodeAsync(IndexNode node) if (childNodeCount > 0) { - foreach (var childNode in node.ChildNodes) + foreach (var (character, childNode) in node.ChildNodes.CharacterMap) { - this.writer.WriteVarUInt16(childNode.Key); - await this.WriteNodeAsync(childNode.Value).ConfigureAwait(false); + this.writer.WriteVarUInt16(character); + await this.WriteNodeAsync(childNode, cancellationToken).ConfigureAwait(false); } } @@ -97,18 +144,18 @@ private async Task WriteNodeAsync(IndexNode node) if (childNodeCount > 0) { - await this.FlushAsync().ConfigureAwait(false); + await this.FlushAsync(cancellationToken).ConfigureAwait(false); } } private void WriteMatchLocations(IndexNode node) { - foreach (var match in node.Matches) + foreach (var (documentId, matches) in node.Matches.Enumerate()) { - this.writer.WriteNonNegativeVarInt32(match.Key); - this.writer.WriteNonNegativeVarInt32(match.Value.Count); + this.writer.WriteNonNegativeVarInt32(documentId); + this.writer.WriteNonNegativeVarInt32(matches.Count); - foreach (var fieldMatch in match.Value) + foreach (var fieldMatch in matches) { this.writer.Write(fieldMatch.FieldId); 
this.writer.WriteNonNegativeVarInt32(fieldMatch.Locations.Count); @@ -122,9 +169,9 @@ private void WriteTokenLocations(IndexedToken fieldMatch) TokenLocation? lastLocation = null; foreach (var location in fieldMatch.Locations) { - if (lastLocation != null) + if (lastLocation is not null) { - var locationData = DeriveEntryStructureInformation(lastLocation.Value, location); + var locationData = DeriveEntryStructureInformation(lastLocation, location); if (locationData.structure == LocationEntrySerializationOptimizations.Full) { @@ -218,55 +265,63 @@ private void WriteLocationInFull(TokenLocation location) this.writer.WriteVarUInt16(location.Length); } - private async Task WriteTerminatorAsync() + private async ValueTask WriteTerminatorAsync(CancellationToken cancellationToken) { this.writer.Write(new byte[] { 0xFF, 0xFF, 0xFF, 0xFF }); - await this.FlushAsync().ConfigureAwait(false); + await this.FlushAsync(cancellationToken).ConfigureAwait(false); } - private async Task WriteItemsAsync(IIndexSnapshot index) + private void WriteDocumentObjectMetadata(byte objectTypeId, DocumentMetadata documentMetadata) { - this.writer.WriteNonNegativeVarInt32(index.Items.Count); + // Write the object info data byte + // 0-4: The object type id + // 5: 1 - the object has a scoring freshness date + // 6: 1 - the object has a scoring magnitude + // 7: RESERVED for now + var objectInfoData = objectTypeId; + Debug.Assert(objectTypeId < 32, "The object type id should be less than 32"); + + if (documentMetadata.ScoringFreshnessDate != null) + { + objectInfoData |= 0x20; + } - foreach (var itemMetadata in index.Items.GetIndexedItems()) + if (documentMetadata.ScoringMagnitude != null) { - this.writer.WriteNonNegativeVarInt32(itemMetadata.Id); - this.keySerializer.Write(this.writer, itemMetadata.Item); - this.writer.WriteNonNegativeVarInt32(itemMetadata.DocumentStatistics.TokenCountByField.Count); - foreach (var fieldTokenCount in itemMetadata.DocumentStatistics.TokenCountByField) - { - 
this.writer.Write(fieldTokenCount.Key); - this.writer.WriteNonNegativeVarInt32(fieldTokenCount.Value); - } + objectInfoData |= 0x40; } - await this.FlushAsync().ConfigureAwait(false); - } + this.writer.Write(objectInfoData); - private async Task WriteHeaderAsync(IIndexSnapshot index) - { - this.writer.Write(new byte[] { 0x4C, 0x49 }); - this.writer.Write(Version); + if (documentMetadata.ScoringFreshnessDate is DateTime scoringFreshnessDate) + { + this.writer.Write(scoringFreshnessDate.Ticks); + } - await this.FlushAsync().ConfigureAwait(false); + if (documentMetadata.ScoringMagnitude is double scoringMagnitude) + { + this.writer.Write(scoringMagnitude); + } } - public void Dispose() + private async ValueTask WriteHeaderAsync(CancellationToken cancellationToken) { - this.writer.Dispose(); - this.buffer.Dispose(); + this.writer.Write(new byte[] { 0x4C, 0x49 }); + this.writer.Write(Version); - if (this.disposeStream) - { - this.underlyingStream.Dispose(); - } + await this.FlushAsync(cancellationToken).ConfigureAwait(false); } - private async Task FlushAsync() + private async ValueTask FlushAsync(CancellationToken cancellationToken) { this.writer.Flush(); this.buffer.Position = 0L; - await this.buffer.CopyToAsync(this.underlyingStream).ConfigureAwait(false); +#if NETSTANDARD + // 81920 is taken from DefaultCopyBufferSize of GetCopyBufferSize in Stream.cs + await this.buffer.CopyToAsync(this.underlyingStream, 81920, cancellationToken).ConfigureAwait(false); +#else + await this.buffer.CopyToAsync(this.underlyingStream, cancellationToken).ConfigureAwait(false); +#endif this.buffer.SetLength(0L); } } diff --git a/src/Lifti.Core/Serialization/Binary/V2IndexReader.cs b/src/Lifti.Core/Serialization/Binary/V2IndexReader.cs index b09f3c6c..36a5f789 100644 --- a/src/Lifti.Core/Serialization/Binary/V2IndexReader.cs +++ b/src/Lifti.Core/Serialization/Binary/V2IndexReader.cs @@ -1,14 +1,13 @@ using System; using System.Collections.Generic; -using System.Collections.Immutable; using 
System.IO; using System.Linq; +using System.Threading; using System.Threading.Tasks; namespace Lifti.Serialization.Binary { - - internal class V2IndexReader : IIndexReader + internal class V2IndexReader : IIndexDeserializer where TKey : notnull { private readonly Stream underlyingStream; @@ -39,7 +38,9 @@ public void Dispose() } } - public async Task ReadIntoAsync(FullTextIndex index) + public async ValueTask ReadAsync( + FullTextIndex index, + CancellationToken cancellationToken) { await this.FillBufferAsync().ConfigureAwait(false); @@ -52,13 +53,14 @@ public async Task ReadIntoAsync(FullTextIndex index) // Keep track of all the distinct fields ids encountered during deserialization var distinctFieldIds = new HashSet(); - var itemCount = this.reader.ReadInt32(); - for (var i = 0; i < itemCount; i++) + var documentCount = this.reader.ReadInt32(); + var documentMetadataCollector = new DocumentMetadataCollector(documentCount); + for (var i = 0; i < documentCount; i++) { var id = this.reader.ReadInt32(); var key = keyReader(this.reader); var fieldStatCount = this.reader.ReadInt32(); - var fieldTokenCounts = ImmutableDictionary.CreateBuilder(); + var fieldTokenCounts = new Dictionary(fieldStatCount); var totalTokenCount = 0; for (var fieldIndex = 0; fieldIndex < fieldStatCount; fieldIndex++) { @@ -69,11 +71,12 @@ public async Task ReadIntoAsync(FullTextIndex index) fieldTokenCounts.Add(fieldId, wordCount); totalTokenCount += wordCount; } - - index.IdPool.Add( - id, - key, - new DocumentStatistics(fieldTokenCounts.ToImmutable(), totalTokenCount)); + + var documentStatistics = new DocumentStatistics(fieldTokenCounts, totalTokenCount); + + // Using ForLooseText here because we don't know any of the new information associated to an object + // type, e.g. its id or score boost options. This is the closest we can get to the old format. 
+ documentMetadataCollector.Add(DocumentMetadata.ForLooseText(id, key, documentStatistics)); } // Double check that the index structure is aware of all the fields that are being deserialized @@ -86,7 +89,7 @@ public async Task ReadIntoAsync(FullTextIndex index) throw new LiftiException(ExceptionMessages.UnknownFieldsInSerializedIndex); } - index.SetRootWithLock(this.DeserializeNode(index.IndexNodeFactory, 0)); + var rootNode = this.DeserializeNode(index.IndexNodeFactory, 0); if (this.reader.ReadInt32() != -1) { @@ -96,7 +99,9 @@ public async Task ReadIntoAsync(FullTextIndex index) if (this.underlyingStream.CanSeek) { this.underlyingStream.Position = this.buffer.Position + this.initialUnderlyingStreamOffset; - } + } + + index.RestoreIndex(rootNode, documentMetadataCollector); } private IndexNode DeserializeNode(IIndexNodeFactory nodeFactory, int depth) @@ -105,48 +110,38 @@ private IndexNode DeserializeNode(IIndexNodeFactory nodeFactory, int depth) var matchCount = this.reader.ReadInt32(); var childNodeCount = this.reader.ReadInt32(); var intraNodeText = textLength == 0 ? null : this.ReadIntraNodeText(textLength); - var childNodes = childNodeCount > 0 ? ImmutableDictionary.CreateBuilder() : null; - var matches = matchCount > 0 ? ImmutableDictionary.CreateBuilder>() : null; + var childNodes = childNodeCount > 0 ? new ChildNodeMapEntry[childNodeCount] : null; + var matches = matchCount > 0 ? 
new Dictionary>() : null; for (var i = 0; i < childNodeCount; i++) { var matchChar = this.ReadMatchedCharacter(); - childNodes!.Add(matchChar, this.DeserializeNode(nodeFactory, depth + 1)); + childNodes![i] = new(matchChar, this.DeserializeNode(nodeFactory, depth + 1)); } - var locationMatches = new List(50); - for (var itemMatch = 0; itemMatch < matchCount; itemMatch++) + for (var documentMatch = 0; documentMatch < matchCount; documentMatch++) { - var itemId = this.reader.ReadInt32(); + var documentId = this.reader.ReadInt32(); var fieldCount = this.reader.ReadInt32(); - - var indexedTokens = ImmutableList.CreateBuilder(); + var indexedTokens = new IndexedToken[fieldCount]; for (var fieldMatch = 0; fieldMatch < fieldCount; fieldMatch++) { var fieldId = this.reader.ReadByte(); var locationCount = this.reader.ReadInt32(); - - locationMatches.Clear(); - - // Resize the collection immediately if required to prevent multiple resizes during deserialization - if (locationMatches.Capacity < locationCount) - { - locationMatches.Capacity = locationCount; - } - + var locationMatches = new List(locationCount); this.ReadLocations(locationCount, locationMatches); - indexedTokens.Add(new IndexedToken(fieldId, locationMatches.ToArray())); + indexedTokens[fieldMatch] = new IndexedToken(fieldId, locationMatches); } - matches!.Add(itemId, indexedTokens.ToImmutable()); + matches!.Add(documentId, indexedTokens); } return nodeFactory.CreateNode( intraNodeText, - childNodes?.ToImmutable() ?? ImmutableDictionary.Empty, - matches?.ToImmutable() ?? ImmutableDictionary>.Empty); + childNodes == null ? ChildNodeMap.Empty : new ChildNodeMap(childNodes), + matches == null ? 
DocumentTokenMatchMap.Empty : new DocumentTokenMatchMap(matches)); } /// @@ -184,12 +179,12 @@ private void ReadLocations(int locationCount, List locationMatche } else { - if (lastLocation == null) + if (lastLocation is null) { throw new DeserializationException(ExceptionMessages.MalformedDataExpectedFullLocationEntry); } - location = this.DeserializeLocationData(lastLocation.Value, structureType); + location = this.DeserializeLocationData(lastLocation, structureType); } locationMatches.Add(location); @@ -232,6 +227,6 @@ private async Task FillBufferAsync() this.initialUnderlyingStreamOffset = this.underlyingStream.Position; await this.underlyingStream.CopyToAsync(this.buffer).ConfigureAwait(false); this.buffer.Position = 0; - } + } } } diff --git a/src/Lifti.Core/Serialization/Binary/V5IndexReader.cs b/src/Lifti.Core/Serialization/Binary/V5IndexReader.cs index aa64a5ee..fac29ae8 100644 --- a/src/Lifti.Core/Serialization/Binary/V5IndexReader.cs +++ b/src/Lifti.Core/Serialization/Binary/V5IndexReader.cs @@ -1,17 +1,16 @@ -using System; -using System.Collections.Generic; -using System.Collections.Immutable; +using System.Collections.Generic; using System.IO; +using System.Threading; using System.Threading.Tasks; namespace Lifti.Serialization.Binary { - internal class V5IndexReader : IIndexReader, IDisposable + internal class V5IndexReader : IndexDeserializerBase where TKey : notnull { private readonly Stream underlyingStream; private readonly bool disposeStream; - private readonly IKeySerializer keySerializer; + protected readonly IKeySerializer keySerializer; private readonly MemoryStream buffer; private long initialUnderlyingStreamOffset; protected readonly BinaryReader reader; @@ -26,26 +25,29 @@ public V5IndexReader(Stream stream, bool disposeStream, IKeySerializer key this.reader = new BinaryReader(this.buffer); } - public void Dispose() + protected override void Dispose(bool disposing) { - this.reader.Dispose(); - this.buffer.Dispose(); + 
base.Dispose(disposing); - if (this.disposeStream) + if (disposing) { - this.underlyingStream.Dispose(); + this.reader.Dispose(); + this.buffer.Dispose(); + + if (this.disposeStream) + { + this.underlyingStream.Dispose(); + } } } - public async Task ReadIntoAsync(FullTextIndex index) + protected override async ValueTask OnDeserializationStartingAsync(CancellationToken cancellationToken) { await this.FillBufferAsync().ConfigureAwait(false); + } - var fieldIdMap = this.ReadFields(index); - this.ReadIndexedItems(index); - - index.SetRootWithLock(this.DeserializeNode(fieldIdMap, index.IndexNodeFactory, 0)); - + protected override ValueTask OnDeserializationCompleteAsync(FullTextIndex index, CancellationToken cancellationToken) + { if (this.reader.ReadInt32() != -1) { throw new DeserializationException(ExceptionMessages.MissingIndexTerminator); @@ -55,12 +57,14 @@ public async Task ReadIntoAsync(FullTextIndex index) { this.underlyingStream.Position = this.buffer.Position + this.initialUnderlyingStreamOffset; } + + return default; } - private Dictionary ReadFields(FullTextIndex index) + protected override ValueTask DeserializeKnownFieldsAsync(CancellationToken cancellationToken) { var fieldCount = this.reader.ReadNonNegativeVarInt32(); - var serializedFields = new List(fieldCount); + var serializedFields = new SerializedFieldCollector(fieldCount); for (var i = 0; i < fieldCount; i++) { @@ -71,18 +75,20 @@ public async Task ReadIntoAsync(FullTextIndex index) serializedFields.Add(new(fieldId, name, kind, dynamicFieldReaderName)); } - return index.RehydrateSerializedFields(serializedFields); + return new(serializedFields); } - private void ReadIndexedItems(FullTextIndex index) + protected override ValueTask> DeserializeDocumentMetadataAsync(CancellationToken cancellationToken) { - var itemCount = this.reader.ReadNonNegativeVarInt32(); - for (var i = 0; i < itemCount; i++) + var documentCount = this.reader.ReadNonNegativeVarInt32(); + var documentMetadataCollector = new 
DocumentMetadataCollector(documentCount); + + for (var i = 0; i < documentCount; i++) { var id = this.reader.ReadNonNegativeVarInt32(); var key = this.keySerializer.Read(this.reader); var fieldStatCount = (int)this.reader.ReadByte(); - var fieldTokenCounts = ImmutableDictionary.CreateBuilder(); + var fieldTokenCounts = new Dictionary(fieldStatCount); var totalTokenCount = 0; for (var fieldIndex = 0; fieldIndex < fieldStatCount; fieldIndex++) { @@ -92,64 +98,65 @@ private void ReadIndexedItems(FullTextIndex index) totalTokenCount += wordCount; } - index.IdPool.Add( - id, - key, - new DocumentStatistics(fieldTokenCounts.ToImmutable(), totalTokenCount)); + var documentStatistics = new DocumentStatistics(fieldTokenCounts, totalTokenCount); + + // Using ForLooseText here because we don't know any of the new information associated to an object + // type, e.g. its id or score boost options. This is the closest we can get to the old format. + documentMetadataCollector.Add(DocumentMetadata.ForLooseText(id, key, documentStatistics)); } + + return new(documentMetadataCollector); } - private IndexNode DeserializeNode(Dictionary fieldIdMap, IIndexNodeFactory nodeFactory, int depth) + protected override ValueTask DeserializeIndexNodeHierarchyAsync( + SerializedFieldIdMap serializedFieldIdMap, + IIndexNodeFactory indexNodeFactory, + CancellationToken cancellationToken) + { + return new(this.DeserializeNode(serializedFieldIdMap, indexNodeFactory, 0)); + } + + private IndexNode DeserializeNode(SerializedFieldIdMap fieldIdMap, IIndexNodeFactory nodeFactory, int depth) { var textLength = this.reader.ReadNonNegativeVarInt32(); var matchCount = this.reader.ReadNonNegativeVarInt32(); var childNodeCount = this.reader.ReadNonNegativeVarInt32(); var intraNodeText = textLength == 0 ? null : this.ReadIntraNodeText(textLength); - var childNodes = childNodeCount > 0 ? ImmutableDictionary.CreateBuilder() : null; - var matches = matchCount > 0 ? 
ImmutableDictionary.CreateBuilder>() : null; + var childNodes = childNodeCount > 0 ? new ChildNodeMapEntry[childNodeCount] : null; + var matches = matchCount > 0 ? new Dictionary>() : null; for (var i = 0; i < childNodeCount; i++) { var matchChar = (char)this.reader.ReadVarUInt16(); - childNodes!.Add(matchChar, this.DeserializeNode(fieldIdMap, nodeFactory, depth + 1)); + childNodes![i] = new(matchChar, this.DeserializeNode(fieldIdMap, nodeFactory, depth + 1)); } - var locationMatches = new List(50); - for (var itemMatch = 0; itemMatch < matchCount; itemMatch++) + for (var documentMatch = 0; documentMatch < matchCount; documentMatch++) { - var itemId = this.reader.ReadNonNegativeVarInt32(); + var documentId = this.reader.ReadNonNegativeVarInt32(); var fieldCount = this.reader.ReadNonNegativeVarInt32(); - - var indexedTokens = ImmutableList.CreateBuilder(); + var indexedTokens = new IndexedToken[fieldCount]; for (var fieldMatch = 0; fieldMatch < fieldCount; fieldMatch++) { // We read the serialized file id and use the mapping that the index has given us to // map it to the field id in the new index. - var fieldId = fieldIdMap[this.reader.ReadByte()]; + var fieldId = fieldIdMap.Map(this.reader.ReadByte()); var locationCount = this.reader.ReadNonNegativeVarInt32(); - - locationMatches.Clear(); - - // Resize the collection immediately if required to prevent multiple resizes during deserialization - if (locationMatches.Capacity < locationCount) - { - locationMatches.Capacity = locationCount; - } - + var locationMatches = new List(locationCount); this.ReadLocations(locationCount, locationMatches); - indexedTokens.Add(new IndexedToken(fieldId, locationMatches.ToArray())); + indexedTokens[fieldMatch] = new IndexedToken(fieldId, [.. locationMatches]); } - matches!.Add(itemId, indexedTokens.ToImmutable()); + matches!.Add(documentId, indexedTokens); } return nodeFactory.CreateNode( intraNodeText, - childNodes?.ToImmutable() ?? 
ImmutableDictionary.Empty, - matches?.ToImmutable() ?? ImmutableDictionary>.Empty); + childNodes == null ? ChildNodeMap.Empty : new ChildNodeMap(childNodes), + matches == null ? DocumentTokenMatchMap.Empty : new DocumentTokenMatchMap(matches)); } /// @@ -189,12 +196,12 @@ private void ReadLocations(int locationCount, List locationMatche } else { - if (lastLocation == null) + if (lastLocation is null) { throw new DeserializationException(ExceptionMessages.MalformedDataExpectedFullLocationEntry); } - location = this.DeserializeLocationData(lastLocation.Value, structureType); + location = this.DeserializeLocationData(lastLocation, structureType); } locationMatches.Add(location); diff --git a/src/Lifti.Core/Serialization/Binary/V6IndexReader.cs b/src/Lifti.Core/Serialization/Binary/V6IndexReader.cs new file mode 100644 index 00000000..7a715922 --- /dev/null +++ b/src/Lifti.Core/Serialization/Binary/V6IndexReader.cs @@ -0,0 +1,72 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Threading; +using System.Threading.Tasks; + +namespace Lifti.Serialization.Binary +{ + internal class V6IndexReader : V5IndexReader, IDisposable + where TKey : notnull + { + public V6IndexReader(Stream stream, bool disposeStream, IKeySerializer keySerializer) + : base(stream, disposeStream, keySerializer) + { + } + + /// + /// Version 6 of the index adds in scoring metadata for each document and its associated object type. + /// When reading document we now need to additionally read any associated object type id, and the score boost + /// metadata associated to the object. 
+ /// + protected override ValueTask> DeserializeDocumentMetadataAsync(CancellationToken cancellationToken) + { + var documentCount = this.reader.ReadNonNegativeVarInt32(); + var documentMetadataCollector = new DocumentMetadataCollector(documentCount); + + for (var i = 0; i < documentCount; i++) + { + // First read the common information that's available whether or not the document is associated to an object + var id = this.reader.ReadNonNegativeVarInt32(); + var key = this.keySerializer.Read(this.reader); + var fieldStatCount = (int)this.reader.ReadByte(); + var fieldTokenCounts = new Dictionary(fieldStatCount); + var totalTokenCount = 0; + for (var fieldIndex = 0; fieldIndex < fieldStatCount; fieldIndex++) + { + var fieldId = this.reader.ReadByte(); + var wordCount = this.reader.ReadNonNegativeVarInt32(); + fieldTokenCounts.Add(fieldId, wordCount); + totalTokenCount += wordCount; + } + + var documentStatistics = new DocumentStatistics(fieldTokenCounts, totalTokenCount); + + // Now read the object type information, if any + var objectBitMaskInfo = this.reader.ReadByte(); + if (objectBitMaskInfo != 0) + { + // The object bit mask is: + // 0-4: The object type id + // 5: 1 - the object has a scoring freshness date + // 6: 1 - the object has a scoring magnitude + // 7: RESERVED for now + var objectTypeId = (byte)(objectBitMaskInfo & 0x1F); + var hasScoringFreshnessDate = (objectBitMaskInfo & 0x20) != 0; + var hasScoringMagnitude = (objectBitMaskInfo & 0x40) != 0; + + DateTime? freshnessDate = hasScoringFreshnessDate ? new DateTime(this.reader.ReadInt64()) : null; + double? magnitude = hasScoringMagnitude ? 
this.reader.ReadDouble() : null; + + documentMetadataCollector.Add(DocumentMetadata.ForObject(objectTypeId, id, key, documentStatistics, freshnessDate, magnitude)); + } + else + { + documentMetadataCollector.Add(DocumentMetadata.ForLooseText(id, key, documentStatistics)); + } + } + + return new(documentMetadataCollector); + } + } +} diff --git a/src/Lifti.Core/Serialization/DeserializedDataCollector.cs b/src/Lifti.Core/Serialization/DeserializedDataCollector.cs new file mode 100644 index 00000000..b1097524 --- /dev/null +++ b/src/Lifti.Core/Serialization/DeserializedDataCollector.cs @@ -0,0 +1,28 @@ +using System.Collections.Generic; + +namespace Lifti.Serialization +{ + /// + /// An abstract base class for collecting deserialized data during a deserialization process. + /// + public abstract class DeserializedDataCollector + { + /// + /// Creates a new instance of the class. + /// + protected DeserializedDataCollector(int expectedCount) + { + this.Collected = new List(expectedCount); + } + + /// + /// Adds the metadata to the collection. + /// + public void Add(T item) + { + this.Collected.Add(item); + } + + internal List Collected { get; } + } +} diff --git a/src/Lifti.Core/Serialization/DocumentMetadataCollector.cs b/src/Lifti.Core/Serialization/DocumentMetadataCollector.cs new file mode 100644 index 00000000..5dcfb23f --- /dev/null +++ b/src/Lifti.Core/Serialization/DocumentMetadataCollector.cs @@ -0,0 +1,29 @@ +using System.Collections.Generic; + +namespace Lifti.Serialization +{ + /// + /// Collects document metadata during a deserialization operation. + /// + public sealed class DocumentMetadataCollector : DeserializedDataCollector> + { + /// + /// Creates a new instance of the class. + /// + public DocumentMetadataCollector() + : base(10) + { + } + + /// + /// Creates a new instance of the class. + /// + /// + /// The expected number of records to be collected. 
+ /// + public DocumentMetadataCollector(int expectedCount) + : base(expectedCount) + { + } + } +} diff --git a/src/Lifti.Core/Serialization/IIndexDeserializer.cs b/src/Lifti.Core/Serialization/IIndexDeserializer.cs new file mode 100644 index 00000000..d281bbdc --- /dev/null +++ b/src/Lifti.Core/Serialization/IIndexDeserializer.cs @@ -0,0 +1,23 @@ +using System; +using System.Threading; +using System.Threading.Tasks; + +namespace Lifti.Serialization +{ + /// + /// Defines methods for loading an index from a source. + /// + /// + /// The type of the key in the index. + /// + public interface IIndexDeserializer : IDisposable + where TKey : notnull + { + /// + /// Reconstructs the index from a serialized source. + /// + ValueTask ReadAsync( + FullTextIndex index, + CancellationToken cancellationToken = default); + } +} \ No newline at end of file diff --git a/src/Lifti.Core/Serialization/IIndexSerializer.cs b/src/Lifti.Core/Serialization/IIndexSerializer.cs new file mode 100644 index 00000000..95be31a0 --- /dev/null +++ b/src/Lifti.Core/Serialization/IIndexSerializer.cs @@ -0,0 +1,11 @@ +using System; +using System.Threading; +using System.Threading.Tasks; + +namespace Lifti.Serialization +{ + internal interface IIndexSerializer : IDisposable + { + ValueTask WriteAsync(IIndexSnapshot snapshot, CancellationToken cancellationToken = default); + } +} \ No newline at end of file diff --git a/src/Lifti.Core/Serialization/IndexDeserializerBase.cs b/src/Lifti.Core/Serialization/IndexDeserializerBase.cs new file mode 100644 index 00000000..fbf8d8fc --- /dev/null +++ b/src/Lifti.Core/Serialization/IndexDeserializerBase.cs @@ -0,0 +1,122 @@ +using System; +using System.Threading; +using System.Threading.Tasks; + +namespace Lifti.Serialization +{ + /// + /// A base class for index readers capable of deserializing an index's information. + /// + /// + /// The type of key in the index. 
+ /// + public abstract class IndexDeserializerBase : IIndexDeserializer + where TKey : notnull + { + /// + public async ValueTask ReadAsync( + FullTextIndex index, + CancellationToken cancellationToken) + { + if (index is null) + { + throw new ArgumentNullException(nameof(index)); + } + + await this.OnDeserializationStartingAsync(cancellationToken).ConfigureAwait(false); + + // Deserialize any fields that are used in the index + var serializedFields = await this.DeserializeKnownFieldsAsync(cancellationToken).ConfigureAwait(false); + + // Map the serialized fields to the fields in the index as it is now structured + // We do this because a code change may have been made to the index's FullTextIndexBuilder definition + // since the serialized index was created, which means that serialized field ids may need to be mapped + // to ids. + var fieldMap = index.MapSerializedFieldIds(serializedFields.Collected); + + // Next deserialize the document metadata + var documentMetadata = await this.DeserializeDocumentMetadataAsync(cancellationToken).ConfigureAwait(false); + + // Finally deserialize the index node hierarchy + var rootNode = await this.DeserializeIndexNodeHierarchyAsync(fieldMap, index.IndexNodeFactory, cancellationToken).ConfigureAwait(false); + + // Update the index with the deserialized information + index.RestoreIndex(rootNode, documentMetadata); + + await this.OnDeserializationCompleteAsync(index, cancellationToken).ConfigureAwait(false); + } + + /// + /// Invoked when the deserialization of the index is starting. + /// + /// + /// A cancellation token that can be used to cancel the operation. + /// + protected virtual ValueTask OnDeserializationStartingAsync(CancellationToken cancellationToken) + { + return default; + } + + /// + /// Invoked when the deserialization of the index is complete. + /// + /// + /// The index, containing the deserialized information. + /// + /// + /// A cancellation token that can be used to cancel the operation. 
+ /// + protected virtual ValueTask OnDeserializationCompleteAsync(FullTextIndex index, CancellationToken cancellationToken) + { + return default; + } + + /// + /// Deserializes the known fields from the index into a . + /// + /// + /// A cancellation token that can be used to cancel the operation. + /// + protected abstract ValueTask DeserializeKnownFieldsAsync(CancellationToken cancellationToken); + + /// + /// Deserializes the document metadata from the index into a . + /// + /// + /// A cancellation token that can be used to cancel the operation. + /// + protected abstract ValueTask> DeserializeDocumentMetadataAsync(CancellationToken cancellationToken); + + /// + /// Deserializes the index node hierarchy for the index, returning the root . + /// + /// + /// A map of the serialized field ids to the field ids in the index as it is now structured. Use this to map any deserialized field ids + /// to their new ids. + /// + /// + /// The index node factory to use to create the index nodes. + /// + /// + /// A cancellation token that can be used to cancel the operation. + /// + protected abstract ValueTask DeserializeIndexNodeHierarchyAsync(SerializedFieldIdMap serializedFieldIdMap, IIndexNodeFactory indexNodeFactory, CancellationToken cancellationToken); + + /// + /// Disposes of the resources used by the index reader. + /// + /// + /// True if the object is being disposed, false if it is being finalized. 
+ /// + protected virtual void Dispose(bool disposing) + { + } + + /// + public void Dispose() + { + this.Dispose(disposing: true); + GC.SuppressFinalize(this); + } + } +} diff --git a/src/Lifti.Core/Serialization/IndexSerializerBase.cs b/src/Lifti.Core/Serialization/IndexSerializerBase.cs new file mode 100644 index 00000000..b7660175 --- /dev/null +++ b/src/Lifti.Core/Serialization/IndexSerializerBase.cs @@ -0,0 +1,133 @@ +using System; +using System.Collections.Generic; +using System.Threading; +using System.Threading.Tasks; + +namespace Lifti.Serialization +{ + /// + /// A base class helper for implementing implementations. + /// + /// + /// The type of key in the index. + /// + public abstract class IndexSerializerBase : IIndexSerializer + { + /// + public async ValueTask WriteAsync(IIndexSnapshot snapshot, CancellationToken cancellationToken) + { + if (snapshot is null) + { + throw new ArgumentNullException(nameof(snapshot)); + } + + await this.OnSerializationStart(snapshot, cancellationToken).ConfigureAwait(false); + + await this.WriteFieldsAsync(snapshot, cancellationToken).ConfigureAwait(false); + + await this.WriteIndexMetadataAsync(snapshot, cancellationToken).ConfigureAwait(false); + + await this.WriteNodesAsync(snapshot.Root, cancellationToken).ConfigureAwait(false); + + await this.OnSerializationComplete(snapshot, cancellationToken).ConfigureAwait(false); + } + + /// + /// Called when the serialization of the index is complete. This can be used to write out any footer information + /// required by the serialization format. + /// + /// + /// The snapshot of the index that has been serialized. + /// + /// + /// A cancellation token that can be used to cancel the operation. + /// + protected virtual ValueTask OnSerializationComplete(IIndexSnapshot snapshot, CancellationToken cancellationToken) + { + return default; + } + + /// + /// Called when the serialization of the index is about to start. 
This can be used to write out any header information + /// required by the serialization format. + /// + /// + /// The snapshot of the index that has been serialized. + /// + /// + /// A cancellation token that can be used to cancel the operation. + /// + protected virtual ValueTask OnSerializationStart(IIndexSnapshot snapshot, CancellationToken cancellationToken) + { + return default; + } + + /// + public void Dispose() + { + this.Dispose(disposing: true); + GC.SuppressFinalize(this); + } + + /// + /// Disposes of any resources used by the writer. + /// + /// + /// True if the writer is being disposed of, false if it is being finalized. + /// + protected virtual void Dispose(bool disposing) + { + } + + /// + /// Writes the for each field in the index. + /// + /// + /// The for each field in the index. + /// + /// + /// A cancellation token that can be used to cancel the operation. + /// + protected abstract ValueTask WriteFieldsAsync(IReadOnlyList fields, CancellationToken cancellationToken); + + /// + /// Writes all the metadata for the index, including document statistics. + /// + /// + /// The index to write the metadata for. + /// + /// + /// A cancellation token that can be used to cancel the operation. + /// + protected abstract ValueTask WriteIndexMetadataAsync(IIndexSnapshot index, CancellationToken cancellationToken); + + /// + /// Writes all the s for the index, starting from the root node. + /// + /// + /// The root node to start from. All child nodes must also be written as part of this operation. + /// + /// + /// A cancellation token that can be used to cancel the operation. 
+ /// + protected abstract ValueTask WriteNodesAsync(IndexNode rootNode, CancellationToken cancellationToken); + + private async ValueTask WriteFieldsAsync(IIndexSnapshot snapshot, CancellationToken cancellationToken) + { + // We need to write information for all the fields in the index so that when + // we deserialize them to a new index we can ensure that the field ids are + // mapped correctly to a new index structure as new static fields may be registered + // in a new version of the index. + var fieldNames = snapshot.FieldLookup.AllFieldNames; + List fields = new(fieldNames.Count); + + foreach (var fieldName in fieldNames) + { + var field = snapshot.FieldLookup.GetFieldInfo(fieldName); + fields.Add(new SerializedFieldInfo(field.Id, field.Name, field.FieldKind, field.DynamicFieldReaderName)); + } + + await this.WriteFieldsAsync(fields, cancellationToken).ConfigureAwait(false); + } + } +} diff --git a/src/Lifti.Core/Serialization/SerializedFieldCollector.cs b/src/Lifti.Core/Serialization/SerializedFieldCollector.cs new file mode 100644 index 00000000..fa27bf13 --- /dev/null +++ b/src/Lifti.Core/Serialization/SerializedFieldCollector.cs @@ -0,0 +1,27 @@ +namespace Lifti.Serialization +{ + /// + /// Collects field metadata during a deserialization operation. + /// + public sealed class SerializedFieldCollector : DeserializedDataCollector + { + /// + /// Creates a new instance of the class. + /// + public SerializedFieldCollector() + : base(10) + { + } + + /// + /// Creates a new instance of the class. + /// + /// + /// The expected number of fields to be collected. 
+ /// + public SerializedFieldCollector(int expectedCount) + : base(expectedCount) + { + } + } +} diff --git a/src/Lifti.Core/Serialization/SerializedFieldIdMap.cs b/src/Lifti.Core/Serialization/SerializedFieldIdMap.cs new file mode 100644 index 00000000..f2e15ae4 --- /dev/null +++ b/src/Lifti.Core/Serialization/SerializedFieldIdMap.cs @@ -0,0 +1,25 @@ +using System.Collections.Generic; + +namespace Lifti.Serialization +{ + /// + /// Provides a map between the field ids in a serialized index and the field ids in the index as it is now structured. + /// + public readonly record struct SerializedFieldIdMap + { + private readonly Dictionary fieldIdMap; + + internal SerializedFieldIdMap(Dictionary fieldIdMap) + { + this.fieldIdMap = fieldIdMap; + } + + /// + /// Maps a field id from the serialized index to the field id in the index as it is now structured. + /// + public byte Map(byte serializedFieldId) + { + return this.fieldIdMap[serializedFieldId]; + } + } +} diff --git a/src/Lifti.Core/Serialization/SerializedFieldInfo.cs b/src/Lifti.Core/Serialization/SerializedFieldInfo.cs new file mode 100644 index 00000000..7b3badc6 --- /dev/null +++ b/src/Lifti.Core/Serialization/SerializedFieldInfo.cs @@ -0,0 +1,7 @@ +namespace Lifti.Serialization +{ + /// + /// A record that describes a information for a field when it was serialized. + /// + public record struct SerializedFieldInfo(byte FieldId, string Name, FieldKind Kind, string? DynamicFieldReaderName); +} diff --git a/src/Lifti.Core/SerializedFieldInfo.cs b/src/Lifti.Core/SerializedFieldInfo.cs deleted file mode 100644 index 78cadc6c..00000000 --- a/src/Lifti.Core/SerializedFieldInfo.cs +++ /dev/null @@ -1,7 +0,0 @@ -namespace Lifti -{ - /// - /// A record that describes a information for a field when it was serialized. - /// - internal record SerializedFieldInfo(byte FieldId, string Name, FieldKind Kind, string? 
DynamicFieldReaderName); -} diff --git a/src/Lifti.Core/SharedPool.cs b/src/Lifti.Core/SharedPool.cs index fd15d8eb..9e130b40 100644 --- a/src/Lifti.Core/SharedPool.cs +++ b/src/Lifti.Core/SharedPool.cs @@ -3,20 +3,22 @@ namespace Lifti { - internal class SharedPool - where T: notnull + internal sealed class SharedPool + where T : notnull { - private readonly ConcurrentBag pool = new ConcurrentBag(); + private readonly ConcurrentBag pool = []; private readonly Func createNew; - private readonly Action resetForReuse; - - public SharedPool(Func createNew, Action resetForReuse) + private readonly Action resetForReuse; + private readonly int maxCapacity; + + public SharedPool(Func createNew, Action resetForReuse, int maxCapacity = 10) { this.createNew = createNew; - this.resetForReuse = resetForReuse; + this.resetForReuse = resetForReuse; + this.maxCapacity = maxCapacity; } - public T Create() + public T Take() { if (!this.pool.TryTake(out var result)) { @@ -28,7 +30,7 @@ public T Create() public void Return(T reusable) { - if (this.pool.Count > 10) + if (this.pool.Count > this.maxCapacity) { return; } diff --git a/src/Lifti.Core/ThesaurusBuilder.cs b/src/Lifti.Core/ThesaurusBuilder.cs index 0707bdde..4fa5684b 100644 --- a/src/Lifti.Core/ThesaurusBuilder.cs +++ b/src/Lifti.Core/ThesaurusBuilder.cs @@ -11,8 +11,8 @@ namespace Lifti /// public class ThesaurusBuilder { - private readonly Dictionary> synonymLookup = new(); - private readonly Dictionary> hypernymLookup = new(); + private readonly Dictionary> synonymLookup = []; + private readonly Dictionary> hypernymLookup = []; internal ThesaurusBuilder() { diff --git a/src/Lifti.Core/TokenLocation.cs b/src/Lifti.Core/TokenLocation.cs index 6d4cea90..fe0a82ac 100644 --- a/src/Lifti.Core/TokenLocation.cs +++ b/src/Lifti.Core/TokenLocation.cs @@ -1,11 +1,13 @@ -using System; +using Lifti.Querying; +using System; +using System.Collections.Generic; namespace Lifti { /// /// Provides information about the location of a token in 
the original text. /// - public readonly struct TokenLocation : IComparable, IEquatable + public class TokenLocation : IComparable, IEquatable, ITokenLocation { /// /// Constructs a new instance. @@ -30,7 +32,29 @@ public TokenLocation(int tokenIndex, int start, ushort length) /// /// Gets the length of the token. /// - public ushort Length { get; } + public ushort Length { get; } + + int ITokenLocation.MaxTokenIndex => this.TokenIndex; + + int ITokenLocation.MinTokenIndex => this.TokenIndex; + + void ITokenLocation.AddTo(HashSet collector) + { + collector.Add(this); + } + + CompositeTokenLocation ITokenLocation.ComposeWith(ITokenLocation other) + { + return other switch + { + CompositeTokenLocation composite => composite.ComposeWith(this), + TokenLocation location => new CompositeTokenLocation( + [this, location], + Math.Min(this.TokenIndex, location.TokenIndex), + Math.Max(this.TokenIndex, location.TokenIndex)), + _ => throw new InvalidOperationException($"Cannot compose a {nameof(TokenLocation)} with a {other.GetType().Name}"), + }; + } /// public override bool Equals(object? obj) @@ -52,64 +76,105 @@ public override string ToString() } /// - public int CompareTo(TokenLocation other) - { - var result = this.Start.CompareTo(other.Start); - if (result == 0) - { - result = this.Length.CompareTo(other.Length); - } - - if (result == 0) - { - result = this.TokenIndex.CompareTo(other.TokenIndex); - } - + public virtual int CompareTo(TokenLocation? other) + { + if (other is null) + { + return 1; + } + + var result = this.TokenIndex.CompareTo(other.TokenIndex); + if (result == 0) + { + result = this.Start.CompareTo(other.Start); + } + + if (result == 0) + { + result = this.Length.CompareTo(other.Length); + } + return result; } /// - bool IEquatable.Equals(TokenLocation location) + public virtual bool Equals(TokenLocation? 
other) { - return this.Start == location.Start && - this.Length == location.Length && - this.TokenIndex == location.TokenIndex; - } - + return other is not null && + this.TokenIndex == other.TokenIndex && + this.Start == other.Start && + this.Length == other.Length; + } + + int IComparable.CompareTo(ITokenLocation? other) + { + if (other is null) + { + return -1; + } + + if (other is TokenLocation location) + { + return this.TokenIndex.CompareTo(location.TokenIndex); + } + + var result = this.TokenIndex.CompareTo(other.MinTokenIndex); + + if (result == 0) + { + // When comparing a single token location to a composite location, we'll + // always treat the single token location as being less than the composite + // location. + return -1; + } + + return result; + } + + bool IEquatable.Equals(ITokenLocation? other) + { + if (other is TokenLocation location) + { + return this.Equals(location); + } + + return false; + } + /// - public static bool operator ==(TokenLocation left, TokenLocation right) + public static bool operator ==(TokenLocation? left, TokenLocation? right) { - return left.Equals(right); + return left?.Equals(right) ?? false; } /// - public static bool operator !=(TokenLocation left, TokenLocation right) + public static bool operator !=(TokenLocation? left, TokenLocation? right) { return !(left == right); } /// - public static bool operator <(TokenLocation left, TokenLocation right) + public static bool operator <(TokenLocation? left, TokenLocation? right) { - return left.CompareTo(right) < 0; + return (left?.CompareTo(right) ?? -1) < 0; } /// - public static bool operator <=(TokenLocation left, TokenLocation right) + public static bool operator <=(TokenLocation? left, TokenLocation? right) { - return left.CompareTo(right) <= 0; + return (left?.CompareTo(right) ?? -1) <= 0; } /// - public static bool operator >(TokenLocation left, TokenLocation right) + public static bool operator >(TokenLocation? left, TokenLocation? 
right) { - return left.CompareTo(right) > 0; + return (left?.CompareTo(right) ?? -1) > 0; } /// - public static bool operator >=(TokenLocation left, TokenLocation right) + public static bool operator >=(TokenLocation? left, TokenLocation? right) { - return left.CompareTo(right) >= 0; + return (left?.CompareTo(right) ?? -1) >= 0; } } } diff --git a/src/Lifti.Core/Tokenization/IStemmer.cs b/src/Lifti.Core/Tokenization/IStemmer.cs index 98581a23..d885136a 100644 --- a/src/Lifti.Core/Tokenization/IStemmer.cs +++ b/src/Lifti.Core/Tokenization/IStemmer.cs @@ -1,9 +1,28 @@ using System.Text; namespace Lifti.Tokenization -{ - internal interface IStemmer - { +{ + /// + /// A stemmer is used to reduce words to their root form. This is used to reduce the number of words + /// that need to be indexed, and to allow for more effective searching. + /// + public interface IStemmer + { + /// + /// Gets a value indicating whether the stemmer requires case insensitivity. In this case, words + /// are guaranteed to be passed to the stemmer in uppercase. + /// + bool RequiresCaseInsensitivity { get; } + + /// + /// Gets a value indicating whether the stemmer requires accent insensitivity. In this case, words + /// are guaranteed to be passed to the stemmer in their accent insensitive form. + /// + bool RequiresAccentInsensitivity { get; } + + /// + /// Applies stemming to the word in the given . 
+ /// void Stem(StringBuilder builder); } } \ No newline at end of file diff --git a/src/Lifti.Core/Tokenization/IndexTokenizer.cs b/src/Lifti.Core/Tokenization/IndexTokenizer.cs index 6b49af4d..c7dd3282 100644 --- a/src/Lifti.Core/Tokenization/IndexTokenizer.cs +++ b/src/Lifti.Core/Tokenization/IndexTokenizer.cs @@ -4,7 +4,6 @@ using Lifti.Tokenization.TextExtraction; using System; using System.Collections.Generic; -using System.Globalization; using System.Text; namespace Lifti.Tokenization @@ -17,7 +16,7 @@ public class IndexTokenizer : IIndexTokenizer private readonly InputPreprocessorPipeline inputPreprocessorPipeline; private readonly HashSet additionalSplitChars; private readonly HashSet ignoreChars; - private readonly PorterStemmer? stemmer; + private readonly IStemmer? stemmer; /// /// Initializes a new instance of the class. @@ -26,11 +25,7 @@ public class IndexTokenizer : IIndexTokenizer public IndexTokenizer(TokenizationOptions tokenizationOptions) { this.Options = tokenizationOptions ?? 
throw new ArgumentNullException(nameof(tokenizationOptions)); - - if (tokenizationOptions.Stemming) - { - this.stemmer = new PorterStemmer(); - } + this.stemmer = tokenizationOptions.Stemmer; this.additionalSplitChars = new HashSet(tokenizationOptions.AdditionalSplitCharacters); this.ignoreChars = new HashSet(tokenizationOptions.IgnoreCharacters); @@ -159,7 +154,7 @@ private void CaptureToken(TokenStore processedTokens, ref int tokenIndex, int st if (length > ushort.MaxValue) { - throw new LiftiException(string.Format(CultureInfo.InvariantCulture, ExceptionMessages.MaxTokenLengthExceeded, ushort.MaxValue)); + throw new LiftiException(ExceptionMessages.MaxTokenLengthExceeded, ushort.MaxValue); } this.stemmer?.Stem(tokenBuilder); diff --git a/src/Lifti.Core/Tokenization/Objects/AsyncStringArrayFieldReader.cs b/src/Lifti.Core/Tokenization/Objects/AsyncStringArrayFieldReader.cs index a2a871e5..9bb2917f 100644 --- a/src/Lifti.Core/Tokenization/Objects/AsyncStringArrayFieldReader.cs +++ b/src/Lifti.Core/Tokenization/Objects/AsyncStringArrayFieldReader.cs @@ -9,26 +9,27 @@ namespace Lifti.Tokenization.Objects /// /// A field tokenization capable of asynchronously reading an enumerable of strings for a field. /// - /// - /// The type of item the field belongs to. + /// + /// The type of object the field belongs to. /// - internal class AsyncStringArrayFieldReader : StaticFieldReader + internal class AsyncStringArrayFieldReader : StaticFieldReader { - private readonly Func>> reader; + private readonly Func>> reader; internal AsyncStringArrayFieldReader( string name, - Func>> reader, + Func>> reader, IIndexTokenizer tokenizer, ITextExtractor textExtractor, - IThesaurus thesaurus) - : base(name, tokenizer, textExtractor, thesaurus) + IThesaurus thesaurus, + double scoreBoost) + : base(name, tokenizer, textExtractor, thesaurus, scoreBoost) { this.reader = reader ?? 
throw new ArgumentNullException(nameof(reader)); } /// - public override async ValueTask> ReadAsync(TItem item, CancellationToken cancellationToken) + public override async ValueTask> ReadAsync(TObject item, CancellationToken cancellationToken) { return await this.reader(item, cancellationToken).ConfigureAwait(false); } diff --git a/src/Lifti.Core/Tokenization/Objects/AsyncStringFieldReader.cs b/src/Lifti.Core/Tokenization/Objects/AsyncStringFieldReader.cs index 1c985805..f08b43e4 100644 --- a/src/Lifti.Core/Tokenization/Objects/AsyncStringFieldReader.cs +++ b/src/Lifti.Core/Tokenization/Objects/AsyncStringFieldReader.cs @@ -9,26 +9,27 @@ namespace Lifti.Tokenization.Objects /// /// A field tokenization capable of asynchronously reading a string for a field. /// - /// - /// The type of item the field belongs to. + /// + /// The type of object the field belongs to. /// - internal class AsyncStringFieldReader : StaticFieldReader + internal class AsyncStringFieldReader : StaticFieldReader { - private readonly Func> reader; + private readonly Func> reader; internal AsyncStringFieldReader( string name, - Func> reader, + Func> reader, IIndexTokenizer tokenizer, ITextExtractor textExtractor, - IThesaurus thesaurus) - : base(name, tokenizer, textExtractor, thesaurus) + IThesaurus thesaurus, + double scoreBoost) + : base(name, tokenizer, textExtractor, thesaurus, scoreBoost) { this.reader = reader ?? 
throw new ArgumentNullException(nameof(reader)); } /// - public override async ValueTask> ReadAsync(TItem item, CancellationToken cancellationToken) + public override async ValueTask> ReadAsync(TObject item, CancellationToken cancellationToken) { return new[] { await this.reader(item, cancellationToken).ConfigureAwait(false) }; } diff --git a/src/Lifti.Core/Tokenization/Objects/ChildItemDynamicFieldReader.cs b/src/Lifti.Core/Tokenization/Objects/ChildItemDynamicFieldReader.cs index 0e526c35..200062c8 100644 --- a/src/Lifti.Core/Tokenization/Objects/ChildItemDynamicFieldReader.cs +++ b/src/Lifti.Core/Tokenization/Objects/ChildItemDynamicFieldReader.cs @@ -7,29 +7,30 @@ namespace Lifti.Tokenization.Objects { - internal abstract class ChildItemDynamicFieldReader : DynamicFieldReader + internal abstract class ChildItemDynamicFieldReader : DynamicFieldReader { - private readonly Func?> getChildObjects; - private readonly Func getFieldName; - private readonly Func getFieldText; + private readonly Func?> getChildObjects; + private readonly Func getFieldName; + private readonly Func getFieldText; protected ChildItemDynamicFieldReader( - Func?> getChildObjects, - Func getFieldName, - Func getFieldText, + Func?> getChildObjects, + Func getFieldName, + Func getFieldText, string dynamicFieldReaderName, string? 
fieldNamePrefix, IIndexTokenizer tokenizer, ITextExtractor textExtractor, - Thesaurus thesaurus) - : base(tokenizer, textExtractor, thesaurus, dynamicFieldReaderName, fieldNamePrefix) + Thesaurus thesaurus, + double scoreBoost) + : base(tokenizer, textExtractor, thesaurus, dynamicFieldReaderName, fieldNamePrefix, scoreBoost) { this.getChildObjects = getChildObjects; this.getFieldName = getFieldName; this.getFieldText = getFieldText; } - public override ValueTask rawText)>> ReadAsync(TItem item, CancellationToken cancellationToken) + public override ValueTask rawText)>> ReadAsync(TObject item, CancellationToken cancellationToken) { var childObjects = this.getChildObjects(item); if (childObjects == null) @@ -43,7 +44,7 @@ public override ValueTask rawText .ToList()); } - public override ValueTask> ReadAsync(TItem item, string fieldName, CancellationToken cancellationToken) + public override ValueTask> ReadAsync(TObject item, string fieldName, CancellationToken cancellationToken) { var unprefixedFieldName = this.GetUnprefixedFieldName(fieldName); diff --git a/src/Lifti.Core/Tokenization/Objects/DictionaryDynamicFieldReader.cs b/src/Lifti.Core/Tokenization/Objects/DictionaryDynamicFieldReader.cs index 555dd470..3af57ca6 100644 --- a/src/Lifti.Core/Tokenization/Objects/DictionaryDynamicFieldReader.cs +++ b/src/Lifti.Core/Tokenization/Objects/DictionaryDynamicFieldReader.cs @@ -6,24 +6,25 @@ namespace Lifti.Tokenization.Objects { - internal abstract class DictionaryDynamicFieldReader : DynamicFieldReader + internal abstract class DictionaryDynamicFieldReader : DynamicFieldReader { - private readonly Func?> reader; + private readonly Func?> reader; public DictionaryDynamicFieldReader( - Func?> reader, + Func?> reader, string dynamicFieldReaderName, string? 
fieldNamePrefix, IIndexTokenizer tokenizer, ITextExtractor textExtractor, - IThesaurus thesaurus) - : base(tokenizer, textExtractor, thesaurus, dynamicFieldReaderName, fieldNamePrefix) + IThesaurus thesaurus, + double scoreBoost) + : base(tokenizer, textExtractor, thesaurus, dynamicFieldReaderName, fieldNamePrefix, scoreBoost) { this.reader = reader; } /// - public override ValueTask rawText)>> ReadAsync(TItem item, CancellationToken cancellationToken) + public override ValueTask rawText)>> ReadAsync(TObject item, CancellationToken cancellationToken) { var fields = this.reader(item); if (fields == null) @@ -44,7 +45,7 @@ public override ValueTask rawText } /// - public override ValueTask> ReadAsync(TItem item, string fieldName, CancellationToken cancellationToken) + public override ValueTask> ReadAsync(TObject item, string fieldName, CancellationToken cancellationToken) { var unprefixedName = this.GetUnprefixedFieldName(fieldName); @@ -54,8 +55,8 @@ public override ValueTask> ReadAsync(TItem item, string fiel return new ValueTask>(this.ReadFieldValueAsEnumerable(field)); } - // The field is known to this reader, but not present for the given item instance. - return DynamicFieldReader.EmptyField(); + // The field is known to this reader, but not present for the given instance. 
+ return EmptyField(); } protected abstract IEnumerable ReadFieldValueAsEnumerable(TValue field); diff --git a/src/Lifti.Core/Tokenization/Objects/DynamicFieldReader.cs b/src/Lifti.Core/Tokenization/Objects/DynamicFieldReader.cs index cd06712e..0ec0e279 100644 --- a/src/Lifti.Core/Tokenization/Objects/DynamicFieldReader.cs +++ b/src/Lifti.Core/Tokenization/Objects/DynamicFieldReader.cs @@ -8,8 +8,13 @@ namespace Lifti.Tokenization.Objects { internal abstract class DynamicFieldReader : FieldConfig { - protected DynamicFieldReader(IIndexTokenizer tokenizer, ITextExtractor textExtractor, IThesaurus thesaurus, string dynamicFieldReaderName) - : base(tokenizer, textExtractor, thesaurus) + protected DynamicFieldReader( + IIndexTokenizer tokenizer, + ITextExtractor textExtractor, + IThesaurus thesaurus, + string dynamicFieldReaderName, + double scoreBoost) + : base(tokenizer, textExtractor, thesaurus, scoreBoost) { this.Name = dynamicFieldReaderName; } @@ -23,10 +28,10 @@ protected DynamicFieldReader(IIndexTokenizer tokenizer, ITextExtractor textExtra /// /// Implemented by classes that can read an object's fields dynamically during indexing. /// - internal abstract class DynamicFieldReader : DynamicFieldReader + internal abstract class DynamicFieldReader : DynamicFieldReader { - private readonly Dictionary prefixedFields = new(); - private readonly Dictionary prefixedFieldsReverseLookup = new(); + private readonly Dictionary prefixedFields = []; + private readonly Dictionary prefixedFieldsReverseLookup = []; private readonly string? fieldNamePrefix; protected DynamicFieldReader( @@ -34,8 +39,9 @@ internal abstract class DynamicFieldReader : DynamicFieldReader ITextExtractor textExtractor, IThesaurus thesaurus, string dynamicFieldReaderName, - string? fieldNamePrefix) - : base(tokenizer, textExtractor, thesaurus, dynamicFieldReaderName) + string? 
fieldNamePrefix, + double scoreBoost) + : base(tokenizer, textExtractor, thesaurus, dynamicFieldReaderName, scoreBoost) { this.fieldNamePrefix = fieldNamePrefix; } @@ -43,13 +49,13 @@ internal abstract class DynamicFieldReader : DynamicFieldReader /// /// Provides a delegate capable of reading all fields and associated text from an object. /// - public abstract ValueTask rawText)>> ReadAsync(TItem item, CancellationToken cancellationToken); + public abstract ValueTask rawText)>> ReadAsync(TObject item, CancellationToken cancellationToken); /// /// Provides a delegate capable of reading a specific dynamic field from an object. If the field is not found on the given /// object, an empty enumerable will be returned and no error thrown. /// - public abstract ValueTask> ReadAsync(TItem item, string fieldName, CancellationToken cancellationToken); + public abstract ValueTask> ReadAsync(TObject item, string fieldName, CancellationToken cancellationToken); protected string GetPrefixedFieldName(string unprefixedFieldName) { diff --git a/src/Lifti.Core/Tokenization/Objects/FieldConfig.cs b/src/Lifti.Core/Tokenization/Objects/FieldConfig.cs index fae1ef27..f93be617 100644 --- a/src/Lifti.Core/Tokenization/Objects/FieldConfig.cs +++ b/src/Lifti.Core/Tokenization/Objects/FieldConfig.cs @@ -4,12 +4,12 @@ namespace Lifti.Tokenization.Objects { internal abstract class FieldConfig : IFieldConfig { - protected FieldConfig(IIndexTokenizer tokenizer, ITextExtractor textExtractor, IThesaurus thesaurus) + protected FieldConfig(IIndexTokenizer tokenizer, ITextExtractor textExtractor, IThesaurus thesaurus, double scoreBoost) { this.Tokenizer = tokenizer; this.TextExtractor = textExtractor; - this.Thesaurus = thesaurus; - + this.Thesaurus = thesaurus; + this.ScoreBoost = scoreBoost; } /// public IIndexTokenizer Tokenizer { get; } @@ -18,6 +18,9 @@ protected FieldConfig(IIndexTokenizer tokenizer, ITextExtractor textExtractor, I public ITextExtractor TextExtractor { get; } /// - public 
IThesaurus Thesaurus { get; } + public IThesaurus Thesaurus { get; } + + /// + public double ScoreBoost { get; } } } \ No newline at end of file diff --git a/src/Lifti.Core/Tokenization/Objects/IFieldConfig.cs b/src/Lifti.Core/Tokenization/Objects/IFieldConfig.cs index 4c7d7266..e428c0da 100644 --- a/src/Lifti.Core/Tokenization/Objects/IFieldConfig.cs +++ b/src/Lifti.Core/Tokenization/Objects/IFieldConfig.cs @@ -20,6 +20,11 @@ internal interface IFieldConfig /// /// Gets the configured for use with this field. /// - IThesaurus Thesaurus { get; } + IThesaurus Thesaurus { get; } + + /// + /// Gets the boost to apply to tokens extracted from this field. + /// + double ScoreBoost { get; } } } \ No newline at end of file diff --git a/src/Lifti.Core/Tokenization/Objects/IStaticFieldReader.cs b/src/Lifti.Core/Tokenization/Objects/IStaticFieldReader.cs index 2155fca2..f4fefcf9 100644 --- a/src/Lifti.Core/Tokenization/Objects/IStaticFieldReader.cs +++ b/src/Lifti.Core/Tokenization/Objects/IStaticFieldReader.cs @@ -16,11 +16,11 @@ internal interface IStaticFieldReader : IFieldConfig } /// - internal interface IStaticFieldReader : IStaticFieldReader + internal interface IStaticFieldReader : IStaticFieldReader { /// /// Reads the field's text from the given item. /// - ValueTask> ReadAsync(TItem item, CancellationToken cancellationToken); + ValueTask> ReadAsync(TObject item, CancellationToken cancellationToken); } } \ No newline at end of file diff --git a/src/Lifti.Core/Tokenization/Objects/ObjectScoreBoostBuilder.cs b/src/Lifti.Core/Tokenization/Objects/ObjectScoreBoostBuilder.cs new file mode 100644 index 00000000..48bb918d --- /dev/null +++ b/src/Lifti.Core/Tokenization/Objects/ObjectScoreBoostBuilder.cs @@ -0,0 +1,76 @@ +using System; + +namespace Lifti.Tokenization.Objects +{ + /// + /// Allows for the properties of an indexed object to influence how it is scored relative to other objects. 
+ /// + /// The type of the object + public class ObjectScoreBoostBuilder + { + internal Func? MagnitudeProvider { get; private set; } + internal double MagnitudeMultiplier { get; private set; } + internal Func? FreshnessProvider { get; private set; } + internal double FreshnessMultiplier { get; private set; } + + /// + /// Freshness boosting allows you to boost results based on a date associated to the object. For example, assuming + /// all the documents have exactly the same text and a multiplier of 3 is specified, then the score of the newest + /// document will be 3 times higher than the oldest. + /// + /// + /// The delegate capable of reading the freshness value from the object. + /// + /// + /// The multiplier to apply to the score of the object's document based on its freshness. Must be greater than 1. + /// + /// + /// Thrown if the multiplier is less than or equal to 1. + /// + public ObjectScoreBoostBuilder Freshness(Func freshnessProvider, double multiplier) + { + if (multiplier <= 1) + { + throw new ArgumentException(ExceptionMessages.MultiplierValueMustBeGreaterThanOne); + } + + this.FreshnessProvider = freshnessProvider; + this.FreshnessMultiplier = multiplier; + return this; + } + + /// + /// Magnitude boosting allows you to boost results based on a numeric value associated to the object. For example, if you used this with a "star rating" property, + /// documents with a higher rating will be more likely to appear nearer the top of search results. + /// + /// + /// The delegate capable of reading the magnitude value from the object. + /// + /// + /// The multiplier to apply to the score of the object's document based on its magnitude. Must be greater than 1. + /// + /// + /// Thrown if the multiplier is less than or equal to 1. 
+ /// + public ObjectScoreBoostBuilder Magnitude(Func magnitudeProvider, double multiplier) + { + if (multiplier <= 1) + { + throw new ArgumentException(ExceptionMessages.MultiplierValueMustBeGreaterThanOne); + } + + this.MagnitudeProvider = magnitudeProvider; + this.MagnitudeMultiplier = multiplier; + return this; + } + + internal ObjectScoreBoostOptions Build() + { + return new ObjectScoreBoostOptions( + this.MagnitudeMultiplier, + this.MagnitudeProvider, + this.FreshnessMultiplier, + this.FreshnessProvider); + } + } +} diff --git a/src/Lifti.Core/Tokenization/Objects/ObjectScoreBoostOptions.cs b/src/Lifti.Core/Tokenization/Objects/ObjectScoreBoostOptions.cs new file mode 100644 index 00000000..41d2bd0d --- /dev/null +++ b/src/Lifti.Core/Tokenization/Objects/ObjectScoreBoostOptions.cs @@ -0,0 +1,65 @@ +using System; + +namespace Lifti.Tokenization.Objects +{ + /// + /// Provides the configured options for boosting the score of an object based on its magnitude and freshness. + /// + /// + /// The multiplier to apply to the score of the object's document, based on its magnitude. + /// + /// + /// The multiplier to apply to the score of the object's document, based on its freshness. + /// + public abstract class ObjectScoreBoostOptions(double magnitudeMultiplier, double freshnessMultiplier) + { + /// + /// Gets the multiplier to apply to the score of the object's document, based on its magnitude. + /// + public double MagnitudeMultiplier { get; } = magnitudeMultiplier; + + /// + /// Gets the multiplier to apply to the score of the object's document, based on its freshness. + /// + public double FreshnessMultiplier { get; } = freshnessMultiplier; + } + + /// + /// Provides the configured options for boosting the score of an object based on its magnitude and freshness. + /// + /// The type of the object. + /// + /// The multiplier to apply to the score of the object's document, based on its magnitude. 
+ /// + /// + /// The delegate capable of reading the magnitude value from the object. + /// + /// + /// The multiplier to apply to the score of the object's document, based on its freshness. + /// + /// + /// The delegate capable of reading the freshness value from the object. + /// + public class ObjectScoreBoostOptions( + double magnitudeMultiplier, + Func? magnitudeProvider, + double freshnessMultiplier, + Func? freshnessProvider) + : ObjectScoreBoostOptions(magnitudeMultiplier, freshnessMultiplier) + { + /// + /// Gets the delegate capable of reading the magnitude value from the object. + /// + public Func? MagnitudeProvider { get; } = magnitudeProvider; + + /// + /// Gets the delegate capable of reading the freshness value from the object. + /// + public Func? FreshnessProvider { get; } = freshnessProvider; + + internal static ObjectScoreBoostOptions Empty() + { + return new ObjectScoreBoostOptions(0D, null, 0D, null); + } + } +} diff --git a/src/Lifti.Core/Tokenization/Objects/ObjectTokenization.cs b/src/Lifti.Core/Tokenization/Objects/ObjectTokenization.cs deleted file mode 100644 index 35b68646..00000000 --- a/src/Lifti.Core/Tokenization/Objects/ObjectTokenization.cs +++ /dev/null @@ -1,41 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; - -namespace Lifti.Tokenization.Objects -{ - - /// - /// The type of object this tokenization is capable of indexing. - /// The type of the key. - internal class ObjectTokenization : IObjectTokenization - { - internal ObjectTokenization( - Func keyReader, - IReadOnlyList> fieldReaders, - IReadOnlyList> dynamicFieldReaders) - { - this.KeyReader = keyReader; - this.FieldReaders = fieldReaders.ToDictionary(x => x.Name); - this.DynamicFieldReaders = dynamicFieldReaders; - } - - /// - /// Gets the delegate capable of reading the key from the item. - /// - public Func KeyReader { get; } - - /// - /// Gets the set of configurations for fields that can be defined statically at index creation. 
- /// - public IDictionary> FieldReaders { get; } - - /// - /// Gets the set of configurations that determine dynamic fields that can only be known during indexing. - /// - public IReadOnlyList> DynamicFieldReaders { get; } - - /// - Type IObjectTokenization.ItemType { get; } = typeof(T); - } -} diff --git a/src/Lifti.Core/Tokenization/Objects/ObjectTokenizationBuilder.cs b/src/Lifti.Core/Tokenization/Objects/ObjectTokenizationBuilder.cs index 77bb4bd7..bcf17fe1 100644 --- a/src/Lifti.Core/Tokenization/Objects/ObjectTokenizationBuilder.cs +++ b/src/Lifti.Core/Tokenization/Objects/ObjectTokenizationBuilder.cs @@ -11,7 +11,10 @@ internal interface IObjectTokenizationBuilder { /// /// Builds this instance. - /// + /// + /// + /// The unique id for the object type. + /// /// The default to use when one is not /// explicitly configured for a field. /// The default @@ -26,33 +29,39 @@ internal interface IObjectTokenizationBuilder /// * has not been called. /// * No fields have been configured. /// - IObjectTokenization Build(IIndexTokenizer defaultTokenizer, ThesaurusBuilder defaultThesaurusBuilder, ITextExtractor defaultTextExtractor, IndexedFieldLookup fieldLookup); + IObjectTypeConfiguration Build( + byte objectTypeId, + IIndexTokenizer defaultTokenizer, + ThesaurusBuilder defaultThesaurusBuilder, + ITextExtractor defaultTextExtractor, + IndexedFieldLookup fieldLookup); } /// - /// The builder class used to configure an object type for indexing. The object type - /// must expose an id property of type configured using the + /// The builder class used to configure an object type for indexing. The object type + /// must expose an id property of type configured using the /// method. /// - /// + /// /// The type of object to configure. /// /// /// The type of key in the index. 
/// - public class ObjectTokenizationBuilder : IObjectTokenizationBuilder + public class ObjectTokenizationBuilder : IObjectTokenizationBuilder { - private readonly List>> fieldReaderBuilders = new(); - private Func? keyReader; - private readonly List>> dynamicFieldReaderBuilders = new(); + private readonly List>> fieldReaderBuilders = []; + private Func? keyReader; + private readonly List>> dynamicFieldReaderBuilders = []; + private ObjectScoreBoostBuilder? objectScoreBoostBuilder; /// - /// Indicates how the unique key of the item can be read. + /// Indicates how the unique key of the object can be read. /// /// - /// The delegate capable of reading the key from the item + /// The delegate capable of reading the key from the object. /// - public ObjectTokenizationBuilder WithKey(Func keyReader) + public ObjectTokenizationBuilder WithKey(Func keyReader) { if (keyReader is null) { @@ -62,10 +71,10 @@ public class ObjectTokenizationBuilder : IObjectTokenizationBuilder this.keyReader = keyReader; return this; - } - + } + /// - /// Adds a field to be indexed for the item. + /// Adds a field to be indexed for the object. /// /// /// The name of the field. This can be referred to when querying to restrict searches to text read for this field only. @@ -84,29 +93,34 @@ public class ObjectTokenizationBuilder : IObjectTokenizationBuilder /// /// The to use when indexing text from the field. If this is not specified then the default /// text extractor for the index will be used. + /// + /// + /// The multiplier to apply to the score of this field when ranking results. The default value of 1 is equivalent to no boosting. /// - public ObjectTokenizationBuilder WithField( + public ObjectTokenizationBuilder WithField( string name, - Func fieldTextReader, + Func fieldTextReader, Func? tokenizationOptions = null, ITextExtractor? textExtractor = null, - Func? thesaurusOptions = null) + Func? 
thesaurusOptions = null, + double scoreBoost = 1D) { ValidateFieldParameters(name, fieldTextReader); var tokenizer = tokenizationOptions.CreateTokenizer(); this.fieldReaderBuilders.Add( - (defaultTokenizer, defaultThesaurusBuilder, defaultTextExtractor) => new StringFieldReader( + (defaultTokenizer, defaultThesaurusBuilder, defaultTextExtractor) => new StringFieldReader( name, fieldTextReader, tokenizer ?? defaultTokenizer, textExtractor ?? defaultTextExtractor, - CreateFieldThesaurus(defaultTokenizer, tokenizer, defaultThesaurusBuilder, thesaurusOptions))); + CreateFieldThesaurus(defaultTokenizer, tokenizer, defaultThesaurusBuilder, thesaurusOptions), + scoreBoost)); return this; } /// - /// Registers a property on the item that exposes a set of dynamic fields and the text to be indexed for each. + /// Registers a property for the object that exposes a set of dynamic fields and the text to be indexed for each. /// Dynamic fields are automatically registered with the index's as they are encountered /// during indexing. /// @@ -115,11 +129,11 @@ public class ObjectTokenizationBuilder : IObjectTokenizationBuilder /// restoring the relationship between the a dynamic field and its source provider. /// /// - /// The delegate capable of reading the the field name/text pairs from the item. + /// The delegate capable of reading the the field name/text pairs from the object. /// /// /// The optional prefix to apply to any field names read using the . - /// Use this if you need to register multiple sets of dynamic fields for the same item and there is a + /// Use this if you need to register multiple sets of dynamic fields for the same object and there is a /// chance the field names will overlap. /// /// @@ -133,14 +147,18 @@ public class ObjectTokenizationBuilder : IObjectTokenizationBuilder /// /// The to use when indexing text from the field. If this is not specified then the default /// text extractor for the index will be used. 
+ /// + /// + /// The multiplier to apply to the score of this field when ranking results. The default value of 1 is equivalent to no boosting. /// - public ObjectTokenizationBuilder WithDynamicFields( + public ObjectTokenizationBuilder WithDynamicFields( string dynamicFieldReaderName, - Func?> dynamicFieldReader, + Func?> dynamicFieldReader, string? fieldNamePrefix = null, Func? tokenizationOptions = null, ITextExtractor? textExtractor = null, - Func? thesaurusOptions = null) + Func? thesaurusOptions = null, + double scoreBoost = 1D) { if (dynamicFieldReader == null) { @@ -149,25 +167,27 @@ public class ObjectTokenizationBuilder : IObjectTokenizationBuilder var tokenizer = tokenizationOptions.CreateTokenizer(); this.dynamicFieldReaderBuilders.Add( - (defaultTokenizer, defaultThesaurusBuilder, defaultTextExtractor) => new StringDictionaryDynamicFieldReader( + (defaultTokenizer, defaultThesaurusBuilder, defaultTextExtractor) => new StringDictionaryDynamicFieldReader( dynamicFieldReader, dynamicFieldReaderName, fieldNamePrefix, tokenizer ?? defaultTokenizer, textExtractor ?? defaultTextExtractor, - CreateFieldThesaurus(defaultTokenizer, tokenizer, defaultThesaurusBuilder, thesaurusOptions))); + CreateFieldThesaurus(defaultTokenizer, tokenizer, defaultThesaurusBuilder, thesaurusOptions), + scoreBoost)); return this; } - /// - public ObjectTokenizationBuilder WithDynamicFields( + /// + public ObjectTokenizationBuilder WithDynamicFields( string dynamicFieldReaderName, - Func>> dynamicFieldReader, + Func>> dynamicFieldReader, string? fieldNamePrefix = null, Func? tokenizationOptions = null, ITextExtractor? textExtractor = null, - Func? thesaurusOptions = null) + Func? 
thesaurusOptions = null, + double scoreBoost = 1D) { if (dynamicFieldReader == null) { @@ -176,19 +196,20 @@ public class ObjectTokenizationBuilder : IObjectTokenizationBuilder var tokenizer = tokenizationOptions.CreateTokenizer(); this.dynamicFieldReaderBuilders.Add( - (defaultTokenizer, defaultThesaurusBuilder, defaultTextExtractor) => new StringArrayDictionaryDynamicFieldReader( + (defaultTokenizer, defaultThesaurusBuilder, defaultTextExtractor) => new StringArrayDictionaryDynamicFieldReader( dynamicFieldReaderName, dynamicFieldReader, fieldNamePrefix, tokenizer ?? defaultTokenizer, textExtractor ?? defaultTextExtractor, - CreateFieldThesaurus(defaultTokenizer, tokenizer, defaultThesaurusBuilder, thesaurusOptions))); + CreateFieldThesaurus(defaultTokenizer, tokenizer, defaultThesaurusBuilder, thesaurusOptions), + scoreBoost)); return this; } /// - /// Registers a property on the item that exposes a set of dynamic fields and the text to be indexed for each. + /// Registers a property for the object that exposes a set of dynamic fields and the text to be indexed for each. /// Dynamic fields are automatically registered with the index's as they are encountered /// during indexing. /// @@ -207,7 +228,7 @@ public class ObjectTokenizationBuilder : IObjectTokenizationBuilder /// /// /// The optional prefix to apply to any field names read using the . - /// Use this if you need to register multiple sets of dynamic fields for the same item and there is a + /// Use this if you need to register multiple sets of dynamic fields for the same object and there is a /// chance the field names will overlap. /// /// @@ -221,16 +242,20 @@ public class ObjectTokenizationBuilder : IObjectTokenizationBuilder /// /// The to use when indexing text from the field. If this is not specified then the default /// text extractor for the index will be used. + /// + /// + /// The multiplier to apply to the score of this field when ranking results. 
The default value of 1 is equivalent to no boosting. /// - public ObjectTokenizationBuilder WithDynamicFields( + public ObjectTokenizationBuilder WithDynamicFields( string dynamicFieldReaderName, - Func?> dynamicFieldReader, + Func?> dynamicFieldReader, Func getFieldName, Func getFieldText, string? fieldNamePrefix = null, Func? tokenizationOptions = null, ITextExtractor? textExtractor = null, - Func? thesaurusOptions = null) + Func? thesaurusOptions = null, + double scoreBoost = 1D) { if (dynamicFieldReader == null) { @@ -239,7 +264,7 @@ public class ObjectTokenizationBuilder : IObjectTokenizationBuilder var tokenizer = tokenizationOptions.CreateTokenizer(); this.dynamicFieldReaderBuilders.Add( - (defaultTokenizer, defaultThesaurusBuilder, defaultTextExtractor) => new StringChildItemDynamicFieldReader( + (defaultTokenizer, defaultThesaurusBuilder, defaultTextExtractor) => new StringChildObjectDynamicFieldReader( dynamicFieldReader, getFieldName, getFieldText, @@ -247,21 +272,23 @@ public class ObjectTokenizationBuilder : IObjectTokenizationBuilder fieldNamePrefix, tokenizer ?? defaultTokenizer, textExtractor ?? defaultTextExtractor, - CreateFieldThesaurus(defaultTokenizer, tokenizer, defaultThesaurusBuilder, thesaurusOptions))); + CreateFieldThesaurus(defaultTokenizer, tokenizer, defaultThesaurusBuilder, thesaurusOptions), + scoreBoost)); return this; } - /// - public ObjectTokenizationBuilder WithDynamicFields( + /// + public ObjectTokenizationBuilder WithDynamicFields( string dynamicFieldReaderName, - Func?> dynamicFieldReader, + Func?> dynamicFieldReader, Func getFieldName, Func> getFieldText, string? fieldNamePrefix = null, Func? tokenizationOptions = null, ITextExtractor? textExtractor = null, - Func? thesaurusOptions = null) + Func? 
thesaurusOptions = null, + double scoreBoost = 1D) { if (dynamicFieldReader == null) { @@ -270,7 +297,7 @@ public class ObjectTokenizationBuilder : IObjectTokenizationBuilder var tokenizer = tokenizationOptions.CreateTokenizer(); this.dynamicFieldReaderBuilders.Add( - (defaultTokenizer, defaultThesaurusBuilder, defaultTextExtractor) => new StringArrayChildItemDynamicFieldReader( + (defaultTokenizer, defaultThesaurusBuilder, defaultTextExtractor) => new StringArrayChildObjectDynamicFieldReader( dynamicFieldReader, getFieldName, getFieldText, @@ -278,13 +305,14 @@ public class ObjectTokenizationBuilder : IObjectTokenizationBuilder fieldNamePrefix, tokenizer ?? defaultTokenizer, textExtractor ?? defaultTextExtractor, - CreateFieldThesaurus(defaultTokenizer, tokenizer, defaultThesaurusBuilder, thesaurusOptions))); + CreateFieldThesaurus(defaultTokenizer, tokenizer, defaultThesaurusBuilder, thesaurusOptions), + scoreBoost)); return this; } /// - /// Adds a field to be indexed for the item. + /// Adds a field to be indexed for the object. /// /// /// The name of the field. This can be referred to when querying to restrict searches to text read for this field only. @@ -303,29 +331,34 @@ public class ObjectTokenizationBuilder : IObjectTokenizationBuilder /// /// An optional delegate capable of building the thesaurus for this field. If this is unspecified then the default thesaurus /// for the index will be used. + /// + /// + /// The multiplier to apply to the score of this field when ranking results. The default value of 1 is equivalent to no boosting. /// - public ObjectTokenizationBuilder WithField( + public ObjectTokenizationBuilder WithField( string name, - Func> reader, + Func> reader, Func? tokenizationOptions = null, ITextExtractor? textExtractor = null, - Func? thesaurusOptions = null) + Func? 
thesaurusOptions = null, + double scoreBoost = 1D) { ValidateFieldParameters(name, reader); var tokenizer = tokenizationOptions.CreateTokenizer(); this.fieldReaderBuilders.Add((defaultTokenizer, defaultThesaurusBuilder, defaultTextExtractor) => - new StringArrayFieldReader( + new StringArrayFieldReader( name, reader, tokenizer ?? defaultTokenizer, textExtractor ?? defaultTextExtractor, - CreateFieldThesaurus(defaultTokenizer, tokenizer, defaultThesaurusBuilder, thesaurusOptions))); + CreateFieldThesaurus(defaultTokenizer, tokenizer, defaultThesaurusBuilder, thesaurusOptions), + scoreBoost)); return this; } /// - /// Adds a field to be indexed for the item. + /// Adds a field to be indexed for the object. /// /// /// The name of the field. This can be referred to when querying to restrict searches to text read for this field only. @@ -344,45 +377,52 @@ public class ObjectTokenizationBuilder : IObjectTokenizationBuilder /// /// An optional delegate capable of building the thesaurus for this field. If this is unspecified then the default thesaurus /// for the index will be used. + /// + /// + /// The multiplier to apply to the score of this field when ranking results. The default value of 1 is equivalent to no boosting. /// - public ObjectTokenizationBuilder WithField( + public ObjectTokenizationBuilder WithField( string name, - Func> fieldTextReader, + Func> fieldTextReader, Func? tokenizationOptions = null, ITextExtractor? textExtractor = null, - Func? thesaurusOptions = null) + Func? thesaurusOptions = null, + double scoreBoost = 1D) { ValidateFieldParameters(name, fieldTextReader); var tokenizer = tokenizationOptions.CreateTokenizer(); this.fieldReaderBuilders.Add((defaultTokenizer, defaultThesaurusBuilder, defaultTextExtractor) => - new AsyncStringFieldReader( + new AsyncStringFieldReader( name, fieldTextReader, tokenizer ?? defaultTokenizer, textExtractor ?? 
defaultTextExtractor, - CreateFieldThesaurus(defaultTokenizer, tokenizer, defaultThesaurusBuilder, thesaurusOptions))); + CreateFieldThesaurus(defaultTokenizer, tokenizer, defaultThesaurusBuilder, thesaurusOptions), + scoreBoost)); return this; } - /// - public ObjectTokenizationBuilder WithField( + /// + public ObjectTokenizationBuilder WithField( string name, - Func> fieldTextReader, + Func> fieldTextReader, Func? tokenizationOptions = null, ITextExtractor? textExtractor = null, - Func? thesaurusOptions = null) + Func? thesaurusOptions = null, + double scoreBoost = 1D) { return this.WithField( name, (item, ctx) => fieldTextReader(item), tokenizationOptions, textExtractor, - thesaurusOptions); + thesaurusOptions, + scoreBoost); } /// - /// Adds a field to be indexed for the item. + /// Adds a field to be indexed for the object. /// /// /// The name of the field. This can be referred to when querying to restrict searches to text read for this field only. @@ -401,41 +441,74 @@ public class ObjectTokenizationBuilder : IObjectTokenizationBuilder /// /// An optional delegate capable of building the thesaurus for this field. If this is unspecified then the default thesaurus /// for the index will be used. + /// + /// + /// The multiplier to apply to the score of this field when ranking results. The default value of 1 is equivalent to no boosting. /// - public ObjectTokenizationBuilder WithField( + public ObjectTokenizationBuilder WithField( string name, - Func>> fieldTextReader, + Func>> fieldTextReader, Func? tokenizationOptions = null, ITextExtractor? textExtractor = null, - Func? thesaurusOptions = null) + Func? thesaurusOptions = null, + double scoreBoost = 1D) { ValidateFieldParameters(name, fieldTextReader); var tokenizer = tokenizationOptions.CreateTokenizer(); this.fieldReaderBuilders.Add((defaultTokenizer, defaultThesaurusBuilder, defaultTextExtractor) => - new AsyncStringArrayFieldReader( + new AsyncStringArrayFieldReader( name, fieldTextReader, tokenizer ?? 
defaultTokenizer, textExtractor ?? defaultTextExtractor, - CreateFieldThesaurus(defaultTokenizer, tokenizer, defaultThesaurusBuilder, thesaurusOptions))); + CreateFieldThesaurus(defaultTokenizer, tokenizer, defaultThesaurusBuilder, thesaurusOptions), + scoreBoost)); return this; } - /// - public ObjectTokenizationBuilder WithField( + /// + public ObjectTokenizationBuilder WithField( string name, - Func>> fieldTextReader, + Func>> fieldTextReader, Func? tokenizationOptions = null, Func? thesaurusOptions = null, - ITextExtractor? textExtractor = null) + ITextExtractor? textExtractor = null, + double scoreBoost = 1D) { return this.WithField( name, (item, ctx) => fieldTextReader(item), tokenizationOptions, textExtractor, - thesaurusOptions); + thesaurusOptions, + scoreBoost); + } + + /// + /// Configures the score boosting options for the object. + /// + /// + /// The delegate capable of configuring the score boosting options. + /// + /// + /// Thrown if this method is called more than once per object definition. 
+ /// + public ObjectTokenizationBuilder WithScoreBoosting(Action> scoreBoostingOptions) + { + if (scoreBoostingOptions is null) + { + throw new ArgumentNullException(nameof(scoreBoostingOptions)); + } + + if (this.objectScoreBoostBuilder is not null) + { + throw new LiftiException(ExceptionMessages.WithScoreBoostingCanOnlyBeCalledOncePerObjectDefinition); + } + + this.objectScoreBoostBuilder = new ObjectScoreBoostBuilder(); + scoreBoostingOptions(this.objectScoreBoostBuilder); + return this; } private static Thesaurus CreateFieldThesaurus( @@ -455,7 +528,12 @@ public class ObjectTokenizationBuilder : IObjectTokenizationBuilder } /// - IObjectTokenization IObjectTokenizationBuilder.Build(IIndexTokenizer defaultTokenizer, ThesaurusBuilder defaultThesaurusBuilder, ITextExtractor defaultTextExtractor, IndexedFieldLookup fieldLookup) + IObjectTypeConfiguration IObjectTokenizationBuilder.Build( + byte objectTypeId, + IIndexTokenizer defaultTokenizer, + ThesaurusBuilder defaultThesaurusBuilder, + ITextExtractor defaultTextExtractor, + IndexedFieldLookup fieldLookup) { if (this.keyReader == null) { @@ -479,10 +557,12 @@ IObjectTokenization IObjectTokenizationBuilder.Build(IIndexTokenizer defaultToke fieldLookup.RegisterDynamicFieldReader(dynamicFieldReader); } - return new ObjectTokenization( + return new ObjectTypeConfiguration( + objectTypeId, this.keyReader, staticFields, - dynamicFieldReaders); + dynamicFieldReaders, + this.objectScoreBoostBuilder?.Build() ?? 
ObjectScoreBoostOptions.Empty()); } private static void ValidateFieldParameters(string name, object fieldTextReader) @@ -497,5 +577,5 @@ private static void ValidateFieldParameters(string name, object fieldTextReader) throw new ArgumentNullException(nameof(fieldTextReader)); } } - } + } } diff --git a/src/Lifti.Core/Tokenization/Objects/ObjectTokenizationLookup.cs b/src/Lifti.Core/Tokenization/Objects/ObjectTokenizationLookup.cs deleted file mode 100644 index 369e6608..00000000 --- a/src/Lifti.Core/Tokenization/Objects/ObjectTokenizationLookup.cs +++ /dev/null @@ -1,30 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; - -namespace Lifti.Tokenization.Objects -{ - /// - /// Defines a lookup for all the field readers associated to a given object type. - /// - /// The type of key in the index. - internal class ObjectTokenizationLookup - { - private readonly Dictionary options; - - public ObjectTokenizationLookup(IEnumerable objectTokenizers) - { - this.options = objectTokenizers.ToDictionary(x => x.ItemType); - } - - public ObjectTokenization Get() - { - if (this.options.TryGetValue(typeof(TItem), out var itemTokenizationOptions)) - { - return (ObjectTokenization)itemTokenizationOptions; - } - - throw new LiftiException(ExceptionMessages.NoTokenizationOptionsProvidedForType, typeof(TItem)); - } - } -} diff --git a/src/Lifti.Core/Tokenization/Objects/ObjectTypeConfiguration.cs b/src/Lifti.Core/Tokenization/Objects/ObjectTypeConfiguration.cs new file mode 100644 index 00000000..1bcbee31 --- /dev/null +++ b/src/Lifti.Core/Tokenization/Objects/ObjectTypeConfiguration.cs @@ -0,0 +1,56 @@ +using System; +using System.Collections.Generic; +using System.Linq; + +namespace Lifti.Tokenization.Objects +{ + + /// + /// The type of object this tokenization is capable of indexing. + /// The type of the key. 
+ internal class ObjectTypeConfiguration : IObjectTypeConfiguration + { + internal ObjectTypeConfiguration( + byte id, + Func keyReader, + IReadOnlyList> fieldReaders, + IReadOnlyList> dynamicFieldReaders, + ObjectScoreBoostOptions scoreBoostOptions) + { + this.Id = id; + this.KeyReader = keyReader; + this.FieldReaders = fieldReaders.ToDictionary(x => x.Name); + this.DynamicFieldReaders = dynamicFieldReaders; + this.ScoreBoostOptions = scoreBoostOptions; + } + + /// + public byte Id { get; } + + /// + /// Gets the delegate capable of reading the key from the object. + /// + public Func KeyReader { get; } + + /// + /// Gets the set of configurations for fields that can be defined statically at index creation. + /// + public IDictionary> FieldReaders { get; } + + /// + /// Gets the set of configurations that determine dynamic fields that can only be known during indexing. + /// + public IReadOnlyList> DynamicFieldReaders { get; } + + /// + /// The score boost options for the object type. + /// + public ObjectScoreBoostOptions ScoreBoostOptions { get; } + + /// + Type IObjectTypeConfiguration.ObjectType { get; } = typeof(TObject); + + /// + ObjectScoreBoostOptions IObjectTypeConfiguration.ScoreBoostOptions => this.ScoreBoostOptions; + } +} diff --git a/src/Lifti.Core/Tokenization/Objects/ObjectTypeConfigurationLookup.cs b/src/Lifti.Core/Tokenization/Objects/ObjectTypeConfigurationLookup.cs new file mode 100644 index 00000000..2844e4ed --- /dev/null +++ b/src/Lifti.Core/Tokenization/Objects/ObjectTypeConfigurationLookup.cs @@ -0,0 +1,32 @@ +using System; +using System.Collections.Generic; +using System.Linq; + +namespace Lifti.Tokenization.Objects +{ + /// + /// Defines a lookup for all the field readers associated to a given object type. + /// + /// The type of key in the index. 
+ internal class ObjectTypeConfigurationLookup + { + private readonly Dictionary options; + + public ObjectTypeConfigurationLookup(IEnumerable objectTypeConfigurations) + { + this.options = objectTypeConfigurations.ToDictionary(x => x.ObjectType); + } + + public IEnumerable AllConfigurations => this.options.Values; + + public ObjectTypeConfiguration Get() + { + if (this.options.TryGetValue(typeof(TObject), out var objectTypeConfiguration)) + { + return (ObjectTypeConfiguration)objectTypeConfiguration; + } + + throw new LiftiException(ExceptionMessages.NoTokenizationOptionsProvidedForType, typeof(TObject)); + } + } +} diff --git a/src/Lifti.Core/Tokenization/Objects/StaticFieldReader.cs b/src/Lifti.Core/Tokenization/Objects/StaticFieldReader.cs index 09da78d1..b2d6df35 100644 --- a/src/Lifti.Core/Tokenization/Objects/StaticFieldReader.cs +++ b/src/Lifti.Core/Tokenization/Objects/StaticFieldReader.cs @@ -7,10 +7,10 @@ namespace Lifti.Tokenization.Objects { /// - internal abstract class StaticFieldReader : FieldConfig, IStaticFieldReader + internal abstract class StaticFieldReader : FieldConfig, IStaticFieldReader { - internal StaticFieldReader(string name, IIndexTokenizer tokenizer, ITextExtractor textExtractor, IThesaurus thesaurus) - : base(tokenizer, textExtractor, thesaurus) + internal StaticFieldReader(string name, IIndexTokenizer tokenizer, ITextExtractor textExtractor, IThesaurus thesaurus, double scoreBoost) + : base(tokenizer, textExtractor, thesaurus, scoreBoost) { this.Name = name ?? 
throw new ArgumentNullException(nameof(name)); } @@ -19,6 +19,6 @@ internal StaticFieldReader(string name, IIndexTokenizer tokenizer, ITextExtracto public string Name { get; } /// - public abstract ValueTask> ReadAsync(TItem item, CancellationToken cancellationToken); + public abstract ValueTask> ReadAsync(TObject item, CancellationToken cancellationToken); } } \ No newline at end of file diff --git a/src/Lifti.Core/Tokenization/Objects/StringArrayChildItemDynamicFieldReader.cs b/src/Lifti.Core/Tokenization/Objects/StringArrayChildObjectDynamicFieldReader.cs similarity index 53% rename from src/Lifti.Core/Tokenization/Objects/StringArrayChildItemDynamicFieldReader.cs rename to src/Lifti.Core/Tokenization/Objects/StringArrayChildObjectDynamicFieldReader.cs index e7f95f68..55186f54 100644 --- a/src/Lifti.Core/Tokenization/Objects/StringArrayChildItemDynamicFieldReader.cs +++ b/src/Lifti.Core/Tokenization/Objects/StringArrayChildObjectDynamicFieldReader.cs @@ -4,9 +4,9 @@ namespace Lifti.Tokenization.Objects { - internal class StringArrayChildItemDynamicFieldReader : ChildItemDynamicFieldReader> + internal class StringArrayChildObjectDynamicFieldReader : ChildItemDynamicFieldReader> { - public StringArrayChildItemDynamicFieldReader( + public StringArrayChildObjectDynamicFieldReader( Func?> getChildObjects, Func getFieldName, Func> getFieldText, @@ -14,8 +14,18 @@ internal class StringArrayChildItemDynamicFieldReader : Child string? 
fieldNamePrefix, IIndexTokenizer tokenizer, ITextExtractor textExtractor, - Thesaurus thesaurus) - : base(getChildObjects, getFieldName, getFieldText, dynamicFieldReaderName, fieldNamePrefix, tokenizer, textExtractor, thesaurus) + Thesaurus thesaurus, + double scoreBoost) + : base( + getChildObjects, + getFieldName, + getFieldText, + dynamicFieldReaderName, + fieldNamePrefix, + tokenizer, + textExtractor, + thesaurus, + scoreBoost) { } diff --git a/src/Lifti.Core/Tokenization/Objects/StringArrayDictionaryDynamicFieldReader.cs b/src/Lifti.Core/Tokenization/Objects/StringArrayDictionaryDynamicFieldReader.cs index 5a9b264b..f1b5d532 100644 --- a/src/Lifti.Core/Tokenization/Objects/StringArrayDictionaryDynamicFieldReader.cs +++ b/src/Lifti.Core/Tokenization/Objects/StringArrayDictionaryDynamicFieldReader.cs @@ -4,16 +4,17 @@ namespace Lifti.Tokenization.Objects { - internal class StringArrayDictionaryDynamicFieldReader : DictionaryDynamicFieldReader> + internal class StringArrayDictionaryDynamicFieldReader : DictionaryDynamicFieldReader> { public StringArrayDictionaryDynamicFieldReader( string dynamicFieldReaderName, - Func>> reader, + Func>> reader, string? fieldNamePrefix, IIndexTokenizer tokenizer, ITextExtractor textExtractor, - IThesaurus thesaurus) - : base(reader, dynamicFieldReaderName, fieldNamePrefix, tokenizer, textExtractor, thesaurus) + IThesaurus thesaurus, + double scoreBoost) + : base(reader, dynamicFieldReaderName, fieldNamePrefix, tokenizer, textExtractor, thesaurus, scoreBoost) { } diff --git a/src/Lifti.Core/Tokenization/Objects/StringArrayFieldReader.cs b/src/Lifti.Core/Tokenization/Objects/StringArrayFieldReader.cs index 9dc62110..223225ed 100644 --- a/src/Lifti.Core/Tokenization/Objects/StringArrayFieldReader.cs +++ b/src/Lifti.Core/Tokenization/Objects/StringArrayFieldReader.cs @@ -9,26 +9,27 @@ namespace Lifti.Tokenization.Objects /// /// A field tokenization capable of reading an enumerable of strings for a field. 
/// - /// + /// /// The type of item the field belongs to. /// - internal class StringArrayFieldReader : StaticFieldReader + internal class StringArrayFieldReader : StaticFieldReader { - private readonly Func> reader; + private readonly Func> reader; internal StringArrayFieldReader( string name, - Func> reader, + Func> reader, IIndexTokenizer tokenizer, ITextExtractor textExtractor, - IThesaurus thesaurus) - : base(name, tokenizer, textExtractor, thesaurus) + IThesaurus thesaurus, + double scoreBoost) + : base(name, tokenizer, textExtractor, thesaurus, scoreBoost) { this.reader = reader ?? throw new ArgumentNullException(nameof(reader)); } /// - public override ValueTask> ReadAsync(TItem item, CancellationToken cancellationToken) + public override ValueTask> ReadAsync(TObject item, CancellationToken cancellationToken) { return new ValueTask>(this.reader(item)); } diff --git a/src/Lifti.Core/Tokenization/Objects/StringChildItemDynamicFieldReader.cs b/src/Lifti.Core/Tokenization/Objects/StringChildObjectDynamicFieldReader.cs similarity index 53% rename from src/Lifti.Core/Tokenization/Objects/StringChildItemDynamicFieldReader.cs rename to src/Lifti.Core/Tokenization/Objects/StringChildObjectDynamicFieldReader.cs index 727d25f5..9293f376 100644 --- a/src/Lifti.Core/Tokenization/Objects/StringChildItemDynamicFieldReader.cs +++ b/src/Lifti.Core/Tokenization/Objects/StringChildObjectDynamicFieldReader.cs @@ -4,9 +4,9 @@ namespace Lifti.Tokenization.Objects { - internal class StringChildItemDynamicFieldReader : ChildItemDynamicFieldReader + internal class StringChildObjectDynamicFieldReader : ChildItemDynamicFieldReader { - public StringChildItemDynamicFieldReader( + public StringChildObjectDynamicFieldReader( Func?> getChildObjects, Func getFieldName, Func getFieldText, @@ -14,8 +14,17 @@ internal class StringChildItemDynamicFieldReader : ChildItemD string? 
fieldNamePrefix, IIndexTokenizer tokenizer, ITextExtractor textExtractor, - Thesaurus thesaurus) - : base(getChildObjects, getFieldName, getFieldText, dynamicFieldReaderName, fieldNamePrefix, tokenizer, textExtractor, thesaurus) + Thesaurus thesaurus, + double scoreBoost) + : base(getChildObjects, + getFieldName, + getFieldText, + dynamicFieldReaderName, + fieldNamePrefix, + tokenizer, + textExtractor, + thesaurus, + scoreBoost) { } diff --git a/src/Lifti.Core/Tokenization/Objects/StringDictionaryDynamicFieldReader.cs b/src/Lifti.Core/Tokenization/Objects/StringDictionaryDynamicFieldReader.cs index 7899011b..759c95a1 100644 --- a/src/Lifti.Core/Tokenization/Objects/StringDictionaryDynamicFieldReader.cs +++ b/src/Lifti.Core/Tokenization/Objects/StringDictionaryDynamicFieldReader.cs @@ -4,16 +4,17 @@ namespace Lifti.Tokenization.Objects { - internal class StringDictionaryDynamicFieldReader : DictionaryDynamicFieldReader + internal class StringDictionaryDynamicFieldReader : DictionaryDynamicFieldReader { public StringDictionaryDynamicFieldReader( - Func?> reader, + Func?> reader, string dynamicFieldReaderName, string? fieldNamePrefix, IIndexTokenizer tokenizer, ITextExtractor textExtractor, - IThesaurus thesaurus) - : base(reader, dynamicFieldReaderName, fieldNamePrefix, tokenizer, textExtractor, thesaurus) + IThesaurus thesaurus, + double scoreBoost) + : base(reader, dynamicFieldReaderName, fieldNamePrefix, tokenizer, textExtractor, thesaurus, scoreBoost) { } diff --git a/src/Lifti.Core/Tokenization/Objects/StringFieldReader.cs b/src/Lifti.Core/Tokenization/Objects/StringFieldReader.cs index c7a0ba1d..8081f2ce 100644 --- a/src/Lifti.Core/Tokenization/Objects/StringFieldReader.cs +++ b/src/Lifti.Core/Tokenization/Objects/StringFieldReader.cs @@ -7,28 +7,29 @@ namespace Lifti.Tokenization.Objects { /// - /// A field tokenization capable of asynchronously reading s string for a field. + /// A field tokenization capable of asynchronously reading a string for a field. 
/// - /// - /// The type of item the field belongs to. + /// + /// The type of object the field belongs to. /// - internal class StringFieldReader : StaticFieldReader + internal class StringFieldReader : StaticFieldReader { - private readonly Func reader; + private readonly Func reader; internal StringFieldReader( string name, - Func reader, + Func reader, IIndexTokenizer tokenizer, ITextExtractor textExtractor, - IThesaurus thesaurus) - : base(name, tokenizer, textExtractor, thesaurus) + IThesaurus thesaurus, + double scoreBoost) + : base(name, tokenizer, textExtractor, thesaurus, scoreBoost) { this.reader = reader ?? throw new ArgumentNullException(nameof(reader)); } /// - public override ValueTask> ReadAsync(TItem item, CancellationToken cancellationToken) + public override ValueTask> ReadAsync(TObject item, CancellationToken cancellationToken) { return new ValueTask>(new[] { this.reader(item) }); } diff --git a/src/Lifti.Core/Tokenization/Preprocessing/InputPreprocessorPipeline.cs b/src/Lifti.Core/Tokenization/Preprocessing/InputPreprocessorPipeline.cs index 648fe2a8..0d289962 100644 --- a/src/Lifti.Core/Tokenization/Preprocessing/InputPreprocessorPipeline.cs +++ b/src/Lifti.Core/Tokenization/Preprocessing/InputPreprocessorPipeline.cs @@ -8,10 +8,10 @@ namespace Lifti.Tokenization.Preprocessing /// public class InputPreprocessorPipeline : IInputPreprocessorPipeline { - private readonly List inputPreprocessors = new(); + private readonly List inputPreprocessors = []; private static readonly SharedPool> queuePool = new( - () => new Queue(4), - q => q.Clear()); + static () => new Queue(4), + static q => q.Clear()); /// /// Initializes a new instance of the class. 
@@ -48,8 +48,8 @@ public IEnumerable Process(char input) yield break; } - var processQueue = queuePool.Create(); - var outputQueue = queuePool.Create(); + var processQueue = queuePool.Take(); + var outputQueue = queuePool.Take(); processQueue.Enqueue(input); diff --git a/src/Lifti.Core/Tokenization/Stemming/PorterStemmer.cs b/src/Lifti.Core/Tokenization/Stemming/PorterStemmer.cs index c274740c..97afb131 100644 --- a/src/Lifti.Core/Tokenization/Stemming/PorterStemmer.cs +++ b/src/Lifti.Core/Tokenization/Stemming/PorterStemmer.cs @@ -10,141 +10,147 @@ internal class PorterStemmer : IStemmer /// /// The list of apostrophe based endings that can be pruned in step 0. /// - private static readonly Dictionary apostropheEnds = CreateSearchLookup(new[] { "'S'", "'S", "'" }); + private static readonly Dictionary apostropheEnds = CreateSearchLookup(["'S'", "'S", "'"]); /// /// The set of exceptions that are obeyed prior to any steps being executed. /// private static readonly WordReplacement[] exceptions = - { - new WordReplacement("SKIS", "SKI"), - new WordReplacement("SKIES", "SKY"), - new WordReplacement("DYING", "DIE"), - new WordReplacement("LYING", "LIE"), - new WordReplacement("TYING", "TIE"), - new WordReplacement("IDLY", "IDL"), - new WordReplacement("GENTLY", "GENTL"), - new WordReplacement("UGLY", "UGLI"), - new WordReplacement("EARLY", "EARLI"), - new WordReplacement("ONLY", "ONLI"), - new WordReplacement("SINGLY", "SINGL"), - new WordReplacement("SKY"), - new WordReplacement("NEWS"), - new WordReplacement("HOWE"), - new WordReplacement("ATLAS"), - new WordReplacement("COSMOS"), - new WordReplacement("BIAS"), - new WordReplacement("ANDES") - }; + [ + new("SKIS", "SKI"), + new("SKIES", "SKY"), + new("DYING", "DIE"), + new("LYING", "LIE"), + new("TYING", "TIE"), + new("IDLY", "IDL"), + new("GENTLY", "GENTL"), + new("UGLY", "UGLI"), + new("EARLY", "EARLI"), + new("ONLY", "ONLI"), + new("SINGLY", "SINGL"), + new("SKY"), + new("NEWS"), + new("HOWE"), + new("ATLAS"), + 
new("COSMOS"), + new("BIAS"), + new("ANDES") + ]; /// /// The set of exceptions that are obeyed between steps 1A and 1B. /// private static readonly WordReplacement[] exceptions2 = - { - new WordReplacement("INNING"), - new WordReplacement("OUTING"), - new WordReplacement("CANNING"), - new WordReplacement("HERRING"), - new WordReplacement("EARRING"), - new WordReplacement("PROCEED"), - new WordReplacement("EXCEED"), - new WordReplacement("SUCCEED") - }; + [ + new("INNING"), + new("OUTING"), + new("CANNING"), + new("HERRING"), + new("EARRING"), + new("PROCEED"), + new("EXCEED"), + new("SUCCEED") + ]; /// /// The list of endings need to have an "e" appended to the word during step 1b. /// - private static readonly Dictionary step1bAppendEEndings = CreateSearchLookup(new[] { "AT", "BL", "IZ" }); + private static readonly Dictionary step1bAppendEEndings = CreateSearchLookup(["AT", "BL", "IZ"]); /// /// The replacements that can be made in step 1B. /// private static readonly FullTextIndex step1bReplacements = CreateReplacementLookup( - new[] + new WordReplacement[] { - new WordReplacement("EEDLY", 3), - new WordReplacement("INGLY", 5), - new WordReplacement("EDLY", 4), - new WordReplacement("EED", 1), - new WordReplacement("ING", 3), - new WordReplacement("ED", 2) + new("EEDLY", 3), + new("INGLY", 5), + new("EDLY", 4), + new("EED", 1), + new("ING", 3), + new("ED", 2) }); /// /// The replacements that can be made in step 2. 
/// private static readonly FullTextIndex step2Replacements = CreateReplacementLookup( - new[] + new WordReplacement[] { - new WordReplacement("IZATION", "IZE"), - new WordReplacement("IVENESS", 4), - new WordReplacement("FULNESS", 4), - new WordReplacement("ATIONAL", "ATE"), - new WordReplacement("OUSNESS", 4), - new WordReplacement("BILITI", "BLE"), - new WordReplacement("TIONAL", 2), - new WordReplacement("LESSLI", 2), - new WordReplacement("FULLI", 2), - new WordReplacement("ENTLI", 2), - new WordReplacement("ATION", "ATE"), - new WordReplacement("ALITI", 3), - new WordReplacement("IVITI", "IVE"), - new WordReplacement("OUSLI", 2), - new WordReplacement("ALISM", 3), - new WordReplacement("ABLI", "ABLE"), - new WordReplacement("ANCI", "ANCE"), - new WordReplacement("ALLI", 2), - new WordReplacement("IZER", 1), - new WordReplacement("ENCI", "ENCE"), - new WordReplacement("ATOR", "ATE"), - new WordReplacement("BLI", "BLE"), - new WordReplacement("OGI", 1), - new WordReplacement("LI", 2) + new("IZATION", "IZE"), + new("IVENESS", 4), + new("FULNESS", 4), + new("ATIONAL", "ATE"), + new("OUSNESS", 4), + new("BILITI", "BLE"), + new("TIONAL", 2), + new("LESSLI", 2), + new("FULLI", 2), + new("ENTLI", 2), + new("ATION", "ATE"), + new("ALITI", 3), + new("IVITI", "IVE"), + new("OUSLI", 2), + new("ALISM", 3), + new("ABLI", "ABLE"), + new("ANCI", "ANCE"), + new("ALLI", 2), + new("IZER", 1), + new("ENCI", "ENCE"), + new("ATOR", "ATE"), + new("BLI", "BLE"), + new("OGI", 1), + new("LI", 2) }); /// /// The replacements that can be made in step 3. 
/// private static readonly FullTextIndex step3Replacements = CreateReplacementLookup( - new[] + new WordReplacement[] { - new WordReplacement("ATIONAL", "ATE"), - new WordReplacement("TIONAL", 2), - new WordReplacement("ALIZE", 3), - new WordReplacement("ICATE", 3), - new WordReplacement("ICITI", 3), - new WordReplacement("ATIVE", 5), - new WordReplacement("ICAL", 2), - new WordReplacement("NESS", 4), - new WordReplacement("FUL", 3) + new("ATIONAL", "ATE"), + new("TIONAL", 2), + new("ALIZE", 3), + new("ICATE", 3), + new("ICITI", 3), + new("ATIVE", 5), + new("ICAL", 2), + new("NESS", 4), + new("FUL", 3) }); /// /// The replacements that can be made in step 4. /// private static readonly FullTextIndex step4Replacements = CreateReplacementLookup( - new[] + new WordReplacement[] { - new WordReplacement("EMENT", 5), - new WordReplacement("MENT", 4), - new WordReplacement("ABLE", 4), - new WordReplacement("IBLE", 4), - new WordReplacement("ANCE", 4), - new WordReplacement("ENCE", 4), - new WordReplacement("ATE", 3), - new WordReplacement("ITI", 3), - new WordReplacement("ION", 3), - new WordReplacement("IZE", 3), - new WordReplacement("IVE", 3), - new WordReplacement("OUS", 3), - new WordReplacement("ANT", 3), - new WordReplacement("ISM", 3), - new WordReplacement("ENT", 3), - new WordReplacement("AL", 2), - new WordReplacement("ER", 2), - new WordReplacement("IC", 2) + new("EMENT", 5), + new("MENT", 4), + new("ABLE", 4), + new("IBLE", 4), + new("ANCE", 4), + new("ENCE", 4), + new("ATE", 3), + new("ITI", 3), + new("ION", 3), + new("IZE", 3), + new("IVE", 3), + new("OUS", 3), + new("ANT", 3), + new("ISM", 3), + new("ENT", 3), + new("AL", 2), + new("ER", 2), + new("IC", 2) }); + + /// + public bool RequiresCaseInsensitivity => true; + + /// + public bool RequiresAccentInsensitivity => true; /// public void Stem(StringBuilder builder) diff --git a/src/Lifti.Core/Tokenization/Stemming/StringBuilderExtensions.cs 
b/src/Lifti.Core/Tokenization/Stemming/StringBuilderExtensions.cs index 0d14c9b3..957d50e3 100644 --- a/src/Lifti.Core/Tokenization/Stemming/StringBuilderExtensions.cs +++ b/src/Lifti.Core/Tokenization/Stemming/StringBuilderExtensions.cs @@ -1,10 +1,9 @@ -namespace Lifti.Tokenization.Stemming -{ - using Lifti.Querying; - using System; - using System.Collections.Generic; - using System.Text; +using Lifti.Querying; +using System.Collections.Generic; +using System.Text; +namespace Lifti.Tokenization.Stemming +{ /// /// Extensions for the StringBuilder class to help with the Porter stemming code. /// @@ -20,18 +19,11 @@ internal static class StringBuilderExtensions /// public static bool IsVowel(this StringBuilder builder, int index) { - switch (builder[index]) + return builder[index] switch { - case 'A': - case 'I': - case 'E': - case 'O': - case 'U': - case 'Y': - return true; - default: - return false; - } + 'A' or 'I' or 'E' or 'O' or 'U' or 'Y' => true, + _ => false, + }; } /// @@ -156,29 +148,27 @@ public static WordReplacement EndsWith(this StringBuilder builder, IFullTextInde var length = builder.Length; if (length > 3) { - using (var navigator = replacementSetLookup.Snapshot.CreateNavigator()) + using var navigator = replacementSetLookup.Snapshot.CreateNavigator(); + if (navigator.Process(builder[builder.Length - 1])) { - if (navigator.Process(builder[builder.Length - 1])) + var bestMatch = IntermediateQueryResult.Empty; + for (var i = builder.Length - 2; i >= 0; i--) { - var bestMatch = IntermediateQueryResult.Empty; - for (var i = builder.Length - 2; i >= 0; i--) + if (!navigator.Process(builder[i])) { - if (!navigator.Process(builder[i])) - { - break; - } - - if (navigator.HasExactMatches) - { - bestMatch = navigator.GetExactMatches(); - } + break; } - if (bestMatch.Matches.Count > 0) + if (navigator.HasExactMatches) { - return replacementSetLookup.Items.GetMetadata(bestMatch.Matches[0].ItemId).Item; + bestMatch = 
navigator.GetExactMatches(QueryContext.Empty); } } + + if (bestMatch.Matches.Count > 0) + { + return replacementSetLookup.Metadata.GetDocumentMetadata(bestMatch.Matches[0].DocumentId).Key; + } } } diff --git a/src/Lifti.Core/Tokenization/Thesaurus.cs b/src/Lifti.Core/Tokenization/Thesaurus.cs index 1a156daf..f54fc1d4 100644 --- a/src/Lifti.Core/Tokenization/Thesaurus.cs +++ b/src/Lifti.Core/Tokenization/Thesaurus.cs @@ -9,7 +9,7 @@ public Thesaurus(Dictionary> wordLookup) this.WordLookup = wordLookup; } - public static IThesaurus Empty { get; } = new Thesaurus(new Dictionary>()); + public static IThesaurus Empty { get; } = new Thesaurus([]); public Dictionary> WordLookup { get; } @@ -20,8 +20,9 @@ public IEnumerable Process(Token token) // Return a token match at all the locations for each // synonym. This list will include the original token text as well foreach (string alternative in alternatives) - { - yield return new Token(alternative, token.Locations); + { + // We can share the exact locations list here. + yield return token.WithAlternativeValue(alternative); } } else diff --git a/src/Lifti.Core/Tokenization/Token.cs b/src/Lifti.Core/Tokenization/Token.cs index 90ba161d..dc09e13c 100644 --- a/src/Lifti.Core/Tokenization/Token.cs +++ b/src/Lifti.Core/Tokenization/Token.cs @@ -1,4 +1,5 @@ -using System.Collections.Generic; +using System; +using System.Collections.Generic; namespace Lifti.Tokenization { @@ -14,27 +15,31 @@ public class Token /// public Token(string token, TokenLocation location) { - this.locations = new List { location }; + this.locations = [location]; this.Value = token; } /// /// Initializes a new instance of the class. /// - public Token(string token, params TokenLocation[] locations) + public Token(string token, params TokenLocation[] locations) + : this(token, new List(locations)) { - this.locations = new List(locations); - this.Value = token; } /// /// Initializes a new instance of the class. 
/// - public Token(string token, IReadOnlyList locations) + public Token(string token, IReadOnlyList locations) + : this(token, new List(locations)) { - this.locations = new List(locations); - this.Value = token; } + + private Token(string token, List locations) + { + this.Value = token; + this.locations = locations; + } /// /// Gets the locations at which the token was located in the document. @@ -52,6 +57,15 @@ public Token(string token, IReadOnlyList locations) public void AddLocation(TokenLocation location) { this.locations.Add(location); - } + } + + /// + /// Creates a copy of the token with the same locations, but with the specified alternative value. + /// This avoids having to create a new list of locations for synonyms. + /// + internal Token WithAlternativeValue(string alternativeValue) + { + return new Token(alternativeValue, this.locations); + } } } diff --git a/src/Lifti.Core/Tokenization/TokenExtensions.cs b/src/Lifti.Core/Tokenization/TokenExtensions.cs new file mode 100644 index 00000000..7c315af6 --- /dev/null +++ b/src/Lifti.Core/Tokenization/TokenExtensions.cs @@ -0,0 +1,18 @@ +using System.Collections.Generic; + +namespace Lifti.Tokenization +{ + internal static class TokenExtensions + { + internal static int CalculateTotalTokenCount(this IList tokens) + { + var totalCount = 0; + for (var i = 0; i < tokens.Count; i++) + { + totalCount += tokens[i].Locations.Count; + } + + return totalCount; + } + } +} diff --git a/src/Lifti.Core/Tokenization/TokenStore.cs b/src/Lifti.Core/Tokenization/TokenStore.cs index f24e6387..27f4c6c8 100644 --- a/src/Lifti.Core/Tokenization/TokenStore.cs +++ b/src/Lifti.Core/Tokenization/TokenStore.cs @@ -9,7 +9,7 @@ namespace Lifti.Tokenization /// internal class TokenStore { - private readonly Dictionary materializedTokens = new(); + private readonly Dictionary materializedTokens = []; /// /// Captures a token at a location, merging the token with any locations diff --git a/src/Lifti.Core/TokenizationOptions.cs 
b/src/Lifti.Core/TokenizationOptions.cs index 36d7e581..55ff1841 100644 --- a/src/Lifti.Core/TokenizationOptions.cs +++ b/src/Lifti.Core/TokenizationOptions.cs @@ -41,7 +41,7 @@ internal TokenizationOptions() /// public bool CaseInsensitive { - get => this.caseInsensitive || this.Stemming; + get => this.caseInsensitive || (this.Stemmer?.RequiresCaseInsensitivity ?? false); internal set => this.caseInsensitive = value; } @@ -50,7 +50,7 @@ public bool CaseInsensitive /// public bool AccentInsensitive { - get => this.accentInsensitive || this.Stemming; + get => this.accentInsensitive || (this.Stemmer?.RequiresAccentInsensitivity ?? false); internal set => this.accentInsensitive = value; } @@ -59,7 +59,7 @@ public bool AccentInsensitive /// Gets a value indicating whether word stemming should be applied when tokenizing. Setting this value to true /// forces both and to be true. /// - public bool Stemming { get; internal set; } + public IStemmer? Stemmer { get; internal set; } /// /// Gets the set of characters that should be ignored in any input. diff --git a/src/Lifti.Core/TokenizerBuilder.cs b/src/Lifti.Core/TokenizerBuilder.cs index 97c59cf1..fe72c730 100644 --- a/src/Lifti.Core/TokenizerBuilder.cs +++ b/src/Lifti.Core/TokenizerBuilder.cs @@ -1,4 +1,5 @@ using Lifti.Tokenization; +using Lifti.Tokenization.Stemming; using System; namespace Lifti @@ -13,7 +14,7 @@ public class TokenizerBuilder private bool splitOnPunctuation = true; private bool accentInsensitive = true; private bool caseInsensitive = true; - private bool stemming; + private IStemmer? stemmer; private char[]? additionalSplitCharacters; private Func factory = defaultTokenizerFactory; private char[]? ignoreCharacters; @@ -37,9 +38,9 @@ public TokenizerBuilder WithFactory(Func t /// be suppressed by passing false to this method, in which case only characters explicitly specified /// using will be treated as word breaks. 
/// - public TokenizerBuilder SplitOnPunctuation(bool splitOnPunctionation = true) + public TokenizerBuilder SplitOnPunctuation(bool splitOnPunctuation = true) { - this.splitOnPunctuation = splitOnPunctionation; + this.splitOnPunctuation = splitOnPunctuation; return this; } @@ -65,15 +66,38 @@ public TokenizerBuilder AccentInsensitive(bool accentInsensitive = true) return this; } + /// + [Obsolete("Use WithStemming() instead.")] +#pragma warning disable RS0027 // API with optional parameter(s) should have the most parameters amongst its public overloads + public TokenizerBuilder WithStemming(bool stemming = true) +#pragma warning restore RS0027 // API with optional parameter(s) should have the most parameters amongst its public overloads + { + this.stemmer = new PorterStemmer(); + return this; + } + /// - /// Configures the tokenizer to apply word stemming, e.g. de-pluralizing and stripping - /// endings such as ING from words. Enabling this will cause both case and accent - /// insensitivity to be applied. - /// - public TokenizerBuilder WithStemming(bool stemming = true) + /// Configures the tokenizer to apply word stemming using the default English Porter Stemmer implementation. + /// Used to reduce English words to a common root form, i.e. de-pluralizing and stripping endings such as ING from words. + /// Enabling this will cause both case and accent insensitivity to be applied. + /// + public TokenizerBuilder WithStemming() { - this.stemming = stemming; + this.stemmer = new PorterStemmer(); return this; + } + + /// + /// Configures the tokenizer to apply word stemming using the specified stemmer. Depending on the + /// and properties of the stemmer, accent and case insensitivity may be applied to the index. + /// + /// + /// The stemmer to use. 
+ /// + public TokenizerBuilder WithStemming(IStemmer stemmer) + { + this.stemmer = stemmer; + return this; } /// @@ -112,7 +136,7 @@ public IIndexTokenizer Build() SplitOnPunctuation = this.splitOnPunctuation, AccentInsensitive = this.accentInsensitive, CaseInsensitive = this.caseInsensitive, - Stemming = this.stemming + Stemmer = this.stemmer }; if (this.ignoreCharacters != null) diff --git a/src/Lifti.Core/VirtualString.cs b/src/Lifti.Core/VirtualString.cs index a6c7a24c..2f438ee0 100644 --- a/src/Lifti.Core/VirtualString.cs +++ b/src/Lifti.Core/VirtualString.cs @@ -28,11 +28,14 @@ public string Substring(int start, int length) return string.Empty; } +#if NET8_0_OR_GREATER + ArgumentOutOfRangeException.ThrowIfNegative(start); +#else if (start < 0) { throw new ArgumentOutOfRangeException(nameof(start)); } - +#endif if (start + length > this.length) { // Pin the requested length to the maximum length it can be diff --git a/test/Lifti.Tests/ChildNodeMapTests.cs b/test/Lifti.Tests/ChildNodeMapTests.cs new file mode 100644 index 00000000..c0c83275 --- /dev/null +++ b/test/Lifti.Tests/ChildNodeMapTests.cs @@ -0,0 +1,83 @@ +using FluentAssertions; +using System; +using Xunit; + +namespace Lifti.Tests +{ + public class ChildNodeMapTests + { + [Fact] + public void TryGetValue_WhenNoCharacters_ReturnsFalse() + { + var sut = new ChildNodeMap(); + sut.TryGetValue('A', out var nextNode).Should().BeFalse(); + } + + [Fact] + public void TryGetValue_WithSingleMatchingCharacter_ReturnsMatch() + { + var sut = new ChildNodeMap([CreateTestIndexNodeMap('A')]); + + this.VerifySuccessfulMatch(sut, 'A'); + this.VerifyUnsuccessfulMatch(sut, 'B'); + this.VerifyUnsuccessfulMatch(sut, 'a'); + } + + [Fact] + public void TryGetValue_WithTwoCharacters_ReturnsMatch() + { + var sut = new ChildNodeMap( + [ + CreateTestIndexNodeMap('A'), + CreateTestIndexNodeMap('Z') + ]); + + this.VerifySuccessfulMatch(sut, 'A'); + this.VerifySuccessfulMatch(sut, 'Z'); + this.VerifyUnsuccessfulMatch(sut, 'B'); + 
this.VerifyUnsuccessfulMatch(sut, 'a'); + } + + [Fact] + public void TryGetValue_WithFiveCharacters_ReturnsMatch() + { + var sut = new ChildNodeMap( + [ + CreateTestIndexNodeMap('E'), + CreateTestIndexNodeMap('H'), + CreateTestIndexNodeMap('L'), + CreateTestIndexNodeMap('N'), + CreateTestIndexNodeMap('P') + ]); + + this.VerifySuccessfulMatch(sut, 'E'); + this.VerifySuccessfulMatch(sut, 'H'); + this.VerifySuccessfulMatch(sut, 'L'); + this.VerifySuccessfulMatch(sut, 'N'); + this.VerifySuccessfulMatch(sut, 'P'); + this.VerifyUnsuccessfulMatch(sut, 'M'); + this.VerifyUnsuccessfulMatch(sut, 'B'); + this.VerifyUnsuccessfulMatch(sut, 'Z'); + this.VerifyUnsuccessfulMatch(sut, 'a'); + this.VerifyUnsuccessfulMatch(sut, 'e'); + } + + private void VerifySuccessfulMatch(ChildNodeMap sut, char character) + { + sut.TryGetValue(character, out var nextNode).Should().BeTrue(); + nextNode!.IntraNodeText.ToString().Should().BeEquivalentTo(character.ToString()); + } + + private void VerifyUnsuccessfulMatch(ChildNodeMap sut, char character) + { + sut.TryGetValue(character, out var nextNode).Should().BeFalse(); + } + + private static ChildNodeMapEntry CreateTestIndexNodeMap(char character) + { + return new( + character, + new IndexNode(character.ToString().AsMemory(), new ChildNodeMap(), new DocumentTokenMatchMap())); + } + } +} diff --git a/test/Lifti.Tests/Fakes/FakeFieldScoreBoostProvider.cs b/test/Lifti.Tests/Fakes/FakeFieldScoreBoostProvider.cs new file mode 100644 index 00000000..97c2040e --- /dev/null +++ b/test/Lifti.Tests/Fakes/FakeFieldScoreBoostProvider.cs @@ -0,0 +1,26 @@ +using Lifti.Querying; +using System.Collections.Generic; +using System.Linq; + +namespace Lifti.Tests.Fakes +{ + internal class FakeFieldScoreBoostProvider : IFieldScoreBoostProvider + { + private Dictionary boostOverrides; + + public FakeFieldScoreBoostProvider(params (byte fieldId, double scoreBoost)[] boostOverrides) + { + this.boostOverrides = boostOverrides.ToDictionary(x => x.fieldId, x => 
x.scoreBoost); + } + + public double GetScoreBoost(byte fieldId) + { + if (boostOverrides.TryGetValue(fieldId, out var boost)) + { + return boost; + } + + return 1D; + } + } +} diff --git a/test/Lifti.Tests/Fakes/FakeIndexMetadata.cs b/test/Lifti.Tests/Fakes/FakeIndexMetadata.cs new file mode 100644 index 00000000..14b8bb95 --- /dev/null +++ b/test/Lifti.Tests/Fakes/FakeIndexMetadata.cs @@ -0,0 +1,61 @@ +using System; +using System.Collections.Generic; +using System.Linq; + +namespace Lifti.Tests.Fakes +{ + internal class FakeIndexMetadata : IIndexMetadata + { + private readonly Dictionary> documentMetadata; + private readonly Dictionary> objectTypeMetadata; + + public FakeIndexMetadata( + int count, + IndexStatistics? statistics = null, + (int documentId, DocumentMetadata statistics)[]? documentMetadata = null, + (byte objectTypeId, Func scoreProvider)[]? objectTypeMetadata = null) + { + this.DocumentCount = count; + this.IndexStatistics = statistics ?? new IndexStatistics(); + this.documentMetadata = (documentMetadata ?? []).ToDictionary(i => i.documentId, i => i.statistics); + this.objectTypeMetadata = (objectTypeMetadata ?? []).ToDictionary(i => i.objectTypeId, i => i.scoreProvider); + } + + public int DocumentCount { get; private set; } + + public IndexStatistics IndexStatistics { get; private set; } + + public int Count => this.DocumentCount; + + public DocumentMetadata GetDocumentMetadata(int documentId) + { + return this.documentMetadata[documentId]; + } + + public DocumentMetadata GetMetadata(int documentId) + { + return this.GetDocumentMetadata(documentId); + } + + public ScoreBoostMetadata GetObjectTypeScoreBoostMetadata(byte objectTypeId) + { + return new FakeScoreBoostMetadata(this.objectTypeMetadata[objectTypeId]); + } + + private class FakeScoreBoostMetadata : ScoreBoostMetadata + { + private readonly Func scoreBoostCalculator; + + public FakeScoreBoostMetadata(Func func) + : base(null!) 
+ { + this.scoreBoostCalculator = func; + } + + public override double CalculateScoreBoost(DocumentMetadata documentMetadata) + { + return this.scoreBoostCalculator(documentMetadata); + } + } + } +} diff --git a/test/Lifti.Tests/Fakes/FakeIndexSnapshot.cs b/test/Lifti.Tests/Fakes/FakeIndexSnapshot.cs new file mode 100644 index 00000000..3c1fa515 --- /dev/null +++ b/test/Lifti.Tests/Fakes/FakeIndexSnapshot.cs @@ -0,0 +1,25 @@ +using Lifti.Querying; + +namespace Lifti.Tests.Fakes +{ + internal class FakeIndexSnapshot : IIndexSnapshot + { + public FakeIndexSnapshot(IIndexMetadata indexMetadata) + { + this.Metadata = indexMetadata; + } + + public IndexNode Root => throw new System.NotImplementedException(); + + public IIndexedFieldLookup FieldLookup => throw new System.NotImplementedException(); + + public IIndexMetadata Items => this.Metadata; + + public IIndexMetadata Metadata { get; private set; } + + public IIndexNavigator CreateNavigator() + { + throw new System.NotImplementedException(); + } + } +} diff --git a/test/Lifti.Tests/Fakes/FakeItemStore.cs b/test/Lifti.Tests/Fakes/FakeItemStore.cs deleted file mode 100644 index 5b17e672..00000000 --- a/test/Lifti.Tests/Fakes/FakeItemStore.cs +++ /dev/null @@ -1,26 +0,0 @@ -using System.Collections.Generic; -using System.Linq; - -namespace Lifti.Tests.Fakes -{ - internal class FakeItemStore : IItemStore - { - private Dictionary> itemMetadata; - - public FakeItemStore(int count, IndexStatistics statistics, params (int id, ItemMetadata statistics)[] itemMetadata) - { - this.Count = count; - this.IndexStatistics = statistics; - this.itemMetadata = itemMetadata.ToDictionary(i => i.id, i => i.statistics); - } - - public int Count { get; private set; } - - public IndexStatistics IndexStatistics { get; private set; } - - public IItemMetadata GetMetadata(int id) - { - return this.itemMetadata[id]; - } - } -} diff --git a/test/Lifti.Tests/Fakes/FakeQueryContext.cs b/test/Lifti.Tests/Fakes/FakeQueryContext.cs deleted file mode 
100644 index a9786c6f..00000000 --- a/test/Lifti.Tests/Fakes/FakeQueryContext.cs +++ /dev/null @@ -1,20 +0,0 @@ -using Lifti.Querying; -using Lifti.Querying.QueryParts; - -namespace Lifti.Tests.Fakes -{ - public class FakeQueryContext : IQueryContext - { - private readonly IntermediateQueryResult result; - - public FakeQueryContext(IntermediateQueryResult result) - { - this.result = result; - } - - public IntermediateQueryResult ApplyTo(IntermediateQueryResult intermediateQueryResult) - { - return this.result; - } - } -} diff --git a/test/Lifti.Tests/Fakes/FakeQueryParser.cs b/test/Lifti.Tests/Fakes/FakeQueryParser.cs index 6b944483..e8bd0ce9 100644 --- a/test/Lifti.Tests/Fakes/FakeQueryParser.cs +++ b/test/Lifti.Tests/Fakes/FakeQueryParser.cs @@ -12,7 +12,7 @@ public FakeQueryParser(IQuery returnedQuery) this.returnedQuery = returnedQuery; } - public List ParsedQueries { get; } = new List(); + public List ParsedQueries { get; } = []; public IQuery Parse(IIndexedFieldLookup fieldLookup, string queryText, IIndexTokenizerProvider tokenizerProvider) { diff --git a/test/Lifti.Tests/Fakes/FakeScorer.cs b/test/Lifti.Tests/Fakes/FakeScorer.cs index fc4d43cd..224eca4a 100644 --- a/test/Lifti.Tests/Fakes/FakeScorer.cs +++ b/test/Lifti.Tests/Fakes/FakeScorer.cs @@ -1,6 +1,5 @@ using Lifti.Querying; using System.Collections.Generic; -using System.Linq; namespace Lifti.Tests.Fakes { @@ -13,11 +12,9 @@ public FakeScorer(double score) this.score = score; } - public IReadOnlyList Score(IReadOnlyList tokens, double weighting) + public double CalculateScore(int totalMatchedDocuments, int documentId, byte fieldId, IReadOnlyList tokenLocations, double weighting) { - return tokens.Select(m => new ScoredToken( - m.ItemId, - m.FieldMatches.Select(fm => new ScoredFieldMatch(this.score, fm)).ToList())).ToList(); + return this.score; } } } diff --git a/test/Lifti.Tests/Fakes/FakeTokenLocationMatch.cs b/test/Lifti.Tests/Fakes/FakeTokenLocationMatch.cs deleted file mode 100644 index 
f1cd2ac6..00000000 --- a/test/Lifti.Tests/Fakes/FakeTokenLocationMatch.cs +++ /dev/null @@ -1,26 +0,0 @@ -using Lifti.Querying; -using System.Collections.Generic; - -namespace Lifti.Tests.Fakes -{ - public class FakeTokenLocationMatch : ITokenLocationMatch - { - private readonly TokenLocation[] locations; - - public FakeTokenLocationMatch(int minTokenIndex, int maxTokenIndex, params TokenLocation[] locations) - { - this.MaxTokenIndex = maxTokenIndex; - this.MinTokenIndex = minTokenIndex; - this.locations = locations; - } - - public int MaxTokenIndex { get; private set; } - - public int MinTokenIndex { get; private set; } - - public IEnumerable GetLocations() - { - return this.locations; - } - } -} diff --git a/test/Lifti.Tests/FullTextIndexBuilderTests.cs b/test/Lifti.Tests/FullTextIndexBuilderTests.cs index 2d4e7500..eda94dcd 100644 --- a/test/Lifti.Tests/FullTextIndexBuilderTests.cs +++ b/test/Lifti.Tests/FullTextIndexBuilderTests.cs @@ -39,6 +39,25 @@ public void WithObjectConfiguration_ShouldConstructIndexWithSpecifiedConfig() index.FieldLookup.GetFieldInfo("Title").Id.Should().Be(3); } + [Fact] + public void WithObjectConfiguration_ShouldAllocateUniqueObjectTypeIds() + { + this.sut.WithObjectTokenization( + o => o + .WithKey(i => i.Id) + .WithField("TextField", i => i.Text)) + .WithObjectTokenization( + o => o + .WithKey(i => i.Id) + .WithField("Content", i => i.Content) + .WithField("Title", i => i.Title)); + + var index = this.sut.Build(); + + index.ObjectTypeConfiguration.AllConfigurations.Select(x => x.Id) + .Should().BeEquivalentTo(new[] { 1, 2 }); + } + [Fact] public void WithObjectConfiguration_ShouldUseDefaultTokenizationOptionsIfNotProvided() { @@ -102,7 +121,7 @@ public void WithObjectConfiguration_ShouldUseDefaultThesaurusIfNotProvided() [Fact] public void WithConfiguredExplicitFuzzyMatchParameters_ShouldPassParametersToConstructedQueryParsers() { - var passedOptions = BuildSutAndGetPassedOptions(o => o.WithFuzzySearchDefaults(10, 4)); + var 
passedOptions = this.BuildSutAndGetPassedOptions(o => o.WithFuzzySearchDefaults(10, 4)); passedOptions.Should().NotBeNull(); passedOptions!.FuzzySearchMaxEditDistance(1000).Should().Be(10); @@ -112,7 +131,7 @@ public void WithConfiguredExplicitFuzzyMatchParameters_ShouldPassParametersToCon [Fact] public void WithNoDefaultJoiningOperatorConfigured_ShouldPassAndOperatorToIndex() { - var passedOptions = BuildSutAndGetPassedOptions(o => o); + var passedOptions = this.BuildSutAndGetPassedOptions(o => o); passedOptions.Should().NotBeNull(); passedOptions!.DefaultJoiningOperator.Should().Be(QueryTermJoinOperatorKind.And); } @@ -120,7 +139,7 @@ public void WithNoDefaultJoiningOperatorConfigured_ShouldPassAndOperatorToIndex( [Fact] public void WithDefaultJoiningOperatorConfigured_ShouldPassDefaultOperatorToIndex() { - var passedOptions = BuildSutAndGetPassedOptions(o => o.WithDefaultJoiningOperator(QueryTermJoinOperatorKind.Or)); + var passedOptions = this.BuildSutAndGetPassedOptions(o => o.WithDefaultJoiningOperator(QueryTermJoinOperatorKind.Or)); passedOptions.Should().NotBeNull(); passedOptions!.DefaultJoiningOperator.Should().Be(QueryTermJoinOperatorKind.Or); } @@ -161,7 +180,7 @@ public void WithCustomQueryParser_ShouldPassCustomImplementationToIndex() index.Search("test").Should().BeEmpty(); - parser.ParsedQueries.Should().BeEquivalentTo(new[] { "test" }); + parser.ParsedQueries.Should().BeEquivalentTo(["test"]); } [Fact] @@ -235,21 +254,21 @@ public async Task WithIndexModifiedActions_ShouldPassActionsToIndex() var action1 = new List(); var action2 = new List(); - this.sut.WithIndexModificationAction(i => action1.Add(i.Items.Count.ToString())) - .WithIndexModificationAction(i => action2.Add(i.Items.Count)); + this.sut.WithIndexModificationAction(i => action1.Add(i.Metadata.DocumentCount.ToString())) + .WithIndexModificationAction(i => action2.Add(i.Metadata.DocumentCount)); var index = this.sut.Build(); await index.AddAsync(9, "Test"); - 
action1.Should().BeEquivalentTo(new[] { "1" }); + action1.Should().BeEquivalentTo(["1"]); action2.Should().BeEquivalentTo(new[] { 1 }); } [Fact] public async Task WithDuplicateItemKeysThrowingExceptions_ShouldPassOptionToIndex() { - var index = this.sut.WithDuplicateItemBehavior(DuplicateItemBehavior.ThrowException) + var index = this.sut.WithDuplicateKeyBehavior(DuplicateKeyBehavior.ThrowException) .Build(); await index.AddAsync(1, "Test"); @@ -261,7 +280,7 @@ public async Task WithDuplicateItemKeysThrowingExceptions_ShouldPassOptionToInde [Fact] public async Task WithDuplicateItemKeysReplacingItems_ShouldPassOptionToIndex() { - var index = this.sut.WithDuplicateItemBehavior(DuplicateItemBehavior.ReplaceItem) + var index = this.sut.WithDuplicateKeyBehavior(DuplicateKeyBehavior.Replace) .Build(); await index.AddAsync(1, "Test"); diff --git a/test/Lifti.Tests/FullTextIndexTests.cs b/test/Lifti.Tests/FullTextIndexTests.cs index c492eed7..690e2d2c 100644 --- a/test/Lifti.Tests/FullTextIndexTests.cs +++ b/test/Lifti.Tests/FullTextIndexTests.cs @@ -8,14 +8,16 @@ using System.Threading; using System.Threading.Tasks; using Xunit; +using Xunit.Abstractions; namespace Lifti.Tests { public class FullTextIndexTests { private readonly FullTextIndex index; + private readonly ITestOutputHelper testOutput; - public FullTextIndexTests() + public FullTextIndexTests(ITestOutputHelper testOutput) { this.index = new FullTextIndexBuilder() .WithObjectTokenization( @@ -32,6 +34,7 @@ public FullTextIndexTests() .WithField("TextAsync", i => Task.Run(() => i.Text)) .WithField("MultiTextAsync", i => Task.Run(() => (IEnumerable)i.MultiText))) .Build(); + this.testOutput = testOutput; } [Fact] @@ -52,17 +55,17 @@ public async Task SequentialWildcardMatches_ShouldResultInCorrectMatchedPhrases( await index.AddAsync(entry.Item1, entry.Item2); } - index.Search("\"* to *\"").CreateMatchPhrases(x => data.First(d => d.Item1 == x).Item2) + index.Search("\"* to *\"") + .CreateMatchPhrases(x => 
data.First(d => d.Item1 == x).Item2) .SelectMany(x => x.FieldPhrases.SelectMany(x => x.Phrases)) .Should() .BeEquivalentTo( - new[] - { + [ "seems to some", "seems to be", "seems to me", "ought to work" - }); + ]); } [Fact] @@ -104,8 +107,8 @@ public async Task IndexingItemsAgainstDefaultField_ShouldUpdateTotalTokenCountSt { await this.WithIndexedStringsAsync(); - this.index.Items.IndexStatistics.TotalTokenCount.Should().Be(26); - this.index.Items.IndexStatistics.TokenCountByField.Should().BeEquivalentTo(new Dictionary + this.index.Metadata.IndexStatistics.TotalTokenCount.Should().Be(26); + this.index.Metadata.IndexStatistics.TokenCountByField.Should().BeEquivalentTo(new Dictionary { { 0, 26 } }); @@ -116,8 +119,8 @@ public async Task IndexingItemsAgainstWithMultipleFields_ShouldUpdateTotalTokenC { await this.WithIndexedSingleStringPropertyObjectsAsync(); - this.index.Items.IndexStatistics.TotalTokenCount.Should().Be(22L); - this.index.Items.IndexStatistics.TokenCountByField.Should().BeEquivalentTo(new Dictionary + this.index.Metadata.IndexStatistics.TotalTokenCount.Should().Be(22L); + this.index.Metadata.IndexStatistics.TokenCountByField.Should().BeEquivalentTo(new Dictionary { { 1, 4 }, { 2, 4 }, @@ -241,24 +244,23 @@ public async Task IndexedObjectsShouldBeRetrievableByTextFromAnyIndexedField() [Fact] public async void ObjectsWithMultipleDynamicFieldsShouldGenerateCorrectPrefixedFieldNames() { - var index = await CreateDynamicObjectTestIndex(true); + var index = await CreateDynamicObjectTestIndexAsync(true); index.FieldLookup.AllFieldNames.Should().BeEquivalentTo( - new[] - { + [ "Details", "Dyn1Field1", "Dyn1Field2", "Dyn2Field1", "Dyn2Field2", "Dyn1Field3" - }); + ]); } [Fact] public async void SearchesCanBePerformedForDynamicFieldsWithPrefixes() { - var index = await CreateDynamicObjectTestIndex(true); + var index = await CreateDynamicObjectTestIndexAsync(true); var resultsWithoutFieldFilter = index.Search("Three").ToList(); var resultsWithFieldFilter = 
index.Search("Dyn1Field3=Three").ToList(); @@ -272,11 +274,10 @@ public async void SearchesCanBePerformedForDynamicFieldsWithPrefixes() public async void ObjectsWithMultipleDynamicFieldsUsingTheSameFieldNamesShouldRaiseError() { var exception = await Assert.ThrowsAsync( - async () => await CreateDynamicObjectTestIndex(false)); - + async () => await CreateDynamicObjectTestIndexAsync(false)); exception.Message.Should().Be( - "A duplicate field \"Field1\" was encountered while indexing item A. Most likely multiple dynamic field providers have been configured " + - "and the same field was produced by more than one of them. Consider using a field prefix when configuring the dynamic fields."); + "A duplicate field \"Field1\" was encountered while indexing the object with key A. Most likely multiple dynamic field providers have been configured and the same " + + "field was produced by more than one of them. Consider using a field prefix when configuring the dynamic fields."); } [Fact] @@ -471,7 +472,7 @@ public async Task RemovingItemAndAddingAgainItTheSameBatch_ShouldResultInTheSame await this.index.CommitBatchChangeAsync(); - this.index.Root.Should().BeEquivalentTo(previousRoot); + this.index.Root.ToString().Should().BeEquivalentTo(previousRoot.ToString()); } [Fact] @@ -505,14 +506,99 @@ public async Task SearchingTheIndex_ShouldNotUseTextExtractor() index.Search("olleh").Should().HaveCount(1); } - private static async Task> CreateDynamicObjectTestIndex(bool usePrefixes = false) + [Fact] + public async Task ScoreBoostingAField_ShouldResultInBoostedSearchResults() + { + var index = new FullTextIndexBuilder() + .WithObjectTokenization( + o => o.WithKey(i => i.Id) + .WithField("Text", i => i.Text, scoreBoost: 10D) + .WithField("MultiText", i => i.MultiText)) + .Build(); + + + await index.AddAsync(new TestObject3("A", "Test Two", "Test One")); + await index.AddAsync(new TestObject3("B", "Test One", "Test Two")); + + // "B" should be returned first, and have 10x the score of 
"A" because of the 10x boost on the Text field + var results = index.Search("One").ToList(); + results.Select(x => x.Key).Should().BeEquivalentTo(new[] { "B", "A" }, o => o.WithStrictOrdering()); + + results[0].Score.Should().Be(results[1].Score * 10); + } + + [Fact] + public async Task ScoreBoostingADynamicField_ShouldResultInBoostedSearchResults() + { + var index = await CreateDynamicObjectTestIndexAsync(true, dynamicField1ScoreBoost: 10D); + + await index.AddAsync( + new DynamicObject( + "C", + "Text One", + new Dictionary { { "Field1", "Angry" } }, + new ExtraField("Field1", "Happy"))); + + await index.AddAsync( + new DynamicObject( + "D", + "Text One", + new Dictionary { { "Field1", "Happy" } }, + new ExtraField("Field1", "Angry"))); + + // "D" should be returned first, and have 10x the score of "C" because of the 10x boost on the dynamic field + var results = index.Search("Happy").ToList(); + results.Select(x => x.Key).Should().BeEquivalentTo(new[] { "D", "C" }, o => o.WithStrictOrdering()); + } + + [Fact] + public async Task ReadingIndexWhileWritingToIt_ShouldNotResultInErrors() + { + var indexing = true; + + // Set up two tasks - one will add a wikipedia page to the index and sleep for 10ms, the other + // will continually search the index using a full wildcard search. This should never error. 
+ var addTask = Task.Run(async () => + { + var wikipediaTests = WikipediaDataLoader.Load(this.GetType()); + foreach (var (name, text) in wikipediaTests.Take(100)) + { + await this.index.AddAsync(name, text); + } + + // Now remove them again + foreach (var (name, _) in wikipediaTests.Take(100)) + { + await this.index.RemoveAsync(name); + } + + indexing = false; + }); + + var searchTask = Task.Run(async () => + { + while (indexing) + { + await Task.Delay(2); + var results = this.index.Search("*"); + this.testOutput.WriteLine($"Matched {results.Count} results"); + } + }); + + await Task.WhenAll(addTask, searchTask); + } + + private static async Task> CreateDynamicObjectTestIndexAsync( + bool usePrefixes = false, + double dynamicField1ScoreBoost = 1D, + double dynamicField2ScoreBoost = 1D) { var index = new FullTextIndexBuilder() .WithObjectTokenization( o => o.WithKey(i => i.Id) .WithField("Details", i => i.Details) - .WithDynamicFields("Dyn", i => i.DynamicFields, usePrefixes ? "Dyn1" : null) - .WithDynamicFields("Extra", i => i.ExtraFields, x => x.Name, x => x.Value, usePrefixes ? "Dyn2" : null)) + .WithDynamicFields("Dyn", i => i.DynamicFields, usePrefixes ? "Dyn1" : null, scoreBoost: dynamicField1ScoreBoost) + .WithDynamicFields("Extra", i => i.ExtraFields, x => x.Name, x => x.Value, usePrefixes ? 
"Dyn2" : null, scoreBoost: dynamicField2ScoreBoost)) .Build(); await index.AddAsync( @@ -574,46 +660,16 @@ public TestObject(string id, string text1, string text2, string text3) public string Text3 { get; } } - public class TestObject2 - { - public TestObject2(string id, params string[] text) - { - this.Id = id; - this.Text = text; - } - - public string Id { get; } - public string[] Text { get; } - } - - public class TestObject3 - { - public TestObject3(string id, string text, params string[] multiText) - { - this.Id = id; - this.Text = text; - this.MultiText = multiText; - } + public record TestObject2(string Id, params string[] Text); - public string Id { get; } - public string Text { get; } - public string[] MultiText { get; } - } + public record TestObject3(string Id, string Text, params string[] MultiText); - public class DynamicObject + public class DynamicObject(string id, string details, Dictionary dynamicFields, params FullTextIndexTests.ExtraField[] extraFields) { - public DynamicObject(string id, string details, Dictionary dynamicFields, params ExtraField[] extraFields) - { - this.Id = id; - this.Details = details; - this.DynamicFields = dynamicFields; - this.ExtraFields = extraFields.Length == 0 ? null : extraFields; - } - - public string Id { get; } - public string Details { get; } - public Dictionary DynamicFields { get; } - public ExtraField[]? ExtraFields { get; } + public string Id { get; } = id; + public string Details { get; } = details; + public Dictionary DynamicFields { get; } = dynamicFields; + public ExtraField[]? ExtraFields { get; } = extraFields.Length == 0 ? 
null : extraFields; } public record ExtraField(string Name, string Value); diff --git a/test/Lifti.Tests/GlobalSuppressions.cs b/test/Lifti.Tests/GlobalSuppressions.cs new file mode 100644 index 00000000..7830b373 --- /dev/null +++ b/test/Lifti.Tests/GlobalSuppressions.cs @@ -0,0 +1,8 @@ +// This file is used by Code Analysis to maintain SuppressMessage +// attributes that are applied to this project. +// Project-level suppressions either have no target or are given +// a specific target and scoped to a namespace, type, member, etc. + +using System.Diagnostics.CodeAnalysis; + +[assembly: SuppressMessage("Performance", "CA1861:Avoid constant arrays as arguments", Justification = "OK in these unit tests")] diff --git a/test/Lifti.Tests/IdPoolTests.cs b/test/Lifti.Tests/IdPoolTests.cs deleted file mode 100644 index 710ded13..00000000 --- a/test/Lifti.Tests/IdPoolTests.cs +++ /dev/null @@ -1,169 +0,0 @@ -using FluentAssertions; -using System.Collections.Immutable; -using System.Linq; -using Xunit; - -namespace Lifti.Tests -{ - public class IdPoolTests - { - private static readonly DocumentStatistics item1DocumentStatistics = DocumentStatistics((1, 100)); - private static readonly DocumentStatistics item2DocumentStatistics = DocumentStatistics((1, 50), (2, 200)); - - private readonly IdPool sut; - private readonly int id1; - private readonly int id2; - - public IdPoolTests() - { - this.sut = new IdPool(); - this.id1 = this.sut.Add("1", item1DocumentStatistics); - this.id2 = this.sut.Add("2", item2DocumentStatistics); - } - - [Fact] - public void Add_ItemOnly_ShouldIncrementIndexStatistics() - { - this.sut.IndexStatistics.Should().BeEquivalentTo(IndexStatistics((1, 150), (2, 200))); - } - - [Fact] - public void Add_ItemOnly_ShouldIncrementWhenNoItemsReturned() - { - this.id1.Should().BeLessThan(this.id2); - } - - [Fact] - public void Add_ItemOnly_ShouldThrowExceptionIfItemAlreadyIndexed() - { - Assert.Throws(() => this.sut.Add("1", DocumentStatistics())) - 
.Message.Should().Be("Item already indexed"); - } - - [Fact] - public void Add_ItemWithId_ShouldThrowExceptionIfItemAlreadyIndexed() - { - Assert.Throws(() => this.sut.Add(9, "1", DocumentStatistics())) - .Message.Should().Be("Item already indexed"); - } - - [Fact] - public void Add_ItemWithId_ShouldThrowExceptionIfIdAlreadyUsedAlreadyIndexed() - { - Assert.Throws(() => this.sut.Add(1, "9", DocumentStatistics())) - .Message.Should().Be("Id 1 is already registered in the index."); - } - - [Fact] - public void Add_ItemWithId_ShouldAddItemToIndex() - { - var documentStatistics = DocumentStatistics((1, 20), (2, 50), (3, 10)); - this.sut.Add(9, "9", documentStatistics); - this.sut.GetMetadata(9).Should().BeEquivalentTo( - new ItemMetadata( - 9, - "9", - documentStatistics)); - } - - [Fact] - public void Add_ItemWithId_ShouldAdjustIndexStatistics() - { - var documentStatistics = DocumentStatistics((1, 20), (2, 50), (3, 10)); - this.sut.Add(9, "9", documentStatistics); - this.sut.IndexStatistics.Should().BeEquivalentTo( - IndexStatistics((1, 170), (2, 250), (3, 10))); - } - - [Fact] - public void Add_ItemWithId_ShouldResetTheNextIdBasedOnTheHighestIndexedId() - { - this.sut.Add(10, "10", DocumentStatistics((10, 10))); - this.sut.Add(9, "9", DocumentStatistics((9, 9))); - - this.sut.Add("7", DocumentStatistics((7, 7))); - - this.sut.GetIndexedItems().Should().BeEquivalentTo( - new[] - { - new ItemMetadata(0, "1", item1DocumentStatistics), - new ItemMetadata(1, "2", item2DocumentStatistics), - new ItemMetadata(9, "9", DocumentStatistics((9, 9))), - new ItemMetadata(10, "10", DocumentStatistics((10, 10))), - new ItemMetadata(11, "7", DocumentStatistics((7, 7))) - }); - } - - [Fact] - public void Count_ShouldReturnCorrectValue() - { - this.sut.Count.Should().Be(2); - } - - [Fact] - public void GetIndexedItems_ShouldReturnMetadataForAllItemsInTheIndex() - { - this.sut.GetIndexedItems().Should().BeEquivalentTo( - new[] - { - new ItemMetadata(0, "1", item1DocumentStatistics), - 
new ItemMetadata(1, "2", item2DocumentStatistics) - }); - } - - [Fact] - public void GetMetadataById_ShouldReturnCorrectItemForId() - { - this.sut.GetMetadata(this.id1).Should().BeEquivalentTo(new ItemMetadata(this.id1, "1", item1DocumentStatistics)); - this.sut.GetMetadata(this.id2).Should().BeEquivalentTo(new ItemMetadata(this.id2, "2", item2DocumentStatistics)); - } - - [Fact] - public void GetMetadataById_ShouldThrowExceptionIfItemNotGound() - { - Assert.Throws(() => this.sut.GetMetadata(this.id2 + 1)) - .Message.Should().Be("Item not found"); - } - - [Fact] - public void ReleaseItem_ShouldReturnIdOfReleasedItem() - { - this.sut.ReleaseItem("2").Should().Be(this.id2); - Assert.Throws(() => this.sut.GetMetadata(this.id2)); - } - - [Fact] - public void ReleasedItemId_ShouldBeReusedOnNextCreateId() - { - this.sut.ReleaseItem("1").Should().Be(this.id1); - this.sut.ReleaseItem("2").Should().Be(this.id2); - this.sut.Add("3", DocumentStatistics()).Should().Be(this.id1); - this.sut.Add("4", DocumentStatistics()).Should().Be(this.id2); - this.sut.Add("5", DocumentStatistics()).Should().Be(this.id2 + 1); - } - - [Fact] - public void Contains_WhenItemExists_ShouldReturnTrue() - { - this.sut.Contains("1").Should().BeTrue(); - } - - [Fact] - public void Contains_WhenItemDoesntExist_ShouldReturnFalse() - { - this.sut.Contains("9").Should().BeFalse(); - } - - private static DocumentStatistics DocumentStatistics(params (byte fieldId, int tokenCount)[] fieldWordCounts) - { - return new DocumentStatistics(fieldWordCounts.ToDictionary(f => f.fieldId, f => f.tokenCount)); - } - - private static IndexStatistics IndexStatistics(params (byte fieldId, int wordCount)[] fieldTokenCounts) - { - return new IndexStatistics( - fieldTokenCounts.ToImmutableDictionary(f => f.fieldId, f => (long)f.wordCount), - fieldTokenCounts.Sum(c => c.wordCount)); - } - } -} diff --git a/test/Lifti.Tests/IndexInsertionMutationTests.cs b/test/Lifti.Tests/IndexInsertionMutationTests.cs index 
0dea869e..c78b12b4 100644 --- a/test/Lifti.Tests/IndexInsertionMutationTests.cs +++ b/test/Lifti.Tests/IndexInsertionMutationTests.cs @@ -1,4 +1,5 @@ using Lifti.Tokenization; +using System; using Xunit; namespace Lifti.Tests @@ -24,7 +25,7 @@ public void IndexingAtNodeWithSameTextForDifferentItem_ShouldResultInItemsDirect this.Sut.Add(Item2, FieldId1, new Token(word, this.Locations2.Locations)); var result = this.Sut.Apply(); - VerifyResult(result, word, new[] { (Item1, this.Locations1), (Item2, this.Locations2) }); + VerifyResult(result, word, [(Item1, this.Locations1), (Item2, this.Locations2)]); } [Fact] @@ -36,11 +37,11 @@ public void IndexingWordEndingAtSplit_ShouldResultInItemIndexedWhereSplitOccurs( this.Sut.Add(Item4, FieldId1, new Token("a", this.Locations4.Locations)); var result = this.Sut.Apply(); - VerifyResult(result, null, expectedChildNodes: new[] { 'a', 'b' }); - VerifyResult(result, new[] { 'a' }, null, new[] { (Item4, this.Locations4) }, new[] { 'p', 'b' }); - VerifyResult(result, new[] { 'b' }, "anana", new[] { (Item3, this.Locations3) }); - VerifyResult(result, new[] { 'a', 'b' }, "le", new[] { (Item2, this.Locations2) }); - VerifyResult(result, new[] { 'a', 'p' }, "ple", new[] { (Item1, this.Locations1) }); + VerifyResult(result, null, expectedChildNodes: ['a', 'b']); + VerifyResult(result, ['a'], null, new[] { (Item4, this.Locations4) }, ['b', 'p']); + VerifyResult(result, ['b'], "anana", new[] { (Item3, this.Locations3) }); + VerifyResult(result, ['a', 'b'], "le", new[] { (Item2, this.Locations2) }); + VerifyResult(result, ['a', 'p'], "ple", new[] { (Item1, this.Locations1) }); } [Fact] @@ -51,10 +52,10 @@ public void IndexingWhenChildNodeAlreadyExists_ShouldContinueIndexingAtExistingC this.Sut.Add(Item3, FieldId1, new Token("freddy", this.Locations3.Locations)); var result = this.Sut.Apply(); - VerifyResult(result, "fre", expectedChildNodes: new[] { 'e', 'd' }); - VerifyResult(result, new[] { 'e' }, "dom", new[] { (Item1, this.Locations1) 
}); - VerifyResult(result, new[] { 'd' }, null, new[] { (Item2, this.Locations2) }, new[] { 'd' }); - VerifyResult(result, new[] { 'd', 'd' }, "y", new[] { (Item3, this.Locations3) }); + VerifyResult(result, "fre", expectedChildNodes: ['d', 'e']); + VerifyResult(result, ['e'], "dom", new[] { (Item1, this.Locations1) }); + VerifyResult(result, ['d'], null, new[] { (Item2, this.Locations2) }, ['d']); + VerifyResult(result, ['d', 'd'], "y", new[] { (Item3, this.Locations3) }); } [Fact] @@ -65,9 +66,9 @@ public void IndexingAtNodeWithTextWithSameSuffix_ShouldCreateNewChildNode() this.Sut.Add(Item3, FieldId1, new Token("tester", this.Locations3.Locations)); var result = this.Sut.Apply(); - VerifyResult(result, "test", new[] { (Item1, this.Locations1) }, new[] { 'i', 'e' }); - VerifyResult(result, new[] { 'i' }, "ng", new[] { (Item2, this.Locations2) }); - VerifyResult(result, new[] { 'e' }, "r", new[] { (Item3, this.Locations3) }); + VerifyResult(result, "test", new[] { (Item1, this.Locations1) }, ['e', 'i']); + VerifyResult(result, ['i'], "ng", new[] { (Item2, this.Locations2) }); + VerifyResult(result, ['e'], "r", new[] { (Item3, this.Locations3) }); } [Fact] @@ -78,10 +79,10 @@ public void IndexingAtNodeAlreadySplit_ShouldMaintainMatchesAtFirstSplitNode() this.Sut.Add(Item3, FieldId1, new Token("brokerage", this.Locations3.Locations)); var result = this.Sut.Apply(); - VerifyResult(result, "broke", expectedChildNodes: new[] { 'r', 'n' }); - VerifyResult(result, new[] { 'r' }, "", new[] { (Item1, this.Locations1) }, new[] { 'a' }); - VerifyResult(result, new[] { 'n' }, "", new[] { (Item2, this.Locations2) }); - VerifyResult(result, new[] { 'r', 'a' }, "ge", new[] { (Item3, this.Locations3) }); + VerifyResult(result, "broke", expectedChildNodes: ['n', 'r']); + VerifyResult(result, ['r'], "", new[] { (Item1, this.Locations1) }, ['a']); + VerifyResult(result, ['n'], "", new[] { (Item2, this.Locations2) }); + VerifyResult(result, ['r', 'a'], "ge", new[] { (Item3, 
this.Locations3) }); } [Theory] @@ -100,9 +101,14 @@ public void IndexingAtNodeAlreadySplit_ShouldMaintainMatchesAtFirstSplitNode() this.Sut.Add(Item2, FieldId1, new Token(indexText, this.Locations2.Locations)); var result = this.Sut.Apply(); - VerifyResult(result, remainingIntraText, expectedChildNodes: new[] { originalSplitChar, newSplitChar }); - VerifyResult(result, new[] { originalSplitChar }, splitIntraText, new[] { (Item1, this.Locations1) }); - VerifyResult(result, new[] { newSplitChar }, newIntraText, new[] { (Item2, this.Locations2) }); + var expectedChildNodes = new[] { originalSplitChar, newSplitChar }; + + // The order of child nodes *must* be ascending, so we'll order the array + Array.Sort(expectedChildNodes); + + VerifyResult(result, remainingIntraText, expectedChildNodes: expectedChildNodes); + VerifyResult(result, [originalSplitChar], splitIntraText, new[] { (Item1, this.Locations1) }); + VerifyResult(result, [newSplitChar], newIntraText, new[] { (Item2, this.Locations2) }); } [Fact] @@ -112,8 +118,8 @@ public void IndexingAtNodeCausingSplitAtMiddleOfIntraNodeText_ShouldPlaceMatchAt this.Sut.Add(Item2, FieldId1, new Token("NOITA", this.Locations2.Locations)); var result = this.Sut.Apply(); - VerifyResult(result, "NOITA", new[] { (Item2, this.Locations2) }, expectedChildNodes: new[] { 'Z' }); - VerifyResult(result, new[] { 'Z' }, "I", new[] { (Item1, this.Locations1) }); + VerifyResult(result, "NOITA", new[] { (Item2, this.Locations2) }, expectedChildNodes: ['Z']); + VerifyResult(result, ['Z'], "I", new[] { (Item1, this.Locations1) }); } [Fact] @@ -124,10 +130,10 @@ public void IndexingAtNodeCausingSplitAtStartOfIntraNodeText_ShouldReturnInEntry this.Sut.Add(Item3, FieldId1, new Token("w3", this.Locations3.Locations)); var result = this.Sut.Apply(); - VerifyResult(result, "w", expectedChildNodes: new[] { 'w', '3' }); - VerifyResult(result, new[] { 'w' }, "w", new[] { (Item1, this.Locations1) }); - VerifyResult(result, new[] { '3' }, null, new[] { 
(Item3, this.Locations3) }, new[] { 'c' }); - VerifyResult(result, new[] { '3', 'c' }, null, new[] { (Item2, this.Locations2) }); - } + VerifyResult(result, "w", expectedChildNodes: ['3', 'w']); + VerifyResult(result, ['w'], "w", new[] { (Item1, this.Locations1) }); + VerifyResult(result, ['3'], null, new[] { (Item3, this.Locations3) }, ['c']); + VerifyResult(result, ['3', 'c'], null, new[] { (Item2, this.Locations2) }); + } } } diff --git a/test/Lifti.Tests/IndexMetadataTests.cs b/test/Lifti.Tests/IndexMetadataTests.cs new file mode 100644 index 00000000..5a98c2f6 --- /dev/null +++ b/test/Lifti.Tests/IndexMetadataTests.cs @@ -0,0 +1,197 @@ +using FluentAssertions; +using Lifti.Tokenization.Objects; +using System; +using System.Linq; +using Xunit; + +namespace Lifti.Tests +{ + public class IndexMetadataTests + { + private static readonly DocumentStatistics item1DocumentStatistics = DocumentStatistics((1, 100)); + private static readonly DocumentStatistics item2DocumentStatistics = DocumentStatistics((1, 50), (2, 200)); + + private readonly IndexMetadata sut; + private readonly int id1; + private readonly int id2; + + public IndexMetadataTests() + { + this.sut = new IndexMetadata( + new[] + { + new ObjectTypeConfiguration( + 1, + x => x, + Array.Empty>(), + Array.Empty>(), + new ObjectScoreBoostOptions(10D, null, 10D, null)) + }); + + this.id1 = this.sut.Add("1", item1DocumentStatistics); + this.id2 = this.sut.Add("2", item2DocumentStatistics); + } + + [Fact] + public void Add_ItemOnly_ShouldIncrementIndexStatistics() + { + this.sut.IndexStatistics.Should().BeEquivalentTo(IndexStatistics((1, 150), (2, 200))); + } + + [Fact] + public void Add_ItemOnly_ShouldIncrementWhenNoItemsReturned() + { + this.id1.Should().BeLessThan(this.id2); + } + + [Fact] + public void Add_ItemOnly_ShouldThrowExceptionIfItemAlreadyIndexed() + { + Assert.Throws(() => this.sut.Add("1", DocumentStatistics())) + .Message.Should().Be("Document already indexed"); + } + + [Fact] + public void 
Add_ItemWithMatchingKey_ShouldThrowExceptionIfItemAlreadyIndexed() + { + Assert.Throws(() => this.sut.Add(DocumentMetadata(0))) + .Message.Should().Be("Document already indexed"); + } + + [Fact] + public void Add_ItemWithMatchingId_ShouldThrowExceptionIfIdAlreadyUsedAlreadyIndexed() + { + Assert.Throws(() => this.sut.Add(DocumentMetadata(1, key: "DifferentKey"))) + .Message.Should().Be("Id 1 is already registered in the index."); + } + + [Fact] + public void Add_ItemWithId_ShouldAddItemToIndex() + { + var documentStatistics = DocumentStatistics((1, 20), (2, 50), (3, 10)); + var itemMetadata = Lifti.DocumentMetadata.ForObject(1, 9, "9", documentStatistics, new System.DateTime(2022, 11, 23), 12D); + this.sut.Add(itemMetadata); + this.sut.GetDocumentMetadata(9).Should().BeEquivalentTo( + itemMetadata); + } + + [Fact] + public void Add_ItemWithId_ShouldAdjustIndexStatistics() + { + var documentStatistics = DocumentStatistics((1, 20), (2, 50), (3, 10)); + this.sut.Add(DocumentMetadata(9, documentStatistics)); + this.sut.IndexStatistics.Should().BeEquivalentTo( + IndexStatistics((1, 170), (2, 250), (3, 10))); + } + + [Fact] + public void Add_ItemWithId_ShouldResetTheNextIdBasedOnTheHighestIndexedId() + { + this.sut.Add(DocumentMetadata(10, DocumentStatistics((10, 10)))); + this.sut.Add(DocumentMetadata(9, DocumentStatistics((9, 9)))); + + this.sut.Add("7", DocumentStatistics((7, 7))); + + this.sut.GetIndexedDocuments().Should().BeEquivalentTo( + new[] + { + DocumentMetadata(0, item1DocumentStatistics), + DocumentMetadata(1, item2DocumentStatistics), + DocumentMetadata(9, DocumentStatistics((9, 9))), + DocumentMetadata(10,DocumentStatistics((10, 10))), + DocumentMetadata(11, DocumentStatistics((7, 7)), key: "7"), + }); + } + + [Fact] + public void Count_ShouldReturnCorrectValue() + { + this.sut.DocumentCount.Should().Be(2); + } + + [Fact] + public void GetIndexedDocuments_ShouldReturnMetadataForAllDocumentsInTheIndex() + { + 
this.sut.GetIndexedDocuments().Should().BeEquivalentTo( + new[] + { + DocumentMetadata(0, item1DocumentStatistics), + DocumentMetadata(1, item2DocumentStatistics) + }); + } + + [Fact] + public void GetMetadataById_ShouldReturnCorrectItemForId() + { + this.sut.GetDocumentMetadata(this.id1).Should().BeEquivalentTo(DocumentMetadata(this.id1, item1DocumentStatistics)); + this.sut.GetDocumentMetadata(this.id2).Should().BeEquivalentTo(DocumentMetadata(this.id2, item2DocumentStatistics)); + } + + [Fact] + public void GetMetadataById_ShouldThrowExceptionIfItemNotGound() + { + Assert.Throws(() => this.sut.GetDocumentMetadata(this.id2 + 1)) + .Message.Should().Be("Item not found"); + } + + [Fact] + public void ReleaseItem_ShouldReturnIdOfReleasedItem() + { + this.sut.Remove("2").Should().Be(this.id2); + Assert.Throws(() => this.sut.GetDocumentMetadata(this.id2)); + } + + [Fact] + public void ReleasedItemId_ShouldBeReusedOnNextCreateId() + { + this.sut.Remove("1").Should().Be(this.id1); + this.sut.Remove("2").Should().Be(this.id2); + this.sut.Add("3", DocumentStatistics()).Should().Be(this.id1); + this.sut.Add("4", DocumentStatistics()).Should().Be(this.id2); + this.sut.Add("5", DocumentStatistics()).Should().Be(this.id2 + 1); + } + + [Fact] + public void Contains_WhenItemExists_ShouldReturnTrue() + { + this.sut.Contains("1").Should().BeTrue(); + } + + [Fact] + public void Contains_WhenItemDoesntExist_ShouldReturnFalse() + { + this.sut.Contains("9").Should().BeFalse(); + } + + [Fact] + public void GetObjectTypeScoreBoostMetadata_WhenObjectIdExists_ShouldReturnMetadata() + { + this.sut.GetObjectTypeScoreBoostMetadata(1) + .Should().NotBeNull(); + } + + [Fact] + public void GetObjectTypeScoreBoostMetadata_WhenObjectIdDoesntExist_ShouldThrowException() + { + Assert.Throws(() => this.sut.GetObjectTypeScoreBoostMetadata(2)) + .Message.Should().Be("Unknown object type id 2"); + } + + private static DocumentMetadata DocumentMetadata(int id, DocumentStatistics? 
documentStatistics = null, string? key = null) + { + return Lifti.DocumentMetadata.ForLooseText(id, key ?? (id + 1).ToString(), documentStatistics ?? DocumentStatistics()); + } + + private static DocumentStatistics DocumentStatistics(params (byte fieldId, int tokenCount)[] fieldWordCounts) + { + return new DocumentStatistics(fieldWordCounts.ToDictionary(f => f.fieldId, f => f.tokenCount)); + } + + private static IndexStatistics IndexStatistics(params (byte fieldId, int wordCount)[] fieldTokenCounts) + { + return new IndexStatistics( + fieldTokenCounts.ToDictionary(f => f.fieldId, f => (long)f.wordCount), + fieldTokenCounts.Sum(c => c.wordCount)); + } + } +} diff --git a/test/Lifti.Tests/IndexRemovalMutationTests.cs b/test/Lifti.Tests/IndexRemovalMutationTests.cs index 8692c94a..84d3a48d 100644 --- a/test/Lifti.Tests/IndexRemovalMutationTests.cs +++ b/test/Lifti.Tests/IndexRemovalMutationTests.cs @@ -14,8 +14,8 @@ public void RemovingItemIdDuringMutation_ShouldCauseItemToBeRemovedFromIndexAndC var result = this.Sut.Apply(); - VerifyResult(result, "www", expectedChildNodes: new[] { 'w' }); - VerifyResult(result, new[] { 'w' }, "w"); + VerifyResult(result, "www", expectedChildNodes: ['w']); + VerifyResult(result, ['w'], "w"); } [Fact] @@ -30,8 +30,8 @@ public void RemovingItemIdFromUnmutatedIndex_ShouldCauseItemToBeRemovedFromIndex var result = this.Sut.Apply(); - VerifyResult(result, "www", expectedChildNodes: new[] { 'w' }); - VerifyResult(result, new[] { 'w' }, "w"); + VerifyResult(result, "www", expectedChildNodes: ['w']); + VerifyResult(result, ['w'], "w"); } [Fact] @@ -40,14 +40,20 @@ public void RemovingItemIdFromUnmutatedIndex_ShouldNotAffectOtherItemsData() this.Sut.Add(Item1, FieldId1, new Token("www", this.Locations1.Locations)); this.Sut.Add(Item2, FieldId1, new Token("wwwww", this.Locations2.Locations)); - this.ApplyMutationsToNewSut(); + var result = this.ApplyMutationsToNewSut(); + + VerifyResult(result, "www", expectedChildNodes: ['w'], 
expectedMatches: new[] { (Item1, this.Locations1) }); + VerifyResult(result, ['w'], "w", new[] { (Item2, this.Locations2) }); this.Sut.Remove(Item1); - var result = this.Sut.Apply(); + result = this.Sut.Apply(); + + // Item1 should be gone + VerifyResult(result, "www", expectedChildNodes: ['w']); - VerifyResult(result, "www", expectedChildNodes: new[] { 'w' }); - VerifyResult(result, new[] { 'w' }, "w", new[] { (Item2, this.Locations2) }); + // But because we only removed Item1, Item2 should still be present + VerifyResult(result, ['w'], "w", new[] { (Item2, this.Locations2) }); } } } diff --git a/test/Lifti.Tests/IndexedFieldLookupTests.cs b/test/Lifti.Tests/IndexedFieldLookupTests.cs index 5c1e991e..fc2f33fe 100644 --- a/test/Lifti.Tests/IndexedFieldLookupTests.cs +++ b/test/Lifti.Tests/IndexedFieldLookupTests.cs @@ -1,6 +1,7 @@ using FluentAssertions; using Lifti.Tokenization; using Lifti.Tokenization.Objects; +using Lifti.Tokenization.Stemming; using Lifti.Tokenization.TextExtraction; using System.Collections.Generic; using System.Threading.Tasks; @@ -40,7 +41,7 @@ public async Task StaticFieldsShouldBeRegisteredCorrectly() fieldInfo.FieldKind.Should().Be(FieldKind.Static); fieldInfo.Tokenizer.Should().NotBeNull(); var readField = await fieldInfo.ReadAsync("foo", default); - readField.Should().BeEquivalentTo(new[] { "foo" }); + readField.Should().BeEquivalentTo(["foo"]); } [Fact] @@ -56,7 +57,7 @@ public async Task DynamicFieldsShouldBeRegisteredCorrectly() fieldInfo.FieldKind.Should().Be(FieldKind.Dynamic); fieldInfo.Tokenizer.Should().NotBeNull(); var readField = await fieldInfo.ReadAsync(new TestObject(), default); - readField.Should().BeEquivalentTo(new[] { "bar" }); + readField.Should().BeEquivalentTo(["bar"]); } [Fact] @@ -94,8 +95,8 @@ public void GettingTokenizationOptionsShouldReturnCorrectlyConstructedInstances( { this.WithBasicConfig(); - ((IndexTokenizer)this.sut.GetFieldInfo("FieldX").Tokenizer).Options.Stemming.Should().BeTrue(); - 
((IndexTokenizer)this.sut.GetFieldInfo("FieldY").Tokenizer).Options.Stemming.Should().BeFalse(); + ((IndexTokenizer)this.sut.GetFieldInfo("FieldX").Tokenizer).Options.Stemmer.Should().BeOfType(); + ((IndexTokenizer)this.sut.GetFieldInfo("FieldY").Tokenizer).Options.Stemmer.Should().BeNull(); } [Fact] @@ -157,25 +158,31 @@ private void WithBasicConfig() this.sut); } - private IObjectTokenization Build(ObjectTokenizationBuilder objectTokenizationBuilder, IndexedFieldLookup fieldLookup) + private IObjectTypeConfiguration Build(ObjectTokenizationBuilder objectTokenizationBuilder, IndexedFieldLookup fieldLookup) { return (objectTokenizationBuilder as IObjectTokenizationBuilder) - .Build(IndexTokenizer.Default, new ThesaurusBuilder(), new PlainTextExtractor(), fieldLookup); + .Build( + 1, + IndexTokenizer.Default, + new ThesaurusBuilder(), + new PlainTextExtractor(), + fieldLookup); } - private static DynamicFieldReader CreateDynamicFieldReader() - where T : new() + private static DynamicFieldReader CreateDynamicFieldReader() + where TObject : new() { - var reader = new StringDictionaryDynamicFieldReader( + var reader = new StringDictionaryDynamicFieldReader( x => new Dictionary { { "foo", "bar" } }, "Test", null, IndexTokenizer.Default, new PlainTextExtractor(), - new ThesaurusBuilder().Build(IndexTokenizer.Default)); + new ThesaurusBuilder().Build(IndexTokenizer.Default), + 1D); // Force the reader to first produce (and cache) the field names - reader.ReadAsync(new T(), default); + reader.ReadAsync(new TObject(), default); return reader; } diff --git a/test/Lifti.Tests/Lifti.Tests.csproj b/test/Lifti.Tests/Lifti.Tests.csproj index 96f3e23c..ae1c27cc 100644 --- a/test/Lifti.Tests/Lifti.Tests.csproj +++ b/test/Lifti.Tests/Lifti.Tests.csproj @@ -1,10 +1,10 @@  - net6.0;net7.0;netframework4.8.1 + net6.0;net7.0;net8.0;netframework4.8.1 false - 10.0 + latest enable diff --git a/test/Lifti.Tests/MutationTestBase.cs b/test/Lifti.Tests/MutationTestBase.cs index 
4752a4be..35714676 100644 --- a/test/Lifti.Tests/MutationTestBase.cs +++ b/test/Lifti.Tests/MutationTestBase.cs @@ -1,6 +1,5 @@ using FluentAssertions; using System; -using System.Collections.Immutable; using System.Linq; namespace Lifti.Tests @@ -23,15 +22,17 @@ protected MutationTestBase() { this.indexNodeFactory = new IndexNodeFactory(new IndexOptions { SupportIntraNodeTextAfterIndexDepth = 0 }); this.RootNode = this.indexNodeFactory.CreateRootNode(); - this.Sut = new IndexMutation(this.RootNode, this.indexNodeFactory); + this.Sut = new IndexMutation(this.RootNode, new IndexMetadata(Array.Empty()), this.indexNodeFactory); } protected IndexNode RootNode { get; } - internal IndexMutation Sut { get; set; } + internal IndexMutation Sut { get; set; } - protected void ApplyMutationsToNewSut() + protected IndexNode ApplyMutationsToNewSut() { - this.Sut = new IndexMutation(this.Sut.Apply(), this.indexNodeFactory); + var applied = this.Sut.Apply(); + this.Sut = new IndexMutation(applied, new IndexMetadata(Array.Empty()), this.indexNodeFactory); + return applied; } protected static void VerifyResult( @@ -40,14 +41,14 @@ protected void ApplyMutationsToNewSut() (int, IndexedToken)[]? expectedMatches = null, char[]? expectedChildNodes = null) { - expectedChildNodes ??= Array.Empty(); - expectedMatches ??= Array.Empty<(int, IndexedToken)>(); + expectedChildNodes ??= []; + expectedMatches ??= []; node.HasChildNodes.Should().Be(expectedChildNodes.Length > 0); node.HasMatches.Should().Be(expectedMatches.Length > 0); - node.IntraNodeText.ToArray().Should().BeEquivalentTo(intraNodeText?.ToCharArray() ?? Array.Empty()); - node.ChildNodes.Keys.Should().BeEquivalentTo(expectedChildNodes, o => o.WithoutStrictOrdering()); - node.Matches.Should().BeEquivalentTo(expectedMatches.ToImmutableDictionary(x => x.Item1, x => new[] { x.Item2 })); + node.IntraNodeText.ToArray().Should().BeEquivalentTo(intraNodeText?.ToCharArray() ?? 
[]); + node.ChildNodes.CharacterMap.ToArray().Select(x => x.ChildChar).Should().BeEquivalentTo(expectedChildNodes, o => o.WithStrictOrdering()); + node.Matches.Enumerate().SelectMany(x => x.indexedTokens.Select(token => (x.documentId, token))).ToList().Should().BeEquivalentTo(expectedMatches); } protected static void VerifyResult( @@ -59,7 +60,7 @@ protected void ApplyMutationsToNewSut() { foreach (var navigationChar in navigationChars) { - node = node.ChildNodes[navigationChar]; + node.ChildNodes.TryGetValue(navigationChar, out node!).Should().BeTrue(); } VerifyResult(node, intraNodeText, expectedMatches, expectedChildNodes); diff --git a/test/Lifti.Tests/ObjectScoreBoostMetadataTests.cs b/test/Lifti.Tests/ObjectScoreBoostMetadataTests.cs new file mode 100644 index 00000000..4deb88bb --- /dev/null +++ b/test/Lifti.Tests/ObjectScoreBoostMetadataTests.cs @@ -0,0 +1,93 @@ +using FluentAssertions; +using Lifti.Tokenization.Objects; +using System; +using Xunit; + +namespace Lifti.Tests +{ + public class ObjectScoreBoostMetadataTests + { + private const double FreshnessMultiplier = 10D; + private const double MagnitudeMultiplier = 20D; + + private readonly ScoreBoostMetadata sut; + + public ObjectScoreBoostMetadataTests() + { + this.sut = new ScoreBoostMetadata(new ObjectScoreBoostOptions(MagnitudeMultiplier, null, FreshnessMultiplier, null)); + } + + [Fact] + public void FreshnessDate_WithOnlyOneValue_ReturnsMultiplier() + { + var metadata = DocumentMetadata(new DateTime(2022, 11, 12), null); + this.sut.Add(metadata); + + this.sut.CalculateScoreBoost(metadata).Should().Be(FreshnessMultiplier); + } + + [Fact] + public void Magnitude_WithOnlyOneValue_ReturnsMultiplier() + { + var metadata = DocumentMetadata(null, 100433D); + this.sut.Add(metadata); + + this.sut.CalculateScoreBoost(metadata).Should().Be(MagnitudeMultiplier); + } + + [Fact] + public void FreshnessDate_WithTwoValues_ReturnsFullMultiplierForMaxAndOneForMin() + { + var minItem = DocumentMetadata(new 
DateTime(1980, 11, 12), null); + var maxItem = DocumentMetadata(new DateTime(2022, 11, 12), null); + this.sut.Add(minItem); + this.sut.Add(maxItem); + + this.sut.CalculateScoreBoost(minItem).Should().Be(1D); + this.sut.CalculateScoreBoost(maxItem).Should().Be(FreshnessMultiplier); + } + + [Fact] + public void Magnitude_WithTwoValues_ReturnsFullMultiplierForMaxAndOneForMin() + { + var minItem = DocumentMetadata(null, -100D); + var maxItem = DocumentMetadata(null, 100433D); + this.sut.Add(minItem); + this.sut.Add(maxItem); + + this.sut.CalculateScoreBoost(minItem).Should().Be(1D); + this.sut.CalculateScoreBoost(maxItem).Should().Be(MagnitudeMultiplier); + } + + [Fact] + public void Magnitude_WithMultipleValues_CalculatesMidPoint() + { + var minItem = DocumentMetadata(null, -100D); + var midItem = DocumentMetadata(null, 400D); + var maxItem = DocumentMetadata(null, 900D); + this.sut.Add(minItem); + this.sut.Add(midItem); + this.sut.Add(maxItem); + + this.sut.CalculateScoreBoost(minItem).Should().Be(1D); + // This isn't 10 because the value ranges from 1 to MagnitudeMultiplier, not 0 to MagnitudeMultiplier. + // That makes the mid point (19 / 2) + 1 = 10.5 + this.sut.CalculateScoreBoost(midItem).Should().Be(10.5D); + this.sut.CalculateScoreBoost(maxItem).Should().Be(MagnitudeMultiplier); + } + + [Fact] + public void Item_WithNoScoreMultipliers_ReturnsOne() + { + var metadata = DocumentMetadata(null, null); + this.sut.Add(metadata); + + this.sut.CalculateScoreBoost(metadata).Should().Be(1D); + } + + private static DocumentMetadata DocumentMetadata(DateTime? scoringFreshnessDate, double? 
scoringMagnitude) + { + return Lifti.DocumentMetadata.ForObject(1, 1, "A", new DocumentStatistics(1, 2), scoringFreshnessDate, scoringMagnitude); + } + } +} diff --git a/test/Lifti.Tests/Querying/CompositeTokenLocationTests.cs b/test/Lifti.Tests/Querying/CompositeTokenLocationTests.cs new file mode 100644 index 00000000..72733571 --- /dev/null +++ b/test/Lifti.Tests/Querying/CompositeTokenLocationTests.cs @@ -0,0 +1,75 @@ +using FluentAssertions; +using Lifti.Querying; +using System.Collections.Generic; +using Xunit; + +namespace Lifti.Tests.Querying +{ + public class CompositeTokenLocationTests : QueryTestBase + { + [Fact] + public void ComposingTwoTokenLocations_ShouldBuildAppropriately() + { + var composite = ((ITokenLocation)TokenLocation(3)).ComposeWith(TokenLocation(2)); + composite.MinTokenIndex.Should().Be(2); + composite.MaxTokenIndex.Should().Be(3); + + // And the same if we compose the other way around + composite = ((ITokenLocation)TokenLocation(2)).ComposeWith(TokenLocation(3)); + composite.MinTokenIndex.Should().Be(2); + composite.MaxTokenIndex.Should().Be(3); + + // Both tokens should be returned + CompositeTokenLocationTests.VerifyCollectedTokens(composite, TokenLocations(2, 3)); + } + + [Fact] + public void ComposingCompositeWithNewMaxTokenLocation_ShouldBuildAppropriately() + { + var existingComposite = new CompositeTokenLocation([.. TokenLocations(6, 2)], 2, 6); + + var composite = existingComposite.ComposeWith(TokenLocation(9)); + + composite.MinTokenIndex.Should().Be(2); + composite.MaxTokenIndex.Should().Be(9); + + // All tokens should be returned + CompositeTokenLocationTests.VerifyCollectedTokens(composite, TokenLocations(2, 6, 9)); + } + + [Fact] + public void ComposingCompositeWithNewMinTokenLocation_ShouldBuildAppropriately() + { + var existingComposite = new CompositeTokenLocation([.. 
TokenLocations(6, 2)], 2, 6); + + var composite = existingComposite.ComposeWith(TokenLocation(1)); + + composite.MinTokenIndex.Should().Be(1); + composite.MaxTokenIndex.Should().Be(6); + + // All tokens should be returned + CompositeTokenLocationTests.VerifyCollectedTokens(composite, TokenLocations(1, 2, 6)); + } + + [Fact] + public void ComposingCompositeWithComposite_ShouldBuildAppropriately() + { + var composite1 = new CompositeTokenLocation([.. TokenLocations(6, 2)], 2, 6); + var composite2 = new CompositeTokenLocation([.. TokenLocations(1, 9)], 1, 9); + + var composite = composite1.ComposeWith(composite2); + + composite.MinTokenIndex.Should().Be(1); + composite.MaxTokenIndex.Should().Be(9); + + CompositeTokenLocationTests.VerifyCollectedTokens(composite, TokenLocations(1, 2, 6, 9)); + } + + private static void VerifyCollectedTokens(CompositeTokenLocation composite, List tokenLocations) + { + var collectedTokens = new HashSet(); + composite.AddTo(collectedTokens); + collectedTokens.Should().BeEquivalentTo(tokenLocations); + } + } +} diff --git a/test/Lifti.Tests/Querying/CompositeTokenMatchLocationTests.cs b/test/Lifti.Tests/Querying/CompositeTokenMatchLocationTests.cs deleted file mode 100644 index 576b79ab..00000000 --- a/test/Lifti.Tests/Querying/CompositeTokenMatchLocationTests.cs +++ /dev/null @@ -1,62 +0,0 @@ -using FluentAssertions; -using Lifti.Querying; -using Lifti.Tests.Fakes; -using System.Linq; -using Xunit; - -namespace Lifti.Tests.Querying -{ - public class CompositeTokenMatchLocationTests : QueryTestBase - { - private readonly FakeTokenLocationMatch match2; - private readonly FakeTokenLocationMatch match1; - private readonly CompositeTokenMatchLocation sut1; - private readonly CompositeTokenMatchLocation sut2; - private readonly TokenLocation[] match1Locations; - private readonly TokenLocation[] match2Locations; - - public CompositeTokenMatchLocationTests() - { - this.match1Locations = new[] - { - new TokenLocation(100, 1, 2), - new 
TokenLocation(200, 1, 2) - }; - - this.match1 = new FakeTokenLocationMatch(100, 200, this.match1Locations); - - this.match2Locations = new[] - { - new TokenLocation(110, 1, 2), - new TokenLocation(150, 1, 2), - new TokenLocation(180, 1, 2) - }; - - this.match2 = new FakeTokenLocationMatch(110, 180, this.match2Locations); - - this.sut1 = new CompositeTokenMatchLocation(this.match1, this.match2); - this.sut2 = new CompositeTokenMatchLocation(this.match2, this.match1); - } - - [Fact] - public void MinLocationShouldBeMinimumOfTheLeftAndRightMinimumValues() - { - this.sut1.MinTokenIndex.Should().Be(100); - this.sut2.MinTokenIndex.Should().Be(100); - } - - [Fact] - public void MaxLocationShouldBeMinimumOfTheLeftAndRightMinimumValues() - { - this.sut1.MaxTokenIndex.Should().Be(200); - this.sut2.MaxTokenIndex.Should().Be(200); - } - - [Fact] - public void GetLocationsShouldReturnAllLocations() - { - this.sut1.GetLocations().Should().BeEquivalentTo(this.match1Locations.Concat(this.match2Locations).ToList()); - this.sut2.GetLocations().Should().BeEquivalentTo(this.match2Locations.Concat(this.match1Locations).ToList()); - } - } -} diff --git a/test/Lifti.Tests/Querying/DocumentMatchCollectorTests.cs b/test/Lifti.Tests/Querying/DocumentMatchCollectorTests.cs new file mode 100644 index 00000000..fc9f73da --- /dev/null +++ b/test/Lifti.Tests/Querying/DocumentMatchCollectorTests.cs @@ -0,0 +1,71 @@ +using FluentAssertions; +using Lifti.Querying; +using Xunit; + +namespace Lifti.Tests.Querying +{ + public class DocumentMatchCollectorTests : QueryTestBase + { + [Fact] + public void AddingSingleDocument() + { + var sut = new DocumentMatchCollector(); + + sut.Add(10, 1, TokenLocations(1, 2, 3), 2D); + + sut.ToIntermediateQueryResult() + .Should().BeEquivalentTo( + IntermediateQueryResult(ScoredToken(10, ScoredFieldMatch(2D, 1, 1, 2, 3))), + o => o.WithStrictOrdering()); + } + + [Fact] + public void AddingDifferentDocuments() + { + var sut = new DocumentMatchCollector(); + + 
sut.Add(10, 1, TokenLocations(1, 2, 3), 2D); + sut.Add(1, 1, TokenLocations(1, 2, 3), 2D); + + sut.ToIntermediateQueryResult() + .Should().BeEquivalentTo( + IntermediateQueryResult( + ScoredToken(1, ScoredFieldMatch(2D, 1, 1, 2, 3)), + ScoredToken(10, ScoredFieldMatch(2D, 1, 1, 2, 3))), + o => o.WithStrictOrdering()); + } + + [Fact] + public void AddingDifferentFieldForSameDocument() + { + var sut = new DocumentMatchCollector(); + + sut.Add(10, 1, TokenLocations(1, 2, 3), 2D); + sut.Add(10, 2, TokenLocations(2), 20D); + + sut.ToIntermediateQueryResult() + .Should().BeEquivalentTo( + IntermediateQueryResult( + ScoredToken(10, + ScoredFieldMatch(2D, 1, 1, 2, 3), + ScoredFieldMatch(20D, 2, 2))), + o => o.WithStrictOrdering()); + } + + [Fact] + public void AddingMoreLocationsForSameField_ShouldMergeFieldsAndAddScores() + { + var sut = new DocumentMatchCollector(); + + sut.Add(10, 1, TokenLocations(1, 2, 30), 2D); + sut.Add(10, 1, TokenLocations(9), 20D); + + sut.ToIntermediateQueryResult() + .Should().BeEquivalentTo( + IntermediateQueryResult( + ScoredToken(10, + ScoredFieldMatch(22D, 1, 1, 2, 9, 30))), + o => o.WithStrictOrdering()); + } + } +} diff --git a/test/Lifti.Tests/Querying/FakeIndexNavigator.cs b/test/Lifti.Tests/Querying/FakeIndexNavigator.cs index dfdc7534..627bde04 100644 --- a/test/Lifti.Tests/Querying/FakeIndexNavigator.cs +++ b/test/Lifti.Tests/Querying/FakeIndexNavigator.cs @@ -1,4 +1,5 @@ using Lifti.Querying; +using Lifti.Tests.Fakes; using System; using System.Collections.Generic; using System.Linq; @@ -8,42 +9,56 @@ namespace Lifti.Tests.Querying public class FakeIndexNavigator : QueryTestBase, IIndexNavigator { public FakeIndexNavigator() - { + { + this.Snapshot = new FakeIndexSnapshot(new FakeIndexMetadata(10)); } - private FakeIndexNavigator(bool exactAndChildMatchOnly, params int[] matchedItems) + private FakeIndexNavigator(bool exactAndChildMatchOnly, params int[] matchedDocumentIds) + : this() { this.ExpectedExactAndChildMatches = new 
IntermediateQueryResult( - matchedItems.Select( + matchedDocumentIds.Select( m => ScoredToken( - m, - new[] { ScoredFieldMatch(1D, (byte)m, m) }))); + m, + [ScoredFieldMatch(1D, (byte)m, m)])) + .ToList(), + false); this.ExpectedExactMatches = exactAndChildMatchOnly ? Lifti.Querying.IntermediateQueryResult.Empty : this.ExpectedExactAndChildMatches; } - private FakeIndexNavigator(bool exactAndChildMatchOnly, params ScoredToken[] matches) + private FakeIndexNavigator(bool exactAndChildMatchOnly, params ScoredToken[] matches) + : this() { - this.ExpectedExactAndChildMatches = new IntermediateQueryResult(matches); + this.ExpectedExactAndChildMatches = new IntermediateQueryResult(matches.ToList(), false); this.ExpectedExactMatches = exactAndChildMatchOnly ? Lifti.Querying.IntermediateQueryResult.Empty : this.ExpectedExactAndChildMatches; } public IntermediateQueryResult ExpectedExactMatches { get; set; } public IntermediateQueryResult ExpectedExactAndChildMatches { get; set; } - public List NavigatedCharacters { get; } = new List(); - public List NavigatedStrings { get; } = new List(); - - public bool HasExactMatches => this.ExpectedExactMatches.Matches.Count > 0; + public List NavigatedCharacters { get; } = []; + public List NavigatedStrings { get; } = []; + public List ProvidedWeightings { get; } = []; + public List ProvidedQueryContexts { get; } = []; + + public int ExactMatchCount() + { + return this.ExpectedExactMatches.Matches.Count; + } - public static FakeIndexNavigator ReturningExactMatches(params int[] matchedItems) + public bool HasExactMatches => this.ExpectedExactMatches.Matches.Count > 0; + + public IIndexSnapshot Snapshot { get; set; } + + public static FakeIndexNavigator ReturningExactMatches(params int[] matchedDocumentIds) { - return new FakeIndexNavigator(false, matchedItems); + return new FakeIndexNavigator(false, matchedDocumentIds); } - public static FakeIndexNavigator ReturningExactAndChildMatches(params int[] matchedItems) + public static 
FakeIndexNavigator ReturningExactAndChildMatches(params int[] matchedDocumentIds) { - return new FakeIndexNavigator(true, matchedItems); + return new FakeIndexNavigator(true, matchedDocumentIds); } public static FakeIndexNavigator ReturningExactMatches(params ScoredToken[] matches) @@ -54,15 +69,29 @@ public static FakeIndexNavigator ReturningExactMatches(params ScoredToken[] matc public static FakeIndexNavigator ReturningExactAndChildMatches(params ScoredToken[] matches) { return new FakeIndexNavigator(true, matches); + } + + public IntermediateQueryResult GetExactAndChildMatches(double weighting = 1D) + { + return this.GetExactAndChildMatches(QueryContext.Empty, weighting); } - public IntermediateQueryResult GetExactAndChildMatches(double weighting = 1D) - { + public IntermediateQueryResult GetExactMatches(double weighting = 1D) + { + return this.GetExactMatches(QueryContext.Empty, weighting); + } + + public IntermediateQueryResult GetExactAndChildMatches(QueryContext queryContext, double weighting = 1D) + { + this.ProvidedWeightings.Add(weighting); + this.ProvidedQueryContexts.Add(queryContext); return this.ExpectedExactAndChildMatches; } - public IntermediateQueryResult GetExactMatches(double weighting = 1D) - { + public IntermediateQueryResult GetExactMatches(QueryContext queryContext, double weighting = 1D) + { + this.ProvidedWeightings.Add(weighting); + this.ProvidedQueryContexts.Add(queryContext); return this.ExpectedExactMatches; } @@ -90,7 +119,8 @@ public IEnumerable EnumerateIndexedTokens() } public void Dispose() - { + { + GC.SuppressFinalize(this); } public IIndexNavigatorBookmark CreateBookmark() @@ -101,6 +131,16 @@ public IIndexNavigatorBookmark CreateBookmark() public IEnumerable EnumerateNextCharacters() { throw new NotImplementedException(); - } + } + + public void AddExactAndChildMatches(QueryContext queryContext, DocumentMatchCollector documentMatchCollector, double weighting = 1) + { + throw new NotImplementedException(); + } + + public void 
AddExactMatches(QueryContext queryContext, DocumentMatchCollector documentMatchCollector, double weighting = 1) + { + throw new NotImplementedException(); + } } } diff --git a/test/Lifti.Tests/Querying/FakeQueryPart.cs b/test/Lifti.Tests/Querying/FakeQueryPart.cs index 7ffaf01a..645d5622 100644 --- a/test/Lifti.Tests/Querying/FakeQueryPart.cs +++ b/test/Lifti.Tests/Querying/FakeQueryPart.cs @@ -7,11 +7,18 @@ namespace Lifti.Tests.Querying { public class FakeQueryPart : QueryTestBase, IQueryPart { - private readonly IntermediateQueryResult results; + private readonly IntermediateQueryResult results; + private readonly double weighting = 1D; public FakeQueryPart(params ScoredToken[] matches) { - this.results = new IntermediateQueryResult(matches); + this.results = new IntermediateQueryResult(matches.ToList(), false); + } + + public FakeQueryPart(double weighting, params ScoredToken[] matches) + : this(matches) + { + this.weighting = weighting; } public FakeQueryPart(params int[] matchedItems) @@ -20,10 +27,17 @@ public FakeQueryPart(params int[] matchedItems) matchedItems.Select( m => new ScoredToken( m, - new[] { ScoredFieldMatch(m, (byte)m, m) }))); - } - - public IntermediateQueryResult Evaluate(Func navigatorCreator, IQueryContext queryContext) + new[] { ScoredFieldMatch(m, (byte)m, m) })) + .ToList(), + false); + } + + public double CalculateWeighting(Func navigatorCreator) + { + return this.weighting; + } + + public IntermediateQueryResult Evaluate(Func navigatorCreator, QueryContext queryContext) { return this.results; } diff --git a/test/Lifti.Tests/Querying/FieldMatchTests.cs b/test/Lifti.Tests/Querying/FieldMatchTests.cs deleted file mode 100644 index ef188a2a..00000000 --- a/test/Lifti.Tests/Querying/FieldMatchTests.cs +++ /dev/null @@ -1,27 +0,0 @@ -using FluentAssertions; -using Lifti.Querying; -using System.Collections.Generic; -using Xunit; - -namespace Lifti.Tests.Querying -{ - public class FieldMatchTests : QueryTestBase - { - [Fact] - public void 
ShouldReturnUniqueLocationsInOrder() - { - var sut = new FieldMatch(1, new List { CompositeMatch(4, 5, 6, 9), TokenMatch(6), CompositeMatch(5, 6), TokenMatch(7) }); - - sut.GetTokenLocations().Should().BeEquivalentTo( - new[] - { - new TokenLocation(4, 4, 4), - new TokenLocation(5, 5, 5), - new TokenLocation(6, 6, 6), - new TokenLocation(7, 7, 7), - new TokenLocation(9, 9, 9) - }, - options => options.WithStrictOrdering()); - } - } -} diff --git a/test/Lifti.Tests/Querying/IndexNavigatorTests.cs b/test/Lifti.Tests/Querying/IndexNavigatorTests.cs index d935b354..a4aa59a2 100644 --- a/test/Lifti.Tests/Querying/IndexNavigatorTests.cs +++ b/test/Lifti.Tests/Querying/IndexNavigatorTests.cs @@ -1,5 +1,6 @@ using FluentAssertions; using Lifti.Querying; +using System.Collections.Generic; using System.Linq; using System.Threading.Tasks; using Xunit; @@ -39,7 +40,7 @@ public Task DisposeAsync() public void GettingExactMatches_WithNoExactMatch_ShouldReturnEmptyResults(string test) { this.sut.Process(test).Should().BeTrue(); - var results = this.sut.GetExactMatches(); + var results = this.sut.GetExactMatches(QueryContext.Empty); results.Should().NotBeNull(); results.Matches.Should().BeEmpty(); } @@ -50,7 +51,7 @@ public void GettingExactMatches_WithNoExactMatch_ShouldReturnEmptyResults(string public void GettingExactMatches_WithNonMatchingTextProcessed_ShouldReturnEmptyResults(string test) { this.sut.Process(test).Should().BeFalse(); - var results = this.sut.GetExactMatches(); + var results = this.sut.GetExactMatches(QueryContext.Empty); results.Should().NotBeNull(); results.Matches.Should().BeEmpty(); } @@ -59,18 +60,94 @@ public void GettingExactMatches_WithNonMatchingTextProcessed_ShouldReturnEmptyRe public void GettingExactMatches_WithMatchingTextProcessed_ShouldReturnResults() { this.sut.Process("INDIFFERENCE").Should().BeTrue(); - var results = this.sut.GetExactMatches(); + var results = this.sut.GetExactMatches(QueryContext.Empty); results.Should().NotBeNull(); 
results.Matches.Should().BeEquivalentTo( new[] { ScoredToken( 0, - ScoredFieldMatch(double.Epsilon, 0, new SingleTokenLocationMatch(new TokenLocation(5, 42, 12)))) + ScoredFieldMatch(double.Epsilon, 0, new TokenLocation(5, 42, 12))) }, o => o.ComparingByMembers() .ComparingByMembers() .Excluding(i => i.Path.EndsWith("Score"))); + } + + [Fact] + public async Task GettingExactMatches_WithDocumentFilter_ShouldOnlyReturnFilteredDocuments() + { + await this.index.AddAsync(("B", "Elephant", "Ellie")); + await this.index.AddAsync(("C", "Elephant", "Elon")); + + this.sut = this.index.Snapshot.CreateNavigator(); + + this.sut.Process("ELEPHANT").Should().BeTrue(); + + var documentId = this.index.Metadata.GetMetadata("B").Id; + + var results = this.sut.GetExactMatches(new QueryContext(FilterToDocumentIds: new HashSet { documentId })); + + results.Matches.Should().HaveCount(1); + results.Matches[0].DocumentId.Should().Be(documentId); + } + + [Fact] + public async Task GettingExactMatches_WithFieldFilter_ShouldOnlyReturnFilteredDocuments() + { + await this.index.AddAsync(("B", "Elephant", "Ellie")); + await this.index.AddAsync(("C", "Elephant", "Elephant")); + + this.sut = this.index.Snapshot.CreateNavigator(); + + this.sut.Process("ELEPHANT").Should().BeTrue(); + + var fieldId = this.index.FieldLookup.GetFieldInfo("Field2").Id; + var expectedDocumentId = this.index.Metadata.GetMetadata("C").Id; + + var results = this.sut.GetExactMatches(new QueryContext(FilterToFieldId: fieldId)); + + results.Matches.Should().HaveCount(1); + results.Matches[0].DocumentId.Should().Be(expectedDocumentId); + results.Matches[0].FieldMatches.Should().AllSatisfy(x => x.FieldId.Should().Be(fieldId)); + } + + [Fact] + public async Task GettingExactAndChildMatches_WithDocumentFilter_ShouldOnlyReturnFilteredDocuments() + { + await this.index.AddAsync(("B", "Elephant", "Ellie")); + await this.index.AddAsync(("C", "Elephant", "Elon")); + + this.sut = this.index.Snapshot.CreateNavigator(); + + 
this.sut.Process("ELE").Should().BeTrue(); + + var documentId = this.index.Metadata.GetMetadata("B").Id; + + var results = this.sut.GetExactAndChildMatches(new QueryContext(FilterToDocumentIds: new HashSet { documentId })); + + results.Matches.Should().HaveCount(1); + results.Matches[0].DocumentId.Should().Be(documentId); + } + + [Fact] + public async Task GettingExactAndChildMatches_WithFieldFilter_ShouldOnlyReturnFilteredDocuments() + { + await this.index.AddAsync(("B", "Elephant", "Ellie")); + await this.index.AddAsync(("C", "Elephant", "Elephant")); + + this.sut = this.index.Snapshot.CreateNavigator(); + + this.sut.Process("ELE").Should().BeTrue(); + + var fieldId = this.index.FieldLookup.GetFieldInfo("Field2").Id; + var expectedDocumentId = this.index.Metadata.GetMetadata("C").Id; + + var results = this.sut.GetExactAndChildMatches(new QueryContext(FilterToFieldId: fieldId)); + + results.Matches.Should().HaveCount(1); + results.Matches[0].DocumentId.Should().Be(expectedDocumentId); + results.Matches[0].FieldMatches.Should().AllSatisfy(x => x.FieldId.Should().Be(fieldId)); } [Theory] @@ -82,7 +159,7 @@ public void GettingExactMatches_WithMatchingTextProcessed_ShouldReturnResults() public void GettingExactAndChildMatches_WithNoExactMatch_ShouldReturnNonEmptyResults(string test) { this.sut.Process(test).Should().BeTrue(); - var results = this.sut.GetExactAndChildMatches(); + var results = this.sut.GetExactAndChildMatches(QueryContext.Empty); results.Should().NotBeNull(); results.Matches.Should().NotBeEmpty(); } @@ -146,26 +223,24 @@ public async Task GettingExactAndChildMatches_ShouldMergeResultsAcrossFields() await this.index.AddAsync(("B", "Zoopla Zoo Zammo", "Zany Zippy Llamas")); await this.index.AddAsync(("C", "Zak", "Ziggy Stardust")); - var navigator = this.index.Snapshot.CreateNavigator(); - navigator.Process("Z").Should().BeTrue(); - var results = navigator.GetExactAndChildMatches(); + this.sut = this.index.Snapshot.CreateNavigator(); + 
this.sut.Process("Z").Should().BeTrue(); + var results = this.sut.GetExactAndChildMatches(QueryContext.Empty); results.Should().NotBeNull(); var expectedTokens = new[] { ScoredToken( 1, - new[] - { - ScoredFieldMatch(0D, 1, SingleTokenLocationMatch(0, 0, 6), SingleTokenLocationMatch(1, 7, 3), SingleTokenLocationMatch(2, 11, 5)), - ScoredFieldMatch(0D, 2, SingleTokenLocationMatch(0, 0, 4), SingleTokenLocationMatch(1, 5, 5)) - }), + [ + ScoredFieldMatch(0D, 1, TokenLocation(0, 0, 6), TokenLocation(1, 7, 3), TokenLocation(2, 11, 5)), + ScoredFieldMatch(0D, 2, TokenLocation(0, 0, 4), TokenLocation(1, 5, 5)) + ]), ScoredToken( 2, - new[] - { - ScoredFieldMatch(0D, 1, SingleTokenLocationMatch(0, 0, 3)), - ScoredFieldMatch(0D, 2, SingleTokenLocationMatch(0, 0, 5)) - }) + [ + ScoredFieldMatch(0D, 1, TokenLocation(0, 0, 3)), + ScoredFieldMatch(0D, 2, TokenLocation(0, 0, 5)) + ]) }; results.Matches.Should().BeEquivalentTo( @@ -181,7 +256,7 @@ public async Task GettingExactAndChildMatches_ShouldMergeResultsAcrossFields() public void GettingExactAndChildMatches_WithNonMatchingTextProcessed_ShouldReturnEmptyResults(string test) { this.sut.Process(test).Should().BeFalse(); - var results = this.sut.GetExactAndChildMatches(); + var results = this.sut.GetExactAndChildMatches(QueryContext.Empty); results.Should().NotBeNull(); results.Matches.Should().BeEmpty(); } @@ -227,20 +302,41 @@ public void Bookmarking_WhenRewinding_ShouldResetToCapturedState() var bookmark = this.sut.CreateBookmark(); this.sut.Process("VIDUAL"); - VerifyMatchedWordIndexes(13); + this.VerifyMatchedWordIndexes(13); bookmark.Apply(); this.sut.Process("F"); - VerifyMatchedWordIndexes(5); + this.VerifyMatchedWordIndexes(5); bookmark.Apply(); - VerifyMatchedWordIndexes(5, 13); + this.VerifyMatchedWordIndexes(5, 13); + } + + [Fact] + public void Bookmarking_ShouldReuseDisposedBookmark() + { + this.sut.Process("INDI"); + + var bookmark = this.sut.CreateBookmark(); + + bookmark.Dispose(); + + 
this.sut.Process("VIDUAL"); + + var nextBookmark = this.sut.CreateBookmark(); + + nextBookmark.Should().BeSameAs(bookmark); + + // And the new bookmark should be usable at the current location, not the old + this.sut.Process("S"); + nextBookmark.Apply(); + this.VerifyMatchedWordIndexes(13); } private void VerifyMatchedWordIndexes(params int[] indexes) { - var results = this.sut.GetExactAndChildMatches(); + var results = this.sut.GetExactAndChildMatches(QueryContext.Empty); results.Matches.Should().HaveCount(1); results.Matches[0].FieldMatches.Should().HaveCount(1); var fieldMatch = results.Matches[0].FieldMatches[0]; diff --git a/test/Lifti.Tests/Querying/IntermediateQueryResultTests.cs b/test/Lifti.Tests/Querying/IntermediateQueryResultTests.cs new file mode 100644 index 00000000..a24dfc7b --- /dev/null +++ b/test/Lifti.Tests/Querying/IntermediateQueryResultTests.cs @@ -0,0 +1,35 @@ +using FluentAssertions; +using Xunit; + +namespace Lifti.Tests.Querying +{ + public class IntermediateQueryResultTests : QueryTestBase + { + [Fact] + public void Equals_ReturnsTrueForMatchingData() + { + var a = IntermediateQueryResult(ScoredToken(10, ScoredFieldMatch(2D, 1, 1, 2, 3))); + var b = IntermediateQueryResult(ScoredToken(10, ScoredFieldMatch(2D, 1, 1, 2, 3))); + + a.Equals(b).Should().BeTrue(); + } + + [Fact] + public void Equals_ReturnsFalseForNonMatchingData() + { + var a = IntermediateQueryResult(ScoredToken(10, ScoredFieldMatch(2D, 1, 1, 2, 3))); + + // Differs by locations + a.Equals(IntermediateQueryResult(ScoredToken(10, ScoredFieldMatch(2D, 1, 1, 2, 4)))).Should().BeFalse(); + + // Differs by score + a.Equals(IntermediateQueryResult(ScoredToken(10, ScoredFieldMatch(9D, 1, 1, 2, 3)))).Should().BeFalse(); + + // Differs by field id + a.Equals(IntermediateQueryResult(ScoredToken(10, ScoredFieldMatch(2D, 2, 1, 2, 3)))).Should().BeFalse(); + + // Differs by document id + a.Equals(IntermediateQueryResult(ScoredToken(1, ScoredFieldMatch(2D, 1, 1, 2, 
3)))).Should().BeFalse(); + } + } +} diff --git a/test/Lifti.Tests/Querying/OkapiBm25ScorerTests.cs b/test/Lifti.Tests/Querying/OkapiBm25ScorerTests.cs index cabe2906..7ec4596c 100644 --- a/test/Lifti.Tests/Querying/OkapiBm25ScorerTests.cs +++ b/test/Lifti.Tests/Querying/OkapiBm25ScorerTests.cs @@ -1,7 +1,8 @@ using FluentAssertions; using Lifti.Querying; using Lifti.Tests.Fakes; -using System.Collections.Immutable; +using System; +using System.Collections.Generic; using System.Linq; using Xunit; @@ -11,54 +12,97 @@ public class OkapiBm25ScorerTests : QueryTestBase { private const double expectedScore1 = 2.536599214033677D; private const double expectedScore2 = 2.3792189708272073D; - - private readonly OkapiBm25Scorer sut; - private readonly QueryTokenMatch[] tokenMatches; - - public OkapiBm25ScorerTests() - { - var itemStore = new FakeItemStore( + + private static readonly FakeIndexMetadata looseTextIndexMetadata = new( 10, - new IndexStatistics(ImmutableDictionary.Empty.Add(1, 100), 100), // 100 total tokens in 1 field + new IndexStatistics(new Dictionary() { { 1, 100 } }, 100), // 100 total tokens in 1 field Enumerable.Range(0, 10) - .Select(id => (id, new ItemMetadata(id, id, new DocumentStatistics(1, id * 3)))) - .ToArray()); // Each item will have (id * 3) tokens in it - - this.sut = new OkapiBm25Scorer(1.2D, 0.75D, itemStore); - - this.tokenMatches = new[] -{ - new QueryTokenMatch(1, new[] { FieldMatch(1, 3, 6) }), - new QueryTokenMatch(3, new[] { FieldMatch(1, 8, 2, 5) }) - }; - } + .Select(id => (id, DocumentMetadata.ForLooseText(id, id, new DocumentStatistics(1, id * 3)))) + .ToArray(), // Each item will have (id * 3) tokens in it + Array.Empty<(byte, Func)>()); + + private static readonly FakeIndexMetadata objectTextIndexMetadata = new( + 10, + new IndexStatistics(new Dictionary() { { 1, 100 } }, 100), // 100 total tokens in 1 field + Enumerable.Range(0, 10) + .Select(id => (id, DocumentMetadata.ForObject( + (byte)((id % 3) + 1), // Each item will be 
assigned to object type 1, 2, or 3 + id, + id, + new DocumentStatistics(1, id * 3), + null, + null))) + .ToArray(), // Each item will have (id * 3) tokens in it + new (byte, Func)[] + { + (1, (DocumentMetadata metadata) => 10D), // 10x score boost for object type 1 + (2, (DocumentMetadata metadata) => 1D), // No field score boost for object type 2 + (3, (DocumentMetadata metadata) => 1D) // No field score boost for object type 3 + }); [Fact] public void VerifyScoreWithoutWeighting() - { - var results = this.sut.Score(tokenMatches, 1D); - - results.Should().BeEquivalentTo( - new[] - { - new ScoredToken(1, new[] { ScoredFieldMatch(expectedScore1, 1, 3, 6) }), - new ScoredToken(3, new[] { ScoredFieldMatch(expectedScore2, 1, 8, 2, 5) }) - }, - o => o.Using(ctx => ctx.Subject.Should().BeApproximately(ctx.Expectation, 0.00001D)).When(i => i.RuntimeType == typeof(double))); - } - + { + var sut = CreateSut(looseTextIndexMetadata); + VerifyScore(sut, 2, 1, 1, TokenLocations(3, 6), 1D, expectedScore1); + VerifyScore(sut, 2, 3, 1, TokenLocations(8, 2, 5), 1D, expectedScore2); + } + [Fact] public void VerifyScoreWithWeighting() - { - var results = this.sut.Score(tokenMatches, 0.5D); - - results.Should().BeEquivalentTo( - new[] - { - new ScoredToken(1, new[] { ScoredFieldMatch(expectedScore1 / 2D, 1, 3, 6) }), - new ScoredToken(3, new[] { ScoredFieldMatch(expectedScore2 / 2D, 1, 8, 2, 5) }) - }, - o => o.Using(ctx => ctx.Subject.Should().BeApproximately(ctx.Expectation, 0.00001D)).When(i => i.RuntimeType == typeof(double))); + { + var sut = CreateSut(looseTextIndexMetadata); + VerifyScore(sut, 2, 1, 1, TokenLocations(3, 6), 0.5D, expectedScore1 / 2); + VerifyScore(sut, 2, 3, 1, TokenLocations(8, 2, 5), 0.5D, expectedScore2 / 2); + } + + [Fact] + public void VerifyScoreWithFieldWeighting() + { + var sut = CreateSut(looseTextIndexMetadata, new FakeFieldScoreBoostProvider((1, 10D))); + + // Results are calculated with a field score boost of 10, but a multiplier of 0.5D, so the 
resulting boost is 5 + VerifyScore(sut, 2, 1, 1, TokenLocations(3, 6), 0.5D, expectedScore1 * 5); + VerifyScore(sut, 2, 3, 1, TokenLocations(8, 2, 5), 0.5D, expectedScore2 * 5); + } + + [Fact] + public void VerifyScoreWithObjectTypeWeighting() + { + var sut = CreateSut(objectTextIndexMetadata, new FakeFieldScoreBoostProvider()); + + // Document 1 has an object type of 2 + VerifyScore(sut, 2, 1, 1, TokenLocations(3, 6), 1D, expectedScore1); + // Document 3 has an object type of 1, so gets a 10x boost + VerifyScore(sut, 2, 3, 1, TokenLocations(8, 2, 5), 1D, expectedScore2 * 10); + } + + private static void VerifyScore( + OkapiBm25Scorer sut, + int totalMatchedDocuments, + int documentId, + byte fieldId, + IReadOnlyList tokenLocations, + double weighting, + double expectedScore) + { + var result = sut.CalculateScore( + totalMatchedDocuments, + documentId, + fieldId, + tokenLocations, + weighting); + + result.Should().BeApproximately(expectedScore, 0.00001D); + } + + private static OkapiBm25Scorer CreateSut(FakeIndexMetadata itemStore, FakeFieldScoreBoostProvider? fieldScoreBoostProvider = null) + { + return new OkapiBm25Scorer( + 1.2D, + 0.75D, + itemStore, + fieldScoreBoostProvider ?? 
new FakeFieldScoreBoostProvider((2, 10D))); } } -} +} \ No newline at end of file diff --git a/test/Lifti.Tests/Querying/PrecedingIntersectMergerTests.cs b/test/Lifti.Tests/Querying/PrecedingIntersectMergerTests.cs index 37d6fd89..79588049 100644 --- a/test/Lifti.Tests/Querying/PrecedingIntersectMergerTests.cs +++ b/test/Lifti.Tests/Querying/PrecedingIntersectMergerTests.cs @@ -6,8 +6,8 @@ namespace Lifti.Tests.Querying { public class PrecedingIntersectMergerTests : QueryTestBase { - private static readonly ScoredToken[] expectedResults = new[] - { + private static readonly ScoredToken[] expectedResults = + [ ScoredToken( 7, ScoredFieldMatch(5D, 1, 34, 35, 99, 100), @@ -15,7 +15,7 @@ public class PrecedingIntersectMergerTests : QueryTestBase ScoredToken( 8, ScoredFieldMatch(10D, 2, 80, 85)) - }; + ]; [Fact] public void ForMatchingItemsAndFields_ShouldOnlyReturnWordsWhereTheEarliestLeftWordIsBeforeTheWordsOnTheRight() diff --git a/test/Lifti.Tests/Querying/QueryParserTests.cs b/test/Lifti.Tests/Querying/QueryParserTests.cs index f1671105..6a33fdfb 100644 --- a/test/Lifti.Tests/Querying/QueryParserTests.cs +++ b/test/Lifti.Tests/Querying/QueryParserTests.cs @@ -33,8 +33,8 @@ public QueryParserTests() var nullFieldReader = (TestObject x, CancellationToken token) => new ValueTask>(Array.Empty()); this.fieldLookup = new FakeIndexedFieldLookup( - ("testfield", IndexedFieldDetails.Static(testFieldId, "testfield", nullFieldReader, textExtractor, this.field1Tokenizer, thesaurus)), - ("otherfield", IndexedFieldDetails.Static(otherFieldId, "otherfield", nullFieldReader, textExtractor, this.field2Tokenizer, thesaurus)) + ("testfield", IndexedFieldDetails.Static(testFieldId, "testfield", nullFieldReader, textExtractor, this.field1Tokenizer, thesaurus, 1D)), + ("otherfield", IndexedFieldDetails.Static(otherFieldId, "otherfield", nullFieldReader, textExtractor, this.field2Tokenizer, thesaurus, 1D)) ); } @@ -54,6 +54,18 @@ public void 
ParsingTwoWordsWithNoOperator_WithOrOperatorAsDefault_ShouldComposeW VerifyResult(result, expectedQuery); } + [Fact] + public void ParsingWordsWithScoreBoost_ShouldApplyScoreBoostToQueryParts() + { + var result = this.Parse("wordone^2 ?wordtwo^3 te*^5"); + var expectedQuery = new AndQueryOperator( + new AndQueryOperator( + new ExactWordQueryPart("wordone", 2), + new FuzzyMatchQueryPart("wordtwo", scoreBoost: 3)), + new WildcardQueryPart(new[] { WildcardQueryFragment.CreateText("te"), WildcardQueryFragment.MultiCharacter }, 5)); + VerifyResult(result, expectedQuery); + } + [Fact] public void ParsingTwoWordsWithNoOperator_ShouldComposeWithAndOperator() { diff --git a/test/Lifti.Tests/Querying/QueryParts/AdjacentWordsQueryOperatorTests.cs b/test/Lifti.Tests/Querying/QueryParts/AdjacentWordsQueryOperatorTests.cs index d8028f6c..b4577004 100644 --- a/test/Lifti.Tests/Querying/QueryParts/AdjacentWordsQueryOperatorTests.cs +++ b/test/Lifti.Tests/Querying/QueryParts/AdjacentWordsQueryOperatorTests.cs @@ -36,11 +36,11 @@ public void ShouldOnlyReturnMatchesForAppropriateField() { ScoredToken( 7, - ScoredFieldMatch(9D, 1, CompositeMatch(8, 9, 10))), + ScoredFieldMatch(9D, 1, CompositeTokenLocation(8, 9, 10))), ScoredToken( 8, - ScoredFieldMatch(12D, 1, CompositeMatch(101, 102, 103)), - ScoredFieldMatch(306D, 2, CompositeMatch(8, 9, 10))) + ScoredFieldMatch(12D, 1, CompositeTokenLocation(101, 102, 103)), + ScoredFieldMatch(306D, 2, CompositeTokenLocation(8, 9, 10))) }, config => config.AllowingInfiniteRecursion()); } @@ -60,6 +60,14 @@ public void ShouldNotCombineSameTokensTogether() // The first and second query parts should not combine together results.Matches.Should().BeEmpty(); + } + + [Fact] + public void CalculateWeighting_ShouldReturnWeightOfFirstDividedByPartCount() + { + var op = new AdjacentWordsQueryOperator(new[] { new FakeQueryPart(9D), new FakeQueryPart(13D), new FakeQueryPart(33D) }); + + op.CalculateWeighting(() => new FakeIndexNavigator()).Should().Be(3D); } } 
} diff --git a/test/Lifti.Tests/Querying/QueryParts/AndQueryOperatorTests.cs b/test/Lifti.Tests/Querying/QueryParts/AndQueryOperatorTests.cs index 24f69b0b..8a89738f 100644 --- a/test/Lifti.Tests/Querying/QueryParts/AndQueryOperatorTests.cs +++ b/test/Lifti.Tests/Querying/QueryParts/AndQueryOperatorTests.cs @@ -18,7 +18,7 @@ public void ShouldOnlyReturnItemsAppearingOnBothSides() var result = op.Evaluate(() => new FakeIndexNavigator(), QueryContext.Empty); - result.Matches.Select(m => m.ItemId).Should().BeEquivalentTo( + result.Matches.Select(m => m.DocumentId).Should().BeEquivalentTo( new[] { 5, 9 }); } @@ -59,5 +59,13 @@ public void CombineAll_WithMultipleElements_ShouldReturnElementCombinedWithOrSta op.ToString().Should().Be("test & test2 & test3"); } + + [Fact] + public void CalculateWeighting_ShouldReturnSmallestWeightingOfParts() + { + var op = new AndQueryOperator(new FakeQueryPart(2D), new FakeQueryPart(3D)); + + op.CalculateWeighting(() => new FakeIndexNavigator()).Should().Be(2D); + } } } diff --git a/test/Lifti.Tests/Querying/QueryParts/ExactWordQueryPartTests.cs b/test/Lifti.Tests/Querying/QueryParts/ExactWordQueryPartTests.cs index b1a84446..109c2af7 100644 --- a/test/Lifti.Tests/Querying/QueryParts/ExactWordQueryPartTests.cs +++ b/test/Lifti.Tests/Querying/QueryParts/ExactWordQueryPartTests.cs @@ -17,8 +17,23 @@ public void Evaluating_ShouldNavigateThroughTextAndGetAllDirectMatches() var actual = part.Evaluate(() => navigator, QueryContext.Empty); actual.Should().BeEquivalentTo(navigator.ExpectedExactMatches); - navigator.NavigatedStrings.Should().BeEquivalentTo(new[] { "test" }); - navigator.NavigatedCharacters.Should().BeEmpty(); + navigator.NavigatedStrings.Should().BeEquivalentTo(["test"]); + navigator.NavigatedCharacters.Should().BeEmpty(); + navigator.ProvidedWeightings.Should().BeEquivalentTo(new[] { 1D }); + } + + [Fact] + public void Evaluating_ShouldPassThroughScoreBoostToNavigator() + { + var part = new ExactWordQueryPart("test", 5D); + var 
navigator = FakeIndexNavigator.ReturningExactMatches(1, 2); + + var actual = part.Evaluate(() => navigator, QueryContext.Empty); + + actual.Should().BeEquivalentTo(navigator.ExpectedExactMatches); + navigator.NavigatedStrings.Should().BeEquivalentTo(["test"]); + navigator.NavigatedCharacters.Should().BeEmpty(); + navigator.ProvidedWeightings.Should().BeEquivalentTo(new[] { 5D }); } [Fact] @@ -26,12 +41,56 @@ public void ShouldApplyQueryContextToResults() { var part = new ExactWordQueryPart("test"); var navigator = FakeIndexNavigator.ReturningExactMatches(1, 2); + var queryContext = new QueryContext(); - var contextResults = new IntermediateQueryResult(); - var queryContext = new FakeQueryContext(contextResults); - var result = part.Evaluate(() => new FakeIndexNavigator(), queryContext); + var result = part.Evaluate(() => navigator, queryContext); - result.Should().Be(contextResults); + navigator.ProvidedQueryContexts.Should().BeEquivalentTo(new[] { queryContext }); + } + + [Fact] + public void ToString_ShouldReturnCorrectRepresentation() + { + var part = new ExactWordQueryPart("test"); + part.ToString().Should().Be("test"); + } + + [Fact] + public void ToString_WithScoreBoost_ShouldReturnCorrectRepresentation() + { + var part = new ExactWordQueryPart("test", 5.123); + part.ToString().Should().Be("test^5.123"); + } + + [Fact] + public void CalculateWeighting_ShouldReturnWeightingBasedOnNumberOfMatchedDocuments() + { + var navigator = FakeIndexNavigator.ReturningExactMatches(1, 2); + navigator.Snapshot = new FakeIndexSnapshot(new FakeIndexMetadata(10)); + var part = new ExactWordQueryPart("test", 5.123); + + // 2 matches out of 10 documents results in a weighting of 0.2 + part.CalculateWeighting(() => navigator).Should().Be(0.2D); + + navigator.Snapshot = new FakeIndexSnapshot(new FakeIndexMetadata(2)); + part = new ExactWordQueryPart("test", 5.123); + + // 2 matches out of 2 documents results in a weighting of 1 + part.CalculateWeighting(() => 
navigator).Should().Be(1D); + } + + [Fact] + public void CalculateWeighting_ShouldCacheWeighting() + { + var navigator = FakeIndexNavigator.ReturningExactMatches(1, 2); + navigator.Snapshot = new FakeIndexSnapshot(new FakeIndexMetadata(10)); + var part = new ExactWordQueryPart("test", 5.123); + part.CalculateWeighting(() => navigator).Should().Be(0.2D); + + // Changing the snapshot is a hacky way of checking that the score is cached - + // if it isn't, the weighting will be recalculated and will be different + navigator.Snapshot = new FakeIndexSnapshot(new FakeIndexMetadata(2)); + part.CalculateWeighting(() => navigator).Should().Be(0.2D); } - } + } } diff --git a/test/Lifti.Tests/Querying/QueryParts/FieldFilterQueryOperatorTests.cs b/test/Lifti.Tests/Querying/QueryParts/FieldFilterQueryOperatorTests.cs index 783f60d6..3a48e062 100644 --- a/test/Lifti.Tests/Querying/QueryParts/FieldFilterQueryOperatorTests.cs +++ b/test/Lifti.Tests/Querying/QueryParts/FieldFilterQueryOperatorTests.cs @@ -1,4 +1,5 @@ using FluentAssertions; +using Lifti.Querying; using Lifti.Querying.QueryParts; using Xunit; @@ -7,22 +8,26 @@ namespace Lifti.Tests.Querying.QueryParts public class FieldFilterQueryOperatorTests : QueryTestBase { [Fact] - public void ShouldFilterAllItemResultsToRequiredField() + public void ShouldPassFieldInQueryContext() { - var navigator = FakeIndexNavigator.ReturningExactMatches( - ScoredToken(2, ScoredFieldMatch(1D, 2, 1, 2), ScoredFieldMatch(2D, 4, 1)), - ScoredToken(4, ScoredFieldMatch(3D, 3, 3), ScoredFieldMatch(4D, 4, 44, 99), ScoredFieldMatch(5D, 5, 2))); + var navigator = new FakeIndexNavigator(); var sut = new FieldFilterQueryOperator("Test", 4, new ExactWordQueryPart("x")); var results = sut.Evaluate(() => navigator, QueryContext.Empty); - results.Matches.Should().BeEquivalentTo( - new[] - { - ScoredToken(2, ScoredFieldMatch(2D, 4, 1)), - ScoredToken(4, ScoredFieldMatch(4D, 4, 44, 99)) - }); + navigator.ProvidedQueryContexts.Should().BeEquivalentTo( + [ + 
new QueryContext(4) + ]); + } + + [Fact] + public void CalculateWeighting_ShouldReturnHalfOfChildPartWeighting() + { + var op = new FieldFilterQueryOperator("Field", 1, new FakeQueryPart(4D)); + + op.CalculateWeighting(() => new FakeIndexNavigator()).Should().Be(2D); } } } diff --git a/test/Lifti.Tests/Querying/QueryParts/FuzzyWordQueryPartTests.cs b/test/Lifti.Tests/Querying/QueryParts/FuzzyWordQueryPartTests.cs index 557d7ac3..2609952b 100644 --- a/test/Lifti.Tests/Querying/QueryParts/FuzzyWordQueryPartTests.cs +++ b/test/Lifti.Tests/Querying/QueryParts/FuzzyWordQueryPartTests.cs @@ -13,11 +13,11 @@ namespace Lifti.Tests.Querying.QueryParts { public class FuzzyWordQueryPartTestsFixture : IAsyncLifetime { - public string[] IndexedText { get; } = { + public string[] IndexedText { get; } = [ "Some sample comics text to match on", "Samples sounds like a solid plan to me", "Odius ogres obey Mobius" - }; + ]; public FullTextIndex Index { get; private set; } = null!; @@ -44,7 +44,7 @@ public async Task InitializeAsync() public class FuzzyWordQueryPartTests : IClassFixture { - + private static readonly Regex expectedMatchRegex = new Regex(@"(^|\s)*((?[^\s]*)($|\s))+", RegexOptions.Compiled); private readonly ITestOutputHelper outputHelper; private readonly FuzzyWordQueryPartTestsFixture fixture; @@ -102,12 +102,6 @@ public void WhenMatchEndsOnExactMatch_PotentialDeletionsShouldStillBeReturned() this.RunTest("SAMPE", 2, 1, "some", "sample", "samples"); } - [Fact] - public void ToString_WithDefaultParameters_ShouldReturnSimpleExpression() - { - new FuzzyMatchQueryPart("Test").ToString().Should().Be("?Test"); - } - [Fact] public async Task WithFieldFilteredInContext_ShouldOnlyMatchOnRequestedField() { @@ -125,13 +119,51 @@ public async Task WithFieldFilteredInContext_ShouldOnlyMatchOnRequestedField() new TestObject(3, "Item number 3", "Item number three content") }); - var query = new Query(FieldFilterQueryOperator.CreateForField(index.FieldLookup, "title", new 
FuzzyMatchQueryPart("NUMBE", 1, 1))); + var query = new Query( + FieldFilterQueryOperator.CreateForField( + index.FieldLookup, + "title", + new FuzzyMatchQueryPart("NUMBE", 1, 1))); var results = index.Search(query).ToList(); results.Select(x => x.Key).Should().BeEquivalentTo(new[] { 1, 3 }); } + [Fact] + public void CalculateWeighting_ShouldTakeIntoAccountNumberOfExactlyMatchedDocuments() + { + var part = new FuzzyMatchQueryPart("SOUNDS", 1, 1); + var weight = part.CalculateWeighting(this.fixture.Index.CreateNavigator); + + // With a max distance of 1, it should be 1 + the base exact word score + // Base score would be approx 0.3 because only one document contains the word + weight.Should().BeApproximately(1.3D, 0.1D); + + part = new FuzzyMatchQueryPart("TO", 1, 1); + weight = part.CalculateWeighting(this.fixture.Index.CreateNavigator); + + // The base score here would be approx 0.6 because 2/3 documents contain the text + weight.Should().BeApproximately(1.6D, 0.1D); + } + + [Fact] + public void CalculateWeighting_ShouldBeMoreExpensiveWithHigherMaxEditDistance() + { + var part = new FuzzyMatchQueryPart("SOUNDS", 3, 2); + var weight = part.CalculateWeighting(this.fixture.Index.CreateNavigator); + + // 3 edits, 2 sequential edits = 3 + ((2 - 1) * 2) = 5 + // Add the base exact word score of approx 0.3 + weight.Should().BeApproximately(5.3D, 0.1D); + } + + [Fact] + public void ToString_WithDefaultParameters_ShouldReturnSimpleExpression() + { + new FuzzyMatchQueryPart("Test").ToString().Should().Be("?Test"); + } + [Theory] [InlineData(null, 4, "?,4?Test")] [InlineData(9, null, "?9,?Test")] @@ -142,6 +174,12 @@ public void ToString_WithParameters_ShouldReturnCorrectlyFormattedExpression(int .ToString().Should().Be(expectedOutput); } + [Fact] + public void ToString_WithScoreBoost_ShouldReturnCorrectlyFormattedExpression() + { + new FuzzyMatchQueryPart("Test", 1, 3, 5.123).ToString().Should().Be("?1,3?Test^5.123"); + } + [Fact] public void 
WhenFuzzyMatchingWord_ScoreShouldBeLessThanExactMatch() { @@ -155,9 +193,18 @@ public void WhenFuzzyMatchingWord_ScoreShouldBeLessThanExactMatch() expectedScoreOrders.Should().BeInDescendingOrder(); } - private double GetScore(string search, ushort maxDistance, ushort maxSequentialEdits) + [Fact] + public void WhenScoreBoosting_ShouldApplyBoostToScore() + { + var baseScore = this.GetScore("SAMPLE", 1, 1); + var boostedScore = this.GetScore("SAMPLE", 1, 1, 2D); + + boostedScore.Should().Be(baseScore * 2D); + } + + private double GetScore(string search, ushort maxDistance, ushort maxSequentialEdits, double? scoreBoost = null) { - var part = new FuzzyMatchQueryPart(search, maxDistance, maxSequentialEdits); + var part = new FuzzyMatchQueryPart(search, maxDistance, maxSequentialEdits, scoreBoost); var results = this.fixture.Index.Search(new Query(part)).ToList(); return results.Where(r => r.FieldMatches.Any(m => m.Locations.Any(l => l.TokenIndex == 1)) && r.Key == 0) .Select(s => s.Score) @@ -167,7 +214,6 @@ private double GetScore(string search, ushort maxDistance, ushort maxSequentialE private void RunTest(string word, ushort maxEditDistance, ushort maxSequentialEdits, params string[] expectedWords) { var expectedWordLookup = expectedWords.ToHashSet(StringComparer.OrdinalIgnoreCase); - var expectedMatchRegex = new Regex(@"(^|\s)*((?[^\s]*)($|\s))+"); var expectedResultCaptures = this.fixture.IndexedText.Select( (text, id) => ( @@ -187,7 +233,7 @@ private void RunTest(string word, ushort maxEditDistance, ushort maxSequentialEd var expectedResults = expectedResultCaptures.Select( r => Tuple.Create( - r.id, + r.id, r.Item2.Select( x => new TokenLocation(x.index, x.startLocation, (ushort)x.Value.Length)).ToList() )) diff --git a/test/Lifti.Tests/Querying/QueryParts/NearQueryOperatorTests.cs b/test/Lifti.Tests/Querying/QueryParts/NearQueryOperatorTests.cs index e2d16021..94e309de 100644 --- a/test/Lifti.Tests/Querying/QueryParts/NearQueryOperatorTests.cs +++ 
b/test/Lifti.Tests/Querying/QueryParts/NearQueryOperatorTests.cs @@ -1,4 +1,5 @@ using FluentAssertions; +using Lifti.Querying; using Lifti.Querying.QueryParts; using Xunit; @@ -29,12 +30,20 @@ public void ShouldOnlyReturnMatchesForAppropriateField() { ScoredToken( 7, - ScoredFieldMatch(4D, 1, CompositeMatch(8, 6), CompositeMatch(100, 102))), + ScoredFieldMatch(4D, 1, CompositeTokenLocation(8, 6), CompositeTokenLocation(100, 102))), ScoredToken( 8, - ScoredFieldMatch(6D, 1, CompositeMatch(101, 106)), - ScoredFieldMatch(13D, 2, CompositeMatch(8, 3), CompositeMatch(104, 105))) + ScoredFieldMatch(6D, 1, CompositeTokenLocation(101, 106)), + ScoredFieldMatch(13D, 2, CompositeTokenLocation(8, 3), CompositeTokenLocation(104, 105))) }); + } + + [Fact] + public void CalculateWeighting_ShouldReturnSmallestWeightingOfParts() + { + var op = new NearQueryOperator(new FakeQueryPart(3D), new FakeQueryPart(2D)); + + op.CalculateWeighting(() => new FakeIndexNavigator()).Should().Be(2D); } } } diff --git a/test/Lifti.Tests/Querying/QueryParts/OrQueryOperatorTests.cs b/test/Lifti.Tests/Querying/QueryParts/OrQueryOperatorTests.cs index 40e26a13..29220d1b 100644 --- a/test/Lifti.Tests/Querying/QueryParts/OrQueryOperatorTests.cs +++ b/test/Lifti.Tests/Querying/QueryParts/OrQueryOperatorTests.cs @@ -18,7 +18,7 @@ public void ShouldReturnItemsAppearingOnBothSides() var result = op.Evaluate(() => new FakeIndexNavigator(), QueryContext.Empty); - result.Matches.Select(m => m.ItemId).Should().BeEquivalentTo( + result.Matches.Select(m => m.DocumentId).Should().BeEquivalentTo( new[] { 2, 5, 8, 9 }); } @@ -27,11 +27,11 @@ public void ShouldMergeAllFieldMatchesInCorrectWordOrder() { var op = new OrQueryOperator( new FakeQueryPart( - ScoredToken(4, ScoredFieldMatch(1D, 1, 5, 6) ), + ScoredToken(4, ScoredFieldMatch(1D, 1, 5, 6)), ScoredToken(5, ScoredFieldMatch(2D, 1, 9, 11))), new FakeQueryPart( ScoredToken(5, ScoredFieldMatch(3D, 1, 1, 103), ScoredFieldMatch(9D, 2, 2, 18)), - ScoredToken(7, 
ScoredFieldMatch(4D, 1, 18) ))); + ScoredToken(7, ScoredFieldMatch(4D, 1, 18)))); var result = op.Evaluate(() => new FakeIndexNavigator(), QueryContext.Empty); @@ -64,5 +64,13 @@ public void CombineAll_WithMultipleElements_ShouldReturnElementCombinedWithOrSta op.ToString().Should().Be("test | test2 | test3"); } + + [Fact] + public void CalculateWeighting_ShouldReturnSumOfBothPartsWeightings() + { + var op = new OrQueryOperator(new FakeQueryPart(2D), new FakeQueryPart(3D)); + + op.CalculateWeighting(() => new FakeIndexNavigator()).Should().Be(5D); + } } } diff --git a/test/Lifti.Tests/Querying/QueryParts/PrecedingNearQueryOperatorTests.cs b/test/Lifti.Tests/Querying/QueryParts/PrecedingNearQueryOperatorTests.cs index 3ebc6590..6562c5bc 100644 --- a/test/Lifti.Tests/Querying/QueryParts/PrecedingNearQueryOperatorTests.cs +++ b/test/Lifti.Tests/Querying/QueryParts/PrecedingNearQueryOperatorTests.cs @@ -1,4 +1,5 @@ using FluentAssertions; +using Lifti.Querying; using Lifti.Querying.QueryParts; using System.Linq; using System.Threading.Tasks; @@ -32,11 +33,11 @@ public void ShouldOnlyReturnMatchesForAppropriateField() { ScoredToken( 7, - ScoredFieldMatch(4D, 1, CompositeMatch(100, 102))), + ScoredFieldMatch(4D, 1, CompositeTokenLocation(100, 102))), ScoredToken( 8, - ScoredFieldMatch(6D, 1, CompositeMatch(101, 106)), - ScoredFieldMatch(13D, 2, CompositeMatch(104, 105))) + ScoredFieldMatch(6D, 1, CompositeTokenLocation(101, 106)), + ScoredFieldMatch(13D, 2, CompositeTokenLocation(104, 105))) }); } @@ -58,6 +59,14 @@ public async Task ShouldOnlyReturnResultsWhereFirstWordIsBeforeSecond() new TokenLocation(11, 67, 8), new TokenLocation(12, 76, 7) }); + } + + [Fact] + public void CalculateWeighting_ShouldReturnSmallestWeightingOfParts() + { + var op = new PrecedingNearQueryOperator(new FakeQueryPart(3D), new FakeQueryPart(2D)); + + op.CalculateWeighting(() => new FakeIndexNavigator()).Should().Be(2D); } protected static async Task> CreateTestIndexAsync() diff --git 
a/test/Lifti.Tests/Querying/QueryParts/PrecedingQueryOperatorTests.cs b/test/Lifti.Tests/Querying/QueryParts/PrecedingQueryOperatorTests.cs index a756fe5c..c66982ce 100644 --- a/test/Lifti.Tests/Querying/QueryParts/PrecedingQueryOperatorTests.cs +++ b/test/Lifti.Tests/Querying/QueryParts/PrecedingQueryOperatorTests.cs @@ -31,12 +31,20 @@ public void ShouldOnlyReturnMatchesForAppropriateField() { ScoredToken( 7, - ScoredFieldMatch(4D, 1, TokenMatch(8), TokenMatch(14), TokenMatch(20), TokenMatch(100), TokenMatch(102))), + ScoredFieldMatch(4D, 1, TokenLocation(8), TokenLocation(14), TokenLocation(20), TokenLocation(100), TokenLocation(102))), ScoredToken( 8, - ScoredFieldMatch(6D, 1, TokenMatch(11), TokenMatch(101), TokenMatch(106)), - ScoredFieldMatch(13D, 2, TokenMatch(8), TokenMatch(104), TokenMatch(105))) + ScoredFieldMatch(6D, 1, TokenLocation(11), TokenLocation(101), TokenLocation(106)), + ScoredFieldMatch(13D, 2, TokenLocation(8), TokenLocation(104), TokenLocation(105))) }); + } + + [Fact] + public void CalculateWeighting_ShouldReturnSmallestWeightingOfParts() + { + var op = new PrecedingQueryOperator(new FakeQueryPart(3D), new FakeQueryPart(2D)); + + op.CalculateWeighting(() => new FakeIndexNavigator()).Should().Be(2D); } } } diff --git a/test/Lifti.Tests/Querying/QueryParts/WildcardQueryPartTests.cs b/test/Lifti.Tests/Querying/QueryParts/WildcardQueryPartTests.cs index 089dfbd6..ffed635d 100644 --- a/test/Lifti.Tests/Querying/QueryParts/WildcardQueryPartTests.cs +++ b/test/Lifti.Tests/Querying/QueryParts/WildcardQueryPartTests.cs @@ -28,7 +28,7 @@ public async Task InitializeAsync() [Fact] public void Evaluating_WithSingleTextFragment_ShouldReturnOnlyExactMatches() { - var part = new WildcardQueryPart(new[] { WildcardQueryFragment.CreateText("ALSO") }); + var part = new WildcardQueryPart([WildcardQueryFragment.CreateText("ALSO")]); var results = this.index.Search(new Query(part)).ToList(); results.Should().HaveCount(1); @@ -38,17 +38,28 @@ public void 
Evaluating_WithSingleTextFragment_ShouldReturnOnlyExactMatches() { new TokenLocation(2, 16, 4) })); + } + + [Fact] + public void Evaluating_WithSScoreBoost_ShouldApplyBoostToResultingScore() + { + var part = new WildcardQueryPart([WildcardQueryFragment.CreateText("ALSO")]); + var unboostedScore = this.index.Search(new Query(part)).ToList()[0].Score; + part = new WildcardQueryPart(new[] { WildcardQueryFragment.CreateText("ALSO") }, 2D); + var boostedScore = this.index.Search(new Query(part)).ToList()[0].Score; + + boostedScore.Should().Be(unboostedScore * 2D); } [Fact] public void Evaluating_WithSingleCharacterReplacement_ShouldReturnCorrectResults() { - var part = new WildcardQueryPart(new[] - { + var part = new WildcardQueryPart( + [ WildcardQueryFragment.CreateText("TH"), WildcardQueryFragment.SingleCharacter, WildcardQueryFragment.CreateText("S") - }); + ]); var results = this.index.Search(new Query(part)).ToList(); @@ -70,12 +81,12 @@ public void Evaluating_WithSingleCharacterReplacement_ShouldReturnCorrectResults [Fact] public void Evaluating_WithTerminatingSingleCharacterReplacement_ShouldReturnCorrectResults() { - var part = new WildcardQueryPart(new[] - { + var part = new WildcardQueryPart( + [ WildcardQueryFragment.CreateText("TH"), WildcardQueryFragment.SingleCharacter, WildcardQueryFragment.SingleCharacter - }); + ]); var results = this.index.Search(new Query(part)).ToList(); @@ -97,11 +108,11 @@ public void Evaluating_WithTerminatingSingleCharacterReplacement_ShouldReturnCor [Fact] public void Evaluating_WithTerminatingMultiCharacterReplacement_ShouldReturnAllMatchesStartingWithText() { - var part = new WildcardQueryPart(new[] - { + var part = new WildcardQueryPart( + [ WildcardQueryFragment.CreateText("A"), WildcardQueryFragment.MultiCharacter - }); + ]); var results = this.index.Search(new Query(part)).ToList(); @@ -127,11 +138,11 @@ public void Evaluating_WithTerminatingMultiCharacterReplacement_ShouldReturnAllM [Fact] public void 
Evaluating_WithLeadingMultiCharacterReplacement_ShouldReturnAllMatchesEndingWithText() { - var part = new WildcardQueryPart(new[] - { + var part = new WildcardQueryPart( + [ WildcardQueryFragment.MultiCharacter, WildcardQueryFragment.CreateText("ES") - }); + ]); var results = this.index.Search(new Query(part)).ToList(); @@ -154,11 +165,11 @@ public void Evaluating_WithLeadingMultiCharacterReplacement_ShouldReturnAllMatch [Fact] public void Evaluating_WithSequenceOfSingleCharacterWildcards_ShouldOnlyMatchWordsWithMatchingCharacterCounts() { - var part = new WildcardQueryPart(new[] - { + var part = new WildcardQueryPart( + [ WildcardQueryFragment.SingleCharacter, WildcardQueryFragment.SingleCharacter - }); + ]); var results = this.index.Search(new Query(part)).ToList(); @@ -174,8 +185,8 @@ public void Evaluating_WithSequenceOfSingleCharacterWildcards_ShouldOnlyMatchWor [Fact] public void Evaluating_WithSequenceOfSingleCharacterWildcardsFollowedByMultiCharacterWildcard_ShouldMatchWordsWithAtLeastCharacterCount() { - var part = new WildcardQueryPart(new[] - { + var part = new WildcardQueryPart( + [ WildcardQueryFragment.SingleCharacter, WildcardQueryFragment.SingleCharacter, WildcardQueryFragment.SingleCharacter, @@ -183,7 +194,7 @@ public void Evaluating_WithSequenceOfSingleCharacterWildcardsFollowedByMultiChar WildcardQueryFragment.SingleCharacter, WildcardQueryFragment.SingleCharacter, WildcardQueryFragment.MultiCharacter - }); + ]); var results = this.index.Search(new Query(part)).ToList(); @@ -207,13 +218,13 @@ public void Evaluating_WithSequenceOfSingleCharacterWildcardsFollowedByMultiChar [Fact] public void Evaluating_WithConsecutiveSingleCharacterReplacement_ShouldReturnCorrectResults() { - var part = new WildcardQueryPart(new[] - { + var part = new WildcardQueryPart( + [ WildcardQueryFragment.CreateText("T"), WildcardQueryFragment.SingleCharacter, WildcardQueryFragment.SingleCharacter, WildcardQueryFragment.CreateText("S") - }); + ]); var results = 
this.index.Search(new Query(part)).ToList(); @@ -260,6 +271,71 @@ public async Task WithFieldFilteredInContext_ShouldOnlyMatchOnRequestedField() results.Select(x => x.Key).Should().BeEquivalentTo(new[] { 1, 3 }); } + [Fact] + public void CalculateWeighting_ShouldHeavilyPenaliseFullIndexSearch() + { + var part = new WildcardQueryPart(WildcardQueryFragment.MultiCharacter); + var weight = part.CalculateWeighting(this.index.CreateNavigator); + + weight.Should().Be(1000D); + } + + [Fact] + public void CalculateWeighting_ShouldCalculateBaseScoreOfFragments() + { + var part = new WildcardQueryPart( + WildcardQueryFragment.SingleCharacter, + WildcardQueryFragment.MultiCharacter, + WildcardQueryFragment.CreateText("H"), + WildcardQueryFragment.SingleCharacter, + WildcardQueryFragment.MultiCharacter, + WildcardQueryFragment.CreateText("A")); + + var weight = part.CalculateWeighting(this.index.CreateNavigator); + + // Two multi, two single, two text + // 2 * 4 + 2 * 1 + 2 * 1 = 12 + weight.Should().Be(12D); + } + + [Fact] + public void CalculateWeighting_ShouldPenaliseLeadingMulticharacter() + { + var part = new WildcardQueryPart( + WildcardQueryFragment.MultiCharacter, + WildcardQueryFragment.CreateText("H"), + WildcardQueryFragment.SingleCharacter, + WildcardQueryFragment.SingleCharacter, + WildcardQueryFragment.MultiCharacter, + WildcardQueryFragment.CreateText("A")); + + var weight = part.CalculateWeighting(this.index.CreateNavigator); + + // Two multi, two single, two text + // 2 * 4 + 2 * 1 + 2 * 1 = 12 + // Penalisation of 20% = 14.4 + weight.Should().BeApproximately(14.4D, 0.1D); + } + + [Fact] + public void CalculateWeighting_ShouldBoostLeadingText() + { + var part = new WildcardQueryPart( + WildcardQueryFragment.CreateText("H"), + WildcardQueryFragment.SingleCharacter, + WildcardQueryFragment.MultiCharacter, + WildcardQueryFragment.CreateText("A"), + WildcardQueryFragment.SingleCharacter, + WildcardQueryFragment.MultiCharacter); + + var weight = 
part.CalculateWeighting(this.index.CreateNavigator); + + // Two multi, two single, two text + // 2 * 4 + 2 * 1 + 2 * 1 = 12 + // Boost = 12 * 0.8 = 9.6 + weight.Should().BeApproximately(9.6D, 0.1D); + } + private record TestObject(int Id, string Title, string Content); } } diff --git a/test/Lifti.Tests/Querying/QueryTestBase.cs b/test/Lifti.Tests/Querying/QueryTestBase.cs index a0774246..b4380f82 100644 --- a/test/Lifti.Tests/Querying/QueryTestBase.cs +++ b/test/Lifti.Tests/Querying/QueryTestBase.cs @@ -1,75 +1,76 @@ using Lifti.Querying; +using System; +using System.Collections.Generic; using System.Linq; namespace Lifti.Tests.Querying { public abstract class QueryTestBase { - protected static CompositeTokenMatchLocation CompositeMatch(int leftWordIndex, int rightWordIndex) + internal static CompositeTokenLocation CompositeTokenLocation(int leftWordIndex, int rightWordIndex) { - return new CompositeTokenMatchLocation(TokenMatch(leftWordIndex), TokenMatch(rightWordIndex)); + return new CompositeTokenLocation( + [.. TokenLocations(leftWordIndex, rightWordIndex)], + Math.Min(leftWordIndex, rightWordIndex), + Math.Max(leftWordIndex, rightWordIndex)); } - protected static CompositeTokenMatchLocation CompositeMatch(params int[] wordIndexes) - { - var match = CompositeMatch(wordIndexes[0], wordIndexes[1]); - - for (var i = 2; i < wordIndexes.Length; i++) - { - match = new CompositeTokenMatchLocation(match, TokenMatch(wordIndexes[i])); - } - - return match; + internal static CompositeTokenLocation CompositeTokenLocation(params int[] wordIndexes) + { + return new CompositeTokenLocation( + [.. 
TokenLocations(wordIndexes)], + wordIndexes.Min(), + wordIndexes.Max()); } - protected static ScoredFieldMatch ScoredFieldMatch(double score, byte fieldId, params int[] wordIndexes) + internal static ScoredFieldMatch ScoredFieldMatch(double score, byte fieldId, params int[] wordIndexes) { - return new ScoredFieldMatch( + return Lifti.Querying.ScoredFieldMatch.CreateFromPresorted( score, - FieldMatch(fieldId, wordIndexes)); + fieldId, + TokenLocations(wordIndexes)); } - protected static FieldMatch FieldMatch(byte fieldId, params int[] wordIndexes) + internal static List TokenLocations(params int[] wordIndexes) { - return new FieldMatch( - fieldId, - wordIndexes.Select(i => TokenMatch(i))); + return wordIndexes.Select(TokenLocation).ToList(); } - protected static ScoredFieldMatch ScoredFieldMatch(double score, byte fieldId, params (int, int)[] compositeMatches) + internal static ScoredFieldMatch ScoredFieldMatch(double score, byte fieldId, params (int, int)[] compositeMatches) { return ScoredFieldMatch( score, fieldId, - compositeMatches.Select(i => (ITokenLocationMatch)CompositeMatch(i.Item1, i.Item2)).ToArray()); + compositeMatches.Select(i => (ITokenLocation)CompositeTokenLocation(i.Item1, i.Item2)).ToArray()); } - protected static ScoredFieldMatch ScoredFieldMatch(double score, byte fieldId, params ITokenLocationMatch[] compositeMatches) + internal static ScoredFieldMatch ScoredFieldMatch(double score, byte fieldId, params ITokenLocation[] compositeMatches) { - return new ScoredFieldMatch( + return Lifti.Querying.ScoredFieldMatch.CreateFromUnsorted( score, - new FieldMatch(fieldId, compositeMatches)); + fieldId, + [.. 
compositeMatches]); } - protected static ITokenLocationMatch TokenMatch(int index) + internal static TokenLocation TokenLocation(int index) { - return new SingleTokenLocationMatch(new TokenLocation(index, index, (ushort)index)); + return new TokenLocation(index, index, (ushort)index); } - protected static ITokenLocationMatch SingleTokenLocationMatch(int index, int start, int length) + internal static TokenLocation TokenLocation(int index, int start, int length) { - return new SingleTokenLocationMatch(new TokenLocation(index, start, (ushort)length)); + return new TokenLocation(index, start, (ushort)length); } - protected static IntermediateQueryResult IntermediateQueryResult(params ScoredToken[] matches) + internal static IntermediateQueryResult IntermediateQueryResult(params ScoredToken[] matches) { - return new IntermediateQueryResult(matches); + return new IntermediateQueryResult(matches.ToList(), false); } - protected static ScoredToken ScoredToken(int itemId, params ScoredFieldMatch[] matches) + internal static ScoredToken ScoredToken(int documentId, params ScoredFieldMatch[] matches) { return new ScoredToken( - itemId, + documentId, matches); } } diff --git a/test/Lifti.Tests/Querying/QueryTokenizerTests.cs b/test/Lifti.Tests/Querying/QueryTokenizerTests.cs index 7026f940..8e3b5799 100644 --- a/test/Lifti.Tests/Querying/QueryTokenizerTests.cs +++ b/test/Lifti.Tests/Querying/QueryTokenizerTests.cs @@ -12,6 +12,7 @@ public class QueryTokenizerTests private readonly QueryTokenizer sut; private readonly IIndexTokenizer defaultIndexTokenizer; private readonly IIndexTokenizer fieldIndexTokenizer; + private readonly FakeIndexTokenizer alternativeFieldIndexTokenizer; private readonly IIndexTokenizerProvider tokenizerProvider; public QueryTokenizerTests() @@ -20,7 +21,12 @@ public QueryTokenizerTests() this.defaultIndexTokenizer = new FakeIndexTokenizer(); this.fieldIndexTokenizer = new FakeIndexTokenizer(true); - this.tokenizerProvider = new 
FakeIndexTokenizerProvider(this.defaultIndexTokenizer, ("test", this.fieldIndexTokenizer)); + this.alternativeFieldIndexTokenizer = new FakeIndexTokenizer(true); + this.tokenizerProvider = new FakeIndexTokenizerProvider( + this.defaultIndexTokenizer, + ("test", this.fieldIndexTokenizer), + ("test field", this.fieldIndexTokenizer), + ("test []", this.alternativeFieldIndexTokenizer)); } [Fact] @@ -40,7 +46,32 @@ public void SingleWordYieldsOneResult() { this.sut.ParseQueryTokens("Testing", this.tokenizerProvider).Should().BeEquivalentTo(new[] { - QueryToken.ForText("Testing", this.defaultIndexTokenizer) + QueryToken.ForText("Testing", this.defaultIndexTokenizer, null) + }); + } + + [Fact] + public void ScoreBoostingOnlyAppliedToRequestedToken() + { + this.sut.ParseQueryTokens("(Testing^2) | Testing", this.tokenizerProvider).Should().BeEquivalentTo(new[] + { + QueryToken.ForOperator(QueryTokenType.OpenBracket), + QueryToken.ForText("Testing", this.defaultIndexTokenizer, 2D), + QueryToken.ForOperator(QueryTokenType.CloseBracket), + QueryToken.ForOperator(QueryTokenType.OrOperator), + QueryToken.ForText("Testing", this.defaultIndexTokenizer, null) + }); + } + + [InlineData("2", 2D)] + [InlineData("100.2927", 100.2927D)] + [InlineData("001.001", 1.001D)] + [Theory] + public void SingleWordWithScoreBoostYieldsOneResult(string textScoreBoost, double expectedScoreBoost) + { + this.sut.ParseQueryTokens($"Testing^{textScoreBoost}", this.tokenizerProvider).Should().BeEquivalentTo(new[] + { + QueryToken.ForText("Testing", this.defaultIndexTokenizer, expectedScoreBoost) }); } @@ -49,9 +80,9 @@ public void FuzzySearchTermsYieldedCorrectly() { this.sut.ParseQueryTokens("?Testing ?1,2?Test ?,?Test", this.tokenizerProvider).Should().BeEquivalentTo(new[] { - QueryToken.ForText("?Testing", this.defaultIndexTokenizer), - QueryToken.ForText("?1,2?Test", this.defaultIndexTokenizer), - QueryToken.ForText("?,?Test", this.defaultIndexTokenizer) + QueryToken.ForText("?Testing", 
this.defaultIndexTokenizer, null), + QueryToken.ForText("?1,2?Test", this.defaultIndexTokenizer, null), + QueryToken.ForText("?,?Test", this.defaultIndexTokenizer, null) }); } @@ -69,9 +100,21 @@ public void UnexpectedCloseBracketShouldThrowException(string query) public void FuzzySearchTermsSeparatedByCommasYieldedCorrectly() { this.sut.ParseQueryTokens("?Testing,?Test2", this.tokenizerProvider).Should().BeEquivalentTo( - new[] { - QueryToken.ForText("?Testing", this.defaultIndexTokenizer), - QueryToken.ForText("?Test2", this.defaultIndexTokenizer) + new[] + { + QueryToken.ForText("?Testing", this.defaultIndexTokenizer, null), + QueryToken.ForText("?Test2", this.defaultIndexTokenizer, null) + }); + } + + [Fact] + public void EscapedCharacters_ShouldBeReturnedAsTokenText() + { + this.sut.ParseQueryTokens(@"\\hello\=\"" \&\|", this.tokenizerProvider).Should().BeEquivalentTo( + new[] + { + QueryToken.ForText(@"\hello=""", this.defaultIndexTokenizer, null), + QueryToken.ForText("&|", this.defaultIndexTokenizer, null) }); } @@ -86,7 +129,7 @@ public void SingleWordWithSpacePaddingYieldsOneResult() { this.sut.ParseQueryTokens(" \t Testing \t ", this.tokenizerProvider).Should().BeEquivalentTo(new[] { - QueryToken.ForText("Testing", this.defaultIndexTokenizer) + QueryToken.ForText("Testing", this.defaultIndexTokenizer, null) }); } @@ -96,9 +139,9 @@ public void CompositeStringYieldsOneResult() this.sut.ParseQueryTokens("\"Jack be quick\"", this.tokenizerProvider).Should().BeEquivalentTo(new[] { QueryToken.ForOperator(QueryTokenType.BeginAdjacentTextOperator), - QueryToken.ForText("Jack", this.defaultIndexTokenizer), - QueryToken.ForText("be", this.defaultIndexTokenizer), - QueryToken.ForText("quick", this.defaultIndexTokenizer), + QueryToken.ForText("Jack", this.defaultIndexTokenizer, null), + QueryToken.ForText("be", this.defaultIndexTokenizer, null), + QueryToken.ForText("quick", this.defaultIndexTokenizer, null), 
QueryToken.ForOperator(QueryTokenType.EndAdjacentTextOperator) }); } @@ -109,12 +152,12 @@ public void TwoCompositeStringsYieldsSixResults() this.sut.ParseQueryTokens(@"""First string"" ""Second string""", this.tokenizerProvider).Should().BeEquivalentTo(new[] { QueryToken.ForOperator(QueryTokenType.BeginAdjacentTextOperator), - QueryToken.ForText("First", this.defaultIndexTokenizer), - QueryToken.ForText("string", this.defaultIndexTokenizer), + QueryToken.ForText("First", this.defaultIndexTokenizer, null), + QueryToken.ForText("string", this.defaultIndexTokenizer, null), QueryToken.ForOperator(QueryTokenType.EndAdjacentTextOperator), QueryToken.ForOperator(QueryTokenType.BeginAdjacentTextOperator), - QueryToken.ForText("Second", this.defaultIndexTokenizer), - QueryToken.ForText("string", this.defaultIndexTokenizer), + QueryToken.ForText("Second", this.defaultIndexTokenizer, null), + QueryToken.ForText("string", this.defaultIndexTokenizer, null), QueryToken.ForOperator(QueryTokenType.EndAdjacentTextOperator) }); } @@ -126,25 +169,25 @@ public void TextTokensForField_ShouldHaveCorrectTokenizerAssociatedToThem() { QueryToken.ForFieldFilter("test"), QueryToken.ForOperator(QueryTokenType.BeginAdjacentTextOperator), - QueryToken.ForText("test", this.fieldIndexTokenizer), - QueryToken.ForText("string", this.fieldIndexTokenizer), + QueryToken.ForText("test", this.fieldIndexTokenizer, null), + QueryToken.ForText("string", this.fieldIndexTokenizer, null), QueryToken.ForOperator(QueryTokenType.EndAdjacentTextOperator), - QueryToken.ForText("notfield", this.defaultIndexTokenizer), + QueryToken.ForText("notfield", this.defaultIndexTokenizer, null), QueryToken.ForFieldFilter("test"), - QueryToken.ForText("sim%le", this.fieldIndexTokenizer), - QueryToken.ForText("nofield", this.defaultIndexTokenizer), + QueryToken.ForText("sim%le", this.fieldIndexTokenizer, null), + QueryToken.ForText("nofield", this.defaultIndexTokenizer, null), QueryToken.ForFieldFilter("test"), 
QueryToken.ForOperator(QueryTokenType.OpenBracket), - QueryToken.ForText("yes*", this.fieldIndexTokenizer), + QueryToken.ForText("yes*", this.fieldIndexTokenizer, null), QueryToken.ForOperator(QueryTokenType.OpenBracket), QueryToken.ForOperator(QueryTokenType.BeginAdjacentTextOperator), - QueryToken.ForText("field", this.fieldIndexTokenizer), - QueryToken.ForText("too", this.fieldIndexTokenizer), + QueryToken.ForText("field", this.fieldIndexTokenizer, null), + QueryToken.ForText("too", this.fieldIndexTokenizer, null), QueryToken.ForOperator(QueryTokenType.BeginAdjacentTextOperator), QueryToken.ForOperator(QueryTokenType.CloseBracket), - QueryToken.ForText("?stillfield", this.fieldIndexTokenizer), + QueryToken.ForText("?stillfield", this.fieldIndexTokenizer, null), QueryToken.ForOperator(QueryTokenType.CloseBracket), - QueryToken.ForText("notinfieldagain", this.defaultIndexTokenizer) + QueryToken.ForText("notinfieldagain", this.defaultIndexTokenizer, null) }, options => options .WithStrictOrdering() @@ -153,6 +196,59 @@ public void TextTokensForField_ShouldHaveCorrectTokenizerAssociatedToThem() .WhenTypeIs()); } + [Fact] + public void BracketedFieldNames_ShouldBeTreatedAsFieldNameWithoutSquareBrackets() + { + this.sut.ParseQueryTokens(@"[test]=foo [test field]=bar", this.tokenizerProvider).Should().BeEquivalentTo(new[] + { + QueryToken.ForFieldFilter("test"), + QueryToken.ForText("foo", this.fieldIndexTokenizer, null), + QueryToken.ForFieldFilter("test field"), + QueryToken.ForText("bar", this.fieldIndexTokenizer, null) + }, + options => options + .WithStrictOrdering() + .Using( + x => x.Subject.IndexTokenizer.Should().BeSameAs(x.Expectation.IndexTokenizer)) + .WhenTypeIs()); + } + + [Fact] + public void BracketedFieldNamesWithEscapedCharacters_ShouldReturnUnescapedCharacters() + { + this.sut.ParseQueryTokens(@"[\t\e\s\t\ \[\]]=foo", this.tokenizerProvider).Should().BeEquivalentTo(new[] + { + QueryToken.ForFieldFilter("test []"), + QueryToken.ForText("foo", 
this.alternativeFieldIndexTokenizer, null) + }, + options => options + .WithStrictOrdering() + .Using( + x => x.Subject.IndexTokenizer.Should().BeSameAs(x.Expectation.IndexTokenizer)) + .WhenTypeIs()); + } + + [Fact] + public void EmptyBracketedFieldName_ShouldThrowException() + { + Assert.Throws(() => this.sut.ParseQueryTokens("[]=foo", this.tokenizerProvider).ToList()) + .Message.Should().Be("Empty field name encountered"); + } + + [Fact] + public void UnclosedFieldNameBracket_ShouldThrowException() + { + Assert.Throws(() => this.sut.ParseQueryTokens("[test=foo", this.tokenizerProvider).ToList()) + .Message.Should().Be("Unclosed [ encountered"); + } + + [Fact] + public void BracketFieldNameWithoutFollowingQuery_ShouldThrowException() + { + Assert.Throws(() => this.sut.ParseQueryTokens("[test] foo", this.tokenizerProvider).ToList()) + .Message.Should().Be("Expected = after bracketed field name"); + } + [Fact] public void OperatorTokensAreParsedCorrectly() { diff --git a/test/Lifti.Tests/Querying/ScoredFieldMatchTests.cs b/test/Lifti.Tests/Querying/ScoredFieldMatchTests.cs new file mode 100644 index 00000000..46da4001 --- /dev/null +++ b/test/Lifti.Tests/Querying/ScoredFieldMatchTests.cs @@ -0,0 +1,70 @@ +using FluentAssertions; +using Lifti.Querying; +using Xunit; + +namespace Lifti.Tests.Querying +{ + public class FieldMatchTests : QueryTestBase + { + [Fact] + public void MixOfCompositeAndTokenLocations_ShouldReturnUniqueLocationsInOrder() + { + var sut = ScoredFieldMatch( + 1D, + 1, + [CompositeTokenLocation(4, 5, 6, 9), TokenLocation(6), CompositeTokenLocation(5, 6), TokenLocation(7)]); + + sut.GetTokenLocations().Should().BeEquivalentTo( + new[] + { + new TokenLocation(4, 4, 4), + new TokenLocation(5, 5, 5), + new TokenLocation(6, 6, 6), + new TokenLocation(7, 7, 7), + new TokenLocation(9, 9, 9) + }, + options => options.WithStrictOrdering()); + } + + [Fact] + public void TokenLocationsOnly_ShouldReturnExactListProvided() + { + var tokenLocations = 
TokenLocations(4, 7, 9, 13); + var sut = Lifti.Querying.ScoredFieldMatch.CreateFromPresorted(1D, 1, tokenLocations); + + sut.GetTokenLocations().Should().BeSameAs(tokenLocations); + } + + [Fact] + public void Merging_BothWithTokenLists_ShouldCreateOrderedAndUniqueList() + { + var leftLocations = TokenLocations(4, 7, 9, 13); + var rightLocations = TokenLocations(5, 9, 13, 17); + var merged = Lifti.Querying.ScoredFieldMatch.Merge( + Lifti.Querying.ScoredFieldMatch.CreateFromPresorted(1D, 1, leftLocations), + Lifti.Querying.ScoredFieldMatch.CreateFromPresorted(1D, 1, rightLocations)); + + merged.Locations.Should().BeEquivalentTo( + TokenLocations(4, 7, 5, 9, 13, 17)); + } + + [Fact] + public void Merging_BothWithMixedLists_ShouldCreateOrderedAndUniqueList() + { + var leftLocations = TokenLocations(4, 8); + ITokenLocation[] rightLocations = [CompositeTokenLocation(4, 5, 6, 9), CompositeTokenLocation(24, 30)]; + var merged = Lifti.Querying.ScoredFieldMatch.Merge( + Lifti.Querying.ScoredFieldMatch.CreateFromPresorted(1D, 1, leftLocations), + Lifti.Querying.ScoredFieldMatch.CreateFromPresorted(1D, 1, rightLocations)); + + merged.Locations.Should().BeEquivalentTo( + new ITokenLocation[] + { + TokenLocation(4), + CompositeTokenLocation(4, 5, 6, 9), + TokenLocation(8), + CompositeTokenLocation(24, 30) + }); + } + } +} diff --git a/test/Lifti.Tests/Querying/WildcardQueryPartParserTests.cs b/test/Lifti.Tests/Querying/WildcardQueryPartParserTests.cs index fe711f4d..6fc5b0da 100644 --- a/test/Lifti.Tests/Querying/WildcardQueryPartParserTests.cs +++ b/test/Lifti.Tests/Querying/WildcardQueryPartParserTests.cs @@ -15,13 +15,22 @@ public class WildcardQueryPartParserTests [Fact] public void TextOnly_ShouldReturnFalse() { - RunTest("Foo", null!, false); + RunTest("Foo", null!, expectedResult: false); } [Fact] public void TextWithWildcard_ShouldNormalizeText() { RunTest("Foo*", new WildcardQueryPart(CreateText("FOO"), MultiCharacter)); + } + + [Fact] + public void 
WithScoreBoost_ShouldReturnScoreBoostInQueryPart() + { + RunTest( + "Foo*", + new WildcardQueryPart(new[] { CreateText("FOO"), MultiCharacter }, 23.3), + 23.3); } [Fact] @@ -54,9 +63,13 @@ public void MixOfFragments_ShouldReturnValidPart() RunTest("%%foo*bar", new WildcardQueryPart(SingleCharacter, SingleCharacter, CreateText("FOO"), MultiCharacter, CreateText("BAR"))); } - private static void RunTest(string text, WildcardQueryPart? expectedQueryPart, bool expectedResult = true) + private static void RunTest(string text, WildcardQueryPart? expectedQueryPart, double? scoreBoost = null, bool expectedResult = true) { - var result = WildcardQueryPartParser.TryParse(text.AsSpan(), new FakeIndexTokenizer(normalizeToUppercase: true), out var part); + var result = WildcardQueryPartParser.TryParse( + text.AsSpan(), + new FakeIndexTokenizer(normalizeToUppercase: true), + scoreBoost, + out var part); result.Should().Be(expectedResult); if (expectedQueryPart != null) diff --git a/test/Lifti.Tests/ScoreBoostValuesTests.cs b/test/Lifti.Tests/ScoreBoostValuesTests.cs new file mode 100644 index 00000000..89a07410 --- /dev/null +++ b/test/Lifti.Tests/ScoreBoostValuesTests.cs @@ -0,0 +1,143 @@ +using FluentAssertions; +using Xunit; + +namespace Lifti.Tests +{ + public class ScoreBoostValuesTests + { + private readonly DoubleScoreBoostValues sut; + + public ScoreBoostValuesTests() + { + this.sut = new DoubleScoreBoostValues(); + } + + [Fact] + public void AddingFirstValue_ShouldInitializeMinAndMax() + { + this.TestAdd(100D, 100D, 100D); + } + + [Fact] + public void AddingMultipleValues_ShouldTrackMinAndMaxCorrectly() + { + this.TestAdd(50D, 50D, 50D); + this.TestAdd(150D, 50D, 150D); + this.TestAdd(25D, 25D, 150D); + this.TestAdd(150D, 25D, 150D); + this.TestAdd(175D, 25D, 175D); + } + + [Fact] + public void RemovingLastValueAndAddingNewValue_ShouldInitializeMinAndMax() + { + this.TestAdd(50D, 50D, 50D); + this.TestAdd(100D, 50D, 100D); + this.TestAdd(25D, 25D, 100D); + + 
this.TestRemove(100D, 25D, 50D); + this.TestRemove(50D, 25D, 25D); + + // We don't care what the min and max are when we remove the last one + this.sut.Remove(25D); + + // But the values should be reinitialized when we add a new one + this.TestAdd(10D, 10D, 10D); + } + + [Fact] + public void AddingTheSameMaxValueMultipleTimes_ShouldRefCountUsage() + { + this.TestAdd(50D, 50D, 50D); + + // Add 100 twice + this.TestAdd(100D, 50D, 100D); + this.TestAdd(100D, 50D, 100D); + + // The first removal shouldn't change the min/max + this.TestRemove(100D, 50D, 100D); + + // The second removal should + this.TestRemove(100D, 50D, 50D); + } + + [Fact] + public void AddingTheSameMinValueMultipleTimes_ShouldRefCountUsage() + { + this.TestAdd(50D, 50D, 50D); + + // Add 25 twice + this.TestAdd(25D, 25D, 50D); + this.TestAdd(25D, 25D, 50D); + + // The first removal shouldn't change the min/max + this.TestRemove(25D, 25D, 50D); + + // The second removal should + this.TestRemove(25D, 50D, 50D); + } + + [Fact] + public void RemovingAnUntrackedValue_ShouldThrowException() + { + this.TestAdd(50D, 50D, 50D); + this.TestAdd(100D, 50D, 100D); + + Assert.Throws(() => this.sut.Remove(25D)) + .Message.Should().Be("Internal error - unexpected value removal from score boost metadata"); + } + + [Fact] + public void ScoreBoostCalculation_ShouldReturnCorrectValue() + { + this.TestAdd(50D, 50D, 50D); + this.TestAdd(100D, 50D, 100D); + this.TestAdd(25D, 25D, 100D); + + this.sut.CalculateBoost(2D, 25D).Should().Be(1D); + this.sut.CalculateBoost(2D, 100D).Should().Be(2D); + this.sut.CalculateBoost(2D, 62.5).Should().Be(1.5D); + } + + [Fact] + public void AddingItems_AffectsScoreBoostCalculation() + { + this.TestAdd(50D, 50D, 50D); + this.TestAdd(100D, 50D, 100D); + + this.sut.CalculateBoost(2D, 90D).Should().Be(1.8D); + + this.TestAdd(25D, 25D, 100D); + + this.sut.CalculateBoost(2D, 90D).Should().BeApproximately(1.866666D, 0.000001); + } + + [Fact] + public void 
RemovingItems_AffectsScoreBoostCalculation() + { + this.TestAdd(50D, 50D, 50D); + this.TestAdd(100D, 50D, 100D); + this.TestAdd(25D, 25D, 100D); + + this.sut.CalculateBoost(2D, 90D).Should().BeApproximately(1.866666D, 0.000001); + + this.TestRemove(25D, 50D, 100D); + + this.sut.CalculateBoost(2D, 90D).Should().Be(1.8D); + } + + private void TestAdd(double value, double expectedMin, double expectedMax) + { + this.sut.Add(value); + this.sut.Minimum.Should().Be(expectedMin); + this.sut.Maximum.Should().Be(expectedMax); + } + + private void TestRemove(double value, double expectedMin, double expectedMax) + { + this.sut.Remove(value); + this.sut.Minimum.Should().Be(expectedMin); + this.sut.Maximum.Should().Be(expectedMax); + } + } +} diff --git a/test/Lifti.Tests/SearchResultsTests.cs b/test/Lifti.Tests/SearchResultsTests.cs index 1a31c9b4..83d3484b 100644 --- a/test/Lifti.Tests/SearchResultsTests.cs +++ b/test/Lifti.Tests/SearchResultsTests.cs @@ -163,39 +163,39 @@ public async Task CreateMatchPhrasesAsync_ForDefaultFieldTextLoadedAsync_Cancell async () => await this.sut.CreateMatchPhrasesAsync(async (x, ct) => await Task.Run(() => this.defaultFieldTestData[x], ct), cts.Token)); } - private void VerifyDefaultFieldPhrases(IEnumerable> phrases) + private void VerifyDefaultFieldPhrases(IEnumerable> phrases) { phrases.Should().BeEquivalentTo( new[] { - new ItemPhrases( + new DocumentPhrases( this.sut.Single(x => x.Key == 101), new[] { new FieldPhrases(IndexedFieldLookup.DefaultFieldName, "quick brown fox") }), - new ItemPhrases( + new DocumentPhrases( this.sut.Single(x => x.Key == 102), new[] { new FieldPhrases(IndexedFieldLookup.DefaultFieldName, "brown") }), - new ItemPhrases( + new DocumentPhrases( this.sut.Single(x => x.Key == 103), new[] { new FieldPhrases(IndexedFieldLookup.DefaultFieldName, "quick fox", "brown") }) }); } - private void VerifyObjectResults(Dictionary sourceItems, string expectedFieldName, IEnumerable> phrases) + private void 
VerifyObjectResults(Dictionary sourceItems, string expectedFieldName, IEnumerable> phrases) { var source = sourceItems.ToList(); phrases.Should().BeEquivalentTo( new[] { - new ItemPhrases( + new DocumentPhrases( source[0].Value, this.sut.Single(x => x.Key == source[0].Key), new[] { new FieldPhrases(expectedFieldName, "quick brown fox" )}), - new ItemPhrases( + new DocumentPhrases( source[1].Value, this.sut.Single(x => x.Key == source[1].Key), new[] { new FieldPhrases(expectedFieldName, "brown" )}), - new ItemPhrases( + new DocumentPhrases( source[2].Value, this.sut.Single(x => x.Key == source[2].Key), new[] { new FieldPhrases(expectedFieldName, "quick fox", "brown") }) diff --git a/test/Lifti.Tests/Serialization/BinarySerializerTests.cs b/test/Lifti.Tests/Serialization/BinarySerializerTests.cs index 8d37e7e7..68835080 100644 --- a/test/Lifti.Tests/Serialization/BinarySerializerTests.cs +++ b/test/Lifti.Tests/Serialization/BinarySerializerTests.cs @@ -103,12 +103,11 @@ public async Task WhenNewStaticFieldsIntroduced_FieldIdsShouldBeMapped() await SerializeAndDeserializeAsync(index, deserializedIndex); deserializedIndex.FieldLookup.AllFieldNames.Should().BeEquivalentTo( - new[] - { + [ "Name", "Foo", "Bar" - }); + ]); deserializedIndex.FieldLookup.GetFieldInfo("Foo").Id.Should().Be(2); @@ -183,17 +182,34 @@ public async Task DeserializingIndexWhenDynamicFieldReaderRemoved_ShouldThrowExc exception.Message.Should().Be("An unknown dynamic field reader name was encountered: DynFields - this would likely indicate that an index was serialized with a differently configured set of dynamic field readers."); } + [Fact] + public async Task ShouldDeserializeV6Index() + { + var index = CreateObjectIndex(); + + var serializer = new BinarySerializer(); + using (var stream = new MemoryStream(TestResources.v6Index)) + { + await serializer.DeserializeAsync(index, stream); + } + + index.Search("serialized").Should().HaveCount(1); + index.Search("亜").Should().HaveCount(1); + + var 
objectScoreBoostMetadata = index.Metadata.GetObjectTypeScoreBoostMetadata(1); + // The first item in the index wins for both score boosts, each with a weighting of 10. + objectScoreBoostMetadata.CalculateScoreBoost(index.Metadata.GetDocumentMetadata(0)) + .Should().Be(20D); + + // The first item in the index is at the bottom end of both scales, so should return 2. + objectScoreBoostMetadata.CalculateScoreBoost(index.Metadata.GetDocumentMetadata(1)) + .Should().Be(2D); + } + [Fact] public async Task ShouldDeserializeV5Index() { - var index = new FullTextIndexBuilder() - .WithObjectTokenization( - cfg => cfg - .WithKey(x => x.Id) - .WithField("Name", x => x.Name) - .WithField("SomethingElse", x => x.SomethingElse) - .WithDynamicFields("DynFields", x => x.Fields)) - .Build(); + var index = CreateObjectIndex(); var serializer = new BinarySerializer(); using (var stream = new MemoryStream(TestResources.v5Index)) @@ -203,6 +219,10 @@ public async Task ShouldDeserializeV5Index() index.Search("serialized").Should().HaveCount(1); index.Search("亜").Should().HaveCount(1); + + // The old index won't have any scoring data associated to the documents, so should return 1 + index.Metadata.GetObjectTypeScoreBoostMetadata(1).CalculateScoreBoost(index.Metadata.GetDocumentMetadata(0)) + .Should().Be(1D); } [Fact] @@ -280,7 +300,14 @@ public async Task ShouldRoundTripIndexStructure() using (var stream = File.Open(fileName, FileMode.CreateNew)) { var stopwatch = Stopwatch.StartNew(); + +/* Unmerged change from project 'Lifti.Tests (net8.0)' +Before: var index = await this.CreateWikipediaIndexAsync(); +After: + var index = await CreateWikipediaIndexAsync(); +*/ + var index = await BinarySerializerTests.CreateWikipediaIndexAsync(); await serializer.SerializeAsync(index, stream, false); this.output.WriteLine($"Serialized in {stopwatch.ElapsedMilliseconds}ms"); @@ -296,7 +323,7 @@ public async Task ShouldRoundTripIndexStructure() this.output.WriteLine($"Deserialized in 
{stopwatch.ElapsedMilliseconds}ms"); - newIndex.Items.GetIndexedItems().Should().BeEquivalentTo(index.Items.GetIndexedItems()); + newIndex.Metadata.GetIndexedDocuments().Should().BeEquivalentTo(index.Metadata.GetIndexedDocuments()); newIndex.Count.Should().Be(index.Count); newIndex.Root.ToString().Should().Be(index.Root.ToString()); @@ -338,22 +365,69 @@ public async Task ShouldBeAbleToSerializeAndDeserializeMultipleIndexesToTheSameS } } + [Fact] + public async Task ShouldRoundTripMixedScoringMetadata() + { + var index = CreateObjectIndex(); + + // One object with score boost info + await index.AddAsync(new DynamicFieldObject + { + Id = 1, + CreatedDate = new DateTime(2022, 10, 1), + Importance = 5D, + Name = "Blah", + SomethingElse = "Great", + Fields = new Dictionary + { + { "Foo", "Some serialized data" }, + { "Bar", "More text" } + } + }); + + // One object without score boost info + await index.AddAsync(new DynamicFieldObject + { + Id = 2, + Name = "Cheese", + SomethingElse = "Great", + Fields = new Dictionary + { + { "Foo", "Other data" }, + { "Bar", "亜" } + } + }); + + // One piece of loose text + await index.AddAsync(3, "Loose text"); + + var deserialized = CreateObjectIndex(); + await SerializeAndDeserializeAsync(index, deserialized); + + var metadata = deserialized.Metadata.GetDocumentMetadata(0); + metadata.ScoringFreshnessDate.Should().Be(new DateTime(2022, 10, 1)); + metadata.ScoringMagnitude.Should().Be(5D); + + metadata = deserialized.Metadata.GetDocumentMetadata(1); + metadata.ScoringFreshnessDate.Should().BeNull(); + metadata.ScoringMagnitude.Should().BeNull(); + + metadata = deserialized.Metadata.GetDocumentMetadata(2); + metadata.ScoringFreshnessDate.Should().BeNull(); + metadata.ScoringMagnitude.Should().BeNull(); + } + // Used to create test indexes when defining a new serialization version //[Fact] //public async Task CreateTestIndex() //{ - // var index = new FullTextIndexBuilder() - // .WithObjectTokenization( - // cfg => cfg - // .WithKey(x 
=> x.Id) - // .WithField("Name", x => x.Name) - // .WithField("SomethingElse", x => x.SomethingElse) - // .WithDynamicFields("DynFields", x => x.Fields)) - // .Build(); + // var index = CreateObjectIndex(); // await index.AddAsync(new DynamicFieldObject // { // Id = 1, + // CreatedDate = new DateTime(2022, 10, 1), + // Importance = 5D, // Name = "Blah", // SomethingElse = "Great", // Fields = new Dictionary @@ -366,6 +440,8 @@ public async Task ShouldBeAbleToSerializeAndDeserializeMultipleIndexesToTheSameS // await index.AddAsync(new DynamicFieldObject // { // Id = 2, + // CreatedDate = new DateTime(2021, 10, 1), + // Importance = 2D, // Name = "Cheese", // SomethingElse = "Great", // Fields = new Dictionary @@ -376,10 +452,25 @@ public async Task ShouldBeAbleToSerializeAndDeserializeMultipleIndexesToTheSameS // }); // var serializer = new BinarySerializer(); - // using var stream = File.Open("../../../V5.dat", FileMode.Create); + // using var stream = File.Open("../../../V6.dat", FileMode.Create); // await serializer.SerializeAsync(index, stream, true); //} + private static FullTextIndex CreateObjectIndex() + { + return new FullTextIndexBuilder() + .WithObjectTokenization( + cfg => cfg + .WithKey(x => x.Id) + .WithField("Name", x => x.Name) + .WithField("SomethingElse", x => x.SomethingElse) + .WithDynamicFields("DynFields", x => x.Fields) + .WithScoreBoosting(x => x + .Freshness(o => o.CreatedDate, 10) + .Magnitude(o => o.Importance, 10))) + .Build(); + } + private static string CreateRandomIndexFileName() { return Guid.NewGuid().ToString() + ".dat"; @@ -407,7 +498,7 @@ private static async Task> CreateIndexAsync(string text) return index; } - private async Task> CreateWikipediaIndexAsync() + private static async Task> CreateWikipediaIndexAsync() { var index = new FullTextIndexBuilder() .WithTextExtractor() @@ -441,6 +532,8 @@ private class DynamicFieldObject public string Name { get; set; } = null!; public string SomethingElse { get; set; } = null!; public 
Dictionary? Fields { get; set; } + public DateTime? CreatedDate { get; internal set; } + public double? Importance { get; internal set; } } } } diff --git a/test/Lifti.Tests/SharedPoolTests.cs b/test/Lifti.Tests/SharedPoolTests.cs index 86318d64..300ee998 100644 --- a/test/Lifti.Tests/SharedPoolTests.cs +++ b/test/Lifti.Tests/SharedPoolTests.cs @@ -6,7 +6,7 @@ namespace Lifti.Tests { public class SharedPoolTests { - private SharedPool> sut; + private readonly SharedPool> sut; public SharedPoolTests() { @@ -18,27 +18,27 @@ public SharedPoolTests() [Fact] public void WhenNoItemsInPool_ShouldCreateNew() { - this.sut.Create().Should().NotBeNull(); + this.sut.Take().Should().NotBeNull(); } [Fact] public void WhenItemInPool_ShouldReturnPooledItem() { - var first = this.sut.Create(); + var first = this.sut.Take(); this.sut.Return(first); - var second = this.sut.Create(); + var second = this.sut.Take(); first.Should().BeSameAs(second); } [Fact] public void WhenReturningItemToPool_ShouldApplyReturnFunction() { - var first = this.sut.Create(); + var first = this.sut.Take(); first.Add("1"); this.sut.Return(first); - var second = this.sut.Create(); + var second = this.sut.Take(); second.Should().BeEquivalentTo("1", "Returned"); } } diff --git a/test/Lifti.Tests/TestResources.Designer.cs b/test/Lifti.Tests/TestResources.Designer.cs index b30e7986..ecda4942 100644 --- a/test/Lifti.Tests/TestResources.Designer.cs +++ b/test/Lifti.Tests/TestResources.Designer.cs @@ -99,5 +99,15 @@ internal class TestResources { return ((byte[])(obj)); } } + + /// + /// Looks up a localized resource of type System.Byte[]. 
+ /// + internal static byte[] v6Index { + get { + object obj = ResourceManager.GetObject("v6Index", resourceCulture); + return ((byte[])(obj)); + } + } } } diff --git a/test/Lifti.Tests/TestResources.resx b/test/Lifti.Tests/TestResources.resx index 7a82587f..c9792a86 100644 --- a/test/Lifti.Tests/TestResources.resx +++ b/test/Lifti.Tests/TestResources.resx @@ -130,4 +130,7 @@ V5.dat;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + V6.dat;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + \ No newline at end of file diff --git a/test/Lifti.Tests/ThesaurusBuilderTests.cs b/test/Lifti.Tests/ThesaurusBuilderTests.cs index 0ac9c246..a17553de 100644 --- a/test/Lifti.Tests/ThesaurusBuilderTests.cs +++ b/test/Lifti.Tests/ThesaurusBuilderTests.cs @@ -1,5 +1,6 @@ using FluentAssertions; using Lifti.Tokenization; +using Lifti.Tokenization.Stemming; using System.Linq; using Xunit; @@ -95,7 +96,7 @@ public void UsingStemmingTokenizer_ShouldCombineTermsThatResultInTheSameStemmedF sut.WithSynonyms("HYPOCRITICAL", "DECEPTIVE", "INSINCERE"); VerifyResults( - new IndexTokenizer(new TokenizationOptions { Stemming = true }), + new IndexTokenizer(new TokenizationOptions { Stemmer = new PorterStemmer() }), ("HYPOCRIT", new[] { "HYPOCRIT", "DECEPT", "INSINCER" }), ("DECEPT", new[] { "HYPOCRIT", "DECEPT", "INSINCER" }), ("INSINCER", new[] { "HYPOCRIT", "DECEPT", "INSINCER" })); diff --git a/test/Lifti.Tests/ThesaurusTests.cs b/test/Lifti.Tests/ThesaurusTests.cs index 05b30518..882ddf04 100644 --- a/test/Lifti.Tests/ThesaurusTests.cs +++ b/test/Lifti.Tests/ThesaurusTests.cs @@ -29,7 +29,7 @@ public async Task LooseTextIndex_SearchingForSynonym_ShouldReturnMatch() .BeEquivalentTo( new[] { - new ItemPhrases( + new DocumentPhrases( results.Single(x => x.Key == 1), new[] { new FieldPhrases(IndexedFieldLookup.DefaultFieldName , "big") }) }); @@ -47,7 +47,7 @@ public async Task 
LooseTextIndex_SearchingForHyponym_ShouldOnlyReturnSearchedWor .BeEquivalentTo( new[] { - new ItemPhrases( + new DocumentPhrases( results.Single(x => x.Key == 3), new[] { new FieldPhrases(IndexedFieldLookup.DefaultFieldName, "vehicle") }) }); @@ -65,10 +65,10 @@ public async Task LooseTextIndex_SearchingForHypernym_ShouldReturnMatch() .BeEquivalentTo( new[] { - new ItemPhrases( + new DocumentPhrases( results.Single(x => x.Key == 2), new[] { new FieldPhrases(IndexedFieldLookup.DefaultFieldName, "car") }), - new ItemPhrases( + new DocumentPhrases( results.Single(x => x.Key == 3), new[] { new FieldPhrases(IndexedFieldLookup.DefaultFieldName, "vehicle") }) }); @@ -86,7 +86,7 @@ public async Task ObjectIndex_SearchingForSynonym_ShouldReturnMatch() .BeEquivalentTo( new[] { - new ItemPhrases( + new DocumentPhrases( results.Single(x => x.Key == 1), new[] { new FieldPhrases("Field" , "big") }) }); @@ -104,7 +104,7 @@ public async Task ObjectIndex_SearchingForHyponym_ShouldOnlyReturnSearchedWord() .BeEquivalentTo( new[] { - new ItemPhrases( + new DocumentPhrases( results.Single(x => x.Key == 3), new[] { new FieldPhrases("Field", "vehicle") }) }); @@ -122,10 +122,10 @@ public async Task ObjectIndex_SearchingForHypernym_ShouldReturnMatch() .BeEquivalentTo( new[] { - new ItemPhrases( + new DocumentPhrases( results.Single(x => x.Key == 2), new[] { new FieldPhrases("Field", "car") }), - new ItemPhrases( + new DocumentPhrases( results.Single(x => x.Key == 3), new[] { new FieldPhrases("Field", "vehicle") }) }); diff --git a/test/Lifti.Tests/Tokenization/IndexTokenizerTests.cs b/test/Lifti.Tests/Tokenization/IndexTokenizerTests.cs index f87b4cb2..6447ec4e 100644 --- a/test/Lifti.Tests/Tokenization/IndexTokenizerTests.cs +++ b/test/Lifti.Tests/Tokenization/IndexTokenizerTests.cs @@ -25,7 +25,7 @@ public void ShouldReturnNoTokensForDefaultMemoryFromNullString() { var sut = WithConfiguration(); - var output = Execute(sut, new string[] { null! 
}); + var output = Execute(sut, [null!]); output.Should().BeEmpty(); } @@ -76,7 +76,7 @@ public class WithIgnoredCharacters : IndexTokenizerTests [Fact] public void WhenIgnoredCharacterIsAlsoSplitCharacter_ShouldNotSplitOnCharacter() { - var output = WithConfiguration(ignoreChars: new[] { ',' }, additionalSplitChars: new[] { ',' }) + var output = WithConfiguration(ignoreChars: [','], additionalSplitChars: [',']) .Process("test,test test".AsSpan()); output.Should().BeEquivalentTo(new[] @@ -89,7 +89,7 @@ public void WhenIgnoredCharacterIsAlsoSplitCharacter_ShouldNotSplitOnCharacter() [Fact] public void WithIgnoredCharacter_ShouldNotIncludeCharactersInTokenMatches() { - var output = WithConfiguration(ignoreChars: new[] { '\'' }) + var output = WithConfiguration(ignoreChars: ['\'']) .Process("O'Reilly's".AsSpan()); output.Should().BeEquivalentTo(new[] @@ -259,7 +259,7 @@ public void WhenNotSplittingAtPunctuation_ShouldTokenizeAtWordBreaksOnly() [Fact] public void WhenSplittingOnAdditionalCharacters_ShouldTokenizeAtWordBreaksAndAdditionalCharacters() { - var sut = WithConfiguration(splitOnPunctuation: false, additionalSplitChars: new[] { '@', '¬' }); + var sut = WithConfiguration(splitOnPunctuation: false, additionalSplitChars: ['@', '¬']); var input = "Test@string¬with custom\tsplits"; diff --git a/test/Lifti.Tests/Tokenization/Objects/DictionaryDynamicFieldReaderTests.cs b/test/Lifti.Tests/Tokenization/Objects/DictionaryDynamicFieldReaderTests.cs index 93c9a760..b5fbabc1 100644 --- a/test/Lifti.Tests/Tokenization/Objects/DictionaryDynamicFieldReaderTests.cs +++ b/test/Lifti.Tests/Tokenization/Objects/DictionaryDynamicFieldReaderTests.cs @@ -52,8 +52,8 @@ public async Task GettingFieldValue_ShouldReturnTextAssociatedToSpecificField() await sut.ReadAsync(new TestObject(fieldValues), default); - (await sut.ReadAsync(new TestObject(fieldValues), "Foo", default)).Should().BeEquivalentTo(new[] { "Bar" }); - (await sut.ReadAsync(new TestObject(fieldValues), "Baz", 
default)).Should().BeEquivalentTo(new[] { "Bam" }); + (await sut.ReadAsync(new TestObject(fieldValues), "Foo", default)).Should().BeEquivalentTo(["Bar"]); + (await sut.ReadAsync(new TestObject(fieldValues), "Baz", default)).Should().BeEquivalentTo(["Bam"]); } [Fact] @@ -63,8 +63,8 @@ public async Task WithPrefix_GettingFieldValue_ShouldReturnTextAssociatedToSpeci await sut.ReadAsync(new TestObject(fieldValues), default); - (await sut.ReadAsync(new TestObject(fieldValues), "Test_Foo", default)).Should().BeEquivalentTo(new[] { "Bar" }); - (await sut.ReadAsync(new TestObject(fieldValues), "Test_Baz", default)).Should().BeEquivalentTo(new[] { "Bam" }); + (await sut.ReadAsync(new TestObject(fieldValues), "Test_Foo", default)).Should().BeEquivalentTo(["Bar"]); + (await sut.ReadAsync(new TestObject(fieldValues), "Test_Baz", default)).Should().BeEquivalentTo(["Bam"]); } [Fact] @@ -87,7 +87,8 @@ private static StringDictionaryDynamicFieldReader CreateSut(string? fieldPrefix, new FakeIndexTokenizer(), new PlainTextExtractor(), - new ThesaurusBuilder().Build(new FakeIndexTokenizer())); + new ThesaurusBuilder().Build(new FakeIndexTokenizer()), + 1D); } private class TestObject diff --git a/test/Lifti.Tests/Tokenization/Preprocessing/InputPreprocessorPipelineTests.cs b/test/Lifti.Tests/Tokenization/Preprocessing/InputPreprocessorPipelineTests.cs index b153d63e..7c696abf 100644 --- a/test/Lifti.Tests/Tokenization/Preprocessing/InputPreprocessorPipelineTests.cs +++ b/test/Lifti.Tests/Tokenization/Preprocessing/InputPreprocessorPipelineTests.cs @@ -52,7 +52,7 @@ public void WithIgnoreCharacters_ShouldReturnEmptyForIgnoredCharacter() { var input = 'Ч'; - var pipeline = CreatePipeline(ignoreCharacters: new[] { input }); + var pipeline = CreatePipeline(ignoreCharacters: [input]); pipeline.Process(input).Should().BeEmpty(); diff --git a/test/Lifti.Tests/TokenizerBuilderTests.cs b/test/Lifti.Tests/TokenizerBuilderTests.cs index 34140287..53e8da1c 100644 --- 
a/test/Lifti.Tests/TokenizerBuilderTests.cs +++ b/test/Lifti.Tests/TokenizerBuilderTests.cs @@ -1,17 +1,18 @@ using FluentAssertions; using Lifti.Tests.Querying; using Lifti.Tokenization; +using Lifti.Tokenization.Stemming; using System; +using System.Text; using Xunit; namespace Lifti.Tests { public class TokenizerBuilderTests { - private static readonly TokenizationOptions expectedDefaultOptions = new TokenizationOptions() + private static readonly TokenizationOptions expectedDefaultOptions = new() { AccentInsensitive = true, - Stemming = false, AdditionalSplitCharacters = Array.Empty(), CaseInsensitive = true, SplitOnPunctuation = true @@ -40,17 +41,52 @@ public void WithoutApplyingAnyOptions_ShouldSetDefaultsCorrectly() builder.Build().Should().BeOfType().Subject .Options.Should().BeEquivalentTo(expectedDefaultOptions); } + + [Fact] + public void WithoutStemming_ShouldLeaveStemmerNull() + { + var builder = new TokenizerBuilder(); + builder.Build().Should().BeOfType().Subject.Options.Stemmer.Should().BeNull(); + } - [Theory] - [InlineData(true)] - [InlineData(false)] - public void WithStemming_ShouldSetTheStemmingPropertyCorrectly(bool setting) + [Fact] + public void WithStemming_ShouldSetTheStemmerToAPorterStemmer() { var builder = new TokenizerBuilder(); - builder.WithStemming(setting); - builder.Build().Should().BeOfType().Subject.Options.Stemming.Should().Be(setting); - } - + builder.WithStemming(); + builder.Build().Should().BeOfType().Subject.Options.Stemmer.Should().BeOfType(); + } + + [Fact] + public void WithCustomStemmer_ShouldSetTheStemmerToAProvidedStemmer() + { + var builder = new TokenizerBuilder(); + builder.WithStemming(new CustomStemmer(true, true)); + builder.Build().Should().BeOfType().Subject.Options.Stemmer.Should().BeOfType(); + } + + [Fact] + public void StemmerInsensitivityRequirements_ShouldAffectIndexInsensitivityOptions() + { + var builder = new TokenizerBuilder() + .AccentInsensitive(false) + .CaseInsensitive(false) + 
.WithStemming(new CustomStemmer(true, false)); + + var options = builder.Build().Should().BeOfType().Subject.Options; + options.CaseInsensitive.Should().BeTrue(); + options.AccentInsensitive.Should().BeFalse(); + + builder = new TokenizerBuilder() + .AccentInsensitive(false) + .CaseInsensitive(false) + .WithStemming(new CustomStemmer(false, true)); + + options = builder.Build().Should().BeOfType().Subject.Options; + options.CaseInsensitive.Should().BeFalse(); + options.AccentInsensitive.Should().BeTrue(); + } + [Theory] [InlineData(true)] [InlineData(false)] @@ -95,6 +131,24 @@ public void WithIgnoreCharacters_ShouldSetUpIgnoreCharacterListCorrectly() var builder = new TokenizerBuilder(); builder.IgnoreCharacters('\'', '`'); builder.Build().Should().BeOfType().Subject.Options.IgnoreCharacters.Should().BeEquivalentTo(new[] { '\'', '`' }); + } + + private class CustomStemmer : IStemmer + { + public CustomStemmer(bool requireCaseInsensitivity, bool requireAccentInsensitivity) + { + this.RequiresCaseInsensitivity = requireCaseInsensitivity; + this.RequiresAccentInsensitivity = requireAccentInsensitivity; + } + + public bool RequiresCaseInsensitivity { get; private set; } + + public bool RequiresAccentInsensitivity { get; private set; } + + public void Stem(StringBuilder builder) + { + builder.Length = 1; + } } } } diff --git a/test/Lifti.Tests/V6.dat b/test/Lifti.Tests/V6.dat new file mode 100644 index 0000000000000000000000000000000000000000..45703d4617a55f259767501323482e97fa67454a GIT binary patch literal 376 zcmYLE%SyyR5UlR$1XmC|_%ohN*37zrsIb|CcncUbu$#cH7w>+Cp8P63`XgFBiVk!a z{iv#*rp{p^cGYcrm7{jr_Ky$4cpo?WK4@4>)2woZv7Z@NIuyHMe189(p#WxcKpIBLSVEYE33i@pNF+uTw2=d!NUAxdr+?5r5hEJd*fPa`q&6CE!(xc*yWOU(P`Jr`Xv=!R8NB%uY=)m*jC1!Ys; l#wz(rcIY&hjOlvbKq;4T#tno1%(RfqUS0|OVltF3zHb~)D@6bR literal 0 HcmV?d00001 diff --git a/test/Lifti.Tests/WordLocationTests.cs b/test/Lifti.Tests/WordLocationTests.cs index 9ad0e1d8..b5bc7c10 100644 --- a/test/Lifti.Tests/WordLocationTests.cs +++ 
b/test/Lifti.Tests/WordLocationTests.cs @@ -24,9 +24,10 @@ public void WordsWithLowerStartValues_ShouldBeLessThanWordsWithHigherStartValues (firstWord >= secondWord).Should().BeFalse(); (secondWord >= firstWord).Should().BeTrue(); - firstWord.Should().BeLessThan(secondWord); - secondWord.Should().BeGreaterThan(firstWord); - firstWord.Should().NotBe(secondWord); + firstWord.CompareTo(secondWord).Should().Be(-1); + secondWord.CompareTo(firstWord).Should().Be(1); + + firstWord.Equals(secondWord).Should().Be(false); } [Fact] diff --git a/test/PerformanceProfiling/ChildNodeMapBenchmarks.cs b/test/PerformanceProfiling/ChildNodeMapBenchmarks.cs new file mode 100644 index 00000000..42165d2d --- /dev/null +++ b/test/PerformanceProfiling/ChildNodeMapBenchmarks.cs @@ -0,0 +1,109 @@ +using BenchmarkDotNet.Attributes; +using BenchmarkDotNet.Jobs; +using Lifti; +using System; + +namespace PerformanceProfiling +{ + public class ChildNodeMapBenchmarks : IndexBenchmarkBase + { + private const int OperationCount = 1000000; + private ChildNodeMap childNodeMapSingleEntry; + private ChildNodeMap childNodeMapTwoEntries; + private ChildNodeMap childNodeMapMultipleEntries; + + [IterationSetup] + public void SetUp() + { + var testIndexNode = new IndexNode("test".AsMemory(), new ChildNodeMap(), new DocumentTokenMatchMap()); + this.childNodeMapSingleEntry = new( + [ + new ChildNodeMapEntry('A', testIndexNode) + ]); + + this.childNodeMapTwoEntries = new( + [ + new ChildNodeMapEntry('A', testIndexNode), + new ChildNodeMapEntry('E', testIndexNode), + ]); + + this.childNodeMapMultipleEntries = new( + [ + new ChildNodeMapEntry('F', testIndexNode), + new ChildNodeMapEntry('T', testIndexNode), + new ChildNodeMapEntry('V', testIndexNode), + new ChildNodeMapEntry('W', testIndexNode), + new ChildNodeMapEntry('X', testIndexNode), + ]); + } + + [Benchmark(OperationsPerInvoke = OperationCount)] + public IndexNode? 
SingleEntry_NotMatched() + { + var success = this.childNodeMapSingleEntry.TryGetValue('Z', out var nextNode); + + return nextNode; + } + + [Benchmark(OperationsPerInvoke = OperationCount)] + public IndexNode? SingleEntry_Matched() + { + var success = this.childNodeMapSingleEntry.TryGetValue('A', out var nextNode); + + return nextNode; + } + + [Benchmark(OperationsPerInvoke = OperationCount)] + public IndexNode? TwoEntries_NotMatched() + { + var success = this.childNodeMapTwoEntries.TryGetValue('D', out var nextNode); + + return nextNode; + } + + [Benchmark(OperationsPerInvoke = OperationCount)] + public IndexNode? TwoEntries_Matched() + { + var success = this.childNodeMapTwoEntries.TryGetValue('A', out var nextNode) + || this.childNodeMapTwoEntries.TryGetValue('E', out nextNode); + + return nextNode; + } + + [Benchmark(OperationsPerInvoke = OperationCount)] + public IndexNode? MultipleEntries_NotMatched_BeforeStartCharacter() + { + var success = this.childNodeMapMultipleEntries.TryGetValue('A', out var nextNode); + + return nextNode; + } + + [Benchmark(OperationsPerInvoke = OperationCount)] + public IndexNode? MultipleEntries_NotMatched_AfterLastCharacter() + { + var success = this.childNodeMapMultipleEntries.TryGetValue('Z', out var nextNode); + + return nextNode; + } + + [Benchmark(OperationsPerInvoke = OperationCount)] + public IndexNode? MultipleEntries_NotMatched_InCharacterSet() + { + var success = this.childNodeMapMultipleEntries.TryGetValue('U', out var nextNode); + + return nextNode; + } + + [Benchmark(OperationsPerInvoke = OperationCount)] + public IndexNode? 
MultipleEntries_Matched() + { + var success = this.childNodeMapMultipleEntries.TryGetValue('F', out var nextNode) + || this.childNodeMapMultipleEntries.TryGetValue('T', out nextNode) + || this.childNodeMapMultipleEntries.TryGetValue('V', out nextNode) + || this.childNodeMapMultipleEntries.TryGetValue('W', out nextNode) + || this.childNodeMapMultipleEntries.TryGetValue('X', out nextNode); + + return nextNode; + } + } +} diff --git a/test/PerformanceProfiling/IndexPopulationTests.cs b/test/PerformanceProfiling/IndexPopulationTests.cs index 69091953..60fe1a6a 100644 --- a/test/PerformanceProfiling/IndexPopulationTests.cs +++ b/test/PerformanceProfiling/IndexPopulationTests.cs @@ -1,5 +1,5 @@ using BenchmarkDotNet.Attributes; -using BenchmarkDotNet.Jobs; + using System.Threading.Tasks; namespace PerformanceProfiling diff --git a/test/PerformanceProfiling/IndexSearchingBenchmarks.cs b/test/PerformanceProfiling/IndexSearchingBenchmarks.cs index 30d09a14..83839373 100644 --- a/test/PerformanceProfiling/IndexSearchingBenchmarks.cs +++ b/test/PerformanceProfiling/IndexSearchingBenchmarks.cs @@ -7,12 +7,11 @@ namespace PerformanceProfiling { public class IndexSearchingBenchmarks : IndexBenchmarkBase { - private IFullTextIndex index; + private readonly IFullTextIndex index = CreateNewIndex(4); [GlobalSetup] public async Task SetUp() { - this.index = CreateNewIndex(4); await this.PopulateIndexAsync(this.index); } @@ -32,7 +31,7 @@ public async Task SetUp() "and | they", "and ~ they" )] - public string SearchCriteria { get; set; } + public string SearchCriteria { get; set; } = null!; [Benchmark] public object Searching() diff --git a/test/PerformanceProfiling/PerformanceProfiling.csproj b/test/PerformanceProfiling/PerformanceProfiling.csproj index 57733036..7c2b38f2 100644 --- a/test/PerformanceProfiling/PerformanceProfiling.csproj +++ b/test/PerformanceProfiling/PerformanceProfiling.csproj @@ -4,6 +4,7 @@ Exe net6.0;net7.0;net8.0;netframework4.8.1 latest + enable diff --git 
a/test/PerformanceProfiling/RemovalFromIndexBenchmarks.cs b/test/PerformanceProfiling/RemovalFromIndexBenchmarks.cs index 037508ab..7b3f88c2 100644 --- a/test/PerformanceProfiling/RemovalFromIndexBenchmarks.cs +++ b/test/PerformanceProfiling/RemovalFromIndexBenchmarks.cs @@ -6,12 +6,11 @@ namespace PerformanceProfiling { public class RemovalFromIndexBenchmarks : IndexBenchmarkBase { - private IFullTextIndex index; + private readonly IFullTextIndex index = CreateNewIndex(4); [GlobalSetup] public async Task SetUp() { - this.index = CreateNewIndex(4); await this.PopulateIndexAsync(this.index); } diff --git a/test/PerformanceProfiling/SerializationBenchmarks.cs b/test/PerformanceProfiling/SerializationBenchmarks.cs index 6d8e1b9e..fe21c6a4 100644 --- a/test/PerformanceProfiling/SerializationBenchmarks.cs +++ b/test/PerformanceProfiling/SerializationBenchmarks.cs @@ -8,8 +8,8 @@ namespace PerformanceProfiling { public class SerializationBenchmarks : IndexBenchmarkBase { - private BinarySerializer serializer; - private string fileName; + private readonly BinarySerializer serializer = new(); + private readonly string fileName = $"{Guid.NewGuid()}.dat"; [GlobalSetup] public async Task Setup() @@ -17,8 +17,6 @@ public async Task Setup() var index = CreateNewIndex(2); await this.PopulateIndexAsync(index); - this.serializer = new BinarySerializer(); - this.fileName = $"{Guid.NewGuid()}.dat"; using var stream = File.OpenWrite(this.fileName); await this.serializer.SerializeAsync(index, stream, true); } diff --git a/test/PerformanceProfiling/StartsWithMultiCharacterWildcardBenchmark.cs b/test/PerformanceProfiling/StartsWithMultiCharacterWildcardBenchmark.cs index 63857ca7..90c9e1cd 100644 --- a/test/PerformanceProfiling/StartsWithMultiCharacterWildcardBenchmark.cs +++ b/test/PerformanceProfiling/StartsWithMultiCharacterWildcardBenchmark.cs @@ -1,5 +1,4 @@ using BenchmarkDotNet.Attributes; -using BenchmarkDotNet.Jobs; using Lifti; using System.Threading.Tasks; @@ -7,12 +6,11 @@ 
namespace PerformanceProfiling { public class StartsWithMultiCharacterWildcardBenchmark : IndexBenchmarkBase { - private IFullTextIndex index; + private readonly IFullTextIndex index = CreateNewIndex(4); [GlobalSetup] public async Task SetUp() { - this.index = CreateNewIndex(4); await this.PopulateIndexAsync(this.index); }