V6.0.0 (#107)

Among other things... * Use latest C# version * Added support for bracketed field names #76 * Added field score boosting #72 (#83) * Added field score boosting #72 * Added score boosting query syntax #72 * Add .NET 8 as a target * Item score boosting (#95) * Allow characters to be escaped in query syntax #85 * Removing ImmutableCollections (#97) * Speed up field collection prior to scoring (#102) * Added support for adding custom stemmers #82 (#103) * Apply field filters while collecting results * Filter documents at navigator level #105 * Added query part weight calculations #105 Refactor query match collection primitives
mikegoatly · Jan 16, 2024 · 125ae87 · 125ae87
1 parent 77808a4
commit 125ae87
Show file tree

Hide file tree

Showing 278 changed files with 9,725 additions and 4,536 deletions.
diff --git a/.editorconfig b/.editorconfig
@@ -100,7 +100,7 @@ csharp_prefer_simple_using_statement = true:suggestion
 csharp_prefer_simple_default_expression = true:suggestion
 csharp_style_pattern_local_over_anonymous_function = true:suggestion
 csharp_style_prefer_index_operator = false:suggestion
-csharp_style_prefer_range_operator = true:suggestion
+csharp_style_prefer_range_operator = false:suggestion
 csharp_style_unused_value_assignment_preference = discard_variable:none
 csharp_style_unused_value_expression_statement_preference = discard_variable:none
 
@@ -213,6 +213,7 @@ csharp_style_allow_blank_line_after_token_in_arrow_expression_clause_experimenta
 csharp_style_prefer_pattern_matching = true:silent
 csharp_style_prefer_not_pattern = true:suggestion
 csharp_style_prefer_extended_property_pattern = true:suggestion
+csharp_style_prefer_primary_constructors = false:suggestion
 
 [*.{cs,vb}]
 dotnet_style_operator_placement_when_wrapping = beginning_of_line
@@ -248,4 +249,5 @@ dotnet_style_parentheses_in_other_operators = never_if_unnecessary:silent
 dotnet_style_qualification_for_field = true:silent
 dotnet_style_qualification_for_property = true:silent
 dotnet_style_qualification_for_method = true:silent
-dotnet_style_qualification_for_event = true:silent
+dotnet_style_qualification_for_event = true:silent
+dotnet_style_prefer_collection_expression = true:suggestion
diff --git a/Lifti.sln b/Lifti.sln
@@ -13,6 +13,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
 		.gitattributes = .gitattributes
 		.gitignore = .gitignore
 		azure-pipelines.yml = azure-pipelines.yml
+		global.json = global.json
 		LICENSE = LICENSE
 		README.md = README.md
 	EndProjectSection

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
@@ -8,9 +8,9 @@ trigger:
     - test
 
 variables:
-  majorVersion: 5
+  majorVersion: 6
   minorVersion: 0
-  patchVersion: 0
+  patchVersion: 1
   project: src/Lifti.Core/Lifti.Core.csproj
   testProject: test/Lifti.Tests/Lifti.Tests.csproj
   buildConfiguration: 'Release'
@@ -36,6 +36,11 @@ stages:
       inputs:
         packageType: 'sdk'
         version: '7.0.x'
+    - task: UseDotNet@2
+      displayName: "Use .NET 8"
+      inputs:
+        packageType: 'sdk'
+        version: '8.0.x'
     - task: DotNetCoreCLI@2
       displayName: "NuGet Restore"
       inputs:
@@ -82,7 +87,7 @@ stages:
 - stage: PublishCINugetPackage
   displayName: Publish to CI feed
   dependsOn: Build
-  condition: succeeded()
+  condition: and(succeeded(), ne(variables['Build.SourceBranch'], 'refs/heads/master'))
 
   jobs:
   - job: PublishCI
@@ -95,12 +100,13 @@ stages:
     - download: current
       artifact: 'packages'
 
-    - task: DotNetCoreCLI@2
+    - task: NuGetCommand@2
+      displayName: 'Push NuGet Package'
       inputs:
         command: 'push'
         packagesToPush: '$(Pipeline.Workspace)/packages/ci/*.nupkg'
-        nuGetFeedType: 'internal'
-        publishVstsFeed: '21c23043-21b0-4e5a-8557-00b88fc52fd4/9f4e269d-a35a-4657-b2a3-b56b01c01f8c'
+        nuGetFeedType: 'external'
+        publishFeedCredentials: 'NuGet'
 
 - stage: 'PublishReleaseNuGetPackage'
   displayName: 'Publish Release NuGet Package'

diff --git a/docs/assets/icons/logo.svg b/docs/assets/icons/logo.svg
diff --git a/docs/content/en/_index.html b/docs/content/en/_index.html
@@ -3,7 +3,7 @@
 linkTitle = "LIFTI"
 +++
 
-{{< blocks/cover title="LIFTI" image_anchor="top" height="full" color="orange" >}}
+{{< blocks/cover title="LIFTI" image_anchor="top" height="full"  >}}
 <div class="mx-auto">
 	<a class="btn btn-lg btn-primary mr-3 mb-4" href="{{< relref "/docs" >}}">
 		Learn More <i class="fas fa-arrow-alt-circle-right ml-2"></i>

diff --git a/docs/content/en/docs/Custom stemmers/_index.md b/docs/content/en/docs/Custom stemmers/_index.md
@@ -0,0 +1,38 @@
+---
+title: "Custom stemmers"
+linkTitle: "Custom stemmers"
+weight: 7
+description: >
+  You can implement a custom stemmer if the default English Porter stemmer doesn't meet your needs.
+---
+
+Let's say that for some reason you needed to stem every indexed token so that it was at most 3 characters long:
+
+```csharp
+public class FirstThreeLettersStemmer : IStemmer
+{
+    public bool RequiresCaseInsensitivity => false;
+
+    public bool RequiresAccentInsensitivity => false;
+
+    public void Stem(StringBuilder builder)
+    {
+        if (builder.Length > 3)
+        {
+            builder.Length = 3;
+        }
+    }
+}
+```
+
+`RequiresCaseInsensitivity` and `RequiresAccentInsensitivity` are hints used by the index at creation time that force it to enable
+case/accent insensitivity.  Case insensitivity means that any text passed to your stemmer will already be uppercase. Accent insensitivity means 
+that accents will automatically be stripped prior to being sent to the stemmer.
+
+Once you've got your stemmer implemented, you just need to give it to the `FullTextIndexBuilder`:
+
+``` csharp
+var index = new FullTextIndexBuilder<int>()
+    .WithDefaultTokenization(o => o.WithStemming(new FirstThreeLettersStemmer()))
+    .Build();
+```
diff --git a/docs/content/en/docs/Getting started/_index.md b/docs/content/en/docs/Getting started/_index.md
@@ -11,10 +11,10 @@ Perhaps the simplest way to work with LIFTI is to index text against a key that
 In this example, we're just indexing three pieces of text against 3 integer keys:
 
 ``` c#
-// Create a full text index with default settings and integer item keys
+// Create a full text index with default settings and integer keys
 var index = new FullTextIndexBuilder<int>().Build();
 
-// Index item keys with their associated text
+// Index keys with their associated text
 await index.AddAsync(1, "This is some text associated with A: fizz");
 await index.AddAsync(2, "Some buzz text for B");
 await index.AddAsync(3, "Text associated with C is both fizz and buzz");
@@ -24,17 +24,17 @@ await index.AddAsync(3, "Text associated with C is both fizz and buzz");
 You can search in this index using:
 
 ``` c#
-// Search for text containing both Fizz *and* Buzz
+// Search for documents containing both Fizz *and* Buzz
 var results = index.Search("Fizz Buzz").ToList();
 
-// Output: Items with both Fizz and Buzz: 1
-Console.WriteLine($"Items with both Fizz and Buzz: {results.Count}");
+// Output: Documents with both Fizz and Buzz: 1
+Console.WriteLine($"Documents with both Fizz and Buzz: {results.Count}");
 
-// Search for text containing both Fizz *or* Buzz
+// Search for documents containing either Fizz *or* Buzz
 results = index.Search("Fizz | Buzz").ToList();
 
-// Outputs: Items with Fizz or Buzz: 3
-Console.WriteLine($"Items with Fizz or Buzz: {results.Count}");
+// Output: Documents with Fizz or Buzz: 3
+Console.WriteLine($"Documents with Fizz or Buzz: {results.Count}");
 ```
 
 Each set of results returns the keys that the text was indexed against. For example, the first set of results will return a key of 3, 

diff --git a/docs/content/en/docs/Getting started/indexing-objects.md b/docs/content/en/docs/Getting started/indexing-objects.md
@@ -26,7 +26,7 @@ Where you want users to be able to search for text in all three Title, Abstract
 // Books are indexed by their BookId property, which is an int.
 var bookIndex = new FullTextIndexBuilder<int>()
     .WithObjectTokenization<Book>(
-        itemOptions => itemOptions
+        options => options
             .WithKey(b => b.BookId)
             .WithField("Title", b => b.Title,
                 tokenOptions => tokenOptions.WithStemming())
@@ -63,15 +63,15 @@ await bookIndex.AddRangeAsync(books);
 When you get search results back, they will be against the key stored in the index, i.e. the book's id:
 
 ``` csharp
-// Both books contain "first" - prints "Matched items: 1, 2 with respective scores 0.274884808704732, 0.265418822719626"
+// Both books contain "first" - prints "Matched documents: 1, 2 with respective scores: 0.274884808704732, 0.265418822719626"
 var results = bookIndex.Search("first");
 Console.WriteLine(
-    "Matched items: " + 
+    "Matched documents: " + 
     string.Join(", ", results.Select(i => i.Key)) +
     " with respective scores: " +
     string.Join(", ", results.Select(i => i.Score)));
 
-// Only first book contains "the" in the title - prints "Matched items: 1"
+// Only first book contains "the" in the title - prints "Matched documents: 1"
 results = bookIndex.Search("title=the");
-Console.WriteLine("Matched items: " + string.Join(", ", results.Select(i => i.Key)));
+Console.WriteLine("Matched documents: " + string.Join(", ", results.Select(i => i.Key)));
 ```
diff --git a/docs/content/en/docs/Index construction/WithDefaultThesaurus.md b/docs/content/en/docs/Index construction/WithDefaultThesaurus.md
@@ -1,6 +1,6 @@
 ---
-title: "WithDefaultThesaurus"
-linkTitle: "WithDefaultThesaurus"
+title: "Thesaurus synonyms"
+linkTitle: "Thesaurus synonyms"
 weight: 5
 description: >
   Prescribes how the index should treat terms as synonymous when they are being added to the index.

diff --git a/docs/content/en/docs/Index construction/WithDefaultTokenization.md b/docs/content/en/docs/Index construction/WithDefaultTokenization.md
@@ -1,9 +1,9 @@
 ---
-title: "WithDefaultTokenization"
-linkTitle: "WithDefaultTokenization"
+title: "Default tokenization"
+linkTitle: "Default tokenization"
 weight: 2
 description: >
-  Specifies the default tokenization options that should be used when searching or indexing when no other options are provided.
+  Specifies the default tokenization options that should be used when searching or indexing when tokenization options are not explicitly specified for an object type.
 ---
 
 ## Example usage
@@ -47,12 +47,12 @@ Additionally, characters that can be logically expressed as two characters are e
 
 `false`: The tokenizer will be case sensitive. Searching for `Cat` will match `Cat` but not `cat`.
 
-#### `WithStemming(bool)`
+#### `WithStemming()`
 
-`true`: Words will be stemmed using an implementation of the Porter Stemmer algorithm. For example, `ABANDON`, `ABANDONED` and `ABANDONING` will all
+Words will be stemmed using an implementation of the Porter Stemmer algorithm. For example, `ABANDON`, `ABANDONED` and `ABANDONING` will all
 be treated as `ABANDON`. Currently only English is supported.
 
-`false`: **Default** No stemming will be performed on words.
+A [custom stemmer](../../custom-stemmers/) can be used by implementing an `IStemmer` and using `WithStemming(new YourStemmerImplementation())`.
 
 ### Word break modifiers
 

diff --git a/docs/content/en/docs/Index construction/WithDuplicateItemBehavior.md b/docs/content/en/docs/Index construction/WithDuplicateItemBehavior.md
diff --git a/docs/content/en/docs/Index construction/WithDuplicateKeyBehavior.md b/docs/content/en/docs/Index construction/WithDuplicateKeyBehavior.md
@@ -0,0 +1,20 @@
+---
+title: "Managing duplicate keys"
+linkTitle: "Managing duplicate keys"
+weight: 5
+description: >
+  Configure how the index should behave when indexing a document whose key is already present in the index.
+---
+
+`FullTextIndexBuilder<TKey> WithDuplicateKeyBehavior(DuplicateKeyBehavior duplicateKeyBehavior)`
+
+* `DuplicateKeyBehavior.Replace`: **Default** The document associated with the key will first be removed from the index, then indexed.
+* `DuplicateKeyBehavior.ThrowException`: An exception will be thrown. You can use this if you're not expecting keys to be re-indexed and want some indication that your code isn't behaving correctly.
+
+## Example usage
+
+``` csharp
+var index = new FullTextIndexBuilder<int>()
+    .WithDuplicateKeyBehavior(DuplicateKeyBehavior.ThrowException)
+    .Build();
+```
diff --git a/docs/content/en/docs/Index construction/WithIndexModificationAction.md b/docs/content/en/docs/Index construction/WithIndexModificationAction.md
@@ -1,23 +1,23 @@
 ---
-title: "WithIndexModificationAction"
-linkTitle: "WithIndexModificationAction"
+title: "Adding index modification actions"
+linkTitle: "Adding index modification actions"
 weight: 10
 description: >
-  Registers an async action that needs to occur when mutations to the index are committed and a new snapshot is generated.
+  You can register an async action that needs to occur when mutations to the index are committed and a new snapshot is generated.
 ---
 
 Every time an index is modified, either with a single document being added or a batch change being completed, a new immutable snapshot is created. 
 This is part of LIFTI's thread safety mechanism.
 
 You can hook into this process by registering an action using the `FullTextIndexBuilder<TKey>.WithIndexModificationAction` method.
 
-This trivial example just logs to the console the number of items in the index whenever a new snapshot is created.
+This trivial example just logs to the console the number of documents in the index whenever a new snapshot is created.
 
 ``` csharp
 var index = new FullTextIndexBuilder<Guid>()
     .WithIndexModificationAction(async (idx) =>
     {
-        Console.WriteLine($"Index now contains {idx.IdLookup.Count} items");
+        Console.WriteLine($"Index now contains {idx.IdLookup.Count} documents");
     })
     .Build();
 ```