Skip to content

Commit

Permalink
Inverse document frequency will be ignored
Browse files Browse the repository at this point in the history
This probably makes sense, because
  • Loading branch information
murermader committed Oct 23, 2022
1 parent 6d0485a commit 27c762e
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 2 deletions.
13 changes: 13 additions & 0 deletions CreateSearchIndex/IgnoreInverseDocumentCount.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
using Lucene.Net.Search.Similarities;

namespace CreateSearchIndex;

/// <summary>
/// A <see cref="DefaultSimilarity"/> variant whose inverse-document-frequency
/// factor is a fixed constant, so how common or rare a term is across the
/// index does not change the value returned by <see cref="Idf"/>.
/// </summary>
public class IgnoreInverseDocumentCount : DefaultSimilarity
{
    /// <summary>Constant IDF factor returned for every term.</summary>
    private const float ConstantIdf = 1f;

    /// <summary>
    /// Returns the same IDF factor for every term.
    /// </summary>
    /// <param name="docFreq">Document frequency of the term; ignored.</param>
    /// <param name="numDocs">Number of documents; ignored.</param>
    /// <returns>Always <c>1</c>.</returns>
    public override float Idf(long docFreq, long numDocs) => ConstantIdf;
}
9 changes: 7 additions & 2 deletions CreateSearchIndex/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,10 @@ public static int Main(string[] args)
StandardAnalyzer analyzer = new StandardAnalyzer(AppLuceneVersion);

// CreateIndex(dir, analyzer);
// return 0;

string? searchTerms = string.Join(" ", args);
// string? searchTerms = Console.ReadLine();
if (string.IsNullOrWhiteSpace(searchTerms))
{
// No search terms -> No output
Expand Down Expand Up @@ -69,8 +71,11 @@ public static void CreateIndex(Directory dir, StandardAnalyzer analyzer)
{
Trace.WriteLine("Creating index");
Stopwatch watch = Stopwatch.StartNew();

var indexConfig = new IndexWriterConfig(AppLuceneVersion, analyzer);

var indexConfig = new IndexWriterConfig(AppLuceneVersion, analyzer)
{
Similarity = new IgnoreInverseDocumentCount()
};
using var writer = new IndexWriter(dir, indexConfig);
using var reader = new StreamReader(@"/Users/rb/Downloads/recipes_w_search_terms.csv");
using var csvReader = new CsvReader(reader, new CsvConfiguration(CultureInfo.InvariantCulture)
Expand Down

0 comments on commit 27c762e

Please sign in to comment.