diff --git a/CHANGELOG.md b/CHANGELOG.md index e8410c1804..944d82ed59 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,65 @@ +Tantivy 0.22 (unreleased) +================================ + +Tantivy 0.22 will be able to read indices created with Tantivy 0.21. + +#### Bugfixes +- Fix null byte handling in JSON paths (null bytes in json keys caused panic during indexing) [#2345](https://github.com/quickwit-oss/tantivy/pull/2345)(@PSeitz) +- Fix bug that can cause `get_docids_for_value_range` to panic. [#2295](https://github.com/quickwit-oss/tantivy/pull/2295)(@fulmicoton) +- Avoid 1 document indices by increasing min memory to 15MB for indexing [#2176](https://github.com/quickwit-oss/tantivy/pull/2176)(@PSeitz) +- Fix merge panic for JSON fields [#2284](https://github.com/quickwit-oss/tantivy/pull/2284)(@PSeitz) +- Fix bug occurring when merging JSON object indexed with positions. [#2253](https://github.com/quickwit-oss/tantivy/pull/2253)(@fulmicoton) +- Fix empty DateHistogram gap bug [#2183](https://github.com/quickwit-oss/tantivy/pull/2183)(@PSeitz) +- Fix range query end check (fields with less than 1 value per doc are affected) [#2226](https://github.com/quickwit-oss/tantivy/pull/2226)(@PSeitz) +- Handle exclusive out of bounds ranges on fastfield range queries [#2174](https://github.com/quickwit-oss/tantivy/pull/2174)(@PSeitz) + +#### Breaking API Changes +- rename ReloadPolicy onCommit to onCommitWithDelay [#2235](https://github.com/quickwit-oss/tantivy/pull/2235)(@giovannicuccu) +- Move exports from the root into modules [#2220](https://github.com/quickwit-oss/tantivy/pull/2220)(@PSeitz) +- Accept field name instead of `Field` in FilterCollector [#2196](https://github.com/quickwit-oss/tantivy/pull/2196)(@PSeitz) +- remove deprecated IntOptions and DateTime [#2353](https://github.com/quickwit-oss/tantivy/pull/2353)(@PSeitz) + +#### Features/Improvements +- Tantivy documents as a trait: Index data directly without converting to tantivy types first 
[#2071](https://github.com/quickwit-oss/tantivy/pull/2071)(@ChillFish8) +- encode some part of posting list as -1 instead of direct values (smaller inverted indices) [#2185](https://github.com/quickwit-oss/tantivy/pull/2185)(@trinity-1686a) +- **Aggregation** + - Support to deserialize f64 from string [#2311](https://github.com/quickwit-oss/tantivy/pull/2311)(@PSeitz) + - Add a top_hits aggregator [#2198](https://github.com/quickwit-oss/tantivy/pull/2198)(@ditsuke) + - Support bool type in term aggregation [#2318](https://github.com/quickwit-oss/tantivy/pull/2318)(@PSeitz) + - Support ip addresses in term aggregation [#2319](https://github.com/quickwit-oss/tantivy/pull/2319)(@PSeitz) + - Support date type in term aggregation [#2172](https://github.com/quickwit-oss/tantivy/pull/2172)(@PSeitz) + - Support escaped dot when addressing field [#2250](https://github.com/quickwit-oss/tantivy/pull/2250)(@PSeitz) + +- Add ExistsQuery to check documents that have a value [#2160](https://github.com/quickwit-oss/tantivy/pull/2160)(@imotov) +- Expose TopDocs::order_by_u64_field again [#2282](https://github.com/quickwit-oss/tantivy/pull/2282)(@ditsuke) + +- **Memory/Performance** + - Faster TopN: replace BinaryHeap with TopNComputer [#2186](https://github.com/quickwit-oss/tantivy/pull/2186)(@PSeitz) + - reduce number of allocations during indexing [#2257](https://github.com/quickwit-oss/tantivy/pull/2257)(@PSeitz) + - Less Memory while indexing: docid deltas while indexing [#2249](https://github.com/quickwit-oss/tantivy/pull/2249)(@PSeitz) + - Faster indexing: use term hashmap in fastfield [#2243](https://github.com/quickwit-oss/tantivy/pull/2243)(@PSeitz) + - term hashmap remove copy in is_empty, unused unordered_id [#2229](https://github.com/quickwit-oss/tantivy/pull/2229)(@PSeitz) + - add method to fetch block of first values in columnar [#2330](https://github.com/quickwit-oss/tantivy/pull/2330)(@PSeitz) + - Faster aggregations: add fast path for full columns in fetch_block 
[#2328](https://github.com/quickwit-oss/tantivy/pull/2328)(@PSeitz) + - Faster sstable loading: use fst for sstable index [#2268](https://github.com/quickwit-oss/tantivy/pull/2268)(@trinity-1686a) + +- **QueryParser** + - allow newline where we allow space in query parser [#2302](https://github.com/quickwit-oss/tantivy/pull/2302)(@trinity-1686a) + - allow some mixing of occur and bool in strict query parser [#2323](https://github.com/quickwit-oss/tantivy/pull/2323)(@trinity-1686a) + - handle * inside term in lenient query parser [#2228](https://github.com/quickwit-oss/tantivy/pull/2228)(@trinity-1686a) + - add support for exists query syntax in query parser [#2170](https://github.com/quickwit-oss/tantivy/pull/2170)(@trinity-1686a) +- Add shared search executor [#2312](https://github.com/quickwit-oss/tantivy/pull/2312)(@MochiXu) +- Truncate keys to u16::MAX in term hashmap [#2299](https://github.com/quickwit-oss/tantivy/pull/2299)(@PSeitz) +- report if a term matched when warming up posting list [#2309](https://github.com/quickwit-oss/tantivy/pull/2309)(@trinity-1686a) +- Support json fields in FuzzyTermQuery [#2173](https://github.com/quickwit-oss/tantivy/pull/2173)(@PingXia-at) +- Read list of fields encoded in term dictionary for JSON fields [#2184](https://github.com/quickwit-oss/tantivy/pull/2184)(@PSeitz) +- add collect_block to BoxableSegmentCollector [#2331](https://github.com/quickwit-oss/tantivy/pull/2331)(@PSeitz) +- expose collect_block buffer size [#2326](https://github.com/quickwit-oss/tantivy/pull/2326)(@PSeitz) +- Forward regex parser errors [#2288](https://github.com/quickwit-oss/tantivy/pull/2288)(@adamreichold) +- Make FacetCounts defaultable and cloneable. 
[#2322](https://github.com/quickwit-oss/tantivy/pull/2322)(@adamreichold) +- Derive Debug for SchemaBuilder [#2254](https://github.com/quickwit-oss/tantivy/pull/2254)(@GodTamIt) +- add missing inlines to tantivy options [#2245](https://github.com/quickwit-oss/tantivy/pull/2245)(@PSeitz) + Tantivy 0.21.1 ================================ #### Bugfixes diff --git a/cliff.toml b/cliff.toml index 99bd506620..03424f52bd 100644 --- a/cliff.toml +++ b/cliff.toml @@ -1,6 +1,10 @@ # configuration file for git-cliff{ pattern = "foo", replace = "bar"} # see https://github.com/orhun/git-cliff#configuration-file +[remote.github] +owner = "quickwit-oss" +repo = "tantivy" + [changelog] # changelog header header = """ @@ -8,15 +12,43 @@ header = """ # template for the changelog body # https://tera.netlify.app/docs/#introduction body = """ -{% if version %}\ - {{ version | trim_start_matches(pat="v") }} ({{ timestamp | date(format="%Y-%m-%d") }}) - ================== -{% else %}\ - ## [unreleased] -{% endif %}\ +## What's Changed + +{%- if version %} in {{ version }}{%- endif -%} {% for commit in commits %} - - {% if commit.breaking %}[**breaking**] {% endif %}{{ commit.message | split(pat="\n") | first | trim | upper_first }}(@{{ commit.author.name }})\ -{% endfor %} + {% if commit.github.pr_title -%} + {%- set commit_message = commit.github.pr_title -%} + {%- else -%} + {%- set commit_message = commit.message -%} + {%- endif -%} + - {{ commit_message | split(pat="\n") | first | trim }}\ + {% if commit.github.pr_number %} \ + [#{{ commit.github.pr_number }}]({{ self::remote_url() }}/pull/{{ commit.github.pr_number }}){% if commit.github.username %}(@{{ commit.github.username }}){%- endif -%} \ + {%- endif %} +{%- endfor -%} + +{% if github.contributors | filter(attribute="is_first_time", value=true) | length != 0 %} + {% raw %}\n{% endraw -%} + ## New Contributors +{%- endif %}\ +{% for contributor in github.contributors | filter(attribute="is_first_time", value=true) %} + * @{{ 
contributor.username }} made their first contribution + {%- if contributor.pr_number %} in \ + [#{{ contributor.pr_number }}]({{ self::remote_url() }}/pull/{{ contributor.pr_number }}) \ + {%- endif %} +{%- endfor -%} + +{% if version %} + {% if previous.version %} + **Full Changelog**: {{ self::remote_url() }}/compare/{{ previous.version }}...{{ version }} + {% endif %} +{% else -%} + {% raw %}\n{% endraw %} +{% endif %} + +{%- macro remote_url() -%} + https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }} +{%- endmacro -%} """ # remove the leading and trailing whitespace from the template trim = true @@ -25,53 +57,24 @@ footer = """ """ postprocessors = [ - { pattern = 'Paul Masurel', replace = "fulmicoton"}, # replace with github user - { pattern = 'PSeitz', replace = "PSeitz"}, # replace with github user - { pattern = 'Adam Reichold', replace = "adamreichold"}, # replace with github user - { pattern = 'trinity-1686a', replace = "trinity-1686a"}, # replace with github user - { pattern = 'Michael Kleen', replace = "mkleen"}, # replace with github user - { pattern = 'Adrien Guillo', replace = "guilload"}, # replace with github user - { pattern = 'François Massot', replace = "fmassot"}, # replace with github user - { pattern = 'Naveen Aiathurai', replace = "naveenann"}, # replace with github user - { pattern = '', replace = ""}, # replace with github user ] [git] # parse the commits based on https://www.conventionalcommits.org # This is required or commit.message contains the whole commit message and not just the title -conventional_commits = true +conventional_commits = false # filter out the commits that are not conventional -filter_unconventional = false +filter_unconventional = true # process each line of a commit as an individual commit split_commits = false # regex for preprocessing the commit messages commit_preprocessors = [ - { pattern = '\((\w+\s)?#([0-9]+)\)', replace = "[#${2}](https://github.com/quickwit-oss/tantivy/issues/${2})"}, # 
replace issue numbers + { pattern = '\((\w+\s)?#([0-9]+)\)', replace = ""}, ] #link_parsers = [ #{ pattern = "#(\\d+)", href = "https://github.com/quickwit-oss/tantivy/pulls/$1"}, #] # regex for parsing and grouping commits -commit_parsers = [ - { message = "^feat", group = "Features"}, - { message = "^fix", group = "Bug Fixes"}, - { message = "^doc", group = "Documentation"}, - { message = "^perf", group = "Performance"}, - { message = "^refactor", group = "Refactor"}, - { message = "^style", group = "Styling"}, - { message = "^test", group = "Testing"}, - { message = "^chore\\(release\\): prepare for", skip = true}, - { message = "(?i)clippy", skip = true}, - { message = "(?i)dependabot", skip = true}, - { message = "(?i)fmt", skip = true}, - { message = "(?i)bump", skip = true}, - { message = "(?i)readme", skip = true}, - { message = "(?i)comment", skip = true}, - { message = "(?i)spelling", skip = true}, - { message = "^chore", group = "Miscellaneous Tasks"}, - { body = ".*security", group = "Security"}, - { message = ".*", group = "Other", default_scope = "other"}, -] # protect breaking changes from being skipped due to matching a skipping commit_parser protect_breaking_commits = false # filter out the commits that are not matched by commit parsers