Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update ability to apply >1 affix rule pattern #47

Merged
merged 3 commits into from
Jan 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 75 additions & 0 deletions benches/results/v0.3.3-3-g1e7d5fd_2023-01-02_0118.bench
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
v0.3.3-3-g1e7d5fd_2023-01-02_0118.bench

Benchmark from 2023-01-02_0118 on commit v0.3.3-3-g1e7d5fd
rustc 1.68.0-nightly (77429957a 2023-01-01)

CPU Information:
Intel(R) Core(TM) i5-5257U CPU @ 2.70GHz

Running: 'cargo bench --features benchmarking --bench dict_integration'


Parse affix file time: [1.7233 ms 1.7281 ms 1.7355 ms]
change: [-0.7695% +0.2918% +1.6412%] (p = 0.67 > 0.05)
No change in performance detected.
Found 15 outliers among 100 measurements (15.00%)
3 (3.00%) high mild
12 (12.00%) high severe

Parse dict file time: [68.950 ms 69.097 ms 69.264 ms]
change: [-1.8725% -1.4052% -0.9341%] (p = 0.00 < 0.05)
Change within noise threshold.
Found 4 outliers among 100 measurements (4.00%)
2 (2.00%) high mild
2 (2.00%) high severe

Spellcheck: compile dictionary
time: [289.23 ms 293.48 ms 300.70 ms]
change: [+1.3808% +2.8849% +5.6673%] (p = 0.00 < 0.05)
Performance has regressed.
Found 5 outliers among 100 measurements (5.00%)
2 (2.00%) high mild
3 (3.00%) high severe

Spellcheck: 1 correct word
time: [177.69 ns 177.83 ns 178.01 ns]
change: [-5.0761% -2.1543% -0.1512%] (p = 0.09 > 0.05)
No change in performance detected.
Found 17 outliers among 100 measurements (17.00%)
6 (6.00%) high mild
11 (11.00%) high severe

Spellcheck: 1 incorrect word
time: [207.22 ns 207.87 ns 208.60 ns]
change: [+2.3077% +2.8911% +3.6198%] (p = 0.00 < 0.05)
Performance has regressed.
Found 7 outliers among 100 measurements (7.00%)
4 (4.00%) high mild
3 (3.00%) high severe

Spellcheck: 15 correct words
time: [5.7807 µs 5.7842 µs 5.7888 µs]
change: [+0.1442% +0.9362% +1.8589%] (p = 0.02 < 0.05)
Change within noise threshold.
Found 14 outliers among 100 measurements (14.00%)
4 (4.00%) high mild
10 (10.00%) high severe

Spellcheck: 15 incorrect words
time: [6.6361 µs 6.6398 µs 6.6443 µs]
change: [-1.1998% -0.6706% -0.0092%] (p = 0.02 < 0.05)
Change within noise threshold.
Found 15 outliers among 100 measurements (15.00%)
4 (4.00%) high mild
11 (11.00%) high severe

Spellcheck: 188 word paragraph
time: [1.0548 µs 1.0564 µs 1.0585 µs]
change: [+0.5199% +1.3356% +2.0162%] (p = 0.00 < 0.05)
Change within noise threshold.
Found 13 outliers among 100 measurements (13.00%)
5 (5.00%) high mild
8 (8.00%) high severe


Total execution time: 00:04:21
5 changes: 2 additions & 3 deletions crates/zspell/src/dict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ use crate::{suggestions, ParsedCfg};
/// Internally, this is represented as the following:
///
/// - A main wordlist
/// - A list of words to accept byt never suggest
/// - A list of words to accept but never suggest, from the `NOSUGGEST` flag
/// - A list of words that are usually allowed but are forbidden by a personal
/// dictionary
/// dictionary or the `FORBIDDENWORD` flag
/// - A list of stem words and source information
/// - Configuration information
///
Expand Down Expand Up @@ -302,7 +302,6 @@ impl Dictionary {
/// Return type is vector of `(new_word, rule, second_rule)` where
/// `second_rule` is available if both a prefix and a suffix were applied
// PERF: benchmark taking a vec reference instead of returning
// TODO: include morph data for generated words
fn create_affixed_words(&mut self, stem: &str, flags: &[u32], _morph: &[MorphInfo]) {
let mut prefix_rules = Vec::new();
let mut suffix_rules = Vec::new();
Expand Down
59 changes: 34 additions & 25 deletions crates/zspell/src/dict/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,37 +26,46 @@ pub(super) fn create_affixed_word_map(
}

// Store words with prefixes that can also have suffixes
let mut prefixed_words: Vec<(String, &Arc<AfxRule>)> = Vec::new();
let mut prefixed_words: Vec<(String, &Arc<AfxRule>, usize)> = Vec::new();
let mut rule_found = false;

for &rule in prefix_rules.iter() {
let result = rule.apply_pattern(stem).ok_or(())?;
let meta = Meta::new(stem_rc.clone(), Source::Affix(rule.clone()));
let meta_vec = dest.0.entry_ref(&result).or_insert_with(Vec::new);
meta_vec.push(meta);

if rule.can_combine() {
prefixed_words.push((result, rule));
for (idx, result) in rule.apply_patterns(stem) {
let meta = Meta::new(stem_rc.clone(), Source::Affix(rule.clone()));
let meta_vec = dest.0.entry_ref(&result).or_insert_with(Vec::new);
meta_vec.push(meta);
rule_found = true;

if rule.can_combine() {
prefixed_words.push((result, rule, idx));
}
}
}

for &rule in suffix_rules.iter() {
let result = rule.apply_pattern(stem).ok_or(())?;
let meta = Meta::new(stem_rc.clone(), Source::Affix(rule.clone()));
let meta_vec = dest.0.entry_ref(&result).or_insert_with(Vec::new);
meta_vec.push(meta);

if rule.can_combine() {
let words_iter = prefixed_words.iter().filter_map(|(tmp_res, pfx_rule)| {
rule.apply_pattern(tmp_res)
.map(|newword| (newword, pfx_rule))
});

for (newword, &pfx_rule) in words_iter {
let meta_vec = dest.0.entry_ref(&newword).or_insert_with(Vec::new);
let meta1 = Meta::new(stem_rc.clone(), Source::Affix(rule.clone()));
let meta2 = Meta::new(stem_rc.clone(), Source::Affix(pfx_rule.clone()));
meta_vec.push(meta1);
meta_vec.push(meta2);
for (idx, result) in rule.apply_patterns(stem) {
let meta = Meta::new(stem_rc.clone(), Source::Affix(rule.clone()));
let meta_vec = dest.0.entry_ref(&result).or_insert_with(Vec::new);
meta_vec.push(meta);
rule_found = true;

if rule.can_combine() {
// Find words where there's both a prefix and suffix applicable
let words_iter = prefixed_words
.iter()
.map(|(tmp_res, pfx_rule, idx_pfx)| {
rule.apply_patterns(tmp_res)
.map(move |(idx_sfx, newword)| (newword, pfx_rule, idx_pfx, idx_sfx))
})
.flatten();

for (newword, &pfx_rule, _idx_pfx, _idx_sfx) in words_iter {
let meta_vec = dest.0.entry_ref(&newword).or_insert_with(Vec::new);
let meta1 = Meta::new(stem_rc.clone(), Source::Affix(rule.clone()));
let meta2 = Meta::new(stem_rc.clone(), Source::Affix(pfx_rule.clone()));
meta_vec.push(meta1);
meta_vec.push(meta2);
}
}
}
}
Expand Down
15 changes: 10 additions & 5 deletions crates/zspell/src/dict/rule.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//! Implementation for a store rule
//! Implementation for a stored rule

use std::hash::Hash;
use std::ops::Deref;
Expand Down Expand Up @@ -88,15 +88,20 @@ impl AfxRule {
self.can_combine
}

/// Apply one of this rule's patterns, error if none apply
pub fn apply_pattern(&self, stem: &str) -> Option<String> {
/// Apply this rule's patterns. Returns an iterator over the index of each
/// pattern that applies and the resulting string
pub fn apply_patterns<'a>(
&'a self,
stem: &'a str,
) -> impl Iterator<Item = (usize, String)> + 'a {
self.patterns
.iter()
.find_map(|pat| pat.apply_pattern(stem, self.kind))
.enumerate()
.filter_map(|(idx, pat)| pat.apply_pattern(stem, self.kind).map(|s| (idx, s)))
}
}

/// A single rule
/// A single affix rule application
#[derive(Clone, Default, Debug, PartialEq, Eq, Hash)]
pub struct AfxRulePattern {
affix: String,
Expand Down
43 changes: 0 additions & 43 deletions crates/zspell/tests/files/1_pfxsfx.test

This file was deleted.

70 changes: 70 additions & 0 deletions crates/zspell/tests/managed/1_pfxsfx.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
%% Test basic affix, including:
%% - Only prefix
%% - Only suffix
%% - Stripping characters
%% - Patterns
%% - Combined prefix and suffix
%% - Noncombining prefix and suffix

==== afx_str ====
SET UTF-8

PFX A Y 1
PFX A 0 aa .

SFX B Y 3
SFX B 0 bb .
SFX B y cc y
SFX B 0 dd [^y]

PFX C N 2
PFX C yy ee .
PFX C 0 ff .


==== dic_str ====
4
xxx/A
yyy/B
zzz/AB
yyyy/AC


==== check_valid ====
xxx
yyy
zzz
yyyy
aaxxx
yyybb
yycc
aazzz
zzzbb
zzzdd
aazzzbb
aazzzdd
aayyyy
eeyy
ffyyyy


==== check_invalid ====
%% Nothing to see here
nothing

==== wordlist ====
xxx
yyy
zzz
yyyy
aaxxx
yyybb
yycc
aazzz
zzzbb
zzzdd
aazzzbb
aazzzdd
aayyyy
eeyy
ffyyyy
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
%% Verify our nosuggest flag works
%% Verify our nosuggest and forbid flags work

==== afx_str ====
NOSUGGEST !
Expand Down
2 changes: 1 addition & 1 deletion crates/zspell/util/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ impl TestManager {
let mut fpath = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
fpath.pop();
fpath.push("tests");
fpath.push("files");
fpath.push("managed");
fpath.push(fname);

let f_content = fs::read_to_string(&fpath)
Expand Down