Skip to content

Commit

Permalink
Merge pull request #96 from pluots/duplicate-flags
Browse files Browse the repository at this point in the history
Fix duplicate flag issue for #93
  • Loading branch information
tgross35 committed Dec 13, 2023
2 parents 364c343 + d20f5e1 commit aa42d13
Show file tree
Hide file tree
Showing 9 changed files with 206 additions and 72 deletions.
93 changes: 63 additions & 30 deletions zspell/src/affix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ pub use self::types::{
CompoundPattern, CompoundSyllable, Conversion, Encoding, FlagType, PartOfSpeech, Phonetic,
RuleType,
};
use crate::dict::{AfxRule, FlagValue};
use crate::dict::{AfxRule, Flag, FlagValue};
use crate::error::{BuildError, Error, ParseError};

/// A representation of an affix file
Expand Down Expand Up @@ -59,10 +59,10 @@ pub struct ParsedCfg {
try_characters: String,

/// Flag used to indicate words that should not be suggested
nosuggest_flag: Option<u32>,
nosuggest_flag: Option<Flag>,

/// Note rare (i.e. commonly misspelled) words with this flag
warn_rare_flag: Option<u32>,
warn_rare_flag: Option<Flag>,

/// Don't suggest anything with spaces
no_split_suggestions: bool,
Expand Down Expand Up @@ -105,14 +105,14 @@ pub struct ParsedCfg {
/*
Other options
*/
afx_circumflex_flag: Option<u32>,
forbidden_word_flag: Option<u32>,
afx_circumflex_flag: Option<Flag>,
forbidden_word_flag: Option<Flag>,
afx_full_strip: bool,
afx_keep_case_flag: Option<u32>,
afx_keep_case_flag: Option<Flag>,
input_conversions: Vec<Conversion>,
output_conversions: Vec<Conversion>,
afx_needed_flag: Option<u32>,
afx_substandard_flag: Option<u32>,
afx_needed_flag: Option<Flag>,
afx_substandard_flag: Option<Flag>,
afx_word_chars: String,
afx_check_sharps: bool,
name: String,
Expand All @@ -137,33 +137,33 @@ pub struct CompoundConfig {
min_length: u16,

/// Words with this flag may be in compounds
flag: Option<u32>,
flag: Option<Flag>,

/// Words with this flag may start a compound
begin_flag: Option<u32>,
begin_flag: Option<Flag>,

/// Words with this flag may end a compound
end_flag: Option<u32>,
end_flag: Option<Flag>,

/// Words with this flag may be in the middle of a compound
middle_flag: Option<u32>,
middle_flag: Option<Flag>,

/// Words with this flag can't be on their own, only in compounds
only_flag: Option<u32>,
/// Words with this flag can't be on their own, only in compounds
only_flag: Option<Flag>,

/// Allow these words inside compounds
permit_flag: Option<u32>,
forbid_flag: Option<u32>,
permit_flag: Option<Flag>,
forbid_flag: Option<Flag>,
more_suffixes: bool,
root_flag: Option<u32>,
root_flag: Option<Flag>,
word_max: u16,
forbid_dup: bool,
forbid_repeat: bool,
check_case: bool,
check_triple: bool,
simplify_triple: bool,
forbid_pats: Vec<CompoundPattern>,
force_upper_flag: Option<u32>,
force_upper_flag: Option<Flag>,
syllable: CompoundSyllable,
syllable_num: String,
}
Expand Down Expand Up @@ -378,17 +378,19 @@ impl ParsedCfg {
}

/// Convert a string to the internal flag type
pub(crate) fn convert_flag(&self, flag: &str) -> Result<u32, ParseError> {
pub(crate) fn convert_flag(&self, flag: &str) -> Result<Flag, ParseError> {
self.flag_type
.str_to_flag(flag)
.map_err(|e| ParseError::new_nospan(e, flag))
}

/// Collect all relevant flags into maps. Returns an error if a flag is
/// duplicated within the same map
pub fn compile_flags(&self) -> Result<BTreeMap<u32, FlagValue>, Error> {
let keysets = [
(self.afx_circumflex_flag, FlagValue::AfxCircumfix),
pub fn compile_flags(&self) -> Result<CompiledFlags, Error> {
// FIXME(circumfix): these flags probably need to be split differently

// Map fields to flags for flags that can apply to stems
let stem_key_sets = [
(self.afx_keep_case_flag, FlagValue::AfxKeepCase),
(self.afx_needed_flag, FlagValue::AfxNeeded),
(self.afx_substandard_flag, FlagValue::AfxSubstandard),
Expand All @@ -409,22 +411,42 @@ impl ParsedCfg {
(self.warn_rare_flag, FlagValue::WarnRare),
];

let mut map: BTreeMap<u32, FlagValue> = BTreeMap::new();
// Flags that apply to other rules, such as affixes
let rule_key_sets = [(self.afx_circumflex_flag, FlagValue::AfxCircumfix)];

let mut affix_flags: BTreeMap<Flag, FlagValue> = BTreeMap::new();
let mut rule_flags: BTreeMap<Flag, FlagValue> = BTreeMap::new();

for (key, value) in stem_key_sets
.iter()
.filter_map(|(kopt, val)| kopt.map(|keyval| (keyval, val)))
{
// Check for duplicate values
if let Some(duplicate) = affix_flags.get(&key) {
return Err(BuildError::DuplicateFlag {
flag: self.flag_type.flag_to_str(key),
t1: duplicate.clone(),
t2: Some(value.clone()),
}
.into());
}
affix_flags.insert(key, value.clone());
}

for (key, value) in keysets
for (key, value) in rule_key_sets
.iter()
.filter_map(|(kopt, val)| kopt.map(|keyval| (keyval, val)))
{
// Check for duplicate values
if let Some(duplicate) = map.get(&key) {
if let Some(duplicate) = rule_flags.get(&key) {
return Err(BuildError::DuplicateFlag {
flag: self.flag_type.flag_to_str(key),
t1: duplicate.clone(),
t2: Some(value.clone()),
}
.into());
}
map.insert(key, value.clone());
rule_flags.insert(key, value.clone());
}

for group in &self.afx_rule_groups {
Expand All @@ -433,8 +455,8 @@ impl ParsedCfg {
.str_to_flag(&group.flag)
.map_err(|e| ParseError::new_nospan(e, &group.flag))?;

// Check for duplicate values
if let Some(duplicate) = map.get(&flag) {
// Check for duplicate values only in affix flags.
if let Some(duplicate) = affix_flags.get(&flag) {
return Err(BuildError::DuplicateFlag {
flag: group.flag.clone(),
t1: duplicate.clone(),
Expand All @@ -444,13 +466,24 @@ impl ParsedCfg {
}

let rule = AfxRule::from_parsed_group(self, group);
map.insert(flag, FlagValue::Rule(Arc::new(rule)));
affix_flags.insert(flag, FlagValue::Rule(Arc::new(rule)));
}

Ok(map)
Ok(CompiledFlags {
affix_flags,
rule_flags,
})
}
}

/// Output type of `compile_flags`
///
/// The flags are kept in two separate maps, and `compile_flags` checks for
/// duplicates within each map on its own rather than across both.
pub struct CompiledFlags {
/// Flags and rules that apply to affixes
///
/// `compile_flags` fills this from the configuration flags that can apply
/// to stems, plus one `FlagValue::Rule` entry per affix rule group.
pub affix_flags: BTreeMap<Flag, FlagValue>,
/// Flags that apply to other flags or rules
///
/// `compile_flags` fills this from its rule key set, which at present
/// holds only the circumfix flag.
pub rule_flags: BTreeMap<Flag, FlagValue>,
}

/// Indicate a kind of flag

#[cfg(test)]
Expand Down
2 changes: 1 addition & 1 deletion zspell/src/affix/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ use super::*;

#[test]
fn test_flagtype_convert_ok() {
assert_eq!(FlagType::Ascii.str_to_flag("T"), Ok(84));
assert_eq!(FlagType::Ascii.str_to_flag("T"), Ok(Flag(84)));
}
60 changes: 33 additions & 27 deletions zspell/src/affix/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use std::str::FromStr;
use lazy_static::lazy_static;
use regex::Regex;

use crate::dict::Flag;
use crate::error::ParseErrorKind;
use crate::morph::MorphStr;

Expand Down Expand Up @@ -63,7 +64,7 @@ pub enum FlagType {

impl FlagType {
/// Convert a string flag to its `Flag` representation
pub(crate) fn str_to_flag(self, flag: &str) -> Result<u32, ParseErrorKind> {
pub(crate) fn str_to_flag(self, flag: &str) -> Result<Flag, ParseErrorKind> {
match self {
// Single ascii char
FlagType::Ascii => Self::parse_as_ascii(flag),
Expand All @@ -80,7 +81,7 @@ impl FlagType {
///
/// ASCII and UTF-8 flags just split by characters. Long splits every two
/// characters, numbers split by commas
pub(crate) fn parse_str(self, s: &str) -> Result<Vec<u32>, ParseErrorKind> {
pub(crate) fn parse_str(self, s: &str) -> Result<Vec<Flag>, ParseErrorKind> {
match self {
FlagType::Ascii => s.chars().map(Self::parse_char_ascii).collect(),
FlagType::Utf8 => Ok(s.chars().map(Self::parse_char_utf8).collect()),
Expand All @@ -97,71 +98,76 @@ impl FlagType {
}
}

fn parse_as_ascii(flag: &str) -> Result<u32, ParseErrorKind> {
fn parse_as_ascii(flag: &str) -> Result<Flag, ParseErrorKind> {
if flag.len() == 1 {
Ok(u32::from(flag.bytes().next().unwrap()))
Ok(Flag(flag.bytes().next().unwrap().into()))
} else {
Err(ParseErrorKind::FlagParse(Self::Ascii))
}
}

fn parse_as_utf8(flag: &str) -> Result<u32, ParseErrorKind> {
if flag.chars().count() == 1 {
Ok(flag.chars().next().unwrap() as u32)
} else {
Err(ParseErrorKind::FlagParse(Self::Utf8))
fn parse_as_utf8(flag: &str) -> Result<Flag, ParseErrorKind> {
let mut chars = flag.chars();
let err = Err(ParseErrorKind::FlagParse(Self::Utf8));

let Some(ch) = chars.next() else {
return err;
};

if chars.next().is_some() {
return err;
}

Ok(Flag(ch.into()))
}

/// Parse two ascii characters
fn parse_as_long(flag: &str) -> Result<u32, ParseErrorKind> {
fn parse_as_long(flag: &str) -> Result<Flag, ParseErrorKind> {
if flag.len() != 2 || flag.chars().any(|c| !c.is_ascii()) {
Err(ParseErrorKind::FlagParse(Self::Long))
} else {
Ok(u32::from(u16::from_ne_bytes(
flag[0..=1].as_bytes().try_into().unwrap(),
)))
let v = u16::from_ne_bytes(flag[0..=1].as_bytes().try_into().unwrap());
Ok(Flag(v.into()))
}
}

/// Parse as a number
fn parse_as_number(flag: &str) -> Result<u32, ParseErrorKind> {
fn parse_as_number(flag: &str) -> Result<Flag, ParseErrorKind> {
flag.parse()
.map_err(|_| ParseErrorKind::FlagParse(Self::Number))
.map(Flag)
}

fn parse_char_ascii(c: char) -> Result<u32, ParseErrorKind> {
fn parse_char_ascii(c: char) -> Result<Flag, ParseErrorKind> {
if c.is_ascii() {
Ok(c as u32)
Ok(Flag(c.into()))
} else {
Err(ParseErrorKind::FlagParse(Self::Ascii))
}
}

fn parse_char_utf8(c: char) -> u32 {
c as u32
fn parse_char_utf8(c: char) -> Flag {
Flag(c.into())
}

fn parse_chars_long(chars: [char; 2]) -> Result<u32, ParseErrorKind> {
fn parse_chars_long(chars: [char; 2]) -> Result<Flag, ParseErrorKind> {
if chars.iter().any(|ch| !ch.is_ascii()) {
Err(ParseErrorKind::FlagParse(Self::Long))
} else {
Ok(u32::from(u16::from_ne_bytes([
chars[0] as u8,
chars[1] as u8,
])))
let arr = [chars[0].try_into().unwrap(), chars[1].try_into().unwrap()];
Ok(Flag(u16::from_ne_bytes(arr).into()))
}
}

/// Given a specified flag type (self), turn the value back into a string
#[inline]
pub fn flag_to_str(self, flag: u32) -> String {
pub fn flag_to_str(self, flag: Flag) -> String {
match self {
// Should be OK to unwrap because we created these flags from valid characters
FlagType::Ascii | FlagType::Utf8 => char::from_u32(flag).unwrap().to_string(),
FlagType::Number => flag.to_string(),
FlagType::Ascii | FlagType::Utf8 => char::from_u32(flag.0).unwrap().to_string(),
FlagType::Number => flag.0.to_string(),
FlagType::Long => {
let bytes = (u16::try_from(flag).unwrap()).to_ne_bytes();
let bytes = (u16::try_from(flag.0).unwrap()).to_ne_bytes();
bytes.iter().map(|b| *b as char).collect::<String>()
}
}
Expand Down
Loading

0 comments on commit aa42d13

Please sign in to comment.