diff --git a/Cargo.lock b/Cargo.lock index 6a604dd..e32adbd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -113,6 +113,7 @@ dependencies = [ name = "clafrica-config" version = "0.4.1" dependencies = [ + "indexmap", "rhai", "serde", "toml", @@ -134,7 +135,9 @@ dependencies = [ name = "clafrica-translator" version = "0.0.1" dependencies = [ + "indexmap", "rhai", + "strsim", ] [[package]] @@ -398,6 +401,7 @@ checksum = "8adf3ddd720272c6ea8bf59463c04e0f93d0bbf7c5439b691bca2987e0270897" dependencies = [ "equivalent", "hashbrown", + "serde", ] [[package]] diff --git a/config/Cargo.toml b/config/Cargo.toml index 34e7fa3..0b282be 100644 --- a/config/Cargo.toml +++ b/config/Cargo.toml @@ -16,5 +16,6 @@ rhai = ["dep:rhai"] [dependencies] rhai = { version = "1.16.2", optional = true } +indexmap = { version = "2.0.2", features = ["serde"] } serde = { version = "1.0.188", features = ["derive"] } toml = "0.8.2" diff --git a/config/src/lib.rs b/config/src/lib.rs index d54040c..9f3e4a3 100644 --- a/config/src/lib.rs +++ b/config/src/lib.rs @@ -3,11 +3,12 @@ #![deny(missing_docs)] +use indexmap::IndexMap; #[cfg(feature = "rhai")] use rhai::{Engine, AST}; use serde::Deserialize; use std::result::Result; -use std::{collections::HashMap, error, fs, path::Path}; +use std::{error, fs, path::Path}; use toml::{self}; /// Hold information about a configuration. @@ -15,10 +16,10 @@ use toml::{self}; pub struct Config { /// The core config. pub core: Option, - data: Option>, + data: Option>, #[cfg(feature = "rhai")] - translators: Option>, - translation: Option>, + translators: Option>, + translation: Option>, } /// Core information about a configuration. @@ -89,7 +90,7 @@ impl Config { .unwrap_or(true); // Data - let mut data = HashMap::new(); + let mut data = IndexMap::new(); config.data.unwrap_or_default().iter().try_for_each( |(key, value)| -> Result<(), Box> { @@ -117,7 +118,7 @@ impl Config { // Translators #[cfg(feature = "rhai")] { - let mut translators = HashMap::new(); + let mut translators = IndexMap::new(); config.translators.unwrap_or_default().iter().try_for_each( |(key, value)| -> Result<(), Box> { @@ -140,7 +141,7 @@ impl Config { } // Translation - let mut translation = HashMap::new(); + let mut translation = IndexMap::new(); config.translation.unwrap_or_default().iter().try_for_each( |(key, value)| -> Result<(), Box> { @@ -174,8 +175,8 @@ impl Config { } /// Extract the data from the configuration. - pub fn extract_data(&self) -> HashMap { - let empty = HashMap::default(); + pub fn extract_data(&self) -> IndexMap { + let empty = IndexMap::default(); self.data .as_ref() @@ -193,8 +194,8 @@ impl Config { /// Extract the translators from the configuration. #[cfg(feature = "rhai")] - pub fn extract_translators(&self) -> Result, Box> { - let empty = HashMap::default(); + pub fn extract_translators(&self) -> Result, Box> { + let empty = IndexMap::default(); let mut engine = Engine::new(); // allow nesting up to 50 layers of expressions/statements @@ -226,8 +227,8 @@ impl Config { } /// Extract the translation from the configuration. - pub fn extract_translation(&self) -> HashMap> { - let empty = HashMap::new(); + pub fn extract_translation(&self) -> IndexMap> { + let empty = IndexMap::new(); self.translation .as_ref() diff --git a/engine/translator/Cargo.toml b/engine/translator/Cargo.toml index 3cda528..05d16db 100644 --- a/engine/translator/Cargo.toml +++ b/engine/translator/Cargo.toml @@ -12,8 +12,11 @@ authors = ["Brady Fomegne "] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [features] -default = ["rhai"] +default = ["rhai", "strsim"] rhai = ["dep:rhai"] +strsim = ["dep:strsim"] [dependencies] rhai = { version = "1.16.2", optional = true } +indexmap = { version = "2.0.2", features = ["serde"] } +strsim = { version = "0.10.0", optional = true } diff --git a/engine/translator/src/lib.rs b/engine/translator/src/lib.rs index 04f56f9..4814ca2 100644 --- a/engine/translator/src/lib.rs +++ b/engine/translator/src/lib.rs @@ -5,11 +5,12 @@ //! #[cfg(feature = "rhai")] //! use clafrica_translator::Engine; //! use clafrica_translator::Translator; -//! use std::collections::HashMap; +//! use indexmap::IndexMap; //! //! // Translation via dictionary -//! let mut dictionary = HashMap::new(); -//! dictionary.insert("halo".to_string(), ["hello".to_string()].to_vec()); +//! let mut dictionary = IndexMap::new(); +//! dictionary.insert("jump".to_string(), ["sauter".to_string()].to_vec()); +//! dictionary.insert("jumper".to_string(), ["sauteur".to_string()].to_vec()); //! dictionary.insert("nihao".to_string(), ["hello".to_string()].to_vec()); //! //! // We build the translator. @@ -19,24 +20,52 @@ //! #[cfg(feature = "rhai")] //! { //! let engine = Engine::new(); -//! let hi = engine.compile(r#" +//! let jump = engine.compile(r#" //! fn translate(input) { -//! if input == "hi" { -//! ["hi", "", "hello", true] +//! if input == "jump" { +//! [input, "", "\n", false] //! } //! } //! "#).unwrap(); -//! translator.register("hi".to_string(), hi); +//! translator.register("jump".to_string(), jump); //! } //! -//! #[cfg(feature = "rhai")] //! assert_eq!( -//! translator.translate("hi"), +//! translator.translate("jump"), +//! vec![ +//! ( +//! "jump".to_owned(), +//! "".to_owned(), +//! vec!["sauter".to_owned()], +//! true +//! ), +//! #[cfg(feature = "rhai")] +//! // Programmable translation +//! ( +//! "jump".to_owned(), +//! "".to_owned(), +//! vec!["\n".to_owned()], +//! false +//! ), +//! // Auto-completion +//! ( +//! "jumper".to_owned(), +//! "er".to_owned(), +//! vec!["sauteur".to_owned()], +//! false +//! ) +//! ] +//! ); +//! +//! // Auto-suggestion / Auto-correction +//! #[cfg(feature = "strsim")] +//! assert_eq!( +//! translator.translate("junp"), //! vec![( -//! "hi".to_owned(), +//! "jump".to_owned(), //! "".to_owned(), -//! vec!["hello".to_owned()], -//! true +//! vec!["sauter".to_owned()], +//! false //! )] //! ); //! ``` @@ -44,28 +73,33 @@ #![deny(missing_docs)] +use indexmap::IndexMap; #[cfg(feature = "rhai")] pub use rhai::Engine; #[cfg(feature = "rhai")] use rhai::{Array, Scope, AST}; -use std::collections::HashMap; +use std::cmp::Ordering; +#[cfg(feature = "strsim")] +use strsim::{self}; + +type P = (String, String, Vec, bool); /// Core structure of the translator. pub struct Translator { - dictionary: HashMap>, + dictionary: IndexMap>, #[cfg(feature = "rhai")] - translators: HashMap, + translators: IndexMap, auto_commit: bool, } impl Translator { /// Initiate a new translator. - pub fn new(dictionary: HashMap>, auto_commit: bool) -> Self { + pub fn new(dictionary: IndexMap>, auto_commit: bool) -> Self { Self { dictionary, auto_commit, #[cfg(feature = "rhai")] - translators: HashMap::default(), + translators: IndexMap::default(), } } @@ -82,30 +116,53 @@ impl Translator { } /// Generate a list of predicates based on the input. - pub fn translate(&self, input: &str) -> Vec<(String, String, Vec, bool)> { + pub fn translate(&self, input: &str) -> Vec

{ #[cfg(feature = "rhai")] let mut scope = Scope::new(); #[cfg(feature = "rhai")] let engine = Engine::new(); - let predicates = self.dictionary.iter().filter_map(|(key, value)| { - if key == input { - Some(( + if input.len() < 2 || input.len() > key.len() { + return None; + }; + + let predicate = (key == input).then_some(( + 1.0, + ( key.to_owned(), "".to_owned(), value.to_owned(), self.auto_commit, + ), + )); + #[cfg(feature = "strsim")] + let predicate = predicate.or_else(|| { + if key.len() == input.len() { + let confidence = strsim::hamming(key.as_ref(), input) + .map(|n| 1.0 - (n as f64 / key.len() as f64)) + .unwrap_or(0.0); + + (confidence > 0.7).then(|| { + ( + confidence, + (key.to_owned(), "".to_owned(), value.to_owned(), false), + ) + }) + } else { + None + } + }); + predicate.or_else(|| { + key.starts_with(input).then_some(( + 0.5, + ( + key.to_owned(), + key.chars().skip(input.len()).collect(), + value.to_owned(), + false, + ), )) - } else if input.len() > 1 && key.starts_with(input) { - Some(( - key.to_owned(), - key.chars().skip(input.len()).collect(), - value.to_owned(), - false, - )) - } else { - None - } + }) }); #[cfg(feature = "rhai")] let predicates = @@ -126,10 +183,18 @@ impl Translator { .collect(); let translated = data[3].clone().as_bool().unwrap(); - (code, remaining_code, texts, translated) + (1.0, (code, remaining_code, texts, translated)) }) })); - predicates.collect() + let mut predicates = predicates.collect::>(); + + // from the best to the worst + predicates.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(Ordering::Equal)); + + predicates + .into_iter() + .map(|(_, predicate)| predicate) + .collect() } } @@ -140,10 +205,10 @@ mod tests { #[cfg(feature = "rhai")] use crate::Engine; use crate::Translator; - use std::collections::HashMap; + use indexmap::IndexMap; // We build the translation - let mut dictionary = HashMap::new(); + let mut dictionary = IndexMap::new(); dictionary.insert("halo".to_string(), ["hello".to_string()].to_vec()); // We config the translator @@ -193,13 +258,14 @@ mod tests { false )] ); + #[cfg(feature = "strsim")] assert_eq!( - translator.translate("halo"), + translator.translate("helo"), vec![( "halo".to_owned(), "".to_owned(), vec!["hello".to_owned()], - true + false )] ); } diff --git a/service/Cargo.toml b/service/Cargo.toml index c151c43..f99bffd 100644 --- a/service/Cargo.toml +++ b/service/Cargo.toml @@ -17,8 +17,9 @@ name = "clafrica" path = "./src/main.rs" [features] -default = ["rhai"] +default = ["rhai", "strsim"] rhai = ["clafrica-config/rhai", "clafrica-translator/rhai"] +strsim = ["clafrica-translator/strsim"] [dependencies] clap = { version = "4.4.6", features = ["derive"] } diff --git a/service/data/test.toml b/service/data/test.toml index 3da7d40..9997824 100644 --- a/service/data/test.toml +++ b/service/data/test.toml @@ -27,5 +27,5 @@ hi = "./scripts/hi.rhai" [translation] hello = "hi" heli = "helicopter" -hea = "health" +heal = { value = "health", alias = ["heql"] } vuue = "vʉe" diff --git a/service/src/lib.rs b/service/src/lib.rs index 2a23651..3882ccd 100644 --- a/service/src/lib.rs +++ b/service/src/lib.rs @@ -115,8 +115,11 @@ pub fn run(config: Config, mut frontend: impl Frontend) -> Result<(), Box Result<(), Box