Skip to content

Commit

Permalink
feat(translator): Improve auto-suggestion
Browse files (browse the repository at this point in the history)
  • Loading branch information
pythonbrad committed Oct 21, 2023
1 parent cae38dc commit 2755c86
Show file tree
Hide file tree
Showing 8 changed files with 138 additions and 60 deletions.
4 changes: 4 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions config/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,6 @@ rhai = ["dep:rhai"]

[dependencies]
rhai = { version = "1.16.2", optional = true }
indexmap = { version = "2.0.2", features = ["serde"] }
serde = { version = "1.0.188", features = ["derive"] }
toml = "0.8.2"
27 changes: 14 additions & 13 deletions config/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,23 @@

#![deny(missing_docs)]

use indexmap::IndexMap;
#[cfg(feature = "rhai")]
use rhai::{Engine, AST};
use serde::Deserialize;
use std::result::Result;
use std::{collections::HashMap, error, fs, path::Path};
use std::{error, fs, path::Path};
use toml::{self};

/// Hold information about a configuration.
#[derive(Deserialize, Debug, Clone)]
pub struct Config {
/// The core config.
pub core: Option<CoreConfig>,
data: Option<HashMap<String, Data>>,
data: Option<IndexMap<String, Data>>,
#[cfg(feature = "rhai")]
translators: Option<HashMap<String, Data>>,
translation: Option<HashMap<String, Data>>,
translators: Option<IndexMap<String, Data>>,
translation: Option<IndexMap<String, Data>>,
}

/// Core information about a configuration.
Expand Down Expand Up @@ -89,7 +90,7 @@ impl Config {
.unwrap_or(true);

// Data
let mut data = HashMap::new();
let mut data = IndexMap::new();

config.data.unwrap_or_default().iter().try_for_each(
|(key, value)| -> Result<(), Box<dyn error::Error>> {
Expand Down Expand Up @@ -117,7 +118,7 @@ impl Config {
// Translators
#[cfg(feature = "rhai")]
{
let mut translators = HashMap::new();
let mut translators = IndexMap::new();

config.translators.unwrap_or_default().iter().try_for_each(
|(key, value)| -> Result<(), Box<dyn error::Error>> {
Expand All @@ -140,7 +141,7 @@ impl Config {
}

// Translation
let mut translation = HashMap::new();
let mut translation = IndexMap::new();

config.translation.unwrap_or_default().iter().try_for_each(
|(key, value)| -> Result<(), Box<dyn error::Error>> {
Expand Down Expand Up @@ -174,8 +175,8 @@ impl Config {
}

/// Extract the data from the configuration.
pub fn extract_data(&self) -> HashMap<String, String> {
let empty = HashMap::default();
pub fn extract_data(&self) -> IndexMap<String, String> {
let empty = IndexMap::default();

self.data
.as_ref()
Expand All @@ -193,8 +194,8 @@ impl Config {

/// Extract the translators from the configuration.
#[cfg(feature = "rhai")]
pub fn extract_translators(&self) -> Result<HashMap<String, AST>, Box<dyn error::Error>> {
let empty = HashMap::default();
pub fn extract_translators(&self) -> Result<IndexMap<String, AST>, Box<dyn error::Error>> {
let empty = IndexMap::default();
let mut engine = Engine::new();

// allow nesting up to 50 layers of expressions/statements
Expand Down Expand Up @@ -226,8 +227,8 @@ impl Config {
}

/// Extract the translation from the configuration.
pub fn extract_translation(&self) -> HashMap<String, Vec<String>> {
let empty = HashMap::new();
pub fn extract_translation(&self) -> IndexMap<String, Vec<String>> {
let empty = IndexMap::new();

self.translation
.as_ref()
Expand Down
5 changes: 4 additions & 1 deletion engine/translator/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,11 @@ authors = ["Brady Fomegne <fomegnemeudje@outlook.com>"]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[features]
default = ["rhai"]
default = ["rhai", "strsim"]
rhai = ["dep:rhai"]
strsim = ["dep:strsim"]

[dependencies]
rhai = { version = "1.16.2", optional = true }
indexmap = { version = "2.0.2", features = ["serde"] }
strsim = { version = "0.10.0", optional = true }
140 changes: 103 additions & 37 deletions engine/translator/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@
//! #[cfg(feature = "rhai")]
//! use clafrica_translator::Engine;
//! use clafrica_translator::Translator;
//! use std::collections::HashMap;
//! use indexmap::IndexMap;
//!
//! // Translation via dictionary
//! let mut dictionary = HashMap::new();
//! dictionary.insert("halo".to_string(), ["hello".to_string()].to_vec());
//! let mut dictionary = IndexMap::new();
//! dictionary.insert("jump".to_string(), ["sauter".to_string()].to_vec());
//! dictionary.insert("jumper".to_string(), ["sauteur".to_string()].to_vec());
//! dictionary.insert("nihao".to_string(), ["hello".to_string()].to_vec());
//!
//! // We build the translator.
Expand All @@ -19,53 +20,86 @@
//! #[cfg(feature = "rhai")]
//! {
//! let engine = Engine::new();
//! let hi = engine.compile(r#"
//! let jump = engine.compile(r#"
//! fn translate(input) {
//! if input == "hi" {
//! ["hi", "", "hello", true]
//! if input == "jump" {
//! [input, "", "\n", false]
//! }
//! }
//! "#).unwrap();
//! translator.register("hi".to_string(), hi);
//! translator.register("jump".to_string(), jump);
//! }
//!
//! #[cfg(feature = "rhai")]
//! assert_eq!(
//! translator.translate("hi"),
//! translator.translate("jump"),
//! vec![
//! (
//! "jump".to_owned(),
//! "".to_owned(),
//! vec!["sauter".to_owned()],
//! true
//! ),
//! #[cfg(feature = "rhai")]
//! // Programmable translation
//! (
//! "jump".to_owned(),
//! "".to_owned(),
//! vec!["\n".to_owned()],
//! false
//! ),
//! // Auto-completion
//! (
//! "jumper".to_owned(),
//! "er".to_owned(),
//! vec!["sauteur".to_owned()],
//! false
//! )
//! ]
//! );
//!
//! // Auto-suggestion / Auto-correction
//! #[cfg(feature = "strsim")]
//! assert_eq!(
//! translator.translate("junp"),
//! vec![(
//! "hi".to_owned(),
//! "jump".to_owned(),
//! "".to_owned(),
//! vec!["hello".to_owned()],
//! true
//! vec!["sauter".to_owned()],
//! false
//! )]
//! );
//! ```
//!

#![deny(missing_docs)]

use indexmap::IndexMap;
#[cfg(feature = "rhai")]
pub use rhai::Engine;
#[cfg(feature = "rhai")]
use rhai::{Array, Scope, AST};
use std::collections::HashMap;
use std::cmp::Ordering;
#[cfg(feature = "strsim")]
use strsim::{self};

type P = (String, String, Vec<String>, bool);

/// Core structure of the translator.
pub struct Translator {
dictionary: HashMap<String, Vec<String>>,
dictionary: IndexMap<String, Vec<String>>,
#[cfg(feature = "rhai")]
translators: HashMap<String, AST>,
translators: IndexMap<String, AST>,
auto_commit: bool,
}

impl Translator {
/// Initiate a new translator.
pub fn new(dictionary: HashMap<String, Vec<String>>, auto_commit: bool) -> Self {
pub fn new(dictionary: IndexMap<String, Vec<String>>, auto_commit: bool) -> Self {
Self {
dictionary,
auto_commit,
#[cfg(feature = "rhai")]
translators: HashMap::default(),
translators: IndexMap::default(),
}
}

Expand All @@ -82,30 +116,53 @@ impl Translator {
}

/// Generate a list of predicates based on the input.
pub fn translate(&self, input: &str) -> Vec<(String, String, Vec<String>, bool)> {
pub fn translate(&self, input: &str) -> Vec<P> {
#[cfg(feature = "rhai")]
let mut scope = Scope::new();
#[cfg(feature = "rhai")]
let engine = Engine::new();

let predicates = self.dictionary.iter().filter_map(|(key, value)| {
if key == input {
Some((
if input.len() < 2 || input.len() > key.len() {
return None;
};

let predicate = (key == input).then_some((
1.0,
(
key.to_owned(),
"".to_owned(),
value.to_owned(),
self.auto_commit,
),
));
#[cfg(feature = "strsim")]
let predicate = predicate.or_else(|| {
if key.len() == input.len() {
let confidence = strsim::hamming(key.as_ref(), input)
.map(|n| 1.0 - (n as f64 / key.len() as f64))
.unwrap_or(0.0);

(confidence > 0.7).then(|| {
(
confidence,
(key.to_owned(), "".to_owned(), value.to_owned(), false),
)
})
} else {
None
}
});
predicate.or_else(|| {
key.starts_with(input).then_some((
0.5,
(
key.to_owned(),
key.chars().skip(input.len()).collect(),
value.to_owned(),
false,
),
))
} else if input.len() > 1 && key.starts_with(input) {
Some((
key.to_owned(),
key.chars().skip(input.len()).collect(),
value.to_owned(),
false,
))
} else {
None
}
})
});
#[cfg(feature = "rhai")]
let predicates =
Expand All @@ -126,10 +183,18 @@ impl Translator {
.collect();
let translated = data[3].clone().as_bool().unwrap();

(code, remaining_code, texts, translated)
(1.0, (code, remaining_code, texts, translated))
})
}));
predicates.collect()
let mut predicates = predicates.collect::<Vec<(f64, P)>>();

// from the best to the worst
predicates.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(Ordering::Equal));

predicates
.into_iter()
.map(|(_, predicate)| predicate)
.collect()
}
}

Expand All @@ -140,10 +205,10 @@ mod tests {
#[cfg(feature = "rhai")]
use crate::Engine;
use crate::Translator;
use std::collections::HashMap;
use indexmap::IndexMap;

// We build the translation
let mut dictionary = HashMap::new();
let mut dictionary = IndexMap::new();
dictionary.insert("halo".to_string(), ["hello".to_string()].to_vec());

// We config the translator
Expand Down Expand Up @@ -193,13 +258,14 @@ mod tests {
false
)]
);
#[cfg(feature = "strsim")]
assert_eq!(
translator.translate("halo"),
translator.translate("helo"),
vec![(
"halo".to_owned(),
"".to_owned(),
vec!["hello".to_owned()],
true
false
)]
);
}
Expand Down
3 changes: 2 additions & 1 deletion service/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@ name = "clafrica"
path = "./src/main.rs"

[features]
default = ["rhai"]
default = ["rhai", "strsim"]
rhai = ["clafrica-config/rhai", "clafrica-translator/rhai"]
strsim = ["clafrica-translator/strsim"]

[dependencies]
clap = { version = "4.4.6", features = ["derive"] }
Expand Down
2 changes: 1 addition & 1 deletion service/data/test.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,5 @@ hi = "./scripts/hi.rhai"
[translation]
hello = "hi"
heli = "helicopter"
hea = "health"
heal = { value = "health", alias = ["heql"] }
vuue = "vʉe"

0 comments on commit 2755c86

Please sign in to comment.