Skip to content

Commit

Permalink
feat: language detection for mnemonic seed words (#3590)
Browse files Browse the repository at this point in the history
Description
---
Implementation of a basic language detection algorithm for mnemonic words

Motivation and Context
---
Improve language detection for mnemonic words

How Has This Been Tested?
---
cargo test --all
  • Loading branch information
StriderDM committed Nov 21, 2021
1 parent 038b9a6 commit 57f51bc
Showing 1 changed file with 88 additions and 11 deletions.
99 changes: 88 additions & 11 deletions base_layer/key_manager/src/mnemonic.rs
Expand Up @@ -25,15 +25,15 @@ use crate::{
error::{KeyManagerError, MnemonicError},
mnemonic_wordlists::*,
};
use std::slice::Iter;
use std::{cmp::Ordering, slice::Iter};
use strum_macros::{Display, EnumString};
use tari_crypto::tari_utilities::bit::*;

/// The Mnemonic system simplifies the encoding and decoding of a secret key into and from a Mnemonic word sequence
/// It can autodetect the language of the Mnemonic word sequence
// TODO: Develop a language autodetection mechanism to distinguish between ChineseTraditional and ChineseSimplified

#[derive(Clone, Debug, PartialEq, EnumString, Display)]
#[derive(Clone, Debug, PartialEq, EnumString, Display, Copy)]
pub enum MnemonicLanguage {
ChineseSimplified,
English,
Expand All @@ -47,12 +47,8 @@ pub enum MnemonicLanguage {
impl MnemonicLanguage {
/// Detects the mnemonic language of a specific word by searching all defined mnemonic word lists
pub fn from(mnemonic_word: &str) -> Result<MnemonicLanguage, MnemonicError> {
for language in MnemonicLanguage::iterator() {
if find_mnemonic_index_from_word(mnemonic_word, language).is_ok() {
return Ok((*language).clone());
}
}
Err(MnemonicError::UnknownLanguage)
let words = vec![mnemonic_word.to_string()];
detect_language(&words)
}

/// Returns an iterator for the MnemonicLanguage enum group to allow iteration over all defined languages
Expand Down Expand Up @@ -158,11 +154,54 @@ pub fn from_bytes(bytes: Vec<u8>, language: &MnemonicLanguage) -> Result<Vec<Str
Ok(mnemonic_sequence)
}

fn detect_language(words: &[String]) -> Result<MnemonicLanguage, MnemonicError> {
let count = words.iter().len();
match count.cmp(&1) {
Ordering::Less => {
return Err(MnemonicError::UnknownLanguage);
},
Ordering::Equal => {
let word = words.get(0).ok_or(MnemonicError::EncodeInvalidLength)?;
for language in MnemonicLanguage::iterator() {
if find_mnemonic_index_from_word(word, language).is_ok() {
return Ok(*language);
}
}
return Err(MnemonicError::UnknownLanguage);
},
Ordering::Greater => {
for word in words {
let mut languages = Vec::with_capacity(MnemonicLanguage::iterator().len());
// detect all languages in which a word falls into
for language in MnemonicLanguage::iterator() {
if find_mnemonic_index_from_word(word, language).is_ok() {
languages.push(*language);
}
}
// check if at least one of the languages is consistent for all other words against languages yielded
// from the initial word for this iteration
for language in languages {
let mut consistent = true;
for compare in words {
if compare != word && find_mnemonic_index_from_word(compare, &language).is_err() {
consistent = false;
}
}
if consistent {
return Ok(language);
}
}
}
},
}

Err(MnemonicError::UnknownLanguage)
}

/// Generates a vector of bytes that represent the provided mnemonic sequence of words, the language of the mnemonic
/// sequence is autodetected
/// sequence is detected
pub fn to_bytes(mnemonic_seq: &[String]) -> Result<Vec<u8>, MnemonicError> {
let first_word = mnemonic_seq.get(0).ok_or(MnemonicError::EncodeInvalidLength)?;
let language = MnemonicLanguage::from(first_word)?; // Autodetect language
let language = self::detect_language(mnemonic_seq)?;
to_bytes_with_language(mnemonic_seq, &language)
}

Expand Down Expand Up @@ -289,6 +328,44 @@ mod test {
assert!(MnemonicLanguage::from(&"おかあさん".to_string()).is_err()); // Invalid Mnemonic Japanese word
assert!(MnemonicLanguage::from(&"답정너".to_string()).is_err()); // Invalid Mnemonic Korean word
assert!(MnemonicLanguage::from(&"desvelado".to_string()).is_err()); // Invalid Mnemonic Spanish word

// English/Spanish + English/French -> English
let words1 = vec![
"album".to_string(),
"area".to_string(),
"opera".to_string(),
"abandon".to_string(),
];
assert_eq!(detect_language(&words1), Ok(MnemonicLanguage::English));

// English/Spanish + English/French + Italian/Spanish
let words2 = vec![
"album".to_string(),
"area".to_string(),
"opera".to_string(),
"abandon".to_string(),
"tipico".to_string(),
];
assert_eq!(detect_language(&words2).is_err(), true);

// bounds check (last word is invalid)
let words3 = vec![
"album".to_string(),
"area".to_string(),
"opera".to_string(),
"abandon".to_string(),
"topazio".to_string(),
];
assert_eq!(detect_language(&words3).is_err(), true);

// building up a word list: English/French + French -> French
let mut words = Vec::with_capacity(3);
words.push("concert".to_string());
assert_eq!(detect_language(&words), Ok(MnemonicLanguage::English));
words.push("abandon".to_string());
assert_eq!(detect_language(&words), Ok(MnemonicLanguage::English));
words.push("barbier".to_string());
assert_eq!(detect_language(&words), Ok(MnemonicLanguage::French));
}

#[test]
Expand Down

0 comments on commit 57f51bc

Please sign in to comment.