Skip to content

Commit

Permalink
Merge pull request #27 from hanabi1224/use_hashmap_instead_of_bin_search
Browse files Browse the repository at this point in the history
Use hashmap instead of binary search
  • Loading branch information
寧靜 committed Apr 21, 2019
2 parents a67ebbc + f4e7458 commit 3b68cd4
Show file tree
Hide file tree
Showing 9 changed files with 194 additions and 28 deletions.
115 changes: 106 additions & 9 deletions .gitignore
@@ -1,14 +1,111 @@
# Compiled files
*.o
*.so
*.rlib
*.dll

# Executables
*.exe
# Created by https://www.gitignore.io/api/rust,code,intellij+all
# Edit at https://www.gitignore.io/?templates=rust,code,intellij+all

### Code ###
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json

### Intellij+all ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf

# Generated files
.idea/**/contentModel.xml

# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml

# Gradle
.idea/**/gradle.xml
.idea/**/libraries

# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/modules.xml
# .idea/*.iml
# .idea/modules

# CMake
cmake-build-*/

# Mongo Explorer plugin
.idea/**/mongoSettings.xml

# File-based project format
*.iws

# IntelliJ
out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Cursive Clojure plugin
.idea/replstate.xml

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

# Editor-based Rest Client
.idea/httpRequests

# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser

# JetBrains templates
**___jb_tmp___

### Intellij+all Patch ###
# Ignores the whole .idea folder and all .iml files
# See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360

.idea/

# Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023

*.iml
modules.xml
.idea/misc.xml
*.ipr

# Sonarlint plugin
.idea/sonarlint

### Rust ###
# Generated by Cargo
# will have compiled files and executables
/target/
*~
*.bak

# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock

# These are backup files generated by rustfmt
**/*.rs.bk

# End of https://www.gitignore.io/api/rust,code,intellij+all
4 changes: 2 additions & 2 deletions .travis.yml
Expand Up @@ -22,8 +22,8 @@ rust:
script:
- |
cargo clean &&
cargo build &&
cargo test &&
cargo build --release &&
cargo test --release &&
cargo doc -p pinyin --no-deps &&
cargo run --example main &&
cargo bench
Expand Down
5 changes: 2 additions & 3 deletions Cargo.toml
Expand Up @@ -9,7 +9,7 @@ repository = "https://github.com/mozillazg/rust-pinyin"
description = "Convert Chinese to pinyin"
documentation = "https://docs.rs/pinyin/"
keywords = ["pinyin", "hanzi", "Chinese"]
exclude = ["pinyin-data"]
exclude = ["pinyin-data/*/**"]

[badges]
travis-ci = { repository = "mozillazg/rust-pinyin" }
Expand All @@ -23,9 +23,8 @@ path = "./src/bin/mk_dict.rs"
name = "main"
path = "./examples/main.rs"


[dependencies]

lazy_static = "1"

[features]
unstable = []
2 changes: 1 addition & 1 deletion Makefile
Expand Up @@ -17,7 +17,7 @@ test: lint

.PHONY: lint
lint:
@cargo run --bin mk_dict > src/dict.rs.new
@cargo run --release --bin mk_dict > src/dict.rs.new
@mv src/dict.rs.new src/dict.rs
@cargo fmt
@cargo clippy -- -A clippy::unreadable_literal
Expand Down
2 changes: 0 additions & 2 deletions src/bin/mk_dict.rs
Expand Up @@ -92,10 +92,8 @@ fn main() {

let template = format!(
"
pub static PINYIN_MAP: [(char, &str); {}] = {:?};
pub static PHONETIC_SYMBOL_MAP: [(char, &str); {}] = {:?};
",
pinyin_map.len(),
pinyin_map,
Expand Down
32 changes: 32 additions & 0 deletions src/integer_hasher.rs
@@ -0,0 +1,32 @@
use std::collections::HashMap;
use std::hash::{BuildHasherDefault, Hasher};

/// A minimal `Hasher` intended for small integer-like keys such as `char`,
/// `u32` or `u64`.
///
/// Rather than scrambling its input, it packs the bytes it is given directly
/// into a `u64` (byte `i` lands in bits `8 * i ..`). A single write of at most
/// eight bytes on a fresh hasher therefore yields the little-endian integer
/// value of those bytes, which makes hashing an integer key nearly free.
///
/// The hasher performs no avalanche mixing, so it should only be used with
/// keys that are not attacker-controlled.
#[derive(Debug, Default, Clone)]
pub struct IntegerHasher {
    /// Hash state accumulated so far; `0` for a fresh hasher.
    hash: u64,
}

impl Hasher for IntegerHasher {
    /// Returns the hash state accumulated by the writes performed so far.
    #[inline]
    fn finish(&self) -> u64 {
        self.hash
    }

    /// Folds `bytes` into the hash state.
    ///
    /// Byte `i` is XOR-ed into lane `i % 8` of a 64-bit word, so inputs longer
    /// than eight bytes wrap around instead of shifting past the width of
    /// `u64` (which would panic in builds with overflow checks enabled).
    /// The folded word is then combined with the previous state, so that keys
    /// hashed through several `write` calls (tuples, strings, ...) do not lose
    /// everything but their final write.
    #[inline]
    fn write(&mut self, bytes: &[u8]) {
        let folded = bytes
            .iter()
            .enumerate()
            .fold(0u64, |acc, (i, &byte)| {
                acc ^ (u64::from(byte) << (8 * (i % 8)))
            });

        // Rotating the old state keeps earlier writes from being cancelled out
        // by an identical later write. For a fresh hasher the state is zero,
        // so a single write still produces exactly `folded`.
        self.hash = self.hash.rotate_left(5) ^ folded;
    }
}

/// A `HashMap` whose keys are hashed with `IntegerHasher` (constructed through
/// `BuildHasherDefault`) instead of the standard library's default hasher.
pub type IntegerHashMap<K, V> = HashMap<K, V, BuildHasherDefault<IntegerHasher>>;
29 changes: 18 additions & 11 deletions src/lib.rs
Expand Up @@ -55,9 +55,15 @@
//! }
//! ```

#[macro_use]
extern crate lazy_static;

mod dict;
pub mod integer_hasher;
mod pinyin_map;

pub use dict::{PHONETIC_SYMBOL_MAP, PINYIN_MAP};
pub use dict::PHONETIC_SYMBOL_MAP;
pub use pinyin_map::PINYIN_HASHMAP;

// 声母表
const _INITIALS: [&str; 21] = [
Expand Down Expand Up @@ -199,20 +205,21 @@ fn apply_style(pys: Vec<String>, a: &Args) -> Vec<String> {
}

fn single_pinyin(c: char, a: &Args) -> Vec<String> {
let ret: Vec<String> = PINYIN_MAP
.binary_search_by_key(&c, |&(k, _)| k)
.map(|index| {
let pinyin_list = PINYIN_MAP[index].1.split(',').collect::<Vec<&str>>();
if pinyin_list.is_empty() || a.heteronym {
pinyin_list
let ret: Vec<String> = match PINYIN_HASHMAP.get(&c) {
Some(candidates_str) => {
let candidates = candidates_str.split(',').collect::<Vec<&str>>();
if candidates.is_empty() || a.heteronym {
candidates
.iter()
.map(|pinyin| pinyin.to_string())
.map(std::string::ToString::to_string)
.collect::<Vec<String>>()
} else {
vec![pinyin_list[0].to_string()]
vec![candidates[0].to_string()]
}
})
.unwrap_or_default();
}
None => vec![],
};

apply_style(ret, a)
}

Expand Down
7 changes: 7 additions & 0 deletions src/pinyin_map.rs
@@ -0,0 +1,7 @@
use dict::PINYIN_MAP;
use integer_hasher::IntegerHashMap;

// Declared through `lazy_static!`, so the map is not a compile-time constant:
// it is built from `PINYIN_MAP` the first time it is dereferenced.
lazy_static! {
/// Lookup table from a character to its pinyin string, collected from the
/// `(char, &str)` pairs of `dict::PINYIN_MAP` into an `IntegerHashMap`.
pub static ref PINYIN_HASHMAP: IntegerHashMap<char, &'static str> =
PINYIN_MAP.iter().cloned().collect();
}
26 changes: 26 additions & 0 deletions tests/integer_hasher.rs
@@ -0,0 +1,26 @@
extern crate pinyin;
use pinyin::integer_hasher::IntegerHasher;
use std::hash::Hasher;

/// A value spanning several non-zero bytes comes back unchanged.
#[test]
fn test_integer_hasher_1() {
    test_integer_hasher_inner(20464028)
}

/// Zero comes back unchanged.
#[test]
fn test_integer_hasher_2() {
    test_integer_hasher_inner(0)
}

/// A small two-byte value comes back unchanged.
#[test]
fn test_integer_hasher_3() {
    test_integer_hasher_inner(2046)
}

/// Writes `i` into a fresh `IntegerHasher` with `write_u64` and asserts that
/// `finish` reports exactly the same value.
fn test_integer_hasher_inner(i: u64) {
    let mut hasher = IntegerHasher::default();
    hasher.write_u64(i);
    assert_eq!(i, hasher.finish());
}

0 comments on commit 3b68cd4

Please sign in to comment.