-
Notifications
You must be signed in to change notification settings - Fork 0
/
nysiis.gleam
110 lines (100 loc) · 2.75 KB
/
nysiis.gleam
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import gleam/string
import phonetic_gleam/utils.{then_or_else}
// https://en.wikipedia.org/wiki/New_York_State_Identification_and_Intelligence_System
/// Rewrites the leading letters of an (already upper-cased) name.
///
/// Prefix rules, first match wins:
/// "MAC" -> "MC", "KN" -> "NN", "K" -> "C", "PH"/"PF" -> "FF", "SCH" -> "SSS".
/// Any other word is returned unchanged.
fn tr_first_chars(word: String) -> String {
  let letters = string.to_graphemes(word)
  let rewritten = case letters {
    ["M", "A", "C", ..rest] -> ["M", "C", ..rest]
    // "KN" must be tested before the bare "K" rule.
    ["K", "N", ..rest] -> ["N", "N", ..rest]
    ["K", ..rest] -> ["C", ..rest]
    ["P", "H", ..rest] -> ["F", "F", ..rest]
    ["P", "F", ..rest] -> ["F", "F", ..rest]
    ["S", "C", "H", ..rest] -> ["S", "S", "S", ..rest]
    _ -> letters
  }
  string.concat(rewritten)
}
/// Rewrites the trailing two letters of an (already upper-cased) name.
///
/// Suffix rules: "EE"/"IE" -> "Y"; "DT"/"RT"/"RD"/"NT"/"ND" -> "D".
/// Any other word is returned unchanged.
///
/// Words shorter than two graphemes have no two-letter suffix and are
/// returned as-is. Without this guard both the negative-index slice and the
/// two-grapheme drop yield "", which silently erased one-letter names.
fn tr_last_chars(word: String) -> String {
  case string.length(word) < 2 {
    True -> word
    False -> {
      let end = string.slice(from: word, at_index: -2, length: 2)
      let stem = string.drop_right(from: word, up_to: 2)
      case end {
        "EE" | "IE" -> stem <> "Y"
        "DT" | "RT" | "RD" | "NT" | "ND" -> stem <> "D"
        // No rule applies: stem <> end is just the original word.
        _ -> word
      }
    }
  }
}
/// Translates the remaining graphemes of a name into NYSIIS code letters.
///
/// - `chars`: graphemes still to be processed.
/// - `prev`: first grapheme of the code emitted by the previous step
///   ("" when that step emitted nothing).
/// - `code`: the code accumulated so far.
///
/// Returns `code` with the translation of every grapheme in `chars` appended.
fn tr(chars: List(String), prev: String, code: String) -> String {
  case chars {
    [] -> code
    [_, ..] -> {
      let #(remaining, emitted) = tr_step(chars, prev)
      tr(remaining, first_char(emitted), code <> emitted)
    }
  }
}

/// Performs a single translation step: consumes one rule's worth of
/// graphemes from the front of `chars` and returns the graphemes left to
/// process together with the code emitted for the consumed ones.
///
/// Rule order matters: multi-letter rules are listed before the
/// single-letter rules they overlap with.
/// NOTE(review): `then_or_else(cond, a, b)` is defined in the project's
/// utils module; presumably it yields `a` when `cond` is True, otherwise `b`.
fn tr_step(chars: List(String), prev: String) -> #(List(String), String) {
  case chars {
    [] -> #([], "")
    // "EV" emits "AF" and leaves an "F" in place of the "V" to be processed.
    ["E", "V", ..rest] -> #(["F", ..rest], "AF")
    // Every vowel is coded as "A".
    [c, ..rest] if c == "A" || c == "E" || c == "I" || c == "O" || c == "U" ->
      #(rest, "A")
    ["Q", ..rest] -> #(rest, "G")
    ["Z", ..rest] -> #(rest, "S")
    ["M", ..rest] -> #(rest, "N")
    ["K", "N", ..rest] -> #(rest, "N")
    ["K", ..rest] -> #(rest, "C")
    ["S", "C", "H", ..rest] -> #(rest, "SSS")
    ["P", "H", ..rest] -> #(rest, "FF")
    // "H" followed by another letter: repeat the previous code unless the
    // "H" sits between two vowels.
    ["H", following, ..rest] -> {
      let remaining = [following, ..rest]
      let use_prev = !is_vowel(prev) || !is_vowel(following)
      then_or_else(use_prev, #(remaining, prev), #(remaining, "H"))
    }
    // "H" as the last letter: only the previous code is inspected.
    ["H", ..rest] -> then_or_else(!is_vowel(prev), #(rest, prev), #(rest, "H"))
    // "W" after a vowel code repeats that code.
    ["W", ..rest] -> then_or_else(is_vowel(prev), #(rest, prev), #(rest, "W"))
    // Every other letter is emitted unchanged.
    [other, ..rest] -> #(rest, other)
  }
}
/// Applies the final NYSIIS suffix clean-up to a finished key.
///
/// Looking at the last two graphemes:
/// - a trailing "S" is removed,
/// - a trailing "AY" becomes "Y",
/// - a trailing "A" is removed,
/// - anything else is left untouched.
///
/// Keys shorter than two graphemes are returned unchanged. Without this
/// guard the negative-index slice and the two-grapheme drop both yield "",
/// so a one-letter key (including a lone "S" or "A") was reduced to "".
fn drop_last_chars(word: String) -> String {
  case string.length(word) < 2 {
    True -> word
    False -> {
      let stem = string.drop_right(from: word, up_to: 2)
      let end =
        string.slice(from: word, at_index: -2, length: 2)
        |> string.to_graphemes
      stem
      <> case end {
        [c, "S"] -> c
        ["A", "Y"] -> "Y"
        [c, "A"] -> c
        chars -> string.join(chars, "")
      }
    }
  }
}
/// Returns the first grapheme of `word`, or "" when `word` is empty.
fn first_char(word) {
  case string.first(word) {
    Ok(head) -> head
    Error(_) -> ""
  }
}
/// True when `c` is exactly one of the upper-case vowels A, E, I, O or U.
fn is_vowel(c: String) {
  case c {
    "A" | "E" | "I" | "O" | "U" -> True
    _ -> False
  }
}
/// Normalises raw input before encoding: upper-cases it, then passes it
/// through `utils.remove_not_allowed_chars` with the A-Z alphabet.
/// NOTE(review): presumably that helper strips every character outside the
/// given alphabet; confirm against the utils module.
fn prepare_word(word: String) -> String {
  let upper = string.uppercase(word)
  utils.remove_not_allowed_chars(upper, "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
}
/// Post-processes a raw key: splits it into graphemes, runs them through
/// `utils.remove_adjacent_dups`, joins the result back into a string and
/// applies the trailing-letter rules of `drop_last_chars`.
/// NOTE(review): presumably `remove_adjacent_dups` collapses runs of equal
/// neighbouring graphemes into one; confirm against the utils module.
fn cleanup(codes) {
  let deduped = utils.remove_adjacent_dups(string.to_graphemes(codes))
  drop_last_chars(string.join(deduped, ""))
}
/// Computes the NYSIIS phonetic key of `word`.
///
/// Steps:
/// 1. normalise the input (`prepare_word`),
/// 2. rewrite the leading and trailing letter groups,
/// 3. keep the first letter as-is and translate the remaining letters,
/// 4. post-process the key (`cleanup`).
///
/// The translation is seeded with the first letter as the "previous"
/// letter. The H and W rules depend on whether the preceding letter is a
/// vowel; seeding with "" made an H or W in second position ignore the
/// first letter (e.g. the H of "OHARA" was dropped and the W of "OWEN" was
/// kept), contrary to the NYSIIS rules.
pub fn encode(word) -> String {
  let name =
    word
    |> prepare_word
    |> tr_first_chars
    |> tr_last_chars
  let head = first_char(name)
  let tail =
    string.drop_left(from: name, up_to: 1)
    |> string.to_graphemes
  cleanup(head <> tr(tail, head, ""))
}