-
Notifications
You must be signed in to change notification settings - Fork 0
/
transliterate.clj
128 lines (122 loc) · 3.02 KB
/
transliterate.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
;; @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
;; @ Copyright (c) Michael Leahcim @
;; @ You can find additional information regarding licensing of this work in LICENSE.md @
;; @ You must not remove this notice, or any other, from this software. @
;; @ All rights reserved. @
;; @@@@@@ At 2018-10-18 19:54 <thereisnodotcollective@gmail.com> @@@@@@@@@@@@@@@@@@@@@@@@
(ns
^{:doc "Transliteration package. Contains cyrillic->latin and latin->cyrillic
transliterations."
:author "Michael Leahcim"}
thereisnodot.utils.transliterate
(:require [clojure.string]
          [thereisnodot.akronim.core :refer [defns]]))
(def ^{:private true} trans
  "Ordered transliteration table of [latin cyrillic] pairs.

  The pairs are applied one after another, in exactly this order, so the order
  is part of the behaviour:
    * multi-character Latin spellings (Shh, Sh, Ch, Zh, Ya, ...) are listed
      before the single letters they contain, so they are consumed first;
    * when several Latin spellings share one Cyrillic letter, the first pair
      listed is the one that wins in the Cyrillic -> Latin direction
      (for example W is listed before V for the same Cyrillic letter).

  Do not sort or regroup the rows."
  [;; --- digraphs, trigraphs and umlaut spellings ---
   ["Ya" "Я"]
   ["Ja" "Я"]
   ["ya" "я"]
   ["ja" "я"]
   ["Je" "Э"]
   ["je" "э"]
   ["ä" "э"]
   ["Ju" "Ю"]
   ["Yu" "Ю"]
   ["ju" "ю"]
   ["yu" "ю"]
   ["ü" "ю"]
   ["Ch" "Ч"]
   ["ch" "ч"]
   ["Shh" "Щ"]
   ["W" "В"]
   ["shh" "щ"]
   ["w" "в"]
   ["Sh" "Ш"]
   ["sh" "ш"]
   ["Zh" "Ж"]
   ["zh" "ж"]
   ["Yo" "Ё"]
   ["Jo" "Ё"]
   ["yo" "ё"]
   ["jo" "ё"]
   ["ö" "ё"]
   ;; H/h must come after Ch, Sh, Shh and Zh so those are matched first.
   ["H" "Х"]
   ["h" "х"]
   ;; X maps to a two-letter Cyrillic sequence.
   ["X" "Кс"]
   ["x" "кс"]
   ;; A doubled hash is tried before the single hash further down.
   ["##" "ъ"]
   ;; --- single letters ---
   ["A" "А"]
   ["a" "а"]
   ["B" "Б"]
   ["b" "б"]
   ["V" "В"]
   ["v" "в"]
   ["G" "Г"]
   ["g" "г"]
   ["D" "Д"]
   ["d" "д"]
   ["Z" "З"]
   ["z" "з"]
   ["I" "И"]
   ["i" "и"]
   ["J" "Й"]
   ["j" "й"]
   ["K" "К"]
   ["k" "к"]
   ["L" "Л"]
   ["l" "л"]
   ["M" "М"]
   ["m" "м"]
   ["N" "Н"]
   ["n" "н"]
   ["O" "О"]
   ["o" "о"]
   ["P" "П"]
   ["p" "п"]
   ["R" "Р"]
   ["r" "р"]
   ["S" "С"]
   ["s" "с"]
   ["T" "Т"]
   ["t" "т"]
   ["U" "У"]
   ["u" "у"]
   ["F" "Ф"]
   ["f" "ф"]
   ["C" "Ц"]
   ["c" "ц"]
   ["Y" "Ы"]
   ["y" "ы"]
   ;; --- soft / hard sign stand-ins ---
   ;; Each stand-in is listed twice on purpose: the lowercase row is the one
   ;; used Latin -> Cyrillic, the uppercase row only matters Cyrillic -> Latin.
   ["'" "ь"]
   ["'" "Ь"]
   ["#" "ъ"]
   ["#" "Ъ"]
   ["E" "Е"]
   ["e" "е"]])
(defn- trans-cyrillic-latin
  "Transliterates `phrase` by applying every pair of `trans`, in table order,
  as a literal string replacement.

  `direction` selects which side of each [latin cyrillic] pair is searched for:
    \"en-ru\" - occurrences of the Latin spelling become the Cyrillic one;
    \"ru-en\" - occurrences of the Cyrillic spelling become the Latin one.
  Any other value throws IllegalArgumentException (no matching `condp` clause).

  After all substitutions every run of backticks, apostrophes and hashes is
  removed from the result. In the \"ru-en\" direction this drops the ' and #
  stand-ins that the table produces for the soft and hard signs."
  [phrase direction]
  (let [;; Resolve the direction once rather than on every reduction step.
        [from to] (condp = direction
                    "en-ru" [first last]
                    "ru-en" [last first])
        substitute (fn [text pair]
                     (clojure.string/replace text (from pair) (to pair)))]
    (clojure.string/replace (reduce substitute phrase trans)
                            #"[`'#]+"
                            "")))
;; Public entry point for Latin -> Cyrillic transliteration.
;; Delegates to `trans-cyrillic-latin` with the direction string "en-ru".
;; The vector of `call => result` forms that follows the docstring is handed to
;; the `defns` macro from thereisnodot.akronim.core; presumably it serves as
;; usage examples / doc-tests -- confirm against akronim's documentation.
(defns latin-str->cyrillic
"Primitive transliteration of latin to cyrillic"
[(latin-str->cyrillic "Hello world") => "Хелло ворлд"
(latin-str->cyrillic "Nothing works") => "Нотхинг воркс"
(latin-str->cyrillic "Foo bar") => "Фоо бар"]
[phrase-str]
(trans-cyrillic-latin phrase-str "en-ru"))
;; Public entry point for Cyrillic -> Latin transliteration.
;; Delegates to `trans-cyrillic-latin` with the direction string "ru-en".
;; Not a strict inverse of `latin-str->cyrillic`: as the second example shows,
;; the Cyrillic text produced from "Nothing works" comes back as "Nothing worx".
;; The vector of `call => result` forms that follows the docstring is handed to
;; the `defns` macro from thereisnodot.akronim.core; presumably it serves as
;; usage examples / doc-tests -- confirm against akronim's documentation.
(defns cyrillic-str->latin
"Primitive transliteration of cyrillic to latin."
[(cyrillic-str->latin "Хелло ворлд" ) => "Hello world"
(cyrillic-str->latin "Нотхинг воркс") => "Nothing worx"
(cyrillic-str->latin "Фоо бар" ) => "Foo bar"]
[phrase-str]
(trans-cyrillic-latin phrase-str "ru-en"))