Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Transliterate Greek according to ELOT 743. Untested #4

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
272 changes: 272 additions & 0 deletions src/greeklt/greek_iso_transliteration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
# Lowercase Greek letter -> lowercase Latin romanization.
# Final sigma (ς) is romanized exactly like sigma (σ).
_EL_LOWER_TO_LATIN = {
    "α": "a", "β": "v", "γ": "g", "δ": "d", "ε": "e", "ζ": "z",
    "η": "i", "θ": "th", "ι": "i", "κ": "k", "λ": "l", "μ": "m",
    "ν": "n", "ξ": "x", "ο": "o", "π": "p", "ρ": "r", "σ": "s",
    "ς": "s", "τ": "t", "υ": "y", "φ": "f", "χ": "ch", "ψ": "ps",
    "ω": "o",
}

# Uppercase table derived from the lowercase one ("Θ" -> "TH", ...).
_EL_UPPER_TO_LATIN = {
    greek.upper(): latin.upper()
    for greek, latin in _EL_LOWER_TO_LATIN.items()
}

# Digraphs with no positional rules. Mixed-case forms such as "Γγ" are
# deliberately absent; they fall through to the single-letter tables.
_EL_NASAL_DIGRAPHS = {
    "γγ": "ng", "γξ": "nx", "γχ": "nch",
    "ΓΓ": "NG", "ΓΞ": "NX", "ΓΧ": "NCH",
}

# "μπ" is romanized differently at the start of a word than elsewhere.
_EL_MP_WORD_INITIAL = {"ΜΠ": "B", "Μπ": "B", "μπ": "b"}
_EL_MP_ELSEWHERE = {"ΜΠ": "MP", "Μπ": "Mp", "μπ": "mp"}

# First letter of the αυ / ευ / ηυ digraphs -> its romanization.
_EL_UPSILON_DIGRAPH_STEMS = {"α": "a", "ε": "e", "η": "i"}

# Letters after which the υ of αυ / ευ / ηυ is written "f" instead of "v".
_EL_VOICELESS = frozenset("θκξπσςτφχψ")


def greek_iso_transliteration(string: str) -> str:
    """Romanize Greek text using the ELOT 743 style letter tables.

    The input is scanned once from left to right. At each position the
    first matching rule wins:

    1. ``γγ``, ``γξ``, ``γχ`` (all-lowercase or all-uppercase) become
       ``ng``, ``nx``, ``nch``.
    2. ``μπ`` (``ΜΠ``, ``Μπ``, ``μπ``) becomes ``b`` at the start of a
       word and ``mp`` elsewhere. A word starts at the beginning of the
       string or after any non-letter character.
       NOTE(review): ELOT 743 reportedly also uses ``b`` word-finally;
       that case is still emitted as ``mp`` here, confirm before extending.
    3. ``αυ``, ``ευ``, ``ηυ`` become ``af``/``ef``/``if`` when followed by
       a voiceless consonant (θ κ ξ π σ ς τ φ χ ψ) or by no letter at all,
       and ``av``/``ev``/``iv`` before any other letter. Each output letter
       keeps the case of the Greek letter it comes from.
    4. Every remaining Greek letter is mapped through the single-letter
       table. A capital that romanizes to two letters (Θ, Χ, Ψ) is written
       ``Th``/``Ch``/``Ps`` when the next character is lowercase and
       ``TH``/``CH``/``PS`` otherwise.

    Characters not covered by the tables are copied through unchanged.
    That includes accented Greek vowels, so sequences such as ``άυ`` or
    ``αϋ`` are never treated as digraphs.

    Args:
        string: Text to romanize. May be empty or contain non-Greek text.

    Returns:
        The romanized text.
    """
    # TODO: ROMANIZE ACCENTS
    pieces = []
    length = len(string)
    pos = 0
    while pos < length:
        pair = string[pos:pos + 2]

        if pair in _EL_NASAL_DIGRAPHS:
            pieces.append(_EL_NASAL_DIGRAPHS[pair])
            pos += 2
            continue

        if pair in _EL_MP_ELSEWHERE:
            at_word_start = pos == 0 or not string[pos - 1].isalpha()
            table = _EL_MP_WORD_INITIAL if at_word_start else _EL_MP_ELSEWHERE
            pieces.append(table[pair])
            pos += 2
            continue

        if (
            len(pair) == 2
            and pair[1] in ("υ", "Υ")
            and pair[0].lower() in _EL_UPSILON_DIGRAPH_STEMS
        ):
            # The sound of υ depends on the character AFTER the digraph;
            # an empty slice (end of input) counts as a word boundary.
            following = string[pos + 2:pos + 3]
            voiceless = (
                not following.isalpha()
                or following.lower() in _EL_VOICELESS
            )
            first = _EL_UPSILON_DIGRAPH_STEMS[pair[0].lower()]
            second = "f" if voiceless else "v"
            if pair[0].isupper():
                first = first.upper()
            if pair[1].isupper():
                second = second.upper()
            pieces.append(first + second)
            pos += 2
            continue

        char = string[pos]
        if char in _EL_LOWER_TO_LATIN:
            pieces.append(_EL_LOWER_TO_LATIN[char])
        elif char in _EL_UPPER_TO_LATIN:
            latin = _EL_UPPER_TO_LATIN[char]
            # "Ψαρι" -> "Psari", not "PSari"; "ΨΑΡΙ" stays "PSARI".
            if len(latin) > 1 and string[pos + 1:pos + 2].islower():
                latin = latin.capitalize()
            pieces.append(latin)
        else:
            pieces.append(char)
        pos += 1

    return "".join(pieces)