sinnec · SStelioss · Jan 4, 2023 · Jan 4, 2023 · Jan 4, 2023 · Jan 5, 2023
diff --git a/README.md b/README.md
@@ -14,7 +14,7 @@ Import as any other module using the name `greeklt`
 
 - **capitalize(word)**
 
-Makes a word all caps without accentuation but adds dieresis when necessary:
+Makes a word all caps without accentuation, adding dieresis when necessary:
 
 ```
 word = "γάιδαρος"
@@ -25,7 +25,7 @@ word = "γάιδαρος"
 ΓΑΪΔΑΡΟΣ
 ```
 
-- **remove_accentuation(word)**
+- **remove_accentuation(word, modifier=0)**
 
 Removes accentuation but adds dieresis when necessary, without capitalizing:
 
@@ -36,7 +36,7 @@ word = "γάιδαρος"
 γαϊδαρος
 ```
 
-Works exceptionally well when you want to sort a list aphabetically and not based on unicode:
+Works exceptionally well when you want to sort a list alphabetically and not based on unicode:
 
 ```
 cities = ["Όσλο", "Λευκωσία", "Άκαμπα", "Ζυρίχη", "Ρώμη"]
@@ -47,7 +47,14 @@ cities = ["Όσλο", "Λευκωσία", "Άκαμπα", "Ζυρίχη", "Ρώ
 >>> sorted(cities, key=remove_accentuation)
 ["Άκαμπα", "Ζυρίχη", "Λευκωσία", "Όσλο", "Ρώμη"]
 ```
+Also contains an option to never add dieresis, enabled by setting modifier to 1:
 
+```
+string = "Αιδόνι"
+
+>>> print(remove_accentuation(string, 1))
+Αιδονι
+```
 - **convert_final_s(word)**
 
 Checks last letter of each word in a string. If it is a `σ` it is converted into a `ς` (final `σ`):
@@ -67,6 +74,7 @@ Transliterates a string written with latin characters into it's equivalent Greek
 ```
 
 This can come quite in handy when a user forgets to change the language and the word looks the same both in latin and Greek:
+
 ```
 # ANNA written in latin (Anna)
 name = "ANNA"
@@ -84,22 +92,32 @@ There's also the abillity to convert a word from latin to it's intended accentua
 ```
 >>> greek_transliteration("P;ita soybl;aki")
 Πίτα σουβλάκι
-
 >>> greek_transliteration("kaWiki")
 καΐκι
-
 >>> greek_transliteration("pro:yp;ouesh")
 προϋπόθεση
-
 >>> greek_transliteration("GA:IDAROS")
 ΓΑΪΔΑΡΟΣ
 ```
-
 Note: The function takes as given that the user intended to write the work in Greek using the correct key sequence but just didn't switch their keyboard to Greek. It doesn't convert from Greeklish!
 
 ```
 # Wrong key sequence by user.
 # They're supposed to press SHIFT + W and not just w for the ΅ character to appear.
 >>> greek_transliteration("kawiki")
 καςικι
+```
+
+- **greek_elot_transliteration(word/sentence)**
+
+Transliterates a string according to the ELOT 743 standard, frequently used in formal transcription settings (e.g. passports, IDs, etc.).
+
+```
+
+name = "Ελευθέριος Βενιζέλος"
+
+>>> latin_name = greek_elot_transliteration(name)
+>>> print(latin_name)
+Eleftherios Venizelos
+
 ```
diff --git a/src/greeklt/greek_elot_transliteration.py b/src/greeklt/greek_elot_transliteration.py
@@ -0,0 +1,231 @@
+def greek_elot_transliteration(string):
+    from remove_accentuation import remove_accentuation
+#    reference_string = string
+    string = remove_accentuation(string, 1)
+    lowercase = {
+        'α': 'a',
+        'β': 'v',
+        'γ': 'g',
+        'δ': 'd',
+        'ε': 'e',
+        'ζ': 'z',
+        'η': 'i',
+        'θ': 'th',
+        'ι': 'i',
+        'κ': 'k',
+        'λ': 'l',
+        'μ': 'm',
+        'ν': 'n',
+        'ξ': 'x',
+        'ο': 'o',
+        'π': 'p',
+        'ρ': 'r',
+        'σ': 's',
+        'τ': 't',
+        'υ': 'y',
+        'φ': 'f',
+        'χ': 'ch',
+        'ψ': 'ps',
+        'ω': 'o'
+    }
+    caps = {
+        'Α': 'A',
+        'Β': 'V',
+        'Γ': 'G',
+        'Δ': 'D',
+        'Ε': 'E',
+        'Ζ': 'Z',
+        'Η': 'I',
+        'Θ': 'TH',
+        'Ι': 'I',
+        'Κ': 'K',
+        'Λ': 'L',
+        'Μ': 'M',
+        'Ν': 'N',
+        'Ξ': 'X',
+        'Ο': 'O',
+        'Π': 'P',
+        'Ρ': 'R',
+        'Σ': 'S',
+        'Τ': 'T',
+        'Υ': 'Y',
+        'Φ': 'F',
+        'Χ': 'CH',
+        'Ψ': 'PS',
+        'Ω': 'O'
+    }
+    # Simple digraphs with no extra rules or edge cases
+    # Mixed-case forms such as "Γγ" or "Γξ" are omitted because they go against Greek phonology
+    el_simple_digraphs = [
+        'γγ',
+        'γξ',
+        'γχ',
+        'ου'
+    ]
+    eng_simple_digraphs = [
+        'ng',
+        'nx',
+        'nch',
+        'ou'
+    ]
+    el_simple_cap_digraphs = [
+        "ΓΓ",
+        "ΓΞ",
+        "ΓΧ",
+        "ΟΥ"
+    ]
+    eng_simple_cap_digraphs = [
+        "NG",
+        "NX",
+        "NCH",
+        "OU"
+    ]
+
+    el_mono_digraph_sub = [
+        "TH",
+        "CH",
+        "PS"
+    ]
+#   Accent based digraphs
+#    el_low_acc_digraphs = [
+#        "άυ",
+#        "αϋ",
+#
+#        "έυ",
+#        "εϋ",
+#
+#        "ήυ",
+#        "ηϋ"
+#
+#    ]
+#    el_mix_acc_digraphs = [
+#        "Άυ",
+#        "Αϋ",
+#
+#        "Έυ",
+#        "Εϋ",
+#
+#        "Ήυ",
+#        "Ηϋ"
+#    ]
+#    el_cap_acc_digraphs = [
+#        "ΆΥ",
+#        "ΑΫ",
+#
+#        "ΈΥ",
+#        "ΕΫ",
+#
+#        "ΉΥ",
+#        "ΗΫ"
+#    ]
+    el_mp_digraph = [
+        "ΜΠ",
+        "Μπ",
+        "μπ"
+    ]
+    eng_mp_digraph_0 = [
+        "B",
+        "B",
+        "b"
+    ]
+    eng_mp_digraph_1 = [
+        "MP",
+        "Mp",
+        "mp"
+    ]
+    el_xu_digraphs = [
+        "αυ",
+        "ευ",
+        "ηυ"
+    ]
+    eng_xu_digraphs_v = [
+        "av",
+        "ev",
+        "iv"
+    ]
+    eng_xu_digraphs_f = [
+        "af",
+        "ef",
+        "if"
+    ]
+    xu_sound_modifiers_v = [
+
+        "β",
+        "γ",
+        "δ",
+        "ζ",
+        "λ",
+        "μ",
+        "ν",
+        "ρ",
+
+        "α",
+        "ε",
+        "η",
+        "ι",
+        "ο",
+        "υ",
+        "ω"
+    ]
+#   +empty space (accounted for in code)
+    xu_sound_modifiers_f = [
+
+        "θ",
+        "κ",
+        "ξ",
+        "π",
+        "σ",
+        "τ",
+        "φ",
+        "χ",
+        "ψ"
+    ]
+#   Replace ς with σ
+    prep_string = string.replace("ς", "σ")
+#   if el_low_acc_digraphs or el_mix_acc_digraphs or el_cap_acc_digraphs in string:
+#   Do nothing, we don't care with current implementation
+#   Prepare the Unicode tables for use with translate()
+    lowercase = string.maketrans(lowercase)
+    caps = string.maketrans(caps)
+#    reference_string_list = reference_string.split(" ")
+    new_string_list = prep_string.split(" ")
+    output = ""
+    for new_string in new_string_list:
+        #   Replace all digraphs, so they're ignored by the simple transcription
+        for i in el_simple_digraphs:
+            if i in string:
+                new_string = new_string.replace(i, eng_simple_digraphs[el_simple_digraphs.index(i)])
+        for i in el_simple_cap_digraphs:
+            if i in string:
+                new_string = new_string.replace(i, eng_simple_cap_digraphs[el_simple_cap_digraphs.index(i)])
+#       Check which "mp" sound to use depending on if it's at word start
+        for i in el_mp_digraph:
+            if i in string:
+                if string.startswith(i):
+                    new_string = new_string.replace(i, eng_mp_digraph_0[el_mp_digraph.index(i)], 1)
+                    new_string = new_string.replace(i, eng_mp_digraph_1[el_mp_digraph.index(i)])
+#       Check what VOWEL+"υ" should transliterate to depending on the following letter.
+        for i in el_xu_digraphs:
+            if i in new_string:
+                if len(new_string) > 2:  # Make sure we're not calling an out of range index
+                    for loop in xu_sound_modifiers_f:
+                        if new_string[new_string.find(i)+2] in loop:
+                            new_string = new_string.replace(i, eng_xu_digraphs_f[el_xu_digraphs.index(i)])
+
+                    for loop in xu_sound_modifiers_v:
+                        if new_string[new_string.find(i)+2] in loop:
+                            new_string = new_string.replace(i, eng_xu_digraphs_v[el_xu_digraphs.index(i)])
+                if len(new_string) == 2:  # Account for VOWEL+"υ" at end of sentence
+                    new_string = new_string.replace(i, eng_xu_digraphs_f[el_xu_digraphs.index(i)])
+
+#   Simple transcription
+        new_string = new_string.translate(caps)
+        new_string = new_string.translate(lowercase)
+#   Normalize capital letters if needed
+        for i in el_mono_digraph_sub:
+            if new_string.startswith(i):
+                if new_string[3].islower() is True:
+                    new_string = new_string.replace(new_string[1], new_string[1].lower())
+        new_string += " "
+        output += new_string
+    return output
diff --git a/src/greeklt/remove_accentuation.py b/src/greeklt/remove_accentuation.py
@@ -1,4 +1,4 @@
-def remove_accentuation(string: str):
+def remove_accentuation(string: str, modifier=0):
     accents = {
         "ά": "α",
         "έ": "ε",
@@ -17,14 +17,19 @@ def remove_accentuation(string: str):
         "Ώ": "Ω",
     }
     dieresis = {"ι": "ϊ", "υ": "ϋ"}
+    dieresis_reverse = {"ϊ": "ι", "ϋ": "υ"}
     new_string = ""
     prev_char = 0
     for c in string:
         char = c
         if c in accents.keys():
             char = accents[c]
-        if c in dieresis.keys() and prev_char in ("ά", "ό", "έ"):
-            char = dieresis[c]
+        if modifier == 0:
+            if c in dieresis.keys() and prev_char in ("ά", "ό", "έ"):
+                char = dieresis[c]
+        if modifier == 1:  # Remove dieresis
+            if c in dieresis_reverse.keys():
+                char = dieresis_reverse[c]
         prev_char = c
         new_string += char
     return new_string
diff --git a/tests/test_tests.py b/tests/test_tests.py
@@ -2,6 +2,7 @@
 from src.greeklt.remove_accentuation import remove_accentuation
 from src.greeklt.greek_transliteration import greek_transliteration
 from src.greeklt.convert_final_s import convert_final_s
+from src.greeklt.greek_elot_transliteration import greek_elot_transliteration
 
 
 def test_capitalize():
@@ -28,3 +29,11 @@ def test_transilteration():
 def test_convert_final_s():
 
     assert convert_final_s("Φάροσ ΦΑΡΟΣ φάρος") == "Φάρος ΦΑΡΟΣ φάρος"
+
+
+def test_elot_transliteration():
+
+    assert greek_elot_transliteration("αυγό") == "avgo", "Failure in vowel+υ conversion, v"
+    assert greek_elot_transliteration("αυτο") == "afto", "Failure in vowel+υ conversion, f"
+    assert greek_elot_transliteration("αγγελος το αγχος του") == "angelos to anchos tou", "Failure in simple diphthongs"
+    assert greek_elot_transliteration("Θανασης") == "Thanasis", "Failure in diphthong capital normalization"