diff --git a/doc/config.md b/doc/config.md index 37ca17e..fb2da72 100644 --- a/doc/config.md +++ b/doc/config.md @@ -217,8 +217,8 @@ To create specific rules for initial, final, and standalone tokens, the `%` character is used to mark the word boundary. Hence: ``` - "token%": "transliteration of token at beginning of word" - "%token": "transliteration of token at end of word" + "%token": "transliteration of token at beginning of word" + "token%": "transliteration of token at end of word" "%token%": "transliteration of standalone token" "token": "transliteration of token anywhere else not specified" ``` diff --git a/scriptshifter/tables/data/arabic.yml b/scriptshifter/tables/data/arabic.yml index 9e45ef9..a806d88 100644 --- a/scriptshifter/tables/data/arabic.yml +++ b/scriptshifter/tables/data/arabic.yml @@ -1,395 +1,396 @@ -# Arabic S2R using the 3rd-party ArabicTransliterator library: -# https://github.com/MTG/ArabicTransliterator - ---- -general: - name: Arabic - description: > - Arabic R2S using a conversion table and S2R using a 3rd party library. - case_sensitive: false - - parents: - - _ignore_base - - -roman_to_script: - map: - - # Original table by David Bucknum, 5 April 2010 - # Updated, 25 January 2019 - # Modified by WK with testing by Arabic Cat Staff LOC-CAIRO - # Additional info from R. Vassie, [n.d.] "Marrying the Arabic and Latin - # Scripts Conceptually" - # Updated, 26 March 2025 by Randall K. Barry to reverse truncation marks for ScriptShifter - - # Punctuation marks: - "*": "\u066D" - ",": "\u060C" - ";": "\u061B" - "?": "\u061F" - - # Exceptions for specific words - # Allah - "Alla\u0304h": "\u0627\u0644\u0644\u0647" - - # Qur'an - "Qur\u02BCa\u0304n": "\u0642\u0631\u0622\u0646" - - # lillah - "lilla\u0304h": "\u0644\u0644\u0647" - - # billah - "billa\u0304h": "\u0628\u0644\u0644\u0647" - - # Rahman - "Rah\u0323ma\u0304n": "\u0631\u062D\u0645\u0646" - - # Ruwat - "Ruwa\u0304t": "\u0631\u0648\u0627\u0629" - "ruwa\u0304t": "\u0631\u0648\u0627\u0629" - - # Hadha - "Ha\u0304dha\u0304": "\u0647\u0630\u0627" - "ha\u0304dha\u0304": "\u0647\u0630\u0627" - - # Hadhihi - "Ha\u0304dhi\u0304hi": "\u0647\u0630\u0647" - "ha\u0304dhi\u0304hi": "\u0647\u0630\u0647" - - # dhalika - "dha\u0304lika": "\u0630\u0644\u0643" - - # Ibn when it appears in the middle of a name sequence - "ibn": "\u0628\u0646" - - # H[dot below]aya[macron]t - "h\u0323aya\u0304t": "\u062D\u064A\u0627\u0629" - "H\u0323aya\u0304t": "\u062D\u064A\u0627\u0629" - - # "sh[dot below] as in "Ishaq" - - "sh\u0323": "\u0633\u062D" - - # "s[prime]h" combos - - "s\u02B9h": "\u0633\u0647" - - # "th[dot below]" - - "th\u0323": "\u062A\u062D" - - # dh[dot under] - - "dh\u0323": "\u062F\u062D" - - # La-hu - - "la-hu": "\u0644\u0647" - - # Mi'ah - "Mi\u02BEah": "\u0645\u0627\u0626\u0629" - "Mi\u02BCah": "\u0645\u0627\u0626\u0629" - "mi\u02BEah": "\u0645\u0627\u0626\u0629" - "mi\u02BCah": "\u0645\u0627\u0626\u0629" - - # Mi'at - "Mi\u02BEat": "\u0645\u0627\u0626\u0629" - "Mi\u02BCat": "\u0645\u0627\u0626\u0629" - "mi\u02BEat": "\u0645\u0627\u0626\u0629" - "mi\u02BCat": "\u0645\u0627\u0626\u0629" - - # Numbers (I have set these to Hindi numbers. Note that Persian and Urdu - # will technically use \u06F0-06F9. This needs further discussion with PSD - # as RLIN21 used Hindi numbers, Connexion and Voyager does not.) - - # Edition statements with Latin number - "al-T\u0323ab\u02BBah 1": "\u0627\u0644\u0637\u0628\u0639\u0629 1" - "al-T\u0323ab\u02BBah 2": "\u0627\u0644\u0637\u0628\u0639\u0629 2" - "al-T\u0323ab\u02BBah 3": "\u0627\u0644\u0637\u0628\u0639\u0629 3" - "al-T\u0323ab\u02BBah 4": "\u0627\u0644\u0637\u0628\u0639\u0629 4" - "al-T\u0323ab\u02BBah 5": "\u0627\u0644\u0637\u0628\u0639\u0629 5" - "al-T\u0323ab\u02BBah 6": "\u0627\u0644\u0637\u0628\u0639\u0629 6" - "al-T\u0323ab\u02BBah 7": "\u0627\u0644\u0637\u0628\u0639\u0629 7" - "al-T\u0323ab\u02BBah 8": "\u0627\u0644\u0637\u0628\u0639\u0629 8" - "al-T\u0323ab\u02BBah 9": "\u0627\u0644\u0637\u0628\u0639\u0629 9" - - # Use Basic Arabic-Indic \u0660-0669 - "0": "\u0660" - "1": "\u0661" - "2": "\u0662" - "3": "\u0663" - "4": "\u0664" - "5": "\u0665" - "6": "\u0666" - "7": "\u0667" - "8": "\u0668" - "9": "\u0669" - - # Hyphenated prefixes: - "wa-": "\u0648" - "bi-": "\u0628" - "al-": "\u0627\u0644" - "lil-": "\u0644\u0644" - "li-": "\u0644" - "la\u0304-": "\u0644" - "fi\u0304-": "\u0641\u064A" - "ka-": "\u0643" - - # Vowels and vowel/consonant combinations - ta-marbutah at end of word - "ah%": "\u0629" - "at%": "\u0629" - - # tanwin at end of word - "an%": "\u0627" - - # ayn-alif combo - "\u02BBa\u0304\u02BE%": "\u0639\u0627\u0621" - "\u02BBa\u0304\u02BC%": "\u0639\u0627\u0621" - - "\u02BBA\u0304": "\u0639\u0627" - "\u02BBa\u0304": "\u0639\u0627" - - "\u02BBI\u0304Y": "\u0639\u064A" - "\u02BBi\u0304y": "\u0639\u064A" - "\u02BBI\u0304": "\u0639\u064A" - "\u02BBi\u0304": "\u0639\u064A" - - "\u02BBU\u0304": "\u0639\u0648" - "\u02BBu\u0304": "\u0639\u0648" - "\u02BBU": "\u0639" - "\u02BBu": "\u0639" - - "%\u02BBA": "\u0639" - # "%\u02BBa": "\u0639" - - # alif and hamzas for all occasions - - # truncation necessary? It seems to work fine with. - - "i\u0304\u02BEah%": "\u064A\u0626\u0629" - "i\u0304\u02BCah%": "\u064A\u0626\u0629" - - "i\u0304\u02BEat%": "\u064A\u0626\u0629" - "i\u0304\u02BCat%": "\u064A\u0626\u0629" - - "i\u02BEa\u0304%": "\u0626\u0627" - "i\u02BCa\u0304%": "\u0626\u0627" - - "i\u02BE%": "\u0626" - "i\u02BC%": "\u0626" - "a\u0304\u02BEa\u0304": "\u0627\u0621\u0627" - "a\u0304\u02BCa\u0304": "\u0627\u0621\u0627" - - "a\u02BE": "\u0623" - "a\u02BC": "\u0623" - "\u02BEi": "\u0626" - "\u02BCi": "\u0626" - "\u02BEa\u0304": "\u0622" - "\u02BCa\u0304": "\u0622" - "\u02BEa": "\u0623" - "\u02BCa": "\u0623" - - "y\u02BCah": "\u064A\u0626\u0629" - "y\u02BEah": "\u064A\u0626\u0629" - - "y\u02BCat": "\u064A\u0626\u0629" - "y\u02BEat": "\u064A\u0626\u0629" - - # A - - "a\u0304\u02BCi\u0304": "\u0627\u0626\u064A" - "a\u0304\u02BEi\u0304": "\u0627\u0626\u064A" - - "a\u0304\u02BCi": "\u0627\u0626" - "a\u0304\u02BEi": "\u0627\u0626" - "a\u0304\u02BC": "\u0627\u0621" - "a\u0304\u02BE": "\u0627\u0621" - "%A\u0304": "\u0622" - "%a\u0304": "\u0622" - "A\u0304": "\u0627" - "a\u0304": "\u0627" - - # These next two lines were intended to convert to alif-ayn when it is at - # # the beginning of a word, definite or indefinine (i.e. - # al-a[ayn]ma[macron]l or [space]a[ayn]ma[macron]l" - "%A\u02BB": "\u0623\u0639" - "%a\u02BB": "\u0623\u0639" - "a\u02BB": "\u0639" - "A\u0301": "\u0649" - "a\u0301": "\u0649" - - "ayy": "\u064A" - "%A": "\u0623" - "%a": "\u0627" - "A": "\u0623" - "a": "" - - # I - Capital I at beginning of word is usually alif hamzah-below. - - "i\u0304%": "\u064A" - "i\u0304y": "\u064A" - "iy": "\u064A" - "%I\u0304": "\u0625\u064A" - "i\u0304": "\u064A" - "%\u02BBI": "\u0639" - - # "i\u02BB": "\u0625\u0639" - - "I\u02BE": "\u0627\u0626" - "I\u02BC": "\u0627\u0626" - "i\u02BE": "\u0626" - "i\u02BC": "\u0627\u0626" - - "%I": "\u0625" - "%i": "\u0625" - "I": "\u0625" - "i": "" - - # U - - "u\u0304\u02BE": "\u0624" - "u\u0304\u02BC": "\u0624" - "%U\u0304w": "\u0623\u0648" - "%u\u0304w": "\u0623\u0648" - "%U\u0304": "\u0623\u0648" - "%u\u0304": "\u0623\u0648" - "u\u0304w": "\u0648" - "u\u0304": "\u0648" - "u\u02BE": "\u0624" - "u\u02BC": "\u0624" - - "%U": "\u0623" - "%u": "\u0623" - "U": "\u0623" - "u": "" - - # Consonants, with tashdid added - - "B": "\u0628" - "bb": "\u0628" - "b": "\u0628" - "Th": "\u062B" - "thth": "\u062B" - "th": "\u062B" - "T\u0323": "\u0637" - "t\u0323t\u0323": "\u0637" - "t\u0323": "\u0637" - "T": "\u062A" - "tt": "\u062A" - "t": "\u062A" - "J": "\u062C" - "jj": "\u062C" - "j": "\u062C" - "H\u0323": "\u062D" - "h\u0323h\u0323": "\u062D" - "h\u0323": "\u062D" - "H": "\u0647" - "hh": "\u0647" - "h": "\u0647" - "Kh": "\u062E" - "khkh": "\u062E" - "kh": "\u062E" - "K": "\u0643" - "kk": "\u0643" - "k": "\u0643" - "Dh": "\u0630" - "dhdh": "\u0630" - "dh": "\u0630" - "D\u0323": "\u0636" - "d\u0323d\u0323": "\u0636" - "d\u0323": "\u0636" - "D": "\u062F" - "dd": "\u062F" - "d": "\u062F" - "R": "\u0631" - "rr": "\u0631" - "r": "\u0631" - "Z\u0323": "\u0638" - "z\u0323z\u0323": "\u0638" - "z\u0323": "\u0638" - "Z": "\u0632" - "zz": "\u0632" - "z": "\u0632" - "Sh": "\u0634" - "shsh": "\u0634" - "sh": "\u0634" - "S\u0323": "\u0635" - "s\u0323s\u0323": "\u0635" - "s\u0323": "\u0635" - "S": "\u0633" - "ss": "\u0633" - "s": "\u0633" - "Gh": "\u063A" - "ghgh": "\u063A" - "gh": "\u063A" - "F": "\u0641" - "ff": "\u0641" - "f": "\u0641" - "Q": "\u0642" - "qq": "\u0642" - "q": "\u0642" - "L": "\u0644" - "ll": "\u0644" - "l": "\u0644" - "M": "\u0645" - "mm": "\u0645" - "m": "\u0645" - "N": "\u0646" - "nn": "\u0646" - "n": "\u0646" - "W": "\u0648" - "ww": "\u0648" - "w": "\u0648" - "Y": "\u064A" - "yy": "\u064A" - "y": "\u064A" - - # non-Arabic consonants: - "P": "\u067E" - "p": "\u067E" - "Ch": "\u0686" - "ch": "\u0686" - "V": "\u06A4" - "v": "\u06A4" - "G": "\u06AF" - "g": "\u06AF" - - # Diacritic characters: - # ain (\u0639) - not transliterated alone: - "\u02BB": "\u0639" - # hamza - not romanized - # "\u0621" - # hamza (alone in final position) - "\u02BE%": "\u0621" - "\u02BC%": "\u0621" - - # Do not know what, if anything, is needed here: - # tatweel: - # "\u0640" - # fathatan: - # "\u064B" - # dammatan: - # "\u064C" - # kasratan: - # "\u064D" - # fatha: - # "\u064E" - # damma: - # "\u064F" - # kasra: - # "\u0650" - # shadda: - # "\u0651" - # sukun: - # "\u0652" - # superscript alef: - # "\u0670" - # alef wasla - # "\u0671" - - -script_to_roman: - hooks: - post_config: - - - - arabic.arabic_romanizer.s2r_post_config +# Arabic S2R using the 3rd-party ArabicTransliterator library: +# https://github.com/MTG/ArabicTransliterator + +--- +general: + name: Arabic + description: > + Arabic R2S using a conversion table and S2R using a 3rd party library. + case_sensitive: false + + parents: + - _ignore_base + + +roman_to_script: + map: + + # Original table by David Bucknum, 5 April 2010 + # Updated, 25 January 2019 + # Modified by WK with testing by Arabic Cat Staff LOC-CAIRO + # Additional info from R. Vassie, [n.d.] "Marrying the Arabic and Latin + # Scripts Conceptually" + # Updated, 26 March 2025 by Randall K. Barry to reverse truncation marks for ScriptShifter + + + # Punctuation marks: + "*": "\u066D" + ",": "\u060C" + ";": "\u061B" + "?": "\u061F" + + # Exceptions for specific words + # Allah + "Alla\u0304h": "\u0627\u0644\u0644\u0647" + + # Qur'an + "Qur\u02BCa\u0304n": "\u0642\u0631\u0622\u0646" + + # lillah + "lilla\u0304h": "\u0644\u0644\u0647" + + # billah + "billa\u0304h": "\u0628\u0644\u0644\u0647" + + # Rahman + "Rah\u0323ma\u0304n": "\u0631\u062D\u0645\u0646" + + # Ruwat + "Ruwa\u0304t": "\u0631\u0648\u0627\u0629" + "ruwa\u0304t": "\u0631\u0648\u0627\u0629" + + # Hadha + "Ha\u0304dha\u0304": "\u0647\u0630\u0627" + "ha\u0304dha\u0304": "\u0647\u0630\u0627" + + # Hadhihi + "Ha\u0304dhi\u0304hi": "\u0647\u0630\u0647" + "ha\u0304dhi\u0304hi": "\u0647\u0630\u0647" + + # dhalika + "dha\u0304lika": "\u0630\u0644\u0643" + + # Ibn when it appears in the middle of a name sequence + "ibn": "\u0628\u0646" + + # H[dot below]aya[macron]t + "h\u0323aya\u0304t": "\u062D\u064A\u0627\u0629" + "H\u0323aya\u0304t": "\u062D\u064A\u0627\u0629" + + # "sh[dot below] as in "Ishaq" + + "sh\u0323": "\u0633\u062D" + + # "s[prime]h" combos + + "s\u02B9h": "\u0633\u0647" + + # "th[dot below]" + + "th\u0323": "\u062A\u062D" + + # dh[dot under] + + "dh\u0323": "\u062F\u062D" + + # La-hu + + "la-hu": "\u0644\u0647" + + # Mi'ah + "Mi\u02BEah": "\u0645\u0627\u0626\u0629" + "Mi\u02BCah": "\u0645\u0627\u0626\u0629" + "mi\u02BEah": "\u0645\u0627\u0626\u0629" + "mi\u02BCah": "\u0645\u0627\u0626\u0629" + + # Mi'at + "Mi\u02BEat": "\u0645\u0627\u0626\u0629" + "Mi\u02BCat": "\u0645\u0627\u0626\u0629" + "mi\u02BEat": "\u0645\u0627\u0626\u0629" + "mi\u02BCat": "\u0645\u0627\u0626\u0629" + + # Numbers (I have set these to Hindi numbers. Note that Persian and Urdu + # will technically use \u06F0-06F9. This needs further discussion with PSD + # as RLIN21 used Hindi numbers, Connexion and Voyager does not.) + + # Edition statements with Latin number + "al-T\u0323ab\u02BBah 1": "\u0627\u0644\u0637\u0628\u0639\u0629 1" + "al-T\u0323ab\u02BBah 2": "\u0627\u0644\u0637\u0628\u0639\u0629 2" + "al-T\u0323ab\u02BBah 3": "\u0627\u0644\u0637\u0628\u0639\u0629 3" + "al-T\u0323ab\u02BBah 4": "\u0627\u0644\u0637\u0628\u0639\u0629 4" + "al-T\u0323ab\u02BBah 5": "\u0627\u0644\u0637\u0628\u0639\u0629 5" + "al-T\u0323ab\u02BBah 6": "\u0627\u0644\u0637\u0628\u0639\u0629 6" + "al-T\u0323ab\u02BBah 7": "\u0627\u0644\u0637\u0628\u0639\u0629 7" + "al-T\u0323ab\u02BBah 8": "\u0627\u0644\u0637\u0628\u0639\u0629 8" + "al-T\u0323ab\u02BBah 9": "\u0627\u0644\u0637\u0628\u0639\u0629 9" + + # Use Basic Arabic-Indic \u0660-0669 + "0": "\u0660" + "1": "\u0661" + "2": "\u0662" + "3": "\u0663" + "4": "\u0664" + "5": "\u0665" + "6": "\u0666" + "7": "\u0667" + "8": "\u0668" + "9": "\u0669" + + # Hyphenated prefixes: + "wa-": "\u0648" + "bi-": "\u0628" + "al-": "\u0627\u0644" + "lil-": "\u0644\u0644" + "li-": "\u0644" + "la\u0304-": "\u0644" + "fi\u0304-": "\u0641\u064A" + "ka-": "\u0643" + + # Vowels and vowel/consonant combinations - ta-marbutah at end of word + "%ah": "\u0629" + "%at": "\u0629" + + # tanwin at end of word + "%an": "\u0627" + + # ayn-alif combo + "%\u02BBa\u0304\u02BE": "\u0639\u0627\u0621" + "%\u02BBa\u0304\u02BC": "\u0639\u0627\u0621" + + "\u02BBA\u0304": "\u0639\u0627" + "\u02BBa\u0304": "\u0639\u0627" + + "\u02BBI\u0304Y": "\u0639\u064A" + "\u02BBi\u0304y": "\u0639\u064A" + "\u02BBI\u0304": "\u0639\u064A" + "\u02BBi\u0304": "\u0639\u064A" + + "\u02BBU\u0304": "\u0639\u0648" + "\u02BBu\u0304": "\u0639\u0648" + "\u02BBU": "\u0639" + "\u02BBu": "\u0639" + + "\u02BBA%": "\u0639" + # "\u02BBa%": "\u0639" + + # alif and hamzas for all occasions + + # truncation necessary? It seems to work fine with. + + "%i\u0304\u02BEah": "\u064A\u0626\u0629" + "%i\u0304\u02BCah": "\u064A\u0626\u0629" + + "%i\u0304\u02BEat": "\u064A\u0626\u0629" + "%i\u0304\u02BCat": "\u064A\u0626\u0629" + + "%i\u02BEa\u0304": "\u0626\u0627" + "%i\u02BCa\u0304": "\u0626\u0627" + + "%i\u02BE": "\u0626" + "%i\u02BC": "\u0626" + "a\u0304\u02BEa\u0304": "\u0627\u0621\u0627" + "a\u0304\u02BCa\u0304": "\u0627\u0621\u0627" + + "a\u02BE": "\u0623" + "a\u02BC": "\u0623" + "\u02BEi": "\u0626" + "\u02BCi": "\u0626" + "\u02BEa\u0304": "\u0622" + "\u02BCa\u0304": "\u0622" + "\u02BEa": "\u0623" + "\u02BCa": "\u0623" + + "y\u02BCah": "\u064A\u0626\u0629" + "y\u02BEah": "\u064A\u0626\u0629" + + "y\u02BCat": "\u064A\u0626\u0629" + "y\u02BEat": "\u064A\u0626\u0629" + + # A + + "a\u0304\u02BCi\u0304": "\u0627\u0626\u064A" + "a\u0304\u02BEi\u0304": "\u0627\u0626\u064A" + + "a\u0304\u02BCi": "\u0627\u0626" + "a\u0304\u02BEi": "\u0627\u0626" + "a\u0304\u02BC": "\u0627\u0621" + "a\u0304\u02BE": "\u0627\u0621" + "A\u0304%": "\u0622" + "a\u0304%": "\u0622" + "A\u0304": "\u0627" + "a\u0304": "\u0627" + + # These next two lines were intended to convert to alif-ayn when it is at + # # the beginning of a word, definite or indefinine (i.e. + # al-a[ayn]ma[macron]l or [space]a[ayn]ma[macron]l" + "A\u02BB%": "\u0623\u0639" + "a\u02BB%": "\u0623\u0639" + "a\u02BB": "\u0639" + "A\u0301": "\u0649" + "a\u0301": "\u0649" + + "ayy": "\u064A" + "A%": "\u0623" + "a%": "\u0627" + "A": "\u0623" + "a": "" + + # I - Capital I at beginning of word is usually alif hamzah-below. + + "i\u0304%": "\u064A" + "i\u0304y": "\u064A" + "iy": "\u064A" + "I\u0304%": "\u0625\u064A" + "i\u0304": "\u064A" + "\u02BBI%": "\u0639" + + # "i\u02BB": "\u0625\u0639" + + "I\u02BE": "\u0627\u0626" + "I\u02BC": "\u0627\u0626" + "i\u02BE": "\u0626" + "i\u02BC": "\u0627\u0626" + + "I%": "\u0625" + "i%": "\u0625" + "I": "\u0625" + "i": "" + + # U + + "u\u0304\u02BE": "\u0624" + "u\u0304\u02BC": "\u0624" + "U\u0304w%": "\u0623\u0648" + "u\u0304w%": "\u0623\u0648" + "U\u0304%": "\u0623\u0648" + "u\u0304%": "\u0623\u0648" + "u\u0304w": "\u0648" + "u\u0304": "\u0648" + "u\u02BE": "\u0624" + "u\u02BC": "\u0624" + + "U%": "\u0623" + "u%": "\u0623" + "U": "\u0623" + "u": "" + + # Consonants, with tashdid added + + "B": "\u0628" + "bb": "\u0628" + "b": "\u0628" + "Th": "\u062B" + "thth": "\u062B" + "th": "\u062B" + "T\u0323": "\u0637" + "t\u0323t\u0323": "\u0637" + "t\u0323": "\u0637" + "T": "\u062A" + "tt": "\u062A" + "t": "\u062A" + "J": "\u062C" + "jj": "\u062C" + "j": "\u062C" + "H\u0323": "\u062D" + "h\u0323h\u0323": "\u062D" + "h\u0323": "\u062D" + "H": "\u0647" + "hh": "\u0647" + "h": "\u0647" + "Kh": "\u062E" + "khkh": "\u062E" + "kh": "\u062E" + "K": "\u0643" + "kk": "\u0643" + "k": "\u0643" + "Dh": "\u0630" + "dhdh": "\u0630" + "dh": "\u0630" + "D\u0323": "\u0636" + "d\u0323d\u0323": "\u0636" + "d\u0323": "\u0636" + "D": "\u062F" + "dd": "\u062F" + "d": "\u062F" + "R": "\u0631" + "rr": "\u0631" + "r": "\u0631" + "Z\u0323": "\u0638" + "z\u0323z\u0323": "\u0638" + "z\u0323": "\u0638" + "Z": "\u0632" + "zz": "\u0632" + "z": "\u0632" + "Sh": "\u0634" + "shsh": "\u0634" + "sh": "\u0634" + "S\u0323": "\u0635" + "s\u0323s\u0323": "\u0635" + "s\u0323": "\u0635" + "S": "\u0633" + "ss": "\u0633" + "s": "\u0633" + "Gh": "\u063A" + "ghgh": "\u063A" + "gh": "\u063A" + "F": "\u0641" + "ff": "\u0641" + "f": "\u0641" + "Q": "\u0642" + "qq": "\u0642" + "q": "\u0642" + "L": "\u0644" + "ll": "\u0644" + "l": "\u0644" + "M": "\u0645" + "mm": "\u0645" + "m": "\u0645" + "N": "\u0646" + "nn": "\u0646" + "n": "\u0646" + "W": "\u0648" + "ww": "\u0648" + "w": "\u0648" + "Y": "\u064A" + "yy": "\u064A" + "y": "\u064A" + + # non-Arabic consonants: + "P": "\u067E" + "p": "\u067E" + "Ch": "\u0686" + "ch": "\u0686" + "V": "\u06A4" + "v": "\u06A4" + "G": "\u06AF" + "g": "\u06AF" + + # Diacritic characters: + # ain (\u0639) - not transliterated alone: + "\u02BB": "\u0639" + # hamza - not romanized + # "\u0621" + # hamza (alone in final position) + "%\u02BE": "\u0621" + "%\u02BC": "\u0621" + + # Do not know what, if anything, is needed here: + # tatweel: + # "\u0640" + # fathatan: + # "\u064B" + # dammatan: + # "\u064C" + # kasratan: + # "\u064D" + # fatha: + # "\u064E" + # damma: + # "\u064F" + # kasra: + # "\u0650" + # shadda: + # "\u0651" + # sukun: + # "\u0652" + # superscript alef: + # "\u0670" + # alef wasla + # "\u0671" + + +script_to_roman: + hooks: + post_config: + - + - arabic.arabic_romanizer.s2r_post_config \ No newline at end of file