In [4]:
from collections import OrderedDict
from graphtransliterator import GraphTransliterator
gt = GraphTransliterator.from_yaml_file("itrans_devanagari_to_unicode.yaml")

In [7]:
gt.tokens_by_class['consonant']

{'.D',
 '.Dh',
 'Ch',
 'D',
 'Dh',
 'G',
 'GY',
 'J',
 'K',
 'L',
 'N',
 'R',
 'Sh',
 'T',
 'Th',
 'Y',
 '^n',
 'b',
 'bh',
 'ch',
 'chh',
 'd',
 'dh',
 'dny',
 'f',
 'g',
 'gh',
 'h',
 'j',
 'jh',
 'j~n',
 'k',
 'kSh',
 'kh',
 'l',
 'ld',
 'm',
 'n',
 'p',
 'ph',
 'q',
 'r',
 's',
 'sh',
 't',
 'th',
 'v',
 'x',
 'y',
 'z',
 'zh',
 '~N',
 '~n',
 'ṅ'}

In [28]:
import yaml
tests = OrderedDict()

def sample_token(token_class):
    """Return first token in token class."""
    tokens_in_class = gt.tokens_by_class[token_class]
    return list(tokens_in_class)[0]

for rule in gt.rules:
    input_ = ""
    if rule.prev_classes:
        for _ in rule.prev_classes:
            input_ += sample_token(_)
    if rule.prev_tokens:
        for _ in rule.prev_tokens:
            input_ += _
    for _ in rule.tokens:
        input_ += _
    if rule.next_tokens:
        for _ in rule.next_tokens:
            input_ += _
    if rule.next_classes:
        for _ in rule.next_classes:
            input_ += sample_token(_)
    tests[input_] = rule.production

if gt.onmatch_rules:
    for rule in gt.onmatch_rules:
        input_ = ""
        output = ""
        for _ in rule.prev_classes:
            token = sample_token(_)
            input_ += token
            token_production = tests[token] # assumes token has production
            output += token_production
        output += rule.production
        for _ in rule.prev_classes:
            token = sample_token(_)
            input_ += token
            token_production = tests[token]
            output += token_production
        tests[input_] = output
yaml.dump(dict(tests), allow_unicode=True)

'"\\t": "\\t"\n\' \': \' \'\n\',\': \',\'\n.D: ड़\n.Dh: ढ़\n.N: ँ\n.a: ऽ\n.h: ्\u200c\n.m: ं\n.n: ं\n\'0\': ०\n\'1\': १\n\'2\': २\n\'3\': ३\n\'4\': ४\n\'5\': ५\n\'6\': ६\n\'7\': ७\n\'8\': ८\n\'9\': ९\nA: आ\nA.c: ऑ\nAUM: ॐ\nCh: छ\nD: ड\nDh: ढ\nG: ग़\nGY: ज्ञ\nH: ः\nI: ई\nJ: ज़\nK: ख़\nL: ळ\nLLI: ॡ\nLLi: ऌ\nL^i: ऌ\nM: ं\nN: ण\nOM: ॐ\nR: ऱ\nRRI: ॠ\nRRi: ऋ\nR^i: ऋ\nRs.: ₹\nSh: ष\nShA: ा\nShA.c: ॉ\nShI: ी\nShLLI: ॣ\nShLLi: ॢ\nShL^i: ॢ\nShRRI: ॄ\nShRRi: ृ\nShR^i: ृ\nShSh: ष्ष\nShU: ू\nSh^e: ॆ\nSh^o: ॊ\nSha: \'\'\nSha.c: ॅ\nShaa: ा\nShai: ै\nShau: ौ\nShe: े\nShee: ी\nShi: ि\nShii: ी\nSho: ो\nShu: ु\nShuu: ू\nT: ट\nTh: ठ\nU: ऊ\nY: य़\n^e: ऎ\n^n: ऩ\n^o: ऒ\na: अ\na.c: ऍ\na.e: ॲ\naa: आ\nai: ऐ\nau: औ\nb: ब\nbh: भ\nch: च\nchh: छ\nd: द\ndh: ध\ndny: ज्ञ\ne: ए\nee: ई\nf: फ़\ng: ग\ngh: घ\nh: ह\ni: इ\nii: ई\nj: ज\njh: झ\nj~n: ज्ञ\nk: क\nkSh: क्ष\nkh: ख\nl: ल\nld: ळ\nm: म\nn: न\no: ओ\np: प\nph: फ\nq: क़\nr: र\ns: स\nsh: श\nt: त\nth: थ\nu: उ\nuu: ऊ\nv: व\nx: क्ष\ny: य\nz: ज़\nzh: ऴ\n\'|\': ।\n\'||\': ॥\n~

In [22]:
tests

OrderedDict([('ShA', 'ा'),
             ('ShA.c', 'ॉ'),
             ('ShI', 'ी'),
             ('ShLLI', 'ॣ'),
             ('ShLLi', 'ॢ'),
             ('ShL^i', 'ॢ'),
             ('ShRRI', 'ॄ'),
             ('ShRRi', 'ृ'),
             ('ShR^i', 'ृ'),
             ('ShU', 'ू'),
             ('Sh^e', 'ॆ'),
             ('Sh^o', 'ॊ'),
             ('Sha', ''),
             ('Sha.c', 'ॅ'),
             ('Shaa', 'ा'),
             ('Shai', 'ै'),
             ('Shau', 'ौ'),
             ('She', 'े'),
             ('Shee', 'ी'),
             ('Shi', 'ि'),
             ('Shii', 'ी'),
             ('Sho', 'ो'),
             ('Shu', 'ु'),
             ('Shuu', 'ू'),
             ('\t', '\t'),
             (' ', ' '),
             (',', ','),
             ('.D', 'ड़'),
             ('.Dh', 'ढ़'),
             ('.N', 'ँ'),
             ('.a', 'ऽ'),
             ('.h', '्\u200c'),
             ('.m', 'ं'),
             ('.n', 'ं'),
             ('0', '०'),
             ('1', '१'),
           