In [2]:
import toolbox

In [3]:
# Print every (marker, text) pair found in the example corpus.
# The file is opened in a context manager so the handle is closed
# deterministically once iteration finishes (the original left it open).
# NOTE(review): later cells read 'toolbox/example/corpus.txt' -- confirm which
# path is correct relative to the notebook's working directory.
with open('example/corpus.txt') as corpus_file:
    for mkr, text in toolbox.read_toolbox_file(corpus_file):
        # !r shows the raw backslash marker; <8 pads it so "Text:" lines up.
        print('Marker: {0!r:<8}Text: {1!r}'.format(mkr, text))

Marker: '\\ref' Text: 'item1'
Marker: '\\t'   Text: 'O        Pedro baixou'
Marker: '\\m'   Text: 'O        Pedro bai   -xou'
Marker: '\\g'   Text: 'the.M.SG Pedro lower -PST.IND.3SG'
Marker: '\\t'   Text: 'a        bola'
Marker: '\\m'   Text: 'a        bola'
Marker: '\\g'   Text: 'the.F.SG ball.F.SG'
Marker: '\\f'   Text: 'Pedro calmed down.'
Marker: '\\l'   Text: 'Pedro lowered the ball.'


In [10]:
### Iterating over records based on keys
# Group the (marker, text) pairs into records keyed on '\id' and '\ref'.
# Everything that consumes `pairs` stays inside the `with` block: the reader
# may be lazy, so the file must remain open until iteration is finished, and
# the context manager then closes the handle (the original never closed it).
with open('toolbox/example/corpus.txt') as corpus_file:
    pairs = toolbox.read_toolbox_file(corpus_file)
    for (context, data) in toolbox.records(pairs, ['\\id', '\\ref']):
        # `context` maps each key marker to its current value; sort the items
        # so the printed order is stable.
        print(sorted(context.items()))
        # One repr per line for each (marker, text) pair in the record.
        print('\n'.join(map(repr, data)))

[('\\id', None), ('\\ref', 'item1')]
('\\t', 'O        Pedro baixou')
('\\m', 'O        Pedro bai   -xou')
('\\g', 'the.M.SG Pedro lower -PST.IND.3SG')
('\\t', 'a        bola')
('\\m', 'a        bola')
('\\g', 'the.F.SG ball.F.SG')
('\\f', 'Pedro calmed down.')
('\\l', 'Pedro lowered the ball.')


In [11]:
### Normalizing tiers
# Take the first record and normalize its '\t', '\g' and '\m' tiers; in the
# output below, the repeated tier lines come back joined into one line per
# marker.
# The whole pipeline runs inside the `with` block so that any lazy iteration
# over the file happens while it is still open; the handle is closed on exit
# (the original passed a bare open() and never closed it).
with open('toolbox/example/corpus.txt') as corpus_file:
    pairs = toolbox.read_toolbox_file(corpus_file)
    records = toolbox.records(pairs, ['\\id', '\\ref'])
    rec1 = next(records)  # (context, data) for the first record
    for mkr, val in toolbox.normalize_record(rec1[1], ['\\t', '\\g', '\\m']):
        print((mkr, val))

('\\t', 'O        Pedro baixou             a        bola')
('\\m', 'O        Pedro bai   -xou         a        bola')
('\\g', 'the.M.SG Pedro lower -PST.IND.3SG the.F.SG ball.F.SG')
('\\f', 'Pedro calmed down.')
('\\l', 'Pedro lowered the ball.')


In [12]:
### Aligning fields
# Normalize the first record, then align its tiers token by token.
# The whole pipeline runs inside the `with` block so that any lazy iteration
# over the file happens while it is still open; the handle is closed on exit
# (the original passed a bare open() and never closed it).
with open('toolbox/example/corpus.txt') as corpus_file:
    pairs = toolbox.read_toolbox_file(corpus_file)
    records = toolbox.records(pairs, ['\\id', '\\ref'])
    rec1 = next(records)  # (context, data) for the first record
    normdata = toolbox.normalize_record(rec1[1], ['\\t', '\\g', '\\m'])
    # Each key is aligned to its value: '\m' tokens to '\t' tokens, and
    # '\g' tokens to '\m' tokens.
    alignments = {'\\m': '\\t', '\\g': '\\m'}
    for mkr, algns in toolbox.align_fields(normdata, alignments=alignments):
        print((mkr, algns))

('\\t', [('O        Pedro baixou             a        bola', ['O', 'Pedro', 'baixou', 'a', 'bola'])])
('\\m', [('O', ['O']), ('Pedro', ['Pedro']), ('baixou', ['bai', '-xou']), ('a', ['a']), ('bola', ['bola'])])
('\\g', [('O', ['the.M.SG']), ('Pedro', ['Pedro']), ('bai', ['lower']), ('-xou', ['-PST.IND.3SG']), ('a', ['the.F.SG']), ('bola', ['ball.F.SG'])])
('\\f', [(None, ['Pedro calmed down.'])])
('\\l', [(None, ['Pedro lowered the ball.'])])
