# Config

In [1]:
import nlpaug.augmenter.char as nac
import nlpaug.augmenter.word as naw
import nlpaug.augmenter.sentence as nas
import nlpaug.flow as naf

from nlpaug.util import Action

In [2]:
# Sample input sentence kept in the notebook's global namespace.
# NOTE(review): the cells shown below pass their own literal to augment()
# instead of reading `text` — confirm whether this variable is still needed.
text = "The quick fear fox jumps over the lazy"

# Sentence Augmenter

In [3]:
# Sentence-level augmenter loaded from the 'gpt2' model path. With
# include_detail=True the call to augment() is unpacked into the augmented
# text plus a sequence of change-log entries.
aug = nas.ContextualWordEmbsForSentenceAug(
    model_path='gpt2',
    include_detail=True,
)
augmented_data, change_logs = aug.augment(
    'The quick brown fox jumps over the lazy dog.'
)
print(f'augmented_data: {augmented_data}')

# Show only the final change-log entry; nothing is printed when there are
# no entries at all.
if len(change_logs) > 0:
    change_log = change_logs[-1]
    print(f'change_log: {change_log}')

augmented_data: The quick brown fox jumps over the lazy dog. " " They 're going to leave at 8 pm " the fox begins to be g rief less ." The fox be gins to s us pect in j
change_log: {'orig_token': '', 'orig_start_pos': -1, 'new_token': 'j', 'new_start_pos': 151, 'change_seq': 30, 'action': 'insert'}


# Word Augmenter

In [4]:
# Contextual word-embedding augmenter loaded from 'bert-base-uncased'.
# include_detail=True: augment() is unpacked into (augmented text, change log).
aug = naw.ContextualWordEmbsAug(
    model_path='bert-base-uncased',
    include_detail=True,
)
augmented_data, change_log = aug.augment(
    'The quick brown fox jumps over the lazy dog.'
)
print(f'augmented_data: {augmented_data}')
print(f'change_log: {change_log}')

augmented_data: a quick brown cat jumps after the lazy dog .
change_log: [{'orig_token': 'over', 'orig_start_pos': 26, 'new_token': 'after', 'new_start_pos': 24, 'change_seq': 1, 'action': 'substitute'}, {'orig_token': 'fox', 'orig_start_pos': 16, 'new_token': 'cat', 'new_start_pos': 14, 'change_seq': 2, 'action': 'substitute'}, {'orig_token': 'the', 'orig_start_pos': 0, 'new_token': 'a', 'new_start_pos': 0, 'change_seq': 3, 'action': 'substitute'}]


# Character Augmenter

In [5]:
# Character-level keyboard augmenter. include_detail=True: augment() is
# unpacked into (augmented text, change log).
aug = nac.KeyboardAug(include_detail=True)
augmented_data, change_log = aug.augment(
    'The quick brown fox jumps over the lazy dog.'
)
print(f'augmented_data: {augmented_data}')
print(f'change_log: {change_log}')

augmented_data: The quiSk brown fox jumps over the lazy dog .
change_log: [{'orig_token': 'quick', 'orig_start_pos': 4, 'new_token': 'quiSk', 'new_start_pos': 4, 'change_seq': 1, 'action': 'substitute'}]


# Pipeline

In [6]:
# Two-level flow: outer 'pipe1' runs 'aug1' and then the nested flow 'pipe2',
# which in turn holds 'aug2' and 'aug3'. include_detail is deliberately set
# differently per stage: True on 'pipe1' and 'aug3', False everywhere else.
# NOTE(review): the two-value unpacking below suggests the outermost setting
# decides the return shape — confirm against the nlpaug flow documentation.
first_stage = naw.RandomWordAug(
    action='substitute',
    target_words=['A'],
    name='aug1',
    include_detail=False,
)
nested_flow = naf.Sequential(
    [
        naw.RandomWordAug(
            action='substitute',
            target_words=['D'],
            name='aug2',
            include_detail=False,
        ),
        naw.RandomWordAug(name='aug3', include_detail=True),
    ],
    include_detail=False,
    name='pipe2',
)
aug = naf.Sequential(
    [first_stage, nested_flow],
    include_detail=True,
    name='pipe1',
)

augmented_data, change_log = aug.augment(
    'The quick brown fox jumps over the lazy dog.'
)
print(f'augmented_data: {augmented_data}')
print(f'change_log: {change_log}')

augmented_data: D brown D over A D .
change_log: [{'orig_token': 'lazy', 'orig_start_pos': 35, 'new_token': 'A', 'new_start_pos': 31, 'change_seq': 1, 'action': 'substitute'}, {'orig_token': 'the', 'orig_start_pos': 31, 'new_token': 'A', 'new_start_pos': 29, 'change_seq': 2, 'action': 'substitute'}, {'orig_token': 'The', 'orig_start_pos': 0, 'new_token': 'A', 'new_start_pos': 0, 'change_seq': 3, 'action': 'substitute'}, {'orig_token': 'dog', 'orig_start_pos': 33, 'new_token': 'D', 'new_start_pos': 27, 'change_seq': 4, 'action': 'substitute'}, {'orig_token': 'fox', 'orig_start_pos': 14, 'new_token': 'D', 'new_start_pos': 10, 'change_seq': 5, 'action': 'substitute'}, {'orig_token': 'quick', 'orig_start_pos': 2, 'new_token': 'D', 'new_start_pos': 2, 'change_seq': 6, 'action': 'substitute'}, {'orig_token': 'A', 'orig_start_pos': 23, 'new_token': '', 'new_start_pos': 15, 'change_seq': 7, 'action': 'delete'}, {'orig_token': 'jumps', 'orig_start_pos': 12, 'new_token': '', 'new_start_pos': 10,