Skip to content

Commit

Permalink
Fix #77
Browse files: browse the repository at this point in the history
  • Loading branch information
makcedward committed Jan 8, 2020
1 parent 74b9477 commit 64aa4c5
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 15 deletions.
10 changes: 5 additions & 5 deletions nlpaug/augmenter/char/random.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def swap(self, data):
results = []
tokens = self.tokenizer(data)
aug_word_idxes = self._get_aug_idxes(tokens, self.aug_word_min, self.aug_word_max, self.aug_word_p, Method.WORD)
if aug_word_idxes is None:
if aug_word_idxes is None or len(aug_word_idxes) < 1:
return data

for token_i, token in enumerate(tokens):
Expand All @@ -137,17 +137,17 @@ def swap(self, data):

result = ''
chars = self.token2char(token)
original_chars = chars.copy()

aug_char_idxes = self._get_aug_idxes(chars, self.aug_char_min, self.aug_char_max, self.aug_char_p,
Method.CHAR)
if aug_char_idxes is None:
if aug_char_idxes is None or len(aug_char_idxes) < 1:
results.append(token)
continue

for char_i in aug_char_idxes:
swap_position = self._get_swap_position(char_i, len(chars)-1, mode=self.swap_mode)
is_original_upper, is_swap_upper = chars[char_i].isupper(), chars[swap_position].isupper()
original_chars = chars.copy()
chars[char_i], chars[swap_position] = original_chars[swap_position], original_chars[char_i]

# Swap case
Expand All @@ -171,7 +171,7 @@ def delete(self, data):
results = []
tokens = self.tokenizer(data)
aug_word_idxes = self._get_aug_idxes(tokens, self.aug_word_min, self.aug_word_max, self.aug_word_p, Method.WORD)
if aug_word_idxes is None:
if aug_word_idxes is None or len(aug_word_idxes) < 1:
return data

for token_i, token in enumerate(tokens):
Expand All @@ -182,7 +182,7 @@ def delete(self, data):
chars = self.token2char(token)
aug_char_idxes = self._get_aug_idxes(chars, self.aug_char_min, self.aug_char_max, self.aug_char_p,
Method.CHAR)
if aug_char_idxes is None:
if aug_char_idxes is None or len(aug_char_idxes) < 1:
results.append(token)
continue

Expand Down
26 changes: 16 additions & 10 deletions test/augmenter/char/test_random_char.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import unittest

from nlpaug.augmenter.char.random import RandomCharAug
from nlpaug.util import Action


class TestRandomCharReplaceAug(unittest.TestCase):
def test_insert_single_word(self):
texts = ['Zoology', 'roku123456']
aug = RandomCharAug(action=Action.INSERT, min_char=1)
aug = RandomCharAug(action='insert', min_char=1)
for text in texts:
augmented_text = aug.augment(text)
self.assertNotEqual(text, augmented_text)
Expand All @@ -17,7 +16,7 @@ def test_insert_single_word(self):

def test_insert_multi_words(self):
texts = ['The quick brown fox jumps over the lazy dog']
aug = RandomCharAug(action=Action.INSERT, min_char=1)
aug = RandomCharAug(action='insert', min_char=1)
for text in texts:
augmented_cnt = 0
augmented_text = aug.augment(text)
Expand All @@ -37,7 +36,7 @@ def test_insert_multi_words(self):

def test_substitute_single_word(self):
texts = ['Zoology', 'roku123456']
aug = RandomCharAug(action=Action.SUBSTITUTE, min_char=1)
aug = RandomCharAug(action='substitute', min_char=1)
for text in texts:
augmented_text = aug.augment(text)
self.assertNotEqual(text, augmented_text)
Expand All @@ -46,7 +45,7 @@ def test_substitute_single_word(self):

def test_substitute_multi_words(self):
texts = ['The quick brown fox jumps over the lazy dog']
aug = RandomCharAug(action=Action.SUBSTITUTE, min_char=1)
aug = RandomCharAug(action='substitute', min_char=1)
for text in texts:
augmented_cnt = 0
augmented_text = aug.augment(text)
Expand All @@ -64,11 +63,19 @@ def test_substitute_multi_words(self):
self.assertTrue(len(texts) > 0)

def test_swap(self):
texts = ['The quick brown fox jumps over the lazy dog']
aug = RandomCharAug(action=Action.SWAP, min_char=1)
texts = [
'The quick brown fox jumps over the lazy dog',
'testing'
]
aug = RandomCharAug(action="swap", min_char=1)
for text in texts:
augmented_cnt = 0
augmented_text = aug.augment(text)

augmented_text = text

# https://github.com/makcedward/nlpaug/issues/77
for i in range(10):
augmented_text = aug.augment(augmented_text)

tokens = aug.tokenizer(text)
augmented_tokens = aug.tokenizer(augmented_text)
Expand All @@ -77,14 +84,13 @@ def test_swap(self):
if token != augmented_token:
augmented_cnt += 1

self.assertLess(augmented_cnt, len(tokens))
self.assertNotEqual(text, augmented_text)

self.assertTrue(len(texts) > 0)

def test_delete(self):
tokens = ['Zoology', 'roku123456']
aug = RandomCharAug(action=Action.DELETE, min_char=1)
aug = RandomCharAug(action='delete', min_char=1)
for t in tokens:
augmented_text = aug.augment(t)
self.assertNotEqual(t, augmented_text)
Expand Down

0 comments on commit 64aa4c5

Please sign in to comment.