In [3]:
import random
from nltk.corpus import wordnet
import nltk

# 確保第一次下載詞庫
# nltk.download('wordnet')
# nltk.download('omw-1.4')

def wordnet_replace(text, replace_word_num):
    words = text.split()
    if not words:
        return text

    new_words = words.copy()
    replaced = 0

    indices = list(range(len(words)))
    random.shuffle(indices)

    for i in indices:
        word = words[i]
        synonyms = set()
        for syn in wordnet.synsets(word):
            for lemma in syn.lemmas():
                synonym = lemma.name().replace('_', ' ')
                if synonym.lower() != word.lower():
                    synonyms.add(synonym)

        if synonyms:
            new_words[i] = random.choice(list(synonyms))
            replaced += 1

        if replaced >= replace_word_num:
            break

    return ' '.join(new_words)


In [6]:
sentence = "unable login company vpn unable login company vpn website trying open new session using link able get pls help urgently working home tomorrow due month end closing"
print(wordnet_replace(sentence, replace_word_num=3))

ineffective login company vpn unable login company vpn website trying open new sitting using link able dumbfound pls help urgently working home tomorrow due month end closing


In [1]:
import random
from nltk.corpus import wordnet

def get_synonyms(word):
    synonyms = set()
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            synonym = lemma.name().replace('_', ' ')
            if synonym.lower() != word.lower():
                synonyms.add(synonym)
    return list(synonyms)

def random_insertion(words, n):
    for _ in range(n):
        idx = random.randint(0, len(words) - 1)
        synonyms = get_synonyms(words[idx])
        if synonyms:
            insert_word = random.choice(synonyms)
            insert_pos = random.randint(0, len(words))
            words.insert(insert_pos, insert_word)
    return words

def random_deletion(words, p=0.2):
    # 以機率 p 刪除每個單字
    if len(words) == 1:
        return words
    return [word for word in words if random.uniform(0, 1) > p]

def random_swap(words, n):
    for _ in range(n):
        idx1, idx2 = random.sample(range(len(words)), 2)
        words[idx1], words[idx2] = words[idx2], words[idx1]
    return words

def eda_augment(text, num_ops=1, method='random'):
    """
    text: 原始句子
    num_ops: 操作次數（例如要插入幾個字）
    method: 'insert', 'delete', 'swap', 或 'random'
    """
    words = text.split()
    if method == 'insert':
        return ' '.join(random_insertion(words.copy(), num_ops))
    elif method == 'delete':
        return ' '.join(random_deletion(words.copy()))
    elif method == 'swap':
        return ' '.join(random_swap(words.copy(), num_ops))
    elif method == 'random':
        method = random.choice(['insert', 'delete', 'swap'])
        return eda_augment(text, num_ops, method)
    else:
        raise ValueError("Method must be 'insert', 'delete', 'swap', or 'random'")

In [2]:
sentence = "The quick brown fox jumps over the lazy dog"
print("🔁 插入：", eda_augment(sentence, num_ops=2, method='insert'))
print("❌ 刪除：", eda_augment(sentence, method='delete'))
print("🔄 交換：", eda_augment(sentence, num_ops=2, method='swap'))
print("🎲 隨機：", eda_augment(sentence, num_ops=2, method='random'))

🔁 插入： The quick brown fox jumps over brownness the lazy dog
❌ 刪除： quick brown fox jumps the lazy dog
🔄 交換： quick The brown fox the over jumps lazy dog
🎲 隨機： The quick brown fox jumps concluded over ended the lazy dog
