In [2]:
# This notebook uses the nlpaug package (see its dependencies here: https://github.com/makcedward/nlpaug), which
# needs to be installed if you want to run this (and will become a dependency when we add this to the module).
# It should (theoretically) be more widely used than the textattack package, and it looks more flexible and diverse.

# What attacks to add?
On the theoretical side of things, we want to add augmentations (attacks) that we think are likely to throw off the model.
There is no point in adding keyboard-driven misspellings or randomly shuffling the words in a phrase, since that would probably be too artificial and too distant from the actual use case of the model.

Attacks that make sense to me, in increasing order of how much they could break the model, are the following:
1. Replacing random words with synonyms.
2. Replacing random words with others that have a similar embedding.
3. Replacing random words with a plausible spelling mistake.
4. Adding filler words.
5. Replacing random words with antonyms (this should not change the argumentative structure, only the contents of the argument).
6. Replacing only the first word of a phrase (a likely B token) with its antonym.
7. Paraphrasing (or summarising) using other models (already in the nlpaug library).
8. Injecting a filler argumentative expression (from the TUDarmstadt annotation guidelines PDF) at the beginning of a phrase.

Also note that we could divide these into two major categories, namely word insertions and word substitutions.

In [3]:
import numpy as np

import nlpaug.augmenter.char as nac
import nlpaug.augmenter.word as naw
import nlpaug.augmenter.sentence as nas

import nltk
# Fetch the NLTK resources needed by the dictionary-based augmenters used below.
# NOTE(review): presumably the POS tagger and the WordNet data (incl. the Open
# Multilingual WordNet) back SynonymAug / AntonymAug -- confirm against nlpaug docs.
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/fededagos/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/fededagos/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /Users/fededagos/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [4]:
# Example sentence, wrapped in a one-element list, that every augmenter below is applied to.
text = [
    "From this point of view, I firmly believe that we should attach "
    "more importance to cooperation during primary education."
]

Note: for every augmenter we can choose the minimum and maximum number of words to change/insert, and with which probability to do so. Defaults are: min = 1, max = 10, p = 0.3

## Synonym
Requires downloading dictionaries

In [5]:
# Replace random words with synonyms, using the augmenter's default settings.
aug = naw.SynonymAug()
augmented_text = aug.augment(text)

# Show the input and the augmented result, one item per line.
print("Original:", text, "Augmented Text:", augmented_text, sep="\n")

Original:
['From this point of view, I firmly believe that we should attach more importance to cooperation during primary education.']
Augmented Text:
['From this point of vista, I firm think that we should attach more than importance to cooperation during chief education.']


# Random word insertion / substitution
Needs to download BERT (or any other model in transformers).
Results of this look pretty cool, and the attack also makes sense.

In [6]:
# Insert words proposed by a BERT model at random positions in the sentence.
aug = naw.ContextualWordEmbsAug(model_path="bert-base-uncased", action="insert")
augmented_text = aug.augment(text)

# Show the input and the augmented result, one item per line.
print("Original:", text, "Augmented Text:", augmented_text, sep="\n")

Original:
['From this point of view, I firmly believe that we should attach more importance to cooperation during primary education.']
Augmented Text:
['from this point be of view, and i firmly believe strongly that we children should attach dramatically more importance to cooperation than during proper primary education.']


## Random spelling mistakes
It is supposed to use random spelling mistakes taken from dictionaries, but the results look quite bad.

In [7]:
# Replace random words with misspelled variants, using the augmenter's default settings.
aug = naw.SpellingAug()
augmented_texts = aug.augment(text)

# Show the input and the augmented result, one item per line.
print("Original:", text, "Augmented Texts:", augmented_texts, sep="\n")

Original:
['From this point of view, I firmly believe that we should attach more importance to cooperation during primary education.']
Augmented Texts:
['Fron ths point of view, I firmely believe thai me should attack more importance to cooperation during primay education.']


# Antonyms
Also depends on downloaded dictionaries. This looks like one of the most interesting attacks: changing the sentiment and/or the contents of a claim still leaves it a claim. Will the model pick up on this?

In [14]:
# Replace random words with antonyms, using the augmenter's default settings.
aug = naw.AntonymAug()
augmented_text = aug.augment(text)

# Show the input and the augmented result, one item per line.
print("Original:", text, "Augmented Text:", augmented_text, sep="\n")

Original:
['From this point of view, I firmly believe that we should attach more importance to cooperation during primary education.']
Augmented Text:
['From this point of view, I firmly disbelieve that we should detach less importance to cooperation during secondary education.']


## Summarisation
This requires downloading T5. The results can be nonsensical (for example, the summary may attribute the sentence to an invented speaker).

In [9]:
# Rewrite the sentence with an abstractive summarisation model (T5).
aug = nas.AbstSummAug(model_path="t5-base")
augmented_text = aug.augment(text)

# Show the input and the augmented result, one item per line.
print("Original:", text, "Augmented Text:", augmented_text, sep="\n")

Original:
['From this point of view, I firmly believe that we should attach more importance to cooperation during primary education.']
Augmented Text:
['timothy stanley: we should attach more importance to cooperation during primary education. he says i firmly believe that we should give more emphasis to cooperation.']


# Random keyword change
The following is interesting: we can define a list of "reserved words" and the augmenter will swap each reserved word with another one from the list at random. We could change groups of logical connectives in this way, or overload the use of prepositions after the subject, etc.

In [10]:
# A single group of interchangeable expressions for the reserved-word augmenter.
reserved_tokens = [["I", "I therefore"]]
reserved_aug = naw.ReservedAug(reserved_tokens=reserved_tokens)
augmented_text = reserved_aug.augment(text)

# Show the input and the augmented result, one item per line.
print("Original:", text, "Augmented Text:", augmented_text, sep="\n")

Original:
['From this point of view, I firmly believe that we should attach more importance to cooperation during primary education.']
Augmented Text:
['From this point of view, I therefore firmly believe that we should attach more importance to cooperation during primary education.']


A more extensive example is the following:

In [11]:
# One larger group of interchangeable expressions: the bare pronoun "I" plus
# "I <adverb>" variants, all built from the same prefix.
reserved_tokens = [
    [
        "I",
        *(
            f"I {adverb}"
            for adverb in (
                "therefore",
                "actually",
                "basically",
                "seriously",
                "really",
                "highly",
                "totally",
                "absolutely",
            )
        ),
    ],
]
reserved_aug = naw.ReservedAug(reserved_tokens=reserved_tokens)
augmented_text = reserved_aug.augment(text)

# Show the input and the augmented result, one item per line.
print("Original:", text, "Augmented Text:", augmented_text, sep="\n")


Original:
['From this point of view, I firmly believe that we should attach more importance to cooperation during primary education.']
Augmented Text:
['From this point of view, I basically firmly believe that we should attach more importance to cooperation during primary education.']


## Custom fillers from TUDarmstadt

In [12]:
def filler_augment(text, fillers=None):
    """Prepend a randomly chosen filler phrase to ``text``.

    The default filler phrases are taken from the TUDarmstadt annotation
    guidelines, with the addition of some other common fillers used in English.
    Since the filler becomes the new beginning of the sentence, the first
    character of ``text`` is lower-cased -- unless ``text`` starts with the
    pronoun "I" ("I", "I'm", "I, ..."), which stays capitalised.

    Args:
        text: The sentence to augment, as a single string. An empty string is
            allowed; the chosen filler is then returned on its own.
        fillers: Optional sequence of filler phrases to draw from. Each phrase
            should already end with whatever separator is wanted (e.g. ``", "``
            or a space). Defaults to the built-in list below.

    Returns:
        The filler phrase followed by the (re-cased) original text.

    Raises:
        ValueError: If ``fillers`` is empty.
    """
    if fillers is None:
        fillers = [
            "According to the previous fact, ",
            "As can be seen, ",
            "For example, ",
            "Another important point which contributes to my argument is that ",
            "I agree to this view that ",
            "In this context, ",
            "At the end of the day, ",
        ]
    if len(fillers) == 0:
        raise ValueError("fillers must contain at least one filler phrase")

    # Draw the filler through numpy's global RNG so np.random.seed() controls it.
    filler = fillers[np.random.choice(len(fillers))]

    # Nothing to re-case or append: avoid indexing into an empty string.
    if not text:
        return filler

    # "I" followed by a non-letter (or by nothing) is the pronoun and must stay
    # upper-case; words such as "In" or "It" are still lower-cased.
    starts_with_pronoun_i = text[0] == "I" and not text[1:2].isalpha()
    first_char = text[0] if starts_with_pronoun_i else text[0].lower()

    return filler + first_char + text[1:]


In [13]:
# Apply the filler augmentation to the example sentence; the filler is drawn at random, so the output varies between runs.
filler_augment(text[0])

'I agree to this view that from this point of view, I firmly believe that we should attach more importance to cooperation during primary education.'