# NLPAug

In [2]:
# Install nlpaug into the kernel's own environment. The version is pinned to the
# one this notebook was run with so that a fresh run resolves the same package.
%pip install nlpaug==1.1.11


Collecting nlpaug
  Downloading nlpaug-1.1.11-py3-none-any.whl (410 kB)
     -------------------------------------- 410.5/410.5 kB 2.3 MB/s eta 0:00:00
Installing collected packages: nlpaug
Successfully installed nlpaug-1.1.11
Note: you may need to restart the kernel to use updated packages.


In [7]:
import nlpaug.augmenter.sentence as nas
import nlpaug.augmenter.word as naw
# Augmenters that modify a text input based on its semantic meaning.

### ContextualWordEmbsAug

In [18]:
"""
    action = insert : a new word is injected at a random position, chosen from
      the contextual word embeddings.
    action = substitute : a word is replaced, chosen from the contextual word embeddings."""

# Sentence used as input for both augmenters.
text = 'The quick brown fox jumps over the lazy dog .'

# Two augmenters built on the same BERT checkpoint; only the action differs.
aug = naw.ContextualWordEmbsAug(model_path='bert-base-uncased', action="insert")
aug_1 = naw.ContextualWordEmbsAug(model_path='bert-base-uncased', action="substitute")

# Insertion variant.
augmented_text = aug.augment(text)
print("ORIGINAL TEXT: ", text)
print("AUGMENTED TEXT WITH INSERTION: ", augmented_text)

# Blank lines separating the two results in the output.
print("\n")

# Substitution variant.
augmented_text_2 = aug_1.augment(text)
print("ORIGINAL TEXT: ", text)
print("AUGMENTED TEXT WITH SUBSTITUTION: ", augmented_text_2)

ORIGINAL TEXT:  The quick brown fox jumps over the lazy dog .
AUGMENTED TEXT WITH INSERTION:  ['the super quick brown fox who jumps first over the lazy dog.']


ORIGINAL TEXT:  The quick brown fox jumps over the lazy dog .
AUGMENTED TEXT WITH SUBSTITUTION:  ['the scared little girl jumps over the lazy dog.']


### Synonym augmentation

In [22]:
text = 'The quick brown fox jumps over the lazy dog .'

# SynonymAug parameters used here:
#   aug_src (str): synonym source; 'wordnet' and 'ppdb' are supported.
#   aug_max (int): maximum number of words that will be augmented.
syn_aug = naw.SynonymAug(
    aug_src='wordnet',
    aug_max=2,
)

# Request four augmented variants of the same sentence.
syn_aug_text = syn_aug.augment(text, n=4)

print("ORIGINAL TEXT: ", text)
print("\n")
print("AUGMENTED TEXT: ", syn_aug_text)


ORIGINAL TEXT:  The quick brown fox jumps over the lazy dog .


AUGMENTED TEXT:  ['The quick brown university fox jumps all over the lazy dog.', 'The quick brown fox jump out over the lazy frump.', 'The quick robert brown fox jumps concluded the lazy dog.', 'The immediate brown fox jumps over the indolent dog.']


### BackTranslation

In [3]:
# sacremoses must be installed to use XLMTokenizer, which the back-translation
# cell below relies on. The version is pinned to the one this notebook was run with.
%pip install sacremoses==0.0.53

Collecting sacremoses
  Downloading sacremoses-0.0.53.tar.gz (880 kB)
     ------------------------------------ 880.6/880.6 kB 140.3 kB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: sacremoses
  Building wheel for sacremoses (setup.py): started
  Building wheel for sacremoses (setup.py): finished with status 'done'
  Created wheel for sacremoses: filename=sacremoses-0.0.53-py3-none-any.whl size=895241 sha256=d1dfe75f75e2e17525cf02753499c63ac60ab879404769f04ecf5c10e048d965
  Stored in directory: c:\users\samir\appdata\local\pip\cache\wheels\12\1c\3d\46cf06718d63a32ff798a89594b61e7f345ab6b36d909ce033
Successfully built sacremoses
Installing collected packages: sacremoses
Successfully installed sacremoses-0.0.53
Note: you may need to restart the kernel to use updated packages.


In [11]:
# Back-translation augmenter created with the library's default settings.
text = 'The quick brown fox jumps over the lazy dog .'
aug = naw.BackTranslationAug()

print("ORIGINAL TEXT: ", text)
# Augment after printing the original, so the original is shown even if
# the augmentation step fails.
back_translated_text = aug.augment(text)
print("AUGMENTED TEXT: ", back_translated_text)

Downloading tokenizer_config.json:   0%|          | 0.00/67.0 [00:00<?, ?B/s]

Downloading vocab-src.json:   0%|          | 0.00/829k [00:00<?, ?B/s]

Downloading vocab-tgt.json:   0%|          | 0.00/829k [00:00<?, ?B/s]

Downloading merges.txt:   0%|          | 0.00/308k [00:00<?, ?B/s]

ORIGINAL TEXT:  The quick brown fox jumps over the lazy dog .
AUGMENTED TEXT:  ['The speedy brown fox jumps over the lazy dog.']


### Abstractive Summary  (Résumé abstrait)

In [24]:
# `nltk.sent_tokenize` is called below, so nltk has to be imported here;
# with the import commented out this cell raised NameError on a fresh kernel.
import nltk

# nas.AbstSummAug(): sentence-level augmenter that rewrites a text input
# using abstractive summarization.
aug = nas.AbstSummAug()

paragraph = """ With the election fervour at its peak, a report by the Association for Democratic Reforms (ADR) revealed that assets of 78, that is, 77% of MLAs in the Punjab legislative assembly who are re-contesting in this year’s state elections, has seen a surge ranging from 2% to 2,954%. The average asset growth, based on the analysis of their poll affidavits, of these 101 MLAs between the 2017 and 2022 assembly elections is ₹2.76 crore.

According to the report, Sukhbir Singh Badal of Shiromani Akali Dal (SAD) from the Jalalabad constituency has declared the maximum increase in assets, by ₹100 crore. His assets increased from ₹102 crore in 2017 to ₹202 crore in 2022. He is followed by Manpreet Singh Badal of the Indian National Congress whose assets increased by ₹32 crore, from ₹40 crore in 2017 to ₹72 crore in 2022.

However, assets of 21 MLAs have dropped from -2% to -74% in the last five years. Among those are chief minister Charanjit Singh Channi, whose assets decreased from ₹14.51 crore in 2017 to ₹9.45 crore in 2022. Punjab Congress president Navjot Singh Sidhu, too, saw a slight decrease in assets, from ₹45.9 crore in 2017 to ₹44.65 crore this year.
"""

# Split the paragraph into sentences and pass the sentence list to the augmenter.
# NOTE(review): sent_tokenize needs NLTK's Punkt tokenizer data to be installed
# (obtainable through nltk.download) — confirm it is present in the target environment.
sent_list = nltk.sent_tokenize(paragraph)
pred = aug.augment(sent_list)
print(sent_list)
print('\n')
print(pred)

[' With the election fervour at its peak, a report by the Association for Democratic Reforms (ADR) revealed that assets of 78, that is, 77% of MLAs in the Punjab legislative assembly who are re-contesting in this year’s state elections, has seen a surge ranging from 2% to 2,954%.', 'The average asset growth, based on the analysis of their poll affidavits, of these 101 MLAs between the 2017 and 2022 assembly elections is ₹2.76 crore.', 'According to the report, Sukhbir Singh Badal of Shiromani Akali Dal (SAD) from the Jalalabad constituency has declared the maximum increase in assets, by ₹100 crore.', 'His assets increased from ₹102 crore in 2017 to ₹202 crore in 2022.', 'He is followed by Manpreet Singh Badal of the Indian National Congress whose assets increased by ₹32 crore, from ₹40 crore in 2017 to ₹72 crore in 2022.', 'However, assets of 21 MLAs have dropped from -2% to -74% in the last five years.', 'Among those are chief minister Charanjit Singh Channi, whose assets decreased fr

^C

Note: you may need to restart the kernel to use updated packages.


In [13]:
# Chain the augmenters: back-translate `pred`, the output of the
# abstractive-summary cell.
aug = naw.BackTranslationAug()
back_translated_pred = aug.augment(pred)
print("AUGMENTED TEXT: ", back_translated_pred)


AUGMENTED TEXT:  ["A report by the Association for Democratic Reform (ADR) found that the assets of 78, or 77% of the Punjab legislature's MLAs running again in this year's parliamentary elections, have risen between 2% and 2,954%.", 'The average wealth growth of these 101 MLAs between the 2017 and 2022 parliamentary elections is 2.76 crore.', 'Sukhbir Singh Badal of the Shiromani Akali Dal (SAD) from the Jalalabad constituency explained the maximum increase in assets by 100 crore.', 'Assets rose from 102 crore in 2017 to 202 crore in 2022.', 'He is succeeded by Manpreet Singh Badal of the Indian National Congress, whose wealth has increased by 32 crore, from 40 crore in 2017 to 72 crore in 2022.', 'The assets of 21 MLAs have fallen from -2% to -74% in the last five years.', 'Among them is Prime Minister Charanjit Singh Chan, whose wealth has fallen from $14.51 billion in 2017 to $9.45 billion in 2022.', 'Navjot Singh Sidhu sees a slight decline in assets, from 45.9 crore in 2017 to 44