### Install Prerequisite

In [1]:
!pip install simpletransformers datasets tqdm pandas --user
!pip install transformers sentencepiece





### Importing Libraries

In [2]:
import os
import pandas as pd

import sklearn
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

import simpletransformers.t5 as st

import nltk, string
from nltk.translate.bleu_score import sentence_bleu

  from .autonotebook import tqdm as notebook_tqdm


### Dataset Loading

In [3]:
dataset_df = pd.read_csv('datasets/parallel_detoxification_dataset_small.tsv', sep='\t')

dataset_df.columns = ["input_text","target_text"]
dataset_df["prefix"] = "paraphrase"

dataset_df.head()

Unnamed: 0,input_text,target_text,prefix
0,. or the loud ass one - thousand ton beast roa...,or the loud one - thousand ton beast roaring ...,paraphrase
1,""" mandated "" and "" right fucking now "" would b...","""Mandated' and ""right now"" would be good.",paraphrase
2,""" mandated "" and "" right fucking now "" would b...","""mandated"" and"" right away"" would be good",paraphrase
3,""" mandated "" and "" right fucking now "" would b...",mandated and right would be good,paraphrase
4,* neither * of my coworkers gave a shit when i...,Neither of my co-workers cared when it came to...,paraphrase


### Train Test Split

In [4]:
train_data,test_data = train_test_split(dataset_df,test_size=0.1)

### Defininng Arguments for T5 Model

In [5]:
args = {
    "reprocess_input_data": True,
    "overwrite_output_dir": True,
    "max_seq_length": 256,
    "num_train_epochs": 4,
    "num_beams": None,
    "do_sample": True,
    "top_k": 50,
    "top_p": 0.95,
    "use_multiprocessing": False,
    "save_steps": -1,
    "save_eval_checkpoints": True,
    "evaluate_during_training": False,
    "adam_epsilon": 1e-08,
    "eval_batch_size": 6,
    "fp_16": False,
    "gradient_accumulation_steps": 16,
    "learning_rate": 0.0003,
    "max_grad_norm": 1.0,
    "n_gpu": 1,
    "seed": 42,
    "train_batch_size": 6,
    "warmup_steps": 0,
    "weight_decay": 0.0
}

### Defining Model

In [6]:
model = st.T5Model("t5","s-nlp/t5-paranmt-detox", args=args, use_cuda=False)

### Train Model

In [7]:
model.train_model(train_data, eval_data=test_data, use_cuda=True,acc=sklearn.metrics.accuracy_score)

`prepare_seq2seq_batch` is deprecated and will be removed in version 5 of HuggingFace Transformers. Use the regular
`__call__` method to prepare your inputs and targets.

Here is a short example:

model_inputs = tokenizer(src_texts, text_target=tgt_texts, ...)

If you either need to use different keyword arguments for the source and target texts, you should do two calls like
this:

model_inputs = tokenizer(src_texts, ...)
labels = tokenizer(text_target=tgt_texts, ...)
model_inputs["labels"] = labels["input_ids"]

See the documentation of your specific tokenizer for more details on the specific arguments to the tokenizer of choice.
For a more complete example, see the implementation of `prepare_seq2seq_batch`.

  "`as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your "
100%|██████████| 2500/2500 [00:01<00:00, 2010.41it/s]
Epoch 1 of 4:   0%|          | 0/4 [00:00<?, ?it/s]
Running Epoch 0 of 4:   0%|          | 0/417 [00:00<?, ?it/s][A
Epo

Epochs 0/4. Running Loss:    2.1916:  19%|█▉        | 81/417 [14:20<1:00:48, 10.86s/it][A
Epochs 0/4. Running Loss:    2.1916:  20%|█▉        | 82/417 [14:26<1:00:09, 10.77s/it][A
Epochs 0/4. Running Loss:    1.9055:  20%|█▉        | 82/417 [14:31<1:00:09, 10.77s/it][A
Epochs 0/4. Running Loss:    1.9055:  20%|█▉        | 83/417 [14:37<59:44, 10.73s/it]  [A
Epochs 0/4. Running Loss:    1.6819:  20%|█▉        | 83/417 [14:42<59:44, 10.73s/it][A
Epochs 0/4. Running Loss:    1.6819:  20%|██        | 84/417 [14:48<59:22, 10.70s/it][A
Epochs 0/4. Running Loss:    1.6126:  20%|██        | 84/417 [14:52<59:22, 10.70s/it][A
Epochs 0/4. Running Loss:    1.6126:  20%|██        | 85/417 [14:59<59:31, 10.76s/it][A
Epochs 0/4. Running Loss:    1.1664:  20%|██        | 85/417 [15:03<59:31, 10.76s/it][A
Epochs 0/4. Running Loss:    1.1664:  21%|██        | 86/417 [15:09<59:24, 10.77s/it][A
Epochs 0/4. Running Loss:    2.9434:  21%|██        | 86/417 [15:14<59:24, 10.77s/it][A
Epochs 0/4. R

Epochs 0/4. Running Loss:    2.3939:  41%|████      | 172/417 [30:40<43:25, 10.63s/it][A
Epochs 0/4. Running Loss:    2.3939:  41%|████▏     | 173/417 [30:46<42:50, 10.53s/it][A
Epochs 0/4. Running Loss:    2.1851:  41%|████▏     | 173/417 [30:51<42:50, 10.53s/it][A
Epochs 0/4. Running Loss:    2.1851:  42%|████▏     | 174/417 [30:57<43:01, 10.62s/it][A
Epochs 0/4. Running Loss:    1.9852:  42%|████▏     | 174/417 [31:01<43:01, 10.62s/it][A
Epochs 0/4. Running Loss:    1.9852:  42%|████▏     | 175/417 [31:08<42:57, 10.65s/it][A
Epochs 0/4. Running Loss:    1.7637:  42%|████▏     | 175/417 [31:12<42:57, 10.65s/it][A
Epochs 0/4. Running Loss:    1.7637:  42%|████▏     | 176/417 [31:20<44:21, 11.04s/it][A
Epochs 0/4. Running Loss:    1.6194:  42%|████▏     | 176/417 [31:24<44:21, 11.04s/it][A
Epochs 0/4. Running Loss:    1.6194:  42%|████▏     | 177/417 [31:30<43:32, 10.88s/it][A
Epochs 0/4. Running Loss:    0.7506:  42%|████▏     | 177/417 [31:35<43:32, 10.88s/it][A
Epochs 0/4

Epochs 0/4. Running Loss:    2.2864:  63%|██████▎   | 263/417 [47:09<27:51, 10.85s/it][A
Epochs 0/4. Running Loss:    2.2864:  63%|██████▎   | 264/417 [47:15<27:28, 10.78s/it][A
Epochs 0/4. Running Loss:    1.9667:  63%|██████▎   | 264/417 [47:20<27:28, 10.78s/it][A
Epochs 0/4. Running Loss:    1.9667:  64%|██████▎   | 265/417 [47:26<27:24, 10.82s/it][A
Epochs 0/4. Running Loss:    1.6520:  64%|██████▎   | 265/417 [47:30<27:24, 10.82s/it][A
Epochs 0/4. Running Loss:    1.6520:  64%|██████▍   | 266/417 [47:36<26:50, 10.67s/it][A
Epochs 0/4. Running Loss:    0.9333:  64%|██████▍   | 266/417 [47:41<26:50, 10.67s/it][A
Epochs 0/4. Running Loss:    0.9333:  64%|██████▍   | 267/417 [47:47<26:39, 10.66s/it][A
Epochs 0/4. Running Loss:    1.6861:  64%|██████▍   | 267/417 [47:51<26:39, 10.66s/it][A
Epochs 0/4. Running Loss:    1.6861:  64%|██████▍   | 268/417 [47:57<26:25, 10.64s/it][A
Epochs 0/4. Running Loss:    2.3533:  64%|██████▍   | 268/417 [48:02<26:25, 10.64s/it][A
Epochs 0/4

Epochs 0/4. Running Loss:    1.8309:  85%|████████▍ | 354/417 [1:03:22<11:24, 10.87s/it][A
Epochs 0/4. Running Loss:    1.3769:  85%|████████▍ | 354/417 [1:03:27<11:24, 10.87s/it][A
Epochs 0/4. Running Loss:    1.3769:  85%|████████▌ | 355/417 [1:03:33<11:15, 10.89s/it][A
Epochs 0/4. Running Loss:    1.2015:  85%|████████▌ | 355/417 [1:03:38<11:15, 10.89s/it][A
Epochs 0/4. Running Loss:    1.2015:  85%|████████▌ | 356/417 [1:03:44<11:00, 10.83s/it][A
Epochs 0/4. Running Loss:    1.9138:  85%|████████▌ | 356/417 [1:03:49<11:00, 10.83s/it][A
Epochs 0/4. Running Loss:    1.9138:  86%|████████▌ | 357/417 [1:03:55<10:50, 10.85s/it][A
Epochs 0/4. Running Loss:    1.5598:  86%|████████▌ | 357/417 [1:03:59<10:50, 10.85s/it][A
Epochs 0/4. Running Loss:    1.5598:  86%|████████▌ | 358/417 [1:04:06<10:35, 10.78s/it][A
Epochs 0/4. Running Loss:    1.7980:  86%|████████▌ | 358/417 [1:04:10<10:35, 10.78s/it][A
Epochs 0/4. Running Loss:    1.7980:  86%|████████▌ | 359/417 [1:04:16<10:28, 10

Epochs 1/4. Running Loss:    1.8851:   6%|▌         | 25/417 [04:31<1:09:02, 10.57s/it][A
Epochs 1/4. Running Loss:    1.8851:   6%|▌         | 26/417 [04:37<1:08:59, 10.59s/it][A
Epochs 1/4. Running Loss:    0.9499:   6%|▌         | 26/417 [04:41<1:08:59, 10.59s/it][A
Epochs 1/4. Running Loss:    0.9499:   6%|▋         | 27/417 [04:47<1:08:36, 10.55s/it][A
Epochs 1/4. Running Loss:    1.2886:   6%|▋         | 27/417 [04:52<1:08:36, 10.55s/it][A
Epochs 1/4. Running Loss:    1.2886:   7%|▋         | 28/417 [04:58<1:08:29, 10.56s/it][A
Epochs 1/4. Running Loss:    1.1715:   7%|▋         | 28/417 [05:02<1:08:29, 10.56s/it][A
Epochs 1/4. Running Loss:    1.1715:   7%|▋         | 29/417 [05:08<1:08:20, 10.57s/it][A
Epochs 1/4. Running Loss:    1.2212:   7%|▋         | 29/417 [05:13<1:08:20, 10.57s/it][A
Epochs 1/4. Running Loss:    1.2212:   7%|▋         | 30/417 [05:19<1:08:35, 10.63s/it][A
Epochs 1/4. Running Loss:    1.3890:   7%|▋         | 30/417 [05:23<1:08:35, 10.63s/it][A

Epochs 1/4. Running Loss:    1.9057:  28%|██▊       | 116/417 [20:40<54:03, 10.77s/it][A
Epochs 1/4. Running Loss:    0.9001:  28%|██▊       | 116/417 [20:44<54:03, 10.77s/it][A
Epochs 1/4. Running Loss:    0.9001:  28%|██▊       | 117/417 [20:50<53:20, 10.67s/it][A
Epochs 1/4. Running Loss:    1.2131:  28%|██▊       | 117/417 [20:55<53:20, 10.67s/it][A
Epochs 1/4. Running Loss:    1.2131:  28%|██▊       | 118/417 [21:01<53:02, 10.64s/it][A
Epochs 1/4. Running Loss:    1.2731:  28%|██▊       | 118/417 [21:05<53:02, 10.64s/it][A
Epochs 1/4. Running Loss:    1.2731:  29%|██▊       | 119/417 [21:11<52:47, 10.63s/it][A
Epochs 1/4. Running Loss:    1.4798:  29%|██▊       | 119/417 [21:16<52:47, 10.63s/it][A
Epochs 1/4. Running Loss:    1.4798:  29%|██▉       | 120/417 [21:22<52:05, 10.52s/it][A
Epochs 1/4. Running Loss:    1.6391:  29%|██▉       | 120/417 [21:26<52:05, 10.52s/it][A
Epochs 1/4. Running Loss:    1.6391:  29%|██▉       | 121/417 [21:32<51:48, 10.50s/it][A
Epochs 1/4

Epochs 1/4. Running Loss:    1.0166:  50%|████▉     | 207/417 [36:58<37:31, 10.72s/it][A
Epochs 1/4. Running Loss:    0.9552:  50%|████▉     | 207/417 [37:02<37:31, 10.72s/it][A
Epochs 1/4. Running Loss:    0.9552:  50%|████▉     | 208/417 [37:10<38:35, 11.08s/it][A
Epochs 1/4. Running Loss:    1.4381:  50%|████▉     | 208/417 [37:14<38:35, 11.08s/it][A
Epochs 1/4. Running Loss:    1.4381:  50%|█████     | 209/417 [37:20<38:12, 11.02s/it][A
Epochs 1/4. Running Loss:    1.6354:  50%|█████     | 209/417 [37:25<38:12, 11.02s/it][A
Epochs 1/4. Running Loss:    1.6354:  50%|█████     | 210/417 [37:31<37:25, 10.85s/it][A
Epochs 1/4. Running Loss:    1.4373:  50%|█████     | 210/417 [37:36<37:25, 10.85s/it][A
Epochs 1/4. Running Loss:    1.4373:  51%|█████     | 211/417 [37:42<37:24, 10.90s/it][A
Epochs 1/4. Running Loss:    1.4848:  51%|█████     | 211/417 [37:46<37:24, 10.90s/it][A
Epochs 1/4. Running Loss:    1.4848:  51%|█████     | 212/417 [37:53<37:02, 10.84s/it][A
Epochs 1/4

Epochs 1/4. Running Loss:    1.0085:  71%|███████▏  | 298/417 [53:15<20:52, 10.52s/it][A
Epochs 1/4. Running Loss:    1.2576:  71%|███████▏  | 298/417 [53:20<20:52, 10.52s/it][A
Epochs 1/4. Running Loss:    1.2576:  72%|███████▏  | 299/417 [53:26<21:00, 10.69s/it][A
Epochs 1/4. Running Loss:    0.5987:  72%|███████▏  | 299/417 [53:31<21:00, 10.69s/it][A
Epochs 1/4. Running Loss:    0.5987:  72%|███████▏  | 300/417 [53:37<20:50, 10.69s/it][A
Epochs 1/4. Running Loss:    1.3146:  72%|███████▏  | 300/417 [53:41<20:50, 10.69s/it][A
Epochs 1/4. Running Loss:    1.3146:  72%|███████▏  | 301/417 [53:48<20:34, 10.65s/it][A
Epochs 1/4. Running Loss:    1.4989:  72%|███████▏  | 301/417 [53:52<20:34, 10.65s/it][A
Epochs 1/4. Running Loss:    1.4989:  72%|███████▏  | 302/417 [53:58<20:22, 10.63s/it][A
Epochs 1/4. Running Loss:    1.2234:  72%|███████▏  | 302/417 [54:03<20:22, 10.63s/it][A
Epochs 1/4. Running Loss:    1.2234:  73%|███████▎  | 303/417 [54:09<20:07, 10.59s/it][A
Epochs 1/4

Epochs 1/4. Running Loss:    1.6267:  93%|█████████▎| 387/417 [1:09:22<05:18, 10.61s/it][A
Epochs 1/4. Running Loss:    1.6267:  93%|█████████▎| 388/417 [1:09:28<05:06, 10.56s/it][A
Epochs 1/4. Running Loss:    1.4341:  93%|█████████▎| 388/417 [1:09:32<05:06, 10.56s/it][A
Epochs 1/4. Running Loss:    1.4341:  93%|█████████▎| 389/417 [1:09:39<04:57, 10.62s/it][A
Epochs 1/4. Running Loss:    1.4665:  93%|█████████▎| 389/417 [1:09:43<04:57, 10.62s/it][A
Epochs 1/4. Running Loss:    1.4665:  94%|█████████▎| 390/417 [1:09:49<04:46, 10.61s/it][A
Epochs 1/4. Running Loss:    1.3082:  94%|█████████▎| 390/417 [1:09:54<04:46, 10.61s/it][A
Epochs 1/4. Running Loss:    1.3082:  94%|█████████▍| 391/417 [1:10:00<04:35, 10.61s/it][A
Epochs 1/4. Running Loss:    1.2874:  94%|█████████▍| 391/417 [1:10:05<04:35, 10.61s/it][A
Epochs 1/4. Running Loss:    1.2874:  94%|█████████▍| 392/417 [1:10:11<04:27, 10.69s/it][A
Epochs 1/4. Running Loss:    1.2122:  94%|█████████▍| 392/417 [1:10:16<04:27, 10

Epochs 2/4. Running Loss:    0.9039:  14%|█▍        | 59/417 [10:39<1:03:42, 10.68s/it][A
Epochs 2/4. Running Loss:    0.9039:  14%|█▍        | 60/417 [10:45<1:03:39, 10.70s/it][A
Epochs 2/4. Running Loss:    1.0637:  14%|█▍        | 60/417 [10:50<1:03:39, 10.70s/it][A
Epochs 2/4. Running Loss:    1.0637:  15%|█▍        | 61/417 [10:56<1:02:50, 10.59s/it][A
Epochs 2/4. Running Loss:    0.9920:  15%|█▍        | 61/417 [11:00<1:02:50, 10.59s/it][A
Epochs 2/4. Running Loss:    0.9920:  15%|█▍        | 62/417 [11:07<1:03:29, 10.73s/it][A
Epochs 2/4. Running Loss:    1.2014:  15%|█▍        | 62/417 [11:11<1:03:29, 10.73s/it][A
Epochs 2/4. Running Loss:    1.2014:  15%|█▌        | 63/417 [11:18<1:03:27, 10.76s/it][A
Epochs 2/4. Running Loss:    1.5506:  15%|█▌        | 63/417 [11:22<1:03:27, 10.76s/it][A
Epochs 2/4. Running Loss:    1.5506:  15%|█▌        | 64/417 [11:30<1:05:33, 11.14s/it][A
Epochs 2/4. Running Loss:    1.3860:  15%|█▌        | 64/417 [11:34<1:05:33, 11.14s/it][A

Epochs 2/4. Running Loss:    1.0303:  36%|███▌      | 150/417 [26:55<47:50, 10.75s/it][A
Epochs 2/4. Running Loss:    0.8677:  36%|███▌      | 150/417 [27:00<47:50, 10.75s/it][A
Epochs 2/4. Running Loss:    0.8677:  36%|███▌      | 151/417 [27:06<47:38, 10.75s/it][A
Epochs 2/4. Running Loss:    1.3707:  36%|███▌      | 151/417 [27:11<47:38, 10.75s/it][A
Epochs 2/4. Running Loss:    1.3707:  36%|███▋      | 152/417 [27:17<47:25, 10.74s/it][A
Epochs 2/4. Running Loss:    1.6475:  36%|███▋      | 152/417 [27:21<47:25, 10.74s/it][A
Epochs 2/4. Running Loss:    1.6475:  37%|███▋      | 153/417 [27:28<47:24, 10.78s/it][A
Epochs 2/4. Running Loss:    1.2814:  37%|███▋      | 153/417 [27:32<47:24, 10.78s/it][A
Epochs 2/4. Running Loss:    1.2814:  37%|███▋      | 154/417 [27:38<47:10, 10.76s/it][A
Epochs 2/4. Running Loss:    1.3236:  37%|███▋      | 154/417 [27:43<47:10, 10.76s/it][A
Epochs 2/4. Running Loss:    1.3236:  37%|███▋      | 155/417 [27:49<46:38, 10.68s/it][A
Epochs 2/4

Epochs 2/4. Running Loss:    1.3569:  58%|█████▊    | 241/417 [43:18<31:53, 10.87s/it][A
Epochs 2/4. Running Loss:    1.9387:  58%|█████▊    | 241/417 [43:23<31:53, 10.87s/it][A
Epochs 2/4. Running Loss:    1.9387:  58%|█████▊    | 242/417 [43:29<31:21, 10.75s/it][A
Epochs 2/4. Running Loss:    0.8227:  58%|█████▊    | 242/417 [43:33<31:21, 10.75s/it][A
Epochs 2/4. Running Loss:    0.8227:  58%|█████▊    | 243/417 [43:39<31:04, 10.71s/it][A
Epochs 2/4. Running Loss:    1.0062:  58%|█████▊    | 243/417 [43:44<31:04, 10.71s/it][A
Epochs 2/4. Running Loss:    1.0062:  59%|█████▊    | 244/417 [43:50<30:30, 10.58s/it][A
Epochs 2/4. Running Loss:    0.9360:  59%|█████▊    | 244/417 [43:54<30:30, 10.58s/it][A
Epochs 2/4. Running Loss:    0.9360:  59%|█████▉    | 245/417 [44:00<30:13, 10.55s/it][A
Epochs 2/4. Running Loss:    1.8647:  59%|█████▉    | 245/417 [44:05<30:13, 10.55s/it][A
Epochs 2/4. Running Loss:    1.8647:  59%|█████▉    | 246/417 [44:11<29:59, 10.52s/it][A
Epochs 2/4

Epochs 2/4. Running Loss:    0.9269:  80%|███████▉  | 332/417 [59:40<15:04, 10.64s/it][A
Epochs 2/4. Running Loss:    0.6833:  80%|███████▉  | 332/417 [59:44<15:04, 10.64s/it][A
Epochs 2/4. Running Loss:    0.6833:  80%|███████▉  | 333/417 [59:50<14:47, 10.56s/it][A
Epochs 2/4. Running Loss:    1.4002:  80%|███████▉  | 333/417 [59:55<14:47, 10.56s/it][A
Epochs 2/4. Running Loss:    1.4002:  80%|████████  | 334/417 [1:00:01<14:42, 10.63s/it][A
Epochs 2/4. Running Loss:    1.4981:  80%|████████  | 334/417 [1:00:05<14:42, 10.63s/it][A
Epochs 2/4. Running Loss:    1.4981:  80%|████████  | 335/417 [1:00:11<14:23, 10.53s/it][A
Epochs 2/4. Running Loss:    0.9024:  80%|████████  | 335/417 [1:00:16<14:23, 10.53s/it][A
Epochs 2/4. Running Loss:    0.9024:  81%|████████  | 336/417 [1:00:23<14:41, 10.88s/it][A
Epochs 2/4. Running Loss:    1.5545:  81%|████████  | 336/417 [1:00:27<14:41, 10.88s/it][A
Epochs 2/4. Running Loss:    1.5545:  81%|████████  | 337/417 [1:00:33<14:15, 10.69s/it]

Epochs 3/4. Running Loss:    1.3571:   1%|          | 3/417 [00:32<1:13:41, 10.68s/it][A
Epochs 3/4. Running Loss:    1.2393:   1%|          | 3/417 [00:36<1:13:41, 10.68s/it][A
Epochs 3/4. Running Loss:    1.2393:   1%|          | 4/417 [00:42<1:12:37, 10.55s/it][A
Epochs 3/4. Running Loss:    0.9219:   1%|          | 4/417 [00:47<1:12:37, 10.55s/it][A
Epochs 3/4. Running Loss:    0.9219:   1%|          | 5/417 [00:53<1:12:58, 10.63s/it][A
Epochs 3/4. Running Loss:    0.6857:   1%|          | 5/417 [00:57<1:12:58, 10.63s/it][A
Epochs 3/4. Running Loss:    0.6857:   1%|▏         | 6/417 [01:04<1:13:17, 10.70s/it][A
Epochs 3/4. Running Loss:    1.4749:   1%|▏         | 6/417 [01:08<1:13:17, 10.70s/it][A
Epochs 3/4. Running Loss:    1.4749:   2%|▏         | 7/417 [01:15<1:13:46, 10.80s/it][A
Epochs 3/4. Running Loss:    1.3822:   2%|▏         | 7/417 [01:19<1:13:46, 10.80s/it][A
Epochs 3/4. Running Loss:    1.3822:   2%|▏         | 8/417 [01:25<1:13:37, 10.80s/it][A
Epochs 3/4

Epochs 3/4. Running Loss:    0.9903:  22%|██▏       | 93/417 [16:47<57:54, 10.72s/it][A
Epochs 3/4. Running Loss:    1.1805:  22%|██▏       | 93/417 [16:52<57:54, 10.72s/it][A
Epochs 3/4. Running Loss:    1.1805:  23%|██▎       | 94/417 [16:58<58:02, 10.78s/it][A
Epochs 3/4. Running Loss:    0.7074:  23%|██▎       | 94/417 [17:03<58:02, 10.78s/it][A
Epochs 3/4. Running Loss:    0.7074:  23%|██▎       | 95/417 [17:09<57:42, 10.75s/it][A
Epochs 3/4. Running Loss:    1.1567:  23%|██▎       | 95/417 [17:13<57:42, 10.75s/it][A
Epochs 3/4. Running Loss:    1.1567:  23%|██▎       | 96/417 [17:20<59:15, 11.08s/it][A
Epochs 3/4. Running Loss:    0.8003:  23%|██▎       | 96/417 [17:25<59:15, 11.08s/it][A
Epochs 3/4. Running Loss:    0.8003:  23%|██▎       | 97/417 [17:31<58:18, 10.93s/it][A
Epochs 3/4. Running Loss:    1.0705:  23%|██▎       | 97/417 [17:36<58:18, 10.93s/it][A
Epochs 3/4. Running Loss:    1.0705:  24%|██▎       | 98/417 [17:42<57:32, 10.82s/it][A
Epochs 3/4. Running L

Epochs 3/4. Running Loss:    1.2429:  44%|████▍     | 184/417 [33:10<41:56, 10.80s/it][A
Epochs 3/4. Running Loss:    1.0149:  44%|████▍     | 184/417 [33:15<41:56, 10.80s/it][A
Epochs 3/4. Running Loss:    1.0149:  44%|████▍     | 185/417 [33:21<41:41, 10.78s/it][A
Epochs 3/4. Running Loss:    1.1416:  44%|████▍     | 185/417 [33:25<41:41, 10.78s/it][A
Epochs 3/4. Running Loss:    1.1416:  45%|████▍     | 186/417 [33:31<41:04, 10.67s/it][A
Epochs 3/4. Running Loss:    0.7893:  45%|████▍     | 186/417 [33:36<41:04, 10.67s/it][A
Epochs 3/4. Running Loss:    0.7893:  45%|████▍     | 187/417 [33:42<40:53, 10.67s/it][A
Epochs 3/4. Running Loss:    0.9032:  45%|████▍     | 187/417 [33:47<40:53, 10.67s/it][A
Epochs 3/4. Running Loss:    0.9032:  45%|████▌     | 188/417 [33:53<41:26, 10.86s/it][A
Epochs 3/4. Running Loss:    1.3088:  45%|████▌     | 188/417 [33:58<41:26, 10.86s/it][A
Epochs 3/4. Running Loss:    1.3088:  45%|████▌     | 189/417 [34:05<41:42, 10.98s/it][A
Epochs 3/4

Epochs 3/4. Running Loss:    0.8166:  66%|██████▌   | 275/417 [49:32<25:34, 10.80s/it][A
Epochs 3/4. Running Loss:    1.1402:  66%|██████▌   | 275/417 [49:37<25:34, 10.80s/it][A
Epochs 3/4. Running Loss:    1.1402:  66%|██████▌   | 276/417 [49:44<25:35, 10.89s/it][A
Epochs 3/4. Running Loss:    1.2208:  66%|██████▌   | 276/417 [49:48<25:35, 10.89s/it][A
Epochs 3/4. Running Loss:    1.2208:  66%|██████▋   | 277/417 [49:55<25:37, 10.98s/it][A
Epochs 3/4. Running Loss:    0.8293:  66%|██████▋   | 277/417 [50:00<25:37, 10.98s/it][A
Epochs 3/4. Running Loss:    0.8293:  67%|██████▋   | 278/417 [50:06<25:47, 11.13s/it][A
Epochs 3/4. Running Loss:    0.8089:  67%|██████▋   | 278/417 [50:11<25:47, 11.13s/it][A
Epochs 3/4. Running Loss:    0.8089:  67%|██████▋   | 279/417 [50:18<25:42, 11.18s/it][A
Epochs 3/4. Running Loss:    1.1870:  67%|██████▋   | 279/417 [50:22<25:42, 11.18s/it][A
Epochs 3/4. Running Loss:    1.1870:  67%|██████▋   | 280/417 [50:29<25:22, 11.12s/it][A
Epochs 3/4

Epochs 3/4. Running Loss:    0.7382:  88%|████████▊ | 365/417 [1:05:44<09:38, 11.12s/it][A
Epochs 3/4. Running Loss:    0.7263:  88%|████████▊ | 365/417 [1:05:48<09:38, 11.12s/it][A
Epochs 3/4. Running Loss:    0.7263:  88%|████████▊ | 366/417 [1:05:55<09:28, 11.15s/it][A
Epochs 3/4. Running Loss:    0.9550:  88%|████████▊ | 366/417 [1:06:00<09:28, 11.15s/it][A
Epochs 3/4. Running Loss:    0.9550:  88%|████████▊ | 367/417 [1:06:06<09:18, 11.16s/it][A
Epochs 3/4. Running Loss:    1.0510:  88%|████████▊ | 367/417 [1:06:11<09:18, 11.16s/it][A
Epochs 3/4. Running Loss:    1.0510:  88%|████████▊ | 368/417 [1:06:18<09:23, 11.49s/it][A
Epochs 3/4. Running Loss:    1.3120:  88%|████████▊ | 368/417 [1:06:23<09:23, 11.49s/it][A
Epochs 3/4. Running Loss:    1.3120:  88%|████████▊ | 369/417 [1:06:29<09:04, 11.34s/it][A
Epochs 3/4. Running Loss:    0.9270:  88%|████████▊ | 369/417 [1:06:34<09:04, 11.34s/it][A
Epochs 3/4. Running Loss:    0.9270:  89%|████████▊ | 370/417 [1:06:40<08:44, 11

(104, 1.2786510180538664)

In [9]:
root_dir = os.getcwd()
trained_model_path = os.path.join(root_dir,"outputs")

#arguments for saved model
args = {
"overwrite_output_dir": True,
"max_seq_length": 256,
"max_length": 50,
"top_k": 50,
"top_p": 0.95,
"num_return_sequences": 1
}

#Defining Fine-tuned Model
trained_model = st.T5Model("t5",trained_model_path,args=args, use_cuda=False)

### Model Prediction 

In [10]:
prefix = "paraphrase"
text = 'I am tired of this school shootings by black people'
pred = trained_model.predict([f"{prefix}: {text}"])
print(pred)

Generating outputs: 100%|██████████| 1/1 [00:00<00:00,  1.75it/s]
Decoding outputs: 100%|██████████| 1/1 [00:12<00:00, 12.16s/it]

['I have been sick of school shootings by black people']





### BLEU Score

In [13]:
def BLEU(text,pred):
    text = text.split()
    for can in pred:
        candidate = can.split()
        BLEU = sentence_bleu(text, candidate)
        print('BLEU score -> {} text -> {} pred -> {}' .format(BLEU,text,candidate))

### Cosine Similarity

In [14]:
nltk.download('punkt') 

stemmer = nltk.stem.porter.PorterStemmer()
remove_punctuation_map = dict((ord(char), None) for char in string.punctuation)

def stem_tokens(tokens):
    return [stemmer.stem(item) for item in tokens]

def normalize(text):
    return stem_tokens(nltk.word_tokenize(text.lower().translate(remove_punctuation_map)))

#defining vectorizer
vectorizer = TfidfVectorizer(tokenizer=normalize, stop_words='english')

def cosine_sim(text1, text2):
    tfidf = vectorizer.fit_transform([text1, text2])
    return ((tfidf * tfidf.T).A)[0,1]

def cosine_sim_pred(text, pred):
    for can in pred:
        cs = cosine_sim(text, can)
        print('CS score -> {} text -> {} pred -> {}' .format(cs,text,can))

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\JOY\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


### Detoxification Function

In [15]:
def detoxification(text):
    prefix = "paraphrase"
    pred = trained_model.predict([f"{prefix}: {text}"])
    
    BLEU(text,pred)
    cosine_sim_pred(text, pred)
   
    return pred

### Testing

In [16]:
detoxification("I will kill you")

Generating outputs: 100%|██████████| 1/1 [00:00<00:00,  2.42it/s]
Decoding outputs: 100%|██████████| 1/1 [00:04<00:00,  4.54s/it]

BLEU score -> 1.2882297539194154e-231 text -> ['I', 'will', 'kill', 'you'] pred -> ['I', 'will', 'defeat', 'you.']
CS score -> 0.0 text -> I will kill you pred -> I will defeat you.



The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
  % sorted(inconsistent)


['I will defeat you.']