Reference — BERT paper: https://arxiv.org/pdf/1810.04805.pdf

In [1]:
import os
import sys

# Put the parent directory on the import path so modules that live one level
# above this notebook can be imported by the cells below.
# `sys` is imported directly instead of reaching through `os.sys`, which only
# works because the `os` module happens to import `sys` internally.
sys.path.append('..')

In [2]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [3]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import collections
import logging
import json
import math
import os
import random
import six
from tqdm import tqdm_notebook as tqdm
from IPython.display import HTML, display

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from torch.utils.data.distributed import DistributedSampler

import tokenization
from modeling import BertConfig, BertForMaskedLanguageModelling
from optimization import BERTAdam
from masked_language_model import notqdm, convert_tokens_to_features, LMProcessor, predict_masked_words, predict_next_words


In [4]:

# Notebook-wide logging: INFO level, timestamped records.
logging.basicConfig(
    level=logging.INFO,
    datefmt='%m/%d/%Y %H:%M:%S',
    format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
)
logger = logging.getLogger(__name__)

# Args

In [5]:
# Command-line style configuration for the run. The notebook never reads
# sys.argv; a later cell builds an argv list by hand and feeds it to
# `parser.parse_args`.
parser = argparse.ArgumentParser()

## Required parameters
parser.add_argument("--data_dir",
                    default=None,
                    type=str,
                    required=True,
                    help="The input data dir. Should contain the .tsv files (or other data files) for the task.")
parser.add_argument("--bert_config_file",
                    default=None,
                    type=str,
                    required=True,
                    help="The config json file corresponding to the pre-trained BERT model. \n"
                         "This specifies the model architecture.")
parser.add_argument("--task_name",
                    default=None,
                    type=str,
                    required=True,
                    help="The name of the task to train.")
parser.add_argument("--vocab_file",
                    default=None,
                    type=str,
                    required=True,
                    help="The vocabulary file that the BERT model was trained on.")
parser.add_argument("--output_dir",
                    default=None,
                    type=str,
                    required=True,
                    help="The output directory where the model checkpoints will be written.")

## Other parameters
parser.add_argument("--init_checkpoint",
                    default=None,
                    type=str,
                    help="Initial checkpoint (usually from a pre-trained BERT model).")
parser.add_argument("--do_lower_case",
                    default=False,
                    action='store_true',
                    help="Whether to lower case the input text. True for uncased models, False for cased models.")
parser.add_argument("--max_seq_length",
                    default=128,
                    type=int,
                    help="The maximum total input sequence length after WordPiece tokenization. \n"
                         "Sequences longer than this will be truncated, and sequences shorter \n"
                         "than this will be padded.")
parser.add_argument("--do_train",
                    default=False,
                    action='store_true',
                    help="Whether to run training.")
parser.add_argument("--do_eval",
                    default=False,
                    action='store_true',
                    help="Whether to run eval on the dev set.")
parser.add_argument("--train_batch_size",
                    default=32,
                    type=int,
                    help="Total batch size for training.")
parser.add_argument("--eval_batch_size",
                    default=8,
                    type=int,
                    help="Total batch size for eval.")
parser.add_argument("--learning_rate",
                    default=5e-5,
                    type=float,
                    help="The initial learning rate for Adam.")
parser.add_argument("--num_train_epochs",
                    default=3.0,
                    type=float,
                    help="Total number of training epochs to perform.")
# `%%` is required here: argparse %-formats help strings, so a bare `%` would break --help.
parser.add_argument("--warmup_proportion",
                    default=0.1,
                    type=float,
                    help="Proportion of training to perform linear learning rate warmup for. "
                         "E.g., 0.1 = 10%% of training.")
parser.add_argument("--no_cuda",
                    default=False,
                    action='store_true',
                    help="Whether not to use CUDA when available")
parser.add_argument("--local_rank",
                    type=int,
                    default=-1,
                    help="local_rank for distributed training on gpus")
parser.add_argument('--seed',
                    type=int,
                    default=42,
                    help="random seed for initialization")
# The trailing `;` keeps the returned argparse action from being echoed as the cell's output.
parser.add_argument('--gradient_accumulation_steps',
                    type=int,
                    default=1,
                    help="Number of update steps to accumulate before performing a backward/update pass.");

_StoreAction(option_strings=['--gradient_accumulation_steps'], dest='gradient_accumulation_steps', nargs=None, const=None, default=1, type=<class 'int'>, choices=None, help='Number of updates steps to accumualte before performing a backward/update pass.', metavar=None)

In [6]:
experiment_name = 'erotica_cased_7_tied_mlm'

# Build the argument list the way a shell would pass it, then parse it with
# the parser defined above. One option per line; `str.split()` with no
# separator splits on any run of whitespace (spaces, newlines, indentation),
# so stray double spaces cannot produce empty argv entries.
# NOTE: paths containing spaces are not supported by this simple splitting.
argv = """
--task_name lm
--data_dir {DATA_DIR}
--vocab_file {BERT_BASE_DIR}/vocab.txt
--bert_config_file {BERT_BASE_DIR}/bert_config.json
--init_checkpoint {BERT_BASE_DIR}/pytorch_model.bin
--do_train
--do_eval
--gradient_accumulation_steps 2
--train_batch_size 16
--learning_rate 3e-5
--num_train_epochs 3.0
--max_seq_length 128
--output_dir ../outputs/{name}/
""".format(
    BERT_BASE_DIR='../data/weights/cased_L-12_H-768_A-12',
    DATA_DIR='../data/input/erotica_gutenberg',
    name=experiment_name
).split()
print(argv)
args = parser.parse_args(argv)

['--task_name', 'lm', '--data_dir', '../data/input/erotica_gutenberg', '--vocab_file', '../data/weights/cased_L-12_H-768_A-12/vocab.txt', '--bert_config_file', '../data/weights/cased_L-12_H-768_A-12/bert_config.json', '--init_checkpoint', '../data/weights/cased_L-12_H-768_A-12/pytorch_model.bin', '--do_train', '--do_eval', '--gradient_accumulation_steps', '2', '--train_batch_size', '16', '--learning_rate', '3e-5', '--num_train_epochs', '3.0', '--max_seq_length', '128', '--output_dir', '../outputs/erotica_cased_7_tied_mlm/']


# Init

In [7]:
# Choose the compute device. Non-distributed runs (local_rank == -1) or
# --no_cuda runs use CUDA only when it is available and not disabled;
# otherwise this process is pinned to the GPU numbered `local_rank` and joins
# the NCCL process group.
if args.local_rank == -1 or args.no_cuda:
    device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    n_gpu = torch.cuda.device_count()
else:
    device = torch.device("cuda", args.local_rank)
    n_gpu = 1
    # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
    torch.distributed.init_process_group(backend='nccl')
logger.info("device %s n_gpu %d distributed training %r", device, n_gpu, bool(args.local_rank != -1))

if args.gradient_accumulation_steps < 1:
    raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(
                        args.gradient_accumulation_steps))

# From here on `args.train_batch_size` is the per-forward/backward batch size:
# the requested batch size divided by the number of accumulation steps.
# The requested value is remembered the first time this cell runs, so that
# re-executing the cell does not divide the batch size a second time.
if not hasattr(args, 'requested_train_batch_size'):
    args.requested_train_batch_size = args.train_batch_size
args.train_batch_size = args.requested_train_batch_size // args.gradient_accumulation_steps
if args.train_batch_size < 1:
    raise ValueError("train_batch_size ({}) must be >= gradient_accumulation_steps ({})".format(
                        args.requested_train_batch_size, args.gradient_accumulation_steps))

# Seed every random number generator in use so runs are repeatable.
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if n_gpu > 0:
    torch.cuda.manual_seed_all(args.seed)

if not args.do_train and not args.do_eval:
    raise ValueError("At least one of `do_train` or `do_eval` must be True.")

12/08/2018 09:02:23 - INFO - __main__ -   device cuda n_gpu 1 distributed training False


In [8]:
# Read the model architecture description from the JSON config file.
bert_config = BertConfig.from_json_file(args.bert_config_file)

# Refuse sequence lengths beyond the number of position embeddings in the config.
if bert_config.max_position_embeddings < args.max_seq_length:
    raise ValueError(
        "Cannot use sequence length {} because the BERT model was only trained up to sequence length {}"
        .format(args.max_seq_length, bert_config.max_position_embeddings)
    )

In [9]:
# Warn, but carry on, when the output directory already holds files.
if os.path.exists(args.output_dir):
    if os.listdir(args.output_dir):
        print("Output directory ({}) already exists and is not empty.".format(args.output_dir))
os.makedirs(args.output_dir, exist_ok=True)

# File used both for saving the model's state_dict and for resuming from it.
save_path = os.path.join(args.output_dir, 'state_dict.pkl')
save_path

Output directory (../outputs/erotica_cased_7_tied_mlm/) already exists and is not empty.


'../outputs/erotica_cased_7_tied_mlm/state_dict.pkl'

# Load data

In [10]:
# Tokenizer built from the vocabulary file and casing option given in `args`.
tokenizer = tokenization.FullTokenizer(
    do_lower_case=args.do_lower_case,
    vocab_file=args.vocab_file,
)

# Inverse of the WordPiece vocab mapping: each vocab value maps back to its key.
decoder = dict(
    (value, key) for key, value in tokenizer.wordpiece_tokenizer.vocab.items()
)

In [11]:
# Registry of task name -> processor class; only the language-model task exists here.
processors = {"lm": LMProcessor}

# Task names are matched case-insensitively.
task_name = args.task_name.lower()
if task_name not in processors:
    raise ValueError("Task not found: %s" % (task_name))

processor_cls = processors[task_name]
processor = processor_cls(tokenizer=tokenizer)
label_list = processor.get_labels()

In [12]:
train_examples = processor.get_train_examples(args.data_dir, skip=30, tqdm=tqdm)

# Total number of optimizer updates, passed to the optimizer as `t_total`.
# `args.train_batch_size` has already been divided by
# `gradient_accumulation_steps` in the init cell, so len / batch_size counts
# forward/backward passes. The optimizer only steps once every
# `gradient_accumulation_steps` passes, hence the extra division. Without it
# `t_total` is too large by that factor.
num_train_steps = int(
    len(train_examples) / args.train_batch_size / args.gradient_accumulation_steps * args.num_train_epochs)

HBox(children=(IntProgress(value=0, description='tokenising', max=15111, style=ProgressStyle(description_width…




HBox(children=(IntProgress(value=0, description='chunking', max=50777, style=ProgressStyle(description_width='…




In [13]:
# Turn the training examples into feature objects, then stack each feature
# field into one int64 tensor with one entry per feature along the first axis.
# NOTE(review): presumably each field is a fixed-length list of max_seq_length
# ints (torch.tensor needs equal lengths) — confirm in convert_tokens_to_features.
train_features = convert_tokens_to_features(
    train_examples, label_list, args.max_seq_length, tokenizer, tqdm=tqdm)

all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long)
all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long)
all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long)
all_label_ids = torch.tensor([f.label_id for f in train_features], dtype=torch.long)
# NOTE(review): label weights are stored as integers (torch.long); fractional weights would be truncated.
all_label_weights = torch.tensor([f.label_weights for f in train_features], dtype=torch.long)

HBox(children=(IntProgress(value=0, max=50777), HTML(value='')))




In [14]:
# Bundle the five feature tensors into one dataset; each item is a 5-tuple in this order.
train_data = TensorDataset(
    all_input_ids,
    all_input_mask,
    all_segment_ids,
    all_label_ids,
    all_label_weights,
)
# Random sampling for a non-distributed run, DistributedSampler otherwise.
train_sampler = (
    RandomSampler(train_data) if args.local_rank == -1 else DistributedSampler(train_data)
)
train_dataloader = DataLoader(
    train_data,
    batch_size=args.train_batch_size,
    sampler=train_sampler,
)

# Load model

In [15]:
# Show which checkpoint file the model cell below loads into `model.bert`.
args.init_checkpoint

'../data/weights/cased_L-12_H-768_A-12/pytorch_model.bin'

In [16]:
# Show the state_dict path: the model cell resumes from it if the file exists, and training saves to it.
save_path

'../outputs/erotica_cased_7_tied_mlm/state_dict.pkl'

In [17]:
# Build the masked-LM model from the config, then load weights in two stages.
# Statement order matters: pretrained weights first, then an optional resume,
# then the device move, then the parallel wrappers.
model = BertForMaskedLanguageModelling(bert_config)
if args.init_checkpoint is not None:
    # Only the `bert` sub-module is initialised from the --init_checkpoint file.
    model.bert.load_state_dict(torch.load(args.init_checkpoint, map_location='cpu'))
    
# If an earlier run left a state_dict in the output directory, it replaces the
# weights loaded above for the whole model (i.e. training resumes from it).
if os.path.isfile(save_path):
    model.load_state_dict(torch.load(save_path, map_location='cpu'))
    
model.to(device)

# Distributed runs wrap the model in DistributedDataParallel on this process's
# GPU; a non-distributed run with several GPUs uses DataParallel instead.
if args.local_rank != -1:
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank],
                                                      output_device=args.local_rank)
elif n_gpu > 1:
    model = torch.nn.DataParallel(model)
    
# Display the module tree as the cell output.
model

BertForMaskedLanguageModelling(
  (bert): BertModel(
    (embeddings): BERTEmbeddings(
      (word_embeddings): Embedding(28996, 768)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): BERTLayerNorm()
      (dropout): Dropout(p=0.1)
    )
    (encoder): BERTEncoder(
      (layer): ModuleList(
        (0): BERTLayer(
          (attention): BERTAttention(
            (self): BERTSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1)
            )
            (output): BERTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): BERTLayerNorm()
              (dropout): Dropout(p=0.1)
            )
          )
          (intermediate): BERTInterm

# Opt

In [18]:
# Parameters whose own name (the last component of the dotted name returned by
# `named_parameters()`, e.g. "...query.bias") is listed here are excluded from
# weight decay.
# The previous test `n in no_decay` compared the full dotted name with these
# short names, so it never matched and every parameter was decayed.
no_decay = ['bias', 'gamma', 'beta']
optimizer_parameters = [
    {'params': [p for n, p in model.named_parameters() if n.rsplit('.', 1)[-1] not in no_decay],
     'weight_decay_rate': 0.01},
    {'params': [p for n, p in model.named_parameters() if n.rsplit('.', 1)[-1] in no_decay],
     'weight_decay_rate': 0.0}
    ]

# `t_total` is the planned number of optimizer updates and `warmup` the
# fraction of them used for learning-rate warmup (see --warmup_proportion).
optimizer = BERTAdam(optimizer_parameters,
                     lr=args.learning_rate,
                     warmup=args.warmup_proportion,
                     t_total=num_train_steps)

# Train 3

In [19]:
# Fixed validation passage. Later cells (including the training loop) reuse
# `val_test` to display sample predictions.
val_test="""The next day I was somewhat somnolent, of which you may be sure Miss Frankland took no notice. She retired to her own room when we went for our recreation. My friends scolded me for not coming to them the previous night, but I told them that my parents had continued to move about her room for so long a time that I had fallen fast asleep, and even then had not had enough, as they might have observed how sleepy I had been all day."""
# Baseline before fine-tuning in this session: continue the passage with n=30, T=1.
display(predict_next_words(val_test, processor, tokenizer, model, max_seq_length=args.max_seq_length, n=30, T=1, device=device))

In [20]:
# NOTE(review): this raises ZeroDivisionError on purpose — presumably a stop
# marker so that "Run All" halts before the cells below. Confirm the intent and
# consider removing it or replacing it with an explicit flag.
1/0

ZeroDivisionError: division by zero

In [None]:
# The alternative passage below is commented out, so this cell uses whatever
# `val_test` currently holds in the kernel.
# val_test="""The next day I was somewhat somnolent, of which you may be sure Miss Frankland took no notice. She retired to her own room when we went for our recreation. My sisters scolded me for not coming to them the previous night, but I told them that Miss F. had continued to move about her room for so long a time that I had fallen fast asleep, and even then had not had enough, as they might have observed how sleepy I had been all"""
display(predict_next_words(val_test, processor, tokenizer, model, max_seq_length=args.max_seq_length, n=20, T=1, device=device))

In [None]:
# Display masked-word predictions for the current `val_test` passage.
display(predict_masked_words(val_test, processor, tokenizer, model, device=device, max_seq_length=args.max_seq_length))

In [None]:
# Step counter incremented by the training cell below. Re-running this cell resets it to zero.
global_step = 0

In [None]:
# Fine-tune the model for int(num_train_epochs) passes over the training data.
# Gradients are accumulated over `gradient_accumulation_steps` batches before
# each optimizer update, and the state_dict is saved after every epoch.
model.train()
for _ in tqdm(range(int(args.num_train_epochs)), desc="Epoch"):
    tr_loss, nb_tr_examples, nb_tr_steps = 0, 0, 0
    with tqdm(total=len(train_dataloader), desc='Iteration', mininterval=60) as prog:
        for step, batch in enumerate(train_dataloader):

            batch = tuple(t.to(device) for t in batch)
            input_ids, input_mask, segment_ids, label_ids, label_weights = batch
            loss, logits = model(input_ids, segment_ids, input_mask, label_ids, label_weights)
            if n_gpu > 1:
                loss = loss.mean() # mean() to average on multi-gpu.
            if args.gradient_accumulation_steps > 1:
                # Scale so that the accumulated gradient matches one full batch.
                loss = loss / args.gradient_accumulation_steps
            loss.backward()
            tr_loss += loss.item()
            nb_tr_examples += input_ids.size(0)
            nb_tr_steps += 1
            if (step + 1) % args.gradient_accumulation_steps == 0:
                optimizer.step()    # We have accumulated enough gradients
                model.zero_grad()
                # Count optimizer updates. This used to sit after the loop,
                # where it ran only once per execution of the cell.
                global_step += 1
            prog.update(1)
            # Running sum of the (scaled) batch losses divided by the number of
            # examples seen since the last reset.
            prog.desc = 'Iter. loss={:2.6f}'.format(tr_loss/nb_tr_examples)
            if step%3000==10:
                # Periodic qualitative check on the validation passage.
                print('step', step, 'loss', tr_loss/nb_tr_examples)
                display(predict_masked_words(val_test, processor, tokenizer, model, device=device, max_seq_length=args.max_seq_length))
                display(predict_next_words(val_test, processor, tokenizer, model, max_seq_length=args.max_seq_length, n=10, device=device))
                # NOTE(review): the prediction helpers may switch the model to
                # eval mode (not visible from this notebook). Re-entering train
                # mode is a no-op if they do not.
                model.train()
                tr_loss, nb_tr_examples, nb_tr_steps = 0, 0, 0

            # TODO validation test at end of each epoch to check for overfitting

    # Checkpoint after every epoch.
    torch.save(model.state_dict(), save_path)

In [None]:
# Continue the current `val_test` passage with n=10, with the helper's debug option off.
display(predict_next_words(val_test, processor, tokenizer, model, max_seq_length=args.max_seq_length, n=10, device=device, debug=False))

In [None]:
# Write the model's current state_dict to `save_path`, overwriting any earlier checkpoint there.
torch.save(model.state_dict(), save_path)

In [55]:
# Prompt-continuation sample: n=100, T=1. Note that this overwrites `val_test`.
val_test="""Frank could no longer resist"""
display(predict_next_words(val_test, processor, tokenizer, model, max_seq_length=args.max_seq_length, n=100, T=1, device=device))

In [56]:
# Prompt-continuation sample: n=150, T=1.
val_test="""There was no doubt the lad had seen everything"""
display(predict_next_words(val_test, processor, tokenizer, model, max_seq_length=args.max_seq_length, n=150, T=1, device=device))

In [57]:
# Prompt-continuation sample with a longer prompt: n=100, T=1.
val_test="""The next night I had been asleep about a couple of hours when I was suddenly awakened by"""
display(predict_next_words(val_test, processor, tokenizer, model, max_seq_length=args.max_seq_length, n=100, T=1, device=device))

In [58]:
# Prompt-continuation sample: n=150, T=1.
val_test="""His mind spun in on itself"""
display(predict_next_words(val_test, processor, tokenizer, model, max_seq_length=args.max_seq_length, n=150, T=1, device=device))

In [59]:
# Prompt-continuation sample at a lower T: n=100, T=.5.
val_test="""A giant spider descended on to"""
display(predict_next_words(val_test, processor, tokenizer, model, max_seq_length=args.max_seq_length, n=100, T=.5, device=device))

In [60]:
# Prompt-continuation sample at the lowest T used in this notebook: n=100, T=.1.
val_test="""Madness enveloped his mind as"""
display(predict_next_words(val_test, processor, tokenizer, model, max_seq_length=args.max_seq_length, n=100, T=.1, device=device))

In [61]:
# Prompt-continuation sample from a very short prompt: n=100, T=1.
val_test="""A thin film of"""
display(predict_next_words(val_test, processor, tokenizer, model, max_seq_length=args.max_seq_length, n=100, T=1, device=device))

In [62]:
# Prompt-continuation sample: n=100, T=1.
val_test="""Quivering with fear, he trembled as"""
display(predict_next_words(val_test, processor, tokenizer, model, max_seq_length=args.max_seq_length, n=100, T=1, device=device))

In [63]:
# Same prompt as the T=.1 cell above, now with n=300 and T=1.
val_test="""Madness enveloped his mind as"""
display(predict_next_words(val_test, processor, tokenizer, model, max_seq_length=args.max_seq_length, n=300, T=1, device=device))

In [64]:
# Prompt-continuation sample: n=100, T=1. The prompt string ends with a trailing space.
val_test="""All at once, in a moment of realisation, he knew the secret to creating true artificial intelligence was """
display(predict_next_words(val_test, processor, tokenizer, model, max_seq_length=args.max_seq_length, n=100, T=1, device=device))

# Iterative prediction

The next-word prediction is poorer than in the GPT model.
I wonder if recursive masking will help the output quality.
We would predict the probability of each word, then selectively mask and replace the words that don't belong.

- [ ] easy method, randomly mask 10% of the words each time

Hard method
- [ ] make sure we predict each existing word
- [ ] give the whole sentence, with mask tokens, and predict the probability of each actual word
- [ ] replace low prob words with a sampled higher prob word
- [ ] repeat


In [None]:
# def predict_masked_words(
#     x, processor, tokenizer, model, n=10, max_seq_length=300, device="cuda"
# ):
#     ex = processor._create_examples(x, "train", tqdm=notqdm)[-1:]
#     label_list = processor.get_labels()

#     log_feats = convert_tokens_to_features(
#         ex, label_list, max_seq_length, tokenizer, tqdm=notqdm
#     )

#     log_input_ids = torch.tensor([f.input_ids for f in log_feats], dtype=torch.long)
#     log_input_mask = torch.tensor([f.input_mask for f in log_feats], dtype=torch.long)
#     log_segment_ids = torch.tensor([f.segment_ids for f in log_feats], dtype=torch.long)
#     log_label_ids = torch.tensor([f.label_id for f in log_feats], dtype=torch.long)
#     log_label_weights = torch.tensor(
#         [f.label_weights for f in log_feats], dtype=torch.long
#     )

#     batch = [
#         log_input_ids,
#         log_input_mask,
#         log_segment_ids,
#         log_label_ids,
#         log_label_weights,
#     ]

#     with torch.no_grad():
#         batch = tuple(t.to(device) for t in batch)
#         input_ids, input_mask, segment_ids, label_ids, label_weights = batch
#         logits = model(input_ids, segment_ids, input_mask).detach()

#     i = 0
#     display(
#         HTML(
#             html_clean_decoded(
#                 tokens=label_ids[i][1:-2],
#                 input_mask=input_mask[i][1:-2],
#                 label_weights=label_weights[i][1:-2],
#                 tokenizer=tokenizer
#             ).replace("rgba(255,0,0", "rgba(0,0,255")
#         )
#     )
#     display(
#         HTML(
#             html_clean_decoded_logits(
#                 input_ids=input_ids[i][1:-1],
#                 input_mask=input_mask[i][1:-1],
#                 logits=logits[i][1:-1],
#                 label_weights=label_weights[i][1:-1],
#                 tokenizer=tokenizer
#             )
#         )
#     )

In [142]:
from masked_language_model import insert_next_word_input_id, html_clean_decoded, html_clean_decoded_logits, insert_next_word_input_ids, improve_words_recursive

In [125]:
# def improve_words_recursive(
# text, processor, tokenizer, model, max_seq_length=300, n=10, T=1.0, device="cuda", debug=False
# ):
# """
# Predict next `n` words for some `text`
# Args:
# - text (str) base string, we will predict next words
# - processor
# - tokenizer
# - n (int) amount of words to predict
# - T (float) temperature used when sampling predictions

# Returns:
# - IPython html object, which show predicted words in red, with opacity indicating confidence
# """

# val_test="""There was no doubt the lad had seen.” " Oh, I am so sick, I am so sick, " she said, laughing, almost crying, as if it were all over with a prick. " " You have a little doubt, " he continued, " that it is, " he continued, " that it was a sin, " and I have a bad headache, and I am quite sure it will be good, if it is not, if it is, that your father was no doubt the lad had seen.” " Oh, I am so sick, I am so sick, " she said, laughing, almost crying, as if it were all over with a prick. " " You have a little doubt, " he continued, " that it is, " he continued, " that it was a sin, " and I have a bad headache, and I am quite sure it will be good, if it is not, if it is, that your father nothing of it."""
# val_test="""There was no doubt the lad had exploded.” " Oh, I am so sick, I am so hangry, " she said, lauhing, almost crying, as if it were all over with a prick. " " You have a little doubt, " he continued, " that it is, " he continued, " that it was a sin, " and I have a bad headache, and I am quite sure it will be good, if it is not, if it is, that your father was no doubt the lad had seen.” " Oh, I am so sick, I am so sick, " she said, laughing, almost crying, as if it were all over with a prick. " " You have a little doubt, " he continued, " that it is, " he continued, " that it was a sin, " and I have a bad headache, and I am quite sure it will be good!!, if rather is not, if it fat fat, that your father nothing of it."""

# # Frank could no longer
# val_test="""
# Frank could no longer a lover seems moments later composes a, a So finished. " Old Man! yes delighted; pom tripas! A------ But I have fear what would be inquireable inquiry; a question of.. In a violent draft air tempests andtrovations, CHAP. CH that. Who was love? He, stun. the orifice of the if are.:. W.,. Us. Now you. " He could no longer a lover seems moments later composes a, a So finished. " Old Man! yes delighted; pom tripas! A------ But I have fear what would be inquireable inquiry; a question of.. In a violent draft air tempests andtrovations, CHAP. CH that. Who was love? He, stun. the orifice of the if are.:. W.,. Us. Now you. " He at me"""
# # display(predict_next_words_recursive(val_test, processor, tokenizer, model, max_seq_length=args.max_seq_length, n=100, T=.5, device=device)).strip('\n')
# text=val_test
# # processor
# # tokenizer
# # model
# max_seq_length=args.max_seq_length
# n=100
# T=1.0
# # device
# debug=False

# ITERATIVE_MASK_FRAC=0.05 
# T=1
# iterations = 100


In [141]:
# Run 300 iterations of improve_words_recursive over `text`.
# NOTE(review): no active cell nearby assigns `text` (only a commented-out
# `text=val_test`) -- confirm it is defined earlier so that this cell survives
# Restart Kernel -> Run All.
improve_words_recursive(
    text,
    processor,
    tokenizer,
    model,
    iterations=300,
    max_seq_length=300,
    n=10,
    T=1.0,
    # Fall back to the CPU when no GPU is available instead of hard-coding "cuda".
    device="cuda" if torch.cuda.is_available() else "cpu",
    debug=10,
)

bad_words ['seems', 'tender', '##rot', 'it', 'that', 'if', 'seems', 'delighted', 'trip', 'air', 'and', '##tro', 'that', 'if', 'are', '[SEP]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', 'says', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', 'en', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', 'thoughts', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]']
bad_words ['s

bad_words ['no', 'consider', 'later', 'word', 'figure', 'finished', 'John', 'his', '##om', '##pen', 'wit', 'di', 'extract', '##van', '’', '=', 'know', 'longer', 'compose', 'forget', 'finished', 'Mad', 'Frank', '##ffe', 'fear', 'reflect', '##able', 'disorder', 'number', '##mpest', 'choices', 'are', 'Because', 'feet', '.', 'But', 'future', 'added', 'are', 'Then', '[PAD]', 'Judge', 'dynamic', '##nova', '##pen', 'humming', '[PAD]', 'CH', '##OC', '##pen', '##mise', 'ne', '##mes', '##b', 'gain', '##p', 'know', '[PAD]', 'might', 'reasoned', 'many', 'les']
bad_words ['He', 'consider', 'later', 'compose', 'figure', 'finished', 'Old', 'John', 'his', '##pen', 'Which', 'contains', 'flood', '##mpest', 'extract', 'these', 'consciousness', 'let', 'longer', 'love', 'understanding', 'breathe', 'forget', 'finished', 'Prime', 'Frank', '##om', 'Rufus', 'what', 'reflect', 'disorder', '##mpt', 'choices', '?', 'W', 'feet', 'But', 'implement', 'future', '[PAD]', '"', 'Judge', 'want', 'for', '##om', '##nova', 

bad_words ['W', 'Man', '.', '##ir', 'cheeks', 'To', 'angry', '##vations', 'ecclesiastical', 'substitute', 'without', 'Man', 'Prime', 'l', '##x', 'e', '##ant', 'report', 'agreed', 'te', '##ive', 'CH', 'For', 'needs', '?', '.', 'Yet', 'none', 'Adrian', 'grey', '.', 'Love', 'future', 'says', '.', 't', 'was', 'be', 'thee', 'than', 'say', 'doubt', ':', 'None', 'because', '-', 'Take', '##om', '##any', 'what', 'announcing', '.', 'agreed', 'anxiety', 'doubt', 'gain', '##ent', '##ll', '.', 'may', 'matter', 's', 'another', 'rather', ':', 'deny']
bad_words ['consider', 'W', 'Man', 'let', 'cheeks', '"', 'in', 'with', 'verse', 'Toy', 'ecclesiastical', ':', 'substitute', 'you', 'thrust', 'sees', 'Man', 'me', 'have', ',', 'A', 'hour', 'te', 'shall', 'Answer', '##i', 'needs', 'round', 'of', 'Yet', 'none', 'Adrian', 'Love', 'future', 'says', 'condition', 'was', '?', 'thee', 'than', 'say', ':', 'or', 'None', 'Take', '##om', '##any', ':', 'say', 'agreed', 'anxiety', 'not', 'gain', '##ent', 'artists', 'ma

bad_words ['##ler', '"', 'should', 'let', '##ity', 'but', ',', 'les', 'three', 'Mars', 'Fell', 'Need', 'you', 'sees', 'ones', 'Cairns', 'or', 'rather', 'The', 'thousand', 'think', 'that', 'two', 'round', '##í', 'but', 'blended', 'will', 'Rapid', 'refrain', 'Hair', 'Martial', 'will', 'is', 'suppose', 'l', '##AP', 'word', 'deserve', 'Study', 'gain', '##ita', 'instruments', '##ful', 'second', 'says', 'was', 'know']
bad_words ['first', '"', 'should', '##ity', 'refusal', 'but', 'good', 'says', '##ère', 'cases', 'CH', 'Sense', '“', 'contrary', 'sees', ':', 'ones', '##ant', 'Cairns', 'rather', 'thousand', 'Al', 'would', 'feeling', 'and', 'bell', 'translation', 'And', 'parts', 'will', 'it', 'excuses', 'refrain', 'Hair', '?', 'Martial', 'say', 'suppose', '##AP', 'deserve', 'foot', 'second', 'gain', 'flames', 'instruments', '6', 'For', 'boot']
bad_words ['never', 'que', 'quit', 'Which', '##yr', '##ish', '##ity', 'son', '##ère', 'cases', 'three', '“', 'ye', 'you', 'don', 'thrust', 'sees', 'he', '

bad_words ['end', 'open', 'And', 'said', 'rhetoric', 'express', '##ish', 'opinion', 'conflicting', 'lack', 'voice', 'Sense', 'information', 'labour', 'Need', '!', 'impose', 'you', 'writing', 'favor', 'would', 'here', 'fashionable', '##fort', 'a', '##ury', 'applied', 'are', 'therefore', 'Courts', 'Mad', 'man', 'furious', 'behalf', 'Me', 'speaks', 'indeed', 'it', '-', 'reflection', 'Either', 'and', 'like', 'or', '?', 'mad', 'then', 'might', 'into', 'yourself', ',', 'me', 'The', 'this', 'heard', '##bit', 'last', 'meaning', '##ve', 'you', 'And', 'unknown']
bad_words ['end', 'open', 'speaking', 'express', 'this', 'conflicting', 'Show', 'voice', 'Our', 'information', 'labour', "'", ',', 'Need', '!', '.', 'better', 'blame', 'writing', 'language', 'anytime', 'some', 'CH', 'silent', '-', 'Mad', 'furious', 'behalf', 'better', 'Me', 'speaks', 'indeed', 'reflection', '.', 'Either', '14', 'use', 'or', 'for', '##AP', '?', 'mad', 'T', 'there', 'into', 'will', 'me', 'to', 'yourself', 'me', '’', 'this'

bad_words ['##pice', 'themselves', 'the', 'refusal', '##ux', 'contain', '40', 'win', 'it', 'that', 'grown', 'choose', 'inn', 'it', 'say', 'or', '##iding', 'but', '?', 'Do', 'be', 'them', 'blame', 'full', 'use', 'or', '-', 'doubt', 'each', 'grant', 'piece', 'from', 'dead', '##bit', 'bells', '-', '?', 'Franco', '-', 'mentioned']
bad_words ['for', '##pice', 'tongue', 'tell', 'themselves', 'effect', '##ux', 'contain', ',', '40', 'does', 'speak', 'ye', 'go', 'with', 'opinion', 'choose', 'inn', '##st', 'say', 'No', ':', 'people', ';', 'or', '##iding', 'say', 'others', 'those', '?', 'but', 'blame', 'speech', '.', 'doubt', 'what', 'Ms', ',', 'grant', 'second', 'Dr', '##bit', 'interrupted', 'change', 's', 'there', 'mentioned']
bad_words ['##pice', 'you', "'", '##ux', 'contain', 'art', 'loved', 'succeed', 'inn', 'government', '-', 'But', 'vote', '##iding', 'again', 'Put', 'Mon', 'like', 'blame', 'choice', '.', '-', '##AP', 'doubt', '##name', 'grant', 'Love', 'Dr', '##bit', 'no', 's', 'there', 'E

bad_words ['he', 'tongue', 'Horace', '##ion', 'privately', 'CH', 'art', 'Speak', 'speaks', 'This', '.', 'might', '[UNK]', ',', 'epithet', 'gal', '##st', 'and', 'opinion', '##ifice', 'but', 'pen', 'she', 'spirit', 'discretion', 'bathing', 'negative', '.', "'", 'Ten', 'men', 'workers', "'", '.']
bad_words ['tongue', 'Open', ':', 'therefore', 'we', 'q', 'You', '.', 'So', 'meeting', '[UNK]', 'epithet', 'gal', 'or', '—', '##ifice', 'pen', 'cannot', 'discretion', 'proper', 'statements', 'negative', 'They', 'men', 'workers', 'adds', 'The']
bad_words ["'", 'that', 'effect', '##in', 'question', 'smiling', 'L', 'turn', 'If', 'cheer', 'she', 'dim', 'voice', '-', 'say', '##ifice', 'Bass', 'says', 'proper', 'statements', 'They', 'reason', ';', 'workers', ';', 'point', 'adds', 'therefore']
bad_words ['tongue', 'Open', 'effect', 'translate', '40', 'And', 'say', 'protest', 'to', 'meaning', 's', 'weak', 'spirit', 'broken', 'CH', 'gal', '##st', 'the', 'not', 'If', 'Bass', 'say', 'refusal', 'statements',

bad_words ['woman', '’', 'Peace', 'effect', 'dance', 'sake', 'be', 'conversation', 'spoken', 'in', 'duration', 'do', 'at', 'situation', '=', '##uant', 'made', 'said', 'She', 'substitute', 'engagements', 'if', '##dge', 'posture', 'not', 'that', 'In', '##uin', 'him', 'forget', 'I', 'finally', 'adds', '##ze', 'and', 'your', 'x']
bad_words ['’', 'health', 'opinion', 'em', '##in', 'sake', '##d', '##AP', 'sentence', 'shaken', 'duration', 'In', 'at', '=', 'That', 'satisfaction', 'She', 'Get', 'consulted', 'said', '##ici', 'when', 'posture', 'may', 'forget', "'", 'finally', 'adds', 'as', '##ual']
bad_words ['naked', 'say', 'health', 'can', 'opinion', 'but', 'consequence', 'sake', 'sentence', 'wise', ':', 'ours', 'In', '##posed', 'sufficient', 'or', 'that', 'broken', '##AP', 'it', 'death', 'mine', '##ici', 'are', 'mi', 'made', 'q', 'the', 'die', 'But', 'Courts', 'adds', 'said', '##ual']
bad_words ['You', '##d', 'sentence', 'Put', 'In', '##posed', 'merely', 'And', 'understood', 'What', 'place', 

bad_words ['em', 'Ask', 'fell', 'this', 'will', '##uo', 'wise', 'manner', '##s', 'that', 'have', 'this', 'you', 'harp', "'", '.', 'and', '.', 'us', 'word', 'that', 'phase', 'No', '.', 'based', 'good', 'this', 'word', 'word', 'him', 'given', '##uin', '##uo']
bad_words ['that', 'em', '##bracing', 'men', 'speaking', 'think', 'absurd', 'pour', '##uo', 'value', 'her', 'it', 'relation', 'in', 'ambiguous', 'utter', 'saying', 'the', 'harp', '.', 'us', 'not', 'No', '.', 'speak', 'good', 'an', 'made', 'it', 'him', '##uin', 'noise']
bad_words ['page', 'other', 'you', '##bracing', ';', 'anything', 'pour', '##uo', 'first', 'half', 'wise', 'shall', 'that', 'Which', '##vo', 'think', 'execute', '##rt', 'women', '.', 'to', "'", 'you', 'word', 'and', 'good', 'an', '##sist', 'word', 'given', 'I', 'noise', '.']
bad_words ['others', '##bracing', 'will', 'this', 'pour', '##uo', 'word', 'without', 'sometimes', 'shall', '##vo', 'do', '"', 'Point', 'never', 'language', ':', 'good', 'these', 'speak', 'said', ';

bad_words [',', 'If', 'have', 'You', 'act', 'likewise', 'body', 'Council', 'Reports', 'knows', 'and', 'it', 'have', '##na', 'word', 'that', 'words', 'act', 'Bram', '!', 'but', 'lover', 'this', 'her', 'meaning', '##ﬁ', '##ctions', '-', 't', 'Men', 'not', ':', 'more', 'these', 'men', 'be', 'fit', 'Say', '-', 'all', 'from', 'the', 'say', '"', 'I']
bad_words [',', 'If', 'Some', 'please', 'or', 'act', 'likewise', 'title', 'Council', 'knows', 'it', ',', 'him', 'that', '?', 'words', 'admit', 'hear', ',', ';', 'said', 'Besides', 'flowers', 'battle', '##ctions', 'be', 't', 'Men', 'more', 'said', 'men', 'good', 'reason', 'fit', 'speaking', 'if', 'the', 'say', 'and', 'or']
bad_words ['court', 'page', 'you', 'other', 'persons', 'whom', 'give', 'only', 'likewise', 'body', 'Council', 'knows', 'it', ',', 'is', 'that', 'mouth', 'A', 'words', 'act', 'then', 'judge', 'philosophy', "'", '“', 'but', 'Besides', '##rt', "'", "'", '##t', 'not', 't', 'Men', 'Toy', 'say', '-', 'then', 'men', 'good', 'reason', 

bad_words ['so', 'reserve', 'put', 'some', 'Sal', 'better', 'then', 'collision', 'Desire', 'face', 'you', 'wit', ':', 'well', 'form', ':', 'hide', 'Love', 'added', 'replied', '-', 'she', '-', 'one', '##ification', 'succeeding']
bad_words ['?', 'objects', '?', 'you', 'harmony', 'collision', 'What', 'Martial', 'ladies', 'fixed', 'you', 'you', 'herself', '"', 'preference', 'be', '##lor', 'was', 'this', 'to', 'judge', 'esteem', 'thought', 'replied', 'that', 'she', 'if', '.', 'one', 'one', 'again']
bad_words ['.', 'answer', 'give', 'all', 'say', 'Martial', 'ladies', 'V', 'feel', 'use', 'be', 'or', 'knowing', 'chose', 'goes', 'judge', 'replied', 'which', 'she', 'we', 'The', 'some', 'but']
bad_words ['out', 'mused', ';', 'word', 'fixed', 'will', 'use', 'good', 'well', 'speak', 'by', 'Dialogue', 'Be', 'actress', 'replied', 'some', 'or', 'on', '-', 'side', 'and']
bad_words ['alone', 'and', 'said', 'give', 'on', 'Soft', 'chosen', 'have', 'across', 'use', 'said', 'says', 'drink', 'addressed', '-'

bad_words ['want', 'things', 'whom', 'have', 'must', 'Sultan', 'if', 'Man', 'matter', "'", 'man', 'point', 'you', 'Every', 'thing', 'folding', '"', '##rch', 'By', 'people', 'going', 'proposed', 'obstacle', 'Congress', '##var', 'we', 'another']
bad_words ['after', 'whom', 'have', 'all', 'think', 'or', 'the', 'putting', 'People', 'so', 'Every', 'one', '-', 'ideas', '##rch', 'after', 'pitch', 'people', 'going', '##s', '"', 'fires', 'surveyor', 'never', 'but', 'behave']
bad_words ['or', 'proposed', 'when', 'would', 'pat', 'about', 'if', 'doctrine', 'And', 'clumsy', 'point', 'Without', 'trumpet', ',', 'Sultan', 'being', 'at', 'fires', 'surveyor', 'little', "'", ':', 'I']
bad_words ['another', 'last', "'", 'otherwise', 'whom', 'betrayed', 'or', '##fuse', 'doctrine', 'laugh', '137', 'point', "'", 'letter', 'sin', '-', 'spoke', 'little', 'I', 'fires', 'surveyor', 'of', "'", 'little', 'or', 'council', ':']
bad_words ['sort', 'reversal', 'betrayed', 'you', 'put', 'Broken', 'say', 'doctrine', 'Da

bad_words ['put', ':', 'sort', 'kind', 'better', 'Punch', 'which', 'glance', 'something', '.', 'trust', 'That', 'said', 'pretty', 'we', 'direction', 'replied', "'", ':', ';', 'speech', 'himself', 'that', 'and', 'then', 'company', 'more', 'you', 'rather', 'whether', 'person', 'frost', 'and']
bad_words ['least', 'kind', 'passes', 'What', 'Punch', 'errors', 'all', 'mine', 'pretty', 'we', 'strengthen', 'opposite', 'title', 'speech', 'himself', "'", 'and', 'of', 'company', "'", 'proceeding', 'says', ';', 'but', 'or', 'love', 'person', 'frost', 'them']
bad_words ['put', 'you', 'whether', 'or', 'Yes', 'may', ',', 'errors', ',', 'in', 'pretty', 'say', 'grace', 'will', 'gestures', 'speech', "'", 'company', 'proceeding', 'you', 'if', 'person', 'like', 'good', 'them', 'quotes']
bad_words ['PS', "'", 'being', 'gesture', 'may', 'You', 'use', "'", '.', 'opinion', 'in', 'of', 'and', 'liberty', "'", 'know', 'party', 'that', 'all', 'will', 'or', 'good', 'them', 'said']
bad_words ['from', 'lose', 'and',

bad_words ['m', 'made', 'discovered', 'will', 'France', 'this', 'which', "'", 'growing', ';', 'and', 'up', 'need', '##am', 'that', 'granted', "'", 'observe', 'whatever', '—', 'one', 'Sultan', 'of', ',', 'all', 'will', '[', 'what', 'it', 'all', 'being', 'rank', 'addressing', 'heart', 'next', "'", 'children']
bad_words ['one', 'made', 'those', 'things', 'Gerhard', 'though', 'and', 'growing', ';', 'things', 'sir', 'All', 'whatever', 'flinch', "'", 'Drop', 'Sultan', 'example', 'your', ';', 'that', 'think', '[', 'For', 'being', 'For', 'rank', 'Vice', "'", 'addressing', 'heart', '(', ',', 'not']
bad_words ['one', 'made', 'those', 'blame', '##kini', 'Nature', 'himself', 'it', 'Italy', 'growing', ';', 'well', 'or', '##am', 'sir', 'granted', 'then', 'flinch', 'Drop', 'example', 'your', 'all', 'and', ':', 'plays', 'that', 'you', 'but', 'For', 'it', 'being', 'are', 'every', 'addressing', 'heart', 'officers']
bad_words ['made', 'custom', 'about', 'better', 'example', ';', 'France', 'doubt', 'twice

bad_words ['something', 'similar', 'language', 'said', 'volumes', 'voices', 'continuing', 'Paris', 'says', 'which', 'decides', 'this', 'sufficient', 'say', 'horns', '##ity', 'but', 'But', 'said', 'Said', 'yourself', 'but', 'presence', 'let', 'that', 'either', 'virtue', '"', 'conduct']
bad_words ['company', 'I', 'reach', 'discourse', 'it', 'volumes', '16th', 'Paris', 'decide', 'speak', 'thought', 'not', '##ity', ';', 'then', 'I', 'Said', 'where', 'choose', 'there', 'return', 'let', 'quoted', 'rather', 'virtue', '"', 'conduct']
bad_words ['lower', 'opinion', 'the', 'French', 'Paris', 'of', 'speak', 'one', 'this', '##ity', '-', 'suit', 'then', 'sentiments', 'translation', 'order', 'return', 'but', 'rather', 'perhaps', 'possible', 'can', '-', '##different', 'Cup', 'which']
bad_words ['company', 'ease', 'help', 'preference', 'but', 'Paris', 'a', 'your', 'you', 'speak', 'trust', 's', 'say', 'submission', '##ity', '-', 'trust', 'translation', 'can', ':', ',', 'your', 'you', 'all', '-', 'added

bad_words ['which', 'says', 'say', 'France', 'or', 'German', 'French', 'language', 'Yes', 'face', 'attitude', 'country', 'I', 'want', 'bow', 'em', 'it', 'second', 'people', 'and', 'second', 'says']
bad_words ["'", 'whether', 'may', 'more', 'whom', 'added', 'except', '-', 'replied', 'ear', 'spirit', 'trust', 'all', 'I', 'any', 'place', 'Put', 'to', '##quer', 'and', 'but', 'replied', 'second', 'people', 'second', 'all']
bad_words ['and', 'that', 'others', ',', 'before', 'coup', 'mouth', 'air', 'putting', 'trust', 'deny', 'undertake', 'The', 'me', 'and', 'expression', 'replied', 'one', 'people', 'and', 'child']
bad_words ['that', 'us', 'like', 'us', 'all', 'added', 'my', 'half', 'details', 'say', 'air', 'it', '##ibi', 'either', 'I', 'said', 'a', 'The', 'me', 'that', 'for', 'say', 'only', 'replied', 'it', 'friend', ';', 'but']
bad_words ['reminded', 'silent', 'yourself', 'say', 'he', '-', 'person', 'throat', 'heart', 'and', 'said', 'not', 'just', '##nounce', 'The', 'me', 'suggests', 'only'

bad_words ['nothing', 'us', 'better', 'whom', 'half', 'lack', 'curious', '##x', 'keeping', 'all', '##ment', 'mind', 'looking', 'and', 'Maid', 'take', 'each', "'", ',', '.', 'one', 'not', 'consider', 'half', 'name', '-', 'But', 'said', 'ladies', '"', 'says', 'is', 'master', 'love', "'"]
bad_words ['nothing', 'curious', '##x', 'keeping', '##ment', 'mind', 'German', 'Maid', 'you', 'French', 'also', 'Port', 'in', 'none', 'of', '.', 'No', 'speaking', 'take', 'speaking', 'say', "'", 'of', '’', '"', 'all', 'the', 'says', 'master', 'also', 'says']
bad_words ['there', 'like', 'yourself', 'half', 'lack', 'third', '##x', ',', 'it', 'make', 'excellent', 'only', 'with', 'liberty', 'to', 'but', 'say', 'take', 'speaking', 'said', 'Turkish', 'either', 'of', 'more', 'get', '"', '-', 'to', 'first', '-', 'says']
bad_words ['can', 'make', 'Say', 'lack', '##vier', 'it', 'well', 'German', 'means', 'speaking', 'If', 'excellent', 'gain', 'only', 'word', 'while', 'word', 'disagree', '##che', 'adding', 'name', 

bad_words ['Straits', 'all', 'which', 'I', 'all', 'your', 'But', '##nes', 'dealings', ',', 'ladies', 'matter', 'said', 'Born', 'this', 'Agreement', 'Kiss', 'Come', 'reply', 'of', 'Third', 'but', 'authority', 'you', 'or', 'and', 'this', "'", 'after']
bad_words ['Straits', 'there', 'that', 'should', 'none', ';', 'or', 'knowing', '##nes', 'dealings', 'bidding', 'you', 'speak', 'ladies', "'", 'turn', 'word', 'ring', 'character', 'Speaking', "'", 'I', 'what', 'of', 'Kiss', "'", 'All', 'reply', 'for', 'and', 'his', 'but', 'word', '-', "'", 'made', 'world', 'But', 'consequence', 'after']
bad_words ['lacking', 'is', 'nothing', 'there', 'your', 'quarter', 'knowing', '##bor', 'dealings', 'and', 'refusing', 'good', 'for', 'turn', 'Speaking', ',', 'term', 'set', 'Kiss', 'All', 'mouth', 'for', 'into', 'made', 'world', 'which', 'consequence']
bad_words ['##r', 'is', 'or', ',', 'the', 'class', 'knowing', '##bor', 'good', 'All', 'another', 'turn', 'Ring', 'This', 'E', 'term', 'Boss', 'Good', "'", 'som

bad_words ['some', 'replies', 'ones', '##tep', ',', 'trade', 'one', '"', 'Gates', 'say', 'otherwise', 'Assembly', '-', 'what', "'", 'then', 'found', '-', 'Bell', 'All', 'again', 'half']
bad_words ['speaking', 'ones', 'rather', 'if', 'is', 'ears', 'heard', 'well', '##sist', '"', 'No', 'these', 'say', 'mouth', 'or', 'all', 'Grand', 'ours', '##nges', 'say', 'or', 'Put', 'found', 'and', 'Well', 'Speaking', 'either']
bad_words ['is', 'for', 'is', 'anxiously', 'word', 'first', '##en', 'put', 'turning', 'half', 'it', '"', 'Say', 'these', 'meaning', 'All', 'some', ';', 'Grand', 'ours', '##nges', 'do', 'speak', 'use', 'fill', ':', 'four', 'speaks', 'least', 'face', 'Does']
bad_words ['too', 'half', 'if', 'three', 'first', 'talk', 'well', 'and', 'placed', 'Say', 'some', 'or', 'Grand', '##hy', "'", 'saying', '##t', 'all', '##oll', 'pass', 'wise', '-', 'fill', 'order', 'should', 'least', 'Does']
bad_words ["'", 'ones', 'What', 'if', '"', 'come', ',', 'to', 'All', 'but', '228', 'to', 'this', 'quit'

bad_words ['-', 'not', 'whom', 'and', 'two', 'say', 'them', 'one', 'word', 'half', 'word', 'one', 'water', "'", 'funny', 'to', 'saying', '-', 'people', 'women', '##ery']
bad_words ['all', 'meaning', 'all', 'but', 'ones', 'like', 'disposed', '-', 'will', 'word', 'and', 'consequence', 'some', 'union', 'water', '-', 'service', "'", 'all', "'", 'word', "'"]
bad_words ['all', 'two', 'men', 'legends', 'or', 'self', 'two', 'this', 'foul', 'Those', '-', ':', 'these', '##e', 'Look', 'concerning', 'Homer', 'doing', 'union', 'water', 'they', 'us', '“', 'Flanders', "'", 'half']
bad_words ['is', 'name', 'those', 'attach', '##resh', 'adds', 'two', 'this', 'or', 'civil', 'All', 'King', 'all', 'them', ';', 'these', 'these', 'word', 'Look', 'concerning', 'doing', 'union', 'that', 'word', '##fill', '##v', 'they', 'Fool', "'", "'", "'", 'Flanders', 'one']
bad_words ['##ce', 'drive', '-', 'name', 'attach', 'use', 'adds', 'well', 'ones', 'put', 'speaking', "'", 'name', 'civil', 'put', 'one', 'them', 'mess'

bad_words ['man', 'some', 'turn', 'in', 'those', 'Dukes', 'are', 'not', 'use', 'use', 'once', '##stituted', '##itude', 'kingdom', 'half', 'whether', '##dain', 'order', 'that', 'Till', 'water', "'", ':', 'all', '-', 'either', 'speech', 'again', 'pass', 'or', 'some', 'or', 'luxury', 'first', ',', 'knowing', 'they', 'good', 'or', 'with']
bad_words ['man', 'that', '?', 'Rabbi', 'Dukes', 'speaking', 'judgment', '##stituted', '##ble', 'caution', 'take', '##dain', 'bow', 'water', 'speech', 'pass', 'some', 'thought', ':', 'reputation', 'and', 'knowing', 'return', 'good', 'this', 'so', 'with']
bad_words ['necessary', 'if', "'", 'are', '?', 'turn', 'That', 'which', 'half', 'judgment', 'is', 'these', 'take', 'whether', 'proved', 'use', 'hands', 'either', 'the', 'second', '-', 'speech', 'four', 'say', '-', 's', ':', 'reputation', 'first', '?', 'and', 'and', 'turn', 'good', 'says', 'another', 'with', 'read']
bad_words ['necessary', 'past', 'if', 'stealing', 'judgment', 'order', 'test', 'place', 'sa

bad_words ['blame', 'if', 'reason', 'preference', 'persons', 'perhaps', 'word', 'matters', 'poverty', 'that', 'if', 'came', 'affairs', 'co', '##hip', 'race', '-', 'System', "'", 'which', 'some', 'and', 'add', 'more', 'word', 'matter', 'of', 'reply']
bad_words ['an', 'preference', 'management', 'best', 'say', 'before', '##ble', 'another', 'came', "'", 'from', 'and', 'an', 'number', '-', '##tive', 'by', 'judge', 'might', 'Greek', 'action', 'insult', 'more', 'word', 'our', ',', 'reply']
bad_words ['persons', 'Points', 'management', 'best', '##tion', "'", '##ege', 'and', 'life', '##ision', 'self', 'an', '##uit', 'about', '##int', '##tive', "'", 'might', 'Greek', '"', 'insult', 'word', 'our', ':', 'any', 'might']
bad_words ['doctrines', 'hand', 'stomach', 'declares', 'name', 'being', 'that', 'matters', 'yourself', 'Mind', 'good', 'the', 'say', ':', '##y', '##tive', 'what', 'may', 'if', 'might', '"', 'to', 'insult', 'word', 'our', 'rank', 'any', "'", 'might', 'if']
bad_words ['Anna', 'And', 

bad_words ['considerations', 'are', 'discretion', '##rable', 'name', 'some', ';', 'T', 'time', 'other', "'", 'absence', 'was', 'word', "'", ':', 'terms', 'for', "'"]
bad_words ['that', 'Then', "'", 'perhaps', 'are', 'Joan', 'some', '-', 'that', 'but', '(', 'self', '?', 'that', '##ty', 'two', 'only', 'Hence', 'some', ':', "'", 'the', 'woman', "'", 'again']
bad_words ['favorite', 'use', "'", "'", '##ncy', 'if', 'wore', 'cap', '(', 'the', '?', 'one', 'some', 'other', 'most', 'starting', 'and', 'whether', 'Hence', 'fire', '##er', 'but', "'", 'woman', '##thing']
bad_words ['should', 'owe', ':', 'ring', 'charity', 'submarines', 'if', 'wore', 'use', 'cap', '-', 'then', 'say', 'half', 'from', 'time', 'blow', 'Hence', ',', '##er', 'worth', 'retiring', 'not', 'woman', '##thing', 'least', 'some', 'proceeding']
bad_words ['later', 'brutality', 'charity', 'submarines', 'and', 'and', 'or', 'foolish', 'give', 'use', 'cap', 'then', 'either', 'then', 'Say', 'time', 'which', 'will', ',', 'worth', 'or', 

bad_words ['posts', 'them', 'stories', 'while', 'half', '-', 'its', 'whom', 'next', 'it', 'not', 'thank', 'not', "'", 'provisions', 'new', '-', 'sum', "'", 'perhaps', '-', 'after', 'six', 'just', 'which', 'who', 'or', '"', 'or', 'when', 'art']
bad_words ['place', 'but', 'posts', 'reasons', 'given', 'but', 'and', 'half', 'the', 'another', "'", 'similarly', 'first', 'claims', 'word', 'whom', 'next', 'earned', "'", 'thank', 'not', 'provisions', 'French', 'rank', '##lor', "'", 'sum', 'said', 'perhaps', 'but', 'order', '"', 'after', 'but', 'supposed', '"', 'when', '-']
bad_words ['##f', 'go', 'posts', 'given', 'second', 'the', 'word', 'similarly', 'first', '-', 'next', ';', 'between', 'should', 'earned', 'provisions', 'half', 'for', '##lor', 'which', 'sum', '-', 'perhaps', 'of', ',', 'after', 'say', 'or', 'first']
bad_words ['##f', 'supplied', 'posts', 'for', 'given', 'the', 'Mention', 'first', '##course', 'word', 'v', 'representatives', 'command', 'then', 'call', 'provisions', 'divides', '

bad_words ['understanding', 'Either', 'she', 'has', 'more', 'think', 'entering', 'nothing', 'after', 'relations', 'state', 'going', 'General', '##ening', 'people', 'knights', 'ready', 'last', 'but', 'again', 'reasons', 'then', 'weeks', 'forth', 'according', 'orders', 'mistake', 'all', 'thirty', 'said', 'directing', 'said']
bad_words ['means', 'Either', 'suffer', 'more', 'Language', "'", 'relations', 'order', "'", 'knights', 'three', 'last', '.', 'then', 'Styles', 'forth', ',', 'first', "'", 'mistake', 'business', 'joint', 'first', 'directing', "'"]
bad_words ['on', 'Either', 'reasons', 'speaking', 'has', 'adds', 'or', 'Roman', 'or', 'said', 'or', 'sum', 'may', "'", 'words', 'knights', 'first', 'again', 'half', 'the', 'from', 'Styles', 'say', 'then', 'and', 'the', 'mistake', 'guard', 'perhaps', 'another', 'without', "'", 'said', 'then', 'more']
bad_words ['his', 'C', 'another', 'speaking', 'she', 'has', 'self', "'", '-', 'Roman', "'", "'", 'said', '-', 'making', 'men', 'may', "'", "'", 

bad_words ['answer', 'doubt', '##feit', 'That', 'lines', 'hopes', 'and', 'to', 'another', 'but', 'word', 'that', "'", 'judge', 'or', "'", 'and', "'", '##bon', 'charges', 'nothing', 'given', 'hope', 'provinces', 'repetition', '.', '##iq', ':', 'sits', 'some', 'offering', 'authority', "'", 'save', "'", 'or', 'from', 'and', "'", "'", 'all', ',', 'answer', 'word', 'retaining', 'men', 'adds', 'well', '##tion', 'you', '222', 'from', 'things', 'and', 'have', 'but', 'not']
bad_words ['says', 'doubt', '##feit', 'but', 'day', 'excuse', 'That', 'lines', 'hopes', '##iator', ',', "'", 'judge', 'some', 'or', 'questions', '##bon', 'then', 'charges', 'say', 'we', 'gasping', 'provinces', ',', 'repetition', 'speaking', "'", '.', 'Supreme', 'wise', 'sits', 'offering', 'authority', ',', 'save', ',', 'then', '-', 'I', 'or', 'from', 'tomorrow', 'first', '-', 'not', 'retaining', 'men', 'word', 'well', "'", '##tion', 'you', 'given', 'from', 'said', '-', 'have', 'but']
bad_words ['##feit', 'some', 'lines', 'ho

bad_words ['##ft', 'than', 'day', 'excuse', '##ing', 'said', 'they', '##ve', 'being', 'senators', 'whether', 'thou', 'judgment', 'that', '##ux', 'advice', 'aim', 'instead', 'you', ':', 'wit', 'they', 'since', 'if', 'few', "'", 'select', 'observe', 'People', 'wit', 'say', 'one', 'are', 'men', 'one', 'ours', 'and', 'No', 'that', 'conduct', 'word', 'if', 'Q', 'group', '!', '?', 'turning', 'then', 'Hector', 'given', 'Man', 'what', '##no', 'say', 'that', 'the', 'retreat', 'reason', 'then', '?', 'fail', 'call', 'took', 'turn', 'pen']
bad_words ['##ft', 'than', '##eous', 'turn', 'judgment', 'they', 'and', '##ve', 'senators', 'others', 'whether', 'll', 'that', 'or', 'whether', 'advice', 'aim', ';', 'instead', 'kinds', "'", 'friend', 'wit', 'speech', 'if', 'few', 'that', "'", "'", 'select', 'then', 'observe', 'People', 'power', 'others', 'two', 'are', 'says', "'", 'of', 'ours', 'them', 'conduct', 'from', 'word', 'if', 'Q', 'group', ',', '!', '?', 'turning', 'which', 'replied', 'another', 'but',

bad_words [',', 'not', 'defiance', 'consent', 'And', 'resumed', 'term', ';', 'and', 'Say', 'll', 'part', '-', 'either', 'regiment', 'either', "'", '##cratic', 'answered', 'reason', 'à', '##th', 'abolition', 'replied', 'if', '##rt', 'more', 'quality', '##ught', 'facial', 'species', '##ram', 'any', ',', 'up', 'things', "'", 'fit', 'proof', 'motives', '##rt', 'then', 'judges', 'given', 'none', 'except', 'speaking', 'he', 'fault', 'daughter', 'cannot', 'let', 'said', 'lives', 'but', 'landscape']
bad_words ['empire', 'defiance', 'resumed', 'sense', 'says', 'course', 'feel', 'circumstances', 'll', 'those', 'either', 'regiment', 'all', 'either', "'", "'", '##cratic', 'answered', 'likewise', 'grasp', '##th', 'that', 'insisted', 'that', 'call', 'more', 'Ada', 'quality', '##ught', 'Pennsylvania', 'works', 'losing', 'is', 'esteem', 'up', 'one', 'the', 'fit', 'ye', 'motives', '##rt', 'then', 'judges', ',', 'either', 'speaking', 'that', 'fault', 'daughter', 'use', 'ought', 'let', 'said', 'lives', '

bad_words ['not', 'chance', 'me', 'And', 'speaking', 'comment', 're', 'advantage', 'circumstances', 'say', 'factor', "'", 'say', 'm', 'emotion', 'people', 'and', 'whether', 'that', 'maker', ':', 'basis', 'promoted', 'enough', 'given', 'reputation', 'Court', 'q', 'this', 'something', 'a', 'that', 'then', 'word', 'sense', 'or', 'women', 'given', 'us', 'that', 'says', 'ours', 'place', 'indicated', 'part', 'five', 'we', '—', '[', 'any', 'and']
bad_words ['chance', 'consequence', 'speaking', 'equal', 'speaking', 'will', 'of', '"', 'emotion', 'people', 'one', 'then', 'maker', 'having', 'and', 'settle', ',', 'enough', 'sense', 'given', 'parts', 'or', 'use', 'this', 'something', 'engaging', 'would', 'it', 'the', 'or', 'women', 'given', 'us', 'turn', 'ours', 'enough', 'instead', 'itself', 'five', 'we', 'or', 'but', 'any']
bad_words ['doubt', 'which', 'chance', 'consequence', 'And', 'equal', 'then', 'sign', 'of', 'speaking', 'say', 'and', 'reserve', 'many', '-', 'seven', '##t', 'strong', 'having

bad_words ['##bate', 'not', 'happiness', 'governing', ';', 'these', 'one', 'relating', 'calls', 'Whole', 'all', 'word', 'word', 'The', 'Individuals', ':', 'actors', 'indicate', 'answers', 'he', 'or', 'massacre', 'default', 'revolt', 'this', 'take', 'leads', 'during', 's', 'that', 'you', 'half', '?', 'speaks', '-', '"', '92', 'urgent', "'", 'but', 'or', '-', '-', 'some', 'turning', 'stage', "'", 'appear', 'although', 'subject', 'then', 'In', 'Via', 'but', 'develop', 'the', '?']
bad_words ['##bate', 'not', 'to', 'happiness', 'governing', 'me', 'those', 'or', 'say', 'but', "'", 'calls', 'Whole', 'all', 'to', 'word', 'The', 'whom', 'actors', 'indicate', 'answers', 'if', 'can', 'massacre', 'revolt', 'only', 'take', 'parts', 'wise', 'since', 's', 'then', 'you', '?', 'that', 'again', '92', 'urgent', 'then', '-', 'the', 'or', '-', 'people', 'are', 'turning', 'stage', 'appear', 'although', 'subject', 'Matters', 'say', 'develop', 'say', 'than']
bad_words ['happiness', 'some', '##gment', 'me', 'o

In [145]:
# Run improve_words_recursive on a short sentence built around the made-up word "covfefe".
# NOTE(review): the meaning of each keyword argument is defined by
# improve_words_recursive, which lives outside this cell — confirm there before tuning.
text = """“Despite the constant negative press covfefe”. Who can figure out the true meaning of "covfefe" ??? Enjoy!"""
improve_words_recursive(
    text,
    processor,
    tokenizer,
    model,
    iterations=150,
    max_seq_length=128,
    n=10,
    T=1.0,
    device="cuda",  # hard-coded device string; presumably requires a CUDA GPU — verify
    debug=10,  # presumably controls how often the `bad_words` lines are logged — verify
)

bad_words ['press', 'figure', 'reputation', '[SEP]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '##v', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '##rage', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]']
bad_words ['press', 'figure', '[SEP]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', 

bad_words ['Despite', 'negative', 'She', '69', 'IPA', 'Bon', '##joy', '[PAD]', 'you', 'word', '“', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '##is', '[PAD]', 'alone', '[PAD]', 'about', 'i', 'black', '[PAD]', '[PAD]', 'again', '[PAD]', '[PAD]', '##mount', 'satisfaction', '?', 'wit', 'Divisions', '[PAD]', 'co', 'rewarded', 'title', '[PAD]', '[PAD]', 'dish', '[PAD]', 'sweet', 'cleaned', '##ance', 'word', 'virtue', '##ber', 'Frankfurt', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '80', '##ration', '##eth', '*', 'al', 'species', '##phobic', 'lives', '[PAD]', 'this', '[PAD]', 'ale', '[PAD]', '##je']
bad_words ['be', 'Despite', 'herself', '69', '##fe', '”', 'IPA', '##fe', '?', 'Bon', '##joy', '[PAD]', 'you', 'word', '“', '##el', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '##is', '[PAD]', 'board', '[PAD]', 'about', 'i', 'black', '[PAD]', '[PAD]', 'again', '[PAD]', '[PAD]', '##mount', 'satisfaction', '?', 'wit', 'Divisions', '[PAD]', 'than', 'rewarded', 'it', '[PAD]', '[PAD]', 'dish', '[PAD]', 'self', 'cleaned', '#

bad_words ['be', 'coming', 'can', '##ing', 'IPA', 'Honestly', '##os', 'the', 'you', 'verse', '##el', '##ü', '##de', '[PAD]', 'Know', '[PAD]', 'Na', '[PAD]', 'et', '##aco', '[PAD]', 'likewise', 'be', '##dress', 'neighbor', 'wit', '##smo', 'than', 'for', 'Ta', '##ment', "'", 'mouth', '##re', 'virtue', 'Di', 'hours', 'enough', '##e', 'ever', '[PAD]', 'one', 'level', 'to', 'Tam', '*', 'al', '.', '##ton', 'even', 'can', 'about', '—', 'certainty', 'Love', 'stroke', "'", '.']
bad_words ['coming', '”', 'can', 'wonder', 'IPA', 'Honestly', '##os', 'you', 'verse', '“', '##el', '##ü', '##de', '[PAD]', 'or', '[PAD]', 'Na', '[PAD]', 'Du', '##lish', '##aco', '[PAD]', 'likewise', 'mouth', 'neighbor', '##smo', 'than', 'Ta', 'it', '##ment', "'", 'mouth', 'note', 'virtue', 'Di', 'hours', 'enough', '##e', 'ever', '[PAD]', 'Saint', 'meaning', 'This', 'Tam', '*', 'al', '##ton', 'even', 'can', 'about', 'be', '—', 'certainty', 'Love', 'this', 'stroke', "'"]
bad_words ['coming', 'conquer', 'perhaps', '##fe', '

bad_words ['s', 'with', 's', 'nor', 'annoyance', 'true', 'confessed', '##bul', 'll', 'Honestly', 'pp', '##os', 'confusion', '##ü', ':', 'or', '?', 'Du', '##via', '[PAD]', 'be', 'mouth', 'neighbor', 'wit', 'sometimes', 'No', 'than', 'for', '##y', 'time', 'surname', '##res', '##î', 'enough', 'will', 'her', 'This', 'Al', 'speak', 'crews', 'al', 'fancy', 'for', '—', 'certainty', '##y', 'me', 'submission']
bad_words ['whilst', 'unknown', '.', 'true', 'confessed', '##bul', 'll', '?', 'pp', 'confusion', '##ü', 'or', 'but', 'their', '?', 'Du', ':', '[PAD]', 'ʷ', 'mouth', 'love', 'neighbor', 'wit', 'sometimes', 'No', 'than', 'l', 'time', 'surname', '##res', '##î', 'enough', '##e', '##vid', 'her', 'This', 'speak', '##che', 'al', 'fancy', 'for', '—', 'certainty', 'submission', 'but']
bad_words ['whilst', 'deals', 'true', 'confessed', 'VII', 'the', 'confusion', 'com', 'gestures', 'but', '?', 'their', 'the', '?', 'Du', 'made', '[PAD]', 'ʷ', 'mouth', 'then', 'neighbor', 'to', 'wit', 'sometimes', 'No

bad_words ['##ly', 'with', '##lo', 'whilst', 'say', 'While', '##ut', 'Elle', '##tle', '##standing', 'Tan', '##os', 'talk', 'out', 'la', 'who', 'd', 'seek', 'her', '?', ':', '##ily', '?', 'but', 'there', 's', '##fe', 'speaks', 'mouth', 'then', 'neighbor', 'Now', 'Fu', 'thinks', 'too', 'Fremantle', 'sigh', 'speaks', 'Mon', 'obey', '’', 'surname', 'Mi', ':', 'himself', '##eth', '.', 'Kiss', '##vid', 'when', 'what', 'To', '##que', '##che', '##ch', '##s', '.', '##pp', 'certainty', '##y', '.', 'flames', 'submission', 'said']
bad_words ['##ly', ';', 'Ke', 'While', 'A', '##fe', '##standing', 'Turkish', 'Tan', "'", 'talk', 'a', 'out', 'la', 'who', 'd', 'but', 'seek', ':', '##ily', '##lashes', 'but', 'there', 'speaks', 'on', 'then', 'neighbor', 'Now', 'Show', 'too', 'Fremantle', 'sigh', 'Mon', 'obey', '’', 'surname', 'Mi', ':', 'himself', 'say', 'Kiss', '##vid', 'when', 'go', 'To', 'what', '?', '##que', '##che', '##ch', 'thing', '##pp', 'certainty', '##y', '.', 'flames', 'submission']
bad_words 

bad_words ['mixed', '##bos', 'kind', 'While', 'Elle', 'sentiment', '##fort', '##f', 'd', 'Life', 'Se', '?', 'Ever', 'P', '##ss', 'Nature', 'speaks', '##on', 'compliment', 'Now', '##s', 'speaks', 'Mon', 'obey', 'Mi', '?', '"', '##vid', 'when', 'worth', 'T', 'her', 'you', 'he', 'say', '##qua', 'gay', '?', '##finger', 'Be', 'posting']
bad_words ['off', '##ly', '##ch', 'r', 'kind', 'While', 'girls', '"', 'd', '-', '##f', 'Life', ':', 'Se', '?', 'Ever', 'or', 'P', '##ss', 'Nature', 'speaks', 'compliment', 'Now', 'daring', 'G', '"', 'speaks', 'Fish', 'obey', '"', ':', 'Mi', 's', '?', 'health', '##vid', 'when', 'worth', 'T', 'you', 'he', '##qua', 'gay', 'ways', '##finger', 'Be', '##y', '-', 'posting', '##ess', 'said']
bad_words ['off', 'wants', 'r', 'While', '##ctive', 'Elle', 'goes', '"', '-', '##fort', '##f', 'Life', 'Se', 'Ever', 'Nature', 'caring', 'compliment', 'Now', 'scent', 'G', 'Master', 'speaks', 'women', 'obey', ':', 'Mi', '?', '##e', '##vid', '##ness', 'when', 'worth', 'T', '##son

bad_words ['##ble', 'as', 'heart', 'her', 'While', 'Friend', 'because', '-', '##ct', 'dish', ':', 'Toy', '?', 'two', 'in', 'P', 'Co', 'and', 'q', 'Hand', 'Now', '##irs', '##eve', ',', 'To', 'seen', '"', 'Tu', 'Mi', 'Turning', 'kind', 'when', 'worth', 'word', 'I', 'study', '##qua', 'Custom', 'For', 'Half', 'Second', 'Many', '-', 'said']
bad_words ['her', 'll', 'While', 'low', 'd', '-', '##ct', 'dish', '##at', 'in', 'August', 'Co', '##lly', 'q', 'Hand', 'Now', 'Bass', '##eve', 'd', 'he', 'seen', ',', 'Tu', 'Mi', 'Turning', 'kind', 'when', 'worth', 'word', 'her', ',', ':', 'study', '##qua', 'Custom', 'For', 'Half', 'soft', 'Mental', '-', 'said']
bad_words ['##ble', 'll', 'While', 'she', 'because', '-', '##ct', '##at', 'in', 'August', 'Co', '##lly', 'Hand', 'Now', 'Bass', '##eve', 'he', 'To', 'seen', '"', 'Tu', 'Mi', 'kind', 'when', 'worth', 'with', 'her', ':', 'I', '##qua', 'Custom', 'For', 'Half', 'soft', 'Elephant', '"', 'that']
bad_words ['##ble', 'her', 'll', 'While', 'which', 'becaus

bad_words ['##nted', 'be', 'rank', 'friend', 'man', '##at', 'lay', 'No', '##uin', 'thing', ';', 'Hand', 'may', 'reasons', 'q', 'turn', '"', 'Tu', 'Mi', 's', 'Rhythm', 'sure', 'evidently', 'heart', 'She', 'Way', 'meant', '##ness', 'she', 'by', '"', 'than']
bad_words ['##nted', '##ice', 'd', '##oth', 'he', 'knows', 'can', ',', ',', 'rank', '##ch', 'friend', 'man', '##at', ';', 'high', 'lay', '##uin', 'thing', ';', 'Hand', ';', 'may', 'him', 'if', 'that', 'turn', 'thought', ';', 'Tu', 'crust', 'Rhythm', 'sure', 'evidently', 'heart', 'one', 'but', 'Way', 'meant', 'way', 'justice', '"']
bad_words ['##nted', '##ssa', '##men', '##ice', 'A', 'which', 'he', 'taste', 'rank', '##ct', 'friend', 'man', '##at', ';', 'd', 'lay', '##f', 'thing', 'Hand', ';', "'", 'may', 'him', '##nies', '##ning', 'turn', 'thought', 'often', 'Tu', 'crust', 'Rhythm', 'sure', 'evidently', 'heart', 'She', 'one', 'I', 't', 'd', 'meant', 'way', 'she', 'justice', '"', 'than']
bad_words ['##nted', 'Pleasure', '##ice', 'sense'

bad_words ['##ught', 'who', 'corruption', 'than', 'display', '##nia', '##ck', 'can', 'Philosophical', 'Car', '##ere', 'not', 'Ju', '##at', 's', ';', 'Am', 'high', 'lay', 'To', '##f', 'usage', 'money', ';', 'Air', 'degree', '##nies', 'Din', 'N', 'those', 'often', 'Tu', 'crust', '##es', 'when', 'sure', 'and', ',', 'pit', '-', 'I', '##uld', 'E', 'co', 'plot', 'Olympic', '?', 'Sue', 'terms']
bad_words ['##aris', 'corruption', 'display', '##nia', '##ck', 'Lucian', 'Philosophical', 'Car', '##ere', 'not', 'er', 's', '##s', '##at', ';', '##ance', 'high', 'lay', 'To', '##f', 'usage', 'money', 'laughed', '##va', 'degree', ',', '##nies', 'Din', 'defend', 'those', 'often', '##cie', 'crust', 'when', 'their', 'respect', '-', 'I', '##uld', 'E', 't', 'co', 'plot', 'shame', '?', 'Sue', 'terms']
bad_words ['##aris', '##s', 'corruption', '##nia', '##ck', 'Lucian', 'Philosophical', 'Car', '##ere', 'not', 'er', '##at', ';', 'a', 'high', 'lay', 'Sur', '##f', 'usage', 'money', 'laughed', 'fortune', 'like', '

bad_words ['##tra', 'fiddle', '##nounce', 'which', '##nia', 'use', ',', 'they', 'bell', ',', '##ere', ';', 'dam', 'o', '##f', '##ch', '##vie', 'nonsense', '##va', ':', 'not', '##cum', 'confusion', '-', 'Isabel', '##phe', 'min', 'them', '##n', '##nia', '##rro', '##q', '##ch', 'do', 'l', 'it', 'or', 'which', 'be', '##wa']
bad_words ['silence', '##tra', 'fiddle', '##nounce', 'him', 'which', '##nia', 'or', 'they', 'bell', '##ere', '.', 's', 'do', ';', '##nce', '##y', 'o', '##f', '##ch', 'nonsense', '##va', ':', ',', ',', 'not', '##cum', 'confusion', '##ful', 'Isabel', '##chus', 'min', 'them', '##n', '##nia', 'lame', '##rro', 'in', '##ch', 'do', 'l', 'or', '’', 'be', '##wa']
bad_words ['chart', 'fiddle', '-', 'which', '##nia', 'use', ',', 'they', 'all', 'half', '##ch', 's', 'do', ';', '##th', '##y', 'o', '##f', '##ch', ',', 'nonsense', '##va', ':', 'er', 'not', '##cum', 'confusion', '##y', 'Isabel', '##chus', 'min', 'them', '##n', 's', '##nia', 'lame', '##rro', '##ch', 'l', 'l', 'or', '’', 

bad_words ['bubbles', 'Fu', 'desert', '##s', 'gain', 'says', 'all', 'out', '##th', 'suck', 'sometimes', '##f', 'dot', 'one', ',', 'Gods', 'er', 'd', '##uard', 'all', 'confusion', 'pleased', 'pleasant', '##chus', 'Men', '##d', 'bee', ':', 'through', ';', 'lame', 'al', 'in', 'listen', 'toll', 'count', 'or', 'quality', 'Ladies', 'then', 'one', '##hr']
bad_words ['gives', 'as', 'desert', 'best', 'what', 'will', 'like', 'gain', 'says', 'd', 'him', '##th', 'suck', 'sometimes', '##f', 'dot', 'Gods', 'er', '##uard', 'they', 'confusion', 'of', 'pleasant', 'al', '##chus', 'Men', '##d', 'dot', 'hand', 'through', 'noun', 'listen', 'toll', 'grows', 'count', 'quality', 'Ladies', '##hr']
bad_words ['gives', 'good', 'hay', 'what', 'gain', 'en', 'him', '##th', 'suck', 'BAFTA', '##f', 'dot', 'one', '##rd', 'Gods', 'er', 'number', '##uard', 'confusion', 'pleasant', 'al', '##chus', 'rage', "'", 'him', 'bow', 'through', ';', 'noun', 'al', 'in', 'listen', 'toll', '?', 'grows', 'count', 'd', 'quality', 'Ladi

bad_words ['lent', 'her', 'w', 'al', 'all', 'get', 'come', 'ashes', 'part', '##ward', '##rd', 'force', 'silent', 'those', 'pleasant', 'al', 'air', 'exploit', 'west', 'Poet', 'sort', 'but', 'ni', 'just', 'than', 'one', 'Hope', 'then', 'wind', 'half', '##y']
bad_words ['lent', 'hay', 't', 't', 'done', 'get', 'come', 'ashes', '##ward', 'by', 'offering', 'one', 'not', 'silent', 'pleasant', 'air', 'that', 'by', 'Poet', 'sort', 'but', 'ni', '##s', '##e', ',', 'then', 'wind', 'not', 's']
bad_words ['lent', 'hay', 'day', 'a', 'she', 't', 'done', 'al', 'get', 'come', 'ashes', 'part', '##ward', 'd', 'by', 'one', 'offering', 'not', 'his', 'those', 'pleasant', 'al', 'air', 'labour', 'Poet', 'joke', 'poor', '##e', 'the', 'then', 'wind']
bad_words ['rank', 'lent', '##hine', 'done', 'al', 'come', 'ashes', 'part', '-', '##rse', 'all', 's', 'his', 'pleasant', 'air', 'labour', 'by', 'Poet', 'For', 'ni', '##ans', 'D', 'the', 'then', 'wind', 't']
bad_words ['rank', 'lent', '##ic', 'fine', 'half', 'al', 'c

bad_words ['announces', 'lent', 'cow', 'goes', 'fine', 'will', 'nave', '##rse', 'off', '##ably', 'sort', '[UNK]', 'which', 'our', 'morning', 'do', '-', 'sort', 'indeed', 'because', 'at', 'half', 'was', 'l']
bad_words ['last', 'lent', 'had', 'goes', 'touch', 'fine', 'rub', 'w', '##rse', 'it', 'off', '##ably', 'sort', '[UNK]', 'rely', 'those', 'our', 'al', 'lo', 'hum', 'and', 'sort', 'because', 'him', 'half', 'was', 'l']
bad_words ['t', 'lent', 'had', '##bs', 'goes', 'touch', 'fine', 'rub', '##dy', '##rse', 'as', 'off', 'd', 'tin', '[UNK]', 'rely', 's', '##dy', 'al', 'lo', 'hum', 'ya', 's', 'to', 'bell', 'half', 'was', 'l']
bad_words ['lent', '##eth', 'had', 'mast', 'fine', 'rub', 'so', 'mouth', '##self', '##rse', 'by', 'br', 'rely', '##dy', 'al', '##ry', 'Ham', 's', 'do', 'ya', 's', 'bell', 'was']
bad_words ['lent', 'that', 'had', 'all', 'mast', 'touch', 'fine', 'brothers', 'part', 'mouth', '##self', 'su', '##rse', 'as', 'off', 'for', 'br', 'rely', 't', '##dy', '##ry', 'Wife', '-', 'the

bad_words ['##eth', 'or', 'to', 'Writing', '##ser', 'Natural', 'one', 't', 'yard', 'crack', 'co', 'on', 'step', 'so', 'Course', 'who', '##rse', 'like', 'br', 'don', 'al', '##ry', 'Wife', 'winding', 'W', 'by', 'ya', 'but', 'bet', 'him', 'half', '##ile', 'l']
bad_words ['Writing', '##ser', 't', 'yard', 'which', 'co', 'step', 's', 'Course', 'and', '##rse', 'their', 'br', 'al', '##ry', 'Wife', 'she', 'winding', 'W', 'morning', '-', 'bet', 'that', 'half', 's', '##ile', 'l']
bad_words ['b', '##ser', 'he', 't', 'grove', '-', 'she', 'and', 'Course', 'and', 'who', '##rse', 'their', 'br', '##lor', 'it', '##ry', 'Wife', '25', 'winding', 'W', 'morning', 'then', 's', 'bet', 'half', '##ile', 'conscience']
bad_words ['##ser', 'Natural', 'one', 'co', 'Mean', 'grove', 'she', 'as', 'Course', 'd', 'hay', '##rse', 'l', 'br', '##lor', 'that', 'on', 'it', 's', 'al', "'", 'Wife', '25', 'winding', 'and', 'W', 'morning', 'bet', 'half', '.', 'Conor']
bad_words ['Cho', '##ser', 'Natural', 'Mean', 'grove', 'co', 

bad_words [',', '-', 'Natural', ':', 'one', 'co', '##nse', 'Welsh', 'one', 'seven', 'Course', 'd', 'port', 'sic', '.', 'ground', 'on', 'it', 'al', 'give', 'seemed', 'right', 'Souls', 'tenth', 'co', 's', '##ctor', '-', 'sa', 'again']
bad_words ['firm', '##g', 'Natural', '##nse', 'Welsh', 'which', 'd', 'the', 'seven', 'Course', 'd', 'port', 'sic', 'for', '.', 'ground', 'it', 's', '##ked', 's', 'give', 'seemed', 'thought', 'Souls', 'tenth', 'well', 'at', 'that', 's', '##ctor', 's', 'sa']
bad_words ['firm', 'wage', '##nse', 'Welsh', 'which', 'on', 'seven', 'an', 'Chinese', 'port', 's', 'sic', '.', 'ground', 's', '##t', 's', '##ked', 's', 'give', 'thought', 'rang', 'Souls', 'tenth', 'locked', '##hand', '##ctor', 'sa']
bad_words ['that', 'hay', '##lop', '##nse', 'Welsh', 'pass', 'the', 'seven', 'an', 'Chinese', 'port', 's', 'sic', 'for', 'ground', 's', 'it', 's', '##ked', 's', 'give', 'Castle', '##outh', 'Souls', 'tenth', 'locked', '##hand', 'sweep', 's', 'sa']
bad_words ['We', 'fi', '##lop'

bad_words ['fear', 'would', 'inquiry', '.', 'air', '##tro', 'that', 'are', 'Us', 'breathe', 'finished', 'delighted']
bad_words ['seems', 'finished', 'delighted', 'trip', 'wish', '##mpest', 'N', '##rri', '##vations', 'of', 'if', 'are', 'Us', 'seems', 'breathe', 'smoke', 'hard', ';', 'as']
bad_words ['seems', 'So', 'finished', 'delighted', '##oris', 'trip', 're', '##vations', 'Dr', '##xi', 'Us', 'woman', 'So']
bad_words ['So', 'finished', '##oris', '##mpest', '##rri', '##fic', '##tun', 'of', 'lo', 'Us', 'angry', 'love']
bad_words ['furious', '##ei', '##f', 'will', 'Us', 'hard']
bad_words ['drowned', 'trip', 'then', '##dra', '##rri', '##ght', 'Listen', 'say', '##clamation']
bad_words ['rising', '-', 'state', '##dra', '##ve', 'these', 'See', 'he', 'ca', 'love', '##clamation']
bad_words ['shattered', 'old', '##able', 'mass', '##dra', '##sance', 'Z', 'kiss', 'passage', '##ani']
bad_words ['hoarse', '##able', ';', 'flood', '##lf', '##rri', 'Do', '##hom', 'In', 'Changes', 'breathe', 'ca', 'mor

bad_words ['re', '##sation', 'there', '##chard', 'of', 'look', 'say', 'says']
bad_words ['thousand', 'te', '##sation', 'it', '##ess', 'him', 'car']
bad_words ['breaking', 'fear', 'Custom', 'plea', 'director', '##bbe', 'Since']
bad_words ['rescued', 'seen', 'vol', 'emblem', 'W', 'director', 'retreat', 'Die']
bad_words ['rescued', '##gment', 'that', 'reflection', 'stream', 'And']
bad_words ['genuine', 'thought', '##uth', 'there', 'take', 'question', 'Young']
bad_words ['quiet', 'forgotten', '##ully', 'of', 'but']
bad_words ['lost', 'not', '##uding', 'The', '"', 'old']
bad_words ['hard', 'made', 'was', '##if', 'articulated', 'now']
bad_words ['##ts', '##a']


bad_words ['##ator']
bad_words ['.', 'afterward', '##lore']
bad_words ['some', '##quire', 'Oh']
bad_words ['rot', 'paying', '##ynn', 'lover', 'of', 'You']
bad_words ['will', ':', '##ine']
bad_words ['youth', 'responses', '-', 'Ah', 'sudden']
bad_words ['conflict', 'heat', 'Swift', 'answer', 'chapter', 'rage']
bad_words ['letter', 'le', 'He', 'Ban', 'scream']
bad_words ['joke', 'strong', 'men', 'No', 'shuddered', 'Bloc']
bad_words ['tone', 'wild', 'Stay', 'we', 'ps']


bad_words ['later', 'his', 'Goethe', 'my', '##ux']
bad_words ['ins']
bad_words ['by', 'this', 'to']
bad_words ['burst', 'they', 'done']
bad_words ['he', ',', 'had']
bad_words ['gone']
bad_words ['better', 'who', 'of']
bad_words ['Man', 'fool', 'all', 'this', 'breath', 'you']
bad_words ['O', 'let']
bad_words ['f', 'Well', 'acute']


bad_words ['calm', 'moral']
bad_words ['spare', 'new', 'clean', 'man']
bad_words ['O', 'brilliant', 'shy', '!']
bad_words ['silent', 'sad']
bad_words ['mighty', 'ready']
bad_words ['mad', 'glad', '.', 'Vampires']
bad_words ['in', 'mid', 'solitary', '.', 'he']
bad_words ['-', 'lovely', 'Al', 'whom']
bad_words ['wish', 'glorious']
bad_words ['by', 'certain']


bad_words ['said', 'delicate']
bad_words ['pledged', 'naked']
bad_words ['delayed', 'intense', 'good']
bad_words ['meant', '.', 'passive']
bad_words ['perpetual']
bad_words []
bad_words ['tender', 'aloud']
bad_words ['later', 'complete', 'remote', 'merely']
bad_words ['frequently', 'three', 'simultaneous', 'seen']
bad_words ['tentatively', 'and', 'strange', 'not']


bad_words ['quietly', 'disorder', 'the', 'able']
bad_words ['die', 'furious', '##hou', 'that', 'received']
bad_words ['severe', '##natch', 'it', 'a']
bad_words ['supreme', 'exact', 'what', 'care', 'got']
bad_words ['restless', 'sage', 'say', 'care', 'and']
bad_words ['splendid', 'to']
bad_words ['unnatural', 'sent']
bad_words ['still', 'come']
bad_words ['laughing', 'not']
bad_words ['recovered']


bad_words ['lover', '"', 'made']
bad_words ['##a', 'And', 'sculpture', 'fashion', 'chosen']
bad_words ['##far', 'want', 'been']
bad_words ['Ah', 'ripped']
bad_words ['crimson', 'never']
bad_words ['##rden', 'that']
bad_words ['wild', 'love']
bad_words ['now', 'failed', 'fever']
bad_words ['rarely', 'mad']
bad_words ['remarkable']


bad_words ['few']
bad_words ['perpetual']
bad_words ['cry']
bad_words ['find', 'found', 'unity']
bad_words ['him', 'me', 'flee']
bad_words ['low', 'God', 'silent']
bad_words []
bad_words ['acts', 'action', 'roar']
bad_words ['Marin', 'trial', 'she', 'notice', 'cries']
bad_words ['I', 'accomplished', 'garments', 'sixth']


bad_words [',', '-', 'care']
bad_words ['love', 'love', 'Na']
bad_words ['somebody', 'desire', '##liness']
bad_words ['virgin', 'those', 'could']
bad_words ['is', 'desired', 've']
bad_words ['at']
bad_words ['Strike', 'of', 'nothing']
bad_words ['promised', 'youthful', 'II', 'He', 'got']
bad_words ['International', 'to']
bad_words ['captain', 'fresh', 'all']


# TODO

- show probabilities in the next-word logging: record the probability of each letter, then use them when displaying as HTML
- try other ways of doing next word. E.g. going back and redoing, doing more than 1 at once
- make the masked language generator often mask last word
- should the loss be computed on just the masked words, or on all words? It's hard to tell from the TensorFlow repo. This is marked with a TODO or FIXME in the code
- add validation loss, since overfitting seems to be a factor
- for eval, don't pad, just have a batch size of one. That may lead to better results
- for eval, add some words, and let it fill in the blanks
  - recursively replace low-confidence words?
  - for this I may have to make it predict unmasked words?