In [1]:
from __future__ import absolute_import, division, print_function

import argparse
import csv
import logging
import os
import random
import math
import sys
import re

from pytorch_pretrained_bert.tokenization import BertTokenizer
from pytorch_pretrained_bert.modeling import BertModel, BertForMaskedLM

from sklearn.metrics.pairwise import cosine_similarity as cosine

from scipy.special import softmax

import openpyxl

from pathlib import Path

from PPDB import Ppdb
from nltk.tokenize import word_tokenize

# OPTIONAL: if you want to have more information on what's happening, activate the logger as follows
import numpy as np
import torch
import nltk
#from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

from collections import defaultdict

from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler,
                              TensorDataset)
from torch.utils.data.distributed import DistributedSampler
from tqdm import tqdm, trange
from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule

from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE, WEIGHTS_NAME, CONFIG_NAME

import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


In [2]:
class InputFeatures:
    """Model-ready encoding of one example.

    Every constructor argument is stored unchanged on the attribute of the
    same name: ``unique_id``, ``tokens``, ``input_ids``, ``input_mask`` and
    ``input_type_ids``.
    """

    def __init__(self, unique_id, tokens, input_ids, input_mask, input_type_ids):
        self.unique_id, self.tokens = unique_id, tokens
        self.input_ids, self.input_mask = input_ids, input_mask
        self.input_type_ids = input_type_ids

def convert_sentence_to_token(sentence, seq_length, tokenizer):
    """Tokenize ``sentence`` and regroup the resulting pieces into whole words.

    Args:
        sentence: text passed to ``tokenizer.tokenize``.
        seq_length: the token list is truncated to ``seq_length - 2`` entries.
        tokenizer: object exposing ``tokenize(str) -> list``.

    Returns:
        A tuple ``(tokenized_text, words, position)``:
        ``tokenized_text`` is the (possibly truncated) token list,
        ``words`` holds the regrouped words (``##`` continuation pieces merged,
        tokens joined around ``-``), and ``position`` holds, for each word,
        either a single int (one-token word) or a list of ints (multi-token
        word). Positions are ``token index + 1``.

    NOTE(review): with fewer than two tokens the loop body never runs, so
    ``index`` is unbound when the trailing section executes and the call raises.
    NOTE(review): when the last token is "-", ``tokenized_text[index+2]`` is
    read one past the end of the list and raises IndexError.
    """
  
    tokenized_text = tokenizer.tokenize(sentence)

    # Truncate to seq_length - 2 tokens (presumably leaving room for the
    # [CLS]/[SEP] markers added by the convert_*_to_feature helpers).
    if len(tokenized_text) > seq_length - 2:
        tokenized_text = tokenized_text[0:(seq_length - 2)]

    position =[]
    special =[]          # positions collected for the multi-token word in progress
    isSpecial = False    # True while inside a run of "##" word pieces

    whole_word = ''      # text collected for the multi-token word in progress
    words = []

    # Offset added to every token index when recording positions.
    start_pos =  1

    connect_sign = 0     # never read in this function
    # The loop stops one token early; the final token is handled after it.
    for index in range(len(tokenized_text)-1):
        
        # Token directly before a single "-": start/extend a hyphenated word.
        if(tokenized_text[index+1]=="-" and tokenized_text[index+2]!="-"):
            
            whole_word += tokenized_text[index]
            special.append(start_pos+index)
            continue

        # The "-" itself is appended to the word in progress.
        if(tokenized_text[index]=="-"):
            
            whole_word += tokenized_text[index]
            special.append(start_pos+index)

            # Second of two consecutive "-": flush what has been collected.
            # At index 0, index-1 is -1 and refers to the last token.
            if(tokenized_text[index-1]=="-"):
                words.append(whole_word)
                position.append(special)
                special = []
                whole_word = ''
            continue

        # Token directly after a "-": it completes the hyphenated word.
        if(tokenized_text[index]!="-" and tokenized_text[index-1]=="-"):
            whole_word += tokenized_text[index]
            words.append(whole_word)
            whole_word = ''
            special.append(start_pos+index)
            position.append(special)
            special = []
            continue    

        # The next token is a "##" continuation: keep collecting pieces.
        if(tokenized_text[index+1][0:2]=="##"):
            special.append(start_pos+index)
            whole_word += tokenized_text[index]
            isSpecial = True
            continue
        else:
            if isSpecial:
                # Last piece of a "##" run: flush the merged word without "##".
                isSpecial = False
                special.append(start_pos+index)
                position.append(special)
                whole_word += tokenized_text[index]
                whole_word = whole_word.replace('##','')
                words.append(whole_word)
                whole_word = ''
                special =  []
            else:
                # Ordinary single-token word: its position is a plain int.
                position.append(start_pos+index)
                words.append(tokenized_text[index])

    # Handle the final token (index + 1), which the loop never visits.
    if isSpecial:
        isSpecial = False
        special.append(start_pos+index+1)
        position.append(special)
        whole_word += tokenized_text[index+1]
        whole_word = whole_word.replace('##','')
        words.append(whole_word)
    else:
        # NOTE(review): a hyphenated word still in progress here (whole_word /
        # special non-empty) is not flushed; the last token is emitted alone.
        position.append(start_pos+index+1)
        words.append(tokenized_text[index+1])
       
    return tokenized_text, words, position

def convert_whole_word_to_feature(tokens_a, mask_position, seq_length, tokenizer, prob_mask):
    """Build the masked-LM input for a target word that spans several tokens.

    The sequence is ``[CLS] + tokens_a + [SEP]``. The first position listed in
    ``mask_position`` is replaced by a single ``[MASK]`` and the remaining
    listed positions are removed, so the whole word collapses to one mask.

    Args:
        tokens_a: list of tokens for the sentence.
        mask_position: list of positions of the target word's pieces, indexed
            into the ``[CLS]``-prefixed sequence (assumed ascending; positions
            are deleted from the back so earlier indices stay valid).
        seq_length: length the id/mask/type lists are zero-padded to.
        tokenizer: object exposing ``convert_tokens_to_ids(list) -> list``.
        prob_mask: fraction of ``tokens_a`` replaced by ``[MASK]`` at random.

    Returns:
        An ``InputFeatures`` with ``unique_id=0``.

    Raises:
        AssertionError: if the sequence is longer than ``seq_length``.
    """
    len_tokens = len(tokens_a)
    # Same random.sample call as before so the RNG stream is unchanged; a set
    # only makes the membership test below O(1).
    first_sentence_mask_random = set(
        random.sample(range(0, len_tokens), int(prob_mask * len_tokens)))

    tokens = ["[CLS]"]
    for i in range(len_tokens):
        tokens.append('[MASK]' if i in first_sentence_mask_random else tokens_a[i])
    tokens.append("[SEP]")
    input_type_ids = [0] * len(tokens)

    if mask_position:
        # Drop every piece but the first, walking backwards so the pending
        # indices are not shifted by earlier deletions.
        for pos in reversed(mask_position[1:]):
            del tokens[pos]
            del input_type_ids[pos]
        tokens[mask_position[0]] = '[MASK]'

    input_ids = tokenizer.convert_tokens_to_ids(tokens)

    # The mask has 1 for real tokens and 0 for padding tokens. Only real
    # tokens are attended to.
    input_mask = [1] * len(input_ids)

    # Zero-pad up to the sequence length.
    while len(input_ids) < seq_length:
        input_ids.append(0)
        input_mask.append(0)
        input_type_ids.append(0)

    assert len(input_ids) == seq_length
    assert len(input_mask) == seq_length
    assert len(input_type_ids) == seq_length

    return InputFeatures(unique_id=0, tokens=tokens, input_ids=input_ids,
                         input_mask=input_mask, input_type_ids=input_type_ids)
    

def convert_token_to_feature(tokens_a, mask_position, seq_length, tokenizer, prob_mask):
    """Build the masked-LM input for a target word that is a single token.

    The sequence is ``[CLS] + tokens_a + [SEP]`` with the token at
    ``mask_position`` (an index into that sequence) replaced by ``[MASK]``.
    A ``prob_mask`` fraction of ``tokens_a`` is additionally masked at random.
    The id, mask and type lists are zero-padded to ``seq_length``.

    Returns an ``InputFeatures`` with ``unique_id=0``.
    """
    len_tokens = len(tokens_a)
    randomly_masked = random.sample(range(0, len_tokens), int(prob_mask * len_tokens))

    # Index exempt from random masking. For any mask_position that is a valid
    # index into the final sequence this value is negative, so it never fires.
    exempt_index = mask_position - len_tokens - 2

    tokens = ["[CLS]"]
    for i, token in enumerate(tokens_a):
        if i != exempt_index and i in randomly_masked:
            tokens.append('[MASK]')
        else:
            tokens.append(token)
    tokens.append("[SEP]")

    # One segment id (0) per token, including [CLS] and [SEP].
    input_type_ids = [0] * len(tokens)

    tokens[mask_position] = '[MASK]'

    input_ids = tokenizer.convert_tokens_to_ids(tokens)

    # 1 marks a real token, 0 marks padding.
    input_mask = [1] * len(input_ids)

    # Zero-pad all three lists by the same amount.
    missing = seq_length - len(input_ids)
    if missing > 0:
        input_ids.extend([0] * missing)
        input_mask.extend([0] * missing)
        input_type_ids.extend([0] * missing)

    assert len(input_ids) == seq_length
    assert len(input_mask) == seq_length
    assert len(input_type_ids) == seq_length

    return InputFeatures(unique_id=0, tokens=tokens, input_ids=input_ids,
                         input_mask=input_mask, input_type_ids=input_type_ids)
    

def getWordmap(wordVecPath):
    """Load a text word-vector file into parallel word and vector lists.

    The first line of the file is treated as a header: it is printed and
    skipped. Every other non-blank line is ``<word> <v1> <v2> ...`` separated
    by single spaces.

    Args:
        wordVecPath: path of the UTF-8 encoded vector file.

    Returns:
        ``(words, We)`` where ``words[k]`` is the k-th word and ``We[k]`` is
        its vector as a 1-D numpy array.

    Raises:
        ValueError: if a non-blank data line contains no space separator.
    """
    words = []
    We = []
    # ``with`` closes the file even if a line fails to parse; iterating the
    # handle streams the file instead of holding every line in memory.
    with open(wordVecPath, 'r', encoding="utf-8") as f:
        for n, line in enumerate(f):
            if n == 0:
                print(line)
                continue
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            word, vect = line.rstrip().split(' ', 1)
            We.append(np.fromstring(vect, sep=' '))
            words.append(word)
    return (words, We)


def getWordCount(word_count_path):
    """Read a word -> score table from the active sheet of an .xlsx workbook.

    The first row is skipped as a header. For every other row the key is the
    first cell and the value is the last column's cell, converted to float and
    rounded to three decimals. Later rows overwrite earlier ones with the same
    key.
    """
    workbook = openpyxl.load_workbook(Path('', word_count_path))
    sheet = workbook.active
    last_column = sheet.max_column - 1

    rows = sheet.iter_rows(values_only=True)
    next(rows, None)  # discard the header row
    return {row[0]: round(float(row[last_column]), 3) for row in rows}

def read_eval_index_dataset(data_path, is_label=True):
    """Read an evaluation file whose gold labels are ``id:word`` fields.

    Each non-blank line is tab-separated:
    ``sentence <TAB> target word <TAB> field <TAB> id:word <TAB> id:word ...``.
    The field right after the target word is skipped; every following field is
    split on its first ':' and the word part is kept.

    Args:
        data_path: path of the ISO-8859-1 encoded dataset file.
        is_label: unused; kept for signature compatibility with
            ``read_eval_dataset``.

    Returns:
        ``(sentences, mask_words, mask_labels)``; ``mask_labels[k]`` is the
        list of distinct gold words for line k, in first-seen order.
    """
    sentences = []
    mask_words = []
    mask_labels = []

    with open(data_path, "r", encoding='ISO-8859-1') as reader:
        for line in reader:
            if not line.strip():
                # A blank line cannot be unpacked below; skip it.
                continue

            sentence, words = line.strip().split('\t', 1)
            mask_word, labels = words.strip().split('\t', 1)
            label = labels.split('\t')

            sentences.append(sentence)
            mask_words.append(mask_word)

            one_labels = []
            for la in label[1:]:
                # Split only on the first ':' so a word containing ':' survives.
                la_id, la_word = la.split(':', 1)
                # Deduplicate on the word itself; the previous check compared
                # the raw "id:word" field and therefore never matched.
                if la_word not in one_labels:
                    one_labels.append(la_word)

            mask_labels.append(one_labels)

    return sentences, mask_words, mask_labels

def read_eval_dataset(data_path, is_label=True):
    """Read a tab-separated evaluation file.

    With ``is_label`` true the first line is skipped as a header and each
    remaining line is ``sentence <TAB> target word <TAB> label <TAB> ...``;
    labels are deduplicated in first-seen order. With ``is_label`` false every
    line is ``sentence <TAB> target word`` and no labels are collected.

    Returns ``(sentences, mask_words, mask_labels)``.
    """
    sentences, mask_words, mask_labels = [], [], []

    with open(data_path, "r", encoding='ISO-8859-1') as reader:
        if is_label:
            # The header line carries no example.
            next(reader, None)

        for line in reader:
            if not is_label:
                sentence, mask_word = line.strip().split('\t')
                sentences.append(sentence)
                mask_words.append(mask_word)
                continue

            sentence, remainder = line.strip().split('\t', 1)
            mask_word, label_field = remainder.strip().split('\t', 1)

            sentences.append(sentence)
            mask_words.append(mask_word)
            # dict.fromkeys keeps the first occurrence of each label, in order.
            mask_labels.append(list(dict.fromkeys(label_field.split('\t'))))

    return sentences, mask_words, mask_labels

def BERT_candidate_generation(source_word, pre_tokens, pre_scores, ps, num_selection=10):
    """Pick up to ``num_selection`` substitution candidates from ``pre_tokens``.

    Tokens are scanned in the given order and a token is dropped when it
    - starts with ``##`` (word-piece continuation),
    - equals ``source_word``,
    - has the same stem as ``source_word``, or
    - has a stem of at least three characters whose first three characters
      equal the first three of the source stem.

    Args:
        source_word: the word being replaced.
        pre_tokens: candidate tokens, best first.
        pre_scores: unused; kept for interface compatibility.
        ps: stemmer object exposing ``stem(str) -> str``.
        num_selection: maximum number of candidates to return.

    Returns:
        The surviving tokens (at most ``num_selection``). If every token is
        filtered out, the first ``num_selection`` unfiltered tokens are
        returned instead.

    Raises:
        AssertionError: if ``num_selection`` exceeds ``len(pre_tokens)`` or no
            candidate can be returned.
    """
    cur_tokens = []

    source_stem = ps.stem(source_word)

    assert num_selection <= len(pre_tokens)

    for token in pre_tokens:
        if token[0:2] == "##":
            continue

        if token == source_word:
            continue

        token_stem = ps.stem(token)

        if token_stem == source_stem:
            continue

        if (len(token_stem) >= 3) and (token_stem[:3] == source_stem[:3]):
            continue

        cur_tokens.append(token)

        if len(cur_tokens) == num_selection:
            break

    if len(cur_tokens) == 0:
        # Fallback: everything was filtered. The slice previously ended at
        # num_selection + 1 and so returned one token more than requested.
        cur_tokens = pre_tokens[0:num_selection]

    assert len(cur_tokens) > 0

    return cur_tokens

def cross_entropy_word(X,i,pos):
    """Return ``-log10`` of the softmax probability at row ``i``, column ``pos``.

    ``X`` is a 2-D array of scores; the softmax is taken along axis 1.
    """
    probabilities = softmax(X, axis=1)
    # Written as "0 - x" to mirror the accumulator form of the computation.
    return 0 - np.log10(probabilities[i, pos])


def get_score(sentence,tokenizer,maskedLM):
    """Score ``sentence`` with a masked language model (lower is better).

    Each token is masked in turn, the model predicts the masked position, and
    ``cross_entropy_word`` gives the loss of the true token. The result is
    ``exp(total loss / number of tokens)``.

    Args:
        sentence: text to score.
        tokenizer: object exposing ``tokenize`` and ``convert_tokens_to_ids``.
        maskedLM: callable returning ``(attentions, prediction_scores)`` for a
            batch of token ids.

    Returns:
        The score as a numpy float.
    """
    tokenize_input = tokenizer.tokenize(sentence)

    len_sen = len(tokenize_input)

    START_TOKEN = '[CLS]'
    SEPARATOR_TOKEN = '[SEP]'

    tokenize_input.insert(0, START_TOKEN)
    tokenize_input.append(SEPARATOR_TOKEN)

    input_ids = tokenizer.convert_tokens_to_ids(tokenize_input)

    # Run on whatever device the model lives on instead of a hard-coded
    # 'cuda', so the function also works on CPU-only machines.
    parameters = getattr(maskedLM, 'parameters', None)
    first_param = next(parameters(), None) if callable(parameters) else None
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    sentence_loss = 0

    for i, word in enumerate(tokenize_input):

        if word == START_TOKEN or word == SEPARATOR_TOKEN:
            continue

        # Temporarily mask position i; it is restored at the end of the pass.
        tokenize_input[i] = '[MASK]'
        mask_input = torch.tensor([tokenizer.convert_tokens_to_ids(tokenize_input)])
        mask_input = mask_input.to(model_device)
        with torch.no_grad():
            att, pre_word = maskedLM(mask_input)
        word_loss = cross_entropy_word(pre_word[0].cpu().numpy(), i, input_ids[i])
        sentence_loss += word_loss
        tokenize_input[i] = word

    return np.exp(sentence_loss/len_sen)


def LM_score(source_word,source_context,substitution_selection,tokenizer,maskedLM):
    """Score the context with each candidate substituted for ``source_word``.

    Args:
        source_word: the word to be replaced.
        source_context: iterable of context words, joined with single spaces.
        substitution_selection: candidate replacement words.
        tokenizer, maskedLM: forwarded to ``get_score``.

    Returns:
        ``(LM, source_loss)``: ``LM[k]`` is the ``get_score`` value of the
        context with candidate k substituted, and ``source_loss`` is the score
        of the unmodified context.
    """
    context_tokens = list(source_context)
    source_sentence = " ".join(context_tokens).strip()

    source_loss = get_score(source_sentence, tokenizer, maskedLM)

    # Replace whole tokens when the source word is one of the context tokens.
    # A plain str.replace would also rewrite it inside longer words
    # (e.g. "art" inside "party").
    replace_whole_tokens = source_word in context_tokens

    LM = []
    for substibution in substitution_selection:
        if replace_whole_tokens:
            sub_sentence = " ".join(
                substibution if token == source_word else token
                for token in context_tokens).strip()
        else:
            # Source word is not a standalone token: keep substring behaviour.
            sub_sentence = source_sentence.replace(source_word, substibution)

        LM.append(get_score(sub_sentence, tokenizer, maskedLM))

    return LM,source_loss


def preprocess_SR(source_word, substitution_selection, fasttext_dico, fasttext_emb, word_count):
    """Filter candidates and collect their similarity and frequency scores.

    A candidate is kept only if it appears in ``word_count`` with a value
    greater than 3. When ``source_word`` is in ``fasttext_dico`` the candidate
    must be in it too, and its cosine similarity to the source embedding is
    recorded.

    Returns ``(ss, sis_scores, count_scores)``: the kept candidates, their
    similarity scores (empty when the source word has no embedding) and their
    ``word_count`` values.
    """
    kept = []
    similarities = []
    frequencies = []

    source_has_vector = source_word in fasttext_dico
    if source_has_vector:
        source_emb = fasttext_emb[fasttext_dico.index(source_word)].reshape(1,-1)

    for candidate in substitution_selection:
        if candidate not in word_count:
            continue

        frequency = word_count[candidate]
        if frequency <= 3:
            continue

        if source_has_vector:
            if candidate not in fasttext_dico:
                continue
            candidate_emb = fasttext_emb[fasttext_dico.index(candidate)].reshape(1,-1)
            similarities.append(cosine(source_emb, candidate_emb))

        kept.append(candidate)
        frequencies.append(frequency)

    return kept, similarities, frequencies

def compute_context_sis_score(source_word, sis_context, substitution_selection, fasttext_dico, fasttext_emb):
    """Average cosine similarity between each candidate and the context words.

    Context words equal to ``source_word`` or missing from ``fasttext_dico``
    are ignored. If no context word remains, the candidates instead receive
    the descending values ``n, n-1, ..., 1`` (``n`` = number of candidates).
    """
    usable_context = [word for word in sis_context
                      if word != source_word and word in fasttext_dico]

    if not usable_context:
        total = len(substitution_selection)
        return [total - offset for offset in range(total)]

    scores = []
    for candidate in substitution_selection:
        candidate_emb = fasttext_emb[fasttext_dico.index(candidate)].reshape(1,-1)
        similarity_sum = sum(
            cosine(candidate_emb, fasttext_emb[fasttext_dico.index(word)].reshape(1,-1))
            for word in usable_context)
        scores.append(similarity_sum/len(usable_context))

    return scores


def substitution_ranking(source_word, source_context, substitution_selection, fasttext_dico, fasttext_emb, word_count, ssPPDB, tokenizer, maskedLM, lables):
    """Choose the best substitute for ``source_word``, or keep the word.

    Candidates surviving ``preprocess_SR`` are ranked by several features
    (generation order, embedding similarity when available, frequency,
    language-model score, PPDB membership). The candidate with the smallest
    rank sum is selected. It is returned only if it has a lower LM score than
    the original sentence or a higher frequency than the source word;
    otherwise ``source_word`` is returned. ``lables`` is not used.
    """
    def ranks_of(values, descending):
        # 1-based rank of each value; equal values share the first rank.
        ordered = sorted(values, reverse=descending)
        return [ordered.index(value) + 1 for value in values]

    candidates, similarity_scores, frequency_scores = preprocess_SR(
        source_word, substitution_selection, fasttext_dico, fasttext_emb, word_count)

    if not candidates:
        return source_word

    has_similarity = len(similarity_scores) > 0
    if has_similarity:
        similarity_rank = ranks_of(similarity_scores, True)

    frequency_rank = ranks_of(frequency_scores, True)

    lm_scores, source_lm = LM_score(source_word, source_context, candidates, tokenizer, maskedLM)
    lm_rank = ranks_of(lm_scores, False)

    # Generation order is itself a rank: 1 for the first candidate, and so on.
    generation_rank = list(range(1, len(candidates) + 1))
    # PPDB paraphrases get rank 1; all others get a third of the list length.
    ppdb_rank = [1 if candidate in ssPPDB else len(candidates)/3
                 for candidate in candidates]

    if has_similarity:
        combined = [bert+sis+count+LM+ppdb
                    for bert, sis, count, LM, ppdb
                    in zip(generation_rank, similarity_rank, frequency_rank, lm_rank, ppdb_rank)]
    else:
        combined = [bert+count+LM+ppdb
                    for bert, count, LM, ppdb
                    in zip(generation_rank, frequency_rank, lm_rank, ppdb_rank)]

    best = combined.index(min(combined))

    best_frequency = frequency_scores[best]
    source_frequency = word_count[source_word] if source_word in word_count else 0
    best_lm = lm_scores[best]

    if source_lm > best_lm or best_frequency > source_frequency:
        return candidates[best]
    return source_word


def evaulation_SS_scores(ss,labels):
    """Compute potential, precision, recall and F-score of generated candidates.

    Args:
        ss: list of candidate lists, one per instance.
        labels: list of gold-label lists, aligned with ``ss``.

    Returns:
        ``(potential, precision, recall, F_score)`` where potential is the
        share of instances with at least one candidate among the gold labels,
        precision is matches over all candidates, and recall is matches over
        all gold labels. Any ratio whose denominator is zero is reported as
        ``0.0`` instead of raising ZeroDivisionError.

    Raises:
        AssertionError: if ``ss`` and ``labels`` differ in length.
    """
    assert len(ss)==len(labels)

    instances = len(ss)
    potential = 0
    matched = 0
    precision_all = 0
    recall_all = 0

    for candidates, gold in zip(ss, labels):
        common = set(candidates).intersection(gold)

        if len(common) >= 1:
            potential += 1
        matched += len(common)
        precision_all += len(candidates)
        recall_all += len(gold)

    # Guard each division: empty input, empty candidate lists or zero overlap
    # would otherwise divide by zero.
    potential = potential / instances if instances else 0.0
    precision = matched / precision_all if precision_all else 0.0
    recall = matched / recall_all if recall_all else 0.0

    denominator = precision + recall
    F_score = 2*precision*recall/denominator if denominator else 0.0

    return potential,precision,recall,F_score


def evaulation_pipeline_scores(substitution_words,source_words,gold_words):
    """Score the final substitutions of the full pipeline.

    Returns ``(precision, accuracy, changed_proportion)``, each divided by
    ``len(substitution_words)``:
    precision counts outputs that are unchanged or among the gold words,
    accuracy counts outputs that are changed and among the gold words, and
    changed_proportion counts outputs that differ from the source word.
    """
    instances = len(substitution_words)
    acceptable = 0
    correct_changes = 0
    changes = 0

    for sub, source, gold in zip(substitution_words, source_words, gold_words):
        changed = sub != source
        # Gold membership is only consulted for changed words.
        changed_into_gold = changed and (sub in gold)

        if not changed or changed_into_gold:
            acceptable += 1
        if changed_into_gold:
            correct_changes += 1
        if changed:
            changes += 1

    return acceptable/instances, correct_changes/instances, changes/instances




def extract_context(words, mask_index, window):
    """Return a window of ``words`` around ``mask_index``.

    If ``words`` has at most ``window`` items it is returned as is. Otherwise
    the slice is centred on ``mask_index`` (``window // 2`` items on each
    side), or pinned to the start or end of ``words`` when the index is too
    close to an edge.
    """
    length = len(words)
    half = int(window/2)

    assert mask_index>=0 and mask_index<length

    if length <= window:
        return words

    if mask_index < half:
        # Too close to the start: take the leading window.
        return words[0:window]

    if mask_index >= length - half:
        # Too close to the end: take the trailing window.
        return words[length-window:length]

    return words[mask_index-half:mask_index+half+1]

def preprocess_tag(tag):
    """Map a POS tag to a coarser tag based on its first character.

    Tags starting with "V" or "N" are returned unchanged, "R" maps to "r",
    "J" or "I" map to "a", and everything else maps to "s".
    """
    initial = tag[0]
    if initial in ("V", "N"):
        return tag
    if initial == "R":
        return "r"
    return 'a' if initial in ("J", "I") else 's'


In [3]:
# Command-line style configuration for the notebook. The arguments are parsed
# from an empty list, so every option starts at its default; later cells
# override individual attributes on ``args``.
parser = argparse.ArgumentParser()

## Data and model locations
parser.add_argument("--eval_dir",
                    default=None,
                    type=str,
                    help="The evaluation data dir.")
parser.add_argument("--bert_model", default=None, type=str,
                    help="Bert pre-trained model selected in the list: bert-base-uncased, "
                    "bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, "
                    "bert-base-multilingual-cased, bert-base-chinese.")
parser.add_argument("--output_SR_file",
                    default=None,
                    type=str,
                    help="The path of the output file for substitution ranking results.")
parser.add_argument("--word_embeddings",
                    default=None,
                    type=str,
                    help="The path of word embeddings")
parser.add_argument("--word_frequency",
                    default=None,
                    type=str,
                    help="The path of word frequency.")
parser.add_argument("--ppdb",
                    default="./ppdb-2.0-tldr",
                    type=str,
                    help="The path of the PPDB paraphrase database.")
parser.add_argument("--prob_mask",
                    default=0,
                    type=float,
                    help="Proportion of the masked words in first sentence. "
                         "E.g., 0.1 = 10%% of the tokens.")

## Other parameters
parser.add_argument("--cache_dir",
                    default="",
                    type=str,
                    help="Where do you want to store the pre-trained models downloaded from s3")
parser.add_argument("--max_seq_length",
                    default=128,
                    type=int,
                    help="The maximum total input sequence length after WordPiece tokenization. \n"
                         "Sequences longer than this will be truncated, and sequences shorter \n"
                         "than this will be padded.")
parser.add_argument("--do_eval",
                    action='store_true',
                    help="Whether to run eval on the dev set.")
parser.add_argument("--do_lower_case",
                    action='store_true',
                    help="Set this flag if you are using an uncased model.")
parser.add_argument("--eval_batch_size",
                    default=8,
                    type=int,
                    help="Total batch size for eval.")
parser.add_argument("--num_selections",
                    default=20,
                    type=int,
                    help="Number of substitution candidates to generate for each word.")
parser.add_argument("--num_eval_epochs",
                    default=1,
                    type=int,
                    help="Total number of evaluation epochs to perform.")
parser.add_argument("--warmup_proportion",
                    default=0.1,
                    type=float,
                    help="Proportion of training to perform linear learning rate warmup for. "
                         "E.g., 0.1 = 10%% of training.")
parser.add_argument("--no_cuda",
                    action='store_true',
                    help="Whether not to use CUDA when available")
parser.add_argument("--local_rank",
                    type=int,
                    default=-1,
                    help="local_rank for distributed training on gpus")
parser.add_argument('--seed',
                    type=int,
                    default=42,
                    help="random seed for initialization")
parser.add_argument('--fp16',
                    action='store_true',
                    help="Whether to use 16-bit float precision instead of 32-bit")
parser.add_argument('--loss_scale',
                    type=float, default=0,
                    help="Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n"
                         "0 (default value): dynamic loss scaling.\n"
                         "Positive power of 2: static loss scaling value.\n")
parser.add_argument('--server_ip', type=str, default='', help="Can be used for distant debugging.")
parser.add_argument('--server_port', type=str, default='', help="Can be used for distant debugging.")

# Parse an empty argv: inside a notebook sys.argv belongs to the kernel.
args = parser.parse_args([])


In [4]:
# Notebook-level overrides of the parsed defaults.
# NOTE(review): every path below is an absolute path on a local D: drive;
# adjust them (or derive them from one configurable data directory) before
# running on another machine.
args.do_eval=True
args.do_lower_case=True
args.num_selections=10
args.prob_mask=0.0
args.eval_dir='D:/data/bert_ppdb/datasets/lex.mturk.txt'
args.bert_model='bert-large-uncased-whole-word-masking'
args.max_seq_length=250
args.word_embeddings='D:/data/bert_ppdb/crawl-300d-2M-subword.vec'
args.word_frequency='D:/data/bert_ppdb/SUBTLEX_frequency.xlsx'
args.ppdb='D:/data/bert_ppdb/ppdb-2.0-tldr'
# NOTE(review): the output file is named after NNSeval while eval_dir points
# at lex.mturk - confirm this is intended.
args.output_SR_file='D:/data/bert_ppdb/results/NNSeval'



In [5]:
# Optional remote debugging: only active when both server options are set.
if args.server_ip and args.server_port:
    import ptvsd
    print("Waiting for debugger attach")
    ptvsd.enable_attach(address=(args.server_ip, args.server_port), redirect_output=True)
    ptvsd.wait_for_attach()


# Device selection: single-process mode picks CUDA when available (unless
# --no_cuda is set); distributed mode binds this process to its local GPU.
if args.local_rank == -1 or args.no_cuda:
    device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    n_gpu = torch.cuda.device_count()
else:
    torch.cuda.set_device(args.local_rank)
    device = torch.device("cuda", args.local_rank)
    n_gpu = 1
    # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
    torch.distributed.init_process_group(backend='nccl')

# NOTE(review): logging.basicConfig was already called in the import cell; a
# second call normally has no effect once the root logger has a handler, so
# this format is presumably not applied - confirm.
logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
                    datefmt = '%m/%d/%Y %H:%M:%S',
                    level = logging.INFO if args.local_rank in [-1, 0] else logging.WARN)

logger.info("device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".format(
    device, n_gpu, bool(args.local_rank != -1), args.fp16))


# Seed every random number generator used by the notebook.
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if n_gpu > 0:
    torch.cuda.manual_seed_all(args.seed)

if not args.do_eval:
    raise ValueError("At least `do_eval` must be True.")

tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case)

# Not used by the cells visible here.
train_examples = None
num_train_optimization_steps = None
    

# Prepare model
cache_dir = args.cache_dir if args.cache_dir else os.path.join(str(PYTORCH_PRETRAINED_BERT_CACHE), 'distributed_{}'.format(args.local_rank))
# output_attentions=True makes the model return attentions together with the
# prediction scores (the callers unpack two values).
model = BertForMaskedLM.from_pretrained(args.bert_model,output_attentions=True,cache_dir=cache_dir)
if args.fp16:
    model.half()
model.to(device)

# NOTE(review): opened in append mode but never written to or closed in the
# visible cells (the only close() call is commented out) - the handle leaks.
output_sr_file = open(args.output_SR_file,"a+")


INFO:__main__:device: cuda n_gpu: 1, distributed training: False, 16-bits training: False
INFO:pytorch_pretrained_bert.tokenization:loading vocabulary file D:/data/bert_ppdb/bert-large-uncased-whole-word-masking-vocab.txt
INFO:pytorch_pretrained_bert.modeling:loading weights file D:/data/bert_ppdb/bert-large-uncased-whole-word-masking-pytorch_model.bin
INFO:pytorch_pretrained_bert.modeling:loading configuration file D:/data/bert_ppdb/bert-large-uncased-whole-word-masking-config.json
INFO:pytorch_pretrained_bert.modeling:Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "type_vocab_size": 2,
  "vocab_size": 30522
}

INFO:pytorch_pretrained_bert.modeling:Weights from pretrained model not used in BertForMaskedLM: ['cls.seq_relationship

In [6]:
print("Loading embeddings ...")

# Word vectors: getWordmap returns parallel lists (words, vectors).
wordVecPath = args.word_embeddings
# Alternative embedding file (GloVe) used previously:
#wordVecPath = "/media/qiang/ee63f41d-4004-44fe-bcfd-522df9f2eee8/glove.840B.300d.txt"

fasttext_dico, fasttext_emb = getWordmap(wordVecPath)

#stopword = set(stopwords.words('english'))
# Word-frequency table read from the .xlsx file given by --word_frequency.
word_count_path = args.word_frequency
# Alternative frequency file used previously:
#word_count_path = "word_frequency_wiki.txt"
word_count = getWordCount(word_count_path)

# Stemmer passed to BERT_candidate_generation to filter out candidates that
# share a stem with the source word.
ps = PorterStemmer()

Loading embeddings ...
2000000 300



In [7]:
print("loading PPDB ...")
# Paraphrase database wrapper; the evaluation loop queries it through
# ppdb_model.predict(word, tag).
ppdb_path = args.ppdb
ppdb_model = Ppdb(ppdb_path)

loading PPDB ...


In [8]:
# Evaluation driver. For every example: find the complex word, ask BERT for
# replacement candidates at its (masked) position, rank them, and finally
# score both the candidate lists and the chosen substitutions.
for num in range(1):
    # NOTE(review): args.seed is updated here but the random generators are
    # not re-seeded with it - confirm whether a re-seed was intended.
    args.seed=num*10+5
    print(args.seed)
    print('第 ',num,'次循环')

    CGBERT = []               # candidate list per example
    CSBERT = []               # not used in this cell
    SS = []                   # not used in this cell
    substitution_words = []   # final substitution per example

    num_selection = args.num_selections

    bre_i=0

    window_context = 11       # number of context words handed to the ranker
    top_k_predictions = 80    # BERT predictions considered per masked position

    # Everything that needs the evaluation data lives inside this guard, so a
    # disabled or non-primary process skips the run instead of failing on
    # undefined names further down.
    if args.do_eval and (args.local_rank == -1 or torch.distributed.get_rank() == 0):

        fileName = args.eval_dir.split('/')[-1][:-4]
        if fileName=='lex.mturk':
            eval_examples, mask_words, mask_labels = read_eval_dataset(args.eval_dir)
        else:
            eval_examples, mask_words, mask_labels = read_eval_index_dataset(args.eval_dir)

        logger.info("***** Running evaluation *****")
        logger.info("  Num examples = %d", len(eval_examples))

        model.eval()

        eval_size = len(eval_examples)

        print("prob_mask:",args.prob_mask)

        for i in range(eval_size):
            print('Sentence {} rankings: '.format(i))
            print(' sentence: ',eval_examples[i])
            print('mask_words ',mask_words[i])
            tokens, words, position = convert_sentence_to_token(eval_examples[i], args.max_seq_length, tokenizer)

            assert len(words)==len(position)

            mask_index = words.index(mask_words[i])

            mask_context = extract_context(words,mask_index,window_context)

            # An int for a single-token word, a list for a multi-token word.
            mask_position = position[mask_index]

            if isinstance(mask_position,list):
                feature = convert_whole_word_to_feature(tokens, mask_position, args.max_seq_length, tokenizer, args.prob_mask)
            else:
                feature = convert_token_to_feature(tokens, mask_position, args.max_seq_length, tokenizer, args.prob_mask)

            # Send the inputs to the device the model was moved to rather
            # than a hard-coded 'cuda', so CPU runs work as well.
            tokens_tensor = torch.tensor([feature.input_ids]).to(device)
            token_type_ids = torch.tensor([feature.input_type_ids]).to(device)
            attention_mask = torch.tensor([feature.input_mask]).to(device)

            # Predict all tokens
            with torch.no_grad():
                all_attentions,prediction_scores = model(tokens_tensor, token_type_ids,attention_mask)

            # A multi-token word was collapsed into one [MASK] at its first position.
            if isinstance(mask_position,list):
                predicted_top = prediction_scores[0, mask_position[0]].topk(top_k_predictions)
            else:
                predicted_top = prediction_scores[0, mask_position].topk(top_k_predictions)

            pre_tokens = tokenizer.convert_ids_to_tokens(predicted_top[1].cpu().numpy())

            # POS-tag the NLTK tokenisation of the sentence to obtain the tag
            # of the complex word for the PPDB lookup.
            sentence = eval_examples[i].lower()
            nltk_words = word_tokenize(sentence)

            words_tag = nltk.pos_tag(nltk_words)

            complex_word_index = nltk_words.index(mask_words[i])

            complex_word_tag = words_tag[complex_word_index][1]

            complex_word_tag = preprocess_tag(complex_word_tag)

            cgPPDB = ppdb_model.predict(mask_words[i],complex_word_tag)

            cgBERT = BERT_candidate_generation(mask_words[i], pre_tokens, predicted_top[0].cpu().numpy(), ps, args.num_selections)

            CGBERT.append(cgBERT)

            pre_word = substitution_ranking(mask_words[i], mask_context, cgBERT, fasttext_dico, fasttext_emb,word_count,cgPPDB,tokenizer,model,mask_labels[i])

            print('cgBERT ',cgBERT)
            substitution_words.append(pre_word)

        potential,precision,recall,F_score=evaulation_SS_scores(CGBERT, mask_labels)
        print("The score of evaluation for BERT candidate generation")
        print(potential,precision,recall,F_score)

        precision,accuracy,changed_proportion=evaulation_pipeline_scores(substitution_words, mask_words, mask_labels)
        print("The score of evaluation for full LS pipeline")
        print(precision,accuracy,changed_proportion)


        #output_sr_file.close()

INFO:__main__:***** Running evaluation *****
INFO:__main__:  Num examples = 500


5
第  0 次循环
prob_mask: 0.0
Sentence 0 rankings: 
 sentence:  "In March 1992 , Linux version 0.95 was the first to be capable of running X. This large version number jump was due to a feeling that a version 1.0 with no major missing pieces was imminent ."
mask_words  pieces
cgBERT  ['features', 'code', 'parts', 'functionality', 'feature', 'bugs', 'issues', 'components', 'files', 'elements']
Sentence 1 rankings: 
 sentence:  Much of the water carried by these streams is diverted .
mask_words  diverted
cgBERT  ['recycled', 'acidic', 'fresh', 'saline', 'agricultural', 'filtered', 'underground', 'wastewater', 'rain', 'groundwater']
Sentence 2 rankings: 
 sentence:  "Harry also becomes the worthy possessor of the remaining Deathly Hallows : the Invisibility Cloak and the Resurrection Stone , hence becoming the true Master of Death ."
mask_words  possessor
cgBERT  ['owner', 'heir', 'guardian', 'bearer', 'recipient', 'holder', 'master', 'keeper', 'savior', 'user']
Sentence 3 rankings: 
 sentenc

cgBERT  ['purchase', 'takeover', 'absorption', 'replacement', 'merger', 'ownership', 'folding', 'dissolution', 'creation', 'defeat']
Sentence 27 rankings: 
 sentence:  Kowal suggested the name and the IAU endorsed it in 1975 .
mask_words  endorsed
cgBERT  ['adopted', 'approved', 'accepted', 'chose', 'used', 'selected', 'coined', 'registered', 'recognized', 'ratified']
Sentence 28 rankings: 
 sentence:  Dry air wrapping around the southern periphery of the cyclone eroded most of the deep convection by early on September 12 .
mask_words  periphery
cgBERT  ['edge', 'portion', 'end', 'half', 'tip', 'side', 'core', 'portions', 'part', 'edges']
Sentence 29 rankings: 
 sentence:  "Brief additional internal links are generally tolerated when used to facilitate communication or to provide general information , but undesirable if seen as canvassing for some purpose ."
mask_words  tolerated
cgBERT  ['desirable', 'beneficial', 'useful', 'acceptable', 'helpful', 'welcome', 'effective', 'appropriate

cgBERT  ['covers', 'includes', 'encompasses', 'comprises', 'lists', 'discusses', 'concerns', 'contains', 'chronicles', 'describes']
Sentence 52 rankings: 
 sentence:  "Realising that the gang could not elude the police forever , Moondyne Joe formulated a plan to escape the colony by traveling overland to the colony of South Australia ."
mask_words  elude
cgBERT  ['evade', 'avoid', 'escape', 'resist', 'serve', 'fight', 'fool', 'withstand', 'dodge', 'survive']
Sentence 53 rankings: 
 sentence:  "Southeastern Oklahoma , also known by its official tourism designation , Kiamichi Country , encompasses the southeastern quarter of the state of Oklahoma ."
mask_words  encompasses
cgBERT  ['is', 'comprises', 'covers', 'occupies', 'includes', 'constitutes', 'represents', 'borders', 'describes', 'spans']
Sentence 54 rankings: 
 sentence:  The A-train family comprises trains for both commuter services and limited express services .
mask_words  comprises
cgBERT  ['includes', 'of', 'provides', 'opera

cgBERT  ['health', 'environmental', 'emerging', 'special', 'urgent', 'sustainability', 'societal', 'global', 'priority', 'key']
Sentence 77 rankings: 
 sentence:  "Long ago , there was a decision by the late former Prime Minister , Rafik Hariri , to restore the synagogue and surround it with a garden ."
mask_words  restore
cgBERT  ['expand', 'rebuild', 'buy', 'close', 'extend', 'purchase', 'save', 'preserve', 'build', 'remove']
Sentence 78 rankings: 
 sentence:  "Port Arthur was also the destination for juvenile convicts , receiving many boys , some as young as nine arrested for stealing toys ."
mask_words  juvenile
cgBERT  ['young', 'runaway', 'many', 'child', 'escaped', 'the', 'escaping', 'visiting', 'boy', 'female']
Sentence 79 rankings: 
 sentence:  "Notrium is played from a top-down perspective , giving an overhead view of proceedings ."
mask_words  perspective
cgBERT  ['angle', 'position', 'viewpoint', 'camera', 'view', 'window', 'console', 'station', 'setting', 'level']
Sentence

cgBERT  ['resurgence', 'rise', 'popularity', 'development', 'history', 'invention', 'advent', 'renaissance', 'use', 'decline']
Sentence 104 rankings: 
 sentence:  Urepel is a commune in the PyrÌ©nÌ©es - Atlantiques department in south-western France .
mask_words  south-western
cgBERT  ['western', 'west', 'central', 'northwestern', 'northern', 'aquitaine', 'northwest', 'in', 'eastern', 'basque']
Sentence 105 rankings: 
 sentence:  "GeÌ_lson de Carvalho Soares or simply GeÌ_lson , is a striker ."
mask_words  striker
cgBERT  ['pseudonym', 'name', 'poet', 'nickname', 'poem', 'writer', 'brazilian', 'musician', 'joke', 'vampire']
Sentence 106 rankings: 
 sentence:  "Lancelot de Carles called her `` beautiful with an elegant figure '' , and a Venetian in Paris in 1528 also reported that she was said to be beautiful ."
mask_words  reported
cgBERT  ['wrote', 'said', 'mentioned', 'stated', 'mentions', 'says', 'noted', 'recorded', 'states', 'records']
Sentence 107 rankings: 
 sentence:  "In 2005 

cgBERT  ['usually', 'often', 'typically', 'commonly', 'generally', 'sometimes', 'normally', 'also', 'always', 'frequently']
Sentence 130 rankings: 
 sentence:  Slaves were previously introduced by the British and French who colonized the island in the 18th century .
mask_words  introduced
cgBERT  ['owned', 'held', 'imported', 'used', 'brought', 'enslaved', 'taken', 'transported', 'purchased', 'captured']
Sentence 131 rankings: 
 sentence:  "Cholera , sometimes known as Asiatic or epidemic cholera , is an infectious gastroenteritis caused by enterotoxin-producing strains of the bacterium Vibrio cholerae ."
mask_words  strains
cgBERT  ['forms', 'species', 'cells', 'members', 'spores', 'organisms', 'variants', 'subspecies', 'individuals', 'varieties']
Sentence 132 rankings: 
 sentence:  A hurricane struck Dominica and Puerto Rico between July 22 and July 24 .
mask_words  struck
cgBERT  ['devastated', 'hit', 'ravaged', 'affected', 'impacted', 'battered', 'threatened', 'brushed', 'destroyed

cgBERT  ['useful', 'binding', 'necessary', 'comforting', 'helpful', 'illuminating', 'sound', 'compelling', 'valuable', 'appropriate']
Sentence 157 rankings: 
 sentence:  A haunted house is defined as a house that is believed to be a center for supernatural occurrences or paranormal phenomena .
mask_words  occurrences
cgBERT  ['activity', 'events', 'activities', 'phenomena', 'forces', ',', 'visitation', 'beings', 'acts', 'entities']
Sentence 158 rankings: 
 sentence:  "Glaciologically , ice age implies the presence of extensive ice sheets in the northern and southern hemispheres ; by this definition we are still in the ice age that began at the start of the Pleistocene ."
mask_words  definition
cgBERT  ['date', 'time', 'point', 'day', 'means', ',', 'stage', 'estimate', 'year', 'statement']
Sentence 159 rankings: 
 sentence:  "From Scarborough Monday to Saturdays there is generally an hourly First TransPennine Express service to York , Leeds , Manchester Piccadilly and Liverpool Lime Str

cgBERT  ['procedure', 'algorithm', 'process', 'method', 'analysis', 'protocol', 'correction', 'encryption', 'prediction', 'test']
Sentence 182 rankings: 
 sentence:  "In 1989 , these diversified interests started growing into separate organizations , as the fisheries project became Grameen Motsho and the irrigation project became Grameen Krishi ."
mask_words  organizations
cgBERT  ['projects', 'entities', 'companies', 'businesses', 'ventures', 'enterprises', 'activities', 'concerns', 'groups', 'corporations']
Sentence 183 rankings: 
 sentence:  "In August 1995 , the TTC suffered its worst subway accident in what it refers to as the Russell Hill accident , on the Yonge-University-Spadina Line south of St. Clair West station ."
mask_words  suffered
cgBERT  ['experienced', 'had', 'saw', 'reported', 'witnessed', 'survived', 'experiences', 'encountered', 'sustained', 'faced']
Sentence 184 rankings: 
 sentence:  "The Short-beaked Echidna is an iconic animal in contemporary Australia , notabl

cgBERT  ['common', 'extreme', 'unusual', 'obvious', 'severe', 'dangerous', 'difficult', 'challenging', 'familiar', 'frequent']
Sentence 208 rankings: 
 sentence:  "Barnes & Noble , Inc. is the largest book retailer in the United States , operating mainly through its Barnes & Noble Booksellers chain of bookstores headquartered in lower Fifth Avenue in Lower Manhattan , New York City ."
mask_words  headquartered
cgBERT  ['located', 'based', 'centered', 'anchored', 'situated', 'originating', 'concentrated', ',', 'and', 'established']
Sentence 209 rankings: 
 sentence:  Hurricane-force wind gusts were reported in New England .
mask_words  hurricane-force
cgBERT  ['no', 'heavy', 'frequent', 'several', 'occasional', 'severe', 'strong', 'record', 'high', 'some']
Sentence 210 rankings: 
 sentence:  Spain and Luxembourg currently have Bourbon monarchs .
mask_words  currently
cgBERT  ['both', 'also', 'all', 'each', 'still', 'do', 'may', 'now', 'only', ',']
Sentence 211 rankings: 
 sentence:  "Th

cgBERT  ['parts', 'portions', 'areas', 'part', 'regions', 'pockets', 'stretches', 'amounts', 'territories', 'tracts']
Sentence 234 rankings: 
 sentence:  "Jeanne Demessieux had a prodigious memory : she had memorized more than 2,500 works , including the complete organ works of Johann Sebastian Bach , CÌ©sar Franck , Franz Liszt , Felix Mendelssohn and Marcel DuprÌ© ."
mask_words  prodigious
cgBERT  ['remarkable', 'wonderful', 'great', 'good', 'superb', 'brilliant', 'splendid', 'perfect', 'photographic', 'magnificent']
Sentence 235 rankings: 
 sentence:  "At the end of each season , a series of playoff games are contested among the top six teams in the NFC , consisting of the four division champions and the two other teams with the best win-loss records ."
mask_words  contested
cgBERT  ['played', 'held', 'scheduled', 'arranged', 'organized', 'staged', 'announced', 'hosted', 'awarded', 'determined']
Sentence 236 rankings: 
 sentence:  Veritas and Undina would have been the largest fragm

cgBERT  ['illustrations', 'photographs', 'drawings', 'depictions', 'descriptions', 'pictures', 'images', 'records', 'paintings', 'accounts']
Sentence 260 rankings: 
 sentence:  Additional praise was given to its scope and future potential based on user-created content ; minor criticism was reserved for specific elements of the gameplay mechanics and level creation facilities .
mask_words  elements
cgBERT  ['aspects', 'features', 'parts', 'details', 'weaknesses', 'limitations', 'characteristics', 'areas', 'attributes', 'flaws']
Sentence 261 rankings: 
 sentence:  "Loamhedge is a fantasy novel by Brian Jacques , published in 2003 ."
mask_words  published
cgBERT  ['released', 'written', 'completed', 'printed', 'finished', 'set', 'issued', 'authored', 'republished', 'serialized']
Sentence 262 rankings: 
 sentence:  "The Eastern Orthodox Church accepts Psalm 151 as canonical . Roman Catholics , Protestants , and most Jews consider it apocryphal ."
mask_words  consider
cgBERT  ['believe', 'r

cgBERT  ['called', 'call', 'named', 'name', 'gave', 'found', 'made', 'knew', 'calls', 'spelled']
Sentence 286 rankings: 
 sentence:  "It was founded in 1964 in Hamilton , Ontario by Canadian hockey player Tim Horton and Jim Charade , after an initial venture in hamburger restaurants ."
mask_words  founded
cgBERT  ['started', 'established', 'formed', 'created', 'launched', 'incorporated', 'conceived', 'opened', 'begun', 'born']
Sentence 287 rankings: 
 sentence:  The annual mean temperature is 25 C ; regional mean temperatures range from 18 C in the heart of the Cordillera Central to as high as 27 C in arid regions .
mask_words  annual
cgBERT  ['global', 'world', 'local', 'international', 'worldwide', 'mean', 'universal', 'absolute', 'national', 'daily']
Sentence 288 rankings: 
 sentence:  Epistemology or theory of knowledge is the branch of philosophy concerned with the nature and scope of knowledge .
mask_words  scope
cgBERT  ['methods', 'definition', 'application', 'agency', 'methodo

cgBERT  ['show', 'series', 'broadcast', 'shows', 'countdown', 'survey', 'chart', 'broadcasts', 'format', 'magazine']
Sentence 313 rankings: 
 sentence:  "Liriodendron tulipifera , commonly known as the American tulip tree , tulip poplar or yellow poplar , is the Western Hemisphere representative of the two-species Liriodendron genus and the tallest eastern hardwood ."
mask_words  commonly
cgBERT  ['also', 'often', 'popularly', 'sometimes', 'better', 'otherwise', 'generally', 'colloquially', 'variously', 'usually']
Sentence 314 rankings: 
 sentence:  "Another allegation asserts that during the previous year , on Saint Bartholomew 's Day , Vlad had 30,000 of the merchants and officials of the Transylvanian city of Bra ov impaled for breaking his authority ."
mask_words  asserts
cgBERT  ['is', 'states', 'says', 'relates', 'claims', 'holds', 'was', ':', 'tells', 'suggests']
Sentence 315 rankings: 
 sentence:  "The result is a region of braided streams , marshes , and lakes the size of Belg

cgBERT  ['reliable', 'important', 'valuable', 'credible', 'powerful', 'independent', 'legitimate', 'accessible', 'useful', 'effective']
Sentence 339 rankings: 
 sentence:  "The Epping Ongar Railway operates on a preserved railway along the final section of the old Great Eastern Railway and London Underground Central Line branch line between Epping and Ongar , with an intermediate station at North Weald ."
mask_words  final
cgBERT  ['southern', 'remaining', 'northern', 'eastern', 'western', 'central', 'surviving', 'last', 'short', 'former']
Sentence 340 rankings: 
 sentence:  "In the Immunity Challenge , Koror had an early lead , but Ulong made a slight comeback ."
mask_words  slight
cgBERT  ['stunning', 'quick', 'dramatic', 'strong', 'late', 'spectacular', 'miraculous', 'huge', 'surprising', 'great']
Sentence 341 rankings: 
 sentence:  "It is also widely believed that the size and number of connections in the prefrontal cortex relates directly to sentience , as the prefrontal cortex in

cgBERT  ['lack', 'resist', 'prevent', 'need', 'avoid', 'have', 'undergo', 'allow', 'reject', 'prohibit']
Sentence 363 rankings: 
 sentence:  A final violent confrontation with police took place at Glenrowan .
mask_words  confrontation
cgBERT  ['clash', 'encounter', 'skirmish', 'shootout', 'incident', 'dispute', 'fight', 'battle', 'argument', 'exchange']
Sentence 364 rankings: 
 sentence:  "A tropical wave , which had previously spawned Tropical Depression Six , interacted with an upper-level low pressure area to develop an area of deep convection near Hispaniola on July 23 , 2003 ."
mask_words  previously
cgBERT  ['already', 'just', 'earlier', 'also', 'recently', 'originally', 'formerly', 'initially', 'itself', 'nearly']
Sentence 365 rankings: 
 sentence:  Other females within the social group may assist in the birthing process and tend to the newborns .
mask_words  newborns
cgBERT  ['infant', 'baby', 'young', 'infants', 'babies', 'pregnancy', 'child', 'children', 'offspring', 'birth']

cgBERT  ['change', 'maintain', 'alter', 'lose', 'keep', 'modify', 'retain', 'hide', 'mimic', 'shift']
Sentence 389 rankings: 
 sentence:  "In Switzerland , the constitutional ban on absinthe was repealed in 2000 during an overhaul of the national constitution , although the prohibition was written into ordinary law instead ."
mask_words  prohibition
cgBERT  ['ban', 'repeal', 'law', 'restriction', 'exception', 'amendment', 'rule', 'clause', 'exemption', 'act']
Sentence 390 rankings: 
 sentence:  "One theory suggests that the source of the name is an ancient Finnic language , in which it means `` dark '' and `` turbid '' ."
mask_words  turbid
cgBERT  ['dark', 'black', 'cold', 'darkness', 'evil', 'terrible', 'deep', 'white', 'dead', 'shadow']
Sentence 391 rankings: 
 sentence:  The geological composition of rock and soil dictates the type of shore which is created .
mask_words  dictates
cgBERT  ['determines', 'affects', 'determine', 'influences', 'defines', 'is', 'controls', 'indicates', 

cgBERT  ['cross', 'serve', 'connect', 'traverse', 'span', 'reach', 'cover', 'pass', 'service', 'follow']
Sentence 414 rankings: 
 sentence:  "Security-Enhanced Linux is a Linux feature that provides a mechanism for supporting access control security policies , including U.S. Department of Defense style mandatory access controls , through the use of Linux Security Modules in the Linux kernel ."
mask_words  provides
cgBERT  ['implements', 'offers', 'defines', 'includes', 'introduces', 'is', 'enables', 'creates', 'specifies', 'adds']
Sentence 415 rankings: 
 sentence:  "Johns and Williams would have spent the next seven months working on a government work party in the local area , before being transferred to Millbank Prison ."
mask_words  transferred
cgBERT  ['sent', 'moved', 'taken', 'sentenced', 'posted', 'committed', 'shipped', 'deported', 'returned', 'assigned']
Sentence 416 rankings: 
 sentence:  It lies on the southern side of Lake Constance .
mask_words  lies
cgBERT  ['is', 'sits',

cgBERT  ['scandal', 'attention', 'notoriety', 'gossip', 'interest', 'publicity', 'sensation', 'curiosity', 'speculation', 'fame']
Sentence 439 rankings: 
 sentence:  "SDLP leader John Hume , MP , identified the possibility that a negotiated settlement might be possible and began secret talks with Adams in 1988 ."
mask_words  identified
cgBERT  ['recognised', 'saw', 'considered', 'recognized', 'acknowledged', 'realised', 'accepted', 'raised', 'suggested', 'investigated']
Sentence 440 rankings: 
 sentence:  "A brief stint on Go Kart Records saw the release of `` Fly the Flag '' , and now with their latest release of `` Windward Tides and Wayward Sails '' in 2003 finds them on Union Label Group ."
mask_words  latest
cgBERT  ['sophomore', 'recent', 'first', 'current', 'debut', 'second', 'third', 'newest', 'last', 'new']
Sentence 441 rankings: 
 sentence:  It is currently used mostly for football matches and is the home stadium of Union Sportive du Foyer de la RÌ©gie Abidjan-Niger .
mask_wo

cgBERT  ['close', 'complex', 'unique', 'direct', 'cooperative', 'special', 'working', 'ongoing', 'friendly', 'respectful']
Sentence 464 rankings: 
 sentence:  None of your watched items were edited in the time period displayed .
mask_words  edited
cgBERT  ['used', 'found', 'present', 'here', 'anywhere', 'located', 'available', 'included', 'actually', 'seen']
Sentence 465 rankings: 
 sentence:  "Originally made for the Apple II , they were later ported to other platforms ."
mask_words  ported
cgBERT  ['migrated', 'adapted', 'moved', 'expanded', 'converted', 'extended', 'brought', 'added', 'shipped', 'released']
Sentence 466 rankings: 
 sentence:  His work is displayed in the Museum of Modern Art in Yerevan .
mask_words  displayed
cgBERT  ['exhibited', 'represented', 'held', 'housed', 'shown', 'located', 'kept', 'included', 'featured', 'preserved']
Sentence 467 rankings: 
 sentence:  "He was a prolific discoverer of asteroids , discovering 122 in all , from 136 Austria in 1874 to 1073 Ge

cgBERT  ['addition', 'relation', 'connection', 'reaction', 'regards', 'conjunction', 'contrast', 'regard', 'reference', 'accordance']
Sentence 491 rankings: 
 sentence:  It reached a peak position of number thirty-seven on the Billboard Hot Modern Rock Tracks chart .
mask_words  reached
cgBERT  ['attained', 'achieved', 'had', 'earned', 'made', 'hit', 'has', 'obtained', 'gained', 'garnered']
Sentence 492 rankings: 
 sentence:  "Pskov is an ancient city located in the north-west of Russia about east from the Estonian border , on the Velikaya River ."
mask_words  located
cgBERT  ['situated', ',', 'lying', '...', 'somewhere', 'now', 'standing', 'founded', 'lies', 'found']
Sentence 493 rankings: 
 sentence:  The major scale may predominate the minor scale in Western music because of its unique harmonic properties ; in particular the major third is much stronger in the harmonic series than the minor third .
mask_words  particular
cgBERT  ['fact', 'general', 'which', 'that', 'theory', 'additi

INFO:__main__:***** Running evaluation *****
INFO:__main__:  Num examples = 500


cgBERT  ['parts', 'countries', 'areas', 'regions', 'nations', 'quarters', 'corners', 'cities', 'portions', 'aspects']
The score of evaluation for BERT candidate generation
0.906 0.253 0.19676466013376886 0.22136669874879691
The score of evaluation for full LS pipeline
0.694 0.652 0.958
15
第  1 次循环
prob_mask: 0.0
Sentence 0 rankings: 
 sentence:  "In March 1992 , Linux version 0.95 was the first to be capable of running X. This large version number jump was due to a feeling that a version 1.0 with no major missing pieces was imminent ."
mask_words  pieces
cgBERT  ['features', 'code', 'parts', 'functionality', 'feature', 'bugs', 'issues', 'components', 'files', 'elements']
Sentence 1 rankings: 
 sentence:  Much of the water carried by these streams is diverted .
mask_words  diverted
cgBERT  ['recycled', 'acidic', 'fresh', 'saline', 'agricultural', 'filtered', 'underground', 'wastewater', 'rain', 'groundwater']
Sentence 2 rankings: 
 sentence:  "Harry also becomes the worthy possessor of 

cgBERT  ['tail', 'cockpit', 'body', 'hull', 'nose', 'layout', 'chassis', 'platform', 'cabin', 'parts']
Sentence 26 rankings: 
 sentence:  "The last event was held on June 11 , 2000 , not to be held again due to the acquisition of WCW by World Wrestling Federation ."
mask_words  acquisition
cgBERT  ['purchase', 'takeover', 'absorption', 'replacement', 'merger', 'ownership', 'folding', 'dissolution', 'creation', 'defeat']
Sentence 27 rankings: 
 sentence:  Kowal suggested the name and the IAU endorsed it in 1975 .
mask_words  endorsed
cgBERT  ['adopted', 'approved', 'accepted', 'chose', 'used', 'selected', 'coined', 'registered', 'recognized', 'ratified']
Sentence 28 rankings: 
 sentence:  Dry air wrapping around the southern periphery of the cyclone eroded most of the deep convection by early on September 12 .
mask_words  periphery
cgBERT  ['edge', 'portion', 'end', 'half', 'tip', 'side', 'core', 'portions', 'part', 'edges']
Sentence 29 rankings: 
 sentence:  "Brief additional internal 

cgBERT  ['limited', 'many', 'special', 'advanced', 'extended', 'complex', 'powerful', 'extensive', 'various', 'multiple']
Sentence 51 rankings: 
 sentence:  This period spanned the years from 1278 through 1288 .
mask_words  spanned
cgBERT  ['covers', 'includes', 'encompasses', 'comprises', 'lists', 'discusses', 'concerns', 'contains', 'chronicles', 'describes']
Sentence 52 rankings: 
 sentence:  "Realising that the gang could not elude the police forever , Moondyne Joe formulated a plan to escape the colony by traveling overland to the colony of South Australia ."
mask_words  elude
cgBERT  ['evade', 'avoid', 'escape', 'resist', 'serve', 'fight', 'fool', 'withstand', 'dodge', 'survive']
Sentence 53 rankings: 
 sentence:  "Southeastern Oklahoma , also known by its official tourism designation , Kiamichi Country , encompasses the southeastern quarter of the state of Oklahoma ."
mask_words  encompasses
cgBERT  ['is', 'comprises', 'covers', 'occupies', 'includes', 'constitutes', 'represents

cgBERT  ['moved', 'removed', 'migrated', 'came', 'went', 'relocated', 'passed', 'was', 'from', 'returned']
Sentence 76 rankings: 
 sentence:  "Ohio State also announced in 2006 , that it would be designating at least $ 110 million of its research efforts to what it termed `` fundamental concerns '' such as research towards a cure for cancer , renewable energy sources and sustainable drinking water supplies ."
mask_words  fundamental
cgBERT  ['health', 'environmental', 'emerging', 'special', 'urgent', 'sustainability', 'societal', 'global', 'priority', 'key']
Sentence 77 rankings: 
 sentence:  "Long ago , there was a decision by the late former Prime Minister , Rafik Hariri , to restore the synagogue and surround it with a garden ."
mask_words  restore
cgBERT  ['expand', 'rebuild', 'buy', 'close', 'extend', 'purchase', 'save', 'preserve', 'build', 'remove']
Sentence 78 rankings: 
 sentence:  "Port Arthur was also the destination for juvenile convicts , receiving many boys , some as youn

cgBERT  ['completing', 'catching', 'throwing', 'passing', 'totaling', 'completed', 'receiving', 'recording', 'making', 'returning']
Sentence 102 rankings: 
 sentence:  "A carotenoid dye , crocin , allows saffron to impart a rich golden-yellow hue to dishes and textiles . Saffron has further medicinal applications ."
mask_words  golden-yellow
cgBERT  ['red', 'reddish', 'green', 'blue', 'yellow', 'purple', 'orange', 'pink', 'yellowish', 'greenish']
Sentence 103 rankings: 
 sentence:  The revival of lute in the 20th century revitalized the interest of composers in the instruments of the lute family .
mask_words  revival
cgBERT  ['resurgence', 'rise', 'popularity', 'development', 'history', 'invention', 'advent', 'renaissance', 'use', 'decline']
Sentence 104 rankings: 
 sentence:  Urepel is a commune in the PyrÌ©nÌ©es - Atlantiques department in south-western France .
mask_words  south-western
cgBERT  ['western', 'west', 'central', 'northwestern', 'northern', 'aquitaine', 'northwest', 'in'

cgBERT  ['was', 'worked', 'stayed', 'served', 'dealt', 'lived', 'continued', 'associated', 'lasted', 'collaborated']
Sentence 128 rankings: 
 sentence:  From 1908 to 1911 he was the principal conductor of the Vienna Hofoper succeeding Gustav Mahler ; he retained the conductorship of the Vienna Philharmonic until 1927 .
mask_words  retained
cgBERT  ['held', 'continued', 'kept', 'maintained', 'resumed', 'assumed', 'occupied', 'took', 'had', 'received']
Sentence 129 rankings: 
 sentence:  "Mexican cinnamon hot chocolate is traditionally served alongside a variety of Mexican pastries known as pan dulce and , as in Spain , churros ."
mask_words  traditionally
cgBERT  ['usually', 'often', 'typically', 'commonly', 'generally', 'sometimes', 'normally', 'also', 'always', 'frequently']
Sentence 130 rankings: 
 sentence:  Slaves were previously introduced by the British and French who colonized the island in the 18th century .
mask_words  introduced
cgBERT  ['owned', 'held', 'imported', 'used', '

cgBERT  ['performances', 'music', 'characters', 'acting', 'dialogue', 'actors', 'moments', 'visuals', 'melodies', 'imagery']
Sentence 155 rankings: 
 sentence:  Seismograms are essential for measuring earthquakes using the Richter scale .
mask_words  essential
cgBERT  ['used', 'instruments', 'tools', 'devices', 'methods', 'techniques', 'graphs', 'scales', 'records', 'charts']
Sentence 156 rankings: 
 sentence:  "But , from time to time the Church hierarchy , and even some popes , have given opinions on various matters ; although these `` guidelines '' are not binding on Catholics , many tradition-minded Catholics find them persuasive ."
mask_words  persuasive
cgBERT  ['useful', 'binding', 'necessary', 'comforting', 'helpful', 'illuminating', 'sound', 'compelling', 'valuable', 'appropriate']
Sentence 157 rankings: 
 sentence:  A haunted house is defined as a house that is believed to be a center for supernatural occurrences or paranormal phenomena .
mask_words  occurrences
cgBERT  ['act

cgBERT  ['writer', 'author', 'cartoonist', 'broadcaster', 'filmmaker', 'journalist', 'artist', 'comedian', 'illustrator', 'publisher']
Sentence 180 rankings: 
 sentence:  Students are obligated to attend the central high school .
mask_words  obligated
cgBERT  ['required', 'able', 'allowed', 'expected', 'eligible', 'encouraged', 'assigned', 'supposed', 'free', 'permitted']
Sentence 181 rankings: 
 sentence:  The technique consists of precoding the data so as to cancel the effect of the interference .
mask_words  technique
cgBERT  ['procedure', 'algorithm', 'process', 'method', 'analysis', 'protocol', 'correction', 'encryption', 'prediction', 'test']
Sentence 182 rankings: 
 sentence:  "In 1989 , these diversified interests started growing into separate organizations , as the fisheries project became Grameen Motsho and the irrigation project became Grameen Krishi ."
mask_words  organizations
cgBERT  ['projects', 'entities', 'companies', 'businesses', 'ventures', 'enterprises', 'activitie

cgBERT  ['area', 'country', 'population', 'region', 'surface', 'state', 'nation', 'land', 'mass', 'zone']
Sentence 206 rankings: 
 sentence:  "In the opening and middlegame , the king will rarely play an active role in the development of an offensive or defensive position ."
mask_words  rarely
cgBERT  ['often', 'usually', 'not', 'generally', 'typically', 'normally', 'always', 'never', 'sometimes', 'frequently']
Sentence 207 rankings: 
 sentence:  The most troublesome situation usually occurs with short journeys from west to east .
mask_words  troublesome
cgBERT  ['common', 'extreme', 'unusual', 'obvious', 'severe', 'dangerous', 'difficult', 'challenging', 'familiar', 'frequent']
Sentence 208 rankings: 
 sentence:  "Barnes & Noble , Inc. is the largest book retailer in the United States , operating mainly through its Barnes & Noble Booksellers chain of bookstores headquartered in lower Fifth Avenue in Lower Manhattan , New York City ."
mask_words  headquartered
cgBERT  ['located', 'base

cgBERT  ['large', 'shifting', 'permanent', 'natural', 'periodic', 'raised', 'continuous', 'mobile', 'scattered', 'high']
Sentence 232 rankings: 
 sentence:  107 Camilla is one of the largest main belt asteroids .
mask_words  largest
cgBERT  ['smallest', 'smaller', 'oldest', 'inner', 'biggest', 'brightest', 'youngest', 'stony', 'older', 'small']
Sentence 233 rankings: 
 sentence:  "First mentioned by the Ancient Greek geographer Ptolemy , the pre-Christian settlement of the Saxon people originally covered an area a little more to the northwest , with parts of the southern Jutland Peninsula , Old Saxony and small sections of the eastern Low Countries ."
mask_words  sections
cgBERT  ['parts', 'portions', 'areas', 'part', 'regions', 'pockets', 'stretches', 'amounts', 'territories', 'tracts']
Sentence 234 rankings: 
 sentence:  "Jeanne Demessieux had a prodigious memory : she had memorized more than 2,500 works , including the complete organ works of Johann Sebastian Bach , CÌ©sar Franck , 

cgBERT  ['next', 'similar', 'attached', 'equivalent', 'successor', 'close', 'opposite', 'belonging', 'annexed', 'precursor']
Sentence 257 rankings: 
 sentence:  OpenOffice.org Calc is the spreadsheet component of the OpenOffice.org software package .
mask_words  component
cgBERT  ['version', 'editor', 'engine', 'application', 'part', 'client', 'portion', 'module', 'interface', 'extension']
Sentence 258 rankings: 
 sentence:  "The Countess tells him it is only Susanna , trying on her wedding dress ."
mask_words  tells
cgBERT  ['told', 'informs', 'assured', 'convinces', 'assure', 'convinced', 'informed', 'shows', 'warns', 'insists']
Sentence 259 rankings: 
 sentence:  "Early representations of the balalaika show it with anywhere from two to six strings , which resembles certain Central Asian instruments ."
mask_words  representations
cgBERT  ['illustrations', 'photographs', 'drawings', 'depictions', 'descriptions', 'pictures', 'images', 'records', 'paintings', 'accounts']
Sentence 260 ra

cgBERT  ['owned', 'headed', 'run', 'led', 'managed', 'overseen', 'controlled', 'directed', 'supervised', 'represented']
Sentence 282 rankings: 
 sentence:  He is portrayed by Mark Hamill .
mask_words  portrayed
cgBERT  ['voiced', 'played', 'directed', 'written', 'represented', 'created', 'illustrated', 'narrated', 'owned', 'produced']
Sentence 283 rankings: 
 sentence:  "Super Smash Bros. was developed by HAL Laboratory , a Nintendo second-party developer , during ."
mask_words  developed
cgBERT  ['created', 'designed', 'made', 'produced', 'built', 'conceived', 'published', 'released', 'invented', 'licensed']
Sentence 284 rankings: 
 sentence:  The northern and western edges of the Cotswolds are marked by steep escarpments down to the Severn valley and the Warwickshire Avon .
mask_words  escarpments
cgBERT  ['slopes', 'cliffs', 'drops', 'hills', 'declines', 'steps', 'terraces', 'foothills', 'descent', 'walls']
Sentence 285 rankings: 
 sentence:  "They renamed the place Alcante or Alcan

cgBERT  ['settlers', 'residents', 'ancestors', 'people', 'immigrants', 'families', 'generations', 'colonists', 'generation', 'pioneers']
Sentence 310 rankings: 
 sentence:  "Thousands of videos exist on YouTube of users showing the original video to their friends and taping their reactions , although some videos seem to be staged ."
mask_words  taping
cgBERT  ['recording', 'watching', 'discussing', 'documenting', 'showing', 'observing', 'sharing', 'seeing', 'filming', 'explaining']
Sentence 311 rankings: 
 sentence:  "However , on 13 April 2008 , before the completion of the initial five years , it was announced that Bryce was to become the next Governor-General of Australia ."
mask_words  initial
cgBERT  ['first', 'previous', 'next', 'planned', 'preceding', 'required', 'last', 'expected', 'original', 'following']
Sentence 312 rankings: 
 sentence:  "Kasem is best known by name as a music historian and disc jockey , most notably as host of the weekly American Top 40 radio program from 

cgBERT  [',', '-', '.', 'well', 'all', 'sexually', '—', '...', 'financially', 'properly']
Sentence 336 rankings: 
 sentence:  "Gumbasia , a 3 minute 34 second short film produced in 1953 , was the first clay animation produced by Art Clokey , who went on to create the classic series Gumby and Davy and Goliath using the same technique ."
mask_words  classic
cgBERT  ['television', 'tv', 'animated', 'cartoon', 'popular', 'pbs', '1960s', 'radio', 'comedy', 'nickelodeon']
Sentence 337 rankings: 
 sentence:  "Also , the sound of the regional language qawwali can be totally different from that of mainstream qawwali ."
mask_words  mainstream
cgBERT  ['standard', 'central', 'the', 'modern', 'local', 'southern', 'traditional', 'northern', 'ordinary', 'classical']
Sentence 338 rankings: 
 sentence:  Many people see Al Jazeera as a more trustworthy source of information than government and foreign channels .
mask_words  trustworthy
cgBERT  ['reliable', 'important', 'valuable', 'credible', 'powerfu

cgBERT  ['forbid', 'condemn', 'ban', 'outlaw', 'oppose', 'tolerate', 'reject', 'eliminate', 'abandon', 'prevent']
Sentence 360 rankings: 
 sentence:  "To reduce space and increase reading speed , virtually all Braille books are transcribed in what is known as Grade 2 Braille , which uses a system of contractions to reduce space and speed the process of reading ."
mask_words  reduce
cgBERT  ['save', 'minimize', 'conserve', 'eliminate', 'decrease', 'preserve', 'increase', 'use', 'cut', 'retain']
Sentence 361 rankings: 
 sentence:  This is a considerably higher total than that of any other European country .
mask_words  considerably
cgBERT  ['much', 'significantly', 'far', 'slightly', 'substantially', 'relatively', 'somewhat', 'rather', 'noticeably', 'notably']
Sentence 362 rankings: 
 sentence:  "Apples require cross-pollination between individuals by insects ; all are self-sterile , and self-pollination is impossible , making pollinating insects essential ."
mask_words  require
cgBERT  

cgBERT  ['controlled', 'operated', 'managed', 'distributed', 'run', 'held', 'acquired', 'funded', 'handled', 'licensed']
Sentence 387 rankings: 
 sentence:  "Alexander Stepanovich Popov was a Russian physicist who first demonstrated the practical application of electromagnetic waves , although he did not apply for a patent for his invention ."
mask_words  demonstrated
cgBERT  ['proposed', 'described', 'considered', 'suggested', 'investigated', 'discovered', 'studied', 'conceived', 'saw', 'developed']
Sentence 388 rankings: 
 sentence:  "In nature , there is a strong evolutionary pressure for animals to blend into their environment or conceal their shape ; for prey animals to avoid predators and for predators to be able to sneak up on prey ."
mask_words  conceal
cgBERT  ['change', 'maintain', 'alter', 'lose', 'keep', 'modify', 'retain', 'hide', 'mimic', 'shift']
Sentence 389 rankings: 
 sentence:  "In Switzerland , the constitutional ban on absinthe was repealed in 2000 during an overha

cgBERT  ['mythical', 'giant', 'monstrous', 'supposed', 'alleged', 'infamous', 'elusive', 'mythological', 'mysterious', 'notorious']
Sentence 412 rankings: 
 sentence:  "Typically , a fast shutter speed will require a larger aperture to ensure sufficient light exposure , and a slow shutter speed will require a smaller aperture to avoid excessive exposure ."
mask_words  avoid
cgBERT  ['prevent', 'ensure', 'limit', 'assure', 'eliminate', 'guarantee', 'allow', 'reduce', 'minimize', 'provide']
Sentence 413 rankings: 
 sentence:  "Railways now bypass the three major falls , and much of the trade of central Africa passes along the river , including copper , palm oil , sugar , coffee , and cotton ."
mask_words  bypass
cgBERT  ['cross', 'serve', 'connect', 'traverse', 'span', 'reach', 'cover', 'pass', 'service', 'follow']
Sentence 414 rankings: 
 sentence:  "Security-Enhanced Linux is a Linux feature that provides a mechanism for supporting access control security policies , including U.S. Depa

cgBERT  ['discomfort', 'blindness', 'confusion', 'delay', 'darkness', 'paralysis', 'relaxation', 'recovery', 'difficulty', 'pain']
Sentence 438 rankings: 
 sentence:  "Mary Toft , also spelled Tofts , was an English woman from Godalming , Surrey , who in 1726 became the subject of considerable controversy when she tricked doctors into believing that she had given birth to rabbits ."
mask_words  controversy
cgBERT  ['scandal', 'attention', 'notoriety', 'gossip', 'interest', 'publicity', 'sensation', 'curiosity', 'speculation', 'fame']
Sentence 439 rankings: 
 sentence:  "SDLP leader John Hume , MP , identified the possibility that a negotiated settlement might be possible and began secret talks with Adams in 1988 ."
mask_words  identified
cgBERT  ['recognised', 'saw', 'considered', 'recognized', 'acknowledged', 'realised', 'accepted', 'raised', 'suggested', 'investigated']
Sentence 440 rankings: 
 sentence:  "A brief stint on Go Kart Records saw the release of `` Fly the Flag '' , and n

cgBERT  ['occupied', 'captured', 'taken', 'held', 'attacked', 'sacked', 'seized', 'burned', 'recaptured', 'invaded']
Sentence 463 rankings: 
 sentence:  "One of the defining aspects of the Nintendo GameCube is the rejuvenated relationship between Nintendo and its licensees . Unlike previous generations in which Nintendo was seen by some as bullying its third-party game developers , Nintendo openly sought game-development aid on the Nintendo GameCube ."
mask_words  rejuvenated
cgBERT  ['close', 'complex', 'unique', 'direct', 'cooperative', 'special', 'working', 'ongoing', 'friendly', 'respectful']
Sentence 464 rankings: 
 sentence:  None of your watched items were edited in the time period displayed .
mask_words  edited
cgBERT  ['used', 'found', 'present', 'here', 'anywhere', 'located', 'available', 'included', 'actually', 'seen']
Sentence 465 rankings: 
 sentence:  "Originally made for the Apple II , they were later ported to other platforms ."
mask_words  ported
cgBERT  ['migrated', '

cgBERT  ['regarded', 'among', 'deemed', 'as', 'believed', 'recognized', 'amongst', 'called', 'thought', 'acknowledged']
Sentence 489 rankings: 
 sentence:  Harvey became a strong extratropical storm during the night on August 8 and survived another few days before dissipating northwest of the Azores on August 14 .
mask_words  survived
cgBERT  ['lasted', 'persisted', 'continued', 'lingered', 'stayed', 'spent', 'tracked', 'for', 'remained', 'strengthened']
Sentence 490 rankings: 
 sentence:  "In response to the weather system , the American National Weather Service issued wind watches for the Oregon Coast ."
mask_words  response
cgBERT  ['addition', 'relation', 'connection', 'reaction', 'regards', 'conjunction', 'contrast', 'regard', 'reference', 'accordance']
Sentence 491 rankings: 
 sentence:  It reached a peak position of number thirty-seven on the Billboard Hot Modern Rock Tracks chart .
mask_words  reached
cgBERT  ['attained', 'achieved', 'had', 'earned', 'made', 'hit', 'has', 'obta

INFO:__main__:***** Running evaluation *****
INFO:__main__:  Num examples = 500


cgBERT  ['parts', 'countries', 'areas', 'regions', 'nations', 'quarters', 'corners', 'cities', 'portions', 'aspects']
The score of evaluation for BERT candidate generation
0.906 0.253 0.19676466013376886 0.22136669874879691
The score of evaluation for full LS pipeline
0.694 0.652 0.958
25
第  2 次循环
prob_mask: 0.0
Sentence 0 rankings: 
 sentence:  "In March 1992 , Linux version 0.95 was the first to be capable of running X. This large version number jump was due to a feeling that a version 1.0 with no major missing pieces was imminent ."
mask_words  pieces
cgBERT  ['features', 'code', 'parts', 'functionality', 'feature', 'bugs', 'issues', 'components', 'files', 'elements']
Sentence 1 rankings: 
 sentence:  Much of the water carried by these streams is diverted .
mask_words  diverted
cgBERT  ['recycled', 'acidic', 'fresh', 'saline', 'agricultural', 'filtered', 'underground', 'wastewater', 'rain', 'groundwater']
Sentence 2 rankings: 
 sentence:  "Harry also becomes the worthy possessor of 

cgBERT  ['tail', 'cockpit', 'body', 'hull', 'nose', 'layout', 'chassis', 'platform', 'cabin', 'parts']
Sentence 26 rankings: 
 sentence:  "The last event was held on June 11 , 2000 , not to be held again due to the acquisition of WCW by World Wrestling Federation ."
mask_words  acquisition
cgBERT  ['purchase', 'takeover', 'absorption', 'replacement', 'merger', 'ownership', 'folding', 'dissolution', 'creation', 'defeat']
Sentence 27 rankings: 
 sentence:  Kowal suggested the name and the IAU endorsed it in 1975 .
mask_words  endorsed
cgBERT  ['adopted', 'approved', 'accepted', 'chose', 'used', 'selected', 'coined', 'registered', 'recognized', 'ratified']
Sentence 28 rankings: 
 sentence:  Dry air wrapping around the southern periphery of the cyclone eroded most of the deep convection by early on September 12 .
mask_words  periphery
cgBERT  ['edge', 'portion', 'end', 'half', 'tip', 'side', 'core', 'portions', 'part', 'edges']
Sentence 29 rankings: 
 sentence:  "Brief additional internal 

cgBERT  ['limited', 'many', 'special', 'advanced', 'extended', 'complex', 'powerful', 'extensive', 'various', 'multiple']
Sentence 51 rankings: 
 sentence:  This period spanned the years from 1278 through 1288 .
mask_words  spanned
cgBERT  ['covers', 'includes', 'encompasses', 'comprises', 'lists', 'discusses', 'concerns', 'contains', 'chronicles', 'describes']
Sentence 52 rankings: 
 sentence:  "Realising that the gang could not elude the police forever , Moondyne Joe formulated a plan to escape the colony by traveling overland to the colony of South Australia ."
mask_words  elude
cgBERT  ['evade', 'avoid', 'escape', 'resist', 'serve', 'fight', 'fool', 'withstand', 'dodge', 'survive']
Sentence 53 rankings: 
 sentence:  "Southeastern Oklahoma , also known by its official tourism designation , Kiamichi Country , encompasses the southeastern quarter of the state of Oklahoma ."
mask_words  encompasses
cgBERT  ['is', 'comprises', 'covers', 'occupies', 'includes', 'constitutes', 'represents

cgBERT  ['moved', 'removed', 'migrated', 'came', 'went', 'relocated', 'passed', 'was', 'from', 'returned']
Sentence 76 rankings: 
 sentence:  "Ohio State also announced in 2006 , that it would be designating at least $ 110 million of its research efforts to what it termed `` fundamental concerns '' such as research towards a cure for cancer , renewable energy sources and sustainable drinking water supplies ."
mask_words  fundamental
cgBERT  ['health', 'environmental', 'emerging', 'special', 'urgent', 'sustainability', 'societal', 'global', 'priority', 'key']
Sentence 77 rankings: 
 sentence:  "Long ago , there was a decision by the late former Prime Minister , Rafik Hariri , to restore the synagogue and surround it with a garden ."
mask_words  restore
cgBERT  ['expand', 'rebuild', 'buy', 'close', 'extend', 'purchase', 'save', 'preserve', 'build', 'remove']
Sentence 78 rankings: 
 sentence:  "Port Arthur was also the destination for juvenile convicts , receiving many boys , some as youn

cgBERT  ['completing', 'catching', 'throwing', 'passing', 'totaling', 'completed', 'receiving', 'recording', 'making', 'returning']
Sentence 102 rankings: 
 sentence:  "A carotenoid dye , crocin , allows saffron to impart a rich golden-yellow hue to dishes and textiles . Saffron has further medicinal applications ."
mask_words  golden-yellow
cgBERT  ['red', 'reddish', 'green', 'blue', 'yellow', 'purple', 'orange', 'pink', 'yellowish', 'greenish']
Sentence 103 rankings: 
 sentence:  The revival of lute in the 20th century revitalized the interest of composers in the instruments of the lute family .
mask_words  revival
cgBERT  ['resurgence', 'rise', 'popularity', 'development', 'history', 'invention', 'advent', 'renaissance', 'use', 'decline']
Sentence 104 rankings: 
 sentence:  Urepel is a commune in the PyrÌ©nÌ©es - Atlantiques department in south-western France .
mask_words  south-western
cgBERT  ['western', 'west', 'central', 'northwestern', 'northern', 'aquitaine', 'northwest', 'in'

cgBERT  ['was', 'worked', 'stayed', 'served', 'dealt', 'lived', 'continued', 'associated', 'lasted', 'collaborated']
Sentence 128 rankings: 
 sentence:  From 1908 to 1911 he was the principal conductor of the Vienna Hofoper succeeding Gustav Mahler ; he retained the conductorship of the Vienna Philharmonic until 1927 .
mask_words  retained
cgBERT  ['held', 'continued', 'kept', 'maintained', 'resumed', 'assumed', 'occupied', 'took', 'had', 'received']
Sentence 129 rankings: 
 sentence:  "Mexican cinnamon hot chocolate is traditionally served alongside a variety of Mexican pastries known as pan dulce and , as in Spain , churros ."
mask_words  traditionally
cgBERT  ['usually', 'often', 'typically', 'commonly', 'generally', 'sometimes', 'normally', 'also', 'always', 'frequently']
Sentence 130 rankings: 
 sentence:  Slaves were previously introduced by the British and French who colonized the island in the 18th century .
mask_words  introduced
cgBERT  ['owned', 'held', 'imported', 'used', '

cgBERT  ['performances', 'music', 'characters', 'acting', 'dialogue', 'actors', 'moments', 'visuals', 'melodies', 'imagery']
Sentence 155 rankings: 
 sentence:  Seismograms are essential for measuring earthquakes using the Richter scale .
mask_words  essential
cgBERT  ['used', 'instruments', 'tools', 'devices', 'methods', 'techniques', 'graphs', 'scales', 'records', 'charts']
Sentence 156 rankings: 
 sentence:  "But , from time to time the Church hierarchy , and even some popes , have given opinions on various matters ; although these `` guidelines '' are not binding on Catholics , many tradition-minded Catholics find them persuasive ."
mask_words  persuasive
cgBERT  ['useful', 'binding', 'necessary', 'comforting', 'helpful', 'illuminating', 'sound', 'compelling', 'valuable', 'appropriate']
Sentence 157 rankings: 
 sentence:  A haunted house is defined as a house that is believed to be a center for supernatural occurrences or paranormal phenomena .
mask_words  occurrences
cgBERT  ['act

cgBERT  ['writer', 'author', 'cartoonist', 'broadcaster', 'filmmaker', 'journalist', 'artist', 'comedian', 'illustrator', 'publisher']
Sentence 180 rankings: 
 sentence:  Students are obligated to attend the central high school .
mask_words  obligated
cgBERT  ['required', 'able', 'allowed', 'expected', 'eligible', 'encouraged', 'assigned', 'supposed', 'free', 'permitted']
Sentence 181 rankings: 
 sentence:  The technique consists of precoding the data so as to cancel the effect of the interference .
mask_words  technique
cgBERT  ['procedure', 'algorithm', 'process', 'method', 'analysis', 'protocol', 'correction', 'encryption', 'prediction', 'test']
Sentence 182 rankings: 
 sentence:  "In 1989 , these diversified interests started growing into separate organizations , as the fisheries project became Grameen Motsho and the irrigation project became Grameen Krishi ."
mask_words  organizations
cgBERT  ['projects', 'entities', 'companies', 'businesses', 'ventures', 'enterprises', 'activitie

cgBERT  ['area', 'country', 'population', 'region', 'surface', 'state', 'nation', 'land', 'mass', 'zone']
Sentence 206 rankings: 
 sentence:  "In the opening and middlegame , the king will rarely play an active role in the development of an offensive or defensive position ."
mask_words  rarely
cgBERT  ['often', 'usually', 'not', 'generally', 'typically', 'normally', 'always', 'never', 'sometimes', 'frequently']
Sentence 207 rankings: 
 sentence:  The most troublesome situation usually occurs with short journeys from west to east .
mask_words  troublesome
cgBERT  ['common', 'extreme', 'unusual', 'obvious', 'severe', 'dangerous', 'difficult', 'challenging', 'familiar', 'frequent']
Sentence 208 rankings: 
 sentence:  "Barnes & Noble , Inc. is the largest book retailer in the United States , operating mainly through its Barnes & Noble Booksellers chain of bookstores headquartered in lower Fifth Avenue in Lower Manhattan , New York City ."
mask_words  headquartered
cgBERT  ['located', 'base

cgBERT  ['large', 'shifting', 'permanent', 'natural', 'periodic', 'raised', 'continuous', 'mobile', 'scattered', 'high']
Sentence 232 rankings: 
 sentence:  107 Camilla is one of the largest main belt asteroids .
mask_words  largest
cgBERT  ['smallest', 'smaller', 'oldest', 'inner', 'biggest', 'brightest', 'youngest', 'stony', 'older', 'small']
Sentence 233 rankings: 
 sentence:  "First mentioned by the Ancient Greek geographer Ptolemy , the pre-Christian settlement of the Saxon people originally covered an area a little more to the northwest , with parts of the southern Jutland Peninsula , Old Saxony and small sections of the eastern Low Countries ."
mask_words  sections
cgBERT  ['parts', 'portions', 'areas', 'part', 'regions', 'pockets', 'stretches', 'amounts', 'territories', 'tracts']
Sentence 234 rankings: 
 sentence:  "Jeanne Demessieux had a prodigious memory : she had memorized more than 2,500 works , including the complete organ works of Johann Sebastian Bach , CÌ©sar Franck , 

cgBERT  ['next', 'similar', 'attached', 'equivalent', 'successor', 'close', 'opposite', 'belonging', 'annexed', 'precursor']
Sentence 257 rankings: 
 sentence:  OpenOffice.org Calc is the spreadsheet component of the OpenOffice.org software package .
mask_words  component
cgBERT  ['version', 'editor', 'engine', 'application', 'part', 'client', 'portion', 'module', 'interface', 'extension']
Sentence 258 rankings: 
 sentence:  "The Countess tells him it is only Susanna , trying on her wedding dress ."
mask_words  tells
cgBERT  ['told', 'informs', 'assured', 'convinces', 'assure', 'convinced', 'informed', 'shows', 'warns', 'insists']
Sentence 259 rankings: 
 sentence:  "Early representations of the balalaika show it with anywhere from two to six strings , which resembles certain Central Asian instruments ."
mask_words  representations
cgBERT  ['illustrations', 'photographs', 'drawings', 'depictions', 'descriptions', 'pictures', 'images', 'records', 'paintings', 'accounts']
Sentence 260 ra

cgBERT  ['owned', 'headed', 'run', 'led', 'managed', 'overseen', 'controlled', 'directed', 'supervised', 'represented']
Sentence 282 rankings: 
 sentence:  He is portrayed by Mark Hamill .
mask_words  portrayed
cgBERT  ['voiced', 'played', 'directed', 'written', 'represented', 'created', 'illustrated', 'narrated', 'owned', 'produced']
Sentence 283 rankings: 
 sentence:  "Super Smash Bros. was developed by HAL Laboratory , a Nintendo second-party developer , during ."
mask_words  developed
cgBERT  ['created', 'designed', 'made', 'produced', 'built', 'conceived', 'published', 'released', 'invented', 'licensed']
Sentence 284 rankings: 
 sentence:  The northern and western edges of the Cotswolds are marked by steep escarpments down to the Severn valley and the Warwickshire Avon .
mask_words  escarpments
cgBERT  ['slopes', 'cliffs', 'drops', 'hills', 'declines', 'steps', 'terraces', 'foothills', 'descent', 'walls']
Sentence 285 rankings: 
 sentence:  "They renamed the place Alcante or Alcan

cgBERT  ['settlers', 'residents', 'ancestors', 'people', 'immigrants', 'families', 'generations', 'colonists', 'generation', 'pioneers']
Sentence 310 rankings: 
 sentence:  "Thousands of videos exist on YouTube of users showing the original video to their friends and taping their reactions , although some videos seem to be staged ."
mask_words  taping
cgBERT  ['recording', 'watching', 'discussing', 'documenting', 'showing', 'observing', 'sharing', 'seeing', 'filming', 'explaining']
Sentence 311 rankings: 
 sentence:  "However , on 13 April 2008 , before the completion of the initial five years , it was announced that Bryce was to become the next Governor-General of Australia ."
mask_words  initial
cgBERT  ['first', 'previous', 'next', 'planned', 'preceding', 'required', 'last', 'expected', 'original', 'following']
Sentence 312 rankings: 
 sentence:  "Kasem is best known by name as a music historian and disc jockey , most notably as host of the weekly American Top 40 radio program from 

cgBERT  [',', '-', '.', 'well', 'all', 'sexually', '—', '...', 'financially', 'properly']
Sentence 336 rankings: 
 sentence:  "Gumbasia , a 3 minute 34 second short film produced in 1953 , was the first clay animation produced by Art Clokey , who went on to create the classic series Gumby and Davy and Goliath using the same technique ."
mask_words  classic
cgBERT  ['television', 'tv', 'animated', 'cartoon', 'popular', 'pbs', '1960s', 'radio', 'comedy', 'nickelodeon']
Sentence 337 rankings: 
 sentence:  "Also , the sound of the regional language qawwali can be totally different from that of mainstream qawwali ."
mask_words  mainstream
cgBERT  ['standard', 'central', 'the', 'modern', 'local', 'southern', 'traditional', 'northern', 'ordinary', 'classical']
Sentence 338 rankings: 
 sentence:  Many people see Al Jazeera as a more trustworthy source of information than government and foreign channels .
mask_words  trustworthy
cgBERT  ['reliable', 'important', 'valuable', 'credible', 'powerfu

cgBERT  ['forbid', 'condemn', 'ban', 'outlaw', 'oppose', 'tolerate', 'reject', 'eliminate', 'abandon', 'prevent']
Sentence 360 rankings: 
 sentence:  "To reduce space and increase reading speed , virtually all Braille books are transcribed in what is known as Grade 2 Braille , which uses a system of contractions to reduce space and speed the process of reading ."
mask_words  reduce
cgBERT  ['save', 'minimize', 'conserve', 'eliminate', 'decrease', 'preserve', 'increase', 'use', 'cut', 'retain']
Sentence 361 rankings: 
 sentence:  This is a considerably higher total than that of any other European country .
mask_words  considerably
cgBERT  ['much', 'significantly', 'far', 'slightly', 'substantially', 'relatively', 'somewhat', 'rather', 'noticeably', 'notably']
Sentence 362 rankings: 
 sentence:  "Apples require cross-pollination between individuals by insects ; all are self-sterile , and self-pollination is impossible , making pollinating insects essential ."
mask_words  require
cgBERT  

cgBERT  ['controlled', 'operated', 'managed', 'distributed', 'run', 'held', 'acquired', 'funded', 'handled', 'licensed']
Sentence 387 rankings: 
 sentence:  "Alexander Stepanovich Popov was a Russian physicist who first demonstrated the practical application of electromagnetic waves , although he did not apply for a patent for his invention ."
mask_words  demonstrated
cgBERT  ['proposed', 'described', 'considered', 'suggested', 'investigated', 'discovered', 'studied', 'conceived', 'saw', 'developed']
Sentence 388 rankings: 
 sentence:  "In nature , there is a strong evolutionary pressure for animals to blend into their environment or conceal their shape ; for prey animals to avoid predators and for predators to be able to sneak up on prey ."
mask_words  conceal
cgBERT  ['change', 'maintain', 'alter', 'lose', 'keep', 'modify', 'retain', 'hide', 'mimic', 'shift']
Sentence 389 rankings: 
 sentence:  "In Switzerland , the constitutional ban on absinthe was repealed in 2000 during an overha

cgBERT  ['mythical', 'giant', 'monstrous', 'supposed', 'alleged', 'infamous', 'elusive', 'mythological', 'mysterious', 'notorious']
Sentence 412 rankings: 
 sentence:  "Typically , a fast shutter speed will require a larger aperture to ensure sufficient light exposure , and a slow shutter speed will require a smaller aperture to avoid excessive exposure ."
mask_words  avoid
cgBERT  ['prevent', 'ensure', 'limit', 'assure', 'eliminate', 'guarantee', 'allow', 'reduce', 'minimize', 'provide']
Sentence 413 rankings: 
 sentence:  "Railways now bypass the three major falls , and much of the trade of central Africa passes along the river , including copper , palm oil , sugar , coffee , and cotton ."
mask_words  bypass
cgBERT  ['cross', 'serve', 'connect', 'traverse', 'span', 'reach', 'cover', 'pass', 'service', 'follow']
Sentence 414 rankings: 
 sentence:  "Security-Enhanced Linux is a Linux feature that provides a mechanism for supporting access control security policies , including U.S. Depa

cgBERT  ['discomfort', 'blindness', 'confusion', 'delay', 'darkness', 'paralysis', 'relaxation', 'recovery', 'difficulty', 'pain']
Sentence 438 rankings: 
 sentence:  "Mary Toft , also spelled Tofts , was an English woman from Godalming , Surrey , who in 1726 became the subject of considerable controversy when she tricked doctors into believing that she had given birth to rabbits ."
mask_words  controversy
cgBERT  ['scandal', 'attention', 'notoriety', 'gossip', 'interest', 'publicity', 'sensation', 'curiosity', 'speculation', 'fame']
Sentence 439 rankings: 
 sentence:  "SDLP leader John Hume , MP , identified the possibility that a negotiated settlement might be possible and began secret talks with Adams in 1988 ."
mask_words  identified
cgBERT  ['recognised', 'saw', 'considered', 'recognized', 'acknowledged', 'realised', 'accepted', 'raised', 'suggested', 'investigated']
Sentence 440 rankings: 
 sentence:  "A brief stint on Go Kart Records saw the release of `` Fly the Flag '' , and n

cgBERT  ['occupied', 'captured', 'taken', 'held', 'attacked', 'sacked', 'seized', 'burned', 'recaptured', 'invaded']
Sentence 463 rankings: 
 sentence:  "One of the defining aspects of the Nintendo GameCube is the rejuvenated relationship between Nintendo and its licensees . Unlike previous generations in which Nintendo was seen by some as bullying its third-party game developers , Nintendo openly sought game-development aid on the Nintendo GameCube ."
mask_words  rejuvenated
cgBERT  ['close', 'complex', 'unique', 'direct', 'cooperative', 'special', 'working', 'ongoing', 'friendly', 'respectful']
Sentence 464 rankings: 
 sentence:  None of your watched items were edited in the time period displayed .
mask_words  edited
cgBERT  ['used', 'found', 'present', 'here', 'anywhere', 'located', 'available', 'included', 'actually', 'seen']
Sentence 465 rankings: 
 sentence:  "Originally made for the Apple II , they were later ported to other platforms ."
mask_words  ported
cgBERT  ['migrated', '

cgBERT  ['regarded', 'among', 'deemed', 'as', 'believed', 'recognized', 'amongst', 'called', 'thought', 'acknowledged']
Sentence 489 rankings: 
 sentence:  Harvey became a strong extratropical storm during the night on August 8 and survived another few days before dissipating northwest of the Azores on August 14 .
mask_words  survived
cgBERT  ['lasted', 'persisted', 'continued', 'lingered', 'stayed', 'spent', 'tracked', 'for', 'remained', 'strengthened']
Sentence 490 rankings: 
 sentence:  "In response to the weather system , the American National Weather Service issued wind watches for the Oregon Coast ."
mask_words  response
cgBERT  ['addition', 'relation', 'connection', 'reaction', 'regards', 'conjunction', 'contrast', 'regard', 'reference', 'accordance']
Sentence 491 rankings: 
 sentence:  It reached a peak position of number thirty-seven on the Billboard Hot Modern Rock Tracks chart .
mask_words  reached
cgBERT  ['attained', 'achieved', 'had', 'earned', 'made', 'hit', 'has', 'obta

INFO:__main__:***** Running evaluation *****
INFO:__main__:  Num examples = 500


cgBERT  ['parts', 'countries', 'areas', 'regions', 'nations', 'quarters', 'corners', 'cities', 'portions', 'aspects']
The score of evaluation for BERT candidate generation
0.906 0.253 0.19676466013376886 0.22136669874879691
The score of evaluation for full LS pipeline
0.694 0.652 0.958
35
第  3 次循环
prob_mask: 0.0
Sentence 0 rankings: 
 sentence:  "In March 1992 , Linux version 0.95 was the first to be capable of running X. This large version number jump was due to a feeling that a version 1.0 with no major missing pieces was imminent ."
mask_words  pieces
cgBERT  ['features', 'code', 'parts', 'functionality', 'feature', 'bugs', 'issues', 'components', 'files', 'elements']
Sentence 1 rankings: 
 sentence:  Much of the water carried by these streams is diverted .
mask_words  diverted
cgBERT  ['recycled', 'acidic', 'fresh', 'saline', 'agricultural', 'filtered', 'underground', 'wastewater', 'rain', 'groundwater']
Sentence 2 rankings: 
 sentence:  "Harry also becomes the worthy possessor of 

cgBERT  ['tail', 'cockpit', 'body', 'hull', 'nose', 'layout', 'chassis', 'platform', 'cabin', 'parts']
Sentence 26 rankings: 
 sentence:  "The last event was held on June 11 , 2000 , not to be held again due to the acquisition of WCW by World Wrestling Federation ."
mask_words  acquisition
cgBERT  ['purchase', 'takeover', 'absorption', 'replacement', 'merger', 'ownership', 'folding', 'dissolution', 'creation', 'defeat']
Sentence 27 rankings: 
 sentence:  Kowal suggested the name and the IAU endorsed it in 1975 .
mask_words  endorsed
cgBERT  ['adopted', 'approved', 'accepted', 'chose', 'used', 'selected', 'coined', 'registered', 'recognized', 'ratified']
Sentence 28 rankings: 
 sentence:  Dry air wrapping around the southern periphery of the cyclone eroded most of the deep convection by early on September 12 .
mask_words  periphery
cgBERT  ['edge', 'portion', 'end', 'half', 'tip', 'side', 'core', 'portions', 'part', 'edges']
Sentence 29 rankings: 
 sentence:  "Brief additional internal 

cgBERT  ['limited', 'many', 'special', 'advanced', 'extended', 'complex', 'powerful', 'extensive', 'various', 'multiple']
Sentence 51 rankings: 
 sentence:  This period spanned the years from 1278 through 1288 .
mask_words  spanned
cgBERT  ['covers', 'includes', 'encompasses', 'comprises', 'lists', 'discusses', 'concerns', 'contains', 'chronicles', 'describes']
Sentence 52 rankings: 
 sentence:  "Realising that the gang could not elude the police forever , Moondyne Joe formulated a plan to escape the colony by traveling overland to the colony of South Australia ."
mask_words  elude
cgBERT  ['evade', 'avoid', 'escape', 'resist', 'serve', 'fight', 'fool', 'withstand', 'dodge', 'survive']
Sentence 53 rankings: 
 sentence:  "Southeastern Oklahoma , also known by its official tourism designation , Kiamichi Country , encompasses the southeastern quarter of the state of Oklahoma ."
mask_words  encompasses
cgBERT  ['is', 'comprises', 'covers', 'occupies', 'includes', 'constitutes', 'represents

cgBERT  ['moved', 'removed', 'migrated', 'came', 'went', 'relocated', 'passed', 'was', 'from', 'returned']
Sentence 76 rankings: 
 sentence:  "Ohio State also announced in 2006 , that it would be designating at least $ 110 million of its research efforts to what it termed `` fundamental concerns '' such as research towards a cure for cancer , renewable energy sources and sustainable drinking water supplies ."
mask_words  fundamental
cgBERT  ['health', 'environmental', 'emerging', 'special', 'urgent', 'sustainability', 'societal', 'global', 'priority', 'key']
Sentence 77 rankings: 
 sentence:  "Long ago , there was a decision by the late former Prime Minister , Rafik Hariri , to restore the synagogue and surround it with a garden ."
mask_words  restore
cgBERT  ['expand', 'rebuild', 'buy', 'close', 'extend', 'purchase', 'save', 'preserve', 'build', 'remove']
Sentence 78 rankings: 
 sentence:  "Port Arthur was also the destination for juvenile convicts , receiving many boys , some as youn

cgBERT  ['completing', 'catching', 'throwing', 'passing', 'totaling', 'completed', 'receiving', 'recording', 'making', 'returning']
Sentence 102 rankings: 
 sentence:  "A carotenoid dye , crocin , allows saffron to impart a rich golden-yellow hue to dishes and textiles . Saffron has further medicinal applications ."
mask_words  golden-yellow
cgBERT  ['red', 'reddish', 'green', 'blue', 'yellow', 'purple', 'orange', 'pink', 'yellowish', 'greenish']
Sentence 103 rankings: 
 sentence:  The revival of lute in the 20th century revitalized the interest of composers in the instruments of the lute family .
mask_words  revival
cgBERT  ['resurgence', 'rise', 'popularity', 'development', 'history', 'invention', 'advent', 'renaissance', 'use', 'decline']
Sentence 104 rankings: 
 sentence:  Urepel is a commune in the PyrÌ©nÌ©es - Atlantiques department in south-western France .
mask_words  south-western
cgBERT  ['western', 'west', 'central', 'northwestern', 'northern', 'aquitaine', 'northwest', 'in'

cgBERT  ['was', 'worked', 'stayed', 'served', 'dealt', 'lived', 'continued', 'associated', 'lasted', 'collaborated']
Sentence 128 rankings: 
 sentence:  From 1908 to 1911 he was the principal conductor of the Vienna Hofoper succeeding Gustav Mahler ; he retained the conductorship of the Vienna Philharmonic until 1927 .
mask_words  retained
cgBERT  ['held', 'continued', 'kept', 'maintained', 'resumed', 'assumed', 'occupied', 'took', 'had', 'received']
Sentence 129 rankings: 
 sentence:  "Mexican cinnamon hot chocolate is traditionally served alongside a variety of Mexican pastries known as pan dulce and , as in Spain , churros ."
mask_words  traditionally


KeyboardInterrupt: 

In [None]:
import os

import pygame

# Audio clip to play when this cell runs.
# NOTE(review): presumably an audible "the long run above has finished" cue — confirm.
# This is a machine-specific absolute path; point it at a clip that exists locally.
# The file must really exist; formats such as mp3/ogg are supported.
ALERT_SOUND_PATH = r'D:/1.mp3'

if os.path.isfile(ALERT_SOUND_PATH):
    # Initialise the audio mixer before using the music module.
    pygame.mixer.init()
    # Load the audio file and start playback.
    pygame.mixer.music.load(ALERT_SOUND_PATH)
    pygame.mixer.music.play()
else:
    # Report the missing clip instead of failing inside pygame's loader.
    print('Alert sound not found, skipping playback: ' + ALERT_SOUND_PATH)
