# eval

In [None]:
#!/usr/bin/env python3
import nlp
from datasets import load_dataset
from transformers import BertTokenizer, EncoderDecoderModel
tokenizer = BertTokenizer.from_pretrained("patrickvonplaten/bert2bert-cnn_dailymail-fp16")
model = EncoderDecoderModel.from_pretrained("patrickvonplaten/bert2bert-cnn_dailymail-fp16")
model.to("cuda")
test_dataset = nlp.load_dataset("cnn_dailymail", "3.0.0", split="test")
# test_dataset = load_dataset('cnn_dailymail', "3.0.0", split='test', ignore_verifications=True)

batch_size = 128
# map data correctly
def generate_summary(batch):
    # Tokenizer will automatically set [BOS] <text> [EOS]
    # cut off at BERT max length 512
    inputs = tokenizer(batch["article"], padding="max_length", truncation=True, max_length=512, return_tensors="pt")
    input_ids = inputs.input_ids.to("cuda")
    attention_mask = inputs.attention_mask.to("cuda")
    outputs = model.generate(input_ids, attention_mask=attention_mask)
    # all special tokens including will be removed
    output_str = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    batch["pred"] = output_str
    return batch
results = test_dataset.map(generate_summary, batched=True, batch_size=batch_size, remove_columns=["article"])
# load rouge for validation
rouge = nlp.load_metric("rouge")
pred_str = results["pred"]
label_str = results["highlights"]
rouge_output = rouge.compute(predictions=pred_str, references=label_str, rouge_types=["rouge2"])["rouge2"].mid
print(rouge_output)

# error case analysis

In [4]:
from transformers import EncoderDecoderConfig, EncoderDecoderModel, AutoTokenizer

encoder_decoder_config = EncoderDecoderConfig.from_pretrained('./models/bert2bert_share/ios0_naemb_1e4_preTweetcopy_tweetnorm/2/checkpoint-5000')
bert2bert_model = EncoderDecoderModel.from_pretrained('./models/bert2bert_share/ios0_naemb_1e4_preTweetcopy_tweetnorm/2/checkpoint-5000', config=encoder_decoder_config)
encoder_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
decoder_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")


In [5]:
import pandas as pd

PATH_TO_TRAIN_DATA = "WNUT2015_dataset/train_data.json"
PATH_TO_VAL_DATA = "WNUT2015_dataset/test_truth.json"

#-------------------------------------------------------------------------------------------
# load train and validation data
train_dataset = pd.read_json(PATH_TO_TRAIN_DATA, orient="records")
val_dataset = pd.read_json(PATH_TO_VAL_DATA, orient="records")
#---------------------------------
#do some normalization ourselves
def norm(token):
    if token.lower().startswith("@"):
        return "username"
    elif token.lower().startswith('#'):
        return "hashtag"
    elif token.lower().startswith("http") or token.lower().startswith("www"):
        return "httpurl"
    else:
        return token.replace("’", "'").replace("…", "...")

def pre_pocessing_input(x):
    result = []
    token_count = 0
    for item in x:
        item = norm(item)
        #the reason for encode+decode is "?!!" need to be "? ! !"
        input_ids = encoder_tokenizer(item).input_ids
        item = encoder_tokenizer.decode(input_ids, skip_special_tokens=True)
        result.append(item)
        token_count += 1
    return result

def pre_pocessing_output(x):
    result = []
    token_count = 0
    for item in x:
        item = norm(item)
        #the reason for encode+decode is "?!!" need to be "? ! !"
        input_ids = decoder_tokenizer(item).input_ids
        item = decoder_tokenizer.decode(input_ids, skip_special_tokens=True)
        result.append(item)
        token_count += 1
    return result

train_dataset['input'] = train_dataset['input'].apply(pre_pocessing_input)
train_dataset['output'] = train_dataset['output'].apply(pre_pocessing_output)
val_dataset['input'] = val_dataset['input'].apply(pre_pocessing_input)
val_dataset['output'] = val_dataset['output'].apply(pre_pocessing_output)
#-------------------------------------------
#make sentence for token list
make_sentence = lambda x : " ".join(x).lower()

train_dataset['input_sentence'] = train_dataset['input'].apply(make_sentence)
train_dataset['output_sentence'] = train_dataset['output'].apply(make_sentence)
val_dataset['input_sentence'] = val_dataset['input'].apply(make_sentence)
val_dataset['output_sentence'] = val_dataset['output'].apply(make_sentence)

In [7]:
tweet_text = "USERNAME WE cannot DOIN A FUCKEN MOVIE , it's so bad . lol. gonna to be better HTTPURL"
#tweet_text = train_dataset['input_sentence'].iloc[2]

input_ids = encoder_tokenizer(tweet_text, return_tensors="pt").input_ids
output_ids = bert2bert_model.generate(input_ids)

print("tweet: ", tweet_text)
print("model: ", decoder_tokenizer.convert_ids_to_tokens(output_ids[0], skip_special_tokens=True))
print("model: ", decoder_tokenizer.decode(output_ids[0], skip_special_tokens=True))
#print("model: ", " ".join(nltk_tknzr.tokenize(bert_tokenizer.decode(output_ids[0], skip_special_tokens=True))))

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence


tweet:  USERNAME WE cannot DOIN A FUCKEN MOVIE , it's so bad . lol. gonna to be better HTTPURL
model:  ['user', '##name', 'we', 'cannot', 'doing', 'a', 'fuck', '##en', 'movie', ',', 'it', "'", 's', 'so', 'bad', '.', 'laughing', 'out', 'loud', '.', 'gonna', 'to', 'be', 'better', 'http', '##ur', '##l']
model:  username we cannot doing a fucken movie , it's so bad . laughing out loud . gonna to be better httpurl


In [9]:
tweet_text = "USERNAME WE cannot DOIN A FUCKEN MOVIE , it's so bad . lol. gonna to be better HTTPURL"
#tweet_text = train_dataset['input_sentence'].iloc[2]

input_ids = encoder_tokenizer(tweet_text, return_tensors="pt").input_ids
output_ids = bert2bert_model.generate(input_ids)

print("tweet: ", tweet_text)
print("model: ", decoder_tokenizer.decode(output_ids[0], skip_special_tokens=True))
#print("model: ", " ".join(nltk_tknzr.tokenize(bert_tokenizer.decode(output_ids[0], skip_special_tokens=True))))

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence


tweet:  USERNAME WE cannot DOIN A FUCKEN MOVIE , it's so bad . lol. gonna to be better HTTPURL
model:  username we cannot doing a fucken movie , it's so bad . laughing out loud . gonna to be better httpurl


In [11]:
for i in range(0, 20):
#     print("example"+str(i))
    tweet_text = val_dataset.iloc[i]['input_sentence']
    normal_english = val_dataset.iloc[i]['output_sentence'].lower()

    input_ids = encoder_tokenizer(tweet_text, return_tensors="pt").input_ids
    output_ids = bert2bert_model.generate(input_ids)

    print("tweet: ", tweet_text)
    print("model: ", decoder_tokenizer.decode(output_ids[0], skip_special_tokens=True))
    print("label: ", normal_english)

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence


tweet:  username yeh but still that's wild lol
model:  username yeah but still that's wild laughing out loud
label:  username yeah but still that's wild laughing out loud


Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence


tweet:  dick in janice , im poppin xanax and speakin spanish .
model:  dick in janice , i'm popping xanax and speaking french .
label:  dick in janice , i'm popping xanax and speaking spanish .


Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence


tweet:  ucsb i fear the next rampage will b cuz media told us everything hashtag ever diid . to get ratings . hashtag hashtag hashtag
model:  ucsb i fear the next move will be because they got us everything hashtag ever diid . to get rating . hashtag hashtag
label:  ucsb i fear the next rampage will because  media told us everything hashtag ever did . to get ratings . hashtag hashtag hashtag


Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence


tweet:  rt username : username username username not even gays are gonna look to u lmao
model:  rt username : username username kidname not even gays are gonna look to you laughing my ass off
label:  rt username : username username username not even gays are gonna look to you laughing my ass off


Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence


tweet:  username username username card in 5 mins is with wh but this was the game so far bk lol but how to find who the ref is
model:  username username fanname card in 5 minutes is with what but this was the game so far bk laughing out loud but how to find who the ref is
label:  username username username card in 5 minutes is with wh but this was the game so far bk laughing out loud but how to find who the ref is


Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence


tweet:  username haha oh okk ! yeaah , they've been dating for a while now ! ! they seem crazy cute ! ! !
model:  username haha oh okk ! yeaah , they've been dating for a while now ! ! they seems crazy cute ! ! !
label:  username haha oh okay ! yeah , they've been dating for a while now ! ! they seem crazy cute ! ! !


Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence


tweet:  samsung working on oculus rift - like vr headsets for galaxy devices : report - firstpost httpurl
model:  galaxy working on oculus rift - like vr headset for galaxy device : report - firstpost httpurl
label:  samsung working on oculus rift - like vr headsets for galaxy devices : report - firstpost httpurl


Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence


tweet:  rt username : best snapchat i've seen all day lmfao httpurl
model:  rt username : best snapchat i've seen all day laughing my fucking ass off httpurl
label:  rt username : best snapchat i've seen all day laughing my fucking ass off httpurl


Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence


tweet:  legends indeed rt username : feela sistah spoken word collective . . . . . . . . . . . . hashtag httpurl
model:  legend indeed rt username : feela sistah speaking word . . . ! . . httpurl . . hashtag httpurls
label:  legends indeed rt username : feels sistah spoken word collective . . . . . . . . . . . . hashtag httpurl


Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence


tweet:  thisnewssoccer : adam lallana pilih tottenham hotspur ketimbang liverpool httpurl hashtag
model:  thisnewssoccer : adam lalana pilih tottenham spurs ketimbang liverpool httpurl hashtag
label:  this news soccer : adam lallana pilih tottenham hotspur ketimbang liverpool httpurl hashtag


Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence


tweet:  enter to hashtag a prize pack of lindor truffles and lindor sticks ( arcv $ 52 ) from lindt & username ! hashtag httpurl
model:  join to hashtag a prize pack of pindor truf elections and pindor sticks ( capv $ 56 ) from pin et etung & hashtag httpurl
label:  enter to hashtag a prize pack of lindor truffles and lindor sticks ( arcv $ 52 ) from lindt & username ! hashtag httpurl


Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence


tweet:  httpurl hashtag " . . . hashtag is the result of an aggressive mind . " . pema chodron
model:  httpurl hashtag " . . . hashtag is the result of an offensive mind . " . pema chodron
label:  httpurl hashtag " . . . hashtag is the result of an aggressive mind . " . pema chodron


Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence


tweet:  rt username : we r doin a fucken movie with lionsgate how did we even get here from barking on a train bahahahaha
model:  rt username : we are doing a fucken movie with lionsgate how did we even get here from barking on a train bahahahaha
label:  rt username : we are doing a fucking movie with lionsgate how did we even get here from barking on a train bahahahaha


Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence


tweet:  lol . don't act cute here pls
model:  laughing out loud . don't act cute here please
label:  laughing out loud . don't act cute here please


Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence


tweet:  rt username : idek why i bother . . . smh
model:  rt username : i don't know why i bothered . . . shaking my head
label:  rt username : i don't even know why i bother . . . shaking my head


Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence


tweet:  rt username : [ hq ] 140526 xiumin , luhan username mbc idol futsal championship ( cr . shade of the bloom ) httpurl
model:  rt username : [ hq ] 140526 xiumin , luhan username mbc idol futsal championship ( cr . shade of the bloom ) httpurl
label:  rt username : [ hq ] 140526 xiumin , luhan username mbc idol futsal championship ( cr . shade of the bloom ) httpurl


Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence


tweet:  username ayy skiperooo wassup cuz what u mean by marlton ?
model:  username ayy skiperooo was what you mean by marlton ?
label:  username ayy skiperooo what's up because what you mean by marlton ?


Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence


tweet:  rt username : sean , darragh and i , in croke park ! unbelievable weekend ! httpurl
model:  rt username : sean , darragh and i , in croke park ! unbelievable weekend ! httpurl
label:  rt username : sean , darragh and i , in croke park ! unbelievable weekend ! httpurl


Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence


tweet:  username umm i thought that convo was pretty funny soooooo
model:  username umm i thought that conversation was pretty funny so
label:  username umm i thought that conversation was pretty funny so
tweet:  stop be so fucken gay you three aint nobody got time for that username username username
model:  stop be so fucking gay you three ain't nobody got time for that username username
label:  stop be so fucking gay you three ain't nobody got time for that username username username


In [16]:
src = []
target = []
pred = []
for i in range(0, val_dataset.shape[0]):
#     print("example"+str(i))
    tweet_text = val_dataset.iloc[i]['input_sentence']
    normal_english = val_dataset.iloc[i]['output_sentence'].lower()

    input_ids = encoder_tokenizer(tweet_text, return_tensors="pt").input_ids
    output_ids = bert2bert_model.generate(input_ids)

    src.append(tweet_text)
    pred.append(decoder_tokenizer.decode(output_ids[0], skip_special_tokens=True))
    target.append(normal_english)

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

In [17]:
d = {'src': src, 'target': target, 'model_pred': pred}
eval_df = pd.DataFrame(data=d)

In [18]:
eval_df.to_csv('tweetnorm_eval_result.csv')

In [19]:
train_src = []
train_target = []
train_pred = []
for i in range(0, train_dataset.shape[0]):
#     print("example"+str(i))
    tweet_text = train_dataset.iloc[i]['input_sentence']
    normal_english = train_dataset.iloc[i]['output_sentence'].lower()

    input_ids = encoder_tokenizer(tweet_text, return_tensors="pt").input_ids
    output_ids = bert2bert_model.generate(input_ids)

    train_src.append(tweet_text)
    train_pred.append(decoder_tokenizer.decode(output_ids[0], skip_special_tokens=True))
    train_target.append(normal_english)

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 102 (first `

In [20]:
d = {'src': train_src, 'target': train_target, 'model_pred': train_pred}
train_df = pd.DataFrame(data=d)

In [21]:
train_df.to_csv('tweetnorm_train_result.csv')

In [25]:
train_df

Unnamed: 0,src,target,model_pred
0,"rt username : "" ah . . . username : why sub oz...","rt username : "" ah . . . username : why sub oz...","rt username : "" ah . . . username ; why sub oz..."
1,u have a very sexy header username rawr,you have a very sexy header username rawr,you have a very sexy header username rawr
2,i miss u my bie ! where u wanna out wif me ? h...,i miss you my bie ! where you wanna out with m...,i miss you my bie ! where you wanna out with m...
3,""" cantik . rt username : julie christie , 1968...",""" cantik . rt username : julie christie , 1968...",""" cantik . rt username : julie christie , 1968..."
4,rt username : did calum slip ? ! ! omfg,rt username : did calum slip ? ! ! oh my fucki...,rt username : did calum slip ? ! ! oh my fucki...
...,...,...,...
2945,rt username : that was a dope fight hashtag rt...,rt username : that was a dope fight hashtag rt...,rt username : that was a dope fight hashtag rt...
2946,upload success ! ! ! apink - mr . chu ( dance ...,upload success ! ! ! apink - mr . chu ( dance ...,upload success ! ! ! apink - mr . chu ( dance ...
2947,handing out the jaggery for my wada special at...,handing out the jaggery for my water special a...,handing out the jaggery for my water special a...
2948,"you ain't fuckin you ain't suckin , what chu d...","you ain't fucking you ain't sucking , what you...","you ain't fucking you ain t sucking , what you..."
