In [101]:
from torch.utils.data import Dataset, DataLoader
class e2eDataset(Dataset):
    def __init__(self, csv_file, tokenizer):
        """
        Args:
            csv_file (string): csv 파일의 경로
        """
        self.dataset = pd.read_csv(csv_file)
        self.columns = self.dataset.columns
        self.conditions = self.dataset[self.columns[0]]
        self.sentences = self.dataset[self.columns[1]]
        self.tokenizer = tokenizer
        
        self.typ_list = {}
        for k in range(len(self.conditions)):
            cond_set = self.conditions[k].split(',')
            for m in range(len(cond_set)):
                cond_set[m] = cond_set[m].strip()
                pos = cond_set[m].index('[')
                if cond_set[m][:pos] in self.typ_list.keys():
                    self.typ_list[cond_set[m][:pos]].add(cond_set[m][pos+1:-1])
                else:            
                    self.typ_list[cond_set[m][:pos]] = {cond_set[m][pos+1:-1]}        

    def __len__(self):
        return len(self.conditions)

    def __getitem__(self, idx):
        cond = self.conditions[idx]
        cond_set = cond.split(',')
        condition_string = ''
        for m in range(len(cond_set)):
            cond_set[m] = cond_set[m].strip()
            pos = cond_set[m].index('[')
            
            condition_string += '<' + cond_set[m][:pos] + '>' + cond_set[m][pos+1:-1] + ' '
        
        sen = self.sentences[idx]
        input_string = condition_string + '<START>'
        input_ids = torch.tensor(self.tokenizer.encode(input_string, add_special_tokens=True))
        
        input_len = len(input_ids)

        return input_ids, sen, condition_string


In [102]:
from model import *
my_model = mymodel().cuda()
my_model.eval()
my_model.load_state_dict(torch.load('./gen_model/base/2/model'))
print('ok') 

I0423 02:37:42.599551 140472973551424 tokenization_utils.py:504] loading file https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-vocab.json from cache at /home/ds_user1/.cache/torch/transformers/f2808208f9bec2320371a9f5f891c184ae0b674ef866b79c58177067d15732dd.1512018be4ba4e8726e41b9145129dc30651ea4fec86aa61f4b9f40bf94eac71
I0423 02:37:42.600461 140472973551424 tokenization_utils.py:504] loading file https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-merges.txt from cache at /home/ds_user1/.cache/torch/transformers/d629f792e430b3c76a1291bb2766b0a047e36fae0588f9dbc1ae51decdff691b.70bec105b4158ed9a1747fea67a43f5dee97855c64d62b6ec3742f4cfdb5feda
I0423 02:37:42.677000 140472973551424 tokenization_utils.py:663] Adding <START> to the vocabulary
I0423 02:37:42.677967 140472973551424 tokenization_utils.py:741] Assigning <START> to the bos_token key of the tokenizer
I0423 02:37:42.678765 140472973551424 tokenization_utils.py:663] Adding <name> to the vocabulary
I0423 02:37:42.679470 

ok


In [103]:
# e2e_dataset = e2eDataset(csv_file='dataset/testset_w_refs.csv', tokenizer=my_model.tokenizer)
# e2e_dataset = e2eDataset(csv_file='dataset/trainset.csv', tokenizer=my_model.tokenizer)
e2e_dataset = e2eDataset(csv_file='dataset/devset.csv', tokenizer=my_model.tokenizer)
dataloader = DataLoader(e2e_dataset, batch_size=1, shuffle=False, num_workers=4)   

In [119]:
# x = my_model.tokenizer.additional_special_tokens_ids
x = [50258, 50259, 50263, 50260, 50261, 50264, 50265, 50262]
print(x)
print(my_model.tokenizer.all_special_tokens)
for i in range(len(x)):
    print(my_model.tokenizer.decode([x[i]]))

[50258, 50259, 50263, 50260, 50261, 50264, 50265, 50262]
['<name>', '<|endoftext|>', '<area>', '<near>', '<START>', '<customer rating>', '<familyFriendly>', '<food>', '<eatType>', '<priceRange>']
<name>
<eatType>
<food>
<priceRange>
<customer rating>
<area>
<familyFriendly>
<near>


In [4]:
from tqdm import tqdm
same_condition = []
ref_sentences = []
input_ids_list = []
pre_condition_string = ''
start = 0
for i_batch, sample_batched in tqdm(enumerate(dataloader)):
    sen = sample_batched[1][0]
#     print(i_batch, sen)
    condition_string = sample_batched[2]  
    input_ids = sample_batched[0].squeeze(0).cuda()
        
    if start == 0 or condition_string == pre_condition_string:      
        if start == 0:
            input_ids_list.append(input_ids)
        same_condition.append(sen)        
        pre_condition_string = condition_string
        start += 1
    else:   
        input_ids_list.append(input_ids)
        ref_sentences.append(same_condition)
        pre_condition_string = condition_string
        same_condition = [sen]
        start += 1

#     if i_batch == 30:
#         break            
ref_sentences.append(same_condition)

4672it [00:03, 1311.04it/s]


In [5]:
len(ref_sentences), len(input_ids_list)
# input_ids_list[0]

(547, 547)

In [5]:
import time
import nltk
nltk.download('punkt')
from nltk import word_tokenize
from nltk.translate.bleu_score import sentence_bleu
max_len = 50
start_time = time.time()
bleu_score = 0
bleu_1 = 0

f_dev = open('./predictions/base/f_dev_2.txt', 'w')
f_pred = open('./predictions/base/f_pred_2.txt', 'w')

for k in range(len(ref_sentences)):
    input_ids = input_ids_list[k]
    input_len = len(input_ids)

    ori_tokens = []
    for m in range(len(ref_sentences[k])):
        f_dev.write(ref_sentences[k][m]+'\n')
        ori_tokens.append(word_tokenize(ref_sentences[k][m]))
    if k < len(ref_sentences)-1:
        f_dev.write('\n')
    
    for _ in range(max_len):
        model_out = my_model.model_feeding(input_ids) # (batch, seq_len, emb_dim)
        pred_idx = model_out.argmax(1)[-1]        
        if pred_idx == my_model.tokenizer.eos_token_id:
            break            
        input_ids = torch.cat((input_ids, pred_idx.unsqueeze(0)), 0)        
    
    out_sen = my_model.tokenizer.decode(input_ids[input_len:])
    f_pred.write(out_sen+'\n')
    
#     print(ref_sentences[k])
#     print(out_sen)    
    
    out_tokens = word_tokenize(out_sen)
    
    bleu_1_score = sentence_bleu(ori_tokens, out_tokens, weights=(1, 0, 0, 0))
    bleu_2_score = sentence_bleu(ori_tokens, out_tokens, weights=(0.5, 0.5, 0, 0))
    bleu_3_score = sentence_bleu(ori_tokens, out_tokens, weights=(0.33, 0.33, 0.33, 0))
    bleu_4_score = sentence_bleu(ori_tokens, out_tokens, weights=(0.25, 0.25, 0.25, 0.25))
    
    bleu_1 += bleu_1_score

    bleu_score += min(1, len(out_tokens)/len(ori_tokens))*((bleu_1_score*bleu_2_score*bleu_3_score*bleu_4_score)**(0.25))    
    
#     print("time: {}".format(time.time()-start_time))
#     print('')
f_dev.close()
f_pred.close()
    
print("BLEU score: {}".format(bleu_score/len(ref_sentences)*100))
print("BLEU1 score: {}".format(bleu_1/len(ref_sentences)*100))

[nltk_data] Downloading package punkt to /home/ds_user1/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


BLEU score: 73.41162457057993
BLEU1 score: 89.30558590312106


In [13]:
BLEU score: 71.18825887456514
BLEU1 score: 88.76109911863252

./measure_scores.py ../predictions/base/f_dev_6.txt ../predictions/base/f_pred_6.txt
./measure_scores.py ../predictions/base2/f_dev_2.txt ../predictions/base2/f_pred_2.txt
./measure_scores.py ../predictions/large/f_dev_5.txt ../predictions/large/f_pred_5.txt
./measure_scores.py ../predictions/f_dev_2.txt ../predictions/large2/f_pred_2.txt

### Large
#### my_output_1
BLEU: 0.6826
NIST: 8.4243
METEOR: 0.4557
ROUGE_L: 0.7032
CIDEr: 2.1234

#### my_output_2
BLEU: 0.7228
NIST: 8.5241
METEOR: 0.4851
ROUGE_L: 0.7461
CIDEr: 2.4645

#### my_output_3
BLEU: 0.7035
NIST: 8.5937
METEOR: 0.4700
ROUGE_L: 0.7252
CIDEr: 2.3310

#### my_output_4
BLEU: 0.6738
NIST: 8.4018
METEOR: 0.4576
ROUGE_L: 0.7075
CIDEr: 2.2172

#### my_output_5
BLEU: 0.6927
NIST: 8.4429
METEOR: 0.4662
ROUGE_L: 0.7180
CIDEr: 2.2729

### base
#### my_output_2
BLEU: 0.6812
NIST: 8.5491
METEOR: 0.4442
ROUGE_L: 0.7036
CIDEr: 2.1261

#### my_output_6
BLEU: 0.6655
NIST: 8.4830
METEOR: 0.4475
ROUGE_L: 0.6992
CIDEr: 2.1077
    
#### my_output_final
BLEU: 0.6529
NIST: 8.3116
METEOR: 0.4430
ROUGE_L: 0.6842
CIDEr: 1.9766

### Pragmatically Informative Text Generation
BLEU 68.60
NIST 8.73
METEOR 45.25
R-L 70.82
CIDEr 2.37

## Total model inference

In [None]:
# from model_large import *
from model import *
from tqdm import tqdm
import time
import nltk
nltk.download('punkt')
from nltk import word_tokenize
from nltk.translate.bleu_score import sentence_bleu
max_len = 70

my_model = mymodel().cuda()
my_model.eval()

for i in range(1, 9):
    model_name = './gen_model/base_devtrain_5/'+str(i)+'/model'
    my_model.load_state_dict(torch.load(model_name))
    print('ok') 
    if i == 1:
#         e2e_dataset = e2eDataset(csv_file='dataset/devset.csv', tokenizer=my_model.tokenizer)
        e2e_dataset = e2eDataset(csv_file='dataset/testset_w_refs.csv', tokenizer=my_model.tokenizer)
        dataloader = DataLoader(e2e_dataset, batch_size=1, shuffle=False, num_workers=4)
        same_condition = []
        ref_sentences = []
        input_ids_list = []
        pre_condition_string = ''
        start = 0
        for i_batch, sample_batched in tqdm(enumerate(dataloader)):
            sen = sample_batched[1][0]
            condition_string = sample_batched[2]  
            input_ids = sample_batched[0].squeeze(0).cuda()

            if start == 0 or condition_string == pre_condition_string:      
                if start == 0:
                    input_ids_list.append(input_ids)
                same_condition.append(sen)        
                pre_condition_string = condition_string
                start += 1
            else:   
                input_ids_list.append(input_ids)
                ref_sentences.append(same_condition)
                pre_condition_string = condition_string
                same_condition = [sen]
                start += 1
        ref_sentences.append(same_condition)    

    bleu_score = 0
    bleu_1 = 0

#     f_dev = open('./predictions/testset/large2/f_dev_'+str(i)+'.txt', 'w')
#     f_pred = open('./predictions/devset/base4/f_pred_'+str(i)+'.txt', 'w')
    f_pred = open('./predictions/testset/base5_dev/f_pred_'+str(i)+'.txt', 'w')

    for k in range(len(ref_sentences)):
        input_ids = input_ids_list[k]
        input_len = len(input_ids)

        ori_tokens = []
        for m in range(len(ref_sentences[k])):
#             f_dev.write(ref_sentences[k][m]+'\n')
            ori_tokens.append(word_tokenize(ref_sentences[k][m]))
#         if k < len(ref_sentences)-1:
#             f_dev.write('\n')

        for _ in range(max_len):
            model_out = my_model.model_feeding(input_ids) # (batch, seq_len, emb_dim)
            pred_idx = model_out.argmax(1)[-1]        
            if pred_idx == my_model.tokenizer.eos_token_id:
                break            
            input_ids = torch.cat((input_ids, pred_idx.unsqueeze(0)), 0)        

        out_sen = my_model.tokenizer.decode(input_ids[input_len:])
        f_pred.write(out_sen+'\n')

        out_tokens = word_tokenize(out_sen)

        bleu_1_score = sentence_bleu(ori_tokens, out_tokens, weights=(1, 0, 0, 0))
        bleu_2_score = sentence_bleu(ori_tokens, out_tokens, weights=(0.5, 0.5, 0, 0))
        bleu_3_score = sentence_bleu(ori_tokens, out_tokens, weights=(0.33, 0.33, 0.33, 0))
        bleu_4_score = sentence_bleu(ori_tokens, out_tokens, weights=(0.25, 0.25, 0.25, 0.25))

        bleu_1 += bleu_1_score

        bleu_score += min(1, len(out_tokens)/len(ori_tokens))*((bleu_1_score*bleu_2_score*bleu_3_score*bleu_4_score)**(0.25))    

#     f_dev.close()
    f_pred.close()
    
    print(i, "th model")
    print("BLEU score: {}".format(bleu_score/len(ref_sentences)*100))
    print("BLEU1 score: {}".format(bleu_1/len(ref_sentences)*100))    

## BERTscore

In [None]:
### BERT score with human reference
from bert_score import score
import glob
human_files = "/project/work/E2E/predictions/testset/f_dev.txt"

human_open = open(human_files, "r")
human_dataset = human_open.readlines()
human_open.close()

human_references = []

temp_reference = []
for i in range(len(human_dataset)):
    if human_dataset[i] == '\n':
        human_references.append(temp_reference)
        temp_reference = []
    else:
        temp_reference.append(human_dataset[i].strip())
human_references.append(temp_reference)
human_compare = []
for i in range(len(human_references)):
    for k in range(len(human_references[i])):
        human_compare.append(human_references[i][k])

output_path = "/project/work/E2E/predictions/testset/base2_sample10/*"
pred_files = glob.glob(output_path)

score_list = []
for i in range(len(pred_files)):    
    cands = []
    pred_data_open = open(pred_files[i], "r")
    pred_data_dataset = pred_data_open.readlines()
    pred_len = len(pred_data_dataset)
    pred_data_open.close()
    
    for k in range(len(pred_data_dataset)):
        out_sen = pred_data_dataset[k].strip()
        repeat_num = len(human_references[k])
        for _ in range(repeat_num):
            cands.append(out_sen)

    P, R, F1 = score(cands, human_compare, lang='en', verbose=True)
    F1_list=list(F1.numpy())
    BERT_score = sum(F1_list)/len(F1_list)
    
    score_list.append(BERT_score)    

In [80]:
pred_files, score_list

(['/project/work/E2E/predictions/testset/base2_sample10/f_pred_3.txt',
  '/project/work/E2E/predictions/testset/base2_sample10/f_pred_7.txt',
  '/project/work/E2E/predictions/testset/base2_sample10/f_pred_5.txt',
  '/project/work/E2E/predictions/testset/base2_sample10/f_pred_9.txt',
  '/project/work/E2E/predictions/testset/base2_sample10/f_pred_1.txt',
  '/project/work/E2E/predictions/testset/base2_sample10/f_pred_10.txt',
  '/project/work/E2E/predictions/testset/base2_sample10/f_pred_4.txt',
  '/project/work/E2E/predictions/testset/base2_sample10/f_pred_8.txt',
  '/project/work/E2E/predictions/testset/base2_sample10/f_pred_6.txt',
  '/project/work/E2E/predictions/testset/base2_sample10/f_pred_2.txt'],
 [0.9340897221415886,
  0.9375306307291542,
  0.9385130896945256,
  0.9396600867420175,
  0.896774981541596,
  0.9385862989438868,
  0.9347218730773291,
  0.9386929859872213,
  0.9381917336336107,
  0.9325849555993218])

In [None]:
### BERT score with human reference
import csv
from bert_score import score
import glob
human_files = "/project/work/E2E/predictions/testset/f_dev.txt"

human_open = open(human_files, "r")
human_dataset = human_open.readlines()
human_open.close()

human_references = []

temp_reference = []
for i in range(len(human_dataset)):
    if human_dataset[i] == '\n':
        human_references.append(temp_reference)
        temp_reference = []
    else:
        temp_reference.append(human_dataset[i].strip())
human_references.append(temp_reference)
human_compare = []
for i in range(len(human_references)):
    for k in range(len(human_references[i])):
        human_compare.append(human_references[i][k])

output_path = "/project/work/E2E/compared_system/system_outputs/primary_txt/*"
pred_files = glob.glob(output_path)

score_list = []
for i in range(len(pred_files)):    
    cands = []
    pred_data_open = open(pred_files[i], "r")
    pred_data_dataset = pred_data_open.readlines()
    pred_len = len(pred_data_dataset)
    pred_data_open.close()
    
    for k in range(len(pred_data_dataset)):
        out_sen = pred_data_dataset[k].strip()
        repeat_num = len(human_references[k])
        for _ in range(repeat_num):
            cands.append(out_sen)

    P, R, F1 = score(cands, human_compare, lang='en', verbose=True)
    F1_list=list(F1.numpy())
    BERT_score = sum(F1_list)/len(F1_list)
    
    score_list.append(BERT_score)  
print(pred_files, score_list)    

In [39]:
for i in range(len(pred_files)):
    print(pred_files[i], score_list[i])

/project/work/E2E/compared_system/system_outputs/primary_txt/adapt.txt 0.9224714525544057
/project/work/E2E/compared_system/system_outputs/primary_txt/forge3.txt 0.9277185035519018
/project/work/E2E/compared_system/system_outputs/primary_txt/tgen.txt 0.9391622960808789
/project/work/E2E/compared_system/system_outputs/primary_txt/chen.txt 0.9121247716430608
/project/work/E2E/compared_system/system_outputs/primary_txt/sheff2.txt 0.9337284921863148
/project/work/E2E/compared_system/system_outputs/primary_txt/tuda.txt 0.9388613972027826
/project/work/E2E/compared_system/system_outputs/primary_txt/forge1.txt 0.9296145912378093
/project/work/E2E/compared_system/system_outputs/primary_txt/dangnt.txt 0.9390964954615199
/project/work/E2E/compared_system/system_outputs/primary_txt/zhaw2.txt 0.9328836475052662
/project/work/E2E/compared_system/system_outputs/primary_txt/zhang.txt 0.931265755944889
/project/work/E2E/compared_system/system_outputs/primary_txt/tr2.txt 0.927562745489242
/project/work

In [None]:
# !./e2e-metrics/measure_scores.py ./predictions/testset/f_dev.txt ./predictions/testset/base5_dev/f_pred_1.txt
# !./e2e-metrics/measure_scores.py ./predictions/testset/f_dev.txt ./predictions/testset/base5_dev/f_pred_2.txt
# !./e2e-metrics/measure_scores.py ./predictions/testset/f_dev.txt ./predictions/testset/base5_dev/f_pred_3.txt
# !./e2e-metrics/measure_scores.py ./predictions/testset/f_dev.txt ./predictions/testset/base5_dev/f_pred_4.txt
# !./e2e-metrics/measure_scores.py ./predictions/testset/f_dev.txt ./predictions/testset/base5_dev/f_pred_5.txt
# !./e2e-metrics/measure_scores.py ./predictions/testset/f_dev.txt ./predictions/testset/base5_dev/f_pred_6.txt
# !./e2e-metrics/measure_scores.py ./predictions/testset/f_dev.txt ./predictions/testset/base5_dev/f_pred_7.txt
# !./e2e-metrics/measure_scores.py ./predictions/testset/f_dev.txt ./predictions/testset/base5_dev/f_pred_8.txt
# !./e2e-metrics/measure_scores.py ./predictions/joosung2/testset/f_dev.txt ./predictions/final/1base1_sample10_6.txt
# !./e2e-metrics/measure_scores.py ./predictions/joosung2/testset/f_dev.txt ./predictions/final/1base2_sample10_9.txt
# !./e2e-metrics/measure_scores.py ./predictions/joosung2/testset/f_dev.txt ./predictions/final/1base4_devtest_4.txt
# !./e2e-metrics/measure_scores.py ./predictions/joosung2/testset/f_dev.txt ./predictions/final/2base1_sample50_7.txt
!./e2e-metrics/measure_scores.py ./predictions/joosung2/testset/f_dev.txt /project/work/E2E/compared_system/system_outputs/primary_txt/harv.txt

./predictions/joosung2/testset/f_dev.txt /project/work/E2E/compared_system/system_outputs/primary_txt/adapt.txt None
Running MS-COCO evaluator...
creating index...
index created!
Loading and preparing results...     
DONE (t=0.00s)
creating index...
index created!
tokenization...
PTBTokenizer tokenized 129948 tokens at 550313.49 tokens per second.
PTBTokenizer tokenized 18044 tokens at 185180.01 tokens per second.
setting up scorers...
computing METEOR score...
METEOR: 0.402
computing Rouge score...
ROUGE_L: 0.587
computing CIDEr score...
CIDEr: 1.504
Creating temp directory  /tmp/e2e-eval-5ml836vl
Running MTEval to compute BLEU & NIST...


## Tsv to txt

In [35]:
import pandas as pd
output_path = "/project/work/E2E/compared_system/system_outputs/primary/*"
comapred_files = glob.glob(output_path)

for i in range(len(comapred_files)):
    dataset = pd.read_csv(comapred_files[i], delimiter='\t', header=None)
    
    name = comapred_files[i].split('/')[-1].split('.')[0]
    txt_files = "/project/work/E2E/compared_system/system_outputs/primary_txt/"+name+".txt"
    f = open(txt_files, "w")
    gen_sentences = dataset[1]
    
    for k in range(1, len(gen_sentences)):
        f.write(gen_sentences[k]+'\n')
    f.close()

In [36]:
len(gen_sentences)

631

## zero-shot test

In [1]:
from model import *

max_len = 70
my_model = mymodel().cuda()
my_model.eval()
model_name = './gen_model/base_devtrain_4/4/model'
my_model.load_state_dict(torch.load(model_name))

I0426 01:41:04.955722 140426112489280 file_utils.py:41] PyTorch version 1.2.0 available.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
I0426 01:41:08.670083 140426112489280 tokenization_utils.py:504] loading file https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-vocab.json from cache at /home/ds_user1/.cache/torch/transformers/f2808208f9bec2320371a9f5f891c184ae0b674ef866b79c58177067d15732dd.1512018be4ba4e8726e41b91451

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=554.0, style=ProgressStyle(description_…

I0426 01:41:10.361212 140426112489280 file_utils.py:489] storing https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-config.json in cache at /home/ds_user1/.cache/torch/transformers/4be02c5697d91738003fb1685c9872f284166aa32e061576bbe6aaeb95649fcf.4c1d7fc2ac6ddabeaf0c8bec2ffc7dc112f668f5871a06efcff113d2797ec7d5
I0426 01:41:10.362656 140426112489280 file_utils.py:492] creating metadata file for /home/ds_user1/.cache/torch/transformers/4be02c5697d91738003fb1685c9872f284166aa32e061576bbe6aaeb95649fcf.4c1d7fc2ac6ddabeaf0c8bec2ffc7dc112f668f5871a06efcff113d2797ec7d5
I0426 01:41:10.363654 140426112489280 filelock.py:318] Lock 140426033563408 released on /home/ds_user1/.cache/torch/transformers/4be02c5697d91738003fb1685c9872f284166aa32e061576bbe6aaeb95649fcf.4c1d7fc2ac6ddabeaf0c8bec2ffc7dc112f668f5871a06efcff113d2797ec7d5.lock
I0426 01:41:10.364660 140426112489280 configuration_utils.py:275] loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-config.json fr




I0426 01:41:11.145596 140426112489280 modeling_utils.py:503] loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-pytorch_model.bin from cache at /home/ds_user1/.cache/torch/transformers/4295d67f022061768f4adc386234dbdb781c814c39662dd1662221c309962c55.778cf36f5c4e5d94c8cd9cefcf2a580c8643570eb327f0d4a1f007fab2acbdf1


<All keys matched successfully>

In [5]:
import random
import pandas as pd
csv_file='dataset/testset_w_refs.csv'
dataset = pd.read_csv(csv_file)
columns = dataset.columns
conditions = dataset[columns[0]]
        
typ_list = {}
for k in range(len(conditions)):
    cond_set = conditions[k].split(',')
    for m in range(len(cond_set)):
        cond_set[m] = cond_set[m].strip()
        pos = cond_set[m].index('[')
        if cond_set[m][:pos] in typ_list.keys():
            typ_list[cond_set[m][:pos]].add(cond_set[m][pos+1:-1])
        else:            
            typ_list[cond_set[m][:pos]] = {cond_set[m][pos+1:-1]}   

def sample_batch(tokenizer, cond_name, cond_set):
    condition_string = ''
    for m in range(len(cond_set)):
        condition_string += cond_name[m] + cond_set[m] + ' '

    input_string = condition_string + '<START>'
    input_ids = torch.tensor(tokenizer.encode(input_string, add_special_tokens=True))

    input_len = len(input_ids)

    return input_ids, condition_string

# <name> <eatType> <food> <priceRange> <customer rating> <area> <familyFriendly> <near>

name = '<NAME>'
eatType = '<EATTYPE>' # '<EATTYPE>'
food = None # '<FOOD>'
priceRange = None
customer_rating = None #'<CUSTOMER_RATING>'
area = '<AREA>'
familyFriendly = 'no'
near = '<NEAR>' # None

cond_list = []
conditions = []
if name is not None:
    placeholder_name = random.choice(list(typ_list['name']))
    cond_list.append('<name>')
    conditions.append(placeholder_name)
if eatType is not None:
    placeholder_eatType = random.choice(list(typ_list['eatType']))
    cond_list.append('<eatType>')
    conditions.append(placeholder_eatType)
if food is not None:
    placeholder_food = random.choice(list(typ_list['food']))
    cond_list.append('<food>')
    conditions.append(placeholder_food)
if priceRange is not None:
    placeholder_priceRange = random.choice(list(typ_list['priceRange']))
    cond_list.append('<priceRange>')
    conditions.append(placeholder_priceRange)    
if customer_rating is not None:
    placeholder_customer_rating = random.choice(list(typ_list['customer rating']))
    cond_list.append('<customer rating>')
    conditions.append(placeholder_customer_rating)        
if area is not None:
    placeholder_area = random.choice(list(typ_list['area']))
    cond_list.append('<area>')
    conditions.append(placeholder_area)    
if familyFriendly is not None:
    cond_list.append('<familyFriendly>')
    conditions.append(familyFriendly)            
if near is not None:
    placeholder_near = random.choice(list(typ_list['near']))
    cond_list.append('<near>')
    conditions.append(placeholder_near)        


# del cond_name[2]
# del conditions[2]

sample = sample_batch(my_model.tokenizer, cond_list, conditions)

input_ids = sample[0].cuda()
condition_string = sample[1]  
input_len = len(input_ids)

max_len = 70
for _ in range(max_len):
    model_out = my_model.model_feeding(input_ids) # (batch, seq_len, emb_dim)
    pred_idx = model_out.argmax(1)[-1]        
    if pred_idx == my_model.tokenizer.eos_token_id:
        break            
    input_ids = torch.cat((input_ids, pred_idx.unsqueeze(0)), 0)        

out_sen = my_model.tokenizer.decode(input_ids[input_len:])
print(cond_list)
print(conditions)
print(out_sen)

if name is not None:
    out_sen = out_sen.replace(placeholder_name, name)
if eatType is not None:
    out_sen = out_sen.replace(placeholder_eatType, eatType)
if food is not None:
    out_sen = out_sen.replace(placeholder_food, food)
if priceRange is not None:
    out_sen = out_sen.replace(placeholder_priceRange, priceRange)
if customer_rating is not None:
    out_sen = out_sen.replace(placeholder_customer_rating, customer_rating)
if area is not None:
    out_sen = out_sen.replace(placeholder_area, area)  
if near is not None:
    out_sen = out_sen.replace(placeholder_near, near)
print(out_sen)

['<name>', '<eatType>', '<area>', '<familyFriendly>', '<near>']
['Blue Spice', 'pub', 'city centre', 'no', 'Rainbow Vegetarian Café']
Blue Spice is a pub located in the city centre near Rainbow Vegetarian Café. It is not family-friendly.
<NAME> is a <EATTYPE> located in the <AREA> near <NEAR>. It is not family-friendly.


In [213]:
cond_list, conditions

(['<name>', '<food>', '<customer rating>', '<area>', '<familyFriendly>'],
 ['Clowns', 'Fast food', 'average', 'city centre', 'yes'])

## delexicalization

In [178]:
class e2eDataset(Dataset):
    def __init__(self, csv_file1, csv_file2, tokenizer):
        """
        Args:
            csv_file (string): csv 파일의 경로
        """
        self.dataset1 = pd.read_csv(csv_file1)
        self.dataset2 = pd.read_csv(csv_file2)
        
        self.columns1 = self.dataset1.columns
        self.columns2 = self.dataset2.columns
        
        self.conditions = list(self.dataset1[self.columns1[0]]) + list(self.dataset2[self.columns2[0]])
        self.sentences = list(self.dataset1[self.columns1[1]]) + list(self.dataset2[self.columns2[1]])
        self.tokenizer = tokenizer
        
        self.typ_list = {}
        for k in range(len(self.conditions)):
            cond_set = self.conditions[k].split(',')
            for m in range(len(cond_set)):
                cond_set[m] = cond_set[m].strip()
                pos = cond_set[m].index('[')
                if cond_set[m][:pos] in self.typ_list.keys():
                    self.typ_list[cond_set[m][:pos]].add(cond_set[m][pos+1:-1])
                else:            
                    self.typ_list[cond_set[m][:pos]] = {cond_set[m][pos+1:-1]}        

    def __len__(self):
        return len(self.conditions)

    def __getitem__(self, idx):
        sen = self.sentences[idx]
        
        cond = self.conditions[idx]
        cond_set = cond.split(',')        
        condition_string = ''
        
        
        p = random.random()

        if p > 0.3: # 70%
            for m in range(len(cond_set)):
                cond_set[m] = cond_set[m].strip()
                pos = cond_set[m].index('[')

                condition_string += '<' + cond_set[m][:pos] + '>' + cond_set[m][pos+1:-1] + ' '
        else: # p <= 0.3 / 30%
            nochange_list = ['priceRange', 'customer rating', 'familyFriendly']
            for m in range(len(cond_set)):
                cond_set[m] = cond_set[m].strip()
                
                pos = cond_set[m].index('[')
                if cond_set[m][:pos] not in nochange_list:
                    placeholder = '<' + cond_set[m][:pos] + '>'
                    condition_string += placeholder + ' '
                    sen = sen.replace(cond_set[m][pos+1:-1], placeholder)        
                else:
                    condition_string += '<' + cond_set[m][:pos] + '>' + cond_set[m][pos+1:-1] + ' '
        
        input_string = condition_string + '<START>' + sen
        input_ids = torch.tensor(self.tokenizer.encode(input_string, add_special_tokens=True))
        
        label_string = sen + ' <|endoftext|>'
        label_ids = torch.tensor(self.tokenizer.encode(label_string, add_special_tokens=True))

        return input_string, input_ids, label_ids    

In [189]:
e2e_dataset = e2eDataset(csv_file1='dataset/trainset.csv', csv_file2='dataset/devset.csv', tokenizer=my_model.tokenizer)
print(e2e_dataset[200][0])
# print(e2e_dataset.typ_list.keys())
# print(e2e_dataset.typ_list)
# priceRange, customer rating, familyFriendly

<name>Fitzbillies <priceRange>moderate <customer rating>1 out of 5 <familyFriendly>yes <near>Express by Holiday Inn <START>Fitzbillies is a kids friendly place located a few steps of the Express by Holiday Inn. Its prices are moderates, and it has  a poor rating  between its clients.


# check data

In [7]:
f_dev = open('f_dev.txt', 'r')
f_pred = open('f_pred.txt', 'r')
f_dev_dataset = f_dev.readlines()
f_pred_dataset = f_pred.readlines()

In [8]:
len(f_dev_dataset), len(f_pred_dataset)

(5218, 547)

In [9]:
data=[x for x in f_dev_dataset if x != '\n']
data2=[x for x in f_pred_dataset if x != '\n']
len(data), len(data2)

(4672, 547)

In [10]:
4672+546

5218

In [11]:
f_dev = open('./e2e-metrics/example-inputs/devel-conc.txt', 'r')
f_pred = open('./e2e-metrics/example-inputs/baseline-output.txt', 'r')
f_dev_dataset = f_dev.readlines()
f_pred_dataset = f_pred.readlines()

In [12]:
len(f_dev_dataset), len(f_pred_dataset)

(146, 10)

In [13]:
data=[x for x in f_dev_dataset if x != '\n']
data2=[x for x in f_pred_dataset if x != '\n']
len(data), len(data2)

(137, 10)

In [171]:
import random
p=random.random()
p

0.15699480814634292