In [1]:
# Lower the toxicity using a hate speech reward model, rewarding for low hate
# Using PPO and quantitative and qualitative evaluation
# trl will give access to PPO trainer
!pip install torch torchdata transformers datasets evaluate rouge_score peft trl --quiet

In [3]:
# AutoModelForSequenceClassification -- load our facebook sequence classifier // if hate speech or not
from datasets import load_dataset
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification, AutoTokenizer, GenerationConfig
from peft import PeftModel, PeftConfig, LoraConfig, TaskType

# transformers reinforcement learning - trl
from trl import PPOTrainer, PPOConfig, AutoModelForSeq2SeqLMWithValueHead #specific need for PPO
from trl import create_reference_model
from trl.core import LengthSampler #filter / sample from our data / first 5/12

import torch
import evaluate


import pandas as pd
import numpy as np

#tdqm lib makes the oops show a smart progress meter
from tqdm import tqdm
tqdm.pandas()

In [4]:
model_name= 'google/flan-t5-base'
ds_hf = "knkarthick/dialogsum"

original_ds = load_dataset(ds_hf)

original_ds

Found cached dataset csv (C:/Users/natalr2/.cache/huggingface/datasets/knkarthick___csv/knkarthick--dialogsum-cd36827d3490488d/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)


  0%|          | 0/3 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 12460
    })
    validation: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 500
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 1500
    })
})

In [42]:
def build_dataset(model_name,
                  dataset_name,
                  input_min_text_length,
                  input_max_text_length):
    
    #load dataset, in this case only train will be enough
    dataset = load_dataset(dataset_name, split = "train")
    
    #filter the dialogues of len between min and max
    dataset = dataset.filter(lambda x: len(x["dialogue"]) > input_min_text_length and len(x["dialogue"]) <= input_max_text_length)
    
    #prepare tokenize, device_map auto allows to switch between GPU and CPU automatically
    tokenizer = AutoTokenizer.from_pretrained(model_name, device_map="auto")
    
    def tokenize(sample):
        
        # wrap dialog with instruction
        prompt = f"""
Summarize the following conversation.

{sample["dialogue"]}

Summary:
"""
        sample["input_ids"] = tokenizer.encode(prompt)
        
        # this must be called "query", which is a requirement of PPO lib
        sample["query"] = tokenizer.decode(sample["input_ids"])
        return sample
        
    
    #tokenize each dialogue
    dataset = dataset.map(tokenize, batched=False)
    dataset.set_format(type = "torch")
    
    dataset_splits = dataset.train_test_split(test_size = 0.2, shuffle = False, seed = 42)
    
    return dataset_splits

dataset = build_dataset(model_name=model_name,
                        dataset_name=ds_hf,
                        input_min_text_length= 200,
                        input_max_text_length = 1000 
                        
                       )

print(dataset)
    
    

Found cached dataset csv (C:/Users/natalr2/.cache/huggingface/datasets/knkarthick___csv/knkarthick--dialogsum-cd36827d3490488d/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)
Loading cached processed dataset at C:\Users\natalr2\.cache\huggingface\datasets\knkarthick___csv\knkarthick--dialogsum-cd36827d3490488d\0.0.0\6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1\cache-d904df43a047dc8b.arrow


Map:   0%|          | 0/10022 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic', 'input_ids', 'query'],
        num_rows: 8017
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic', 'input_ids', 'query'],
        num_rows: 2005
    })
})


In [43]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params} \npercentage of trainable model parameters: ({all_model_params}/{trainable_model_params})\n"


In [44]:
lora_config = LoraConfig(
    r=32, #rank -- high
    lora_alpha = 32,
    target_modules = ["q", "v"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM #FLAN-T5
)

model = AutoModelForSeq2SeqLM.from_pretrained(model_name, 
                                              torch_dtype=torch.bfloat16)

peft_model = PeftModel.from_pretrained(model,
                                       "./peft-dialogue-summary-checkpoint-local/", 
                                        lora_config=lora_config,
                                        torch_dtype=torch.bfloat16,
                                        device_map="auto",
                                        is_trainable=True)


print(f' PEFT model parameters to be updated:\n{print_number_of_trainable_model_parameters(peft_model)}\n')


 PEFT model parameters to be updated:
trainable model parameters: 3538944
all model parameters: 251116800 
percentage of trainable model parameters: (251116800/3538944)




In [45]:
# preparing the model to fine-tune LLM using RL
#prepare PPO model, passing the PEFT model to ir, PPO will be used to optimze the RL policy against the reward model

ppo_model = AutoModelForSeq2SeqLMWithValueHead.from_pretrained(peft_model, 
                                                                torch_dtype=torch.bfloat16,
                                                              is_trainable=True) # putting the model in fine tuning mode // # to generate summaries/predictions we set it to false

print(f' PPO model parameters to be updated (ValueHead + 769 params):\n{print_number_of_trainable_model_parameters(ppo_model)}\n')
print(ppo_model.v_head)

 PPO model parameters to be updated (ValueHead + 769 params):
trainable model parameters: 3539713
all model parameters: 251117569 
percentage of trainable model parameters: (251117569/3539713)


ValueHead(
  (dropout): Dropout(p=0.1, inplace=False)
  (summary): Linear(in_features=768, out_features=1, bias=True)
  (flatten): Flatten(start_dim=1, end_dim=-1)
)


In [None]:
# 3539713 = 769 parameter more than the last model = 768 of the ValueHead + 1 that is our BIAS

In [46]:
ref_model = create_reference_model(ppo_model)
# kl divergence use to compare the original model to the ppo model
print(f'Reference model parameteres to be updated\n{print_number_of_trainable_model_parameters(ref_model)}\n')

Reference model parameteres to be updated
trainable model parameters: 0
all model parameters: 251117569 
percentage of trainable model parameters: (251117569/0)




In [17]:
#loading the toxicity model from facebook to identify hate speech
toxicity_model_name = "facebook/roberta-hate-speech-dynabench-r2-target"
toxicity_tokenizer = AutoTokenizer.from_pretrained(toxicity_model_name, device_map="auto")
toxicity_model = AutoModelForSequenceClassification.from_pretrained(toxicity_model_name, device_map="auto") #classifier with two labels, not hate and hate
print(toxicity_model.config.id2label)

Downloading tokenizer_config.json:   0%|          | 0.00/1.11k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/816 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

{0: 'nothate', 1: 'hate'}


In [47]:
non_toxic_text = "I want to kiss you"

toxicity_input_ids = toxicity_tokenizer(non_toxic_text, return_tensors="pt").input_ids

logits = toxicity_model(toxicity_input_ids).logits
print(f'logits [nothate, hate]:{logits.tolist()[0]}')

#prob not hate
probabilities = logits.softmax(dim=-1).tolist()[0]
print(f'prob [nothate, hate]:{probabilities}')

#get logits not hate - THIS IS THE REWARD
not_hate_index = 0 # in a lot of cases the 'positive' class could be the oposite
nothate_reward = (logits[:, not_hate_index]).tolist()
print(f'reward [high]:{nothate_reward}')

logits [nothate, hate]:[4.057403087615967, -3.959798812866211]
prob [nothate, hate]:[0.9996703863143921, 0.0003296325448900461]
reward [high]:[4.057403087615967]


In [48]:
toxic_text = "You are disgusting and terrible and fuck you"

toxicity_input_ids = toxicity_tokenizer(toxic_text, return_tensors="pt").input_ids

logits = toxicity_model(toxicity_input_ids).logits
print(f'logits [nothate, hate]:{logits.tolist()[0]}')

#prob not hate
probabilities = logits.softmax(dim=-1).tolist()[0]
print(f'prob [nothate, hate]:{probabilities}')

#get logits not hate - THIS IS THE REWARD
nothate_reward = (logits[:, not_hate_index]).tolist()
print(f'reward [low]:{nothate_reward}')

logits [nothate, hate]:[-0.6169748902320862, 0.4805370569229126]
prob [nothate, hate]:[0.2502063512802124, 0.7497936487197876]
reward [low]:[-0.6169748902320862]


In [49]:
device = 0 if torch.cuda.is_available() else "cpu"

sentiment_pipe = pipeline("sentiment-analysis",
                          model=toxicity_model_name,
                          device=device)

reward_logits_kwargs = {
    "top_k": None, #return all scores
    "function_to_apply": "none", # set to none to retrieve raw logits
    "batch_size": 16 
}

reward_probabilities_kwargs = {
    "top_k": None, #return all scores
    "function_to_apply": "Softmax", # set to none to retrieve raw logits
    "batch_size": 16 
}


print("Reward model output for non-toxic text:")
print(sentiment_pipe(non_toxic_text, **reward_logits_kwargs))
print(sentiment_pipe(non_toxic_text, **reward_probabilities_kwargs))
print("Reward model output for toxic text:")
print(sentiment_pipe(toxic_text, **reward_logits_kwargs))
print(sentiment_pipe(toxic_text, **reward_probabilities_kwargs))

Reward model output for non-toxic text:
[{'label': 'nothate', 'score': 4.057403087615967}, {'label': 'hate', 'score': -3.959798812866211}]
[{'label': 'nothate', 'score': 0.9996703863143921}, {'label': 'hate', 'score': 0.00032963251578621566}]
Reward model output for toxic text:
[{'label': 'hate', 'score': 0.4805370569229126}, {'label': 'nothate', 'score': -0.6169748902320862}]
[{'label': 'hate', 'score': 0.7497936487197876}, {'label': 'nothate', 'score': 0.2502063512802124}]


In [29]:
toxicity_evaluator = evaluate.load("toxicity",
                                   toxicity_model_name,
                                   module_type = "measurement",
                                   toxic_label = "hate")

Downloading builder script:   0%|          | 0.00/6.08k [00:00<?, ?B/s]

In [50]:
toxicity_score = toxicity_evaluator.compute(predictions=[non_toxic_text])

print("Toxicity score for non-toxic text:")
print(toxicity_score["toxicity"])


toxicity_score = toxicity_evaluator.compute(predictions=[toxic_text])

print("Toxicity score for toxic text:")
print(toxicity_score["toxicity"])


Toxicity score for non-toxic text:
[0.00032963251578621566]
Toxicity score for toxic text:
[0.7497936487197876]


In [53]:
# defining a function to evaluate and calculate the toxicity scores, means, stddev to further try to reduce toxicity


def evaluate_toxicity(model,
                      toxicity_evaluator,
                      tokenizer,
                      dataset,
                      num_samples):
    
    max_new_tokens = 100
    
    toxicities = []
    input_texts = []
    
    for i, sample in tqdm(enumerate(dataset)):
        input_text =  sample['query']
        
        if i > num_samples:
            break
            
        input_ids = tokenizer(input_text, return_tensors="pt", padding=True).input_ids
            
        generation_config = GenerationConfig(max_new_tokens=max_new_tokens,
                                                 top_k=0.0,
                                                 top_p=1.0,
                                                 do_sample=True)
            
        response_token_ids = model.generate(input_ids=input_ids,
                                                generation_config=generation_config)
            
        generated_text = tokenizer.decode(response_token_ids[0], skip_special_tokens=True)
            
        toxicity_score = toxicity_evaluator.compute(predictions=[(input_text + " " + generated_text)])
            
        toxicities.extend(toxicity_score["toxicity"])
            
        

    #compute mean and std np
    mean = np.mean(toxicities)
    std = np.std(toxicities)
    
    return mean, std

In [54]:
# the goal is to reduce the mean toxicity score

tokenizer = AutoTokenizer.from_pretrained(model_name, device_map = "auto")

mean_before_detoxification, std_before_detoxification = evaluate_toxicity(model=ref_model,
                                                                          toxicity_evaluator=toxicity_evaluator,
                                                                          tokenizer=tokenizer,
                                                                          dataset=dataset["test"],
                                                                          num_samples=10)

print(f' toxicity [mean, std] before detox: [{mean_before_detoxification}, {std_before_detoxification}]')

11it [00:26,  2.39s/it]

 toxicity [mean, std] before detox: [0.00034751372401263904, 0.00013464479365913763]





In [63]:
# inicitalize PPOT trainer
learning_rate=1.41e-5
max_ppo_epochs=1
mini_batch_size=4
batch_size=16

config = PPOConfig(
    model_name=model_name,
    learning_rate=learning_rate,
    ppo_epochs=max_ppo_epochs,
    mini_batch_size=mini_batch_size,
    batch_size=batch_size
)

def collator(data):
    return dict((key, [d[key] for d in data]) for key in data[0])

#uncomment the lines to test collator
# test_data = [{"key1" : "value1", "key2":"value2", "key3":"valuee3"}]
#print(f'Collator input: {test_data}')
#print(f'Collator output: {collator(test_data)}')


ppo_trainer = PPOTrainer(config=config,
                         model=ppo_model,
                         ref_model=ref_model, # reference to KLdivergence
                         tokenizer=tokenizer,
                         dataset=dataset["train"],
                         data_collator=collator)
                         

In [69]:
# we don't want the KL divergence goes to high, in this case KLdiv trying to hacking

# in this code we are grabing each of the samples 

output_min_length = 100
output_max_length = 400
output_length_sampler = LengthSampler(output_min_length, output_max_length)

generation_kwargs = {
    "min_length": 5,
    "top_k": 0.0,
    "top_p":1.0,
    "do_sample":True
}

reward_kwargs = {
    "top_k": None, #return all scores
    "function_to_apply":"none", # want the raw logits without softmax.
    "batch_size": 16
}

max_ppo_steps = 10

# summarizing the text
for step, batch in tqdm(enumerate(ppo_trainer.dataloader)):
    #break when you reach max_steps
    if step>= max_ppo_steps:
        break
        
    prompt_tensors = batch["input_ids"]
        
        # get response from flan-t5/peft llm
    summary_tensors = []
        
    for prompt_tensor in prompt_tensors:
        max_new_tokens = output_length_sampler()
            
        generation_kwargs["max_new_tokens"] = max_new_tokens
        summary = ppo_trainer.generate(prompt_tensor, **generation_kwargs)
            
        summary_tensors.append(summary.squeeze()[-max_new_tokens:])
            
    # this needs to be called response
    batch["response"] = [tokenizer.decode(r.squeeze()) for r in summary_tensors]
  
    # using the sentiment pipeline / zipping query and response together and passing this pair to the pipeline / hate or not? / pulling out the response to our ppoTrainer
    #executing PPO training to minimize loss
    #not actually modifying all the model, just the 1,4% lora parameters that we can change
    #compute reward outputs
    query_response_pairs = [q + r for q, r in zip(batch["query"], batch["response"])]
    rewards = sentiment_pipe(query_response_pairs, **reward_kwargs)
        
    #you use nothate item because this is the score for the positive nothate class
    reward_tensors = [torch.tensor(reward[not_hate_index]["score"]) for reward in rewards]
        
    #run PPO step
    stats = ppo_trainer.step(prompt_tensors, summary_tensors, reward_tensors)
    ppo_trainer.log_stats(stats, batch, reward_tensors)

    print(f'objective/kl: {stats["objective/kl"]}')
    print(f'ppo/returns/mean/: {stats["ppo/returns/mean"]}')
    print(f'ppo/policy/advantages_mean: {stats["ppo/policy/advantages_mean"]}')        
    print('-'.join('' for x in range(100)))            

1it [04:01, 241.78s/it]

objective/kl: -0.039288848638534546
ppo/returns/mean/: 1.8147528171539307
ppo/policy/advantages_mean: 0.3819125294685364
---------------------------------------------------------------------------------------------------


2it [08:17, 250.05s/it]

objective/kl: -0.00917330663651228
ppo/returns/mean/: 1.2585248947143555
ppo/policy/advantages_mean: 0.26183825731277466
---------------------------------------------------------------------------------------------------


3it [12:46, 258.56s/it]

objective/kl: -0.03868161141872406
ppo/returns/mean/: 1.5983028411865234
ppo/policy/advantages_mean: 0.20214909315109253
---------------------------------------------------------------------------------------------------


4it [16:28, 244.06s/it]

objective/kl: 0.028587106615304947
ppo/returns/mean/: 1.6518182754516602
ppo/policy/advantages_mean: 0.3893478810787201
---------------------------------------------------------------------------------------------------


5it [20:13, 237.15s/it]

objective/kl: 0.028780454769730568
ppo/returns/mean/: 1.3318476676940918
ppo/policy/advantages_mean: 0.2121773362159729
---------------------------------------------------------------------------------------------------


6it [23:59, 233.57s/it]

objective/kl: 0.03412031754851341
ppo/returns/mean/: 1.452364206314087
ppo/policy/advantages_mean: 0.19378435611724854
---------------------------------------------------------------------------------------------------


7it [26:45, 211.50s/it]

objective/kl: 0.023303095251321793
ppo/returns/mean/: 2.1469554901123047
ppo/policy/advantages_mean: 0.039320722222328186
---------------------------------------------------------------------------------------------------


8it [29:40, 199.93s/it]

objective/kl: -0.004564889706671238
ppo/returns/mean/: 2.022526502609253
ppo/policy/advantages_mean: 0.147976815700531
---------------------------------------------------------------------------------------------------


9it [32:43, 194.50s/it]

objective/kl: -0.007140921428799629
ppo/returns/mean/: 1.6228461265563965
ppo/policy/advantages_mean: 0.0345914289355278
---------------------------------------------------------------------------------------------------


10it [35:59, 215.93s/it]

objective/kl: 0.0017532026395201683
ppo/returns/mean/: 1.8552656173706055
ppo/policy/advantages_mean: 0.10104025900363922
---------------------------------------------------------------------------------------------------





In [77]:
#evaluate the model quantitativally
batch_size = 20
compare_results = {}

df_batch = dataset["test"][0:batch_size]

compare_results["query"] = df_batch["query"]
prompt_tensors = df_batch["input_ids"]

summary_tensors_ref = []
summary_tensors = []

#get response from ppo and base model.
for i in tqdm(range(batch_size)):
    gen_len = output_length_sampler()
    generation_kwargs["max_new_tokens"] = gen_len
    
    summary = ref_model.generate(
        input_ids = torch.as_tensor(prompt_tensors[i]).unsqueeze(dim=0).to(device),
        **generation_kwargs
    ).squeeze()[-gen_len:]
    summary_tensors_ref.append(summary)
    
    summary = ppo_model.generate(
        input_ids = torch.as_tensor(prompt_tensors[i]).unsqueeze(dim=0).to(device),
        **generation_kwargs
    ).squeeze()[-gen_len:]
    summary_tensors.append(summary)
    
# Decode responses
compare_results["response_before"] =  [tokenizer.decode(summary_tensors_ref[i]) for i in range(batch_size)]
compare_results["response_after"] =  [tokenizer.decode(summary_tensors[i]) for i in range(batch_size)]

#sentiment analysis of query/response pairs before/after
texts_before = [d + s for d, s in zip(compare_results["query"], compare_results["response_before"])]
rewards_before = sentiment_pipe(texts_before, **reward_kwargs)
compare_results["reward_before"] = [reward[not_hate_index]["score"] for reward in rewards_before]


texts_after = [d + s for d, s in zip(compare_results["query"], compare_results["response_after"])]
rewards_after = sentiment_pipe(texts_before, **reward_kwargs)
compare_results["reward_after"] = [reward[not_hate_index]["score"] for reward in rewards_after]



100%|██████████| 20/20 [01:46<00:00,  5.32s/it]


In [78]:
pd.set_option('display.max_colwidth', 500)
df_compare_results = pd.DataFrame(compare_results)
df_compare_results["reward_diff"] = df_compare_results['reward_after'] - df_compare_results['reward_before']
df_compare_results_sorted = df_compare_results.sort_values(by=['reward_diff'], ascending=False).reset_index(drop=True)
df_compare_results_sorted

Unnamed: 0,query,response_before,response_after,reward_before,reward_after,reward_diff
0,"Summarize the following conversation. #Person1#: I would like to order some internet today. #Person2#: What kind would you like? #Person1#: What kind of internet is there? #Person2#: You can get DEL or dial-up. #Person1#: Which of those two is best? #Person2#: I would recommend DEL. #Person1#: So that one better? #Person2#: It's better because it doesn't tie up the phone. #Person1#: What do you mean by that? #Person2#: DEL isn't connected through your phone line, but dial-up is. #Person1#: S...",<pad> Commute your cell phone to your home.</s>,<pad> #Person1#: I need to buy some broadband latex. #Person2#: That's right</s>,3.03139,3.03139,0.0
1,"Summarize the following conversation. #Person1#: Judy, what is everybody talking about? #Person2#: Haven't you heard? Richard was fired by our manager. #Person1#: You're kidding. It can't be true. #Person2#: Believe it or not. Everybody is talking about it in the company. #Person1#: Really? I'm surprised. #Person2#: Me too. Summary: </s>","<pad> Judy, please believe everyone. Richard was fired by our manager.</s>",<pad> Judy and Classy about Judy's move.</s>,3.904954,3.904954,0.0
2,"Summarize the following conversation. #Person1#: Where shall I register, please? #Person2#: Here. Do you have a registration card? #Person1#: Yes. Here you are. #Person2#: Please register your information here and pay for it. And I'll make a medical record for you. #Person1#: OK. How much do I need to pay for the registration? #Person2#: Please pay ten yuan for the registration. #Person1#: Here is my money. #Person2#: This is your registration card. Please don't lose it and bring it whenever...","<pad> Dallas C. Park's clinic is located in China's Town Center shopping mall. taxi (town limo) service is now provided, but you can make a local day-trip from Guangzhou.</s>",<pad> and look. and sign in. and leave the prescription.</s>,3.943122,3.943122,0.0
3,"Summarize the following conversation. #Person1#: So how did you like the restaurant? #Person2#: Actually, it could have been better. #Person1#: What didn't you like about it? #Person2#: It is a new restaurant. I don't think they have their act together yet. #Person1#: What did you think about the food? #Person2#: I felt that the food was pretty mediocre. #Person1#: The service wasn't that great, either. #Person2#: I agree. The service was not good. #Person1#: Do you think that you want to tr...",<pad> To let you know what the express is like.</s>,<pad> Assuming food quality was poor and the service wasn't good person2 says... this is a new restaurant.</s>,3.387951,3.387951,0.0
4,"Summarize the following conversation. #Person1#: How much are you asking for this? #Person2#: I'm offering them to you at 150 yuan a piece. Is that all right? #Person1#: Is tax already included in their price? #Person2#: Yes. Our price can't be matched. #Person1#: Would you consider a volume discount? #Person2#: If you buy 1, 000 or more, you'll get a 10 % discount. #Person1#: I'll accept your offer. Summary: </s>",<pad> People at the Maa * change buttons to cm movement in 10n.</s>,<pad> The bill of 150 yuan a person is 1500 dollars.</s>,3.958113,3.958113,0.0
5,"Summarize the following conversation. #Person1#: I'd like to have this cashed, please. #Person2#: Please put you name and address here. May I see your passport? #Person1#: Yes. #Person2#: How would you like it? #Person1#: Ten hundreds and ten twenties, and the rest in small change, please. #Person2#: OK. Here you are. Summary: </s>",<pad> Talk to Device dealer</s>,<pad> Pict for $10 and saved at person1.</s>,3.289397,3.289397,0.0
6,"Summarize the following conversation. #Person1#: Here is the final draft of our contract. I'm glad that we have reached an agreement on almost every term in our trade. #Person2#: Yes, it seems to me we have come quite a long way. However, let me take a close look at the final draft. #Person1#: Do you have some points to bring up? #Person2#: Well, everything we've discussed seems to be here. #Person1#: Yes, including a description of the shirts you want to purchase this time, the total amount...",<pad> #Person1#: See the final draft of our contract (#1371).</s>,"<pad> ---- My pleasure, Mr. Keith.</s>",4.364748,4.364748,0.0
7,"Summarize the following conversation. #Person1#: Could you help me figure out how to look for a job? #Person2#: We have lots of options, what type of job do you need? #Person1#: I want to work in an office. #Person2#: Do you want to work part-time or full-time? #Person1#: I want to work full-time. #Person2#: We have binders with local job listings or you can make use of the computers. OK? #Person1#: I am confused a bit but I am sure that I can figure it out. #Person2#: If you make an appoint...",<pad> Find jobs in the job center.</s>,"<pad> The characters are asked to pick a job. The job centers have hundreds of job job postings, all right?</s>",3.839757,3.839757,0.0
8,"Summarize the following conversation. #Person1#: It smells like an ashtray in here! #Person2#: Hi honey! What's wrong? Why do you have that look on your face? #Person1#: What's wrong? I thought we agreed that you were gonna quit smoking. #Person2#: No! I said I was going to cut down which is very different. You can't just expect me to go cold turkey overnight! #Person1#: Look, there are other ways to quit. You can try the nicotine patch, or nicotine chewing gum. We spend a fortune on cigaret...","<pad> #Person1: Have you talked to your girlfriend this morning? She said, ""I am having trouble with alcoholamps."" He is then annoyed for smoking in public. He says there are other ways, like nicotine patches or nicotine chewing gum. He is simply sorting through the smoke and this lady became nauseated. #Person1: Sorry honey too, honey. You're still smoking!!</s>",<pad> The two people are talking about approval.</s>,2.772885,2.772885,0.0
9,"Summarize the following conversation. #Person1#: Mom, I just finished my paper. Can you proofread it before I hand it in? #Person2#: Sure, let's take a look. Sweetie, this is terrific. Your ideas are so original. #Person1#: Thanks. #Person2#: I can tell you worked hard on it. #Person1#: I really did! I started thinking about what I wanted to say three weeks ago. #Person2#: Well, it was definitely worth all the time. #Person1#: Let's just hope my teacher agrees. Summary: </s>","<pad> Person1 needs a check to make sure she prepared good, original papers. They should be translated well into English.</s>",<pad> Hopefully it will be perfect soon.</s>,3.830252,3.830252,0.0
