Install dependecies

In [1]:
!pip install transformers[torch] datasets==2.10.0 evaluate rouge_score -q
!pip install git+https://github.com/google-research/bleurt.git -q
!pip install bert_score -q
!pip install demoji -q

  Preparing metadata (setup.py) ... [?25l[?25hdone


Imports

In [2]:
from datasets import load_dataset
import json
from transformers import DataCollatorForSeq2Seq, AutoTokenizer, AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer
import evaluate
import numpy as np
import torch
import demoji

Load and preprocess data. Remove emojis, file embeddings and \r, \n.

In [13]:
path = "drive/MyDrive/NLP/PR/"
dataset = load_dataset("json", data_files={"train":[path+"samsum/train.json"], "val":[path+"samsum/val.json"], "test":[path+"samsum/test.json"]})



  0%|          | 0/3 [00:00<?, ?it/s]

In [4]:
def preprocess(examples):
  processed = []
  out = {}
  # print(examples)
  for k, v in examples.items():
    if k!="id":
      for x in v:
        processed.append(demoji.replace(x.replace("\r","").replace("\n"," "), ''))
      out[k] = processed
      processed=[]
  return out



# def give_emoji_free_text(text):
#     return demoji.replace(text, '')

dataset = dataset.map(preprocess, batched=True)



Load model and tokenizer

In [5]:
checkpoint = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

tokenize and create batches of data 

In [6]:
def batchify(examples):
  dialogues = ["summarize: "+x for x in examples["dialogue"]]
  data = tokenizer(dialogues, max_length=256, truncation=True, padding="max_length")
  labels = tokenizer(text_target=examples["summary"], max_length=64, truncation=True, padding="max_length")
  data["labels"] = labels["input_ids"]
  return data

In [7]:
dataset = dataset.map(batchify, batched=True, remove_columns=["id","dialogue","summary"])
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)



Map:   0%|          | 0/819 [00:00<?, ? examples/s]

Define evaluation metrics

In [8]:
rouge = evaluate.load("rouge")
bleu = evaluate.load('bleu')
bleurt = evaluate.load('bleurt', module_type='metric', checkpoint='bleurt-base-512')
bert = evaluate.load('bertscore', lang="en", idf=True)



Computing checksums: 100%|##########| 1/1 [00:06<00:00,  6.67s/it]

Utility function to compute the desired metrics

In [9]:
# def preprocess_logits_for_metrics(preds, labels):
#     preds = preds.argmax(-1).squeeze()
#     return preds

def compute_metrics(preds):
    predictions, labels = preds
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    print(decoded_preds, decoded_labels)

    rouge_score = rouge.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)
    bleu_score = bleu.compute(predictions=decoded_preds, references=decoded_labels)
    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in predictions]
    bleurt_score = bleurt.compute(predictions=decoded_preds, references=decoded_labels)
    bert_score = bert.compute(predictions=decoded_preds, references=decoded_labels, lang="en")
    result = {
        "gen_len": np.mean(prediction_lens),
        "rouge_score": rouge_score,
        "bleu_score": bleu_score,
        "bleurt_score":np.median(bleurt_score["scores"]),
        "bert_score":[np.mean(np.array(bert_score[k], dtype=np.float32)) for k in ["precision","recall","f1"]]
    }

    return result

Training loop

In [10]:
device = "cuda" if torch.cuda.is_available() else "cpu"
training_args = Seq2SeqTrainingArguments(
    output_dir="flant5_sum_samsum",
    overwrite_output_dir=True,
    evaluation_strategy="epoch",
    # eval_steps=1,
    learning_rate=2e-4,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    weight_decay=0.01,
    save_strategy="no",
    logging_strategy="epoch",
    num_train_epochs=10,
    predict_with_generate=True,
    fp16=True,
    push_to_hub=True,
    disable_tqdm=False,
    hub_strategy="end",
    push_to_hub_token="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"
)

model = model.to(device)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["val"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    # preprocess_logits_for_metrics=preprocess_logits_for_metrics
)

trainer.train()

/content/flant5_sum_samsum is already a clone of https://huggingface.co/sentientconch/flant5_sum_samsum. Make sure you pull the latest changes with `repo.git_pull()`.
You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,Gen Len,Rouge Score,Bleu Score,Bleurt Score,Bert Score
1,0.0,,16.676039,"{'rouge1': 0.4648609117501229, 'rouge2': 0.23489748856950105, 'rougeL': 0.3936027885754436, 'rougeLsum': 0.3932448622689456}","{'bleu': 0.12048170853922512, 'precisions': [0.5838656689176857, 0.28994082840236685, 0.17667882428663376, 0.11335841956726246], 'brevity_penalty': 0.49929356415876747, 'length_ratio': 0.5901233238192687, 'translation_length': 10958, 'reference_length': 18569}",-0.486309,"[0.9187235832214355, 0.9003126621246338, 0.9092234373092651]"
2,0.0,,16.676039,"{'rouge1': 0.4648609117501229, 'rouge2': 0.23489748856950105, 'rougeL': 0.3936027885754436, 'rougeLsum': 0.3932448622689456}","{'bleu': 0.12048170853922512, 'precisions': [0.5838656689176857, 0.28994082840236685, 0.17667882428663376, 0.11335841956726246], 'brevity_penalty': 0.49929356415876747, 'length_ratio': 0.5901233238192687, 'translation_length': 10958, 'reference_length': 18569}",-0.486309,"[0.9187235832214355, 0.9003126621246338, 0.9092234373092651]"
3,0.0,,16.676039,"{'rouge1': 0.4648609117501229, 'rouge2': 0.23489748856950105, 'rougeL': 0.3936027885754436, 'rougeLsum': 0.3932448622689456}","{'bleu': 0.12048170853922512, 'precisions': [0.5838656689176857, 0.28994082840236685, 0.17667882428663376, 0.11335841956726246], 'brevity_penalty': 0.49929356415876747, 'length_ratio': 0.5901233238192687, 'translation_length': 10958, 'reference_length': 18569}",-0.486309,"[0.9187235832214355, 0.9003126621246338, 0.9092234373092651]"
4,0.0,,16.676039,"{'rouge1': 0.4648609117501229, 'rouge2': 0.23489748856950105, 'rougeL': 0.3936027885754436, 'rougeLsum': 0.3932448622689456}","{'bleu': 0.12048170853922512, 'precisions': [0.5838656689176857, 0.28994082840236685, 0.17667882428663376, 0.11335841956726246], 'brevity_penalty': 0.49929356415876747, 'length_ratio': 0.5901233238192687, 'translation_length': 10958, 'reference_length': 18569}",-0.486309,"[0.9187235832214355, 0.9003126621246338, 0.9092234373092651]"
5,0.0,,16.676039,"{'rouge1': 0.4648609117501229, 'rouge2': 0.23489748856950105, 'rougeL': 0.3936027885754436, 'rougeLsum': 0.3932448622689456}","{'bleu': 0.12048170853922512, 'precisions': [0.5838656689176857, 0.28994082840236685, 0.17667882428663376, 0.11335841956726246], 'brevity_penalty': 0.49929356415876747, 'length_ratio': 0.5901233238192687, 'translation_length': 10958, 'reference_length': 18569}",-0.486309,"[0.9187235832214355, 0.9003126621246338, 0.9092234373092651]"
6,0.0,,16.676039,"{'rouge1': 0.4648609117501229, 'rouge2': 0.23489748856950105, 'rougeL': 0.3936027885754436, 'rougeLsum': 0.3932448622689456}","{'bleu': 0.12048170853922512, 'precisions': [0.5838656689176857, 0.28994082840236685, 0.17667882428663376, 0.11335841956726246], 'brevity_penalty': 0.49929356415876747, 'length_ratio': 0.5901233238192687, 'translation_length': 10958, 'reference_length': 18569}",-0.486309,"[0.9187235832214355, 0.9003126621246338, 0.9092234373092651]"
7,0.0,,16.676039,"{'rouge1': 0.4648609117501229, 'rouge2': 0.23489748856950105, 'rougeL': 0.3936027885754436, 'rougeLsum': 0.3932448622689456}","{'bleu': 0.12048170853922512, 'precisions': [0.5838656689176857, 0.28994082840236685, 0.17667882428663376, 0.11335841956726246], 'brevity_penalty': 0.49929356415876747, 'length_ratio': 0.5901233238192687, 'translation_length': 10958, 'reference_length': 18569}",-0.486309,"[0.9187235832214355, 0.9003126621246338, 0.9092234373092651]"
8,0.0,,16.676039,"{'rouge1': 0.4648609117501229, 'rouge2': 0.23489748856950105, 'rougeL': 0.3936027885754436, 'rougeLsum': 0.3932448622689456}","{'bleu': 0.12048170853922512, 'precisions': [0.5838656689176857, 0.28994082840236685, 0.17667882428663376, 0.11335841956726246], 'brevity_penalty': 0.49929356415876747, 'length_ratio': 0.5901233238192687, 'translation_length': 10958, 'reference_length': 18569}",-0.486309,"[0.9187235832214355, 0.9003126621246338, 0.9092234373092651]"
9,0.0,,16.676039,"{'rouge1': 0.4648609117501229, 'rouge2': 0.23489748856950105, 'rougeL': 0.3936027885754436, 'rougeLsum': 0.3932448622689456}","{'bleu': 0.12048170853922512, 'precisions': [0.5838656689176857, 0.28994082840236685, 0.17667882428663376, 0.11335841956726246], 'brevity_penalty': 0.49929356415876747, 'length_ratio': 0.5901233238192687, 'translation_length': 10958, 'reference_length': 18569}",-0.486309,"[0.9187235832214355, 0.9003126621246338, 0.9092234373092651]"
10,0.0,,16.676039,"{'rouge1': 0.4648609117501229, 'rouge2': 0.23489748856950105, 'rougeL': 0.3936027885754436, 'rougeLsum': 0.3932448622689456}","{'bleu': 0.12048170853922512, 'precisions': [0.5838656689176857, 0.28994082840236685, 0.17667882428663376, 0.11335841956726246], 'brevity_penalty': 0.49929356415876747, 'length_ratio': 0.5901233238192687, 'translation_length': 10958, 'reference_length': 18569}",-0.486309,"[0.9187235832214355, 0.9003126621246338, 0.9092234373092651]"


['B: Hi Tom, are you busy tomorrow afternoon?', 'Emma wants to buy an advent calendar for her kids. Rob used to get one every year as', 'Madison is pregnant. Iggy and Jackie are worried about it. Iggy used to think that', 'Marla finds a pink underwear under her bed.', 'Robert needs to buy guitar cable. Fred will find it on google maps.', 'Keith wants to buy milk and cereals. Megan will check the drawer next to the fridge', 'Samantha and Evelyn are surprised that Samatha is making that noise.', "Tom's new place is in Fiesole. He invited Luis and Adam for dinner", 'Jane wants to make a reservation for 6 people tonight at 21:00. Vegano Resto', "Nancy's Texan drawl. Kids behaving themselves. Nancy is coming home", 'Laura needs a new printer. Jamie recommends Laura to buy a second hand one.', "Haylee is in the dairy section. She's looking for the coconut milk youg", 'Norbert and Wendy need to hurry to catch the tour. Norbert missed the last one because', 'Cheryl went to the Jandia Peninsula

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Trainer is attempting to log a value of "{'rouge1': 0.4648609117501229, 'rouge2': 0.23489748856950105, 'rougeL': 0.3936027885754436, 'rougeLsum': 0.3932448622689456}" of type <class 'dict'> for key "eval/rouge_score" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'bleu': 0.12048170853922512, 'precisions': [0.5838656689176857, 0.28994082840236685, 0.17667882428663376, 0.11335841956726246], 'brevity_penalty': 0.49929356415876747, 'length_ratio': 0.5901233238192687, 'translation_length': 10958, 'reference_length': 18569}" of type <class 'dict'> for key "eval/bleu_score" as a scalar. This invocation o

['B: Hi Tom, are you busy tomorrow afternoon?', 'Emma wants to buy an advent calendar for her kids. Rob used to get one every year as', 'Madison is pregnant. Iggy and Jackie are worried about it. Iggy used to think that', 'Marla finds a pink underwear under her bed.', 'Robert needs to buy guitar cable. Fred will find it on google maps.', 'Keith wants to buy milk and cereals. Megan will check the drawer next to the fridge', 'Samantha and Evelyn are surprised that Samatha is making that noise.', "Tom's new place is in Fiesole. He invited Luis and Adam for dinner", 'Jane wants to make a reservation for 6 people tonight at 21:00. Vegano Resto', "Nancy's Texan drawl. Kids behaving themselves. Nancy is coming home", 'Laura needs a new printer. Jamie recommends Laura to buy a second hand one.', "Haylee is in the dairy section. She's looking for the coconut milk youg", 'Norbert and Wendy need to hurry to catch the tour. Norbert missed the last one because', 'Cheryl went to the Jandia Peninsula

Trainer is attempting to log a value of "{'rouge1': 0.4648609117501229, 'rouge2': 0.23489748856950105, 'rougeL': 0.3936027885754436, 'rougeLsum': 0.3932448622689456}" of type <class 'dict'> for key "eval/rouge_score" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'bleu': 0.12048170853922512, 'precisions': [0.5838656689176857, 0.28994082840236685, 0.17667882428663376, 0.11335841956726246], 'brevity_penalty': 0.49929356415876747, 'length_ratio': 0.5901233238192687, 'translation_length': 10958, 'reference_length': 18569}" of type <class 'dict'> for key "eval/bleu_score" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.9187235832214355, 0.9003126621246338, 0.9092234373092651]" of type <class 'list'> for key "eval/bert_score" as a scalar. This invocation of Tensorboard's writer.add_scal

['B: Hi Tom, are you busy tomorrow afternoon?', 'Emma wants to buy an advent calendar for her kids. Rob used to get one every year as', 'Madison is pregnant. Iggy and Jackie are worried about it. Iggy used to think that', 'Marla finds a pink underwear under her bed.', 'Robert needs to buy guitar cable. Fred will find it on google maps.', 'Keith wants to buy milk and cereals. Megan will check the drawer next to the fridge', 'Samantha and Evelyn are surprised that Samatha is making that noise.', "Tom's new place is in Fiesole. He invited Luis and Adam for dinner", 'Jane wants to make a reservation for 6 people tonight at 21:00. Vegano Resto', "Nancy's Texan drawl. Kids behaving themselves. Nancy is coming home", 'Laura needs a new printer. Jamie recommends Laura to buy a second hand one.', "Haylee is in the dairy section. She's looking for the coconut milk youg", 'Norbert and Wendy need to hurry to catch the tour. Norbert missed the last one because', 'Cheryl went to the Jandia Peninsula

Trainer is attempting to log a value of "{'rouge1': 0.4648609117501229, 'rouge2': 0.23489748856950105, 'rougeL': 0.3936027885754436, 'rougeLsum': 0.3932448622689456}" of type <class 'dict'> for key "eval/rouge_score" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'bleu': 0.12048170853922512, 'precisions': [0.5838656689176857, 0.28994082840236685, 0.17667882428663376, 0.11335841956726246], 'brevity_penalty': 0.49929356415876747, 'length_ratio': 0.5901233238192687, 'translation_length': 10958, 'reference_length': 18569}" of type <class 'dict'> for key "eval/bleu_score" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.9187235832214355, 0.9003126621246338, 0.9092234373092651]" of type <class 'list'> for key "eval/bert_score" as a scalar. This invocation of Tensorboard's writer.add_scal

['B: Hi Tom, are you busy tomorrow afternoon?', 'Emma wants to buy an advent calendar for her kids. Rob used to get one every year as', 'Madison is pregnant. Iggy and Jackie are worried about it. Iggy used to think that', 'Marla finds a pink underwear under her bed.', 'Robert needs to buy guitar cable. Fred will find it on google maps.', 'Keith wants to buy milk and cereals. Megan will check the drawer next to the fridge', 'Samantha and Evelyn are surprised that Samatha is making that noise.', "Tom's new place is in Fiesole. He invited Luis and Adam for dinner", 'Jane wants to make a reservation for 6 people tonight at 21:00. Vegano Resto', "Nancy's Texan drawl. Kids behaving themselves. Nancy is coming home", 'Laura needs a new printer. Jamie recommends Laura to buy a second hand one.', "Haylee is in the dairy section. She's looking for the coconut milk youg", 'Norbert and Wendy need to hurry to catch the tour. Norbert missed the last one because', 'Cheryl went to the Jandia Peninsula

Trainer is attempting to log a value of "{'rouge1': 0.4648609117501229, 'rouge2': 0.23489748856950105, 'rougeL': 0.3936027885754436, 'rougeLsum': 0.3932448622689456}" of type <class 'dict'> for key "eval/rouge_score" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'bleu': 0.12048170853922512, 'precisions': [0.5838656689176857, 0.28994082840236685, 0.17667882428663376, 0.11335841956726246], 'brevity_penalty': 0.49929356415876747, 'length_ratio': 0.5901233238192687, 'translation_length': 10958, 'reference_length': 18569}" of type <class 'dict'> for key "eval/bleu_score" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.9187235832214355, 0.9003126621246338, 0.9092234373092651]" of type <class 'list'> for key "eval/bert_score" as a scalar. This invocation of Tensorboard's writer.add_scal

['B: Hi Tom, are you busy tomorrow afternoon?', 'Emma wants to buy an advent calendar for her kids. Rob used to get one every year as', 'Madison is pregnant. Iggy and Jackie are worried about it. Iggy used to think that', 'Marla finds a pink underwear under her bed.', 'Robert needs to buy guitar cable. Fred will find it on google maps.', 'Keith wants to buy milk and cereals. Megan will check the drawer next to the fridge', 'Samantha and Evelyn are surprised that Samatha is making that noise.', "Tom's new place is in Fiesole. He invited Luis and Adam for dinner", 'Jane wants to make a reservation for 6 people tonight at 21:00. Vegano Resto', "Nancy's Texan drawl. Kids behaving themselves. Nancy is coming home", 'Laura needs a new printer. Jamie recommends Laura to buy a second hand one.', "Haylee is in the dairy section. She's looking for the coconut milk youg", 'Norbert and Wendy need to hurry to catch the tour. Norbert missed the last one because', 'Cheryl went to the Jandia Peninsula

Trainer is attempting to log a value of "{'rouge1': 0.4648609117501229, 'rouge2': 0.23489748856950105, 'rougeL': 0.3936027885754436, 'rougeLsum': 0.3932448622689456}" of type <class 'dict'> for key "eval/rouge_score" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'bleu': 0.12048170853922512, 'precisions': [0.5838656689176857, 0.28994082840236685, 0.17667882428663376, 0.11335841956726246], 'brevity_penalty': 0.49929356415876747, 'length_ratio': 0.5901233238192687, 'translation_length': 10958, 'reference_length': 18569}" of type <class 'dict'> for key "eval/bleu_score" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.9187235832214355, 0.9003126621246338, 0.9092234373092651]" of type <class 'list'> for key "eval/bert_score" as a scalar. This invocation of Tensorboard's writer.add_scal

['B: Hi Tom, are you busy tomorrow afternoon?', 'Emma wants to buy an advent calendar for her kids. Rob used to get one every year as', 'Madison is pregnant. Iggy and Jackie are worried about it. Iggy used to think that', 'Marla finds a pink underwear under her bed.', 'Robert needs to buy guitar cable. Fred will find it on google maps.', 'Keith wants to buy milk and cereals. Megan will check the drawer next to the fridge', 'Samantha and Evelyn are surprised that Samatha is making that noise.', "Tom's new place is in Fiesole. He invited Luis and Adam for dinner", 'Jane wants to make a reservation for 6 people tonight at 21:00. Vegano Resto', "Nancy's Texan drawl. Kids behaving themselves. Nancy is coming home", 'Laura needs a new printer. Jamie recommends Laura to buy a second hand one.', "Haylee is in the dairy section. She's looking for the coconut milk youg", 'Norbert and Wendy need to hurry to catch the tour. Norbert missed the last one because', 'Cheryl went to the Jandia Peninsula

Trainer is attempting to log a value of "{'rouge1': 0.4648609117501229, 'rouge2': 0.23489748856950105, 'rougeL': 0.3936027885754436, 'rougeLsum': 0.3932448622689456}" of type <class 'dict'> for key "eval/rouge_score" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'bleu': 0.12048170853922512, 'precisions': [0.5838656689176857, 0.28994082840236685, 0.17667882428663376, 0.11335841956726246], 'brevity_penalty': 0.49929356415876747, 'length_ratio': 0.5901233238192687, 'translation_length': 10958, 'reference_length': 18569}" of type <class 'dict'> for key "eval/bleu_score" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.9187235832214355, 0.9003126621246338, 0.9092234373092651]" of type <class 'list'> for key "eval/bert_score" as a scalar. This invocation of Tensorboard's writer.add_scal

['B: Hi Tom, are you busy tomorrow afternoon?', 'Emma wants to buy an advent calendar for her kids. Rob used to get one every year as', 'Madison is pregnant. Iggy and Jackie are worried about it. Iggy used to think that', 'Marla finds a pink underwear under her bed.', 'Robert needs to buy guitar cable. Fred will find it on google maps.', 'Keith wants to buy milk and cereals. Megan will check the drawer next to the fridge', 'Samantha and Evelyn are surprised that Samatha is making that noise.', "Tom's new place is in Fiesole. He invited Luis and Adam for dinner", 'Jane wants to make a reservation for 6 people tonight at 21:00. Vegano Resto', "Nancy's Texan drawl. Kids behaving themselves. Nancy is coming home", 'Laura needs a new printer. Jamie recommends Laura to buy a second hand one.', "Haylee is in the dairy section. She's looking for the coconut milk youg", 'Norbert and Wendy need to hurry to catch the tour. Norbert missed the last one because', 'Cheryl went to the Jandia Peninsula

Trainer is attempting to log a value of "{'rouge1': 0.4648609117501229, 'rouge2': 0.23489748856950105, 'rougeL': 0.3936027885754436, 'rougeLsum': 0.3932448622689456}" of type <class 'dict'> for key "eval/rouge_score" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'bleu': 0.12048170853922512, 'precisions': [0.5838656689176857, 0.28994082840236685, 0.17667882428663376, 0.11335841956726246], 'brevity_penalty': 0.49929356415876747, 'length_ratio': 0.5901233238192687, 'translation_length': 10958, 'reference_length': 18569}" of type <class 'dict'> for key "eval/bleu_score" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.9187235832214355, 0.9003126621246338, 0.9092234373092651]" of type <class 'list'> for key "eval/bert_score" as a scalar. This invocation of Tensorboard's writer.add_scal

['B: Hi Tom, are you busy tomorrow afternoon?', 'Emma wants to buy an advent calendar for her kids. Rob used to get one every year as', 'Madison is pregnant. Iggy and Jackie are worried about it. Iggy used to think that', 'Marla finds a pink underwear under her bed.', 'Robert needs to buy guitar cable. Fred will find it on google maps.', 'Keith wants to buy milk and cereals. Megan will check the drawer next to the fridge', 'Samantha and Evelyn are surprised that Samatha is making that noise.', "Tom's new place is in Fiesole. He invited Luis and Adam for dinner", 'Jane wants to make a reservation for 6 people tonight at 21:00. Vegano Resto', "Nancy's Texan drawl. Kids behaving themselves. Nancy is coming home", 'Laura needs a new printer. Jamie recommends Laura to buy a second hand one.', "Haylee is in the dairy section. She's looking for the coconut milk youg", 'Norbert and Wendy need to hurry to catch the tour. Norbert missed the last one because', 'Cheryl went to the Jandia Peninsula

Trainer is attempting to log a value of "{'rouge1': 0.4648609117501229, 'rouge2': 0.23489748856950105, 'rougeL': 0.3936027885754436, 'rougeLsum': 0.3932448622689456}" of type <class 'dict'> for key "eval/rouge_score" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'bleu': 0.12048170853922512, 'precisions': [0.5838656689176857, 0.28994082840236685, 0.17667882428663376, 0.11335841956726246], 'brevity_penalty': 0.49929356415876747, 'length_ratio': 0.5901233238192687, 'translation_length': 10958, 'reference_length': 18569}" of type <class 'dict'> for key "eval/bleu_score" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.9187235832214355, 0.9003126621246338, 0.9092234373092651]" of type <class 'list'> for key "eval/bert_score" as a scalar. This invocation of Tensorboard's writer.add_scal

['B: Hi Tom, are you busy tomorrow afternoon?', 'Emma wants to buy an advent calendar for her kids. Rob used to get one every year as', 'Madison is pregnant. Iggy and Jackie are worried about it. Iggy used to think that', 'Marla finds a pink underwear under her bed.', 'Robert needs to buy guitar cable. Fred will find it on google maps.', 'Keith wants to buy milk and cereals. Megan will check the drawer next to the fridge', 'Samantha and Evelyn are surprised that Samatha is making that noise.', "Tom's new place is in Fiesole. He invited Luis and Adam for dinner", 'Jane wants to make a reservation for 6 people tonight at 21:00. Vegano Resto', "Nancy's Texan drawl. Kids behaving themselves. Nancy is coming home", 'Laura needs a new printer. Jamie recommends Laura to buy a second hand one.', "Haylee is in the dairy section. She's looking for the coconut milk youg", 'Norbert and Wendy need to hurry to catch the tour. Norbert missed the last one because', 'Cheryl went to the Jandia Peninsula

Trainer is attempting to log a value of "{'rouge1': 0.4648609117501229, 'rouge2': 0.23489748856950105, 'rougeL': 0.3936027885754436, 'rougeLsum': 0.3932448622689456}" of type <class 'dict'> for key "eval/rouge_score" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'bleu': 0.12048170853922512, 'precisions': [0.5838656689176857, 0.28994082840236685, 0.17667882428663376, 0.11335841956726246], 'brevity_penalty': 0.49929356415876747, 'length_ratio': 0.5901233238192687, 'translation_length': 10958, 'reference_length': 18569}" of type <class 'dict'> for key "eval/bleu_score" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.9187235832214355, 0.9003126621246338, 0.9092234373092651]" of type <class 'list'> for key "eval/bert_score" as a scalar. This invocation of Tensorboard's writer.add_scal

['B: Hi Tom, are you busy tomorrow afternoon?', 'Emma wants to buy an advent calendar for her kids. Rob used to get one every year as', 'Madison is pregnant. Iggy and Jackie are worried about it. Iggy used to think that', 'Marla finds a pink underwear under her bed.', 'Robert needs to buy guitar cable. Fred will find it on google maps.', 'Keith wants to buy milk and cereals. Megan will check the drawer next to the fridge', 'Samantha and Evelyn are surprised that Samatha is making that noise.', "Tom's new place is in Fiesole. He invited Luis and Adam for dinner", 'Jane wants to make a reservation for 6 people tonight at 21:00. Vegano Resto', "Nancy's Texan drawl. Kids behaving themselves. Nancy is coming home", 'Laura needs a new printer. Jamie recommends Laura to buy a second hand one.', "Haylee is in the dairy section. She's looking for the coconut milk youg", 'Norbert and Wendy need to hurry to catch the tour. Norbert missed the last one because', 'Cheryl went to the Jandia Peninsula

Trainer is attempting to log a value of "{'rouge1': 0.4648609117501229, 'rouge2': 0.23489748856950105, 'rougeL': 0.3936027885754436, 'rougeLsum': 0.3932448622689456}" of type <class 'dict'> for key "eval/rouge_score" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'bleu': 0.12048170853922512, 'precisions': [0.5838656689176857, 0.28994082840236685, 0.17667882428663376, 0.11335841956726246], 'brevity_penalty': 0.49929356415876747, 'length_ratio': 0.5901233238192687, 'translation_length': 10958, 'reference_length': 18569}" of type <class 'dict'> for key "eval/bleu_score" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.9187235832214355, 0.9003126621246338, 0.9092234373092651]" of type <class 'list'> for key "eval/bert_score" as a scalar. This invocation of Tensorboard's writer.add_scal

TrainOutput(global_step=9210, training_loss=0.0, metrics={'train_runtime': 5442.2292, 'train_samples_per_second': 27.07, 'train_steps_per_second': 1.692, 'total_flos': 5.043922658131968e+16, 'train_loss': 0.0, 'epoch': 10.0})

In [11]:
trainer.push_to_hub()

Upload file pytorch_model.bin:   0%|          | 1.00/945M [00:00<?, ?B/s]

Upload file runs/Jul30_07-35-23_a09bf80fcd43/events.out.tfevents.1690702529.a09bf80fcd43.11997.0:   0%|       …

Upload file runs/Jul30_07-30-06_a09bf80fcd43/events.out.tfevents.1690702211.a09bf80fcd43.3925.1:   0%|        …

Upload file runs/Jul30_07-05-49_a09bf80fcd43/events.out.tfevents.1690700755.a09bf80fcd43.3925.0:   0%|        …

Upload file runs/Jul30_06-55-35_a09bf80fcd43/events.out.tfevents.1690700144.a09bf80fcd43.555.0:   0%|         …

Upload file runs/Jul30_07-31-51_a09bf80fcd43/events.out.tfevents.1690702314.a09bf80fcd43.3925.2:   0%|        …

Upload file spiece.model:   0%|          | 1.00/773k [00:00<?, ?B/s]

Upload file training_args.bin:   0%|          | 1.00/4.06k [00:00<?, ?B/s]

To https://huggingface.co/sentientconch/flant5_sum_samsum
   5e5d077..e8a3184  main -> main

   5e5d077..e8a3184  main -> main

To https://huggingface.co/sentientconch/flant5_sum_samsum
   e8a3184..35821b1  main -> main

   e8a3184..35821b1  main -> main



'https://huggingface.co/sentientconch/flant5_sum_samsum/commit/e8a31840dde8e25284d86a6aa8d115ebe692d7d7'

Example inference

In [14]:
from transformers import pipeline

summarizer = pipeline("summarization", model=model.to('cpu'), tokenizer=tokenizer)
summarizer("summarize: "+dataset['test'][1]['dialogue'])

Your max_length is set to 200, but your input_length is only 157. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=78)


[{'summary_text': "Eric and Rob like Eric's stand-ups on youtube. Eric is going to watch them now. Rob will watch them. Eric likes the train part."}]

In [None]:
dataset['test'][1]

{'id': '13729565',
 'summary': 'Eric and Rob are going to watch a stand-up on youtube.',
 'dialogue': "Eric: MACHINE! Rob: That's so gr8! Eric: I know! And shows how Americans see Russian ;) Rob: And it's really funny! Eric: I know! I especially like the train part! Rob: Hahaha! No one talks to the machine like that! Eric: Is this his only stand-up? Rob: Idk. I'll check. Eric: Sure. Rob: Turns out no! There are some of his stand-ups on youtube. Eric: Gr8! I'll watch them now! Rob: Me too! Eric: MACHINE! Rob: MACHINE! Eric: TTYL? Rob: Sure :)"}