## Training planner and realizer on DeepNLG data

In [1]:
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer, Trainer, TrainingArguments
from DeepNLGDatasetReader import run_parser
from Utils import PlannerDataset, RealizerDataset, T2TDataCollator, deepnlg_entry_to_examples

# Run on the GPU when PyTorch reports one as available; otherwise use the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

In [2]:
# Parse the train split first, then the dev split, with the same parser.
# NOTE(review): paths are relative to the notebook's working directory.
train_entries, dev_entries = (
    run_parser(split_dir) for split_dir in ('deepnlg/train', 'deepnlg/dev')
)

Convert each parsed entry into training examples, including all auxiliary data needed for training

In [3]:
# deepnlg_entry_to_examples yields an iterable of examples per entry, so the
# per-entry results are flattened into a single list for each split.
train_examples = [
    example
    for entry in train_entries
    for example in deepnlg_entry_to_examples(entry)
]

dev_examples = [
    example
    for entry in dev_entries
    for example in deepnlg_entry_to_examples(entry)
]

A single example with all relevant metadata

In [4]:
# Display the first training example to inspect the fields each example carries.
train_examples[0]

{'category': 'ComicsCharacter',
 'eid': 'Id1',
 'size': '4',
 'triples_map': {0: '<S> Arion_(comicsCharacter) <P> creator <O> Jan_Duursema',
  1: '<S> Jan_Duursema <P> award <O> Eisner_Award',
  2: '<S> Arion_(comicsCharacter) <P> alternativeName <O> "Ahriahn"',
  3: '<S> Arion_(comicsCharacter) <P> creator <O> Paul_Kupperberg'},
 'planner_input': '<S> Arion_(comicsCharacter) <P> creator <O> Jan_Duursema <S> Jan_Duursema <P> award <O> Eisner_Award <S> Arion_(comicsCharacter) <P> alternativeName <O> "Ahriahn" <S> Arion_(comicsCharacter) <P> creator <O> Paul_Kupperberg',
 'lid': 'Id1',
 'text': "Arion (also known as Ahri'ahn) is a comic character created by Paul Kupperberg and Jan Duursema, who won the Eisner award.",
 'plan': 'S 2 3 0 1',
 'realizer_input': '<sentence> <S> Arion_(comicsCharacter) <P> alternativeName <O> "Ahriahn" <S> Arion_(comicsCharacter) <P> creator <O> Paul_Kupperberg <S> Arion_(comicsCharacter) <P> creator <O> Jan_Duursema <S> Jan_Duursema <P> award <O> Eisner_Awar

### Planner
Load pretrained model and configure tokenizer

In [5]:
# Tokenizer: register the triple markers so each is kept as a single token.
planner_tokenizer = T5Tokenizer.from_pretrained('t5-base')
num_added_tokens = planner_tokenizer.add_tokens(['<S>', '<P>', '<O>'])

# Model: start from the pretrained checkpoint (kept on the CPU until sized).
planner = T5ForConditionalGeneration.from_pretrained('t5-base')

# Adding tokens grows the tokenizer but not the model. If the embedding matrix
# has fewer rows than the tokenizer has tokens, the new ids would index out of
# range during training, so grow the matrix in that case. When the checkpoint
# already has enough rows (t5-base appears to ship a padded matrix -- TODO
# confirm) this is a no-op and the pretrained weights are left untouched.
if len(planner_tokenizer) > planner.get_input_embeddings().num_embeddings:
    planner.resize_token_embeddings(len(planner_tokenizer))

planner = planner.to(DEVICE)

# Cell output: how many tokens add_tokens reported as newly added.
num_added_tokens

3

Prepare data for training

In [6]:
# Wrap both splits in PlannerDataset using the planner's tokenizer.
# Caution: `train_data` / `dev_data` are reassigned in the Realizer section
# below, so re-run this cell before running any planner cell out of order.
train_data, dev_data = (
    PlannerDataset(split_examples, planner_tokenizer)
    for split_examples in (train_examples, dev_examples)
)

Configure training arguments

In [7]:
# Training configuration for the planner. Checkpoints and logs are written
# under ./neural_planner. `args` and `trainer` are rebound in the Realizer
# section below, so this cell must be re-run before training the planner again.
args = TrainingArguments(
    output_dir="neural_planner",
    evaluation_strategy='steps',   # evaluate on a step schedule ...
    eval_steps=500,                # ... every 500 steps
    save_total_limit=5,            # cap on the number of checkpoints kept on disk
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=1,
    weight_decay=0.01,
    push_to_hub=False,
)

# Train on the planner train split and evaluate on the planner dev split.
trainer = Trainer(
    model=planner,
    args=args,
    train_dataset=train_data,
    eval_dataset=dev_data,
    data_collator=T2TDataCollator,
)

Train

In [None]:
# Fine-tune the planner using the Trainer built in the previous cell.
trainer.train()

Example of planner inference

In [29]:
# Switch the planner to evaluation mode before generating.
planner.eval()

# Take the first dev item, add a leading dimension with unsqueeze(0) so that
# generate() receives a 2-D tensor, and move it to the model's device.
# NOTE(review): `dev_data` must still be the planner dataset here; the
# Realizer section reassigns it.
plan_input_ids = dev_data[0]['input_ids'].unsqueeze(0).to(DEVICE)
out_ids = planner.generate(plan_input_ids)

# Decode the first (only) generated sequence, dropping special tokens.
planner_tokenizer.decode(out_ids[0], skip_special_tokens=True)

'S 0 1 2 3'

### Realizer
Load pretrained model and configure tokenizer

In [5]:
# Tokenizer: register the triple and sentence markers so each is kept as a
# single token.
realizer_tokenizer = T5Tokenizer.from_pretrained('t5-base')
num_added_tokens = realizer_tokenizer.add_tokens(['<S>', '<P>', '<P*>', '<O>', '<sentence>'])

# Model: start from the pretrained checkpoint (kept on the CPU until sized).
realizer = T5ForConditionalGeneration.from_pretrained('t5-base')

# Adding tokens grows the tokenizer but not the model. If the embedding matrix
# has fewer rows than the tokenizer has tokens, the new ids would index out of
# range during training, so grow the matrix in that case. When the checkpoint
# already has enough rows (t5-base appears to ship a padded matrix -- TODO
# confirm) this is a no-op and the pretrained weights are left untouched.
if len(realizer_tokenizer) > realizer.get_input_embeddings().num_embeddings:
    realizer.resize_token_embeddings(len(realizer_tokenizer))

realizer = realizer.to(DEVICE)

# Cell output: how many tokens add_tokens reported as newly added.
num_added_tokens

5

Prepare data for training

In [6]:
# Wrap both splits in RealizerDataset using the realizer's tokenizer.
# Caution: this rebinds `train_data` / `dev_data`, which the Planner section
# above also uses; planner cells run after this one will see realizer data.
train_data, dev_data = (
    RealizerDataset(split_examples, realizer_tokenizer)
    for split_examples in (train_examples, dev_examples)
)

Configure training arguments

In [7]:
# Training configuration for the realizer. Checkpoints and logs are written
# under ./neural_realizer. This rebinds `args` and `trainer`, replacing the
# planner's objects of the same name.
args = TrainingArguments(
    output_dir="neural_realizer",
    evaluation_strategy='steps',   # evaluate on a step schedule ...
    eval_steps=500,                # ... every 500 steps
    save_total_limit=5,            # cap on the number of checkpoints kept on disk
    learning_rate=2e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=2,
    weight_decay=0.01,
    push_to_hub=False,
)

# Train on the realizer train split and evaluate on the realizer dev split.
trainer = Trainer(
    model=realizer,
    args=args,
    train_dataset=train_data,
    eval_dataset=dev_data,
    data_collator=T2TDataCollator,
)

Train

In [8]:
# Fine-tune the realizer using the Trainer built in the previous cell; the
# returned TrainOutput is shown as the cell result.
trainer.train()

***** Running training *****
  Num examples = 18102
  Num Epochs = 2
  Instantaneous batch size per device = 2
  Total train batch size (w. parallel, distributed & accumulation) = 4
  Gradient Accumulation steps = 1
  Total optimization steps = 9052


Step,Training Loss,Validation Loss
500,1.5388,0.983314
1000,1.066,0.877459
1500,0.9659,0.81425
2000,0.9284,0.779613
2500,0.8653,0.760631
3000,0.862,0.73808
3500,0.8123,0.727151
4000,0.8299,0.717078
4500,0.8101,0.705202
5000,0.7552,0.697488


***** Running Evaluation *****
  Num examples = 2268
  Batch size = 4
Saving model checkpoint to neural_realizer/checkpoint-500
Configuration saved in neural_realizer/checkpoint-500/config.json
Model weights saved in neural_realizer/checkpoint-500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 2268
  Batch size = 4
Saving model checkpoint to neural_realizer/checkpoint-1000
Configuration saved in neural_realizer/checkpoint-1000/config.json
Model weights saved in neural_realizer/checkpoint-1000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 2268
  Batch size = 4
Saving model checkpoint to neural_realizer/checkpoint-1500
Configuration saved in neural_realizer/checkpoint-1500/config.json
Model weights saved in neural_realizer/checkpoint-1500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 2268
  Batch size = 4
Saving model checkpoint to neural_realizer/checkpoint-2000
Configuration saved in neural_realizer/checkpoint-2000/config.json


TrainOutput(global_step=9052, training_loss=0.8541778508825635, metrics={'train_runtime': 5822.8382, 'train_samples_per_second': 6.218, 'train_steps_per_second': 1.555, 'total_flos': 1.020521605146624e+16, 'train_loss': 0.8541778508825635, 'epoch': 2.0})

Example of realizer inference

In [18]:
# Switch the realizer to evaluation mode before generating.
realizer.eval()

# Take the first dev item, add a leading dimension with unsqueeze(0) so that
# generate() receives a 2-D tensor, and move it to the model's device.
realizer_input_ids = dev_data[0]['input_ids'].unsqueeze(0).to(DEVICE)

# Allow up to 64 tokens of output for the generated text.
out_ids = realizer.generate(realizer_input_ids, max_length=64)

# Decode the first (only) generated sequence, dropping special tokens.
realizer_tokenizer.decode(out_ids[0], skip_special_tokens=True)

'Tim Brooke-Taylor starred in Bananaman which was broadcast by STV on October 3, 1983. It was created by Steve Bright.'