## TL;DR

A simple example: fine-tuning `t5-3b` on a sentiment analysis task.

In [1]:
import t2t

In [2]:
# All settings for this run, grouped by concern, followed by the trainer
# that is built from them.
trainer_arguments = t2t.TrainerArguments(
    # --- model: checkpoint to load and the directory its files are cached in
    cache_dir="/workspace/cache",
    model_name_or_path="t5-3b",
    # --- data: JSON train/validation files and the text prefix for the task
    train_file="../sample_data/trainlines.json",
    validation_file="../sample_data/validlines.json",
    prefix="predict sentiment: ",
    # NOTE(review): 8 is a very small length limit; if the prefix counts
    # toward max_source_length, little of each input may survive -- confirm.
    max_source_length=8,
    max_target_length=8,
    # --- training settings
    learning_rate=1e-5,
    num_train_epochs=1,
    per_device_train_batch_size=1,
    gradient_checkpointing=True,
    # --- validation settings
    evaluation_strategy="epoch",
    per_device_eval_batch_size=1,
    # --- training outputs: where the model is saved (existing contents may be overwritten)
    output_dir="/tmp/saved_model",
    overwrite_output_dir=True,
)
trainer = t2t.Trainer(arguments=trainer_arguments)

Set TrainingArguments.mode to seq2seq
Loading t5-3b (for large models, this might take a while)
Files will be cached at: /workspace/cache
Ensure this directory is persistent if you do not want to download model files again!


Using custom data configuration default-ddac0f997dfe0137
Reusing dataset json (/workspace/cache/json/default-ddac0f997dfe0137/0.0.0/d75ead8d5cfcbe67495df0f89bd262f0023257fbbbd94a730313295f3d756d50)


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=2.0), HTML(value='')))




In [3]:
# Request freezing for the embeddings and the encoder, but not the decoder.
trainer.freeze(
    embeddings=True,
    encoder=True,
    decoder=False,
)

In [4]:
# Show the model name plus trainable / total parameter counts after freezing.
trainer.model_summary()

Summary
- model name: t5-3b
- model params:
  - train: 1610.7 M
  - total: 2851.6 M
  - vocab: 32100
- prompt tuning only: False


### Train Model

In [5]:
import time

In [6]:
# Time the full training call (validation included via valid=True).
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; unlike time.time() it cannot jump if the system clock is adjusted
# during a long run. Only the difference et - st is meaningful.
st = time.perf_counter()
trainer.train(valid=True)
et = time.perf_counter()

HBox(children=(HTML(value='Running tokenizer on train dataset'), FloatProgress(value=0.0, max=8.0), HTML(value…




HBox(children=(HTML(value='Running tokenizer on validation dataset'), FloatProgress(value=0.0, max=3.0), HTML(…




***** Running training *****
  Num examples = 8000
  Num Epochs = 1
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 1
  Gradient Accumulation steps = 1
  Total optimization steps = 8000


Epoch,Training Loss,Validation Loss,Bleu,Gen Len
1,7.6579,3.040773,0.0,5.0


***** Running Evaluation *****
  Num examples = 2001
  Batch size = 1
Saving model checkpoint to /tmp/saved_model/checkpoint-8000
Configuration saved in /tmp/saved_model/checkpoint-8000/config.json
Model weights saved in /tmp/saved_model/checkpoint-8000/pytorch_model.bin
tokenizer config file saved in /tmp/saved_model/checkpoint-8000/tokenizer_config.json
Special tokens file saved in /tmp/saved_model/checkpoint-8000/special_tokens_map.json
Copy vocab file to /tmp/saved_model/checkpoint-8000/spiece.model
Deleting older checkpoint [/tmp/saved_model/checkpoint-126] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Saving model checkpoint to /tmp/saved_model
Configuration saved in /tmp/saved_model/config.json
Model weights saved in /tmp/saved_model/pytorch_model.bin
tokenizer config file saved in /tmp/saved_model/tokenizer_config.json
Special tokens file saved in /tmp/saved_model/special_tokens_map.json
Copy vocab file to /tm

***** train metrics *****
  epoch                    =        1.0
  total_flos               =  1260055GF
  train_loss               =     7.6579
  train_runtime            = 0:09:22.27
  train_samples            =       8000
  train_samples_per_second =     14.228
  train_steps_per_second   =     14.228


In [7]:
# Report the elapsed time of the training cell, truncated to whole seconds.
print(f"Time taken: {int(et - st)} seconds")

Time taken: 628 seconds


### Test Model

In [8]:
# Negative review. The prompt spells out the same "predict sentiment: " prefix
# that was passed to TrainerArguments.
# NOTE(review): confirm generate_single does not prepend the prefix again.
input_text = "predict sentiment: This is the worst movie I have ever seen!"
trainer.generate_single(input_text, max_length=8)

'unacceptable'

In [9]:
# Positive review, same prompt format and max_length as the negative example.
# NOTE(review): in the recorded run this returns the same string as the
# negative review ('unacceptable'), so the model does not appear to separate
# the two -- revisit the training settings before relying on this example.
input_text = "predict sentiment: This is the best movie I have ever seen!"
trainer.generate_single(input_text, max_length=8)

'unacceptable'