In [1]:
import numpy as np
import pandas as pd

import nltk
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

from t5.dataset import load_spider_datasets, DatasetIterator
from t5.model import BaseModel, set_train_arguments

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [2]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to /home/ubuntu/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [3]:
!huggingface-cli login --token

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/ubuntu/.cache/huggingface/token
Login successful


In [4]:
print(torch.cuda.is_available())

True


## Model Parameters

In [5]:
model_name = "t5-base"
technique = "fine_tuned"
version = 1
checkpoint = 2190

folder_name = f"{model_name}_{technique}_{version}"
train_path = f"results/{folder_name}"
model_path = train_path + f'/{folder_name}'
last_check_point = train_path + f'/checkpoint-{checkpoint}'

hug_model_name = "RoxyRong/t5_base_finetuned_v3"

print("train_path:", train_path)
print("model_path:", model_path)

train_path: results/t5-base_fine_tuned_1
model_path: results/t5-base_fine_tuned_1/t5-base_fine_tuned_1


In [6]:
tokenizer = T5Tokenizer.from_pretrained(model_name, model_max_length=512, legacy=False)
model = T5ForConditionalGeneration.from_pretrained("t5-base")

## Dataset

In [7]:
train_spider, others_spider, dev_spider = load_spider_datasets()
train_spider = train_spider.iloc[np.random.permutation(train_spider.index)].reset_index(drop=True)
others_spider = train_spider.iloc[np.random.permutation(others_spider.index)].reset_index(drop=True)

In [8]:
max_load_at_once = 100

train_data_iterator = DatasetIterator(
    df=train_spider,
    tokenizer=tokenizer,
    max_load_at_once=max_load_at_once,
)

valid_data_iterator = DatasetIterator(
    df=others_spider,
    tokenizer=tokenizer,
    max_load_at_once=max_load_at_once,
)

## Model Training

In [9]:
batch_size = 16
num_epochs = 5
learning_rate=0.001

In [10]:
args = set_train_arguments(
    train_path=train_path,
    batch_size=batch_size,
    num_epochs=num_epochs,
    learning_rate=learning_rate,
)

In [11]:
trainer = BaseModel(
    model=model,
    train_data_iterator = train_data_iterator,
    valid_data_iterator = valid_data_iterator,
    seq2seq_train_args = args,
)

In [12]:
# train from scratch
# trainer.train()

# train from checkpoint
trainer.train_from_checkpoint(last_check_point=last_check_point)



Epoch,Training Loss,Validation Loss


In [13]:
trainer.evaluate()

{'eval_loss': 0.00855855830013752, 'eval_runtime': 35.1199, 'eval_samples_per_second': 47.238, 'eval_steps_per_second': 2.961, 'epoch': 5.0}


In [13]:
# trainer.model_save(model_path=model_path)
trainer.model_upload(hug_model_name=hug_model_name)

pytorch_model.bin:   0%|          | 0.00/892M [00:00<?, ?B/s]