# Let's Train ChefGPT

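Let's fine-tune our own ChefGPT model: we load the pre-tokenized train/validation datasets, fine-tune `t5-base` with the Hugging Face `Trainer`, and save the resulting model.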

## Load the Tokenized Datasets

In [1]:
from datasets import load_from_disk

# Current inputs: the "v4" snapshots of the tokenized 10k datasets.
# The validation split is read from the directory named "test".
# (The earlier "..._10k_v3" snapshots also worked well.)
train_tokenized, val_tokenized = (
    load_from_disk(snapshot_dir)
    for snapshot_dir in (
        './tokenized_train_dataset_10k_v4',
        './tokenized_test_dataset_10k_v4',
    )
)

In [2]:
# Sanity check: size of the training split as (rows, columns).
train_tokenized.shape

(10000, 3)

In [3]:
# Sanity check: size of the validation split as (rows, columns).
val_tokenized.shape

(2000, 3)

In [4]:
# Uncomment to inspect a single tokenized training example:
#train_tokenized[10]

## Let's Train

In [5]:
from transformers import T5ForConditionalGeneration, T5Tokenizer, T5Config, TrainingArguments, Trainer

In [6]:
# One checkpoint name drives both the configuration and the pretrained weights.
base_checkpoint = 't5-base'

# Fetch the stock configuration, then build the conditional-generation model with it.
config = T5Config.from_pretrained(base_checkpoint)
model = T5ForConditionalGeneration.from_pretrained(base_checkpoint, config=config)

# Switch off the model's caching flag; this avoids the cache-related
# error message that otherwise shows up during training.
model.config.use_cache = False

In [7]:
# Hyperparameters for the fine-tuning run, grouped by concern.
training_args = TrainingArguments(
    # -- where checkpoints and logs are written --
    output_dir='./results',
    logging_dir='./logs',
    # -- schedule: evaluate, checkpoint and log once per epoch --
    num_train_epochs=3,
    evaluation_strategy='epoch',
    save_strategy='epoch',
    logging_strategy='epoch',
    load_best_model_at_end=True,
    # -- batching: 8 samples x 2 accumulation steps per device --
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=2,
    # gradient_checkpointing=True,  # currently disabled
    # -- optimisation --
    learning_rate=1e-4,
    warmup_steps=100,
    weight_decay=0.01,
    fp16=True,  # half-precision training flag
)

# Wire the model, the arguments and both tokenized splits into a Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tokenized,
    eval_dataset=val_tokenized,
)

In [8]:
import torch
# Clear PyTorch's CUDA cache before the training run starts.
torch.cuda.empty_cache()

# Fine-tune the model; the value returned by train() is displayed as the
# cell output (per-epoch loss table followed by the TrainOutput summary).
trainer.train()



Epoch,Training Loss,Validation Loss
0,3.09,2.004921
2,2.1638,1.923236
2,2.1928,1.980517




TrainOutput(global_step=936, training_loss=2.4821708222739716, metrics={'train_runtime': 1218.062, 'train_samples_per_second': 24.629, 'train_steps_per_second': 0.768, 'total_flos': 1.823950673805312e+16, 'train_loss': 2.4821708222739716, 'epoch': 3.0})

In [10]:
# Write the fine-tuned model to a directory named after this run.
fine_tuned_model_dir = "fine_tuned_t5_recipes_base_10k_v4"
model.save_pretrained(fine_tuned_model_dir)

# NOTE(review): the tokenizer is not saved here. No `tokenizer` variable is
# defined in the visible cells, so the call below stays disabled (it also
# still points at an older "5k_v3" directory name).
# tokenizer.save_pretrained("fine_tuned_t5_recipes_base_5k_v3")

### Next: Let's try the fine-tuned model -> Lab 5