# Let's Train ChefGPT

Let's fine-tune our own ChefGPT model.

## Let's Load the Tokenized Datasets

In [None]:
from datasets import load_from_disk

# On-disk locations of the pre-tokenized train / validation splits.
# The previous revision of the data lived in
# './tokenized_train_dataset_10k_v3' and './tokenized_test_dataset_10k_v3'
# and worked well; the "v4" folders below are the ones currently in use.
# NOTE(review): the folder names mention "bloomz_1b7", while the model loaded
# further down is 'bigscience/mt0-xxl' -- confirm the data was tokenized with
# a tokenizer that matches the model being trained.
TRAIN_DATASET_DIR = './tokenized_bloomz_1b7_train_dataset_10k_v4'
VAL_DATASET_DIR = './tokenized_bloomz_1b7_test_dataset_10k_v4'

train_tokenized = load_from_disk(TRAIN_DATASET_DIR)
val_tokenized = load_from_disk(VAL_DATASET_DIR)

In [None]:
# Sanity check: display the shape of the loaded training split as the cell output.
train_tokenized.shape

In [None]:
# Sanity check: display the shape of the loaded validation split as the cell output.
val_tokenized.shape

In [None]:
#train_tokenized[10]

## Let's Train

In [None]:
from transformers import AutoModelForSeq2SeqLM, TrainingArguments, Trainer

In [None]:
# Load the base seq2seq checkpoint with 8-bit weights and let `device_map="auto"`
# decide where the layers are placed.
# The smaller 'bigscience/mt0-large' checkpoint was used previously with the
# same keyword arguments; swap the name below to go back to it.
model = AutoModelForSeq2SeqLM.from_pretrained(
    'bigscience/mt0-xxl',
    use_cache=False,
    load_in_8bit=True,
    device_map="auto",
)

# Turn the generation cache off on the config as well; this prevents the
# cache error message that otherwise shows up during training.
model.config.use_cache = False

In [None]:
from peft import get_peft_config, get_peft_model, LoraConfig, TaskType

# `prepare_model_for_int8_training` was deprecated in PEFT in favour of
# `prepare_model_for_kbit_training` and is no longer importable in recent
# releases. Prefer the current helper and fall back to the legacy name so the
# cell keeps working on both old and new PEFT versions.
try:
    from peft import prepare_model_for_kbit_training as prepare_quantized_model
except ImportError:
    from peft import prepare_model_for_int8_training as prepare_quantized_model

# LoRA adapter configuration for a sequence-to-sequence model, in training
# (not inference) mode.
peft_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
)

# Prepare the 8-bit base model for training first, then wrap it with the LoRA
# adapters and report how many parameters will actually be trained.
model = prepare_quantized_model(model)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

In [None]:
# Training configuration, grouped by concern.
# Options that were tried and are currently disabled: fixed
# per_device_train_batch_size / per_device_eval_batch_size of 8,
# gradient_checkpointing=True, optim="adamw_torch" and adam_beta1=0.85.
training_args = TrainingArguments(
    # Output locations for checkpoints and logs.
    output_dir='./results',
    logging_dir='./logs',
    # Evaluate, checkpoint and log once per epoch, and reload the best
    # checkpoint when training finishes.
    evaluation_strategy='epoch',
    save_strategy='epoch',
    logging_strategy='epoch',
    load_best_model_at_end=True,
    # Batch sizing: let the Trainer search for a batch size, and accumulate
    # gradients over two steps.
    auto_find_batch_size=True,
    gradient_accumulation_steps=2,
    # Optimisation schedule.
    num_train_epochs=3,
    learning_rate=1e-4,
    warmup_steps=100,
    weight_decay=0.01,
)

# Bind the model to the tokenized train / validation splits.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tokenized,
    eval_dataset=val_tokenized,
)

In [None]:
# Optional: uncomment the two lines below to empty PyTorch's CUDA cache
# before starting the training run.
# import torch
# torch.cuda.empty_cache()

# Fine-tune the model; the value returned by `train()` is shown as the cell output.
trainer.train()

In [None]:
# Save the fine-tuned model
# NOTE(review): the target directory is named "bloomz_1b7", but the checkpoint
# loaded in this notebook is 'bigscience/mt0-xxl' -- confirm the intended name.
model.save_pretrained("fine_tuned_bloomz_1b7_recipes_10k_v4")
# Disabled: no `tokenizer` object is created anywhere in the visible notebook,
# so this line would fail if it were simply uncommented.
# tokenizer.save_pretrained("fine_tuned_t5_recipes_base_5k_v3")