In [None]:
!pip install datasets
!pip install transformers
!pip install tensorflow==2.15
!pip install evaluate
!pip install accelerate

In [None]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem at the mount point below.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
import os
from transformers import VisionEncoderDecoderModel, AutoFeatureExtractor,AutoTokenizer
# Set the WANDB_DISABLED flag for this process before any trainer is built.
os.environ.update({"WANDB_DISABLED": "true"})

In [None]:
import nltk

# Resource path NLTK is asked to locate in its local data directories.
PUNKT_RESOURCE = "tokenizers/punkt"

try:
    # Succeeds silently when the resource is already available locally.
    nltk.data.find(PUNKT_RESOURCE)
except (LookupError, OSError):
    # Not found (or the lookup failed on disk): download it without progress output.
    nltk.download("punkt", quiet=True)

In [None]:
# Hub identifiers of the pretrained checkpoints used for the two halves of the model.
image_encoder_model = "google/vit-base-patch16-224-in21k"
text_decode_model = "gpt2"

# Preprocessing objects: the feature extractor comes from the encoder
# checkpoint, the tokenizer from the decoder checkpoint.
feature_extractor = AutoFeatureExtractor.from_pretrained(image_encoder_model)
tokenizer = AutoTokenizer.from_pretrained(text_decode_model)

# Reuse the end-of-sequence token as the padding token.
# NOTE(review): presumably because this tokenizer has no pad token of its own — confirm.
tokenizer.pad_token = tokenizer.eos_token

# Build the encoder-decoder model from the two pretrained checkpoints.
model = VisionEncoderDecoderModel.from_encoder_decoder_pretrained(
    image_encoder_model,
    text_decode_model,
)

# Mirror the tokenizer's special-token ids into the model config.
model_config = model.config
model_config.eos_token_id = tokenizer.eos_token_id
model_config.decoder_start_token_id = tokenizer.bos_token_id
model_config.pad_token_id = tokenizer.pad_token_id

In [None]:
# Write the freshly assembled model and its preprocessing objects to disk.
output_dir = "/content"

for component in (model, feature_extractor):
    component.save_pretrained(output_dir)

# Kept as the last expression so the cell still displays its return value.
tokenizer.save_pretrained(output_dir)

In [None]:
import zipfile

from datasets import load_from_disk

# Location of the zipped dataset and the directory to unpack it into.
# NOTE(review): both values look like placeholders to fill in before running.
zip_file_path = 'link to processed_dataset.zip'
extracted_folder_path = 'make a new dir'

# Unpack the whole archive; the context manager closes it afterwards.
with zipfile.ZipFile(zip_file_path) as dataset_archive:
    dataset_archive.extractall(extracted_folder_path)

# Load the dataset from the extracted folder and display it.
# The 'train' and 'validation' splits are read from it when the trainer is built.
processed_dataset = load_from_disk(extracted_folder_path)
processed_dataset

In [None]:
from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments

# Create the working directory. `makedirs(..., exist_ok=True)` keeps this cell
# re-runnable (plain `os.mkdir` raises FileExistsError on the second run) and
# also creates any missing parent directories.
# NOTE(review): this path and `training_dir` look like placeholders — confirm
# whether they are meant to point at the same location.
os.makedirs('new dir path', exist_ok=True)
training_dir = "checkpoints dir"

# Training configuration: evaluate and checkpoint once per epoch, one epoch in
# total, batches of 4 per device for both training and evaluation.
# NOTE(review): newer transformers releases rename `evaluation_strategy` to
# `eval_strategy`; verify against the installed version.
training_args = Seq2SeqTrainingArguments(
    predict_with_generate=True,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    output_dir=training_dir,
)

In [None]:
from transformers import default_data_collator
import modules

# Build the metrics callable up front; it is bound to the text tokenizer.
# NOTE(review): `modules` is a project-local module; its `compute_metrics`
# is assumed to return the function the trainer will call — confirm.
metrics_fn = modules.compute_metrics(tokenizer=tokenizer)

# The feature extractor is passed in the `tokenizer` slot here.
# NOTE(review): presumably so it is saved alongside checkpoints — confirm.
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=processed_dataset['train'],
    eval_dataset=processed_dataset['validation'],
    data_collator=default_data_collator,
    tokenizer=feature_extractor,
    compute_metrics=metrics_fn,
)

In [None]:
from transformers.trainer_utils import get_last_checkpoint

# `resume_from_checkpoint=True` raises ValueError when the output directory
# contains no checkpoint, so a first run on a fresh runtime would fail.
# Resolve the latest checkpoint explicitly and fall back to training from
# scratch (None) when there is nothing to resume from.
checkpoint_root = trainer.args.output_dir
last_checkpoint = (
    get_last_checkpoint(checkpoint_root) if os.path.isdir(checkpoint_root) else None
)

trainer.train(resume_from_checkpoint=last_checkpoint)

In [None]:
# Store the trained model and the text tokenizer together in the training directory.
final_model_dir = training_dir
trainer.save_model(final_model_dir)
tokenizer.save_pretrained(final_model_dir)

In [None]:
# Move the checkpoint directory into Drive so it outlives the runtime.
import shutil

# NOTE(review): source and destination are the same literal here — they look
# like placeholders; replace both with real paths before running.
checkpoint_source = "dir"
drive_destination = "dir"
shutil.move(checkpoint_source, drive_destination)