In [None]:
! nvidia-smi

In [None]:
!pip install -U "transformers>=4.39.0"
!pip install peft bitsandbytes
!pip install -U "trl>=0.8.3"

In [1]:
import torch
from transformers import AutoTokenizer, AutoProcessor, TrainingArguments, LlavaForConditionalGeneration, BitsAndBytesConfig
from trl import SFTTrainer
from peft import LoraConfig

2024-05-01 17:13:04.395542: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-01 17:13:04.395590: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-01 17:13:04.397078: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [None]:
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
)

In [None]:
model_id = "llava-hf/llava-1.5-7b-hf"

In [None]:
model = LlavaForConditionalGeneration.from_pretrained(model_id,
                                                      quantization_config=quantization_config,
                                                      torch_dtype=torch.float16)

In [None]:
LLAVA_CHAT_TEMPLATE = """A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. {% for message in messages %}{% if message['role'] == 'user' %}USER: {% else %}ASSISTANT: {% endif %}{% for item in message['content'] %}{% if item['type'] == 'text' %}{{ item['text'] }}{% elif item['type'] == 'image' %}<image>{% endif %}{% endfor %}{% if message['role'] == 'user' %} {% else %}{{eos_token}}{% endif %}{% endfor %}"""

In [None]:
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.chat_template = LLAVA_CHAT_TEMPLATE
processor = AutoProcessor.from_pretrained(model_id)
processor.tokenizer = tokenizer

In [None]:
image_folder_path= '/kaggle/input/museum-scraped-data/images_folder/images_folder/'
json_file_path= '/kaggle/input/museum-scraped-data/training_dataset_final_json.json'

In [None]:
class LLavaDataCollator:
    def __init__(self, processor):
        self.processor = processor

    def __call__(self, examples):
        texts = []
        images = []

        for example in examples:
            messages = example["messages"]
            text = self.processor.tokenizer.apply_chat_template(
                messages, tokenize=False, add_generation_prompt=False
            )
            texts.append(text)


            image_path = image_folder_path + example["images"][0]

            images.append(Image.open(image_path))


        batch = self.processor(texts, images, return_tensors="pt", padding=True)

        labels = batch["input_ids"].clone()
        if self.processor.tokenizer.pad_token_id is not None:
            labels[labels == self.processor.tokenizer.pad_token_id] = -100
        batch["labels"] = labels

        return batch

data_collator = LLavaDataCollator(processor)

In [None]:
import json
# Open the JSON file
with open(json_file_path, 'r') as json_file:
    # Load the JSON data
    data = json.load(json_file)

train_dataset = data[:100]
eval_dataset = data[100:]

In [None]:
len(train_dataset)

In [None]:
train_dataset[0]

In [None]:
from PIL import Image

In [None]:
from PIL import Image
import matplotlib.pyplot as plt

#display first and last 5 training dataset images
images = [Image.open(image_folder_path + example["images"][0]) for example in train_dataset]

first_five_images = images[:5]
last_five_images = images[-5:]

fig, axes = plt.subplots(2, 5, figsize=(15, 6))

for i, ax in enumerate(axes.flat):
    if i < 5:
        ax.imshow(first_five_images[i])
        ax.axis("off")
    else:
        ax.imshow(last_five_images[i - 5])
        ax.axis("off")

plt.show()

In [None]:
# training_args = TrainingArguments(
#     output_dir="llava-1.5-7b-hf-ft-mix-vsft",
#     report_to="tensorboard",
#     learning_rate=1.4e-5,
#     per_device_train_batch_size=2,
#     gradient_accumulation_steps=1,
#     logging_steps=5,
#     num_train_epochs=1,
#     push_to_hub=True,
#     gradient_checkpointing=True,
#     remove_unused_columns=False,
#     fp16=True,
#     bf16=False
# )
training_args = TrainingArguments(
    output_dir="llava-1.5-7b-hf-ft-mix-vsft",
    report_to="tensorboard",
    learning_rate=5e-5,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=1,
    logging_steps=5,
    num_train_epochs=10,
    push_to_hub=True,
    gradient_checkpointing=True,
    remove_unused_columns=False,
    fp16=True,
    bf16=False
)

In [None]:
lora_config = LoraConfig(
    r=64,
    lora_alpha=16,
    target_modules="all-linear"
)

In [None]:
!pip install huggingface_hub

In [11]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    peft_config=lora_config,
    dataset_text_field="text",  # need a dummy field
    tokenizer=tokenizer,
    data_collator=data_collator,
    dataset_kwargs={"skip_prepare_dataset": True},
)

In [None]:
%reload_ext tensorboard

In [None]:
%load_ext tensorboard
%tensorboard --logdir /content/llava-1.5-7b-hf-ft-mix-vsft

In [None]:
trainer.train()

In [None]:
trainer.push_to_hub()

# *Inferenceing

In [None]:
!pip install -q -U peft

In [2]:
from peft import PeftModel

In [None]:
!pip install -i https://pypi.org/simple/ bitsandbytes

In [None]:
!pip install accelerate

In [4]:
from transformers import LlavaForConditionalGeneration, BitsAndBytesConfig

In [5]:
model_id = "llava-hf/llava-1.5-7b-hf"

In [6]:
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
)


In [7]:
base_model = LlavaForConditionalGeneration.from_pretrained(model_id,
                                                      quantization_config=quantization_config,
                                                      torch_dtype=torch.float16)

`low_cpu_mem_usage` was None, now set to True since model is quantized.


model.safetensors.index.json:   0%|          | 0.00/70.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.96G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.18G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/141 [00:00<?, ?B/s]

In [9]:
# Load the PEFT Lora model (adapter)
peft_lora_adapter_path = "GURU369/llava-1.5-7b-hf-ft-mix-vsft"
peft_lora_adapter = PeftModel.from_pretrained(base_model,peft_lora_adapter_path, adapter_name="lora_adapter")

# Merge the adapters into the base model
base_model.load_adapter(peft_lora_adapter_path, adapter_name="lora_adapter")


adapter_config.json:   0%|          | 0.00/927 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/1.29G [00:00<?, ?B/s]

In [12]:
base_model.push_to_hub("GURU369/llava-1.5-7b-hf-ft-merged",token=True, safe_serialization=True)



adapter_model.safetensors:   0%|          | 0.00/1.02G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/GURU369/llava-1.5-7b-hf-ft-merged/commit/87b316ac39bf2df6b8ef529d327958b7bcef34bf', commit_message='Upload LlavaForConditionalGeneration', commit_description='', oid='87b316ac39bf2df6b8ef529d327958b7bcef34bf', pr_url=None, pr_revision=None, pr_num=None)