<a href="https://colab.research.google.com/github/peteryang/finetune_LLaVA/blob/main/llava_fine_tune.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Check whether conda is already available in this runtime.
!conda --version
# If the check above prints "conda: command not found", conda still has to be
# installed. Fetch the condacolab helper here (this line runs either way);
# the next cell imports it and calls condacolab.install().
!pip install -q condacolab

/bin/bash: line 1: conda: command not found


In [None]:
# Install conda into the runtime via condacolab. The recorded output ends with
# "Restarting kernel...", so wait for the restart before running later cells.
import condacolab
condacolab.install()

⏬ Downloading https://github.com/conda-forge/miniforge/releases/download/23.11.0-0/Mambaforge-23.11.0-0-Linux-x86_64.sh...
📦 Installing...
📌 Adjusting configuration...
🩹 Patching environment...
⏲ Done in 0:00:12
🔁 Restarting kernel...


In [None]:
# Create a conda environment named "llava" with Python 3.10
# (-y: do not ask for confirmation, -q: quiet output, -n: environment name).
!conda create -y -q -n llava python=3.10

Channels:
 - conda-forge
Platform: linux-64
Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... done

## Package Plan ##

  environment location: /usr/local/envs/llava

  added / updated specs:
    - python=3.10


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2024.2.2   |       hbcca054_0         152 KB  conda-forge
    ld_impl_linux-64-2.40      |       h55db66e_0         697 KB  conda-forge
    libgcc-ng-13.2.0           |       h77fa898_7         758 KB  conda-forge
    libgomp-13.2.0             |       h77fa898_7         412 KB  conda-forge
    libsqlite-3.45.3           |       h2797004_0         840 KB  conda-forge
    libxcrypt-4.4.36           |       hd590300_1          98 KB  conda-forge
    ncurses-6.5                |       h59595ed_0         867 KB  conda-forge
    openssl-3.3.0              |       hd590300_0

In [None]:
# Run conda's shell-initialisation step. In the recorded output every file is
# reported as "no change", i.e. nothing needed to be modified.
!conda init

no change     /usr/local/condabin/conda
no change     /usr/local/bin/conda
no change     /usr/local/bin/conda-env
no change     /usr/local/bin/activate
no change     /usr/local/bin/deactivate
no change     /usr/local/etc/profile.d/conda.sh
no change     /usr/local/etc/fish/conf.d/conda.fish
no change     /usr/local/shell/condabin/Conda.psm1
no change     /usr/local/shell/condabin/conda-hook.ps1
no change     /usr/local/lib/python3.10/site-packages/xontrib/conda.xsh
no change     /usr/local/etc/profile.d/conda.csh
no change     /root/.bashrc
No action taken.


In [None]:
%%bash
# Activate the "llava" environment inside this cell's bash process.
# NOTE(review): %%bash runs the cell in its own subprocess, so this activation
# presumably does not carry over to the later `!conda` / `!pip` cells — confirm
# which environment those commands actually install into.
source activate llava

In [None]:
# Install the `cuda-compiler` package from the `nvidia` channel (-y: no prompt).
# No -n/--name is given, so conda targets whichever environment is current for
# this shell command — NOTE(review): confirm this is the intended one.
!conda install -y -c nvidia cuda-compiler

In [None]:
# Sanity check: show the `conda list` lines that mention "nvidia"
# (in the recorded output, the packages pulled from the nvidia channel).
!conda list |grep nvidia

cuda-compiler             12.4.1                        0    nvidia
cuda-cuobjdump            12.4.127                      0    nvidia
cuda-cuxxfilt             12.4.127                      0    nvidia
cuda-nvcc                 12.4.131                      0    nvidia
cuda-nvprune              12.4.127                      0    nvidia


In [None]:
!pip install pre-commit==3.0.2

# Fetch the project sources if they are not present yet. The editable installs
# below need /content/finetune_LLaVA to exist, and opening the notebook in
# Colab does not check out the repository.
![ -d /content/finetune_LLaVA ] || git clone https://github.com/peteryang/finetune_LLaVA.git /content/finetune_LLaVA

# Install package locally
!pip install --upgrade pip  # enable PEP 660 support
# (No `!cd` here: each `!` command runs in its own subshell, so a `cd` would
# not affect the following lines; absolute paths are used instead.)
!pip install -e /content/finetune_LLaVA

# Install additional packages for training
!pip install -e "/content/finetune_LLaVA[train]"
!pip install flash-attn --no-build-isolation


In [None]:
# Clone the liuhaotian/llava-v1.5-7b repository from Hugging Face into
# /content/llava-v1.5-7b.
# NOTE(review): the recorded output unpacks only ~7 KiB — presumably the large
# files are Git LFS objects; confirm git-lfs is set up so they are downloaded.
!git clone https://huggingface.co/liuhaotian/llava-v1.5-7b /content/llava-v1.5-7b

Cloning into '/content/llava-v1.5-7b'...
remote: Enumerating objects: 22, done.[K
remote: Counting objects: 100% (6/6), done.[K
remote: Compressing objects: 100% (5/5), done.[K
remote: Total 22 (delta 2), reused 1 (delta 1), pack-reused 16 (from 1)[K
Unpacking objects: 100% (22/22), 6.82 KiB | 1.14 MiB/s, done.


In [None]:
# Install the `datasets` package that the export cell below imports
# `load_dataset` from.
# NOTE(review): the version is not pinned; consider pinning for reproducibility.
!pip install datasets

In [None]:
from datasets import load_dataset
from PIL import Image
from io import BytesIO
import requests
import os
import json
import uuid


def _load_image(source):
    """Return an image object for `source`.

    `source` may be an http(s) URL, a local file path, or an already-loaded
    image object (returned unchanged).

    Raises:
        requests.HTTPError: if a URL responds with an error status.
    """
    if not isinstance(source, str):
        return source  # Assumed to already be a PIL.Image object

    if source.startswith(('http://', 'https://')):
        # Fail fast on network problems instead of hanging forever or trying
        # to decode an HTTP error page as an image.
        response = requests.get(source, timeout=30)
        response.raise_for_status()
        return Image.open(BytesIO(response.content))

    # Any other string is treated as a path on the local filesystem.
    return Image.open(source)


def process_and_save(dataset, output_folder, subset_name):
    """Export `dataset` as JPEG images plus a LLaVA-style conversation JSON.

    Every item must provide the keys 'image' (URL, file path or image object),
    'question' (str) and 'answers' (iterable of str).

    Images are written to `<output_folder>/images/<uuid>.jpg`; the list of
    conversation records is written to
    `<output_folder>/<subset_name>/dataset.json`.

    Args:
        dataset: Iterable of item dicts as described above.
        output_folder: Root directory for the export (created if missing).
        subset_name: Name of the sub-directory that receives `dataset.json`.

    Returns:
        None.
    """
    subset_folder = os.path.join(output_folder, subset_name)
    image_subfolder = os.path.join(output_folder, 'images')

    os.makedirs(image_subfolder, exist_ok=True)
    os.makedirs(subset_folder, exist_ok=True)

    # Collect all JSON records, then write them in one go at the end
    json_data_list = []

    for item in dataset:
        image = _load_image(item['image'])

        # The JPEG writer rejects modes such as RGBA or P (palette); convert
        # those to RGB so that saving as .jpg cannot fail on them.
        if image.mode not in ('RGB', 'L'):
            image = image.convert('RGB')

        # Create a unique ID for each image; it doubles as the file stem
        unique_id = str(uuid.uuid4())
        image_path = os.path.join(image_subfolder, f"{unique_id}.jpg")
        image.save(image_path)

        # Remove duplicate answers while keeping first-seen order, so the
        # generated text is identical from run to run (a plain set() is not).
        unique_answers = list(dict.fromkeys(item['answers']))
        formatted_answers = ", ".join(unique_answers)

        # Structure for LLaVA JSON
        json_data_list.append({
            "id": unique_id,
            "image": f"{unique_id}.jpg",
            "conversations": [
                {
                    "from": "human",
                    "value": item['question']
                },
                {
                    "from": "gpt",
                    "value": formatted_answers
                }
            ]
        })

    # Save the JSON data list to a file
    json_output_path = os.path.join(subset_folder, 'dataset.json')
    with open(json_output_path, 'w') as json_file:
        json.dump(json_data_list, json_file, indent=4)


def save_dataset(dataset_name, output_folder, class_name, subset_name, val_samples=None):
    """Load one split of a Hugging Face dataset, filter it and export it.

    Only items whose 'question_type' equals `class_name` are kept. When
    `subset_name` is 'train' and `val_samples` is given, the first
    `val_samples` kept items are exported as the 'validation' subset and the
    remainder as 'train'. For any other split, all kept items are exported
    under that split's own name, so exporting e.g. 'test' does not overwrite
    the files of a previously exported 'train' split.

    Args:
        dataset_name: Dataset identifier passed to `load_dataset`.
        output_folder: Root directory handed to `process_and_save`.
        class_name: Value of 'question_type' to keep.
        subset_name: Split to load; also the output sub-folder name.
        val_samples: Number of leading items to hold out for validation
            (only honoured for the 'train' split).

    Returns:
        None.
    """
    # Load the dataset from Hugging Face
    dataset = load_dataset(dataset_name, split=subset_name)

    # Filter for items with the specified class in 'question_type'
    filtered_dataset = [item for item in dataset if item['question_type'] == class_name]

    # Determine the split between the main portion and the validation hold-out
    if val_samples is not None and subset_name == 'train':
        main_dataset = filtered_dataset[val_samples:]
        val_dataset = filtered_dataset[:val_samples]
    else:
        main_dataset = filtered_dataset
        val_dataset = []

    # Write the main portion under the split's own name (previously this was
    # hard-coded to 'train', which clobbered the train export whenever another
    # split was saved into the same output folder). Empty portions are skipped.
    for subset, data in [(subset_name, main_dataset), ('validation', val_dataset)]:
        if data:
            process_and_save(data, output_folder, subset)




# Usage example: export the 'other' question-type items of OK-VQA. The train
# split is exported with its first 300 matching items held out for validation,
# then the test split is exported.
output_folder = 'dataset'
class_name = 'other'
val_samples = 300
save_dataset(
    'Multimodal-Fatima/OK-VQA_train',
    output_folder,
    class_name,
    subset_name='train',
    val_samples=val_samples,
)
save_dataset(
    'Multimodal-Fatima/OK-VQA_test',
    output_folder,
    class_name,
    subset_name='test',
)