import 

In [1]:
import requests
import os
import json
import uuid
import numpy as np
import skimage
import math
import torch

from datasets import load_dataset
from PIL import Image
from io import BytesIO

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class Util:
  """Helpers for drawing random stimulus parameters."""

  @staticmethod
  def parameter(start, end, delta=0):
    """Draw a random integer from a range widened by ``delta`` on both sides.

    Returns a ``(value, parameters)`` pair: ``value`` is sampled with
    ``np.random.randint`` from ``[start - delta, end + delta)`` (upper bound
    exclusive) and ``parameters`` is the number of integers in that interval.
    """
    low = start - delta
    high = end + delta

    count = len(range(low, high))
    drawn = np.random.randint(low, high)

    return drawn, count

In [3]:
# Lower/upper bounds that length() uses for both its Y_RANGE and X_RANGE.
DELTA_MIN = 20
DELTA_MAX = 80
# Shape passed to np.zeros() when length() allocates the image array.
SIZE = (100, 100)


Generate images

In [25]:
def length(flags=(False, False, False), preset=None):
    """Render a single vertical bar into a boolean image and return its length.

    Args:
        flags: Three booleans ``(var_y, var_x, var_width)`` selecting which of
            the bar's start row, column and width are randomized. The default
            is an immutable tuple so it cannot be mutated between calls; a
            list is still accepted.
        preset: If truthy, overrides the randomly drawn length. The random
            draw still happens first, so the returned ``parameters`` count
            does not depend on whether a preset is given.

    Returns:
        ``(sparse, image, label, parameters)`` where ``sparse`` is
        ``[Y, X, LENGTH, WIDTH]``, ``image`` is a boolean array of shape
        ``SIZE`` with the bar set to True, ``label`` is the bar length and
        ``parameters`` is the product of the number of choices of every
        randomized quantity.
    """
    var_y, var_x, var_width = flags[0], flags[1], flags[2]

    parameters = 1

    Y_RANGE = (DELTA_MIN, DELTA_MAX)
    X_RANGE = (DELTA_MIN, DELTA_MAX)
    MAX_LENGTH = Y_RANGE[1] - Y_RANGE[0]

    # The length is always drawn from 1..MAX_LENGTH (upper bound is exclusive).
    LENGTH, p = Util.parameter(1, MAX_LENGTH + 1)
    parameters *= p

    if preset:
        LENGTH = preset

    # Default column is the horizontal centre of the image.
    X = math.floor(SIZE[1] / 2)
    if var_x:
        X, p = Util.parameter(X_RANGE[0], X_RANGE[1])
        parameters *= p

    Y = Y_RANGE[0]
    if var_y:
        # Start rows are limited so that a bar of MAX_LENGTH still fits.
        Y, p = Util.parameter(0, SIZE[0] - MAX_LENGTH)
        parameters *= p

    WIDTH = 1
    if var_width:
        sizes = [1, 3, 5, 7, 9, 11]
        WIDTH = np.random.choice(sizes)
        parameters *= len(sizes)

    sparse = [Y, X, LENGTH, WIDTH]

    image = np.zeros(SIZE, dtype=bool)

    # Widths are odd, so the bar is centred on column X.
    half_width = math.floor(WIDTH / 2)
    image[Y:Y + LENGTH, X - half_width:X + half_width + 1] = True

    label = LENGTH

    return sparse, image, label, parameters

In [28]:
def generate_save_dataset(output_folder, subset_name, number_of_data):
    """Generate line images and a conversation-format JSON file for one subset.

    Images are written to ``<output_folder>/images`` (the same folder for
    every subset) and the annotations to
    ``<output_folder>/<subset_name>/dataset.json``.

    Args:
        output_folder: Root directory of the dataset; created if missing.
        subset_name: Name of the sub-directory that receives ``dataset.json``.
        number_of_data: Number of image/conversation pairs to generate.
    """
    subset_folder = os.path.join(output_folder, subset_name)
    image_subfolder = os.path.join(output_folder, 'images')

    os.makedirs(image_subfolder, exist_ok=True)
    os.makedirs(subset_folder, exist_ok=True)

    # Accumulates one record per generated image.
    json_data_list = []

    for _ in range(number_of_data):
        _sparse, image, answer, _parameters = length()

        # Map the boolean mask to 0/255 so it is stored as 8-bit grayscale.
        uint8_array = image.astype(np.uint8) * 255
        img = Image.fromarray(uint8_array)

        unique_id = str(uuid.uuid4())
        image_name = f"{unique_id}.jpg"

        img.save(os.path.join(image_subfolder, image_name))

        json_data_list.append({
            "id": unique_id,
            "image": image_name,
            "conversations": [
                {
                    "from": "human",
                    "value": 'What is the length of the line in this picture?'
                },
                {
                    "from": "gpt",
                    "value": str(answer)
                }
            ]
        })

    # Write the annotations once, after all records exist, instead of
    # re-serializing the growing list on every iteration.
    json_output_path = os.path.join(subset_folder, 'dataset.json')
    with open(json_output_path, 'w') as json_file:
        json.dump(json_data_list, json_file, indent=4)

    

Create dataset

In [10]:
output_folder = 'lengthDataset'
number_of_data = 300

# Train and validation use the same size; the test split is a small fixed sample.
for subset_name, subset_size in (
    ('train', number_of_data),
    ('validation', number_of_data),
    ('test', 10),
):
    generate_save_dataset(output_folder, subset_name, subset_size)


In [4]:
# LLaVA helpers: checkpoint loader, model-name parser and evaluation entry point.
from llava.model.builder import load_pretrained_model
from llava.mm_utils import get_model_name_from_path
from llava.eval.run_llava import eval_model

model_path = "liuhaotian/llava-v1.5-7b"

# Collect the loader arguments first, then unpack the 4-tuple it returns.
load_kwargs = dict(
    model_path=model_path,
    model_base=None,
    model_name=get_model_name_from_path(model_path),
    offload_folder="/content/llava_model",
)
tokenizer, model, image_processor, context_len = load_pretrained_model(**load_kwargs)

[2024-03-31 15:55:07,457] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)


You are using a model of type llava to instantiate a model of type llava_llama. This is not supported for all configurations of models and can yield errors.
Downloading shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [02:02<00:00, 61.16s/it]
Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:13<00:00,  6.75s/it]


In [36]:
# Change the working directory to "LLaVA"; relative paths used by later cells
# resolve from there.
# NOTE(review): presumably the cloned LLaVA repository, since later cells
# reference llava/train/train_mem.py and ./scripts/zero3.json — confirm.
os.chdir("LLaVA")


In [None]:
import os

import wandb

# Do not hardcode the API key in the notebook: read it from the environment.
# When the variable is unset, key=None lets wandb.login() fall back to its
# own credential lookup / interactive prompt.
wandb.login(key=os.environ.get("WANDB_API_KEY"))
wandb.init()


In [5]:
# Assign paths to variables
# Launcher command followed by the training entry point.
DEEPSPEED_SCRIPT = "deepspeed llava/train/train_mem.py"
DEEPSPEED_JSON = "./scripts/zero3.json"
# No trailing whitespace: the value is interpolated into the command line and
# may also be used directly as a model identifier.
MODEL_NAME = "liuhaotian/llava-v1.5-13b"
DATA_PATH = "lengthDataset/train/dataset.json"  # Replace with your JSON data path
IMAGE_FOLDER = "lengthDataset/images"  # Replace with your image folder path
VISION_TOWER = "openai/clip-vit-large-patch14-336"
OUTPUT_DIR = "checkpoints/llava-v1.5-13b-task-lora"

In [10]:
# Shell command that launches LoRA fine-tuning, assembled from the path
# constants defined in the cell above.
# The string is a non-raw f-string, so each trailing backslash-newline is
# consumed by Python as a line continuation: the resulting command is a single
# line (preceded by one leading newline), not a multi-line shell script.
finetune_script = f'''
{DEEPSPEED_SCRIPT} \
    --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
    --deepspeed {DEEPSPEED_JSON} \
    --model_name_or_path {MODEL_NAME} \
    --version v1 \
    --data_path {DATA_PATH} \
    --image_folder {IMAGE_FOLDER} \
    --vision_tower {VISION_TOWER} \
    --mm_projector_type mlp2x_gelu \
    --mm_vision_select_layer -2 \
    --mm_use_im_start_end False \
    --mm_use_im_patch_token False \
    --image_aspect_ratio pad \
    --group_by_modality_length True \
    --bf16 True \
    --output_dir {OUTPUT_DIR} \
    --num_train_epochs 5 \
    --per_device_train_batch_size 16 \
    --per_device_eval_batch_size 4 \
    --gradient_accumulation_steps 1 \
    --evaluation_strategy "no" \
    --save_strategy "steps" \
    --save_steps 50000 \
    --save_total_limit 1 \
    --learning_rate 2e-4 \
    --weight_decay 0. \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --tf32 True \
    --model_max_length 2048 \
    --gradient_checkpointing True \
    --dataloader_num_workers 4 \
    --lazy_preprocess True \
    --report_to wandb \
'''

In [11]:
# Release cached GPU memory held by PyTorch in this kernel before the training run is launched.
torch.cuda.empty_cache()

In [12]:
# Check that PyTorch can see a CUDA device; the cell displays the boolean result.
torch.cuda.is_available()

True

In [13]:
# Run the fine-tuning command in a shell via IPython's `!` escape, interpolating finetune_script.
!{finetune_script}

[2024-03-31 16:13:19,431] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)
Detected CUDA_VISIBLE_DEVICES=0: setting --include=localhost:0
[2024-03-31 16:13:21,824] [INFO] [runner.py:571:main] cmd = /home/mahsa.geshvadi001/miniconda3/envs/llava/bin/python -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMF19 --master_addr=127.0.0.1 --master_port=29500 --enable_each_rank_log=None llava/train/train_mem.py --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 --deepspeed ./scripts/zero3.json --model_name_or_path liuhaotian/llava-v1.5-13b --version v1 --data_path lengthDataset/train/dataset.json --image_folder lengthDataset/images --vision_tower openai/clip-vit-large-patch14-336 --mm_projector_type mlp2x_gelu --mm_vision_select_layer -2 --mm_use_im_start_end False --mm_use_im_patch_token False --image_aspect_ratio pad --group_by_modality_length True --bf16 True --output_dir checkpoints/llava-v1.5-13b-task-lora --

Testing

In [None]:
# Shell command: it needs IPython's `!` escape to run from a code cell.
!python run_llava.py --model-path "/root/LLaVA/llava/checkpoints/llava-2-7b-chat-task-qlora/best_llava_eval_model_llava_lora" --model-base "/root/LLaVA/llava/llava-v1.5-7b" --image-file "/root/dataset/images/00149b13-fbc6-4232-9af6-bc6af4ea6daf.jpg" --query "What is the length of the line in this picture?"


In [None]:
# Shell command: it needs IPython's `!` escape to run from a code cell.
!python llava/eval/run_llava.py --model-path /checkpoints/llama-2-7b-chat-task-qlora/non_lora_trainables --model-base llava-v1.5-7b/pytorch_model-00002-of-00002 --image-file /lengthDataset/images/test.jpg  --query "What is the length of the line in this picture?"


In [None]:
# Merge the LoRA weights into the base model. Each flag must be separated from
# the previous value by a space, otherwise it is parsed as part of the path.
!python scripts/merge_lora_weights.py --model-path lengthDataset/checkpoints --model-base liuhaotian/llava-v1.5-7b --save-model-path lengthDataset/
