<a href="https://colab.research.google.com/github/kakans/LLM/blob/main/LLAVA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Environment setup: upgrade pip itself (-U), then install the notebook's
# dependencies at pinned versions. -qqq and "--progress-bar off" keep the
# cell output short.
!pip install -Uqqq pip --progress-bar off
!pip install -qqq torch==2.1 --progress-bar off
!pip install -qqq transformers==4.34.1 --progress-bar off
!pip install -qqq accelerate==0.23.0 --progress-bar off
!pip install -qqq bitsandbytes==0.41.1 --progress-bar off
!pip install -qqq llava-torch==1.1.1 --progress-bar off

[0m  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for deepspeed (setup.py) ... [?25l[?25hdone
  Building wheel for ffmpy (setup.py) ... [?25l[?25hdone
  Building wheel for wavedrom (setup.py) ... [?25l[?25hdone
  Building wheel for lit (pyproject.toml) ... [?25l[?25hdone
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
lida 0.0.10 requires kaleido, which is not installed.
llmx 0.0.15a0 requires cohere, which is not installed.
llmx 0.0.15a0 requires openai, which is not installed.
llmx 0.0.15a0 requir

In [None]:

import textwrap
from io import BytesIO

import requests
import torch
from llava.constants import DEFAULT_IMAGE_TOKEN, IMAGE_TOKEN_INDEX
from llava.conversation import SeparatorStyle, conv_templates
from llava.mm_utils import (
    KeywordsStoppingCriteria,
    get_model_name_from_path,
    process_images,
    tokenizer_image_token,
)
from llava.model.builder import load_pretrained_model
from llava.utils import disable_torch_init
from PIL import Image


[2024-02-20 14:58:08,892] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)


In [None]:
# Run LLaVA's torch-init helper before the model is constructed.
# NOTE(review): presumably this skips PyTorch's default weight initialisation
# to speed up model creation -- confirm against llava.utils.
disable_torch_init()

In [None]:
# Identifier/path of the checkpoint passed to the loader as `model_path`.
# NOTE(review): the "3GB" suffix presumably refers to the shard size -- confirm.
MODEL = "4bit/llava-v1.5-13b-3GB"
# Short model name derived from the path; the loader takes it as `model_name`.
model_name = get_model_name_from_path(MODEL)
# Bare expression so the notebook displays the derived name as cell output.
model_name

'llava-v1.5-13b-3GB'

In [None]:
cd /content/drive/MyDrive/Llava

/content/drive/MyDrive/Llava


In [None]:
# Load everything needed for inference in one call: the tokenizer, the model,
# the image processor and the context length reported by the loader.
# load_4bit=True asks the loader for 4-bit weights.
# NOTE(review): presumably this is what the bitsandbytes install is for -- confirm.
tokenizer, model, image_processor, context_len = load_pretrained_model(
    model_path=MODEL, model_base=None, model_name=model_name, load_4bit=True
)

Downloading tokenizer_config.json:   0%|          | 0.00/749 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading (…)model.bin.index.json:   0%|          | 0.00/33.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/9 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00009.bin:   0%|          | 0.00/2.97G [00:00<?, ?B/s]

Downloading (…)l-00002-of-00009.bin:   0%|          | 0.00/2.93G [00:00<?, ?B/s]

Downloading (…)l-00003-of-00009.bin:   0%|          | 0.00/2.89G [00:00<?, ?B/s]

Downloading (…)l-00004-of-00009.bin:   0%|          | 0.00/2.96G [00:00<?, ?B/s]

Downloading (…)l-00005-of-00009.bin:   0%|          | 0.00/2.89G [00:00<?, ?B/s]

Downloading (…)l-00006-of-00009.bin:   0%|          | 0.00/2.98G [00:00<?, ?B/s]

Downloading (…)l-00007-of-00009.bin:   0%|          | 0.00/2.87G [00:00<?, ?B/s]

Downloading (…)l-00008-of-00009.bin:   0%|          | 0.00/2.89G [00:00<?, ?B/s]

Downloading (…)l-00009-of-00009.bin:   0%|          | 0.00/2.72G [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/4.76k [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/9 [00:00<?, ?it/s]

Downloading generation_config.json:   0%|          | 0.00/154 [00:00<?, ?B/s]

Downloading (…)rocessor_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.71G [00:00<?, ?B/s]

In [None]:
def load_image(image_file, timeout=30):
    """Load an image from a URL or a local path and return it in RGB mode.

    Args:
        image_file: Either an ``http://``/``https://`` URL or a filesystem
            path understood by ``PIL.Image.open``.
        timeout: Seconds to wait for the HTTP server before giving up. Only
            used for URLs. Without a timeout a stalled server would block
            the notebook indefinitely.

    Returns:
        The image converted to RGB.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    if image_file.startswith(("http://", "https://")):
        response = requests.get(image_file, timeout=timeout)
        # Fail loudly on 4xx/5xx instead of handing an error page to PIL.
        response.raise_for_status()
        return Image.open(BytesIO(response.content)).convert("RGB")

    # convert() returns a new image, so the underlying file can be closed
    # as soon as the conversion is done.
    with Image.open(image_file) as source:
        return source.convert("RGB")

In [None]:
def process_image(image):
    """Preprocess one PIL image into a float16 batch on the model's device.

    Args:
        image: The image to preprocess; it is wrapped in a one-element list
            before being handed to ``process_images``.

    Returns:
        The result of ``process_images`` moved to ``model.device`` and cast
        to ``torch.float16``.
    """
    from types import SimpleNamespace

    # llava.mm_utils.process_images reads this option by attribute lookup
    # (getattr), so a plain dict would be silently ignored and the "pad"
    # aspect-ratio handling would never be applied. Pass an object that
    # actually exposes the attribute.
    args = SimpleNamespace(image_aspect_ratio="pad")
    image_tensor = process_images([image], image_processor, args)
    return image_tensor.to(model.device, dtype=torch.float16)

In [None]:
# Key into llava's `conv_templates` selecting the prompt format.
# The loaded checkpoint is a LLaVA v1.5 model; LLaVA's reference CLI picks
# "llava_v1" for model names containing "v1", so use the matching template
# rather than the older "llava_v0" format.
CONV_MODE = "llava_v1"

In [None]:
def create_prompt(prompt: str):
    """Build the full model prompt for a single-image question.

    A fresh copy of the conversation template selected by ``CONV_MODE`` is
    filled with one message from the first role (the image token, a newline,
    then ``prompt``) and an empty slot for the second role to complete.

    Returns:
        A ``(prompt_text, conversation)`` tuple.
    """
    conversation = conv_templates[CONV_MODE].copy()
    question = "\n".join((DEFAULT_IMAGE_TOKEN, prompt))
    conversation.append_message(conversation.roles[0], question)
    # None leaves the reply open so the model generates it.
    conversation.append_message(conversation.roles[1], None)
    prompt_text = conversation.get_prompt()
    return prompt_text, conversation

In [None]:
def ask_image(image: Image.Image, prompt: str) -> str:
    """Ask the loaded model a question about one image and return its reply.

    Args:
        image: The PIL image to query.
        prompt: The question or instruction about the image.

    Returns:
        The decoded reply with special tokens skipped, surrounding
        whitespace removed and any trailing stop string stripped.
    """
    image_tensor = process_image(image)
    prompt, conv = create_prompt(prompt)
    # Tokenize the prompt and add a leading batch dimension.
    input_ids = (
        tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt")
        .unsqueeze(0)
        .to(model.device)
    )

    # The text that ends a turn depends on the template's separator style.
    stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
    stopping_criteria = KeywordsStoppingCriteria(
        keywords=[stop_str], tokenizer=tokenizer, input_ids=input_ids
    )

    with torch.inference_mode():
        output_ids = model.generate(
            input_ids,
            images=image_tensor,
            do_sample=True,
            temperature=0.01,
            max_new_tokens=512,
            use_cache=True,
            stopping_criteria=[stopping_criteria],
        )

    # Decode only what follows the prompt tokens.
    # NOTE(review): assumes generate() returns the prompt ids followed by the
    # new tokens -- confirm for the installed llava/transformers versions.
    answer = tokenizer.decode(
        output_ids[0, input_ids.shape[1] :], skip_special_tokens=True
    ).strip()

    # Generation stops once the stop string has been produced, so it can be
    # left dangling at the end of the text; drop it from the answer.
    if stop_str and answer.endswith(stop_str):
        answer = answer[: -len(stop_str)].strip()
    return answer

In [None]:

# Load the sample image via a relative path (resolved against the current
# working directory).
image = load_image("01.jpg")
# The resized result is not assigned: as the cell's last expression it only
# serves as the displayed preview. `image` itself stays as loaded, assuming
# PIL's resize() returns a new image rather than resizing in place.
image.resize((600, 800))

In [None]:
%%time
# Ask the model about the loaded image, then print the answer wrapped at
# 110 columns. The %%time cell magic above reports how long the cell took.
result = ask_image(image, "can you please extract the statement of declaration?")
print(textwrap.fill(result, width=110))

Yes, the statement of declaration in the image is written in Bahasa Melayu. It is a document that contains
information about the vehicle, such as its make, model, and license plate number. The document also includes
the driver's name and address, as well as the date of the declaration. This statement is typically used to
register a vehicle in Malaysia and is required by the Malaysian government for all vehicles on the road.
CPU times: user 11.6 s, sys: 462 ms, total: 12.1 s
Wall time: 13.7 s
