In [None]:
!pip install peft
!pip install accelerate
!pip install bitsandbytes
!pip install datasets
! pip install flash-attn

Collecting peft
  Downloading peft-0.12.0-py3-none-any.whl.metadata (13 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.13.0->peft)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.13.0->peft)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.13.0->peft)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.13.0->peft)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.13.0->peft)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch>=1.13.0->peft)
  Using cached nvidia_cufft_cu12-11.

In [None]:
# Snapshot the versions of every installed package into requirements.txt in
# the current working directory.
! pip freeze > requirements.txt

In [None]:
from google.colab import drive
import torch
import pandas as pd
import json
import os
from datasets import Dataset
from tqdm import tqdm
from collections import Counter
from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
from transformers import TrainingArguments, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline, AutoProcessor, LlavaNextProcessor, LlavaNextForConditionalGeneration, LlavaForConditionalGeneration
from PIL import Image
from contextlib import nullcontext
from PIL import Image

In [None]:
# Mount Google Drive at /content/drive; the annotation file, images, adapter
# checkpoint and output CSV used in this notebook all live under
# /content/drive/MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Load the annotation file. The encoding is stated explicitly because JSON
# text is UTF-8, while open() otherwise falls back to the platform's default
# encoding. `data` is later indexed by split name ('test', 'val').
with open('/content/drive/MyDrive/ML-Quiz-XRay-ReportGeneration/data/annotation_quiz_all.json', 'r', encoding='utf-8') as f:
  data = json.load(f)

In [None]:
def merger(report):
  """Flatten a report mapping into a single comma-separated string.

  Each entry with a non-empty value is rendered as ``"<key>: <value>"`` and the
  rendered entries are joined with ``","`` (no space, no trailing comma), in
  the mapping's iteration order. Entries whose value is falsy (``None``,
  ``""``, an empty list, ...) are skipped.

  Args:
    report: Mapping of section name to section content.

  Returns:
    The joined string, or ``""`` when no entry has a non-empty value.
  """
  # Truthiness alone already rules out empty strings/containers; the former
  # extra ``len(value)`` call only added a TypeError for truthy values that
  # have no length (e.g. numbers).
  return ",".join(f"{key}: {value}" for key, value in report.items() if value)

In [None]:
def process_data(data):
  """Turn a list of annotation items into a DataFrame of ``id`` / ``report`` rows.

  An item that carries a ``'report'`` entry has that entry flattened by
  ``merger``; any other item contributes its ``'original_report'`` value
  unchanged. One row is produced per item, in input order.
  """
  rows = [
      {
          "id": entry['id'],
          "report": merger(entry['report']) if 'report' in entry else entry['original_report'],
      }
      for entry in data
  ]
  return pd.DataFrame(rows)

In [None]:
# Build the id/report DataFrame for the 'test' split of the annotations.
test_df = process_data(data['test'])

In [None]:
# Build the id/report DataFrame for the 'val' split of the annotations.
val_df = process_data(data['val'])

In [None]:
def list_png_files(root):
  """Return the paths of all ``.png`` files under ``root``, sorted.

  The directory tree is walked recursively. ``os.walk`` yields entries in an
  arbitrary, filesystem-dependent order, so the result is sorted to make the
  list (and anything derived from its order) reproducible between runs.

  Args:
    root: Directory to search. A missing directory yields an empty list.

  Returns:
    Sorted list of ``os.path.join``-ed paths whose extension is exactly
    ``'.png'`` (case-sensitive).
  """
  return sorted(
      os.path.join(dir_path, file_name)
      for dir_path, _dir_names, file_names in os.walk(root)
      for file_name in file_names
      if os.path.splitext(file_name)[1] == '.png'
  )


# Every PNG image below the dataset's image directory.
file_list = list_png_files('/content/drive/MyDrive/ML-Quiz-XRay-ReportGeneration/data/images')

In [None]:
# Map the name of each image's parent directory (the record id that
# `eval_loop` looks images up by) to one image path. When several images share
# a directory the last path visited wins; iterating in sorted order makes that
# choice deterministic instead of depending on the order in which the paths
# were discovered. Built as a comprehension so no module-level variable named
# `id` is left behind shadowing the builtin.
file_id_mapper = {
    os.path.basename(os.path.dirname(file_path)): file_path
    for file_path in sorted(file_list)
}

In [None]:
# LoRA hyper-parameters. NOTE(review): not referenced by any cell visible in
# this notebook (the adapter is loaded from a saved checkpoint) — presumably
# kept for parity with the training setup; confirm.
lora_r = 16
lora_alpha = 16
lora_dropout = 0.1
# 4-bit quantization settings, consumed when the BitsAndBytesConfig is built.
use_4bit = True  # passed as load_in_4bit
bnb_4bit_compute_dtype = "float16"  # attribute name looked up on the torch module
bnb_4bit_quant_type = "nf4"  # passed as bnb_4bit_quant_type
use_nested_quant = False  # passed as bnb_4bit_use_double_quant

In [None]:
# Resolve the configured dtype name to the torch attribute of that name
# (with the value "float16" this is torch.float16).
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

# Quantization settings passed as `quantization_config` when the base model is
# loaded.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)


In [None]:
# Alternative loader for a different checkpoint id, left commented out:
#model = LlavaForConditionalGeneration.from_pretrained("liuhaotian/llava-v1.6-mistral-7b", torch_dtype=torch.float16, device_map="auto",quantization_config=bnb_config)
# Load the base model with the quantization config and compute dtype set up in
# the previous cells.
model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf", low_cpu_mem_usage=True, torch_dtype=compute_dtype, device_map="auto",quantization_config=bnb_config)
# Wrap the base model with the adapter stored in the checkpoint-120 directory.
model = PeftModel.from_pretrained(model, '/content/drive/MyDrive/ML-Quiz-XRay-ReportGeneration/Model/checkpoint-120')
# NOTE: this GPU does not support flash attention, so no flash-attention
# option is passed to from_pretrained above.

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [None]:
# Jinja chat template that is assigned to the tokenizer. It renders a fixed
# system sentence, then each message as "USER: ..." or "ASSISTANT: ...",
# emitting "<image>" for image items, a space after user turns and the EOS
# token after assistant turns; with add_generation_prompt it ends on
# "ASSISTANT: ".
# NOTE(review): no apply_chat_template call is visible in this notebook — the
# generation prompt is a hand-written "[INST] ... [/INST]" string — so this
# template may be unused here; confirm.
LLAVA_CHAT_TEMPLATE = """{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. {% for message in messages %}{% if message['role'] == 'user' %}USER: {% else %}ASSISTANT: {% endif %}{% for item in message['content'] %}{% if item['type'] == 'text' %}{{ item['text'] }}{% elif item['type'] == 'image' %}<image>{% endif %}{% endfor %}{% if message['role'] == 'user' %} {% else %}{{eos_token}}{% endif %}{% endfor %}{% if add_generation_prompt %}ASSISTANT: {% endif %}"""

In [None]:
# Load tokenizer and processor from the identifier stored on the model config
# (presumably the hub id the base model was loaded from — TODO confirm this
# attribute resolves as expected on the PEFT-wrapped model).
tokenizer = AutoTokenizer.from_pretrained(model.config._name_or_path, trust_remote_code=True, use_fast=True)
tokenizer.chat_template = LLAVA_CHAT_TEMPLATE
processor = AutoProcessor.from_pretrained(model.config._name_or_path, trust_remote_code=True)
# Make the processor use the tokenizer configured above, so `tokenizer` and
# `processor.tokenizer` are the same object.
processor.tokenizer = tokenizer

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
# Token ids that end generation: always the tokenizer's EOS token, plus
# "<|eot_id|>" only when the vocabulary really contains it. A token missing
# from the vocabulary is presumably reported as the unknown-token id (or None)
# by convert_tokens_to_ids — verify against the installed tokenizer — and
# neither of those should be treated as a stop token.
_eot_token_id = processor.tokenizer.convert_tokens_to_ids("<|eot_id|>")
terminators = [processor.tokenizer.eos_token_id]
if _eot_token_id is not None and _eot_token_id != processor.tokenizer.unk_token_id:
  terminators.append(_eot_token_id)

# Instruction sent with every image. NOTE(review): the wording (including the
# leading "Prompt: " and the spelling) is kept exactly as-is because the
# fine-tuned adapter was presumably trained with this exact text — confirm
# before editing. The only source change is spelling the backslash as "\\",
# which produces the same string without relying on an invalid escape sequence.
prompt = """Prompt: [INST] <image>
Describe the image in more details from clinical perspective. Your description should focuss on five anatomical regions: lungs, heart, mediastinal, bone, others. If the part of the report (one or many line) is about lungs than put into "lungs", so as heart, bones, and mediastinal. If one line is part of two regions, add it into both the regions. However, if you can not put any part of the report into these four regions than put it into "others\\. You will follow this format name of anatomical region : your_report_for_that_region, followed by comma.[/INST]"""

In [None]:
def eval_loop(small_df, device="cuda:0"):
  """Generate a report for every row of ``small_df`` and collect the results.

  For each row, the image registered for the row's ``id`` in
  ``file_id_mapper`` is opened, passed together with the module-level
  ``prompt`` through ``processor``, and completed with ``model.generate``.
  Only the newly generated tokens (everything after the prompt tokens) are
  decoded into the output text.

  Args:
    small_df: DataFrame with an ``id`` column whose values are keys of
      ``file_id_mapper``.
    device: Device the processed inputs are moved to before generation.
      Defaults to ``"cuda:0"``.

  Returns:
    DataFrame built from one ``{"id", "output"}`` record per input row.

  Raises:
    KeyError: If a row's ``id`` has no entry in ``file_id_mapper``.
  """
  result = []
  for _, row in tqdm(small_df.iterrows(), total=small_df.shape[0]):
    record_id = row['id']
    image_path = file_id_mapper[record_id]
    # Open the image in a context manager so its file handle is released on
    # every iteration instead of accumulating over a long run.
    with Image.open(image_path) as image:
      # Keyword arguments keep text and image from being swapped, whatever
      # positional order the installed processor version expects.
      inputs = processor(text=prompt, images=image, return_tensors="pt").to(device)

    # autoregressively complete prompt
    output = model.generate(**inputs, max_new_tokens=1000,
                            eos_token_id=terminators,
                            do_sample=True,
                            temperature=0.1, # To reduce model creativity
                            top_p=0.9, # To reduce randomness
                            pad_token_id=processor.tokenizer.eos_token_id)

    # The generated sequence starts with the prompt tokens; keep only what
    # follows them.
    response = output[0][inputs['input_ids'].shape[-1]:]
    decoded_output = tokenizer.decode(response, skip_special_tokens=True)
    result.append({
        "id": record_id,
        "output": decoded_output
    })
  return pd.DataFrame(result)

In [None]:
# Alternative run on a 10% sample of the test split, left commented out:
# result = []
# small_df = test_df.sample(frac=0.1, random_state=42)
# Evaluate on a 30% sample of the validation split; random_state fixes which
# rows are drawn.
small_df = val_df.sample(frac=0.3, random_state=42)
result = eval_loop(small_df)

100%|██████████| 89/89 [46:03<00:00, 31.05s/it]


In [None]:
# Persist the generated reports to Drive as CSV, without the DataFrame index.
result.to_csv('/content/drive/MyDrive/ML-Quiz-XRay-ReportGeneration/Val_output.csv', index=False)

In [None]:
# Display the generated reports (id and output columns).
result

Unnamed: 0,id,output
0,CXR3001_IM-1387,Lungs: The lungs appear to be in a normal posi...
1,CXR2412_IM-0958,Lungs: The lungs appear to be clear with no vi...
2,CXR85_IM-2372,lungs : The lungs appear to be in a normal pos...
3,CXR3261_IM-1547,Lungs: The lungs appear to be clear with no vi...
4,CXR737_IM-2295,Lungs: The lungs appear to be clear with no vi...
...,...,...
84,CXR2596_IM-1086,lungs : The lungs appear to be in a normal pos...
85,CXR1752_IM-0494,Lungs: The lungs appear to be clear with no vi...
86,CXR2186_IM-0796,Lungs: The lungs appear to be in a normal posi...
87,CXR752_IM-2305,Lungs: The lungs appear to be in a normal posi...
