In [3]:
from IPython.core.getipython import get_ipython

# Turn on automatic module reloading, but only when get_ipython() hands back
# a truthy shell object; otherwise the magics below are skipped entirely.
ipython_client = get_ipython()
if ipython_client:
    # Same effect as typing `%load_ext autoreload` and then `%autoreload 2`.
    ipython_client.run_line_magic("load_ext", "autoreload")
    ipython_client.run_line_magic("autoreload", "2")

from RewardingVisualDoubt import dataset, inference, mimic_cxr, prompter, shared, vllm

# Build the tokenizer for the base model named by vllm.LLAVA_BASE_MODEL_NAME
# (the helper's name indicates it is the image-support variant).
tokenizer = vllm.load_pretrained_llava_tokenizer_with_image_support(
    model_base=vllm.LLAVA_BASE_MODEL_NAME
)

# %% load the model
# Trainable load: the recorded log for this call reports the LoRA weights
# being kept unmerged and trainable. The following cells inspect this `model`.
model = vllm.load_pretrained_llava_model(is_lora_trainable=True)

Fetching 69 files:   0%|          | 0/69 [00:00<?, ?it/s]

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Loading model in trainable mode...
Model base:  liuhaotian/llava-v1.5-7b
Loading LLaVA from base model...


Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading additional LLaVA weights...
Loading LoRA weights...
Model is loaded with unmerged and trainable LoRA weights...
Using downloaded and verified file: /tmp/biovil_t_image_model_proj_size_128.pt
Loaded additional vision tower weights...


# Examine trainable parameters of the Llava model and its TRL-ready version (in training mode)

### Without value head

In [4]:
# List every parameter of `model` that still receives gradients,
# together with its shape.
print("Printing the non-TRL Llava-LoRA model's trainable parameters")
for name, param in model.named_parameters():
    if not param.requires_grad:
        continue  # frozen parameter: nothing to report
    print(name, param.shape)

Printing the non-TRL Llava-LoRA model's trainable parameters
base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight torch.Size([128, 4096])
base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.weight torch.Size([4096, 128])
base_model.model.model.layers.0.self_attn.k_proj.lora_A.default.weight torch.Size([128, 4096])
base_model.model.model.layers.0.self_attn.k_proj.lora_B.default.weight torch.Size([4096, 128])
base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.weight torch.Size([128, 4096])
base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.weight torch.Size([4096, 128])
base_model.model.model.layers.0.self_attn.o_proj.lora_A.default.weight torch.Size([128, 4096])
base_model.model.model.layers.0.self_attn.o_proj.lora_B.default.weight torch.Size([4096, 128])
base_model.model.model.layers.0.mlp.gate_proj.lora_A.default.weight torch.Size([128, 4096])
base_model.model.model.layers.0.mlp.gate_proj.lora_B.default.weight torch.Size([11008, 

### With value head

In [6]:
# Attach a value head to the LoRA model, then list the parameters of the
# resulting object that still receive gradients, together with their shapes.
trl_model = vllm.add_value_head_to_LlavaLlamaForCausalLM_model(model)
print("Printing the TRL-Llava model's trainable parameters")
for name, param in trl_model.named_parameters():
    if not param.requires_grad:
        continue  # frozen parameter: nothing to report
    print(name, param.shape)

Printing the TRL-Llava model's trainable parameters
pretrained_model.base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight torch.Size([128, 4096])
pretrained_model.base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.weight torch.Size([4096, 128])
pretrained_model.base_model.model.model.layers.0.self_attn.k_proj.lora_A.default.weight torch.Size([128, 4096])
pretrained_model.base_model.model.model.layers.0.self_attn.k_proj.lora_B.default.weight torch.Size([4096, 128])
pretrained_model.base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.weight torch.Size([128, 4096])
pretrained_model.base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.weight torch.Size([4096, 128])
pretrained_model.base_model.model.model.layers.0.self_attn.o_proj.lora_A.default.weight torch.Size([128, 4096])
pretrained_model.base_model.model.model.layers.0.self_attn.o_proj.lora_B.default.weight torch.Size([4096, 128])
pretrained_model.base_model.model.model.layers.0.mlp

# Confirm that no parameters are trainable in inference (non-training) mode

In [19]:
# %% load the model
# Reload with is_lora_trainable=False. This rebinds `model`, so every later
# cell that reads `model` sees this instance instead of the trainable one
# loaded at the top of the notebook. The recorded log for this call reports
# the LoRA weights being merged and unloaded.
model = vllm.load_pretrained_llava_model(is_lora_trainable=False)

Loading model in non-trainable mode...
Model base:  liuhaotian/llava-v1.5-7b
Loading LLaVA from base model...




Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading additional LLaVA weights...
Loading LoRA weights...
Merging LoRA weights...
Model is loaded with merged and unloaded LoRA weights...
Using downloaded and verified file: /tmp/biovil_t_image_model_proj_size_128.pt
Loaded additional vision tower weights...


In [20]:
# List every parameter of the current `model` that still receives gradients;
# an empty listing means no parameter is trainable.
print("Printing the non-TRL Llava-LoRA model's trainable parameters")
for name, param in model.named_parameters():
    if not param.requires_grad:
        continue  # frozen parameter: nothing to report
    print(name, param.shape)

Printing the non-TRL Llava-LoRA model's trainable parameters


In [23]:
from LLAVA_Biovil.llava.mm_utils import KeywordsStoppingCriteria
from LLAVA_Biovil.llava.conversation import SeparatorStyle, conv_vicuna_v1
import torch

# Qualitative sanity check: run the current `model` on the first few test
# samples of the binary-QA dataset and print prompt, label and answer.

# How many test samples to push through the model before stopping.
NUM_PREVIEW_SAMPLES = 11

# Generation stops at the conversation separator. Templates whose separator
# style is SeparatorStyle.TWO use `sep2`, all others use `sep`. The template
# is copied once instead of once per attribute access.
conversation_template = conv_vicuna_v1.copy()
STOP_STR = (
    conversation_template.sep
    if conversation_template.sep_style != SeparatorStyle.TWO
    else conversation_template.sep2
)

tokenizer = vllm.load_pretrained_llava_tokenizer_with_image_support(
    model_base=vllm.LLAVA_BASE_MODEL_NAME
)
# Pad with the EOS token; the dataloader below is given the same padding value.
tokenizer.pad_token_id = tokenizer.eos_token_id
dataset_test = dataset.get_binary_qa_prompted_mimic_cxr_llava_model_input_dataset(
    split=dataset.DatasetSplit.TEST,
    tokenizer=tokenizer,
    prompter=prompter.build_binary_qa_instruction_from_disease_under_study,
)
dataloader_test = dataset.get_mimic_cxr_llava_model_input_dataloader(
    dataset=dataset_test, batch_size=1, padding_value=tokenizer.eos_token_id, num_workers=8
)

# The target device does not change between batches, so build it once.
inference_device = torch.device(shared.torch_devices.cuda.value)

for idx, datapoint in enumerate(dataloader_test):
    # Judging by the labels printed below, each batch is indexed as
    # [0] model inputs, [1] label, [2] prompt text, [3] metadata
    # -- TODO confirm against the dataloader's collate function.
    llava_model_input_dict = dataset.move_llava_model_input_dict_to_device(
        datapoint[0], inference_device
    )
    input_ids = llava_model_input_dict["text_prompt_input_ids"]
    images = llava_model_input_dict["images"]

    stopping_criteria = KeywordsStoppingCriteria([STOP_STR], tokenizer, input_ids)
    pred = inference.generate_radialog_answer_for_binary_qa_for_single_study(
        model, tokenizer, input_ids, images, stopping_criteria
    )
    print(f"\n Metadata: {datapoint[3]}")
    print(f"Prompt: {datapoint[2]}")
    print("Label:", datapoint[1])
    print(f"File_idx {idx}, ASSISTANT: ", pred)

    # Stop right after the last wanted sample so no further batch is fetched.
    if idx + 1 >= NUM_PREVIEW_SAMPLES:
        break




 Metadata: [MimicCxrBinaryQADatapoint(subject_id=18460230, study_id=53631792, img_path='/home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0/files/p18/p18460230/s53631792/369dc5bd-70bd89d0-2d90fa80-f319ec1d-fb2802aa.jpg', disease=<ChexpertFinding.PLEURAL_EFFUSION: 'Pleural Effusion'>, label=<ChexpertLabel.POSITIVE: 1.0>)]
Prompt: ["A chat between a curious user and an artificial intelligence assistant acting as an experienced radiologist. The assistant gives professional, detailed, and polite answers to the user's questions. USER: <image>. You are to act as a radiologist and answer the following question: Is the following disease visible in the given X-ray image: Pleural Effusion?  ASSISTANT:"]
Label: tensor([1.], dtype=torch.float32)
File_idx 0, ASSISTANT:  Yes, the image shows pleural effusion.

 Metadata: [MimicCxrBinaryQADatapoint(subject_id=13263843, study_id=52138943, img_path='/home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0/files/p13/p13263843/s52138943/de739d0b-2345495b-255f0e3b-00ccbf4c-ab4d3

# Configs

### PEFT LoRA Config (in trainable mode)

In [14]:
import dataclasses

# Print every field of the LoRA config attached to `model`, one per line.
# The config is extracted once and reused, rather than once per field.
# NOTE(review): this cell's recorded execution count (14) precedes the
# inference-mode reload (19), and the heading says "trainable mode".
# Presumably `model` must be the is_lora_trainable=True instance here --
# confirm this cell still works under Restart & Run All in file order.
lora_config = vllm._extract_lora_config_from_model(model)
for field in dataclasses.fields(lora_config):
    print(f"{field.name}: {getattr(lora_config, field.name)}")

peft_type: LORA
auto_mapping: None
base_model_name_or_path: liuhaotian/llava-v1.5-7b
revision: None
task_type: CAUSAL_LM
inference_mode: False
r: 128
target_modules: ['gate_proj', 'v_proj', 'o_proj', 'k_proj', 'down_proj', 'up_proj', 'q_proj']
lora_alpha: 256
lora_dropout: 0.05
fan_in_fan_out: False
bias: none
modules_to_save: None
init_lora_weights: True
layers_to_transform: None
layers_pattern: None


### LlavaConfig

In [None]:
# Resolve the local path for vllm.FINETUNED_LLAVA_REPO_ID and display the
# config loaded from it (the bare expression is rendered as the cell output).
# NOTE(review): both helpers are underscore-prefixed in `vllm`, i.e. presumably
# private -- confirm they are meant to be called from notebooks.
vllm._get_finetuned_llava_config(model_path=vllm._get_hf_model_path(vllm.FINETUNED_LLAVA_REPO_ID))

Fetching 69 files:   0%|          | 0/69 [00:00<?, ?it/s]

LlavaConfig {
  "_name_or_path": "/home/guests/deniz_gueler/.cache/huggingface/hub/models--ChantalPellegrini--RaDialog-interactive-radiology-report-generation/snapshots/14b6bf53a4105c7080f6f4ec6ff2f3b806a580a3",
  "architectures": [
    "LlavaLlamaForCausalLM"
  ],
  "bos_token_id": 1,
  "eos_token_id": 2,
  "freeze_mm_mlp_adapter": false,
  "freeze_mm_vision_resampler": false,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "image_aspect_ratio": "pad",
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "max_length": 4096,
  "max_position_embeddings": 4096,
  "mm_hidden_size": 512,
  "mm_projector_lr": 2e-05,
  "mm_projector_type": "mlp2x_gelu",
  "mm_resampler_type": null,
  "mm_use_im_patch_token": false,
  "mm_use_im_start_end": false,
  "mm_vision_select_feature": "patch",
  "mm_vision_select_layer": -2,
  "mm_vision_tower": "biovil",
  "model_type": "llava",
  "mv_type": "concat",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "pa

# Additional Methods for Future Reference

In [None]:
from peft.tuners.lora import mark_only_lora_as_trainable

# Reference snippet, part 1: let PEFT's helper adjust the trainability flags.
mark_only_lora_as_trainable(model)

# Reference snippet, part 2: set every flag explicitly from the parameter name.
for name, param in model.named_parameters():
    # Parameters whose name contains "lora" stay trainable; all others are frozen.
    param.requires_grad = "lora" in name