In [1]:
import sys 
sys.path.append('/data3/KJE/code/WIL_DeepLearningProject_2/VLM_Hallu')
import argparse
import os
import random
from typing import List, Union, Optional, Dict, Tuple
import gc
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

import numpy as np
import pandas as pd

import torch
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score
from typing import Dict, Tuple, List

# import wandb

from transformers import AutoProcessor, LlavaForConditionalGeneration, set_seed  # noqa: F401

from src.model_zoo import get_model
from src.dataset_zoo import get_dataset
from src.misc import seed_all, _default_collate, save_scores
from src.old.probing_utils_copy import load_llava

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
from transformers import LlavaForConditionalGeneration, Qwen2VLForConditionalGeneration, InstructBlipProcessor, InstructBlipForConditionalGeneration

def load_model(model_name):
    """Load the LLaVA-1.5-7B checkpoint together with its processor and tokenizer.

    Args:
        model_name: Label supplied by the caller. NOTE(review): it is currently
            not used to select a checkpoint; the Hugging Face id below is
            always loaded. Kept in the signature so existing call sites work.

    Returns:
        A ``(model, processor, tokenizer)`` tuple. The tokenizer is the
        processor's own tokenizer with ``padding_side`` set to ``"left"``.
    """
    if torch.cuda.is_available():
        # Compute capability of GPU 0 decides the half-precision flavour:
        # bfloat16 when the major version is >= 8, float16 otherwise.
        cap_major = torch.cuda.get_device_capability(0)[0]
        dtype = torch.bfloat16 if cap_major >= 8 else torch.float16
        device_map = "auto"
    else:
        dtype = torch.float32
        device_map = None

    model_id = "llava-hf/llava-1.5-7b-hf"
    cache_dir = '/data3/hg_weight/hg_weight'

    processor = AutoProcessor.from_pretrained(
        model_id,
        trust_remote_code=False,
        cache_dir=cache_dir,
        use_fast=False,
    )
    model = LlavaForConditionalGeneration.from_pretrained(
        model_id,
        torch_dtype=dtype,
        low_cpu_mem_usage=True,
        device_map=device_map,
        cache_dir=cache_dir,
        # The downstream analysis calls the model with output_attentions=True.
        # Fused attention backends do not return per-head attention weights,
        # so request the eager implementation explicitly.
        attn_implementation="eager",
    )

    tok = processor.tokenizer
    # Left padding keeps the final real token of every prompt at index -1.
    tok.padding_side = "left"

    return model, processor, tok

  from .autonotebook import tqdm as notebook_tqdm


ImportError: /opt/anaconda3/envs/logit_hallu/lib/python3.11/site-packages/torch/lib/../../nvidia/cusparse/lib/libcusparse.so.12: undefined symbol: __nvJitLinkCreate_12_8, version libnvJitLink.so.12

In [None]:
import torch

def calculate_all_features_for_batch(attentions: tuple, num_image_tokens: int) -> torch.Tensor:
    """Compute per-head attention features for the last query position.

    For every layer and head, two features are derived from the attention row
    of the last token in the sequence:

    1. the entropy (base 2) of that attention row, and
    2. (attention mass on "text" keys) - (attention mass on "image" keys).

    NOTE(review): the first ``num_image_tokens`` key positions are treated as
    image tokens and everything after them as text. This only matches the real
    layout if the image tokens form a prefix of the sequence — confirm against
    the prompt template and padding side used by the caller.

    Args:
        attentions: Iterable with one tensor per layer, each indexed as
            (batch_size, num_heads, seq_len, seq_len).
        num_image_tokens: Number of leading key positions counted as image
            tokens.

    Returns:
        A float32 tensor of shape (batch_size, num_layers, num_heads, 2) where
        the last axis holds ``[entropy, text_minus_image]``.
    """
    # One (batch, heads, 2) feature tensor per layer.
    layer_features_list = []

    for layer_attention in attentions:
        # Attention row of the last query token: (batch_size, num_heads, seq_len).
        # Upcast to float32 first: in float16 the 1e-9 epsilon underflows to 0,
        # so any exactly-zero weight would yield 0 * log2(0) = NaN, and
        # half-precision sums over long sequences lose accuracy.
        last_token_attn = layer_attention[:, :, -1, :].float()

        # --- Feature 1: attention entropy, summed over the key axis ---
        attention_entropy = -torch.sum(
            last_token_attn * torch.log2(last_token_attn + 1e-9),
            dim=-1,
        )
        # attention_entropy shape: (batch_size, num_heads)

        # --- Feature 2: (text attention sum) - (image attention sum) ---
        image_attn_sum = torch.sum(last_token_attn[:, :, :num_image_tokens], dim=-1)
        text_attn_sum = torch.sum(last_token_attn[:, :, num_image_tokens:], dim=-1)
        attention_diff = text_attn_sum - image_attn_sum
        # attention_diff shape: (batch_size, num_heads)

        # Join both features on a new last axis: (batch_size, num_heads, 2).
        layer_features = torch.stack([attention_entropy, attention_diff], dim=-1)
        layer_features_list.append(layer_features)

    # Stack layers on dim=1: (batch_size, num_layers, num_heads, 2).
    final_features_tensor = torch.stack(layer_features_list, dim=1)

    return final_features_tensor

In [None]:
import json
import os
from tqdm import tqdm
from PIL import Image
from datetime import datetime
import torch
import torch.nn.functional as F

# When True, generate_with_attention stops after the first batch (smoke-test mode).
IS_TEST = True
# Root directory under which generate_with_attention creates its per-model output folder.
OUTPUT_ROOT = "/data3/KJE/code/WIL_DeepLearningProject_2/VLM_Hallu/output"


def generate_with_attention(model, proc, tok, joint_loader, model_type, split):
    """Run the model over a loader and save per-head attention features to disk.

    For each batch the prompts are built, the model is run once with
    ``output_attentions=True``, and ``calculate_all_features_for_batch`` turns
    the returned attention maps into a (batch, layers, heads, 2) tensor. At most
    ``num_samples`` rows are collected; features and labels are then saved with
    ``torch.save`` as ``{'features': ..., 'labels': ...}``.

    Relies on names defined outside this function: ``build_prompt``,
    ``ensure_images_ok``, ``calculate_all_features_for_batch``, ``DATASET``,
    ``OUTPUT_ROOT`` and ``IS_TEST``.

    Args:
        model: Model exposing ``config.vision_config`` and ``device``, and
            returning ``.attentions`` when called with ``output_attentions=True``.
        proc: Processor called with ``images=`` and ``text=``.
        tok: Tokenizer forwarded to ``build_prompt``.
        joint_loader: Iterable yielding
            ``(idxs, images, questions, gold_answers, labels, image_paths)``.
        model_type: Model label used for prompt building and the output folder.
        split: Dataset split name, used in the output file name.

    Raises:
        RuntimeError: If the model returns no attention weights.
        ValueError: If the loader yields no samples.
    """
    num_samples = 500  # upper bound on the number of rows collected

    vision_cfg = model.config.vision_config
    # Number of vision patches per image; passed on as the image-token count.
    num_image_tokens = (vision_cfg.image_size // vision_cfg.patch_size) ** 2

    device = model.device

    save_dir = os.path.join(OUTPUT_ROOT, f"{model_type}_{DATASET}")
    os.makedirs(save_dir, exist_ok=True)
    # Named after the split so different splits do not overwrite each other;
    # for split="train" this is the previous "train_features_500.pt".
    features_path = os.path.join(save_dir, f"{split}_features_{num_samples}.pt")

    # Chunks are accumulated and concatenated at the end, so the layer/head
    # dimensions come from the attentions actually returned by the model
    # rather than from a config guess, and no zero-padded rows are saved.
    feature_chunks = []
    label_chunks = []
    current_idx = 0
    batch_cnt = 0

    with torch.no_grad():
        for batch in tqdm(joint_loader):
            remaining = num_samples - current_idx
            if remaining <= 0:
                break

            idxs, images, questions, gold_answers, labels, image_paths = batch

            prompts = [build_prompt(tok, q, model_type) for q in questions]
            images = ensure_images_ok(images)
            inputs = proc(
                images=images,
                text=prompts,
                padding=True,
                return_tensors="pt"
            ).to(device)

            outputs = model(
                **inputs,
                output_attentions=True
            )
            attentions = outputs.attentions  # one tensor per layer
            if attentions is None or any(a is None for a in attentions):
                raise RuntimeError(
                    "Model returned no attention weights; load it with an attention "
                    "implementation that supports output_attentions=True."
                )

            features_for_batch = calculate_all_features_for_batch(attentions, num_image_tokens)
            # Move to CPU float32 right away so per-batch GPU memory can be freed.
            features_for_batch = features_for_batch.detach().float().cpu()[:remaining]

            # `batch` is a tuple, so labels come from the unpacked variable.
            # Assumes labels are numeric (list or tensor) — TODO confirm.
            labels_for_batch = torch.as_tensor(labels, dtype=torch.float32).reshape(-1).cpu()[:remaining]

            feature_chunks.append(features_for_batch)
            label_chunks.append(labels_for_batch)

            batch_cnt += 1
            current_idx += features_for_batch.shape[0]

            if IS_TEST and batch_cnt == 1:
                break

    if not feature_chunks:
        raise ValueError("joint_loader yielded no samples; nothing to save.")

    all_features_tensor = torch.cat(feature_chunks, dim=0)
    all_labels_tensor = torch.cat(label_chunks, dim=0)

    torch.save({'features': all_features_tensor, 'labels': all_labels_tensor}, features_path)
    print(f"All features and labels save with {all_labels_tensor.shape} and feature shape is {all_features_tensor.shape}")
            
        

In [1]:
# Driver cell: load the model, build the data loader, and dump attention features.
model_type = "llava1.5"   # label used for prompt building and the output folder name
cur_batch = 1             # forwarded to load_dataset; presumably the batch size — TODO confirm
cur_split = "train"

model, proc, tok = load_model(model_type)
# NOTE(review): load_dataset is not defined in the cells shown here; it must be
# defined or imported before this cell runs.
joint_loader = load_dataset(cur_split, cur_batch)

generate_with_attention(model, proc, tok, joint_loader, model_type, cur_split)

NameError: name 'load_model' is not defined