In [4]:
import subprocess
import sys
import os

print("🔍 Updated System Information:")
print(f"Python version: {sys.version}")
print(f"Current working directory: {os.getcwd()}")

# Check GPU availability with detailed info
try:
    result = subprocess.run(['nvidia-smi'], capture_output=True, text=True)
    if result.returncode == 0:
        print("✅ GPU detected:")
        print(result.stdout)

        # Also check with PyTorch
        try:
            import torch
            print(f"\n🔥 PyTorch GPU Info:")
            print(f"✅ PyTorch version: {torch.__version__}")
            print(f"✅ CUDA available: {torch.cuda.is_available()}")
            if torch.cuda.is_available():
                print(f"✅ CUDA version: {torch.version.cuda}")
                print(f"✅ GPU device: {torch.cuda.get_device_name(0)}")
                print(f"✅ GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
            else:
                print("❌ CUDA not available in PyTorch")
        except ImportError:
            print("📦 PyTorch not yet installed - will be installed with LLaVA")

    else:
        print("❌ No GPU detected")
        print("Please enable GPU runtime: Runtime → Change runtime type → T4 GPU")

except FileNotFoundError:
    print("❌ nvidia-smi not found - no GPU available")
    print("🔧 Please enable GPU runtime: Runtime → Change runtime type → T4 GPU")

# Memory info
print(f"\n💾 System Memory:")
try:
    with open('/proc/meminfo', 'r') as f:
        meminfo = f.read()
        for line in meminfo.split('\n'):
            if 'MemTotal' in line:
                total_mem = int(line.split()[1]) / 1024 / 1024  # Convert to GB
                print(f"✅ Total RAM: {total_mem:.1f} GB")
                break
except:
    print("❓ Could not determine system memory")

print("\n" + "="*50)
if 'nvidia-smi' in str(subprocess.run(['which', 'nvidia-smi'], capture_output=True, text=True).stdout):
    print("🎉 GPU setup is ready for LLaVA!")
    print("👉 You can now proceed with the installation cells")
else:
    print("⚠️  No GPU detected - LLaVA will be very slow on CPU")
    print("🔧 Strongly recommend enabling GPU runtime first")
print("="*50)

🔍 Updated System Information:
Python version: 3.10.17 (main, Apr 25 2025, 17:54:28) [GCC 11.4.0]
Current working directory: /home/vasu/Documents/experiments/sereact
✅ GPU detected:
Mon May 26 14:11:50 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.247.01             Driver Version: 535.247.01   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 3060 ...    On  | 00000000:01:00.0 Off |                  N/A |
| N/A   48C    P8               9W /  60W |     11MiB /  6144MiB |      0%      Default |
|                                         |                      | 

In [1]:
import torch
print(torch.__version__)

2.1.2+cu121


In [8]:
print("🔍 Verifying installation...")

try:
    # Test basic imports
    import torch
    print(f"✅ PyTorch version: {torch.__version__}")
    print(f"✅ CUDA available: {torch.cuda.is_available()}")
    
    # Test LLaVA imports
    from LLaVA.llava.model.builder import load_pretrained_model
    from LLaVA.llava.mm_utils import get_model_name_from_path
    print("✅ LLaVA imports successful!")
    
    # Check if flash_attn is available
    try:
        import flash_attn
        print("✅ Flash Attention available!")
    except ImportError:
        print("⚠️ Flash Attention not available (but LLaVA should still work)")
        
except ImportError as e:
    print(f"❌ Import error: {e}")
    print("Some components may not be properly installed.")

print("\n🎉 Installation complete!")


🔍 Verifying installation...
✅ PyTorch version: 2.1.2+cu121
✅ CUDA available: True
❌ Import error: No module named 'llava'
Some components may not be properly installed.

🎉 Installation complete!


In [2]:
from llava.conversation import conv_templates
from llava.mm_utils import tokenizer_image_token, process_images, IMAGE_TOKEN_INDEX
from PIL import Image
import torch
import traceback # Moved import to top level for clarity, though it's only used in except block

# Cell 10: LLaVA test with proper dtype handling
def dtype_fixed_test(model, tokenizer, image_processor, question="What color is this image?"):
    """
    LLaVA test with proper dtype handling for quantized models
    
    Args:
        model: The LLaVA model.
        tokenizer: The tokenizer associated with the model.
        image_processor: The image processor associated with the model.
        question (str): The question to ask about the image.
    """
    print(f"🧪 Dtype-fixed test: '{question}'")
    
    try:
        # Step 1: Create test image
        print("Step 1: Creating test image...")
        image = Image.new('RGB', (336, 336), color=(255, 165, 0))  # Orange image
        print(f"✅ Image created: {image.size}")
        
        # Step 2: Process image with proper dtype
        print("Step 2: Processing image...")
        # Ensure model.config is accessible; it's common for Hugging Face models
        if not hasattr(model, 'config'):
            print("❌ Error: Model does not have a 'config' attribute.")
            return "Failed: Model does not have a 'config' attribute."
            
        image_tensor = process_images([image], image_processor, model.config)
        
        if image_tensor is None or len(image_tensor) == 0:
            return "❌ Image processing failed"
        
        image_tensor = image_tensor[0]
        print(f"✅ Image processed: {image_tensor.shape}")
        
        # Step 3: Setup conversation
        print("Step 3: Setting up conversation...")
        # Ensure the conversation template key is valid
        if "mistral_instruct" not in conv_templates:
            # Fallback or error if specific template is missing
            # For LLaVA, common templates are 'llava_v1', 'vicuna_v1', 'llava_llama_2' etc.
            # Using a generic or common one if "mistral_instruct" is not found.
            # This part might need adjustment based on available templates.
            # For now, let's assume 'llava_v1' is a more generic choice if 'mistral_instruct' fails.
            # Or, the user needs to ensure "mistral_instruct" is correct.
            # Forcing a known key for now to prevent crashes if "mistral_instruct" is not default for llava.
            # If "mistral_instruct" is specific to their setup, they can keep it.
            # A common LLaVA template is often 'llava_v1' or 'vicuna_v1'.
            # Let's assume 'llava_v1' for robustness if 'mistral_instruct' is not present.
            conv_template_key = "llava_v1" 
            if "mistral_instruct" in conv_templates:
                conv_template_key = "mistral_instruct"
            elif not conv_templates: # if conv_templates is empty
                 print("❌ Error: conv_templates is empty.")
                 return "Failed: conv_templates is empty."
            elif conv_template_key not in conv_templates: # if llava_v1 is also not found
                conv_template_key = list(conv_templates.keys())[0] # take the first available
            
            print(f"⚠️ Warning: 'mistral_instruct' not found. Using '{conv_template_key}'.")
            conv = conv_templates[conv_template_key].copy()
        else:
            conv = conv_templates["mistral_instruct"].copy()
            
        conv.append_message(conv.roles[0], question)
        conv.append_message(conv.roles[1], None)
        prompt = conv.get_prompt()
        print(f"✅ Conversation ready")
        
        # Step 4: Tokenize
        print("Step 4: Tokenizing...")
        input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt')
        if input_ids is None:
            return "❌ Tokenization failed"
        input_ids = input_ids.unsqueeze(0)
        print(f"✅ Tokenized: {input_ids.shape}")
        
        # Step 5: Move to device and fix dtype
        print("Step 5: Moving to device and fixing dtypes...")
        device = next(model.parameters()).device
        model_dtype = next(model.parameters()).dtype
        
        print(f"Model device: {device}")
        print(f"Model dtype: {model_dtype}")
        
        input_ids = input_ids.to(device)
        image_tensor = image_tensor.to(device, dtype=model_dtype)
        
        image_sizes = [image.size]
        
        print(f"✅ Input IDs: {input_ids.dtype} on {input_ids.device}")
        print(f"✅ Image tensor: {image_tensor.dtype} on {image_tensor.device}")
        print(f"✅ Image sizes: {image_sizes}")
        
        # Step 6: Generate with proper attention mask
        print("Step 6: Generating...")
        with torch.inference_mode():
            attention_mask = torch.ones_like(input_ids, dtype=torch.bool)
            
            output_ids = model.generate(
                input_ids,
                attention_mask=attention_mask,
                images=image_tensor.unsqueeze(0), # model expects batch, so unsqueeze
                image_sizes=image_sizes,
                max_new_tokens=100,
                use_cache=True,
                do_sample=True,
                temperature=0.1,
                top_p=0.9,
                pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id is not None else tokenizer.pad_token_id
            )
        
        if output_ids is None:
            return "❌ Generation failed"
        
        print(f"✅ Generated: {output_ids.shape}")
        
        # Step 7: Decode
        print("Step 7: Decoding...")
        # Ensure output_ids are on CPU for decoding if tokenizer expects that
        outputs = tokenizer.batch_decode(output_ids.cpu(), skip_special_tokens=True)
        
        if not outputs:
            return "❌ Decoding failed"
        
        response = outputs[0].strip()
        
        # Clean response: remove the prompt part if it's echoed
        # This specific cleaning logic might need adjustment based on the model's output format
        # A common way LLaVA echoes is "ASSISTANT: <response>" or just "<response>"
        # The original logic for removing question was:
        # if question in response:
        #    response = response.split(question)[-1].strip()
        # This might be too aggressive if the question is naturally part of the answer.
        # A more robust way is to find the end of the prompt in the generated output.
        # The prompt includes the question.
        # `outputs[0]` contains the full sequence including prompt and generated part.
        # We need to decode `input_ids` to get the prompt text sent to the model
        # prompt_text_decoded = tokenizer.decode(input_ids[0], skip_special_tokens=True)
        # if response.startswith(prompt_text_decoded):
        #    response = response[len(prompt_text_decoded):].strip()
            
        # The provided cleaning looks for specific prefixes, which is fine.
        # Also, the prompt itself sometimes appears as a prefix in the output.
        # Let's try to remove the full prompt if it is repeated.
        clean_prompt_for_checking = prompt.replace(IMAGE_TOKEN_INDEX, '').replace('<image>', '').strip()

        # Check if the response starts with the prompt (common behavior)
        # The prompt prepared `conv.get_prompt()` includes roles, e.g., "USER: ... ASSISTANT:"
        # Often, the model output only contains the text after "ASSISTANT:"
        # The original cleaning logic:
        # if question in response:
        #     response = response.split(question)[-1].strip()
        # This is okay, but checking for standard role markers might be more general.

        # The current prefix stripping loop is a good general approach:
        cleaned_response = response
        # If prompt ends with assistant's role, and model output starts with it, it can be stripped
        assistant_role_marker = conv.roles[1] # e.g., "ASSISTANT:"
        if cleaned_response.startswith(assistant_role_marker):
            cleaned_response = cleaned_response[len(assistant_role_marker):].strip()
        
        # Then the custom prefixes:
        for prefix in ["[/INST]", "Assistant:", "ASSISTANT:", "A:", "Answer:"]: # ensure consistency with actual output
            if cleaned_response.startswith(prefix):
                cleaned_response = cleaned_response[len(prefix):].strip()
                break
        
        # If the question itself is repeated at the start of the cleaned response (after role markers)
        if cleaned_response.lower().startswith(question.lower()):
             temp_response = cleaned_response[len(question):].strip()
             # check if the removal makes sense (e.g. starts with a colon or similar)
             if temp_response.startswith(':') or temp_response.startswith('-') or len(temp_response) < len(cleaned_response) * 0.8:
                 cleaned_response = temp_response.lstrip(':').strip()


        print(f"✅ SUCCESS! Response: {cleaned_response}")
        return cleaned_response
        
    except Exception as e:
        print(f"❌ Error: {str(e)}")
        traceback.print_exc()
        return f"Failed: {str(e)}"

ModuleNotFoundError: No module named 'llava'