# 🚁 Spatial Reasoning Tester for Drone Navigation

Interactive tool to test spatial reasoning capabilities on images, videos, and point clouds.
Perfect for exploring spatial understanding for autonomous systems and drone navigation.

## What This Does:
- 📸 **Upload images/videos** and ask spatial questions
- 🔮 **Analyze point clouds** for 3D spatial relationships  
- 📏 **Estimate depths and distances** between objects
- 🚁 **Test drone navigation scenarios** with spatial queries

---

In [None]:
# Setup - Load Spatial Reasoning Models
import warnings
warnings.filterwarnings('ignore')

# Load the first vision-language checkpoint that works and record the outcome
# in MODEL_LOADED. On success `model` and `processor` are bound at module
# level for the cells below; on failure only MODEL_LOADED (False) is set.
try:
    from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
    import torch
    print("✅ Transformers available")

    # Both checkpoints below are Qwen2.5-VL models. Loading them through the
    # Qwen2-VL class triggers the "model of type qwen2_5_vl to instantiate a
    # model of type qwen2_vl" warning and can fail, so prefer the matching
    # class and only fall back to the old one on transformers releases that
    # do not ship it.
    try:
        from transformers import Qwen2_5_VLForConditionalGeneration as _VLModelClass
    except ImportError:
        _VLModelClass = Qwen2VLForConditionalGeneration

    # Try to load SpaceThinker (best for spatial reasoning)
    try:
        # NOTE: recent transformers releases prefer `dtype` over `torch_dtype`;
        # the older keyword is kept so earlier releases keep working.
        model = _VLModelClass.from_pretrained(
            "remyxai/SpaceThinker-Qwen2.5VL-3B",
            torch_dtype=torch.bfloat16,
            device_map="auto",
            trust_remote_code=True
        )
        processor = AutoProcessor.from_pretrained(
            "remyxai/SpaceThinker-Qwen2.5VL-3B",
            trust_remote_code=True
        )
        print("🎯 Loaded SpaceThinker-Qwen2.5VL-3B (optimized for spatial reasoning)")
        MODEL_LOADED = True
    except Exception as primary_err:
        # `Exception` (not a bare except) so Ctrl-C can still abort a long
        # download; the cause is printed instead of being silently dropped.
        print("⚠️  SpaceThinker not available, trying base Qwen2.5-VL...")
        print(f"   Reason: {type(primary_err).__name__}: {primary_err}")
        try:
            model = _VLModelClass.from_pretrained(
                "Qwen/Qwen2.5-VL-3B-Instruct",
                torch_dtype=torch.bfloat16,
                device_map="auto"
            )
            processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-3B-Instruct")
            print("✅ Loaded Qwen2.5-VL-3B-Instruct (base vision-language model)")
            MODEL_LOADED = True
        except Exception as fallback_err:
            print("❌ Could not load any vision-language model")
            print(f"   Reason: {type(fallback_err).__name__}: {fallback_err}")
            MODEL_LOADED = False

except ImportError:
    print("❌ Transformers not available. Install: pip install transformers torch")
    MODEL_LOADED = False

# Other imports
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import requests
from IPython.display import display, HTML
import ipywidgets as widgets
from io import BytesIO

2025-10-01 16:27:41.985610: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-10-01 16:27:41.993253: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1759325262.001885   77822 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1759325262.004676   77822 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1759325262.011966   77822 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

`torch_dtype` is deprecated! Use `dtype` instead!


✅ Transformers available


You are using a model of type qwen2_5_vl to instantiate a model of type qwen2_vl. This is not supported for all configurations of models and can yield errors.


Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

⚠️  SpaceThinker not available, trying base Qwen2.5-VL...


You are using a model of type qwen2_5_vl to instantiate a model of type qwen2_vl. This is not supported for all configurations of models and can yield errors.


Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Spatial Reasoning Function
def ask_spatial_question(image, question):
    """Ask the loaded vision-language model a spatial question about an image.

    Relies on the module-level ``MODEL_LOADED``, ``model``, ``processor`` and
    ``torch`` names created by the setup cell.

    Args:
        image: Image handed to the processor as the single visual input
            (presumably a PIL image, as produced by ``load_test_image`` —
            confirm for other callers).
        question: Natural-language question about the image.

    Returns:
        The decoded model answer. When no model is loaded, no image is given,
        or anything fails during processing/generation, a message string
        starting with "❌" is returned instead of raising.
    """
    if not MODEL_LOADED:
        return "❌ No spatial reasoning model loaded. Please install transformers and torch."
    if image is None:
        return "❌ Error: no image provided"

    try:
        # Prepare the conversation
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image", "image": image},
                    {"type": "text", "text": question}
                ]
            }
        ]

        # Render the chat prompt, then let the processor pair it with the
        # image. The processor has no `process_vision_info` method (that
        # helper lives in the separate qwen_vl_utils package), so the image
        # is passed to the processor directly.
        text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        inputs = processor(text=[text], images=[image], padding=True, return_tensors="pt")
        # device_map="auto" may place the model on an accelerator; the inputs
        # must live on the same device as the model.
        inputs = inputs.to(model.device)

        # Greedy decoding: `temperature` is dropped because it only applies
        # when sampling and otherwise just produces a warning.
        with torch.no_grad():
            generated_ids = model.generate(
                **inputs,
                max_new_tokens=256,
                do_sample=False
            )

        # Strip the prompt tokens so only the newly generated answer is decoded
        generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
        response = processor.batch_decode(generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

        return response

    except Exception as e:
        # Deliberate best-effort: surface the failure as text for the notebook
        return f"❌ Error: {str(e)}"

# Tell the user whether the spatial reasoning helper is usable
if not MODEL_LOADED:
    print("⚠️  Spatial reasoning not available - models not loaded")
else:
    print("🎯 Spatial reasoning function ready!")
    print("Usage: ask_spatial_question(your_image, 'How tall is the building?')")

## 🚁 Test Drone Navigation Scenarios

Let's test spatial reasoning with scenarios relevant to drone navigation and autonomous systems.

In [None]:
# Load test images for drone scenarios
def load_test_image(url=None, local_path=None, timeout=10):
    """Load a test image from a URL, a local path, or a generated placeholder.

    Args:
        url: Address to download the image from; takes precedence over
            ``local_path`` when both are given.
        local_path: Filesystem path of the image, used when ``url`` is falsy.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The loaded image, a 400x300 light-blue RGB placeholder when neither
        source is given, or ``None`` if loading fails (the error is printed).
    """
    try:
        if url:
            # Without a timeout a stalled server would hang the notebook cell
            response = requests.get(url, timeout=timeout)
            # Fail on HTTP errors instead of trying to decode an error page
            response.raise_for_status()
            image = Image.open(BytesIO(response.content))
            # Image.open is lazy; decode now so corrupt data is caught here
            image.load()
        elif local_path:
            image = Image.open(local_path)
            # Force the read so decode errors are reported by this function
            image.load()
        else:
            # Create a simple test image if no image provided
            image = Image.new('RGB', (400, 300), color='lightblue')

        return image
    except Exception as e:
        print(f"Error loading image: {e}")
        return None

# Spatial queries a drone might need answered about a scene
drone_questions = [
    "What is the height of the tallest building in this image?",
    "How much clearance space is there between the buildings?",
    "What is the distance from the drone's viewpoint to the nearest obstacle?",
    "Is there enough vertical space for a 2-meter tall drone to fly through?",
    "Which direction offers the most open space for navigation?",
    "What are the dimensions of the open area in the center?",
    "How wide is the gap between the two structures?",
    "What is the safest flight path through this environment?",
]

# Print the numbered catalogue of questions (numbering starts at 1)
print("🎯 Drone Navigation Test Questions:")
print("\n".join(
    f"{number}. {text}" for number, text in enumerate(drone_questions, start=1)
))

# Interactive testing function
def test_spatial_reasoning(image_source=None, custom_question=None):
    """Show an image and print the model's answers to spatial questions.

    Args:
        image_source: URL (a string starting with 'http') or local file path
            of the image. When omitted, the first existing file from a fixed
            list of sample paths is used.
        custom_question: A single question to ask. When omitted, the first
            three entries of ``drone_questions`` are asked.

    Returns:
        None. Results are displayed with matplotlib and printed; the function
        returns early with a message when no image can be found or loaded.
    """
    # Local import: `os` is not among the notebook's visible top-level imports
    import os

    # Load image
    if image_source:
        if image_source.startswith('http'):
            image = load_test_image(url=image_source)
        else:
            image = load_test_image(local_path=image_source)
    else:
        # Use a sample image from your dataset if available
        sample_images = [
            "/home/isr-lab3/James/VQASynth-UAV/assets/warehouse_sample_1.jpeg",
            "/home/isr-lab3/James/VQASynth-UAV/assets/warehouse_sample_2.jpeg"
        ]

        # First sample that exists on this machine, if any
        sample_path = next((path for path in sample_images if os.path.exists(path)), None)
        if sample_path is None:
            print("No test images found. Please provide an image URL or path.")
            return
        image = load_test_image(local_path=sample_path)

    # load_test_image signals failure by returning None
    if image is None:
        print("Failed to load test image")
        return

    # Display image
    plt.figure(figsize=(10, 6))
    plt.imshow(image)
    plt.axis('off')
    plt.title('Test Image for Spatial Reasoning')
    plt.show()

    # Test questions
    questions_to_test = [custom_question] if custom_question else drone_questions[:3]

    print("\n🔍 Spatial Reasoning Results:")
    print("=" * 50)

    for i, question in enumerate(questions_to_test, 1):
        # Skip empty entries but keep the numbering tied to list position
        if not question:
            continue

        print(f"\nQ{i}: {question}")
        answer = ask_spatial_question(image, question)
        print(f"A{i}: {answer}")
        print("-" * 30)

# Closing banner: confirm setup finished and list example invocations
print("\n".join((
    "\n✅ Ready to test spatial reasoning!",
    "Usage examples:",
    "• test_spatial_reasoning()  # Use default test images",
    "• test_spatial_reasoning('path/to/your/image.jpg')",
    "• test_spatial_reasoning('https://example.com/image.jpg')",
    "• test_spatial_reasoning(custom_question='How tall is the drone?')",
)))