In [None]:
import os
import csv
import torch
from datetime import datetime

# Read configuration from injected cells or environment.
# Precedence for every setting: a truthy injected CONFIG_* global wins, then the
# environment variable, then the hard-coded default.
def parse_bool(value):
    """Return a real bool for a flag that may arrive as a bool, number or text.

    Strings are compared textually (only 'true', in any case, is True) so an
    injected 'False' is not mistaken for truthy just because it is non-empty.
    """
    if isinstance(value, str):
        return value.strip().lower() == 'true'
    return bool(value)


def parse_int(value, default=0):
    """Convert value to int; warn and return default when it is not an integer."""
    try:
        return int(value)
    except (TypeError, ValueError):
        print(f'WARNING: invalid integer setting {value!r}; using {default}')
        return default


# NOTE(review): the default model id has no organisation prefix - confirm it
# resolves to the intended Hub repository or local directory.
MODEL_ID = globals().get('CONFIG_MODEL_ID') or os.environ.get('MODEL_ID', 'SmolVLM2-2.2B')
BENCHMARK_PRECISION = globals().get('CONFIG_PRECISION') or os.environ.get('PRECISION', 'fp32')
# Always a bool, whether the value was injected as a bool or supplied as text.
TENSORRT = parse_bool(globals().get('CONFIG_TENSORRT') or os.environ.get('TENSORRT', 'False'))
RESULT_DIR = globals().get('CONFIG_OUTPUT_SUBDIR') or os.environ.get('OUTPUT_SUBDIR', 'results')
# Always an int; 0 means "no limit" (see the image-selection cell).
IMAGE_LIMIT = parse_int(globals().get('CONFIG_IMAGE_LIMIT') or os.environ.get('IMAGE_LIMIT', '0'))

# Echo the resolved settings plus the torch/CUDA status, one item per line.
report_lines = (
    '=== INJECTED CONFIGURATION ===',
    f'MODEL_ID: {MODEL_ID}',
    f'BENCHMARK_PRECISION: {BENCHMARK_PRECISION}',
    f'TENSORRT: {TENSORRT}',
    f'RESULT_DIR: {RESULT_DIR}',
    f'IMAGE_LIMIT: {IMAGE_LIMIT}',
    f'TORCH_VERSION: {torch.__version__}',
    f'CUDA_AVAILABLE: {torch.cuda.is_available()}',
    '===========================\n',
)
print(*report_lines, sep='\n')

In [None]:
# Create the output directory and write two small marker files into it.
# Any failure is reported with a print instead of stopping the notebook.
boot_markers = (
    ('notebook_boot.txt', 'Notebook boot test: kernel started successfully.'),
    ('notebook_test.txt', 'Notebook file write test successful.'),
)
try:
    os.makedirs(RESULT_DIR, exist_ok=True)

    for marker_name, marker_text in boot_markers:
        marker_path = os.path.join(RESULT_DIR, marker_name)
        with open(marker_path, 'w', encoding='utf-8') as marker_file:
            marker_file.write(marker_text)

    print(f'NOTEBOOK BOOT: Successfully created test files in {RESULT_DIR}')
except Exception as boot_err:
    print(f'NOTEBOOK BOOT ERROR: {boot_err}')

In [None]:
import os
import sys
from pathlib import Path
from huggingface_hub import login, get_token

print("\n" + "="*60)
print("MODEL LOADING CELL STARTED")
print("="*60)

# Token lookup: the variable this notebook has always read comes first, then the
# names huggingface_hub itself recognises, so a token exported under any of them
# is picked up.
hf_token = (
    os.environ.get('HUGGINGFACE_HUB_TOKEN')
    or os.environ.get('HF_TOKEN')
    or os.environ.get('HUGGING_FACE_HUB_TOKEN')
    or ''
)
print(f"HF_TOKEN in environment: {bool(hf_token)}")
print(f"MODEL_ID: {MODEL_ID}")
print(f"BENCHMARK_PRECISION: {BENCHMARK_PRECISION}")

# Results of this cell, read by later cells. model_load_error holds the most
# recent failure message, or None when nothing failed.
processor = None
model = None
model_load_error = None

# Map the benchmark precision label to the dtype the weights are loaded in.
# An explicit dtype is passed so the weights really are in the precision the
# results are labelled with; 'auto' would follow the checkpoint's own dtype.
precision_dtypes = {
    'fp32': torch.float32,
    'fp16': torch.float16,
    'bf16': torch.bfloat16,
}
load_dtype = precision_dtypes.get(str(BENCHMARK_PRECISION).strip().lower())
if load_dtype is None:
    # Unknown label: leave the dtype decision to from_pretrained.
    print(f"⚠ Unrecognised precision '{BENCHMARK_PRECISION}' - using the library default dtype")

# A model id counts as local when it is an absolute POSIX path, a Windows drive
# path (C:\...), or an existing directory relative to the working directory.
is_local_path = (
    MODEL_ID.startswith('/')
    or (len(MODEL_ID) > 2 and MODEL_ID[1] == ':')
    or os.path.isdir(MODEL_ID)
)

if is_local_path:
    # Normalize Windows backslashes to forward slashes for from_pretrained()
    model_path = MODEL_ID.replace('\\', '/')
    print(f"Detected local model path: {model_path}")
else:
    model_path = MODEL_ID

# The token is only sent when talking to the Hub; it is not needed for local files.
auth_token = hf_token if (hf_token and not is_local_path) else None

try:
    if auth_token:
        print(f'\nAuthenticating with HuggingFace token...')
        login(token=auth_token, add_to_git_credential=False)
        print(f'✓ Authentication successful')
    elif is_local_path:
        print('Loading from local path - no HF authentication needed')
    else:
        print('⚠ No HuggingFace token - loading public models only')

    from transformers import AutoProcessor, AutoModelForImageTextToText

    # Processor and model are loaded independently so that a failure in one
    # still reports whether the other can be loaded.
    print(f'\n1. Loading processor for {model_path}...')
    try:
        processor = AutoProcessor.from_pretrained(
            model_path,
            trust_remote_code=True,
            token=auth_token
        )
        print('✓ Processor loaded successfully')
    except Exception as proc_err:
        error_msg = f'{type(proc_err).__name__}: {str(proc_err)[:300]}'
        print(f'✗ ERROR loading processor: {error_msg}')
        model_load_error = error_msg
        processor = None

    print(f'\n2. Loading model in {BENCHMARK_PRECISION}...')
    try:
        model = AutoModelForImageTextToText.from_pretrained(
            model_path,
            trust_remote_code=True,
            token=auth_token,
            torch_dtype=load_dtype
        )
        print('✓ Model loaded successfully')
    except Exception as model_err:
        error_msg = f'{type(model_err).__name__}: {str(model_err)[:300]}'
        print(f'✗ ERROR loading model: {error_msg}')
        model_load_error = error_msg
        model = None

    if processor is not None and model is not None:
        print(f'\n✓✓ SUCCESS: Both processor and model loaded!')
    else:
        print(f'\n✗✗ FAILURE: processor={processor is not None}, model={model is not None}')
        if model_load_error:
            print(f'Latest error: {model_load_error}')

except Exception as e:
    # Anything outside the two guarded loads (login, the transformers import)
    # lands here; both outputs are reset so later cells see a clean failure.
    error_msg = f'{type(e).__name__}: {str(e)[:300]}'
    print(f'\n✗ CRITICAL ERROR: {error_msg}')
    model_load_error = error_msg
    processor = None
    model = None

print("="*60 + "\n")

In [None]:
# Install the pinned transformers release using the interpreter that runs this kernel.
# NOTE(review): `sys` comes from an import in an earlier cell, and this cell sits after
# the model-loading cell, which has already imported transformers - presumably the
# pinned version is only picked up after a kernel restart; confirm, and consider
# moving this cell to the top of the notebook.
!{sys.executable} -m pip install transformers==4.57.1

In [None]:
# Print the installed package metadata for torch and transformers, queried through
# the same interpreter that runs this kernel.
!{sys.executable} -m pip show torch
!{sys.executable} -m pip show transformers

In [None]:
# Prepare for inference: build a dummy image and message, pick the device and,
# when a model is loaded, move it there and switch it to eval mode.
# No generation is run in this cell.
try:
    from PIL import Image
    import time

    # Create a dummy image for testing
    dummy_img = Image.new('RGB', (224, 224), color='red')

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # NOTE(review): the image entry carries the placeholder text '<PIL Image>', not a
    # real path, so this message presumably cannot be fed to a processor as-is.
    messages = [{'role': 'user', 'content': [{'type': 'image', 'path': '<PIL Image>'}, {'type': 'text', 'text': 'Describe this image'}]}]

    if globals().get('model') is None:
        # The loading cell failed or was skipped; say so explicitly rather than
        # surfacing it as an AttributeError on None.
        print('Skipping device placement: model is not loaded')
    else:
        model = model.to(device)
        model.eval()
        print(f'Testing inference on {device}...')
except Exception as e:
    print(f'Warning during test: {e}')

In [None]:
# Results table location; its parent directory is created before anything is written.
csv_path = os.path.join(RESULT_DIR, 'benchmark_results.csv')
csv_parent = os.path.dirname(csv_path)
os.makedirs(csv_parent, exist_ok=True)

# Column names of the results CSV, in output order.
fieldnames = [
    'image_filename',
    'model_id',
    'precision',
    'tensorrt',
    'device',
    'latency_ms',
    'memory_used_mb',
    'tokens_generated',
    'throughput_tokens_per_sec',
    'timestamp',
    'response',
    'error',
]

# Use the GPU when torch reports one, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# File extensions (lower-case) that count as benchmark images.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')


def list_image_files(source, extensions=IMAGE_EXTENSIONS):
    """Return the image paths found at source in a stable, sorted order.

    source may be a directory (searched recursively, extension match is
    case-insensitive), a single file (returned as a one-element list without an
    extension check), or a path that does not exist (empty list).
    Sorting matters because os.walk yields entries in arbitrary order, which
    would make a limited run pick a different subset each time.
    """
    if os.path.isdir(source):
        return sorted(
            os.path.join(root, fn)
            for root, _, files in os.walk(source)
            for fn in files
            if fn.lower().endswith(extensions)
        )
    # fallback: if a single file path provided
    if os.path.isfile(source):
        return [source]
    return []


# Determine images to process
images_dir = os.environ.get('CHART_IMAGES_DIR', '/kaggle/input/chart-patterns')
image_paths = list_image_files(images_dir)
if not image_paths:
    print(f'WARNING: no images found at {images_dir}')

# Respect image limit (0 means no limit). The raw value may be an injected
# int/str or an environment string, so it is parsed defensively.
raw_limit = globals().get('CONFIG_IMAGE_LIMIT') or os.environ.get('IMAGE_LIMIT', '0')
try:
    limit = int(raw_limit)
except (TypeError, ValueError):
    print(f'WARNING: invalid image limit {raw_limit!r}; processing all images')
    limit = 0
if limit > 0:
    image_paths = image_paths[:limit]

results = []

# Reuse whatever an earlier cell left in `processor` / `model`; a name that was
# never defined is treated the same as one that is None.
processor = globals().get('processor')
model = globals().get('model')

# If either piece is missing, make one direct attempt to load both from MODEL_ID.
# A failure is only reported here; the variables keep whatever value they had.
if not (processor is not None and model is not None):
    try:
        from transformers import AutoProcessor, AutoModelForImageTextToText
        processor = AutoProcessor.from_pretrained(MODEL_ID)
        model = AutoModelForImageTextToText.from_pretrained(MODEL_ID)
    except Exception as load_exc:
        import traceback
        print(f'Warning: Could not load model/processor: {load_exc}')
        traceback.print_exc()

import time
import traceback

# Prompt sent with every image when the processor has a chat template.
PROMPT_TEXT = 'Describe this image'
# Upper bound on generated tokens per image.
MAX_NEW_TOKENS = 128


def build_model_inputs(processor, image, prompt):
    """Build the processor output for one image.

    Processors that ship a chat template get a user turn holding the image and
    the prompt, so the tokenised text contains the image placeholders a chat
    vision-language model expects. Processors without a template are called
    with the image alone.
    """
    if getattr(processor, 'chat_template', None):
        conversation = [{
            'role': 'user',
            'content': [
                {'type': 'image', 'image': image},
                {'type': 'text', 'text': prompt},
            ],
        }]
        return processor.apply_chat_template(
            conversation,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors='pt',
        )
    return processor(images=image, return_tensors='pt')


def move_inputs(inputs, device, dtype=None):
    """Move tensors to device and cast floating-point ones to dtype when given.

    Integer tensors (token ids, masks) keep their dtype; non-tensor values are
    passed through untouched.
    """
    moved = {}
    for key, value in inputs.items():
        if torch.is_tensor(value):
            if dtype is not None and value.is_floating_point():
                value = value.to(device=device, dtype=dtype)
            else:
                value = value.to(device)
        moved[key] = value
    return moved


def split_new_tokens(generated, inputs, model):
    """Return only the newly generated tokens, dropping an echoed prompt.

    The prompt is stripped only when input ids were supplied, the model is not
    flagged as encoder-decoder, and the output is a tensor-like object.
    """
    prompt_ids = inputs.get('input_ids')
    encoder_decoder = getattr(getattr(model, 'config', None), 'is_encoder_decoder', False)
    if prompt_ids is None or encoder_decoder or not hasattr(generated, 'shape'):
        return generated
    return generated[:, prompt_ids.shape[-1]:]


# Process each image and run inference if model loaded
for img_path in image_paths:
    row = {
        'image_filename': os.path.basename(img_path),
        'model_id': MODEL_ID,
        'precision': BENCHMARK_PRECISION,
        'tensorrt': str(TENSORRT),
        'device': device,
        'latency_ms': None,
        'memory_used_mb': None,
        'tokens_generated': None,
        'throughput_tokens_per_sec': None,
        'timestamp': datetime.now().isoformat(),
        'response': None,
        'error': None
    }
    try:
        from PIL import Image
        # The context manager closes the file once the pixels have been copied.
        with Image.open(img_path) as source_img:
            img = source_img.convert('RGB')

        if processor is None or model is None:
            error_detail = f'processor={processor is not None}, model={model is not None}'
            # Looked up defensively: the model-loading cell may not have been run.
            load_error = globals().get('model_load_error')
            if load_error:
                error_detail += f', load_error={load_error}'
            raise RuntimeError(f'Model or processor not available ({error_detail})')

        inputs = build_model_inputs(processor, img, PROMPT_TEXT)
        model.to(device)  # effectively a no-op after the first image
        # Floating-point inputs must match the model's dtype (fp16/bf16 runs).
        inputs = move_inputs(inputs, device, getattr(model, 'dtype', None))

        on_cuda = (device == 'cuda')
        if on_cuda:
            torch.cuda.reset_peak_memory_stats()
            # Drain pending GPU work so it is not billed to this image.
            torch.cuda.synchronize()

        can_generate = hasattr(model, 'generate')
        start = time.perf_counter()
        if can_generate:
            gen = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS)
        else:
            with torch.no_grad():
                out = model(**inputs)
        if on_cuda:
            torch.cuda.synchronize()
        # Stop the clock before decoding so only model execution is measured.
        elapsed = time.perf_counter() - start

        if can_generate:
            new_tokens = split_new_tokens(gen, inputs, model)
            # Attempt to decode using processor if possible
            try:
                response = processor.batch_decode(new_tokens, skip_special_tokens=True)[0]
            except Exception:
                response = str(new_tokens)
            tokens = int(new_tokens.shape[-1]) if hasattr(new_tokens, 'shape') else None
        else:
            response = str(out)
            tokens = None

        memory_mb = None
        if on_cuda:
            memory_mb = torch.cuda.max_memory_allocated() / (1024.0 * 1024.0)
        throughput = (tokens / elapsed) if tokens and elapsed > 0 else None

        row['latency_ms'] = round(elapsed * 1000.0, 3)
        row['memory_used_mb'] = round(memory_mb, 3) if memory_mb is not None else None
        row['tokens_generated'] = tokens
        row['throughput_tokens_per_sec'] = round(throughput, 3) if throughput is not None else None
        row['response'] = response
    except Exception as e:
        # Keep going: the failure is recorded on this image's row. The type name
        # is included because some exceptions have an empty message.
        row['error'] = f'{type(e).__name__}: {e}'
        traceback.print_exc()
    results.append(row)

# Save every collected row to csv_path under the fieldnames header.
# A failure is printed with its traceback rather than raised.
try:
    with open(csv_path, mode='w', newline='', encoding='utf-8') as out_file:
        table = csv.DictWriter(out_file, fieldnames=fieldnames)
        table.writeheader()
        for record in results:
            table.writerow(record)
    print(f'Successfully wrote {len(results)} results to {csv_path}')
except Exception as write_err:
    import traceback
    print(f'ERROR writing CSV: {write_err}')
    traceback.print_exc()

In [None]:
# Closing summary: where the results were written and how the run was configured.
summary_lines = (
    f'\nBenchmark complete!',
    f'Results saved to: {csv_path}',
    f'Device: {device}',
    f'Model: {MODEL_ID}',
    f'Precision: {BENCHMARK_PRECISION}',
    f'TensorRT: {TENSORRT}',
    f'Output dir: {RESULT_DIR}',
)
print(*summary_lines, sep='\n')