# Prompt Engineering Experiment

Evaluate how refined textual prompts affect OWL-ViT detection quality.

---

In [None]:
## 1. Setup & Imports

import sys, os
from pathlib import Path

# Add the project root to the front of the import path so that the
# `tools.*` imports below can resolve.
# parents[2] is the directory three levels above the current working directory.
# NOTE(review): this depends on where the notebook kernel was started —
# confirm the notebook really lives three levels below the project root.
project_root = Path.cwd().resolve().parents[2]
sys.path.insert(0, str(project_root))

import torch

from tools.owlvit_utils    import OwlViTPipeline
from tools.prompt_utils    import PromptUtils

In [None]:
# Experiment configuration
# Every <...> token below is a placeholder: replace it with a real value
# before running, because this cell is not valid Python until then.
#   experiment_name - passed to OwlViTPipeline when the pipeline is built
#   image_filename  - passed to pipeline.load_image
#   prompts_file    - passed to pipeline.load_prompts
#   resize_size     - passed to OwlViTPipeline as resize_size
#   thresholds      - each value is used as `threshold` in the post-processing loop
experiment_name      = 'prompt_engineering'
image_filename       = '<input_image_filename>'  # e.g., 'site_image.png'
prompts_file         = '<prompts_file>'          # e.g., 'prompts.txt'
resize_size          = (<width>, <height>)       # e.g., (1024, 1024)
thresholds           = [<threshold_1>, <threshold_2>, <threshold_3>, <threshold_4>]  # e.g., [0.001, 0.002, ...]


In [None]:
# Build the pipeline for this experiment and load the input image.
pipeline = OwlViTPipeline(
    experiment_name=experiment_name,
    resize_size=resize_size
)
pipeline.load_image(image_filename)


# Load the prompts file, then keep the pipeline's prompts as the base set
# (presumably load_prompts is what populates pipeline.prompts — confirm).
pipeline.load_prompts(prompts_file)
base_prompts = pipeline.prompts

# Templates for engineering
# Each template carries a {prompt} placeholder; presumably
# PromptUtils.expand_prompts substitutes each base prompt into it — confirm.
# NOTE(review): the "a satellite image of {prompt}." entry is the only one
# that starts lowercase and has no trailing space — confirm this is intended.
templates = [
    "{prompt}. ",
    "A high-resolution satellite image showing {prompt}. ",
    "Close-up aerial view of {prompt}. ",
    "Satellite photo capturing {prompt} from above. ",
    "A {prompt} in a satellite image. ",
    "a satellite image of {prompt}." ,
    "A bird's eye view of {prompt}. "
]


In [None]:

# Combine the base prompts with the templates into the expanded prompt set.
expanded_prompts = PromptUtils.expand_prompts(base_prompts, templates)

# Optionally work on a subset (k=10) for faster runs, and make that subset
# the pipeline's active prompts.
prompts = pipeline.prompts = PromptUtils.sample_prompts(
    expanded_prompts,
    k=10,
)

# Echo the prompts that will actually be used, one per line.
print(
    "Using prompts:\n",
    "\n".join(pipeline.prompts),
)


In [None]:
# Encode the prompts and the resized image, then run the model once.
# The forward pass does not take the score threshold, so its outputs are
# shared by every threshold in the sweep below.
inputs = pipeline.processor(
    text=pipeline.prompts,
    images=pipeline.image_resized,
    return_tensors="pt"
)
with torch.no_grad():
    outputs = pipeline.model(**inputs)

# Size of the full image as a single (height, width) row, passed to
# post-processing as target_sizes. It is the same for every threshold, so
# build the tensor once instead of once per loop iteration.
target_sizes = torch.tensor([[pipeline.image_full.height,
                              pipeline.image_full.width]])

# Only the post-processing is repeated per threshold
for thresh in thresholds:
    # [0] selects the results for the single image in the batch.
    results = pipeline.processor.post_process_object_detection(
        outputs,
        target_sizes=target_sizes,
        threshold=thresh
    )[0]

    # Save visual, metrics, geojson via pipeline helpers.
    # NOTE(review): run_and_save_geojson receives only the threshold, not
    # `results` — presumably it runs its own detection; confirm it uses the
    # replaced pipeline.prompts.
    png = pipeline.save_visualisation(results, threshold=thresh)
    metrics = pipeline.save_metrics(results, threshold=thresh)
    geo = pipeline.run_and_save_geojson(thresh)
    print(f"[t={thresh:.4f}] saved →", png, metrics, geo)


In [None]:
# Show the experiment's metrics via the pipeline helper
# (presumably those written by save_metrics in the threshold loop — confirm).
pipeline.display_metrics()
