In [None]:
import torch
import clip
from PIL import Image
import os
import time
import matplotlib.pyplot as plt


In [None]:
# Run on the GPU when one is available; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the CLIP model together with its matching image preprocessing transform.
model, preprocess = clip.load("ViT-B/32", device=device)

# Directory that holds the images used for this test.
image_path = "../data/raw/train"

# os.listdir() returns entries in arbitrary order, so sort the names to make
# the later "first N images" selection reproducible between runs and machines.
# The extension is compared case-insensitively so files such as "IMG_01.JPG"
# are not silently skipped.
image_files = sorted(
    f for f in os.listdir(image_path) if f.lower().endswith((".jpg", ".png"))
)

if len(image_files) < 5:
    # Raise instead of calling exit(): in a notebook exit() acts on the kernel
    # itself, whereas an exception fails this cell, stops "Run All", and keeps
    # the kernel (and the loaded model) available for inspection.
    raise RuntimeError(
        "‚ùå Not enough images in the folder for testing (minimum 5 required)."
    )


In [None]:
# Take the first five entries of the image listing as the test sample.
selected_images = image_files[0:5]

# Candidate English descriptions; each image is matched against all of them
# and the highest-scoring one is reported as its description.
text_descriptions = [
    "Dent on the fuselage surface",
    "Crack on the wing structure",
    "Corrosion detected in the engine",
    "Superficial scratch on the body",
    "Impact mark from a foreign object",
    "Severe structural damage observed",
]


In [None]:
# Tokenize the candidate descriptions, then move the resulting token tensor to
# the same device as the model.
text_tokens = clip.tokenize(text_descriptions)
text_tokens = text_tokens.to(device)

# Record the starting timestamp used later to report total execution time.
start_time = time.time()

In [None]:
# Classify every selected image against the candidate descriptions and show
# each image together with its best-matching description.
#
# The plotting code lives inside the loop (it was previously an indented
# fragment in its own cell, which cannot run on its own) so that one figure is
# produced per image.
results = []

with torch.no_grad():
    # The text prompts are identical for every image, so encode them once
    # instead of once per loop iteration.
    text_features = model.encode_text(text_tokens)
    # CLIP compares embeddings by cosine similarity: L2-normalize the features
    # so the dot product below is not biased by embedding magnitude.
    text_features = text_features / text_features.norm(dim=-1, keepdim=True)

for img_name in selected_images:
    img_path = os.path.join(image_path, img_name)

    # The context manager guarantees the underlying file handle is released
    # once the image has been preprocessed and drawn.
    with Image.open(img_path) as image:
        image_preprocessed = preprocess(image).unsqueeze(0).to(device)

        # Score the image against every description.
        with torch.no_grad():
            image_features = model.encode_image(image_preprocessed)
            image_features = image_features / image_features.norm(dim=-1, keepdim=True)
            # The factor 100.0 follows the zero-shot example in the CLIP README;
            # it sharpens the softmax over the cosine similarities.
            similarity = (100.0 * (image_features @ text_features.T)).softmax(dim=-1)

        # Pick the most probable description; .item() yields a plain int index.
        best_match = text_descriptions[similarity.argmax(dim=-1).item()]

        # Store the result for the summary printed at the end.
        results.append((img_name, best_match))

        # Show the image with its predicted description.
        plt.figure(figsize=(5, 6))
        plt.imshow(image)
        plt.axis("off")
        plt.title(f"Generated description: {best_match}", fontsize=11, color="blue")
        plt.show()

In [None]:
# Stop the timer and compute the elapsed time since start_time was recorded.
end_time = time.time()
execution_time = end_time - start_time

# Print a summary: a header, one line per (image, description) pair, and the
# total elapsed time.
print("\n‚úÖ Test results with 5 images:")
for entry in results:
    img_name, desc = entry
    print(f"üìå {img_name}: {desc}")

print(f"\n‚è≥ Total execution time: {execution_time:.2f} seconds")