In [None]:
# Mount Google Drive at /content/drive so files stored there are reachable from this notebook.
# If already mounted this will show "Drive is already mounted" — that's fine.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Install packages that are not pre-installed in Colab
# (torch, torchvision, numpy, Pillow, requests are already available)
!pip install -q git+https://github.com/openai/CLIP.git ftfy

# Find Similar Images: Upload Your Own Photo

In this notebook, you will:

1. **Upload** a photo from your laptop
2. **Compute** a CLIP embedding for your photo
3. **Find** the most visually similar images in the collection
4. **Explore** what "visual similarity" means to a neural network

---

## How It Works

```mermaid
flowchart LR
    subgraph Your Photo
        UP["üì∑ Upload\nyour photo"]
    end
    
    subgraph CLIP
        ENC["üß† CLIP\nImage Encoder"]
    end
    
    subgraph Embeddings
        YE["Your embedding\n[0.12, -0.45, ...]"]
        CE["Collection embeddings\n(pre-calculated)"]
    end
    
    subgraph Results
        SIM["üìä Compare\n(cosine similarity)"]
        RES["üñºÔ∏è Most similar\nimages"]
    end
    
    UP --> ENC --> YE
    YE --> SIM
    CE --> SIM
    SIM --> RES
```

The key insight: CLIP learns to represent images in a way that captures **semantic content**, not just pixels. Two images of sunsets will have similar embeddings even if they have different colors or compositions.

---

## Part 1: Setup

In [None]:
# Standard library imports
import os
import json
import io
from pathlib import Path

# External libraries
import numpy as np
from PIL import Image as PILImage
from IPython.display import display, HTML, clear_output

# CLIP and torch are optional: record in CLIP_AVAILABLE whether both can be imported,
# so later cells can skip model work instead of failing.
try:
    import torch
    import clip
except ImportError:
    CLIP_AVAILABLE = False
    print("‚ö†Ô∏è CLIP not installed.")
    print("  Install with: pip install git+https://github.com/openai/CLIP.git torch torchvision")
else:
    CLIP_AVAILABLE = True
    print("‚úì CLIP loaded successfully!")

# ipywidgets provides the file-upload widget; WIDGETS_AVAILABLE records whether it imported.
try:
    import ipywidgets as widgets
except ImportError:
    WIDGETS_AVAILABLE = False
    print("‚ö†Ô∏è ipywidgets not installed. Install with: pip install ipywidgets")
else:
    WIDGETS_AVAILABLE = True
    print("‚úì ipywidgets available")

# Choose the device to run on, in order of preference:
# NVIDIA GPU (CUDA), then Apple Silicon GPU (MPS), then CPU.
# DEVICE is only assigned when the CLIP/torch imports succeeded.
if CLIP_AVAILABLE:
    # torch builds without MPS support have no `torch.backends.mps` attribute
    mps_backend = getattr(torch.backends, 'mps', None)

    if torch.cuda.is_available():
        DEVICE = 'cuda'
        print(f"‚úì NVIDIA GPU (CUDA): {torch.cuda.get_device_name(0)}")
    elif mps_backend is not None and mps_backend.is_available():
        DEVICE = 'mps'
        print("‚úì Apple Silicon GPU (MPS) ‚Äî good performance!")
    else:
        DEVICE = 'cpu'
        print("‚ÑπÔ∏è No GPU detected. Using CPU ‚Äî fine for single image encoding.")

In [None]:
# Project location on the mounted Google Drive, and the data folder inside it
PROJECT_ROOT = Path("/content/drive/MyDrive/Distant_viewing")
DATA_DIR = PROJECT_ROOT.joinpath("data")

# ============================================================
# Collection to work with — change this to switch collections!
# ============================================================
COLLECTION_NAME = "Museum of Gothenburg"  # <-- Change this!
# ============================================================

# Lower-cased collection name with spaces turned into underscores,
# used as the prefix of the embeddings file name
safe_name = "_".join(COLLECTION_NAME.lower().split(' '))

# Images and embeddings each live in a sub-folder named after the collection
COLLECTION_IMAGES_DIR = DATA_DIR / "images" / COLLECTION_NAME
COLLECTION_EMBEDDINGS_FILE = DATA_DIR.joinpath(
    "embeddings", COLLECTION_NAME, f"{safe_name}_clip_embeddings.npz"
)

print(f"Project root: {PROJECT_ROOT}")
print(f"Collection:   {COLLECTION_NAME}")
print(f"Images:       {COLLECTION_IMAGES_DIR}")
print(f"Embeddings:   {COLLECTION_EMBEDDINGS_FILE}")
print(f"Embeddings exist: {COLLECTION_EMBEDDINGS_FILE.exists()}")

In [None]:
# Name of the CLIP model variant passed to clip.load (also written to the saved results)
MODEL_NAME = 'ViT-B/32'

if not CLIP_AVAILABLE:
    print("‚ùå CLIP not available")
else:
    print(f"Loading CLIP model '{MODEL_NAME}'...")
    # clip.load hands back two objects: the model and the preprocessing callable used before encoding
    model, preprocess = clip.load(MODEL_NAME, device=DEVICE)
    # The model is only used for inference in this notebook
    model.eval()
    print(f"‚úì Model loaded on {DEVICE}")

In [None]:
# Load pre-calculated collection embeddings.
# Both names stay None when the embeddings file is missing, so later cells can
# test for that case instead of failing with a NameError.
collection_embeddings = None
collection_filenames = None

if COLLECTION_EMBEDDINGS_FILE.exists():
    print(f"Loading collection embeddings from {COLLECTION_EMBEDDINGS_FILE}...")

    # SECURITY NOTE: allow_pickle=True lets NumPy unpickle object arrays, which can
    # execute arbitrary code. Only load embeddings files from a source you trust.
    # The context manager closes the .npz archive once both arrays have been read.
    with np.load(COLLECTION_EMBEDDINGS_FILE, allow_pickle=True) as data:
        collection_embeddings = data['embeddings']
        collection_filenames = data['filenames']

    print(f"‚úì Loaded {len(collection_filenames)} image embeddings")

    # Row i of the embeddings is looked up as filename i later on, so the two
    # arrays must have the same length for results to be meaningful.
    if len(collection_embeddings) != len(collection_filenames):
        print(
            f"WARNING: {len(collection_embeddings)} embeddings but "
            f"{len(collection_filenames)} filenames - results may be mislabeled."
        )

    # Convert to torch tensor
    if CLIP_AVAILABLE:
        collection_embeddings = torch.tensor(collection_embeddings, dtype=torch.float32).to(DEVICE)
else:
    print(f"‚ö†Ô∏è Embeddings not found at {COLLECTION_EMBEDDINGS_FILE}")
    print("   Ask your instructor for the embeddings file.")

---

## Part 2: Upload Your Photo

Use the widget below to select a photo from your laptop.

**Supported formats:** JPG, PNG, WEBP, GIF

In [None]:
# Create file upload widget.
# uploaded_image is filled in by the upload callback below (or by the manual-path
# cell); uploaded_embedding is computed later, in Part 3.
uploaded_image = None
uploaded_embedding = None

if WIDGETS_AVAILABLE:
    # File upload widget
    upload_widget = widgets.FileUpload(
        accept='image/*',
        multiple=False,
        description='Choose Photo'
    )

    # Output area for preview
    output = widgets.Output()

    # Process button.
    # NOTE(review): this button is not part of the displayed layout and has no
    # click handler; the search itself is run from the code cells in Part 3.
    process_btn = widgets.Button(
        description='Find Similar Images',
        button_style='primary',
        disabled=True
    )

    # Status label
    status_label = widgets.Label(value='Upload a photo to get started')

    def on_upload_change(change):
        """Decode the selected file, show a preview and store it in `uploaded_image`.

        Registered as an observer of the upload widget's `value` trait. The
        `change` argument is not used; the current value is read from the widget.
        If the file cannot be loaded, `uploaded_image` is reset to None.
        """
        global uploaded_image

        with output:
            clear_output()

            if upload_widget.value:
                # ipywidgets >= 8.0: value is a tuple; < 8.0: value is a dict
                val = upload_widget.value
                file_info = val[0] if isinstance(val, tuple) else list(val.values())[0]
                content = file_info['content']

                try:
                    # Load image
                    uploaded_image = PILImage.open(io.BytesIO(bytes(content))).convert('RGB')

                    # Display preview
                    print("üì∑ Your uploaded photo:")
                    print(f"   Size: {uploaded_image.size[0]} x {uploaded_image.size[1]} pixels")
                    print()

                    # Shrink (never enlarge) the preview to fit in a 400 px box
                    display_img = uploaded_image.copy()
                    max_size = 400
                    ratio = min(max_size / display_img.width, max_size / display_img.height)
                    if ratio < 1:
                        new_size = (int(display_img.width * ratio), int(display_img.height * ratio))
                        display_img = display_img.resize(new_size, PILImage.Resampling.LANCZOS)
                    display(display_img)

                    process_btn.disabled = False
                    # Point the user at the cells that actually run the search;
                    # the button above is not shown on screen.
                    status_label.value = '‚úì Photo loaded! Run the cells in Part 3 to find similar images'

                except Exception as e:
                    # Forget any previously loaded photo so later cells cannot
                    # silently search with a stale image after a failed upload.
                    uploaded_image = None
                    process_btn.disabled = True
                    print(f"‚ùå Error loading image: {e}")
                    status_label.value = f'Error: {e}'

    upload_widget.observe(on_upload_change, names='value')

    # Display the widgets
    display(widgets.VBox([
        widgets.HTML('<h3>Step 1: Choose a photo from your laptop</h3>'),
        upload_widget,
        status_label,
        output
    ]))
else:
    print("‚ö†Ô∏è File upload widget not available.")
    print("   You can manually specify an image path below.")

### Alternative: Specify Image Path Manually

If the upload widget doesn't work, you can specify the path to an image file directly.

In [None]:
# ============================================================
# ALTERNATIVE: Specify image path manually
# ============================================================

# Set this to the path of your image file
MANUAL_IMAGE_PATH = None  # e.g., "/home/user/photos/my_photo.jpg"

# ============================================================

if MANUAL_IMAGE_PATH:
    # expanduser() lets paths such as "~/photos/my_photo.jpg" work as well
    path = Path(MANUAL_IMAGE_PATH).expanduser()
    # is_file() (rather than exists()) rejects directories, which cannot be opened as images
    if path.is_file():
        # Open inside a context manager so the file handle is released right away;
        # convert() returns a separate copy that stays usable afterwards.
        with PILImage.open(path) as source_img:
            uploaded_image = source_img.convert('RGB')
        print(f"‚úì Loaded image: {path.name}")
        print(f"  Size: {uploaded_image.size[0]} x {uploaded_image.size[1]} pixels")

        # Display preview, shrunk (never enlarged) to fit in a 400 px box
        display_img = uploaded_image.copy()
        max_size = 400
        ratio = min(max_size / display_img.width, max_size / display_img.height)
        if ratio < 1:
            new_size = (int(display_img.width * ratio), int(display_img.height * ratio))
            display_img = display_img.resize(new_size, PILImage.Resampling.LANCZOS)
        display(display_img)
    else:
        print(f"‚ùå File not found: {path}")

---

## Part 3: Compute Embedding and Find Similar Images

In [None]:
def compute_image_embedding(image):
    """
    Encode a single image with the loaded CLIP model.

    Relies on the notebook-level `preprocess`, `model` and `DEVICE`.

    Parameters:
        image: PIL Image

    Returns:
        Embedding tensor divided by its L2 norm along the last dimension
    """
    # Inference only, so gradient tracking is switched off
    with torch.no_grad():
        # Preprocess, add a leading batch axis, and move to the compute device
        batch = preprocess(image).unsqueeze(0).to(DEVICE)

        features = model.encode_image(batch).float()

        # Scale to unit length
        lengths = features.norm(dim=-1, keepdim=True)
        unit_features = features / lengths

    return unit_features


def find_similar_images(query_embedding, top_k=10):
    """
    Find most similar images in the collection.

    Each score is the dot product between the query and one row of the
    notebook-level `collection_embeddings`. These equal cosine similarities only
    when both sides have unit length: the query is normalized by
    compute_image_embedding; the stored collection embeddings are assumed to be
    normalized as well — TODO confirm.

    Parameters:
        query_embedding: Embedding of a single query image, shape (dim,) or (1, dim)
        top_k: Maximum number of results to return

    Returns:
        List of (filename, similarity_score) tuples, best match first. Holds fewer
        than top_k entries when the collection is smaller, and is empty when
        top_k is not positive.

    Raises:
        RuntimeError: if the collection embeddings have not been loaded
    """
    if collection_embeddings is None or collection_filenames is None:
        raise RuntimeError(
            "Collection embeddings not loaded - run the loading cell in Part 1 first."
        )

    # Flatten the query to a vector so the product is always 1-D, one score per
    # collection image. Squeezing an (n, 1) result instead collapses to a 0-d
    # tensor when the collection holds a single image, and that cannot be sliced.
    query_vector = query_embedding.reshape(-1)
    similarities = collection_embeddings @ query_vector

    # Never ask for more results than there are images; treat top_k <= 0 as "none"
    k = min(int(top_k), similarities.shape[0])
    if k <= 0:
        return []

    top_indices = torch.argsort(similarities, descending=True)[:k]
    top_scores = similarities[top_indices].tolist()

    return [
        (str(collection_filenames[index]), score)
        for index, score in zip(top_indices.tolist(), top_scores)
    ]

In [None]:
# Run the search, or explain which prerequisite is still missing.
similar_results = None

if uploaded_image is None:
    print("‚ö†Ô∏è Please upload a photo first (Part 2)")
elif not CLIP_AVAILABLE:
    print("‚ùå CLIP not available")
elif collection_embeddings is None:
    print("‚ùå Collection embeddings not loaded")
else:
    print("üîç Computing embedding for your photo...")
    uploaded_embedding = compute_image_embedding(uploaded_image)
    print("‚úì Embedding computed!")
    print()

    print("üîç Finding similar images in the collection...")
    similar_results = find_similar_images(uploaded_embedding, top_k=10)

    print("\nüìä Top 10 most similar images:")
    print("=" * 60)
    for rank, (filename, score) in enumerate(similar_results, start=1):
        # Long file names are cut to 45 characters to keep the listing compact
        short_name = Path(filename).name[:45]
        print(f"{rank:2}. Similarity: {score:.4f} - {short_name}")

---

## Part 4: View Results

Let's see your uploaded photo alongside the most similar images from the collection.

In [None]:
def find_collection_image(filename):
    """Locate a collection image on disk.

    Tries `filename` relative to COLLECTION_IMAGES_DIR first, then its bare file
    name directly inside that directory. Returns the first path that exists, or
    None when neither does.
    """
    for relative in (filename, Path(filename).name):
        candidate = COLLECTION_IMAGES_DIR / relative
        if candidate.exists():
            return candidate

    return None


# Display results side by side: the uploaded photo first, then the five best matches.
if similar_results and uploaded_image:
    print("\n" + "=" * 60)
    print("YOUR PHOTO vs SIMILAR IMAGES FROM COLLECTION")
    print("=" * 60)

    # Display your photo, shrunk (never enlarged) to fit in a 300 px box
    print("\nüì∑ Your uploaded photo:")
    display_img = uploaded_image.copy()
    max_size = 300
    ratio = min(max_size / display_img.width, max_size / display_img.height)
    if ratio < 1:
        new_size = (int(display_img.width * ratio), int(display_img.height * ratio))
        display_img = display_img.resize(new_size, PILImage.Resampling.LANCZOS)
    display(display_img)

    # Display similar images
    print(f"\nüñºÔ∏è Most similar images from the {COLLECTION_NAME} collection:")

    for i, (filename, score) in enumerate(similar_results[:5], 1):
        print(f"\n--- #{i} Similarity: {score:.4f} ---")
        print(f"File: {Path(filename).name}")

        # find_collection_image returns an existing path or None
        img_path = find_collection_image(filename)
        if img_path is not None:
            try:
                # The context manager releases the file handle once the image has
                # been shown, instead of leaving one open per displayed result.
                with PILImage.open(img_path) as img:
                    ratio = min(max_size / img.width, max_size / img.height)
                    if ratio < 1:
                        new_size = (int(img.width * ratio), int(img.height * ratio))
                        preview = img.resize(new_size, PILImage.Resampling.LANCZOS)
                    else:
                        preview = img
                    display(preview)
            except Exception as e:
                # One unreadable file should not stop the remaining results from showing
                print(f"Could not display: {e}")
        else:
            print("[Image file not found locally]")
else:
    print("‚ö†Ô∏è Run the previous cells first to find similar images.")

---

## Part 5: Reflection

### Questions to Consider

1. **Are the results what you expected?** Why or why not?

2. **What features do your photo and the similar images share?**
   - Colors?
   - Composition?
   - Subject matter?
   - Mood/atmosphere?

3. **What does CLIP seem to focus on?**
   - Does it prioritize semantic content (what's in the image)?
   - Or visual style (how it looks)?

4. **What are the limitations?**
   - What kinds of photos might not work well?
   - How might cultural bias affect the results?

In [None]:
# ============================================================
# EXERCISE: Try with a different photo!
# ============================================================

# Return to Part 2, upload another photo, and re-run the cells that follow
# to see how the results change.

# Some ideas:
# - A landscape photo
# - A portrait
# - An abstract pattern
# - A photo of an artwork you like

# Print the two-line hint in a single call, one string per line
print(
    "üí° Try uploading different types of photos to explore",
    "   how CLIP understands visual similarity!",
    sep="\n",
)

---

## Part 6: Save Your Results

In [None]:
# Write the similarity results to a JSON file in the data folder
if not similar_results:
    print("No results to save yet.")
else:
    output_path = DATA_DIR / "my_similar_images.json"

    # One record per match, in ranked order
    result_records = []
    for name, score in similar_results:
        result_records.append({'filename': name, 'similarity': score})

    data = {
        'query_type': 'uploaded_image',
        'model': MODEL_NAME,
        'results': result_records
    }

    # Serialize first, then write the text in one go
    output_path.write_text(json.dumps(data, indent=2))

    print(f"‚úì Saved results to {output_path}")

---

## Summary

In this notebook, you:

1. **Uploaded** your own photo
2. **Computed** a CLIP embedding for it
3. **Found** the most visually similar images in the collection
4. **Explored** what "visual similarity" means to a neural network

### Key Takeaways

- CLIP captures **semantic** similarity, not just pixel similarity
- The same model can match images to text AND images to images
- Results may reveal **unexpected connections** between artworks
- Neural networks have **biases** based on their training data

### Next Steps

- **Notebook 04** (Advanced): Learn how to compute embeddings for your own image collections