# 🔍 Image Similarity & Ranking Demo

This notebook demonstrates how different image features perform for image similarity search and ranking.

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/kelkalot/imagefeatures/blob/main/examples/demo_similarity.ipynb)

## 1. Setup

In [None]:
!pip install -q git+https://github.com/kelkalot/imagefeatures.git
!pip install -q matplotlib

## 2. Download Sample Images

We'll download 10 diverse images for similarity comparison.

In [None]:
import urllib.request
import os

os.makedirs('images', exist_ok=True)

# Download 10 sample images.
# The /seed/<seed>/ endpoint returns the same picture for the same seed, so a
# fresh "Restart & Run All" reproduces the same gallery (the ?random= query
# only defeats caching and yields a different picture on every request).
for i in range(10):
    path = f'images/img_{i:02d}.jpg'
    if not os.path.exists(path):
        url = f'https://picsum.photos/seed/{i+100}/300/200'
        # Download to a temporary name and rename on success, so an interrupted
        # transfer never leaves a truncated .jpg that later runs would skip.
        tmp_path = path + '.part'
        urllib.request.urlretrieve(url, tmp_path)
        os.replace(tmp_path, path)
        print(f'Downloaded: {path}')

# Count only .jpg files, i.e. the same files the gallery cell picks up.
jpg_count = sum(1 for f in os.listdir('images') if f.endswith('.jpg'))
print(f'\nTotal images: {jpg_count}')

## 3. Display All Images

In [None]:
import matplotlib.pyplot as plt
from imagefeatures.utils import load_image
import os

# Collect the .jpg files in sorted order; an image's position in this list is
# the "Image N" number shown in the gallery titles.
image_files = sorted([f'images/{f}' for f in os.listdir('images') if f.endswith('.jpg')])

# Size the grid from the number of images (5 per row) instead of assuming
# exactly 10, so extra images are not silently dropped.
n_cols = 5
n_rows = max(1, -(-len(image_files) // n_cols))  # ceiling division, at least one row
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 3 * n_rows), squeeze=False)

for idx, (ax, path) in enumerate(zip(axes.flat, image_files)):
    img = load_image(path)
    ax.imshow(img)
    ax.set_title(f'Image {idx}', fontsize=10)

# Hide ticks/frames on every panel, including any left empty in the last row.
for ax in axes.flat:
    ax.axis('off')

plt.suptitle('Image Gallery', fontsize=14)
plt.tight_layout()
plt.show()

## 4. Extract Features with Different Descriptors

In [None]:
from imagefeatures import FeatureExtractor
from imagefeatures.features import (
    ColorHistogram, LocalBinaryPatterns, CEDD, 
    Gabor, PHOG, ColorMoments
)

# Descriptor groups to compare; each key is the label reused by later cells.
feature_sets = {}
feature_sets['Color Only'] = [ColorHistogram(), ColorMoments()]
feature_sets['Texture Only'] = [LocalBinaryPatterns(), Gabor()]
feature_sets['Shape Only'] = [PHOG()]
feature_sets['Combined (CEDD)'] = [CEDD()]
# NOTE(review): despite its label, this group leaves out PHOG and ColorMoments —
# confirm whether that is intentional.
feature_sets['All Features'] = [ColorHistogram(), LocalBinaryPatterns(), Gabor(), CEDD()]

# Run one extractor per group over the image folder and keep the result under
# the group's label, reporting the width of the resulting feature matrix.
results = {}
for name in feature_sets:
    extractor = FeatureExtractor(feature_sets[name])
    result = results[name] = extractor.extract_folder('images/')
    print(f'{name}: {result["features"].shape[1]} dimensions')

## 5. Compute Similarity Rankings

For each feature set, rank images by similarity to a query image.

In [None]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def get_ranking(features, query_idx=0):
    """Rank every image by cosine similarity to one query image.

    Args:
        features: 2-D array-like with one feature vector per row.
        query_idx: Row index of the query image. Negative indices count from
            the end, as in normal NumPy indexing.

    Returns:
        A ``(ranking, similarities)`` tuple. ``ranking`` holds all row indices
        ordered from most to least similar; ties are broken by lowest index.
        The query row itself is NOT removed from ``ranking`` — callers that
        only want neighbours must skip ``query_idx`` themselves.
        ``similarities`` is the cosine similarity of each row to the query,
        indexed by row.

    Raises:
        IndexError: If ``query_idx`` is out of range for ``features``.
    """
    X = np.asarray(features)
    # Index the row directly: the slice X[q:q+1] is empty for q == -1 and for
    # out-of-range q, which would only fail later inside cosine_similarity.
    query = X[query_idx].reshape(1, -1)
    similarities = cosine_similarity(query, X)[0]
    # Stable sort on the negated scores gives descending order with a
    # deterministic, lowest-index-first order among equal similarities.
    ranking = np.argsort(-similarities, kind='stable')
    return ranking, similarities

# Get rankings for each feature set
query_idx = 0  # Use first image as query
rankings = {}

for name, result in results.items():
    ranking, sims = get_ranking(result['features'], query_idx)
    # Store the full ranking (query included); the plotting cells skip the query themselves.
    rankings[name] = (ranking, sims)
    # The ranking still contains the query image itself, so drop it before
    # listing the top matches — otherwise "Image 0" is reported as its own best match.
    top_matches = [idx for idx in ranking if idx != query_idx][:5]
    print(f"\n{name} ranking (top 5):")
    for i, idx in enumerate(top_matches):
        print(f"  {i+1}. Image {idx} (similarity: {sims[idx]:.3f})")

## 6. Visualize Rankings by Feature Type

In [None]:
def visualize_ranking(name, ranking, similarities, images, query_idx, top_k=5):
    """Plot the query image followed by its most similar images.

    Args:
        name: Label of the feature set, shown in the figure title.
        ranking: Image indices ordered from most to least similar; may include
            the query index, which is skipped.
        similarities: Similarity score of every image to the query, indexed by
            image index.
        images: Sequence of images accepted by ``imshow``, indexed like
            ``similarities``.
        query_idx: Index of the query image.
        top_k: Number of result panels to show (default 5). If fewer
            non-query images are available, the remaining panels stay blank.
    """
    top_k = max(0, top_k)
    # Everything except the query, best match first. Filtering up front avoids
    # running off the end of `ranking` when there are fewer than top_k + 1 images.
    matches = [idx for idx in ranking if idx != query_idx][:top_k]

    # 2.5 inches per panel reproduces the 15x3 figure for the default top_k=5.
    fig, axes = plt.subplots(1, top_k + 1, figsize=(2.5 * (top_k + 1), 3), squeeze=False)
    axes = axes[0]

    # Query image
    axes[0].imshow(images[query_idx])
    axes[0].set_title('QUERY', fontsize=10, fontweight='bold', color='red')

    # Most similar images, in rank order
    for position, idx in enumerate(matches, start=1):
        axes[position].imshow(images[idx])
        axes[position].set_title(f'#{position} (sim: {similarities[idx]:.2f})', fontsize=9)

    # Hide ticks/frames on every panel, including any left unused.
    for ax in axes:
        ax.axis('off')

    plt.suptitle(f'Ranking using: {name}', fontsize=12, fontweight='bold')
    plt.tight_layout()
    plt.show()

# Decode every gallery image once, in the same order as image_files.
# NOTE(review): the rankings index rows of extract_folder's feature matrix; this
# assumes those rows follow the same sorted file order as image_files — TODO confirm.
images = list(map(load_image, image_files))

# One figure per feature set, all using the same query image.
for name in rankings:
    ranking, sims = rankings[name]
    visualize_ranking(name, ranking, sims, images, query_idx)

## 7. Compare Different Query Images

In [None]:
# Try different query images with CEDD features
cedd_result = results['Combined (CEDD)']

# Example queries; indices beyond the number of loaded images are dropped so
# the cell still runs on a smaller gallery.
example_queries = [q for q in (0, 3, 7) if q < len(images)]
n_results = 5

fig, axes = plt.subplots(len(example_queries), n_results + 1, figsize=(15, 8), squeeze=False)

# The loop variable is deliberately NOT called `query_idx`: reusing that name
# would overwrite the notebook-level query_idx that the stored `rankings`
# were computed for, breaking a re-run of the earlier visualization cell.
for row, q_idx in enumerate(example_queries):
    ranking, sims = get_ranking(cedd_result['features'], q_idx)

    # Query
    axes[row, 0].imshow(images[q_idx])
    axes[row, 0].set_title('QUERY', fontsize=9, fontweight='bold', color='red')

    # Results: best matches first, with the query itself filtered out up front
    # so we never index past the end of the ranking.
    matches = [idx for idx in ranking if idx != q_idx][:n_results]
    for col, idx in enumerate(matches, start=1):
        axes[row, col].imshow(images[idx])
        axes[row, col].set_title(f'sim: {sims[idx]:.2f}', fontsize=8)

# Hide ticks/frames on every panel, including any left unused.
for ax in axes.flat:
    ax.axis('off')

plt.suptitle('CEDD Similarity Search with Different Queries', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

## 8. Similarity Matrix Heatmap

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity

# Feature sets to compare side by side, one heatmap panel each.
feature_names = ['Color Only', 'Texture Only', 'Combined (CEDD)']

fig, axes = plt.subplots(1, 3, figsize=(15, 4))

for panel, name in enumerate(feature_names):
    ax = axes[panel]
    # Pairwise cosine similarity between all images for this feature set.
    sim_matrix = cosine_similarity(results[name]['features'])

    # NOTE(review): the colour scale is pinned to [0, 1]; any negative
    # similarities would render the same as 0 — confirm features are non-negative.
    im = ax.imshow(sim_matrix, cmap='RdYlGn', vmin=0, vmax=1)
    ax.set(title=name, xlabel='Image', ylabel='Image')
    fig.colorbar(im, ax=ax, fraction=0.046)

plt.suptitle('Similarity Matrices by Feature Type', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

## 9. Key Takeaways

- **Color features** rank images by color palette similarity
- **Texture features** (LBP, Gabor) focus on patterns and surface properties
- **Shape features** (PHOG) capture edge and contour information
- **Combined features** (CEDD, FCTH) balance multiple aspects

The best feature choice depends on your application:
- For **product search**: Color + Texture
- For **scene matching**: CEDD or PHOG
- For **texture retrieval**: LBP + Gabor + Haralick