In [1]:
# Move the working directory up one level; later cells use relative paths such as
# 'models/...' and 'data/...' (presumably relative to the project root — confirm).
# NOTE: not idempotent — re-running this cell moves up one more directory each time.
%cd ..

c:\Users\HP\OneDrive - University of Moratuwa\Desktop\E-Vision-Projects\Shelf_Product_Count_Generation


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
from pathlib import Path
import numpy as np
from tqdm import tqdm
import random

In [3]:
# 2. Embedding model (backbone + embedding head)
class ProductEmbeddingModel(nn.Module):
    """Image embedding network: a torchvision backbone followed by an MLP head.

    The backbone's final classification layer is replaced with ``nn.Identity``
    so its forward pass returns the features that used to feed the classifier.
    The head projects those features to ``embedding_dim`` and the result is
    L2-normalised.

    Args:
        backbone_name: ``'efficientnet_v2_s'`` or ``'resnet50'``.
        embedding_dim: Size of the output embedding.
        pretrained: When True (default) the backbone starts from the
            ``IMAGENET1K_V1`` weights, i.e. the weights the deprecated
            ``pretrained=True`` torchvision flag selected. Pass False to skip
            the weight download, e.g. when a checkpoint is loaded right after
            construction and would overwrite the weights anyway.

    Raises:
        ValueError: If ``backbone_name`` is not one of the supported names.
    """

    def __init__(self, backbone_name='efficientnet_v2_s', embedding_dim=512, pretrained=True):
        super().__init__()

        # Load backbone. The explicit `weights=` argument replaces the
        # deprecated `pretrained=True` flag (which torchvision warns about).
        if backbone_name == 'efficientnet_v2_s':
            weights = models.EfficientNet_V2_S_Weights.IMAGENET1K_V1 if pretrained else None
            backbone = models.efficientnet_v2_s(weights=weights)
            # classifier is (Dropout, Linear); the Linear's input width is the feature size
            backbone_features = backbone.classifier[1].in_features
            backbone.classifier = nn.Identity()  # Remove classifier
        elif backbone_name == 'resnet50':
            weights = models.ResNet50_Weights.IMAGENET1K_V1 if pretrained else None
            backbone = models.resnet50(weights=weights)
            backbone_features = backbone.fc.in_features
            backbone.fc = nn.Identity()  # Remove classifier
        else:
            raise ValueError(f"Unknown backbone: {backbone_name}")

        self.backbone = backbone

        # Embedding head: features -> 1024 -> embedding_dim, layer-normalised
        self.embedding_head = nn.Sequential(
            nn.Linear(backbone_features, 1024),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, embedding_dim),
            nn.LayerNorm(embedding_dim)
        )

    def forward(self, x):
        """Return unit-length embeddings, one row per input sample."""
        features = self.backbone(x)
        embedding = self.embedding_head(features)
        # L2 normalize embeddings along the feature dimension
        embedding = nn.functional.normalize(embedding, p=2, dim=1)
        return embedding

In [4]:
# 7. Load trained model and get embeddings
import torch
from torchvision import transforms
from PIL import Image
import numpy as np

def load_trained_model(checkpoint_path='models/product_embedding_model.pth', device=None):
    """Rebuild a ProductEmbeddingModel from a checkpoint and put it in eval mode.

    The checkpoint must be a dict containing ``'backbone'``, ``'embedding_dim'``
    and ``'model_state_dict'``. ``'epoch'`` and ``'loss'`` are only used for the
    summary printout, so they are optional.

    Args:
        checkpoint_path: Path to the checkpoint file.
        device: Device to load onto; defaults to CUDA when available, else CPU.

    Returns:
        Tuple ``(model, checkpoint, device)``.

    Raises:
        KeyError: If one of the required checkpoint keys is missing.
    """
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # NOTE(security): torch.load unpickles the file — only load trusted checkpoints.
    checkpoint = torch.load(checkpoint_path, map_location=device)

    backbone_name = checkpoint['backbone']
    embedding_dim = checkpoint['embedding_dim']

    # Create model with the same architecture the checkpoint was saved from
    model = ProductEmbeddingModel(
        backbone_name=backbone_name,
        embedding_dim=embedding_dim
    )

    # Load weights
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()  # Set to evaluation mode (disables dropout)
    model = model.to(device)

    print(f"Loaded model from {checkpoint_path}")
    print(f"Backbone: {backbone_name}")
    print(f"Embedding dim: {embedding_dim}")
    # Training metadata is informational only; do not fail a successful load over it.
    if 'epoch' in checkpoint:
        print(f"Trained for {checkpoint['epoch']+1} epochs")  # stored epoch is 0-based
    if 'loss' in checkpoint:
        print(f"Best loss: {checkpoint['loss']:.4f}")

    return model, checkpoint, device

def get_image_transform(image_size=224):
    """Build the deterministic preprocessing pipeline (no augmentation).

    The pipeline resizes to an ``image_size`` x ``image_size`` square, converts
    the image to a tensor and normalises each channel with the fixed mean/std
    constants below. It is intended to mirror the training-time preprocessing.
    """
    target_size = (image_size, image_size)
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]

    steps = [
        transforms.Resize(target_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=channel_mean, std=channel_std),
    ]
    return transforms.Compose(steps)

def get_embedding_from_image(model, image_path_or_pil, transform, device):
    """
    Get the embedding for a single image.

    Args:
        model: Trained ProductEmbeddingModel (any callable mapping a batch
            tensor to a batch of embeddings works)
        image_path_or_pil: Path to an image (``str`` or ``os.PathLike`` such as
            ``pathlib.Path``) or an already opened PIL Image
        transform: Image transform function (PIL image -> tensor)
        device: torch device the input tensor is moved to

    Returns:
        embedding: numpy array of shape (embedding_dim,)
    """
    import os  # local import: only needed for the os.PathLike check

    # Load image. Path-like inputs are opened here and closed again as soon as
    # the RGB copy exists; PIL images passed in by the caller are left untouched.
    if isinstance(image_path_or_pil, (str, os.PathLike)):
        with Image.open(image_path_or_pil) as opened:
            image = opened.convert('RGB')
    else:
        image = image_path_or_pil.convert('RGB')

    # Transform and add the batch dimension the model expects
    image_tensor = transform(image).unsqueeze(0).to(device)

    # Get embedding without tracking gradients
    with torch.no_grad():
        embedding = model(image_tensor)
        embedding = embedding.cpu().numpy().flatten()

    return embedding

def get_embeddings_batch(model, image_paths, transform, device, batch_size=32):
    """
    Get embeddings for multiple images, processed in mini-batches.

    Args:
        model: Trained ProductEmbeddingModel (any callable mapping a batch
            tensor to a batch of embeddings works)
        image_paths: Iterable of image paths (``str`` / ``os.PathLike``) and/or
            PIL Images
        transform: Image transform function (PIL image -> tensor)
        device: torch device the batch tensors are moved to
        batch_size: Number of images per forward pass (must be >= 1)

    Returns:
        embeddings: numpy array of shape (num_images, embedding_dim), rows in
        input order. For empty input an empty float32 array of shape (0, 0) is
        returned, because the embedding width is unknown without a forward pass.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    import os  # local import: only needed for the os.PathLike check

    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    image_paths = list(image_paths)  # allow any iterable, not only lists
    if not image_paths:
        # np.vstack([]) would raise an unrelated "need at least one array" error
        return np.empty((0, 0), dtype=np.float32)

    def _load_rgb(source):
        # Path-like inputs are opened and closed here; PIL images are used as given.
        if isinstance(source, (str, os.PathLike)):
            with Image.open(source) as opened:
                return opened.convert('RGB')
        return source.convert('RGB')

    embeddings_list = []

    for start in range(0, len(image_paths), batch_size):
        batch_sources = image_paths[start:start + batch_size]

        # Stack into batch tensor
        batch_tensor = torch.stack(
            [transform(_load_rgb(source)) for source in batch_sources]
        ).to(device)

        # Get embeddings without tracking gradients
        with torch.no_grad():
            batch_embeddings = model(batch_tensor)
            embeddings_list.append(batch_embeddings.cpu().numpy())

    # Concatenate all batches
    return np.vstack(embeddings_list)

In [15]:
# Load the trained model
# (load_trained_model returns the model in eval mode, the raw checkpoint dict and the device;
#  the path is relative, so it depends on the current working directory)
model, checkpoint, device = load_trained_model('models/product_embedding_model_unfrozen2.pth')

# Get transform
# (224 is also get_image_transform's default size)
transform = get_image_transform(image_size=224)

# Example 1: Get embedding for a single image
image_path = 'data/test_images/cropped_image_3.jpg'
embedding = get_embedding_from_image(model, image_path, transform, device)
print(f"Embedding shape: {embedding.shape}")
print(f"Embedding norm: {np.linalg.norm(embedding):.4f}")  # Should be ~1.0 (L2 normalized)



Loaded model from models/product_embedding_model_unfrozen2.pth
Backbone: efficientnet_v2_s
Embedding dim: 1024
Trained for 19 epochs
Best loss: 0.0057
Embedding shape: (1024,)
Embedding norm: 1.0000


In [16]:
# Build FAISS index with all reference images
import faiss
from pathlib import Path

def build_reference_index(model, reference_dir='data/reference_images', 
                          transform=None, device=None, batch_size=32):
    """
    Build a FAISS index from all reference images.

    ``reference_dir`` is expected to contain one sub-directory per product; the
    sub-directory name is used as the product ID. Inside each product folder
    ``*.jpg``, ``*.jpeg`` and ``*.png`` files are collected (in that order,
    each group sorted by path).

    Args:
        model: Embedding model passed through to ``get_embeddings_batch``
        reference_dir: Root directory of the reference images
        transform: Image transform; defaults to ``get_image_transform()``
        device: torch device; defaults to CUDA when available, else CPU
        batch_size: Batch size used while embedding the images

    Returns:
        index: FAISS index holding one vector per reference image
        product_ids: List of product IDs, aligned with the index rows
        image_paths: List of image paths (str), aligned with the index rows

    Raises:
        ValueError: If no reference image is found under ``reference_dir``.
    """
    if transform is None:
        transform = get_image_transform()
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    reference_path = Path(reference_dir)
    all_image_paths = []
    product_ids = []

    # Collect all image paths; sorting keeps row order reproducible between runs
    product_folders = sorted(d for d in reference_path.iterdir() if d.is_dir())
    for product_folder in product_folders:
        product_id = product_folder.name
        for pattern in ('*.jpg', '*.jpeg', '*.png'):
            for image_path in sorted(product_folder.glob(pattern)):
                all_image_paths.append(str(image_path))
                product_ids.append(product_id)

    if not all_image_paths:
        # Fail early with a clear message instead of an obscure error while stacking batches
        raise ValueError(
            f"No reference images (*.jpg, *.jpeg, *.png) found in product folders under {reference_path}"
        )

    print(f"Processing {len(all_image_paths)} reference images...")

    # Get embeddings for all images
    embeddings = get_embeddings_batch(model, all_image_paths, transform, device, batch_size)

    # FAISS expects a C-contiguous float32 matrix
    embeddings_f32 = np.ascontiguousarray(embeddings, dtype='float32')

    # Build FAISS index. IndexFlatL2 ranks by squared Euclidean distance; for
    # unit-length embeddings that ordering matches cosine-similarity ordering.
    dimension = embeddings_f32.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings_f32)

    print(f"Built FAISS index with {index.ntotal} vectors")

    return index, product_ids, all_image_paths

# Build index
# transform/device are not passed, so build_reference_index falls back to its defaults
# (get_image_transform() and CUDA-if-available).
# NOTE: keep the name `image_paths` at notebook level — find_similar_products looks it up
# as a module-level variable.
index, product_ids, image_paths = build_reference_index(model)

Processing 208 reference images...
Built FAISS index with 208 vectors


In [17]:
def find_similar_products(query_embedding, index, product_ids, top_k=5, threshold=None,
                          image_paths=None):
    """
    Find the top-k most similar reference products for one query embedding.

    Note: ``faiss.IndexFlatL2`` reports *squared* L2 distances, so ``distance``
    and ``threshold`` are in squared units when such an index is used.

    Args:
        query_embedding: array-like of shape (embedding_dim,)
        index: FAISS index (anything with a FAISS-style ``search(x, k)``)
        product_ids: List of product IDs, aligned with the index rows
        top_k: Number of neighbours to request
        threshold: Optional maximum distance; farther hits are dropped
        image_paths: Optional list of image paths aligned with the index rows.
            When omitted, a module-level ``image_paths`` variable is used if
            one exists (the behaviour older notebook cells relied on);
            otherwise ``'image_path'`` is None in every result.

    Returns:
        results: List of dicts with product_id, distance, similarity, rank and
        image_path. ``rank`` is the 1-based position in the raw search result,
        so ranks can have gaps after threshold filtering.
    """
    if image_paths is None:
        # Backward compatibility with callers that relied on the notebook-level global
        image_paths = globals().get('image_paths')

    # Reshape for FAISS (needs batch dimension, float32)
    query_embedding = np.asarray(query_embedding, dtype='float32').reshape(1, -1)

    # Search
    distances, indices = index.search(query_embedding, top_k)

    results = []
    for rank, (distance, idx) in enumerate(zip(distances[0], indices[0]), start=1):
        if idx < 0:
            # FAISS pads with label -1 when the index holds fewer than top_k vectors;
            # indexing product_ids[-1] would silently report the last product instead.
            continue
        if threshold is not None and distance > threshold:
            continue
        results.append({
            'product_id': product_ids[idx],
            'distance': float(distance),
            'similarity': float(1 / (1 + distance)),  # Convert distance to a (0, 1] score
            'rank': rank,
            'image_path': image_paths[idx] if image_paths is not None else None
        })

    return results

In [33]:
# Example: Find similar products
# Forward slashes keep the path portable (the raw backslash form only resolved on Windows)
# and match the other relative paths used in this notebook.
query_image = 'data/test_images/7.jpg'
query_embedding = get_embedding_from_image(model, query_image, transform, device)

# Each result is a dict with product_id, distance, similarity, rank and image_path
results = find_similar_products(query_embedding, index, product_ids, top_k=5)
print("\nTop 5 similar products:")
for r in results:
    print(f"Rank {r['rank']}: Product {r['product_id']} - Distance: {r['distance']:.4f}, Similarity: {r['similarity']:.4f}")


Top 5 similar products:
Rank 1: Product 1012 - Distance: 0.5193, Similarity: 0.6582
Rank 2: Product 1012 - Distance: 0.5613, Similarity: 0.6405
Rank 3: Product 1012 - Distance: 0.6409, Similarity: 0.6094
Rank 4: Product 1023 - Distance: 0.6524, Similarity: 0.6052
Rank 5: Product 1023 - Distance: 0.7201, Similarity: 0.5814
