In [2]:
%cd ..

c:\Users\HP\OneDrive - University of Moratuwa\Desktop\E-Vision-Projects\Shelf_Product_Count_Generation


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
from pathlib import Path
import numpy as np
from tqdm import tqdm
import random

In [4]:
# 1. Dataset class
class ProductDataset(Dataset):
    def __init__(self, reference_dir='data/reference_images', 
                 image_size=224, augment=True):
        self.reference_dir = Path(reference_dir)
        self.image_size = image_size
        
        # Load all images with their product IDs
        self.images = []
        self.product_ids = []
        self.product_to_images = {}
        
        product_folders = sorted([d for d in self.reference_dir.iterdir() if d.is_dir()])
        
        for product_folder in product_folders:
            product_id = product_folder.name
            image_files = sorted(product_folder.glob('*.jpg')) + \
                         sorted(product_folder.glob('*.jpeg')) + \
                         sorted(product_folder.glob('*.png'))
            
            self.product_to_images[product_id] = []
            
            for image_path in image_files:
                self.images.append(str(image_path))
                self.product_ids.append(product_id)
                self.product_to_images[product_id].append(len(self.images) - 1)
        
        # Create product ID to integer mapping
        unique_products = sorted(set(self.product_ids))
        self.product_to_idx = {pid: idx for idx, pid in enumerate(unique_products)}
        self.idx_to_product = {idx: pid for pid, idx in self.product_to_idx.items()}
        self.num_products = len(unique_products)
        
        # Data augmentation
        if augment:
            self.transform = transforms.Compose([
                transforms.Resize((image_size, image_size)),
                transforms.RandomHorizontalFlip(p=0.5),
                transforms.RandomRotation(degrees=15),
                transforms.ColorJitter(brightness=0.2, contrast=0.2),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                                   std=[0.229, 0.224, 0.225])
            ])
        else:
            self.transform = transforms.Compose([
                transforms.Resize((image_size, image_size)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                                   std=[0.229, 0.224, 0.225])
            ])
        
        print(f"Loaded {len(self.images)} images from {self.num_products} products")
    
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        image_path = self.images[idx]
        product_id = self.product_ids[idx]
        product_idx = self.product_to_idx[product_id]
        
        image = Image.open(image_path).convert('RGB')
        image = self.transform(image)
        
        return image, product_idx, product_id

In [22]:
data = ProductDataset()

Loaded 208 images from 68 products


In [29]:
data.product_to_images

{'1000': [0, 1, 2],
 '1001': [3, 4, 5, 6],
 '1002': [7, 8, 9, 10],
 '1003': [11, 12, 13],
 '1004': [14, 15, 16, 17, 18],
 '1005': [19, 20, 21],
 '1006': [22, 23, 24, 25, 26],
 '1007': [27, 28, 29],
 '1008': [30, 31],
 '1009': [32, 33, 34],
 '1010': [35, 36],
 '1011': [37, 38],
 '1012': [39, 40, 41],
 '1013': [42, 43],
 '1014': [44, 45],
 '1015': [46, 47],
 '1016': [48, 49],
 '1017': [50, 51],
 '1018': [52, 53],
 '1019': [54, 55],
 '1020': [56, 57],
 '1021': [58, 59],
 '1022': [60, 61],
 '1023': [62, 63],
 '1024': [64, 65],
 '1025': [66, 67],
 '1026': [68, 69],
 '1027': [70, 71],
 '1028': [72, 73],
 '1029': [74, 75, 76],
 '1030': [77, 78],
 '1031': [79, 80],
 '1032': [81, 82, 83, 84],
 '1033': [85, 86],
 '1034': [87, 88],
 '1035': [89, 90],
 '1036': [91, 92],
 '1037': [93, 94],
 '1038': [95, 96],
 '1039': [97, 98, 99],
 '15785': [100, 101, 102],
 '15827': [103, 104, 105],
 '15829': [106, 107, 108, 109, 110, 111],
 '15832': [112, 113, 114, 115, 116],
 '58': [117, 118, 119, 120, 121, 122,

In [5]:
# 2. Embedding model (backbone + embedding head)
class ProductEmbeddingModel(nn.Module):
    def __init__(self, backbone_name='efficientnet_v2_s', embedding_dim=512):
        super().__init__()
        
        # Load pre-trained backbone
        if backbone_name == 'efficientnet_v2_s':
            backbone = models.efficientnet_v2_s(pretrained=True)
            backbone_features = backbone.classifier[1].in_features
            backbone.classifier = nn.Identity()  # Remove classifier
        elif backbone_name == 'resnet50':
            backbone = models.resnet50(pretrained=True)
            backbone_features = backbone.fc.in_features
            backbone.fc = nn.Identity()
        else:
            raise ValueError(f"Unknown backbone: {backbone_name}")
        
        self.backbone = backbone
        
        # Embedding head
        self.embedding_head = nn.Sequential(
            nn.Linear(backbone_features, 1024),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, embedding_dim),
            nn.LayerNorm(embedding_dim)
        )
        
    def forward(self, x):
        features = self.backbone(x)
        embedding = self.embedding_head(features)
        # L2 normalize embeddings
        embedding = nn.functional.normalize(embedding, p=2, dim=1)
        return embedding

In [21]:
backbone = models.efficientnet_v2_s(pretrained=True)
backbone.classifier = nn.Identity()  



In [22]:
backbone

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): FusedMBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
        )
        (stochastic_depth): StochasticDepth(p=0.0, mode=row)
      )
      (1): FusedMBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  

In [32]:
list(backbone.features.children())[-1]

Conv2dNormActivation(
  (0): Conv2d(256, 1280, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (1): BatchNorm2d(1280, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  (2): SiLU(inplace=True)
)

In [6]:
# 3. Triplet Loss (metric learning)
class TripletLoss(nn.Module):
    def __init__(self, margin=0.5):
        super().__init__()
        self.margin = margin
    
    def forward(self, anchor, positive, negative):
        """
        anchor: embedding of anchor image
        positive: embedding of positive (same product) image
        negative: embedding of negative (different product) image
        """
        distance_positive = nn.functional.pairwise_distance(anchor, positive)
        distance_negative = nn.functional.pairwise_distance(anchor, negative)
        
        loss = torch.relu(distance_positive - distance_negative + self.margin)
        return loss.mean()

In [7]:
# 4. Triplet sampler (creates triplets)
class TripletSampler:
    def __init__(self, dataset):
        self.dataset = dataset
        self.product_to_indices = {}
        
        for idx, product_id in enumerate(dataset.product_ids):
            if product_id not in self.product_to_indices:
                self.product_to_indices[product_id] = []
            self.product_to_indices[product_id].append(idx)
    
    def sample_triplet(self):
        """Sample anchor, positive, negative"""
        # Random anchor product
        anchor_product = random.choice(list(self.product_to_indices.keys()))
        anchor_idx, positive_idx = random.sample(
            self.product_to_indices[anchor_product], 2
        )
        
        # Random negative product (different from anchor)
        negative_product = random.choice([
            p for p in self.product_to_indices.keys() 
            if p != anchor_product
        ])
        negative_idx = random.choice(self.product_to_indices[negative_product])
        
        return anchor_idx, positive_idx, negative_idx

In [8]:
# # 5. Training function
# def train_embedding_model(
#     reference_dir='data/reference_images',
#     backbone='efficientnet_v2_s',
#     embedding_dim=512,
#     batch_size=32,
#     num_epochs=50,
#     learning_rate=0.001,
#     margin=0.5,
#     save_path='models/product_embedding_model.pth'
# ):
    
#     # Device
#     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#     print(f"Using device: {device}")
    
#     # Dataset
#     train_dataset = ProductDataset(reference_dir, augment=True)
#     train_loader = DataLoader(
#         train_dataset, 
#         batch_size=batch_size, 
#         shuffle=True,
#         num_workers=0,  # Use 0 for Windows/Jupyter
#         pin_memory=False
#     )
    
#     # Model
#     model = ProductEmbeddingModel(backbone, embedding_dim)
#     model = model.to(device)
    
#     # Loss and optimizer
#     criterion = TripletLoss(margin=margin)
#     optimizer = optim.Adam(model.parameters(), lr=learning_rate)
#     scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)
    
#     # Triplet sampler
#     triplet_sampler = TripletSampler(train_dataset)
    
#     # Training loop
#     model.train()
#     best_loss = float('inf')
    
#     for epoch in range(num_epochs):
#         epoch_loss = 0
#         num_batches = 0
        
#         progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}")
        
#         for batch_images, batch_labels, batch_product_ids in progress_bar:
#             # Sample triplets
#             anchor_indices = []
#             positive_indices = []
#             negative_indices = []
            
#             for _ in range(batch_size):
#                 a, p, n = triplet_sampler.sample_triplet()
#                 anchor_indices.append(a)
#                 positive_indices.append(p)
#                 negative_indices.append(n)
            
#             # Get images - access dataset directly (safe with num_workers=0)
#             anchor_images = torch.stack([train_dataset[i][0] for i in anchor_indices]).to(device)
#             positive_images = torch.stack([train_dataset[i][0] for i in positive_indices]).to(device)
#             negative_images = torch.stack([train_dataset[i][0] for i in negative_indices]).to(device)
            
#             # Forward pass
#             anchor_emb = model(anchor_images)
#             positive_emb = model(positive_images)
#             negative_emb = model(negative_images)
            
#             # Loss
#             loss = criterion(anchor_emb, positive_emb, negative_emb)
            
#             # Backward pass
#             optimizer.zero_grad()
#             loss.backward()
#             optimizer.step()
            
#             epoch_loss += loss.item()
#             num_batches += 1
            
#             progress_bar.set_postfix({'loss': f'{loss.item():.4f}'})
        
#         avg_loss = epoch_loss / num_batches
#         scheduler.step()
        
#         print(f"Epoch {epoch+1}/{num_epochs} - Loss: {avg_loss:.4f}")
        
#         # Save best model
#         if avg_loss < best_loss:
#             best_loss = avg_loss
#             torch.save({
#                 'model_state_dict': model.state_dict(),
#                 'backbone': backbone,
#                 'embedding_dim': embedding_dim,
#                 'num_products': train_dataset.num_products,
#                 'product_to_idx': train_dataset.product_to_idx,
#                 'epoch': epoch,
#                 'loss': avg_loss
#             }, save_path)
#             print(f"Saved best model (loss: {avg_loss:.4f})")
    
#     print(f"Training complete! Best loss: {best_loss:.4f}")
#     return model

In [8]:
# # 6. Usage: Train the model
# if __name__ == "__main__":
#     model = train_embedding_model(
#         reference_dir='data/reference_images',
#         backbone='efficientnet_v2_s',
#         embedding_dim=512,
#         batch_size=16,  # Adjust based on GPU memory
#         num_epochs=10,
#         learning_rate=0.001,
#         margin=0.5
#     )

In [34]:
# # 5. Training function with checkpoint resuming support
# def train_embedding_model(
#     reference_dir='data/reference_images',
#     backbone='efficientnet_v2_s',
#     embedding_dim=512,
#     batch_size=32,
#     num_epochs=50,
#     learning_rate=0.001,
#     margin=0.5,
#     save_path='models/product_embedding_model.pth',
#     resume_from_checkpoint=None,  # Path to checkpoint to resume from, or None to start fresh
#     start_epoch=0  # Will be updated if resuming
# ):
#     """
#     Train embedding model with optional checkpoint resuming
    
#     Args:
#         resume_from_checkpoint: Path to checkpoint file to resume from.
#                                If None, starts training from scratch.
#                                If same as save_path, automatically resumes if file exists.
#     """
#     # Device
#     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#     print(f"Using device: {device}")
    
#     # Dataset
#     train_dataset = ProductDataset(reference_dir, augment=True)
#     train_loader = DataLoader(
#         train_dataset, 
#         batch_size=batch_size, 
#         shuffle=True,
#         num_workers=0,  # Use 0 for Windows/Jupyter
#         pin_memory=False
#     )
    
#     # Model
#     model = ProductEmbeddingModel(backbone, embedding_dim)
#     model = model.to(device)
    
#     # Loss and optimizer
#     criterion = TripletLoss(margin=margin)
#     optimizer = optim.Adam(model.parameters(), lr=learning_rate)
#     scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)
    
#     # Checkpoint resuming logic
#     best_loss = float('inf')
#     start_epoch = 0
    
#     # Determine checkpoint path
#     checkpoint_path = resume_from_checkpoint if resume_from_checkpoint else save_path
    
#     # Try to load checkpoint
#     if Path(checkpoint_path).exists():
#         print(f"\nFound checkpoint at {checkpoint_path}")
#         print("Loading checkpoint to resume training...")
        
#         checkpoint = torch.load(checkpoint_path, map_location=device)
        
#         # Load model state
#         model.load_state_dict(checkpoint['model_state_dict'])
        
#         # Load optimizer state (if available)
#         if 'optimizer_state_dict' in checkpoint:
#             optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
#             print("Loaded optimizer state")
        
#         # Load scheduler state (if available)
#         if 'scheduler_state_dict' in checkpoint:
#             scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
#             print("Loaded scheduler state")
        
#         # Resume from saved epoch
#         if 'epoch' in checkpoint:
#             start_epoch = checkpoint['epoch'] + 1
#             print(f"Resuming from epoch {start_epoch}")
        
#         # Resume best loss
#         if 'loss' in checkpoint:
#             best_loss = checkpoint['loss']
#             print(f"Previous best loss: {best_loss:.4f}")
        
#         # Verify architecture matches
#         if checkpoint.get('backbone') != backbone:
#             print(f"Warning: Backbone mismatch! Checkpoint: {checkpoint.get('backbone')}, Current: {backbone}")
#         if checkpoint.get('embedding_dim') != embedding_dim:
#             print(f"Warning: Embedding dim mismatch! Checkpoint: {checkpoint.get('embedding_dim')}, Current: {embedding_dim}")
        
#         print("Checkpoint loaded successfully!\n")
#     else:
#         print(f"No checkpoint found at {checkpoint_path}. Starting training from scratch.\n")
    
#     # Triplet sampler
#     triplet_sampler = TripletSampler(train_dataset)
    
#     # Training loop
#     model.train()
    
#     for epoch in range(start_epoch, num_epochs):
#         epoch_loss = 0
#         num_batches = 0
        
#         progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}")
        
#         for batch_images, batch_labels, batch_product_ids in progress_bar:
#             # Sample triplets
#             anchor_indices = []
#             positive_indices = []
#             negative_indices = []
            
#             for _ in range(batch_size):
#                 a, p, n = triplet_sampler.sample_triplet()
#                 anchor_indices.append(a)
#                 positive_indices.append(p)
#                 negative_indices.append(n)
            
#             # Get images - access dataset directly (safe with num_workers=0)
#             anchor_images = torch.stack([train_dataset[i][0] for i in anchor_indices]).to(device)
#             positive_images = torch.stack([train_dataset[i][0] for i in positive_indices]).to(device)
#             negative_images = torch.stack([train_dataset[i][0] for i in negative_indices]).to(device)
            
#             # Forward pass
#             anchor_emb = model(anchor_images)
#             positive_emb = model(positive_images)
#             negative_emb = model(negative_images)
            
#             # Loss
#             loss = criterion(anchor_emb, positive_emb, negative_emb)
            
#             # Backward pass
#             optimizer.zero_grad()
#             loss.backward()
#             optimizer.step()
            
#             epoch_loss += loss.item()
#             num_batches += 1
            
#             progress_bar.set_postfix({'loss': f'{loss.item():.4f}'})
        
#         avg_loss = epoch_loss / num_batches
#         scheduler.step()
        
#         current_lr = scheduler.get_last_lr()[0]
#         print(f"Epoch {epoch+1}/{num_epochs} - Loss: {avg_loss:.4f} - LR: {current_lr:.6f}")
        
#         # Save best model (with full training state)
#         if avg_loss < best_loss:
#             best_loss = avg_loss
#             torch.save({
#                 'model_state_dict': model.state_dict(),
#                 'optimizer_state_dict': optimizer.state_dict(),
#                 'scheduler_state_dict': scheduler.state_dict(),
#                 'backbone': backbone,
#                 'embedding_dim': embedding_dim,
#                 'num_products': train_dataset.num_products,
#                 'product_to_idx': train_dataset.product_to_idx,
#                 'epoch': epoch,
#                 'loss': avg_loss,
#                 'learning_rate': current_lr
#             }, save_path)
#             print(f"Saved best model (loss: {avg_loss:.4f})")
    
#     print(f"\nTraining complete! Best loss: {best_loss:.4f}")
#     return model

In [35]:
# # Explicitly specify checkpoint to resume from
# model = train_embedding_model(
#     reference_dir='data/reference_images',
#     backbone='efficientnet_v2_s',
#     embedding_dim=512,
#     batch_size=16,
#     num_epochs=30,  # Train for 30 more epochs
#     learning_rate=0.0005,  # Can adjust learning rate
#     margin=0.5,
#     save_path='models/product_embedding_model_v2.pth',
#     resume_from_checkpoint='models/product_embedding_model.pth'  # Resume from previous model
# )

In [36]:
# model = train_embedding_model(
#     reference_dir='data/reference_images',
#     backbone='efficientnet_v2_s',
#     embedding_dim=1024,  # Changed from 512 to 1024
#     batch_size=16,
#     num_epochs=20,
#     learning_rate=0.001,
#     margin=0.5,
#     save_path='models/product_embedding_model_1024.pth'  # Use different name
# )

In [9]:
# 5. Training function with best + last model saving and auto-resume from last
def train_embedding_model(
    reference_dir='data/reference_images',
    backbone='efficientnet_v2_s',
    embedding_dim=512,
    batch_size=32,
    num_epochs=50,
    learning_rate=0.001,
    margin=0.5,
    save_path='models/product_embedding_model.pth',
    resume_from_checkpoint=None,  # Path to checkpoint to resume from, or None for auto-resume
    resume_from_best=False  # If True, resume from best model instead of last
):
    """
    Train embedding model with best and last model saving
    
    Args:
        save_path: Base path for saving models. Will create:
                  - {save_path} -> best model
                  - {save_path.replace('.pth', '_last.pth')} -> last model
        resume_from_checkpoint: Explicit checkpoint path, or None for auto-resume
        resume_from_best: If True and resume_from_checkpoint is None, resume from best model
                          Otherwise, resumes from last model (default)
    """
    # Device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    
    # Determine model paths
    best_model_path = save_path
    last_model_path = save_path.replace('.pth', '_last.pth')
    
    # Dataset
    train_dataset = ProductDataset(reference_dir, augment=True)
    train_loader = DataLoader(
        train_dataset, 
        batch_size=batch_size, 
        shuffle=True,
        num_workers=0,  # Use 0 for Windows/Jupyter
        pin_memory=False
    )
    
    # Model
    model = ProductEmbeddingModel(backbone, embedding_dim)
    model = model.to(device)
    
    # Loss and optimizer
    criterion = TripletLoss(margin=margin)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)
    
    # Checkpoint resuming logic
    best_loss = float('inf')
    start_epoch = 0
    
    # Determine which checkpoint to load
    if resume_from_checkpoint is not None:
        # Explicit checkpoint path provided
        checkpoint_path = resume_from_checkpoint
        checkpoint_type = "explicit"
    elif resume_from_best and Path(best_model_path).exists():
        # Resume from best model
        checkpoint_path = best_model_path
        checkpoint_type = "best"
    elif Path(last_model_path).exists():
        # Auto-resume from last model (default)
        checkpoint_path = last_model_path
        checkpoint_type = "last"
    else:
        # No checkpoint found
        checkpoint_path = None
        checkpoint_type = None
    
    # Load checkpoint if available
    if checkpoint_path and Path(checkpoint_path).exists():
        print(f"\nFound {checkpoint_type} checkpoint at {checkpoint_path}")
        print("Loading checkpoint to resume training...")
        
        checkpoint = torch.load(checkpoint_path, map_location=device)
        
        # Load model state
        model.load_state_dict(checkpoint['model_state_dict'])
        
        # Load optimizer state (if available)
        if 'optimizer_state_dict' in checkpoint:
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            print("Loaded optimizer state")
        
        # Load scheduler state (if available)
        if 'scheduler_state_dict' in checkpoint:
            scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
            print("Loaded scheduler state")
        
        # Resume from saved epoch
        if 'epoch' in checkpoint:
            start_epoch = checkpoint['epoch'] + 1
            print(f"Resuming from epoch {start_epoch}")
        
        # Resume best loss
        if 'loss' in checkpoint:
            best_loss = checkpoint['loss']
            print(f"Previous best loss: {best_loss:.4f}")
        
        # Verify architecture matches
        if checkpoint.get('backbone') != backbone:
            print(f"Warning: Backbone mismatch! Checkpoint: {checkpoint.get('backbone')}, Current: {backbone}")
        if checkpoint.get('embedding_dim') != embedding_dim:
            print(f"Warning: Embedding dim mismatch! Checkpoint: {checkpoint.get('embedding_dim')}, Current: {embedding_dim}")
        
        print("Checkpoint loaded successfully!\n")
    else:
        print(f"No checkpoint found. Starting training from scratch.\n")
    
    # Triplet sampler
    triplet_sampler = TripletSampler(train_dataset)
    
    # Helper function to save checkpoint
    def save_checkpoint(epoch, loss, val_loss=None, is_best=False, is_last=False):
        checkpoint_data = {
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'backbone': backbone,
            'embedding_dim': embedding_dim,
            'num_products': train_dataset.num_products,
            'product_to_idx': train_dataset.product_to_idx,
            'epoch': epoch,
            'loss': loss,
            'learning_rate': scheduler.get_last_lr()[0]
        }
        if val_loss is not None:
            checkpoint_data['val_loss'] = val_loss
        
        if is_best:
            torch.save(checkpoint_data, best_model_path)
            print(f"Saved best model (loss: {loss:.4f})")
        
        if is_last:
            torch.save(checkpoint_data, last_model_path)
    
    # Training loop
    model.train()
    
    for epoch in range(start_epoch, num_epochs):
        epoch_loss = 0
        num_batches = 0
        
        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}")
        
        for batch_images, batch_labels, batch_product_ids in progress_bar:
            # Sample triplets
            anchor_indices = []
            positive_indices = []
            negative_indices = []
            
            for _ in range(batch_size):
                a, p, n = triplet_sampler.sample_triplet()
                anchor_indices.append(a)
                positive_indices.append(p)
                negative_indices.append(n)
            
            # Get images - access dataset directly (safe with num_workers=0)
            anchor_images = torch.stack([train_dataset[i][0] for i in anchor_indices]).to(device)
            positive_images = torch.stack([train_dataset[i][0] for i in positive_indices]).to(device)
            negative_images = torch.stack([train_dataset[i][0] for i in negative_indices]).to(device)
            
            # Forward pass
            anchor_emb = model(anchor_images)
            positive_emb = model(positive_images)
            negative_emb = model(negative_images)
            
            # Loss
            loss = criterion(anchor_emb, positive_emb, negative_emb)
            
            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            epoch_loss += loss.item()
            num_batches += 1
            
            progress_bar.set_postfix({'loss': f'{loss.item():.4f}'})
        
        avg_loss = epoch_loss / num_batches
        scheduler.step()
        
        current_lr = scheduler.get_last_lr()[0]
        
        # Check if this is the best model
        is_best = avg_loss < best_loss
        if is_best:
            best_loss = avg_loss
        
        # Save checkpoints
        save_checkpoint(epoch, avg_loss, is_best=is_best, is_last=True)
        
        print(f"Epoch {epoch+1}/{num_epochs} - Loss: {avg_loss:.4f} - LR: {current_lr:.6f}")
    
    print(f"\nTraining complete! Best loss: {best_loss:.4f}")
    print(f"Best model saved at: {best_model_path}")
    print(f"Last model saved at: {last_model_path}")
    return model

In [10]:
model = train_embedding_model(
    reference_dir='data/reference_images',
    backbone='efficientnet_v2_s',
    embedding_dim=1024,
    batch_size=16,
    num_epochs=15,
    learning_rate=0.001,
    margin=0.5,
    save_path='models/product_embedding_model.pth'
    # Will create:
    # - models/product_embedding_model.pth (best model)
    # - models/product_embedding_model_last.pth (last model)
)

Using device: cpu
Loaded 208 images from 68 products




No checkpoint found. Starting training from scratch.



Epoch 1/15: 100%|██████████| 13/13 [01:59<00:00,  9.18s/it, loss=0.0403]


Saved best model (loss: 0.1434)
Epoch 1/15 - Loss: 0.1434 - LR: 0.001000


Epoch 2/15: 100%|██████████| 13/13 [02:10<00:00, 10.07s/it, loss=0.0312]


Saved best model (loss: 0.0584)
Epoch 2/15 - Loss: 0.0584 - LR: 0.001000


Epoch 3/15: 100%|██████████| 13/13 [02:26<00:00, 11.26s/it, loss=0.0478]


Epoch 3/15 - Loss: 0.0623 - LR: 0.001000


Epoch 4/15: 100%|██████████| 13/13 [02:20<00:00, 10.84s/it, loss=0.1446]


Epoch 4/15 - Loss: 0.1101 - LR: 0.001000


Epoch 5/15: 100%|██████████| 13/13 [02:31<00:00, 11.67s/it, loss=0.2199]


Epoch 5/15 - Loss: 0.1547 - LR: 0.001000


Epoch 6/15: 100%|██████████| 13/13 [02:39<00:00, 12.24s/it, loss=0.1711]


Epoch 6/15 - Loss: 0.1278 - LR: 0.001000


Epoch 7/15: 100%|██████████| 13/13 [02:23<00:00, 11.07s/it, loss=0.1972]


Epoch 7/15 - Loss: 0.1336 - LR: 0.001000


Epoch 8/15: 100%|██████████| 13/13 [02:21<00:00, 10.87s/it, loss=0.1030]


Epoch 8/15 - Loss: 0.1098 - LR: 0.001000


Epoch 9/15: 100%|██████████| 13/13 [02:19<00:00, 10.72s/it, loss=0.0932]


Epoch 9/15 - Loss: 0.1211 - LR: 0.001000


Epoch 10/15: 100%|██████████| 13/13 [02:19<00:00, 10.70s/it, loss=0.1321]


Epoch 10/15 - Loss: 0.1324 - LR: 0.001000


Epoch 11/15: 100%|██████████| 13/13 [02:18<00:00, 10.69s/it, loss=0.1422]


Epoch 11/15 - Loss: 0.0991 - LR: 0.001000


Epoch 12/15: 100%|██████████| 13/13 [02:17<00:00, 10.57s/it, loss=0.0950]


Epoch 12/15 - Loss: 0.1046 - LR: 0.001000


Epoch 13/15: 100%|██████████| 13/13 [02:18<00:00, 10.67s/it, loss=0.1600]


Epoch 13/15 - Loss: 0.1406 - LR: 0.001000


Epoch 14/15: 100%|██████████| 13/13 [02:16<00:00, 10.50s/it, loss=0.0672]


Epoch 14/15 - Loss: 0.1119 - LR: 0.001000


Epoch 15/15: 100%|██████████| 13/13 [02:19<00:00, 10.72s/it, loss=0.1985]


Epoch 15/15 - Loss: 0.1435 - LR: 0.001000

Training complete! Best loss: 0.0584
Best model saved at: models/product_embedding_model.pth
Last model saved at: models/product_embedding_model_last.pth


In [11]:
# 2. Embedding model with layer freezing support
class ProductEmbeddingModel(nn.Module):
    def __init__(self, backbone_name='efficientnet_v2_s', embedding_dim=512,
                 freeze_backbone=True, unfreeze_last_n_layers=0):
        super().__init__()
        
        # Load pre-trained backbone
        if backbone_name == 'efficientnet_v2_s':
            backbone = models.efficientnet_v2_s(pretrained=True)
            backbone_features = backbone.classifier[1].in_features
            backbone.classifier = nn.Identity()  # Remove classifier
        elif backbone_name == 'resnet50':
            backbone = models.resnet50(pretrained=True)
            backbone_features = backbone.fc.in_features
            backbone.fc = nn.Identity()
        else:
            raise ValueError(f"Unknown backbone: {backbone_name}")
        
        self.backbone = backbone
        self.backbone_name = backbone_name
        
        # Setup backbone freezing
        self._setup_backbone_freezing(freeze_backbone, unfreeze_last_n_layers)
        
        # Embedding head
        self.embedding_head = nn.Sequential(
            nn.Linear(backbone_features, 1024),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, embedding_dim),
            nn.LayerNorm(embedding_dim)
        )
        
    def _setup_backbone_freezing(self, freeze_backbone, unfreeze_last_n_layers):
        """Setup which backbone layers to freeze/unfreeze"""
        if not freeze_backbone:
            # Don't freeze anything
            print("Backbone: Fully trainable")
            return
        
        if unfreeze_last_n_layers == 0:
            # Freeze entire backbone
            for param in self.backbone.parameters():
                param.requires_grad = False
            print("Backbone: Fully frozen")
        else:
            # Freeze all layers first
            for param in self.backbone.parameters():
                param.requires_grad = False
            
            # Unfreeze last N layers
            if self.backbone_name == 'efficientnet_v2_s':
                # EfficientNet V2 S structure: features -> avgpool -> classifier (removed)
                # features contains multiple blocks
                blocks = list(self.backbone.features.children())
                total_blocks = len(blocks)
                
                # Unfreeze last N blocks
                layers_to_unfreeze = min(unfreeze_last_n_layers, total_blocks)
                for i in range(total_blocks - layers_to_unfreeze, total_blocks):
                    for param in blocks[i].parameters():
                        param.requires_grad = True
                
                print(f"Backbone: Unfroze last {layers_to_unfreeze} feature blocks (out of {total_blocks})")
                
            elif self.backbone_name == 'resnet50':
                # ResNet structure: conv1, bn1, relu, maxpool, layer1-4, avgpool, fc (removed)
                if unfreeze_last_n_layers >= 1:
                    for param in self.backbone.layer4.parameters():
                        param.requires_grad = True
                if unfreeze_last_n_layers >= 2:
                    for param in self.backbone.layer3.parameters():
                        param.requires_grad = True
                if unfreeze_last_n_layers >= 3:
                    for param in self.backbone.layer2.parameters():
                        param.requires_grad = True
                if unfreeze_last_n_layers >= 4:
                    for param in self.backbone.layer1.parameters():
                        param.requires_grad = True
                
                print(f"Backbone: Unfroze last {unfreeze_last_n_layers} ResNet layers")
    
    def print_trainable_params(self):
        """Print summary of trainable vs frozen parameters"""
        total_params = 0
        trainable_params = 0
        frozen_params = 0
        
        backbone_trainable = 0
        head_trainable = 0
        
        print("\n" + "="*60)
        print("MODEL PARAMETER SUMMARY")
        print("="*60)
        
        for name, param in self.named_parameters():
            total_params += param.numel()
            if param.requires_grad:
                trainable_params += param.numel()
                if 'backbone' in name:
                    backbone_trainable += param.numel()
                else:
                    head_trainable += param.numel()
            else:
                frozen_params += param.numel()
        
        print(f"\nTotal parameters: {total_params:,}")
        print(f"Trainable parameters: {trainable_params:,} ({100 * trainable_params / total_params:.2f}%)")
        print(f"  - Backbone: {backbone_trainable:,}")
        print(f"  - Embedding Head: {head_trainable:,}")
        print(f"Frozen parameters: {frozen_params:,} ({100 * frozen_params / total_params:.2f}%)")
        print("="*60 + "\n")
        
    def forward(self, x):
        features = self.backbone(x)
        embedding = self.embedding_head(features)
        # L2 normalize embeddings
        embedding = nn.functional.normalize(embedding, p=2, dim=1)
        return embedding

In [12]:
# 5. Training function with layer freezing and differential learning rates
def train_embedding_model(
    reference_dir='data/reference_images',
    backbone='efficientnet_v2_s',
    embedding_dim=512,
    batch_size=32,
    num_epochs=50,
    learning_rate=0.001,
    backbone_lr_multiplier=0.1,  # Learning rate multiplier for backbone (if unfrozen)
    margin=0.5,
    save_path='models/product_embedding_model.pth',
    resume_from_checkpoint=None,
    resume_from_best=False,
    freeze_backbone=True,
    unfreeze_last_n_layers=0  # Number of backbone layers to unfreeze
):
    """
    Train embedding model with configurable backbone freezing
    
    Args:
        freeze_backbone: If True, freeze backbone (except last N layers if specified)
        unfreeze_last_n_layers: Number of last backbone layers to unfreeze (0 = all frozen)
        backbone_lr_multiplier: Learning rate multiplier for backbone (typically 0.1-0.01)
    """
    # Device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    
    # Determine model paths
    best_model_path = save_path
    last_model_path = save_path.replace('.pth', '_last.pth')
    
    # Dataset
    train_dataset = ProductDataset(reference_dir, augment=True)
    train_loader = DataLoader(
        train_dataset, 
        batch_size=batch_size, 
        shuffle=True,
        num_workers=0,
        pin_memory=False
    )
    
    # Model with layer freezing
    model = ProductEmbeddingModel(
        backbone, 
        embedding_dim,
        freeze_backbone=freeze_backbone,
        unfreeze_last_n_layers=unfreeze_last_n_layers
    )
    model = model.to(device)
    
    # Print parameter summary
    model.print_trainable_params()
    
    # Loss and optimizer with differential learning rates
    criterion = TripletLoss(margin=margin)
    
    # Separate learning rates for backbone and head
    if unfreeze_last_n_layers > 0:
        # Different learning rates for backbone and head
        backbone_params = []
        head_params = []
        
        for name, param in model.named_parameters():
            if param.requires_grad:
                if 'backbone' in name:
                    backbone_params.append(param)
                else:
                    head_params.append(param)
        
        if backbone_params:
            optimizer = optim.Adam([
                {'params': backbone_params, 'lr': learning_rate * backbone_lr_multiplier},
                {'params': head_params, 'lr': learning_rate}
            ])
            print(f"Using differential learning rates:")
            print(f"  Backbone: {learning_rate * backbone_lr_multiplier:.6f}")
            print(f"  Head: {learning_rate:.6f}\n")
        else:
            # Only head params (backbone fully frozen)
            optimizer = optim.Adam(head_params, lr=learning_rate)
            print(f"Using single learning rate: {learning_rate:.6f}\n")
    else:
        # Single learning rate (backbone frozen or all trainable)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        print(f"Using single learning rate: {learning_rate:.6f}\n")
    
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)
    
    # Checkpoint resuming logic
    best_loss = float('inf')
    start_epoch = 0
    
    # Determine which checkpoint to load
    if resume_from_checkpoint is not None:
        checkpoint_path = resume_from_checkpoint
        checkpoint_type = "explicit"
    elif resume_from_best and Path(best_model_path).exists():
        checkpoint_path = best_model_path
        checkpoint_type = "best"
    elif Path(last_model_path).exists():
        checkpoint_path = last_model_path
        checkpoint_type = "last"
    else:
        checkpoint_path = None
        checkpoint_type = None
    
    # Load checkpoint if available
    if checkpoint_path and Path(checkpoint_path).exists():
        print(f"\nFound {checkpoint_type} checkpoint at {checkpoint_path}")
        print("Loading checkpoint to resume training...")
        
        checkpoint = torch.load(checkpoint_path, map_location=device)
        
        # Load model state
        model.load_state_dict(checkpoint['model_state_dict'])
        
        # Load optimizer state (if available)
        if 'optimizer_state_dict' in checkpoint:
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            print("Loaded optimizer state")
        
        # Load scheduler state (if available)
        if 'scheduler_state_dict' in checkpoint:
            scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
            print("Loaded scheduler state")
        
        # Resume from saved epoch
        if 'epoch' in checkpoint:
            start_epoch = checkpoint['epoch'] + 1
            print(f"Resuming from epoch {start_epoch}")
        
        # Resume best loss
        if 'loss' in checkpoint:
            best_loss = checkpoint['loss']
            print(f"Previous best loss: {best_loss:.4f}")
        
        # Verify architecture matches
        if checkpoint.get('backbone') != backbone:
            print(f"Warning: Backbone mismatch! Checkpoint: {checkpoint.get('backbone')}, Current: {backbone}")
        if checkpoint.get('embedding_dim') != embedding_dim:
            print(f"Warning: Embedding dim mismatch! Checkpoint: {checkpoint.get('embedding_dim')}, Current: {embedding_dim}")
        
        print("Checkpoint loaded successfully!\n")
    else:
        print(f"No checkpoint found. Starting training from scratch.\n")
    
    # Triplet sampler
    triplet_sampler = TripletSampler(train_dataset)
    
    # Helper function to save checkpoint
    def save_checkpoint(epoch, loss, val_loss=None, is_best=False, is_last=False):
        checkpoint_data = {
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'backbone': backbone,
            'embedding_dim': embedding_dim,
            'num_products': train_dataset.num_products,
            'product_to_idx': train_dataset.product_to_idx,
            'epoch': epoch,
            'loss': loss,
            'learning_rate': scheduler.get_last_lr()[0],
            'freeze_backbone': freeze_backbone,
            'unfreeze_last_n_layers': unfreeze_last_n_layers
        }
        if val_loss is not None:
            checkpoint_data['val_loss'] = val_loss
        
        if is_best:
            torch.save(checkpoint_data, best_model_path)
            print(f"Saved best model (loss: {loss:.4f})")
        
        if is_last:
            torch.save(checkpoint_data, last_model_path)
    
    # Training loop
    model.train()
    
    for epoch in range(start_epoch, num_epochs):
        epoch_loss = 0
        num_batches = 0
        
        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}")
        
        for batch_images, batch_labels, batch_product_ids in progress_bar:
            # Sample triplets
            anchor_indices = []
            positive_indices = []
            negative_indices = []
            
            for _ in range(batch_size):
                a, p, n = triplet_sampler.sample_triplet()
                anchor_indices.append(a)
                positive_indices.append(p)
                negative_indices.append(n)
            
            # Get images
            anchor_images = torch.stack([train_dataset[i][0] for i in anchor_indices]).to(device)
            positive_images = torch.stack([train_dataset[i][0] for i in positive_indices]).to(device)
            negative_images = torch.stack([train_dataset[i][0] for i in negative_indices]).to(device)
            
            # Forward pass
            anchor_emb = model(anchor_images)
            positive_emb = model(positive_images)
            negative_emb = model(negative_images)
            
            # Loss
            loss = criterion(anchor_emb, positive_emb, negative_emb)
            
            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            epoch_loss += loss.item()
            num_batches += 1
            
            progress_bar.set_postfix({'loss': f'{loss.item():.4f}'})
        
        avg_loss = epoch_loss / num_batches
        scheduler.step()
        
        current_lr = scheduler.get_last_lr()[0]
        
        # Check if this is the best model
        is_best = avg_loss < best_loss
        if is_best:
            best_loss = avg_loss
        
        # Save checkpoints
        save_checkpoint(epoch, avg_loss, is_best=is_best, is_last=True)
        
        print(f"Epoch {epoch+1}/{num_epochs} - Loss: {avg_loss:.4f} - LR: {current_lr:.6f}")
    
    print(f"\nTraining complete! Best loss: {best_loss:.4f}")
    print(f"Best model saved at: {best_model_path}")
    print(f"Last model saved at: {last_model_path}")
    return model

In [13]:
model = train_embedding_model(
    reference_dir='data/reference_images',
    backbone='efficientnet_v2_s',
    embedding_dim=1024,
    batch_size=16,
    num_epochs=20,
    learning_rate=0.001,
    backbone_lr_multiplier=0.1,  # 10x lower LR for backbone
    margin=0.5,
    freeze_backbone=True,
    unfreeze_last_n_layers=2,  # Unfreeze last 2 blocks
    save_path='models/product_embedding_model_unfrozen2.pth'
)

Using device: cpu
Loaded 208 images from 68 products
Backbone: Unfroze last 2 feature blocks (out of 8)

MODEL PARAMETER SUMMARY

Total parameters: 22,540,880
Trainable parameters: 17,255,464 (76.55%)
  - Backbone: 14,892,072
  - Embedding Head: 2,363,392
Frozen parameters: 5,285,416 (23.45%)

Using differential learning rates:
  Backbone: 0.000100
  Head: 0.001000

No checkpoint found. Starting training from scratch.



Epoch 1/20: 100%|██████████| 13/13 [00:50<00:00,  3.87s/it, loss=0.1253]


Saved best model (loss: 0.2334)
Epoch 1/20 - Loss: 0.2334 - LR: 0.000100


Epoch 2/20: 100%|██████████| 13/13 [01:01<00:00,  4.76s/it, loss=0.0277]


Saved best model (loss: 0.0810)
Epoch 2/20 - Loss: 0.0810 - LR: 0.000100


Epoch 3/20: 100%|██████████| 13/13 [01:01<00:00,  4.77s/it, loss=0.0374]


Saved best model (loss: 0.0474)
Epoch 3/20 - Loss: 0.0474 - LR: 0.000100


Epoch 4/20: 100%|██████████| 13/13 [01:06<00:00,  5.15s/it, loss=0.0102]


Saved best model (loss: 0.0296)
Epoch 4/20 - Loss: 0.0296 - LR: 0.000100


Epoch 5/20: 100%|██████████| 13/13 [01:12<00:00,  5.60s/it, loss=0.0000]


Epoch 5/20 - Loss: 0.0307 - LR: 0.000100


Epoch 6/20: 100%|██████████| 13/13 [01:45<00:00,  8.11s/it, loss=0.0456]


Saved best model (loss: 0.0210)
Epoch 6/20 - Loss: 0.0210 - LR: 0.000100


Epoch 7/20: 100%|██████████| 13/13 [01:22<00:00,  6.32s/it, loss=0.0086]


Epoch 7/20 - Loss: 0.0260 - LR: 0.000100


Epoch 8/20: 100%|██████████| 13/13 [01:09<00:00,  5.37s/it, loss=0.0076]


Epoch 8/20 - Loss: 0.0265 - LR: 0.000100


Epoch 9/20: 100%|██████████| 13/13 [01:10<00:00,  5.44s/it, loss=0.0210]


Epoch 9/20 - Loss: 0.0223 - LR: 0.000100


Epoch 10/20: 100%|██████████| 13/13 [01:10<00:00,  5.43s/it, loss=0.0363]


Saved best model (loss: 0.0132)
Epoch 10/20 - Loss: 0.0132 - LR: 0.000100


Epoch 11/20: 100%|██████████| 13/13 [01:11<00:00,  5.47s/it, loss=0.0042]


Epoch 11/20 - Loss: 0.0182 - LR: 0.000100


Epoch 12/20: 100%|██████████| 13/13 [01:09<00:00,  5.34s/it, loss=0.0086]


Epoch 12/20 - Loss: 0.0152 - LR: 0.000100


Epoch 13/20: 100%|██████████| 13/13 [01:10<00:00,  5.39s/it, loss=0.0020]


Saved best model (loss: 0.0130)
Epoch 13/20 - Loss: 0.0130 - LR: 0.000100


Epoch 14/20: 100%|██████████| 13/13 [01:10<00:00,  5.42s/it, loss=0.0246]


Epoch 14/20 - Loss: 0.0132 - LR: 0.000100


Epoch 15/20: 100%|██████████| 13/13 [01:10<00:00,  5.41s/it, loss=0.0094]


Epoch 15/20 - Loss: 0.0132 - LR: 0.000100


Epoch 16/20: 100%|██████████| 13/13 [01:10<00:00,  5.41s/it, loss=0.0008]


Epoch 16/20 - Loss: 0.0191 - LR: 0.000100


Epoch 17/20: 100%|██████████| 13/13 [01:09<00:00,  5.36s/it, loss=0.0346]


Saved best model (loss: 0.0109)
Epoch 17/20 - Loss: 0.0109 - LR: 0.000100


Epoch 18/20: 100%|██████████| 13/13 [01:10<00:00,  5.41s/it, loss=0.0065]


Epoch 18/20 - Loss: 0.0161 - LR: 0.000100


Epoch 19/20: 100%|██████████| 13/13 [01:09<00:00,  5.38s/it, loss=0.0024]


Saved best model (loss: 0.0057)
Epoch 19/20 - Loss: 0.0057 - LR: 0.000100


Epoch 20/20: 100%|██████████| 13/13 [01:09<00:00,  5.37s/it, loss=0.0210]


Epoch 20/20 - Loss: 0.0178 - LR: 0.000050

Training complete! Best loss: 0.0057
Best model saved at: models/product_embedding_model_unfrozen2.pth
Last model saved at: models/product_embedding_model_unfrozen2_last.pth
