In [2]:
# 1. Check GPU allocation
# Runs the `nvidia-smi` shell command; we want to see a table with GPU
# details (e.g., Tesla T4) in the cell output.
!nvidia-smi

Wed Jun 11 10:38:57 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   66C    P8             11W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [3]:
# Install the Ultralytics library which includes YOLOv10 support.
!pip install -q ultralytics

# Import necessary libraries
import ultralytics
from ultralytics import YOLO

# Check the installed version
print(ultralytics.__version__)

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m24.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m44.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m30.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m37.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# 1. Install the necessary library
!pip install -q ultralytics

# 2. Import the YOLO class
from ultralytics import YOLO

# 3. Load the YOLOv10n model
# The '.pt' file will be downloaded automatically if it's not found.
model = YOLO("yolov10s.pt")

# 4. Train the model on the SKU-110K dataset
# The dataset will be downloaded automatically on the first run.
# This is a large dataset (11 GB), so the initial download may take time.
results = model.train(data="SKU-110K.yaml", epochs=10, imgsz=640, batch=8)

print("\nTraining complete. Results saved in the 'runs' directory.")

In [1]:
# === SETUP FOR THE FEATURE EXTRACTOR MODEL ===

# 1. Mount your Google Drive
# This allows us to save models and datasets permanently.
from google.colab import drive
drive.mount('/content/drive')
print("Google Drive mounted successfully.")

# 2. Install new libraries for Metric Learning
# These are different from the YOLO libraries.
print("\nInstalling required libraries: PyTorch Metric Learning and Timm...")
!pip install -q pytorch-metric-learning
!pip install -q timm
print("Installation complete.")

Mounted at /content/drive
Google Drive mounted successfully.

Installing required libraries: PyTorch Metric Learning and Timm...
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m125.9/125.9 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m90.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m81.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m49.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━

In [3]:
!pip install -q ultralytics torchvision faiss-cpu opencv-python matplotlib

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m33.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [4]:
import os
import numpy as np
import torch
from torchvision import models, transforms
from PIL import Image
import faiss
import cv2
from tqdm import tqdm
import matplotlib.pyplot as plt
from ultralytics import YOLO

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [5]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# ImageNet-pretrained ResNet-34 used as a frozen feature extractor.
# `weights=` replaces the deprecated `pretrained=True` flag (torchvision >= 0.13)
# and selects the same IMAGENET1K_V1 checkpoint.
resnet = models.resnet34(weights=models.ResNet34_Weights.IMAGENET1K_V1)
# Swap the classification head for a pass-through so the forward pass returns
# the features that used to feed the classifier instead of class logits.
resnet.fc = torch.nn.Identity()
resnet = resnet.to(device).eval()

# Preprocessing applied to every image before it is embedded: fixed 224x224
# resize, conversion to a tensor, and per-channel mean/std normalisation.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])

Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to /root/.cache/torch/hub/checkpoints/resnet34-b627a593.pth
100%|██████████| 83.3M/83.3M [00:00<00:00, 116MB/s]


In [6]:
def cosine_similarity(a, b):
    """Return the cosine of the angle between vectors ``a`` and ``b``.

    Args:
        a: 1-D array-like.
        b: 1-D array-like of the same length as ``a``.

    Returns:
        ``dot(a, b) / (||a|| * ||b||)``, or ``0.0`` when either vector has
        zero length (the ratio is undefined there and would otherwise be NaN).
    """
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        # A zero vector has no direction; treat it as unrelated instead of
        # propagating NaN into everything computed from the similarity.
        return 0.0
    return np.dot(a, b) / denom

def spatial_weight(d, sigma=0.3):
    """Gaussian fall-off weight for a distance ``d``.

    Evaluates ``exp(-d**2 / (2 * sigma**2))``: the weight is 1 at ``d == 0``
    and decays towards 0 as ``d`` grows relative to ``sigma``. ``d`` and
    ``sigma`` must be expressed in the same units.
    """
    two_variance = 2 * sigma**2
    exponent = -(d**2) / two_variance
    return np.exp(exponent)

def apply_caqe(vectors, positions, sigma=0.3):
    """Refine each feature vector with similarity- and distance-weighted neighbours.

    For every vector ``q`` the refined vector is
    ``q + sum_j alpha_j * beta_j * n_j`` over all *other* vectors ``n_j``,
    where ``alpha_j`` is the cosine similarity between ``q`` and ``n_j`` and
    ``beta_j`` is the Gaussian spatial weight of the distance between their
    positions. The sum is then rescaled to unit length.

    Args:
        vectors: sequence of 1-D NumPy arrays, one per item.
        positions: sequence of ``(x, y)`` pairs aligned with ``vectors``.
            Distances between them are compared against ``sigma``, so both
            must use the same units.
        sigma: width of the Gaussian spatial weight (defaults to the same
            value as ``spatial_weight``).

    Returns:
        List of refined vectors in input order. The inputs are not modified.
    """
    refined = []
    for i, q in enumerate(vectors):
        # Accumulate into a copy; similarities are always measured against
        # the original (unrefined) vector q.
        q_prime = q.copy()
        for j, n in enumerate(vectors):
            if i == j:
                continue
            alpha = cosine_similarity(q, n)
            dx = positions[i][0] - positions[j][0]
            dy = positions[i][1] - positions[j][1]
            dist = np.sqrt(dx**2 + dy**2)
            beta = spatial_weight(dist, sigma)
            q_prime += alpha * beta * n
        norm = np.linalg.norm(q_prime)
        if norm > 0:
            # Skip the rescale for a zero vector instead of dividing by zero.
            q_prime = q_prime / norm
        refined.append(q_prime)
    return refined

In [7]:
# Build the reference gallery: one CAQE-refined embedding per reference image,
# labelled with the name of the SKU folder it came from.
reference_dir = "/content/drive/My Drive/clean_sku_dataset"
sku_vectors = []
sku_labels = []

# Sorted listings keep the vector/label order reproducible between runs
# (os.listdir returns entries in arbitrary order).
for sku_id in sorted(os.listdir(reference_dir)):
    sku_path = os.path.join(reference_dir, sku_id)
    if not os.path.isdir(sku_path):
        continue

    raw_vectors = []
    positions = []

    for img_file in sorted(os.listdir(sku_path)):
        img_path = os.path.join(sku_path, img_file)
        try:
            # The context manager closes the file handle once the pixels
            # have been converted.
            with Image.open(img_path) as opened:
                image = opened.convert("RGB")
        except OSError as err:
            # Non-image files, sub-folders and corrupt images all surface as
            # OSError; skip them instead of aborting the whole gallery build.
            print(f"⚠️ Skipping {img_path}: {err}")
            continue

        tensor = transform(image).unsqueeze(0).to(device)

        with torch.no_grad():
            vec = resnet(tensor).squeeze().cpu().numpy()
            vec = vec / np.linalg.norm(vec)

        # fake x position (same SKU = nearby); indexed by the number of images
        # kept so far so skipped files leave no gaps.
        # NOTE(review): with the default sigma=0.3 a spacing of 1 gives a
        # spatial weight of about 0.004 between adjacent images, so the
        # refinement barely changes these vectors - confirm that is intended.
        positions.append((len(raw_vectors), 0))
        raw_vectors.append(vec)

    # Apply CAQE to SKU group
    refined_vectors = apply_caqe(raw_vectors, positions)

    for vec in refined_vectors:
        sku_vectors.append(vec)
        sku_labels.append(sku_id)

print(f"✅ Stored {len(sku_vectors)} CAQE-refined vectors for {len(set(sku_labels))} SKUs.")

✅ Stored 40 CAQE-refined vectors for 4 SKUs.


In [8]:
if not sku_vectors:
    raise RuntimeError("No reference vectors available; build the SKU gallery before indexing.")

# Dimensionality of the stored embeddings
d = sku_vectors[0].shape[0]
# Exact (brute-force) L2 index. The stored vectors are unit length, so ranking
# by L2 distance is equivalent to ranking by cosine similarity.
index = faiss.IndexFlatL2(d)
# FAISS works on C-contiguous float32 matrices; convert explicitly so the
# index does not depend on the dtype produced upstream.
index.add(np.ascontiguousarray(np.stack(sku_vectors), dtype=np.float32))
print("✅ FAISS index created with", index.ntotal, "vectors.")

✅ FAISS index created with 40 vectors.


In [None]:
import os

from google.colab import files

# Opens Colab's upload widget; `uploaded` is keyed by the uploaded file names.
uploaded = files.upload()

if not uploaded:
    # Cancelling the dialog yields an empty mapping; fail with a clear message
    # instead of an IndexError on the lookup below.
    raise RuntimeError("No file was uploaded; re-run this cell and select a shelf image.")

# Only the first uploaded file is used as the shelf image.
shelf_img_path = next(iter(uploaded))
print(f"📷 Shelf image uploaded: {shelf_img_path}")

In [None]:
# Read the shelf image first: cv2.imread returns None (it does not raise) when
# the file is missing or cannot be decoded, which would otherwise only surface
# later as an obscure error in the cropping/plotting cells.
img = cv2.imread(shelf_img_path)
if img is None:
    raise FileNotFoundError(f"Could not read shelf image: {shelf_img_path}")

# Load trained YOLOv10s
# NOTE(review): this path assumes the training run was saved in a folder named
# "yolov10s" under runs/detect - confirm it matches the run that produced the
# weights (and that the folder still exists in this runtime).
yolo_model = YOLO("runs/detect/yolov10s/weights/best.pt")

# Detect products; only boxes with confidence >= 0.4 are kept.
results = yolo_model.predict(source=shelf_img_path, save=False, conf=0.4)
pred = results[0]
# One row per detection: (x1, y1, x2, y2) corner coordinates.
boxes = pred.boxes.xyxy.cpu().numpy()
print(f"✅ Detected {len(boxes)} products.")

In [None]:
# Save one JPEG crop per detected box; crop_paths[i] corresponds to boxes[i].
os.makedirs("shelf_crops", exist_ok=True)
crop_paths = []

img_h, img_w = img.shape[:2]

for i, box in enumerate(boxes):
    x1, y1, x2, y2 = map(int, box)
    # Clamp to the image and keep at least one pixel per side: a negative
    # coordinate would wrap around in NumPy slicing and a zero-sized box would
    # produce an empty crop that cannot be written. Clamping (rather than
    # skipping) keeps crop_paths aligned with boxes.
    x1 = min(max(x1, 0), img_w - 1)
    y1 = min(max(y1, 0), img_h - 1)
    x2 = min(max(x2, x1 + 1), img_w)
    y2 = min(max(y2, y1 + 1), img_h)

    crop = img[y1:y2, x1:x2]
    crop_path = f"shelf_crops/crop_{i}.jpg"
    # cv2.imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(crop_path, crop):
        raise IOError(f"Failed to write crop: {crop_path}")
    crop_paths.append(crop_path)

In [None]:
# Embed every crop and record where it sits on the shelf for CAQE.
query_vecs = []
positions = []

# The spatial weight used by apply_caqe has sigma=0.3, which is only meaningful
# for coordinates on a 0..1 scale. With raw pixel centres the Gaussian weight is
# effectively 0 for boxes more than a few pixels apart, which silently turns
# CAQE into a no-op, so the centres are divided by the image size.
img_h, img_w = img.shape[:2]

for i, path in enumerate(crop_paths):
    # The context manager closes the file handle once the pixels are converted.
    with Image.open(path) as crop_file:
        image = crop_file.convert("RGB")
    tensor = transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        vec = resnet(tensor).squeeze().cpu().numpy()
        vec = vec / np.linalg.norm(vec)

    query_vecs.append(vec)

    # use center of box for CAQE, as a fraction of image width/height
    x1, y1, x2, y2 = boxes[i]
    cx, cy = (x1 + x2) / 2, (y1 + y2) / 2
    positions.append((cx / img_w, cy / img_h))

In [None]:
# Refine each crop embedding with its neighbours on the shelf: apply_caqe adds
# the other embeddings weighted by cosine similarity and by a Gaussian weight
# of the distance between box centres, then rescales to unit length.
refined_query_vecs = apply_caqe(query_vecs, positions)

In [None]:
# Nearest-neighbour lookup: one best-matching reference vector per crop.
if refined_query_vecs:
    # FAISS works on C-contiguous float32 matrices of shape (n_queries, d).
    query_matrix = np.ascontiguousarray(np.stack(refined_query_vecs), dtype=np.float32)
    D, I = index.search(query_matrix, k=1)
else:
    # No detections: np.stack([]) would raise, so return empty results.
    D = np.empty((0, 1), dtype=np.float32)
    I = np.empty((0, 1), dtype=np.int64)

# I[r][0] is the row of the closest reference vector for query r.
matched_skus = [sku_labels[row[0]] for row in I]

print("\n🎯 Detected SKUs:")
for i, sku in enumerate(matched_skus):
    print(f"  • Product {i+1}: {sku}")

In [10]:
# Distinct SKU labels in sorted order.
# NOTE(review): the following cell recomputes the same value, so this cell
# looks redundant - confirm and remove.
sku_list = sorted(set(sku_labels))

In [None]:
# List the known SKUs, then ask which brand to focus on.
sku_list = sorted(set(sku_labels))
print("\n🛍️ Available SKUs:")
for i, sku in enumerate(sku_list):
    print(f"{i}: {sku}")

selected = input("🔍 Enter brand name to focus on (e.g., 'pepsi'): ").strip().lower()

# Case-insensitive substring match. An empty entry is treated as "no match":
# the empty string is a substring of every name and would otherwise silently
# select every SKU.
if selected:
    matched_targets = [sku for sku in sku_list if selected in sku.lower()]
else:
    matched_targets = []

if not matched_targets:
    print("❌ No matching SKU found.")
else:
    print(f"✅ Matched Brand(s): {matched_targets}")

In [None]:
# Keep only the detections whose matched SKU belongs to the selected brand.
# The three lists stay index-aligned: box, SKU label, horizontal box centre.
_brand_hits = [
    (sku, box)
    for sku, box in zip(matched_skus, boxes)
    if sku in matched_targets
]

selected_boxes = [box for _, box in _brand_hits]
selected_labels = [sku for sku, _ in _brand_hits]
# Horizontal centre of each kept box: midpoint of x1 (index 0) and x2 (index 2)
selected_centers = [(box[0] + box[2]) / 2 for _, box in _brand_hits]

In [None]:
from collections import Counter

# Tally how many of the selected detections were matched to each SKU.
sku_counts_filtered = Counter(selected_labels)

print("\n🧮 Product Counts for Selected Brand:")
for sku_name, n_found in sku_counts_filtered.items():
    print(f"{sku_name}: {n_found}")

In [None]:
# Split the image width into equal vertical strips and report the strips that
# contain no selected-brand box centre.
grid_cells = 10  # Expected positions across shelf
img_h, img_w = img.shape[0], img.shape[1]
cell_width = img_w / grid_cells

# Strip index of every selected box centre (only the x coordinate is used)
detected_cells = {int(cx / cell_width) for cx in selected_centers}

all_cells = set(range(grid_cells))
missing_cells = all_cells.difference(detected_cells)
print(f"\n🕳️ Shelf Gaps for selected brand: {sorted(missing_cells)}")

In [None]:
# Draw on a copy so the original shelf image stays untouched.
annotated_img = img.copy()

# Selected-brand detections: green rectangle with the SKU label just above it
for label, box in zip(selected_labels, selected_boxes):
    left, top, right, bottom = (int(v) for v in box)
    cv2.rectangle(annotated_img, (left, top), (right, bottom), (0, 255, 0), 2)
    cv2.putText(
        annotated_img,
        label,
        (left, top - 10),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.6,
        (0, 255, 0),
        2,
    )

# Missing slots: filled red dot at the horizontal middle of each empty strip,
# placed at 95% of the image height
marker_y = int(img_h * 0.95)
for gap in missing_cells:
    marker_x = int(gap * cell_width + cell_width / 2)
    cv2.circle(annotated_img, (marker_x, marker_y), 10, (0, 0, 255), -1)

# Show result (OpenCV stores BGR, matplotlib expects RGB)
plt.figure(figsize=(16, 10))
rgb_view = cv2.cvtColor(annotated_img, cv2.COLOR_BGR2RGB)
plt.imshow(rgb_view)
plt.axis(False)
plt.title(f"🧃 Annotated Detection for: {matched_targets}")
plt.show()