In [None]:
from ultralytics import YOLO

# Checkpoint of the custom-trained YOLOv8 model and the advert clip to analyse.
# Point `weights_file` at a different checkpoint to try another model.
weights_file = "/home/mcw/Karthick/shopable-ad2.0/yolo_model/best_shop_mdl.pt"
ad_video = "/home/mcw/Karthick/shopable-ad2.0/ad-video/ad.mp4"

model = YOLO(weights_file)

# Run inference over the whole video and let Ultralytics save the annotated copy.
# NOTE(review): no `classes=` filter is passed here, so detections of every class
# the model knows are kept — confirm whether jacket/suitcase-only output is wanted.
predict_options = dict(
    source=ad_video,
    save=True,
    conf=0.50,  # minimum detection confidence
    iou=0.5,    # IoU threshold used for non-maximum suppression
)
results = model.predict(**predict_options)


In [None]:
from ultralytics import YOLO
import cv2
import json
import time
import numpy as np
import torch
import torchvision.transforms as transforms
from torchvision import models

# Load YOLOv8 model on CPU
model = YOLO("/home/mcw/Karthick/shopable-ad2.0/yolo_model/best_shop_mdl.pt")
model.to("cpu")

# Load classification model (ResNet-50) on CPU.
# `resnet50()` with no arguments builds a randomly initialised network on both
# old and new torchvision releases; the previous `pretrained=False` keyword is
# deprecated and emits a warning on torchvision >= 0.13.
classification_model = models.resnet50()
# Replace the ImageNet head with a 4-way head (one output per suitcase class).
classification_model.fc = torch.nn.Linear(classification_model.fc.in_features, 4)  # Suitcase classes
# The checkpoint is expected to hold a state dict, since it is passed straight
# to `load_state_dict`. `map_location` keeps every tensor on the CPU.
suitcase_state_dict = torch.load(
    "/home/mcw/Karthick/shopable-ad2.0/classification_model/suitcase_classify.pth",
    map_location=torch.device("cpu"),
)
classification_model.load_state_dict(suitcase_state_dict)
classification_model.to("cpu")
# Inference mode: freezes batch-norm statistics and disables dropout.
classification_model.eval()

# Shoppable product pages, keyed by label.
# "jacket" is used for every jacket detection; "suitcase1".."suitcase4" are
# looked up by classify_object() as f"suitcase{predicted_index + 1}".
product_links = dict(
    jacket="https://www.ajio.com/brooks-brothers-stretch-tennis-zip-front-bomber-jacket/p/410309671001",
    suitcase1="https://www.indiamart.com/proddetail/american-tourister-player-trolly-bag-2853815356891.html",
    suitcase2="https://www.americantourister.in/play4blue-blue-sb4061002",
    suitcase3="http://behance.net/gallery/84337561/American-Tourister-Bags?locale=es_ES",
    suitcase4="https://www.flipkart.com/american-tourister-trafford-spinner-78cm-navy-check-in-suitcase-31-inch/p/itm0af067fc544c3",
)

def classify_object(cropped_img):
    """Classify a cropped suitcase image and return the matching product link.

    Args:
        cropped_img: Either an OpenCV-style ``numpy.ndarray`` crop (H x W x 3,
            as produced by slicing a video frame) or an image type that
            torchvision's ``Resize`` already accepts (PIL image / tensor).

    Returns:
        The URL stored in ``product_links`` under ``"suitcase<N>"``, where N is
        the predicted class index + 1, or ``None`` when the crop is missing,
        empty, or no link exists for the predicted class.
    """
    if cropped_img is None:
        return None

    steps = []
    if isinstance(cropped_img, np.ndarray):
        # A degenerate box (zero width or height) yields an empty array that
        # cannot be resized; report "no link" instead of raising.
        if cropped_img.size == 0:
            return None
        if cropped_img.ndim == 3 and cropped_img.shape[2] == 3:
            # OpenCV decodes frames as BGR. NOTE(review): this assumes the
            # classifier was trained on RGB images — confirm against training.
            cropped_img = np.ascontiguousarray(cropped_img[:, :, ::-1])
        # transforms.Resize only accepts PIL images or tensors, so a raw
        # ndarray must be converted first (the original raised TypeError here).
        steps.append(transforms.ToPILImage())

    steps.extend([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])
    transform = transforms.Compose(steps)

    # Add the batch dimension expected by the network: (1, C, 224, 224).
    img_tensor = transform(cropped_img).unsqueeze(0)
    with torch.no_grad():
        outputs = classification_model(img_tensor)
        _, predicted = torch.max(outputs, 1)
    # Class indices are 0-based; product_links keys are 1-based.
    return product_links.get(f"suitcase{predicted.item() + 1}")

# Open video file
video_path = "/home/mcw/Karthick/shopable-ad2.0/ad-video/ad.mp4"
cap = cv2.VideoCapture(video_path, cv2.CAP_FFMPEG)
if not cap.isOpened():
    # Fail loudly instead of silently writing an empty video and JSON file.
    raise IOError(f"Could not open video file: {video_path}")

# Get video properties.
# The frame rate is kept as a float: truncating e.g. 29.97 to 29 would make the
# written video run at a different speed/duration than the source.
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define video writer
fourcc = cv2.VideoWriter_fourcc(*'avc1')
out = cv2.VideoWriter("tracked_output2.mp4", fourcc, fps, (width, height), isColor=True)
if not out.isOpened():
    # Detection results are still useful without the video, so only warn.
    print("Warning: could not open 'tracked_output2.mp4' for writing; no annotated video will be saved.")

# Store bounding boxes for JSON output: {frame index: [{"bbox": [...], "link": ...}, ...]}
bounding_boxes = {}

frame_id = 0
processing_times = []

try:
    while cap.isOpened():
        start_time = time.time()
        ret, frame = cap.read()
        if not ret:
            break

        # Run YOLOv8 object detection on CPU. verbose=False silences the
        # per-frame Ultralytics log so the progress print below stays readable.
        results = model(frame, conf=0.45, iou=0.4, device="cpu", verbose=False)

        frame_boxes = []
        bounding_boxes[frame_id] = frame_boxes

        # Pass 1: collect detections. Nothing is drawn yet, because crops are
        # views into `frame` and an earlier overlay would otherwise leak into a
        # later crop that is fed to the classifier.
        for result in results:
            for box, cls in zip(result.boxes.xyxy, result.boxes.cls):
                obj_class = int(cls.item())
                x1, y1, x2, y2 = map(int, box.tolist())

                assigned_link = None
                if obj_class == 1:  # Jacket
                    assigned_link = product_links["jacket"]
                elif obj_class == 2:  # Suitcase, needs classification
                    cropped_img = frame[y1:y2, x1:x2]
                    # Skip degenerate (zero-area) boxes that cannot be classified.
                    if cropped_img.size > 0:
                        assigned_link = classify_object(cropped_img)

                if assigned_link:
                    frame_boxes.append({
                        "bbox": [x1, y1, x2, y2],
                        "link": assigned_link
                    })

        # Pass 2: draw bounding boxes, labelled with the last URL path segment.
        for entry in frame_boxes:
            x1, y1, x2, y2 = entry["bbox"]
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, entry["link"].split("/")[-1], (x1, y1 - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

        out.write(frame)
        end_time = time.time()
        processing_times.append(end_time - start_time)

        if frame_id % 10 == 0:
            print(f"Processed frame {frame_id}, time: {end_time - start_time:.3f}s")

        frame_id += 1
finally:
    # Always release the capture/writer handles, even if a frame fails mid-run.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

# Guard the empty case: np.mean([]) would warn and return NaN.
avg_processing_time = np.mean(processing_times) if processing_times else 0.0
effective_fps = 1.0 / avg_processing_time if avg_processing_time > 0 else 0

json_path = "/home/mcw/Karthick/shopable-ad2.0/data/bounding_boxes.json"

# Integer frame ids are serialised as string keys by json.dump.
with open(json_path, "w") as json_file:
    json.dump(bounding_boxes, json_file, indent=4)

print("✅ Object detection complete. Output saved as 'tracked_output2.mp4'")
print(f"Processing FPS: {effective_fps:.2f}")
print(f"Total frames processed: {frame_id}")



0: 384x640 1 jacket, 1 suitcase, 32.2ms
Speed: 1.3ms preprocess, 32.2ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)
Processed frame 0, time: 0.122s

0: 384x640 1 jacket, 1 suitcase, 31.9ms
Speed: 2.4ms preprocess, 31.9ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 jacket, 1 suitcase, 31.8ms
Speed: 2.6ms preprocess, 31.8ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 jacket, 1 suitcase, 31.7ms
Speed: 2.2ms preprocess, 31.7ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 jacket, 1 suitcase, 31.9ms
Speed: 2.8ms preprocess, 31.9ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 jacket, 1 suitcase, 31.8ms
Speed: 3.0ms preprocess, 31.8ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 jacket, 1 suitcase, 32.4ms
Speed: 1.5ms preprocess, 32.4ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0