In [None]:
pip install supervision ultralytics

In [None]:
!apt install tesseract-ocr
!pip install pytesseract

In [None]:
import re
import time
import cv2
from ultralytics import YOLO
import supervision as sv
import pytesseract
from google.colab.patches import cv2_imshow

In [None]:
# Annotator that draws detection bounding boxes onto frames
bbox_annotator = sv.BoxAnnotator()

# YOLOv8 detector, loaded from the "yolov8s.pt" weights
model = YOLO("yolov8s.pt")

In [12]:

# Location of the input video; edit this to point at your own file.
# NOTE(review): the path looks like a Colab Google Drive mount - confirm Drive is
# mounted before running this cell.
VIDEO_PATH = "/content/drive/MyDrive/Robo/video/video.mp4"

# Load the video and fail fast: without this check an unreadable path would make
# the processing loop silently do nothing.
video = cv2.VideoCapture(VIDEO_PATH)
if not video.isOpened():
    raise RuntimeError(f"Could not open video: {VIDEO_PATH}")

frame_count = 0  # Number of frames processed so far

# Known packaged products, keyed by the name expected in the front-side OCR text
predefined_product_details = {
    "Lays": {"variety": "Classic Salted", "company": "PepsiCo"},
    "Lays Masala": {"variety": "India's Magic Masala", "company": "PepsiCo"},
    "Lays Tomato": {"variety": "Tomato Tango", "company": "PepsiCo"},
    "Kurkure": {"variety": "Masala Munch", "company": "PepsiCo"},
    "Kurkure Green": {"variety": "Green Chutney Rajasthani Style", "company": "PepsiCo"},
    # Add more predefined products here
}

side = "front"  # Which side ("front" or "back") the next frame is treated as
side_duration = 0.2  # Pause applied after each processed frame, in seconds

# Per-detection OCR results, keyed by detection index within a frame:
# {index: {"front_text": str, "class_id": id, "back_text": str (once captured)}}
ocr_results = {}


def extract_expiry_date(text):
    """
    Extract an expiry date from OCR text.

    The text must contain one of the expiry keywords ('EXP', 'Expiry',
    'Best Before', 'Use By', 'BBE'), matched case-insensitively. The date that
    follows a keyword is preferred, so a date printed before the keyword (for
    example a manufacturing date) is not reported as the expiry date. A date
    glued directly to the keyword (e.g. 'EXP12/25') is accepted as well. If no
    date follows any keyword, the first date found anywhere in the text is
    returned instead.

    Args:
        text: OCR output to scan. May be empty or None.

    Returns:
        The matched date substring (e.g. '12/2025' or '05-06-2025'), or the
        string 'No expiry date found' when no keyword or no date is present.
    """
    not_found = "No expiry date found"
    if not text:
        return not_found

    # Regular expressions for the supported date layouts
    date_patterns = [
        r"\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b",  # DD/MM/YYYY or similar
        r"\b\d{1,2}[/-]\d{2,4}\b",  # MM/YYYY or similar
    ]
    # Single alternation so the *leftmost* date of either layout is found
    any_date = re.compile("|".join(date_patterns))

    # Keywords that mark an expiry date
    keywords = ["EXP", "Expiry", "Best Before", "Use By", "BBE"]
    keyword_pattern = re.compile(
        "|".join(re.escape(keyword) for keyword in keywords), re.IGNORECASE
    )

    keyword_hits = list(keyword_pattern.finditer(text))
    if not keyword_hits:
        return not_found

    # Preferred: the first date located after a keyword occurrence. The text is
    # sliced (rather than searched from an offset) so that a date touching the
    # keyword still starts on a word boundary.
    for hit in keyword_hits:
        match = any_date.search(text[hit.end():])
        if match:
            return match.group()

    # Fallback: a keyword is present but every date precedes it; scan the whole
    # text, trying the full-date layout before the month/year layout.
    for pattern in date_patterns:
        match = re.search(pattern, text)
        if match:
            return match.group()

    return not_found


# Process the video frame by frame: detect objects, OCR each detected crop, and
# print product details once a "front" frame and the following "back" frame have
# both been handled. The capture is released even if processing raises.
try:
    while video.isOpened():
        ret, frame = video.read()
        if not ret:
            break  # No more frames (or the read failed)

        # Run YOLOv8 inference on the frame
        result = model(frame)[0]

        # Convert to supervision detections and keep only confident ones
        detections = sv.Detections.from_ultralytics(result)
        detections = detections[detections.confidence > 0.5]

        # Annotate a copy of the frame: the annotator draws onto the scene it is
        # given, and the OCR crops below must not contain box outlines.
        annotated_frame = bbox_annotator.annotate(scene=frame.copy(), detections=detections)

        # Display the frame with bounding boxes using cv2_imshow
        # cv2_imshow(annotated_frame)

        frame_height, frame_width = frame.shape[:2]

        # For each detection, crop the detected object and store its OCR text.
        # NOTE(review): front/back results are paired by detection index, which
        # is only meaningful if detections keep the same order across frames.
        for i, (box, class_id) in enumerate(zip(detections.xyxy, detections.class_id)):
            if side == "back" and i not in ocr_results:
                continue  # No front-side entry to attach back text to; skip the OCR

            # Clamp the box to the frame so slicing cannot wrap around or go empty
            x_min, y_min, x_max, y_max = map(int, box)
            x_min, y_min = max(x_min, 0), max(y_min, 0)
            x_max, y_max = min(x_max, frame_width), min(y_max, frame_height)
            cropped_object = frame[y_min:y_max, x_min:x_max]
            if cropped_object.size == 0:
                continue  # Degenerate box: nothing to OCR

            ocr_text = pytesseract.image_to_string(cropped_object)
            if side == "front":
                # Store front side OCR and class_id
                ocr_results[i] = {"front_text": ocr_text, "class_id": class_id}
            elif side == "back":
                # Store back side OCR
                ocr_results[i]["back_text"] = ocr_text

        # Pause, then alternate sides.
        # NOTE(review): the sleep only delays this loop; video.read() returns the
        # next frame regardless, so sides alternate on every frame.
        time.sleep(side_duration)
        if side == "front":
            side = "back"
        else:
            # Both sides captured: report each stored detection
            for ocr_entry in ocr_results.values():
                front_text = ocr_entry.get("front_text", "")
                back_text = ocr_entry.get("back_text", "")
                class_id = ocr_entry.get("class_id", "")

                # Pick the longest product name found in the front text so that
                # e.g. "Lays Masala" is not reported as plain "Lays".
                front_text_lower = front_text.lower()
                matching_products = [
                    product_name
                    for product_name in predefined_product_details
                    if product_name.lower() in front_text_lower
                ]

                if matching_products:
                    detected_product = max(matching_products, key=len)
                    details = predefined_product_details[detected_product]
                    expiry_date = extract_expiry_date(back_text)  # Expiry date comes from the back side
                    print(f"Detected {detected_product}:")
                    print(f"  - Variety: {details['variety']}")
                    print(f"  - Company: {details['company']}")
                    print(f"  - Expiry Date: {expiry_date}")
                else:
                    # Not a known packet: assume a fruit/vegetable and use the model's class name
                    fruit_or_veg_name = model.names[class_id]
                    print(f"Detected a fruit/vegetable: {fruit_or_veg_name}")
                    # Freshness check will be implemented later

            # Clear the OCR results after processing both sides
            ocr_results.clear()
            side = "front"  # Reset to front for next object

        # Increment frame count
        frame_count += 1
finally:
    # Release the video capture
    video.release()



0: 384x640 2 oranges, 468.8ms
Speed: 4.6ms preprocess, 468.8ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 oranges, 535.7ms
Speed: 2.7ms preprocess, 535.7ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)
Detected a fruit/vegetable: orange

0: 384x640 2 oranges, 528.8ms
Speed: 7.7ms preprocess, 528.8ms inference, 4.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 oranges, 557.6ms
Speed: 2.7ms preprocess, 557.6ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 oranges, 422.4ms
Speed: 2.8ms preprocess, 422.4ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 orange, 345.7ms
Speed: 3.7ms preprocess, 345.7ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 339.0ms
Speed: 2.7ms preprocess, 339.0ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 343.8ms
Speed: 2.6ms preproc