In [1]:
from ultralytics import YOLO
import cv2
import numpy as np
from keras.models import load_model

In [2]:
model = YOLO('models/yolov8m-seg.pt')

classifier = load_model('models/apple_classifier.h5')

classifier.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

def preprocess_for_classification(img):
    img = cv2.resize(img, (150, 150))
    img = img.astype(np.float32) / 255.0
    img = np.expand_dims(img, axis=0)
    return img

def classify_apple(apple_img):
    processed_img = preprocess_for_classification(apple_img)
    preds = classifier.predict(processed_img)
    class_idx = np.argmax(preds)
    return class_idx



In [3]:
cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    results = model(frame)


    for result in results:
        boxes = result.boxes
        for box in boxes:
            class_id = int(box.cls)
            if class_id == 47:  
                x1, y1, x2, y2 = box.xyxy[0].int()
                apple_img = frame[int(y1):int(y2), int(x1):int(x2)]

                if apple_img.size == 0:
                    continue
                
                apple_class = classify_apple(apple_img)

                class_labels = {0: "Ripe", 1: "Rotten", 2: "Unripe"}
                label = class_labels.get(apple_class, "Unknown")
                
                red = green = 256
                if label == "Ripe":
                    red = 0
                elif label == "Rotten":
                    green = 0
                
                cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, green, red), 2)
                cv2.putText(frame, label, (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, green, red), 2)

    cv2.imshow('Live Apple Detection', frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


0: 480x640 (no detections), 891.8ms
Speed: 22.5ms preprocess, 891.8ms inference, 12.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 660.8ms
Speed: 7.0ms preprocess, 660.8ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 629.0ms
Speed: 3.0ms preprocess, 629.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 618.9ms
Speed: 2.0ms preprocess, 618.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 631.6ms
Speed: 3.0ms preprocess, 631.6ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 620.9ms
Speed: 3.0ms preprocess, 620.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 628.5ms
Speed: 2.0ms preprocess, 628.5ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 730.8ms
Speed: 5.6ms preprocess