In [10]:
import cv2
import numpy as np
from ultralytics import YOLO
import cvzone
import base64
import os
import time
import threading
from openai import OpenAI

# Set up the OpenRouter client (OpenAI-compatible endpoint).
# The key comes from the OPENROUTER_API_KEY environment variable so the secret
# never has to be pasted into the source; when the variable is unset the key
# falls back to the empty string.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.environ.get("OPENROUTER_API_KEY", ""),
)

# Load the YOLO model
yolo_model = YOLO("best.pt")
names = yolo_model.names  # indexed by class id to get the label text drawn on boxes

# Open the video file
cap = cv2.VideoCapture('vid4.mp4')
if not cap.isOpened():
    raise Exception("Error: Could not open video file.")

# Constants for ROI detection and tracking
cx1 = 491   # x coordinate of the vertical trigger line (pixels in the resized frame)
offset = 8  # a detection triggers when its centre x is closer than this to cx1

# Get current date for folder and file naming
current_date = time.strftime("%Y-%m-%d")

# Create a folder for cropped images
crop_folder = f"crop_{current_date}"
os.makedirs(crop_folder, exist_ok=True)

# Set to track processed track_ids (each id is cropped and analysed only once)
processed_track_ids = set()

def encode_image_to_base64(image):
    """Return *image* JPEG-encoded and serialised as a base64 text string."""
    jpeg_bytes = cv2.imencode('.jpg', image)[1]  # index 1 is the encoded buffer
    b64_bytes = base64.b64encode(jpeg_bytes)
    return b64_bytes.decode('utf-8')

def analyze_image_with_openai(current_image):
    """Ask the OpenRouter-hosted model about label presence and damage.

    Returns the model's reply text. When ``current_image`` is ``None`` a fixed
    notice is returned without any request being made; when the request fails
    the exception is printed and a fixed error string is returned.
    """
    if current_image is None:
        return "No image available for analysis."

    # The image travels inside the request as a base64 JPEG data URL.
    image_b64 = encode_image_to_base64(current_image)
    data_url = f"data:image/jpeg;base64,{image_b64}"

    prompt = (
        "Analyze this image and determine if the label is present on the bottle. Check the following:\n\n"
        "1. **Is the label present?** (Yes/No)\n"
        "2. **Is there any damage?** (Yes/No)\n\n"
        "Return the result strictly in a structured table format like below:\n\n"
        "| Label Present | Damage |\n"
        "|--------------|--------|\n"
        "| Yes/No       | Yes/No |"
    )
    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": prompt},
            {"type": "image_url", "image_url": {"url": data_url}},
        ],
    }

    try:
        completion = client.chat.completions.create(
            extra_headers={
                "HTTP-Referer": "<YOUR_SITE_URL>",
                "X-Title": "<YOUR_SITE_NAME>",
            },
            model="google/gemini-2.0-pro-exp-02-05:free",
            messages=[user_message],
        )
        return completion.choices[0].message.content
    except Exception as err:
        print(f"Error invoking OpenAI model: {err}")
        return "Error processing image."

def save_response_to_file(track_id, response):
    """Append one analysis result to the day's shared report text file."""
    report_path = f"gemini_response_{current_date}_report.txt"
    written_at = time.strftime("%Y-%m-%d %H:%M:%S")
    entry = f"Track ID: {track_id} | Condition: {response} | Date: {written_at}\n\n"

    # Append mode: earlier entries in the report are kept.
    with open(report_path, "a", encoding="utf-8") as report:
        report.write(entry)

def save_crop_image(crop, track_id):
    """Write *crop* as a JPEG inside ``crop_folder`` and return the path used.

    The file is named ``<track_id>_<timestamp>.jpg``.
    """
    stamp = time.strftime("%Y-%m-%d_%H-%M-%S")
    out_path = "/".join((crop_folder, f"{track_id}_{stamp}.jpg"))
    cv2.imwrite(out_path, crop)
    return out_path

def crop_and_process(frame, box, track_id):
    """Crop one detection out of ``frame`` and hand it to a background analysis.

    Each ``track_id`` is handled at most once; later calls with the same id
    return immediately.

    Args:
        frame: Image array that the box coordinates refer to.
        box: ``(x1, y1, x2, y2)`` integer pixel corners of the detection.
        track_id: Tracker id used for de-duplication and in the saved file name.
    """
    if track_id in processed_track_ids:
        return  # Skip if already processed

    x1, y1, x2, y2 = box
    # Slicing gives a view into ``frame``. Copy it so that anything drawn on
    # the frame afterwards cannot change the pixels while the background
    # thread is still encoding them.
    crop = frame[y1:y2, x1:x2].copy()
    if crop.size == 0:
        # Degenerate box: there is nothing to save or analyse. The id is not
        # marked as processed so a later, valid box can still be handled.
        return

    # Save the cropped image
    crop_filename = save_crop_image(crop, track_id)

    # Mark as processed
    processed_track_ids.add(track_id)

    # Start analysis in a separate thread so the video loop is not blocked
    # by the network request.
    threading.Thread(
        target=process_crop_image,
        args=(crop, track_id, crop_filename),
    ).start()

def process_crop_image(current_image, track_id, crop_filename):
    """Analyse one crop and persist the model's answer.

    The answer is printed, recorded through ``save_response_to_file`` and also
    written to a text file next to the saved crop (same path, ``.txt``
    extension). A failure to write that per-crop file is reported on stdout
    and does not raise.

    Args:
        current_image: Cropped image to analyse.
        track_id: Tracker id of the detection.
        crop_filename: Path of the saved crop image.
    """
    response_content = analyze_image_with_openai(current_image)
    print("OpenAI Response:", response_content)
    save_response_to_file(track_id, response_content)

    # Per-crop result file: the crop's own path with the extension swapped.
    response_filename = os.path.splitext(crop_filename)[0] + ".txt"
    try:
        with open(response_filename, "w", encoding="utf-8") as f:
            f.write(f"Track ID: {track_id}\nDate: {time.strftime('%Y-%m-%d %H:%M:%S')}\nResponse: {response_content}\n")
    except (OSError, UnicodeError) as e:
        print(f"Error saving response file: {e}")

def process_video_frame(frame):
    """Detect and track objects in one frame and trigger analysis at the ROI line.

    The frame is resized to 1020x500 and passed to the YOLO tracker. Every
    detection whose horizontal centre is closer than ``offset`` pixels to
    ``cx1`` is cropped for analysis and then outlined and labelled on the frame.

    Args:
        frame: Image as read from the video capture.

    Returns:
        The resized frame with the annotations drawn on it.
    """
    frame = cv2.resize(frame, (1020, 500))

    # Run YOLOv8 tracking
    results = yolo_model.track(frame, persist=True)
    detections = results[0].boxes
    if detections is None:
        return frame

    boxes = detections.xyxy.int().cpu().tolist()
    class_ids = detections.cls.int().cpu().tolist()
    if detections.id is not None:
        track_ids = detections.id.int().cpu().tolist()
    else:
        # No tracker ids on this frame: use -1 as a placeholder for every box.
        track_ids = [-1] * len(boxes)

    # Unannotated copy of the pixels. Crops are taken from it so the rectangle
    # and text drawn below never end up in the image sent for analysis.
    clean_frame = frame.copy()

    for box, track_id, class_id in zip(boxes, track_ids, class_ids):
        x1, y1, x2, y2 = box
        cx = (x1 + x2) // 2

        # Only act on detections whose centre sits inside the trigger band.
        if abs(cx - cx1) >= offset:
            continue

        crop_and_process(clean_frame, box, track_id)

        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cvzone.putTextRect(frame, f'{track_id}', (x2, y2), 1, 1)
        cvzone.putTextRect(frame, f'{names[class_id]}', (x1, y1), 1, 1)

    return frame

def main():
    """Read the video frame by frame, process each frame and display it.

    The loop ends at the end of the stream or when "q" is pressed. The capture
    and the display windows are released even if processing raises.
    """
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame = process_video_frame(frame)

            # Draw the trigger line at the same x used by the ROI test, so the
            # overlay cannot drift from the value of cx1.
            cv2.line(frame, (cx1, 1), (cx1, frame.shape[0] - 1), (0, 0, 255), 2)
            cv2.imshow("RGB", frame)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()



0: 320x640 1 product, 24.2ms
Speed: 2.1ms preprocess, 24.2ms inference, 4.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 product, 19.2ms
Speed: 2.0ms preprocess, 19.2ms inference, 3.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 product, 18.6ms
Speed: 2.0ms preprocess, 18.6ms inference, 5.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 product, 20.1ms
Speed: 1.0ms preprocess, 20.1ms inference, 3.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 product, 19.5ms
Speed: 1.0ms preprocess, 19.5ms inference, 3.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 product, 25.2ms
Speed: 1.0ms preprocess, 25.2ms inference, 6.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 product, 18.4ms
Speed: 1.0ms preprocess, 18.4ms inference, 3.2ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 product, 21.1ms
Speed: 1.0ms preprocess, 21.1ms inference, 3.0ms postprocess per image at shape (1, 3, 320, 640)

In [14]:
import cv2
import numpy as np
from ultralytics import YOLO
import cvzone
import base64
import os
import time
import threading
from openai import OpenAI

# Set up the OpenRouter client (OpenAI-compatible endpoint).
# The key comes from the OPENROUTER_API_KEY environment variable so the secret
# never has to be pasted into the source; when the variable is unset the key
# falls back to the empty string.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.environ.get("OPENROUTER_API_KEY", ""),
)

# Load the YOLO model
yolo_model = YOLO("best.pt")
names = yolo_model.names  # indexed by class id to get the label text drawn on boxes

# Open the video file
cap = cv2.VideoCapture('vid4.mp4')
if not cap.isOpened():
    raise Exception("Error: Could not open video file.")

# Constants for ROI detection and tracking
cx1 = 491   # x coordinate of the vertical trigger line (pixels in the resized frame)
offset = 8  # a detection triggers when its centre x is closer than this to cx1

# Get current date for folder and file naming
current_date = time.strftime("%Y-%m-%d")

# Create a folder for cropped images
crop_folder = f"crop_{current_date}"
os.makedirs(crop_folder, exist_ok=True)

# Set to track processed track_ids (each id is cropped and analysed only once)
processed_track_ids = set()

def encode_image_to_base64(image):
    """JPEG-encode *image* and return the bytes as a base64 ``str``."""
    encode_result = cv2.imencode('.jpg', image)
    jpeg_buffer = encode_result[1]  # second item is the encoded buffer
    return base64.b64encode(jpeg_buffer).decode('utf-8')

def analyze_image_with_openai(current_image):
    """Send one image to the OpenRouter-hosted model and return its verdict.

    Returns the reply text from the model. A ``None`` image short-circuits to
    a fixed notice; a failed request is printed and mapped to a fixed error
    string instead of raising.
    """
    if current_image is None:
        return "No image available for analysis."

    # Instruction text, assembled line by line (blank entries are empty lines).
    prompt_lines = [
        "Analyze this image and determine if the label is present on the bottle. Check the following:",
        "",
        "1. **Is the label present?** (Yes/No)",
        "2. **Is there any damage?** (Yes/No)",
        "",
        "Return the result strictly in a structured table format like below:",
        "",
        "| Label Present | Damage |",
        "|--------------|--------|",
        "| Yes/No       | Yes/No |",
    ]
    text_part = {"type": "text", "text": "\n".join(prompt_lines)}

    # The image is attached as a base64 JPEG data URL.
    encoded = encode_image_to_base64(current_image)
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{encoded}"},
    }

    request_headers = {
        "HTTP-Referer": "<YOUR_SITE_URL>",
        "X-Title": "<YOUR_SITE_NAME>",
    }

    try:
        reply = client.chat.completions.create(
            extra_headers=request_headers,
            model="google/gemini-2.0-pro-exp-02-05:free",
            messages=[{"role": "user", "content": [text_part, image_part]}],
        )
        return reply.choices[0].message.content
    except Exception as exc:
        print(f"Error invoking OpenAI model: {exc}")
        return "Error processing image."

def save_response_to_file(track_id, response, crop_filename):
    """Save the analysis response to a separate .txt file for each bottle.

    The text file is placed next to the crop image: same path, with the
    extension replaced by ``.txt``. An existing file at that path is
    overwritten.

    Args:
        track_id: Tracker id written on the first line.
        response: Model reply written after the date line.
        crop_filename: Path of the crop image the response belongs to.
    """
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")

    # Swap only the final extension. A plain ``str.replace(".jpg", ".txt")``
    # would also rewrite ".jpg" inside directory names and, for a path that
    # does not end in ".jpg", would return the image path itself.
    txt_filename = os.path.splitext(crop_filename)[0] + ".txt"

    # Create the file and save the structured response
    with open(txt_filename, "w", encoding="utf-8") as file:
        file.write(f"Track ID: {track_id}\n")
        file.write(f"Date: {timestamp}\n")
        file.write(f"{response}\n")

def save_crop_image(crop, track_id):
    """Store *crop* under ``crop_folder`` as ``<track_id>_<timestamp>.jpg``.

    Returns the path the image was written to.
    """
    taken_at = time.strftime("%Y-%m-%d_%H-%M-%S")
    image_name = f"{track_id}_{taken_at}.jpg"
    target = f"{crop_folder}/{image_name}"
    cv2.imwrite(target, crop)
    return target

def crop_and_process(frame, box, track_id):
    """Crop one detection out of ``frame`` and hand it to a background analysis.

    Each ``track_id`` is handled at most once; later calls with the same id
    return immediately.

    Args:
        frame: Image array that the box coordinates refer to.
        box: ``(x1, y1, x2, y2)`` integer pixel corners of the detection.
        track_id: Tracker id used for de-duplication and in the saved file name.
    """
    if track_id in processed_track_ids:
        return  # Skip if already processed

    x1, y1, x2, y2 = box
    # Slicing gives a view into ``frame``. Copy it so that anything drawn on
    # the frame afterwards cannot change the pixels while the background
    # thread is still encoding them.
    crop = frame[y1:y2, x1:x2].copy()
    if crop.size == 0:
        # Degenerate box: there is nothing to save or analyse. The id is not
        # marked as processed so a later, valid box can still be handled.
        return

    # Save the cropped image
    crop_filename = save_crop_image(crop, track_id)

    # Mark as processed
    processed_track_ids.add(track_id)

    # Start analysis in a separate thread so the video loop is not blocked
    # by the network request.
    threading.Thread(
        target=process_crop_image,
        args=(crop, track_id, crop_filename),
    ).start()

def process_crop_image(current_image, track_id, crop_filename):
    """Run the model on one crop, print its reply and store it for that bottle."""
    model_reply = analyze_image_with_openai(current_image)
    print(f"OpenAI Response for Track ID {track_id}:", model_reply)

    # The reply goes into its own .txt file, named after the crop image.
    save_response_to_file(track_id, model_reply, crop_filename)

def process_video_frame(frame):
    """Detect and track objects in one frame and trigger analysis at the ROI line.

    The frame is resized to 1020x500 and passed to the YOLO tracker. Every
    detection whose horizontal centre is closer than ``offset`` pixels to
    ``cx1`` is cropped for analysis and then outlined and labelled on the frame.

    Args:
        frame: Image as read from the video capture.

    Returns:
        The resized frame with the annotations drawn on it.
    """
    frame = cv2.resize(frame, (1020, 500))

    # Run YOLOv8 tracking
    results = yolo_model.track(frame, persist=True)
    detections = results[0].boxes
    if detections is None:
        return frame

    boxes = detections.xyxy.int().cpu().tolist()
    class_ids = detections.cls.int().cpu().tolist()
    if detections.id is not None:
        track_ids = detections.id.int().cpu().tolist()
    else:
        # No tracker ids on this frame: use -1 as a placeholder for every box.
        track_ids = [-1] * len(boxes)

    # Unannotated copy of the pixels. Crops are taken from it so the rectangle
    # and text drawn below never end up in the image sent for analysis.
    clean_frame = frame.copy()

    for box, track_id, class_id in zip(boxes, track_ids, class_ids):
        x1, y1, x2, y2 = box
        cx = (x1 + x2) // 2

        # Only act on detections whose centre sits inside the trigger band.
        if abs(cx - cx1) >= offset:
            continue

        crop_and_process(clean_frame, box, track_id)

        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cvzone.putTextRect(frame, f'{track_id}', (x2, y2), 1, 1)
        cvzone.putTextRect(frame, f'{names[class_id]}', (x1, y1), 1, 1)

    return frame

def main():
    """Read the video frame by frame, process each frame and display it.

    The loop ends at the end of the stream or when "q" is pressed. The capture
    and the display windows are released even if processing raises.
    """
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame = process_video_frame(frame)

            # Draw the trigger line at the same x used by the ROI test, so the
            # overlay cannot drift from the value of cx1.
            cv2.line(frame, (cx1, 1), (cx1, frame.shape[0] - 1), (0, 0, 255), 2)
            cv2.imshow("RGB", frame)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()



0: 320x640 1 product, 45.1ms
Speed: 4.0ms preprocess, 45.1ms inference, 3.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 product, 29.6ms
Speed: 2.0ms preprocess, 29.6ms inference, 6.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 product, 26.0ms
Speed: 2.5ms preprocess, 26.0ms inference, 2.1ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 product, 23.6ms
Speed: 4.2ms preprocess, 23.6ms inference, 3.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 product, 23.4ms
Speed: 1.0ms preprocess, 23.4ms inference, 4.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 product, 24.1ms
Speed: 4.5ms preprocess, 24.1ms inference, 6.5ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 product, 31.6ms
Speed: 4.0ms preprocess, 31.6ms inference, 3.5ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 product, 22.1ms
Speed: 3.0ms preprocess, 22.1ms inference, 4.1ms postprocess per image at shape (1, 3, 320, 640)