<a href="https://colab.research.google.com/github/srikrishnavansi/Gemini-video-analysis/blob/main/video.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import google.generativeai as genai
import cv2
import json
import re
import time
import numpy as np
import logging

In [8]:
# Root logger setup: emit INFO and above as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

def _extract_json_payload(text):
    """Decode the model reply, tolerating extra text around the JSON array.

    Returns the decoded object, or None (after logging) when nothing in
    *text* can be decoded.
    """
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        logging.error(f"JSONDecodeError: {text}")  # Log full response

    # Python's ``re`` module has no recursive patterns such as ``(?R)``, so a
    # regex cannot match balanced brackets. Instead, try to decode a JSON
    # value starting at each '[' until one succeeds.
    decoder = json.JSONDecoder()
    start = text.find('[')
    if start == -1:
        logging.error("Could not find JSON in the response.")
        return None
    while start != -1:
        try:
            payload, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            start = text.find('[', start + 1)
        else:
            return payload
    logging.error("Could not extract valid JSON from the response.")
    return None


def _prepare_annotations(items, frame_width, frame_height, box_scale):
    """Validate the model's items once and convert them to drawable records.

    Each returned tuple is
    ``(start_time, end_time, top_left, bottom_right, label, label_position, color)``
    with coordinates in pixels. Invalid items are skipped with a warning.
    """
    if box_scale is not None and box_scale <= 0:
        raise ValueError(f"box_scale must be positive or None, got {box_scale}")
    if isinstance(items, dict):
        # A single object instead of a list of objects.
        items = [items]
    if not isinstance(items, list):
        logging.warning(f"Expected a list of items but got: {type(items).__name__}")
        return []

    # With box_scale=None the coordinates are taken as pixels already.
    scale_x = 1.0 if box_scale is None else frame_width / box_scale
    scale_y = 1.0 if box_scale is None else frame_height / box_scale

    annotations = []
    for item in items:
        box = item.get('bounding_box') if isinstance(item, dict) else None
        if not isinstance(box, list) or len(box) != 4:
            logging.warning(f"Invalid bounding box format for item: {item}")
            continue
        try:
            ymin, xmin, ymax, xmax = (float(value) for value in box)
            top = int(round(ymin * scale_y))
            left = int(round(xmin * scale_x))
            bottom = int(round(ymax * scale_y))
            right = int(round(xmax * scale_x))
        except (TypeError, ValueError, OverflowError):
            # Non-numeric, NaN or infinite coordinates.
            logging.warning(f"Invalid bounding box values for item: {item}")
            continue
        if min(top, left, bottom, right) < 0 or right < left or bottom < top:
            logging.warning(f"Invalid bounding box values for item: {item}")
            continue
        # Keep the rectangle inside the frame.
        top = min(top, frame_height - 1)
        bottom = min(bottom, frame_height - 1)
        left = min(left, frame_width - 1)
        right = min(right, frame_width - 1)

        if 'start_time' not in item or 'end_time' not in item:
            logging.warning(f"Missing start or end time for item: {item}")
            continue
        try:
            start_time = float(item['start_time'])
            end_time = float(item['end_time'])
        except (TypeError, ValueError):
            logging.warning(f"Invalid start or end time for item: {item}")
            continue

        label = f"{item.get('item_name', 'Unknown')} ({item.get('condition', 'Unknown')})"
        label_position = (left, top - 10 if top - 10 > 10 else top + 10)
        # One color per item, chosen once, so the box does not flicker
        # between frames.
        color = [int(channel) for channel in np.random.randint(0, 256, size=3)]
        annotations.append(
            (start_time, end_time, (left, top), (right, bottom), label, label_position, color)
        )
    return annotations


def _probe_video(video_path):
    """Return ``(video_length_seconds, fps)`` for the video at *video_path*."""
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise OSError(f"Could not open video: {video_path}")
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)
    finally:
        cap.release()
    if fps <= 0:
        raise ValueError(f"Video reports a non-positive frame rate: {fps}")
    return total_frames / fps, fps


def _write_annotated_video(video_path, output_video_path, items, fps, box_scale):
    """Copy the video to *output_video_path*, drawing each item while it is visible."""
    cap = cv2.VideoCapture(video_path)
    out = None
    try:
        if not cap.isOpened():
            raise OSError(f"Could not open video: {video_path}")
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        annotations = _prepare_annotations(items, frame_width, frame_height, box_scale)

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        # Pass the float rate through: truncating e.g. 29.97 to 29 changes
        # the playback speed of the output.
        out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
        if not out.isOpened():
            raise OSError(f"Could not open video writer for: {output_video_path}")

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            current_time = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000.0
            for start_time, end_time, top_left, bottom_right, label, label_position, color in annotations:
                if start_time <= current_time <= end_time:
                    cv2.rectangle(frame, top_left, bottom_right, color, 2)
                    cv2.putText(frame, label, label_position, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
            out.write(frame)
    finally:
        # Release the handles even when reading or drawing fails.
        cap.release()
        if out is not None:
            out.release()


def analyze_video(video_path, output_video_path="video.mp4", model_name="gemini-1.5-pro", box_scale=1000.0):
    """Analyzes a video using Gemini and annotates it with bounding boxes.

    Args:
        video_path: Path of the local video file to upload and annotate.
        output_video_path: Where the annotated copy is written.
        model_name: Gemini model used for the analysis.
        box_scale: Upper bound of the model's normalized box coordinates.
            Gemini documents its boxes as ``[ymin, xmin, ymax, xmax]`` on a
            0-1000 grid, so they are rescaled to the frame size. Pass None
            if the boxes are already in pixels.

    Returns:
        The path of the annotated video, or None if any step failed (the
        failure is logged).
    """

    try:
        logging.info(f"Starting video analysis for: {video_path}")

        # Upload the video
        video_file = genai.upload_file(path=video_path)

        # Wait for video processing to complete
        while video_file.state.name == "PROCESSING":
            logging.info("Video is still processing. Waiting...")
            time.sleep(10)
            video_file = genai.get_file(video_file.name)
        if video_file.state.name == "FAILED":
            logging.error("Video processing failed on the server.")
            return None
        logging.info("Video processing complete.")

        # Calculate video length
        video_length, fps = _probe_video(video_path)
        logging.info(f"Video length: {video_length} seconds")

        # Define the prompt (using f-strings for clarity)
        prompt = f"""Identify and locate all objects and features present in the video throughout its entire duration from 0 seconds to {video_length} seconds.This includes furniture, appliances, décor, structural elements, and any other detectable items.

                        For each identified object or feature, provide its bounding box, condition, and appearance time in the video. If an object appears multiple times, include separate entries with the precise timestamps for each instance, along with the duration it is visible.

                        Ensure the following:
                        1. All items are accounted for, even if they appear intermittently or across multiple frames.
                        2. The start time and end time for each item are accurate and correspond to the time window during which the object is visible in the video.
                        3. The last `end_time` for any object should match the video’s final time, which is {video_length} seconds.
                        4. Timestamps for items should span the complete duration of the video, from the start (0 seconds) to the video’s end (video length).
                        5. Ensure that no object or feature is skipped in the annotation. If an object appears in any frame, include it, and ensure its end time reflects its actual disappearance.
                        6. Objects appearing multiple times should have separate entries for each instance with accurate start and end times.

                        The output should be provided in the following JSON format:

                        [
                          {{ "totalvideo_length":seconds,
                            "item_name": "string",
                            "condition": "string",
                            "bounding_box": Return a bounding box for each of the objects in this image in [ymin, xmin, ymax, xmax] format.(Provide accurate bounding box),
                            "start_time": seconds,  // The time in seconds when this object first appears
                            "end_time": seconds    // The time in seconds when this object disappears
                          }},
                          {{
                            // ... more items (including multiple entries for objects appearing multiple times)
                          }}
                        ]

                        Make sure that the list of items includes those visible throughout the video, and that timestamps reflect the full duration of the video (from 0s to {video_length}s)."""

        model = genai.GenerativeModel(model_name=model_name)

        # Generate content with JSON response
        response = model.generate_content(
            [video_file, prompt],
            generation_config=genai.GenerationConfig(response_mime_type="application/json"),
            request_options={"timeout": 600},
        )

        # Extract JSON data
        data = _extract_json_payload(response.text)
        if data is None:
            return None
        logging.info("JSON data loaded successfully.")

        # Annotate the video
        _write_annotated_video(video_path, output_video_path, data, fps, box_scale)
        logging.info(f"Annotated video saved to {output_video_path}")
        return output_video_path

    except Exception as e:
        # Top-level boundary: callers rely on None instead of an exception.
        logging.exception(f"An error occurred: {e}")
        return None

Analysis complete. Annotated video saved to: video.mp4


In [None]:
# Run the pipeline on one clip and report the outcome.
video_file_path = "/content/WhatsApp Video 2024-12-23 at 23.47.01.mp4"  # Replace with your video path
output_video = analyze_video(video_file_path)
# analyze_video returns None on failure, so a falsy result selects the failure message.
print(
    f"Analysis complete. Annotated video saved to: {output_video}"
    if output_video
    else "Video analysis failed."
)