In [7]:
# ==========================================
# SETUP INSTRUCTIONS FOR GOOGLE COLAB
# ==========================================
# 1. Install required packages
"""
!pip install torch torchvision transformers
!pip install opencv-python Pillow easyocr
!pip install PyMuPDF gtts pydub moviepy
!pip install scikit-learn numpy pandas matplotlib
!pip install requests ultralytics
"""
# 2. Mount Google Drive
"""
from google.colab import drive
drive.mount('/content/drive')
"""
# 3. Set up directories
"""
import os
os.makedirs('/content/drive/MyDrive/TIM/', exist_ok=True)
"""
# 4. Download necessary models (run once)
"""
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration
from ultralytics import YOLO
# Download BLIP model for image captioning
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
# Download YOLO model for object detection
yolo_model = YOLO('yolov8n.pt')  # Download YOLOv8 nano model
# Save models locally for faster loading
processor.save_pretrained('/content/drive/MyDrive/TIM/models/blip_processor')
model.save_pretrained('/content/drive/MyDrive/TIM/models/blip_model')
yolo_model.save('/content/drive/MyDrive/TIM/models/yolo_model.pt')
"""
# ==========================================
# COMPLETE COLAB SETUP SCRIPT
# ==========================================
# Copy this entire block to your Google Colab notebook
# Cell 1: Install dependencies
!pip install torch torchvision transformers
!pip install opencv-python Pillow easyocr
!pip install PyMuPDF gtts pydub moviepy
!pip install scikit-learn numpy pandas matplotlib
!pip install ultralytics ffmpeg-python

# Cell 2: Mount Google Drive
# The directories and model caches written by the next cells all live under
# /content/drive/MyDrive, so this mount must succeed first.
from google.colab import drive
drive.mount('/content/drive')

# Cell 3: Create directories
# exist_ok=True keeps this cell safe to re-run when the folders already exist.
import os
os.makedirs('/content/drive/MyDrive/TIM/', exist_ok=True)
os.makedirs('/content/drive/MyDrive/TIM/models/', exist_ok=True)
os.makedirs('/content/drive/MyDrive/TIM/output/', exist_ok=True)

# Cell 4: Download and cache models
# NOTE(review): torch is imported here but not referenced directly in this cell.
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration
from ultralytics import YOLO
print("Downloading BLIP model...")
# Loaded by model id; presumably fetched over the network on first use - confirm.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

print("Downloading YOLO model...")
yolo_model = YOLO('yolov8n.pt')  # YOLOv8 nano for fast inference

# Cache models
# These are the same locations VideoObjectDetector.load_models() checks
# (yolo_model.pt, blip_processor, blip_model under the models/ directory).
processor.save_pretrained('/content/drive/MyDrive/TIM/models/blip_processor')
model.save_pretrained('/content/drive/MyDrive/TIM/models/blip_model')
yolo_model.save('/content/drive/MyDrive/TIM/models/yolo_model.pt')
print("Models cached successfully!")

# ==========================================
# VIDEO OBJECT DETECTION WITH OUTLINES
# ==========================================

import cv2
import numpy as np
from PIL import Image
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration
from ultralytics import YOLO
import os
from moviepy.editor import VideoFileClip
import tempfile

class AdvancedVideoObjectDetector(VideoObjectDetector):
    """Detector variant that draws per-class coloured boxes and keeps the audio.

    Inherits model loading and ``detect_objects`` from ``VideoObjectDetector``
    and adds a colour-coded drawing routine plus a video pipeline that
    re-attaches the source audio track to the annotated frames.
    """

    # NOTE(review): this class statement needs ``VideoObjectDetector`` to be
    # defined already. When the file is executed top to bottom the base class
    # must therefore appear (or be run) before this class.

    # Colour tuples are consumed by OpenCV drawing calls, which interpret them
    # as (B, G, R) on the BGR frames that cv2.VideoCapture yields.
    _PALETTE_BGR = (
        (255, 0, 0),    # blue
        (0, 255, 0),    # green
        (0, 0, 255),    # red
        (255, 255, 0),  # cyan
        (255, 0, 255),  # magenta
        (0, 255, 255),  # yellow
        (128, 0, 128),  # purple
        (255, 165, 0),  # azure / light blue
    )

    # Frame rate used when the container does not report a usable one.
    _FALLBACK_FPS = 30.0

    def draw_enhanced_outlines(self, frame, detections, confidence_threshold=0.5):
        """Return a copy of ``frame`` with a coloured box and label per detection.

        Args:
            frame: Image array as produced by ``cv2.VideoCapture.read``.
            detections: Result object exposing ``boxes`` (may be ``None``);
                each box provides ``xyxy``, ``conf`` and ``cls``.
            confidence_threshold: Detections below this score are not drawn.

        Returns:
            The annotated copy; the input frame is left untouched.
        """
        annotated_frame = frame.copy()

        boxes = detections.boxes
        if boxes is None:
            return annotated_frame

        font = cv2.FONT_HERSHEY_SIMPLEX
        palette = self._PALETTE_BGR

        for box in boxes:
            confidence = float(box.conf[0])
            if confidence < confidence_threshold:
                continue

            class_id = int(box.cls[0])
            x1, y1, x2, y2 = (int(v) for v in box.xyxy[0].tolist())
            class_name = self.yolo_model.names[class_id]
            # The colour is a pure function of the class id, so a class keeps
            # the same colour in every frame.
            color = palette[class_id % len(palette)]

            # Thick coloured outline
            cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), color, 3)

            # Filled label background sitting on top of the box
            label = f"{class_name}: {confidence:.2f}"
            label_width, label_height = cv2.getTextSize(label, font, 0.6, 2)[0]
            cv2.rectangle(annotated_frame,
                          (x1, y1 - label_height - 15),
                          (x1 + label_width + 10, y1),
                          color, -1)

            # White label text
            cv2.putText(annotated_frame, label, (x1 + 5, y1 - 5),
                        font, 0.6, (255, 255, 255), 2)

        return annotated_frame

    def process_video_with_audio_enhanced(self, video_path, output_path, confidence_threshold=0.5):
        """Annotate every frame of a video and write it out with the original audio.

        Frames are first rendered to a temporary silent file, then combined
        with the source audio track (when there is one) into ``output_path``.

        Args:
            video_path: Path of the source video.
            output_path: Path of the annotated video to create.
            confidence_threshold: Minimum score for a detection to be drawn.

        Returns:
            ``True`` on success, ``False`` when the input is missing or
            unreadable or when any processing step fails.
        """
        if not os.path.exists(video_path):
            print(f"Error: Video file not found at {video_path}")
            return False

        # mkstemp creates the file atomically; tempfile.mktemp only returns a
        # name and is documented as deprecated because of the resulting race.
        temp_fd, temp_video_path = tempfile.mkstemp(suffix='.mp4')
        os.close(temp_fd)

        try:
            if not self._render_enhanced_frames(video_path, temp_video_path, confidence_threshold):
                return False

            output_dir = os.path.dirname(output_path)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)

            self._merge_original_audio(video_path, temp_video_path, output_path)
            return True

        except Exception as e:
            print(f"Error in enhanced processing: {e}")
            import traceback
            traceback.print_exc()
            return False

        finally:
            if os.path.exists(temp_video_path):
                os.remove(temp_video_path)

    def _render_enhanced_frames(self, video_path, temp_video_path, confidence_threshold):
        """Write the annotated (silent) frames of ``video_path`` to ``temp_video_path``."""
        cap = cv2.VideoCapture(video_path)
        out = None
        try:
            if not cap.isOpened():
                print(f"Error: Cannot open video file {video_path}")
                return False

            # Keep the fractional frame rate: truncating e.g. 29.97 to 29
            # stretches the video relative to the audio merged back later.
            fps = cap.get(cv2.CAP_PROP_FPS)
            if not fps > 0:
                print(f"Warning: source FPS unavailable, assuming {self._FALLBACK_FPS}")
                fps = self._FALLBACK_FPS
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            print(f"Processing {total_frames} frames with enhanced detection...")

            if total_frames <= 0:
                print("Error: Video has 0 frames")
                return False

            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(temp_video_path, fourcc, fps, (width, height))
            if not out.isOpened():
                print(f"Error: Cannot create output video file {temp_video_path}")
                return False

            frame_count = 0
            successful_frames = 0

            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                if frame is None:
                    frame_count += 1
                    continue

                try:
                    detections = self.detect_objects(frame)
                    frame_to_write = self.draw_enhanced_outlines(
                        frame, detections, confidence_threshold)
                except Exception as e:
                    # Best effort: keep the timeline intact by writing the
                    # untouched frame when detection or drawing fails.
                    print(f"Error processing frame {frame_count}: {e}")
                    frame_to_write = frame
                else:
                    successful_frames += 1

                out.write(frame_to_write)
                frame_count += 1

                if frame_count % 30 == 0:
                    print(f"Processed {frame_count}/{total_frames} frames ({frame_count/total_frames*100:.1f}%)")

            print(f"Enhanced processing complete: {successful_frames}/{frame_count} frames processed")
            return True

        finally:
            # Always release the handles, including on early return or error.
            cap.release()
            if out is not None:
                out.release()

    def _merge_original_audio(self, video_path, temp_video_path, output_path):
        """Combine the rendered frames with the source audio into ``output_path``."""
        print("Merging with audio...")
        from moviepy.editor import VideoFileClip

        original_clip = VideoFileClip(video_path)
        try:
            if original_clip.audio is None:
                print("No audio found, saving video only")
                import shutil
                shutil.copy2(temp_video_path, output_path)
                print(f"Enhanced video saved to: {output_path}")
                return

            processed_clip = VideoFileClip(temp_video_path)
            try:
                final_clip = processed_clip.set_audio(original_clip.audio)
                final_clip.write_videofile(
                    output_path,
                    codec='libx264',
                    audio_codec='aac',
                    verbose=False,
                    logger=None
                )
                final_clip.close()
                print(f"Enhanced video with audio saved to: {output_path}")
            finally:
                processed_clip.close()
        finally:
            original_clip.close()

    def track_objects_across_frames(self, video_path, output_path, confidence_threshold=0.5):
        """Alias for ``process_video_with_audio_enhanced``.

        Despite the name, no identity tracking is performed here: detections
        are drawn independently per frame and the only cross-frame consistency
        is the per-class colour. Returns the same boolean as the wrapped call.
        """
        return self.process_video_with_audio_enhanced(video_path, output_path, confidence_threshold)

# Example usage with advanced features
# advanced_detector = AdvancedVideoObjectDetector()
# advanced_detector.track_objects_across_frames(
#     '/content/drive/MyDrive/TIM/input_video.mp4',
#     '/content/drive/MyDrive/TIM/output/enhanced_video.mp4',
#     confidence_threshold=0.5
# )

class VideoObjectDetector:
    """Detect objects in video frames with YOLO and draw labelled boxes.

    The constructor loads a YOLO detector and a BLIP captioning model, using
    copies cached under ``model_path`` when present. Only the YOLO model is
    used by the methods of this class.
    """

    # Frame rate used when the container does not report a usable one.
    _FALLBACK_FPS = 30.0

    def __init__(self, model_path='/content/drive/MyDrive/TIM/models/'):
        """Remember ``model_path`` and load the models from it immediately."""
        self.model_path = model_path
        self.load_models()

    def load_models(self):
        """Load YOLO and BLIP, preferring the copies cached in ``self.model_path``."""
        print("Loading models...")

        # YOLO model for object detection
        yolo_path = os.path.join(self.model_path, 'yolo_model.pt')
        if os.path.exists(yolo_path):
            self.yolo_model = YOLO(yolo_path)
        else:
            print("YOLO model not found, downloading...")
            self.yolo_model = YOLO('yolov8n.pt')

        # BLIP model for image captioning
        blip_processor_path = os.path.join(self.model_path, 'blip_processor')
        blip_model_path = os.path.join(self.model_path, 'blip_model')

        if os.path.exists(blip_processor_path) and os.path.exists(blip_model_path):
            self.blip_processor = BlipProcessor.from_pretrained(blip_processor_path)
            self.blip_model = BlipForConditionalGeneration.from_pretrained(blip_model_path)
        else:
            print("BLIP model not found, downloading...")
            self.blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
            self.blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

        print("Models loaded successfully!")

    def detect_objects(self, frame):
        """Run YOLO on one frame and return the first result object."""
        results = self.yolo_model(frame)
        return results[0]

    def draw_outlines(self, frame, detections, confidence_threshold=0.5):
        """Return a copy of ``frame`` with a green box and label per detection.

        Args:
            frame: Image array as produced by ``cv2.VideoCapture.read``.
            detections: Result object exposing ``boxes`` (may be ``None``);
                each box provides ``xyxy``, ``conf`` and ``cls``.
            confidence_threshold: Detections below this score are not drawn.

        Returns:
            The annotated copy; the input frame is left untouched.
        """
        annotated_frame = frame.copy()

        boxes = detections.boxes
        if boxes is None:
            return annotated_frame

        font = cv2.FONT_HERSHEY_SIMPLEX
        box_color = (0, 255, 0)

        for box in boxes:
            confidence = float(box.conf[0])
            if confidence < confidence_threshold:
                continue

            x1, y1, x2, y2 = (int(v) for v in box.xyxy[0].tolist())
            class_name = self.yolo_model.names[int(box.cls[0])]

            # Bounding box
            cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), box_color, 2)

            # Filled label background sitting on top of the box
            label = f"{class_name}: {confidence:.2f}"
            label_width, label_height = cv2.getTextSize(label, font, 0.5, 2)[0]
            cv2.rectangle(annotated_frame,
                          (x1, y1 - label_height - 10),
                          (x1 + label_width, y1),
                          box_color, -1)

            # Black label text
            cv2.putText(annotated_frame, label, (x1, y1 - 5),
                        font, 0.5, (0, 0, 0), 2)

        return annotated_frame

    def process_video(self, video_path, output_path, confidence_threshold=0.5):
        """Annotate every frame of ``video_path`` and write a silent video.

        Args:
            video_path: Path of the source video.
            output_path: Path of the annotated video to create.
            confidence_threshold: Minimum score for a detection to be drawn.

        Returns:
            ``True`` when the output was written, ``False`` when the input is
            missing, unreadable or empty, or the output cannot be created.
        """
        if not os.path.exists(video_path):
            print(f"Error: Video file not found at {video_path}")
            return False

        cap = cv2.VideoCapture(video_path)
        out = None
        frame_count = 0
        successful_frames = 0

        try:
            if not cap.isOpened():
                print(f"Error: Cannot open video file {video_path}")
                return False

            # Keep the fractional frame rate: truncating e.g. 29.97 to 29
            # changes the duration and desynchronises audio merged back later.
            fps = cap.get(cv2.CAP_PROP_FPS)
            if not fps > 0:
                print(f"Warning: source FPS unavailable, assuming {self._FALLBACK_FPS}")
                fps = self._FALLBACK_FPS
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            print("Video Properties:")
            print(f"  Total frames: {total_frames}")
            print(f"  FPS: {fps}")
            print(f"  Resolution: {width}x{height}")

            if total_frames <= 0:
                print("Error: Video has 0 frames or is corrupted")
                return False

            # dirname is '' for a bare filename, and os.makedirs('') raises.
            output_dir = os.path.dirname(output_path)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)

            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

            if not out.isOpened():
                print(f"Error: Cannot create output video file {output_path}")
                return False

            print(f"Processing {total_frames} frames...")

            while True:
                ret, frame = cap.read()
                if not ret:
                    print(f"Finished reading frames. Read {frame_count} frames.")
                    break

                if frame is None:
                    print(f"Warning: Frame {frame_count} is None, skipping...")
                    frame_count += 1
                    continue

                try:
                    detections = self.detect_objects(frame)
                    frame_to_write = self.draw_outlines(frame, detections, confidence_threshold)
                except Exception as e:
                    # Best effort: keep the timeline intact by writing the
                    # untouched frame when detection or drawing fails.
                    print(f"Error processing frame {frame_count}: {e}")
                    frame_to_write = frame
                else:
                    successful_frames += 1

                out.write(frame_to_write)
                frame_count += 1

                if frame_count % 30 == 0:  # progress every 30 frames
                    print(f"Processed {frame_count}/{total_frames} frames ({frame_count/total_frames*100:.1f}%)")

        finally:
            # Always release the handles, including on early return or error.
            cap.release()
            if out is not None:
                out.release()

        print("Video processing complete!")
        print(f"  Processed frames: {frame_count}")
        print(f"  Successful frames: {successful_frames}")
        print(f"  Saved to: {output_path}")
        # Callers (process_video_with_audio) branch on this value; falling off
        # the end would return None and be treated as a failure.
        return True

    def process_video_with_audio(self, video_path, output_path, confidence_threshold=0.5):
        """Annotate a video and re-attach the original audio track.

        Frames are rendered to a temporary silent file by ``process_video``
        and then combined with the source audio (when there is one).

        Returns:
            ``True`` on success, ``False`` on any failure.
        """
        if not os.path.exists(video_path):
            print(f"Error: Video file not found at {video_path}")
            return False

        # mkstemp creates the file atomically; tempfile.mktemp only returns a
        # name and is documented as deprecated because of the resulting race.
        temp_fd, temp_video_path = tempfile.mkstemp(suffix='.mp4')
        os.close(temp_fd)

        try:
            if not self.process_video(video_path, temp_video_path, confidence_threshold):
                print("Failed to process video frames")
                return False

            output_dir = os.path.dirname(output_path)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)

            print("Extracting and merging audio...")
            original_clip = VideoFileClip(video_path)
            try:
                if original_clip.audio is None:
                    print("No audio track found in original video")
                    import shutil
                    shutil.copy2(temp_video_path, output_path)
                    print(f"Video without audio saved to: {output_path}")
                    return True

                processed_clip = VideoFileClip(temp_video_path)
                try:
                    final_clip = processed_clip.set_audio(original_clip.audio)
                    final_clip.write_videofile(
                        output_path,
                        codec='libx264',
                        audio_codec='aac',
                        verbose=False,
                        logger=None
                    )
                    final_clip.close()
                    print(f"Video with audio saved to: {output_path}")
                finally:
                    processed_clip.close()
            finally:
                original_clip.close()

            return True

        except Exception as e:
            print(f"Error processing video with audio: {e}")
            return False

        finally:
            if os.path.exists(temp_video_path):
                os.remove(temp_video_path)

    def process_video_realtime(self, video_path, confidence_threshold=0.5):
        """Show annotated frames in an OpenCV window until the video ends or 'q' is pressed.

        NOTE(review): ``cv2.imshow`` needs a GUI backend; it is probably not
        usable inside a hosted notebook - confirm before relying on it there.
        """
        cap = cv2.VideoCapture(video_path)
        try:
            if not cap.isOpened():
                print(f"Error: Cannot open video file {video_path}")
                return

            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                detections = self.detect_objects(frame)
                annotated_frame = self.draw_outlines(frame, detections, confidence_threshold)

                cv2.imshow('Object Detection', annotated_frame)

                # Stop on 'q'
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            cap.release()
            cv2.destroyAllWindows()

    def get_detection_summary(self, video_path, confidence_threshold=0.5, sample_frames=10):
        """Count detections per class over evenly spaced sample frames.

        Args:
            video_path: Path of the video to sample.
            confidence_threshold: Minimum score for a detection to be counted.
            sample_frames: Number of frames to sample across the video.

        Returns:
            Dict mapping class name to the number of detections summed over
            the sampled frames; empty when the video cannot be read.
        """
        object_counts = {}
        cap = cv2.VideoCapture(video_path)
        try:
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            if not cap.isOpened() or total_frames <= 0 or sample_frames <= 0:
                return object_counts

            # Deduplicate: with more samples than frames, linspace repeats
            # indices and the same frame would be counted several times.
            frame_indices = sorted({
                int(idx)
                for idx in np.linspace(0, total_frames - 1, sample_frames, dtype=int)
            })

            for frame_idx in frame_indices:
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
                ret, frame = cap.read()
                if not ret:
                    continue

                boxes = self.detect_objects(frame).boxes
                if boxes is None:
                    continue

                for box in boxes:
                    if float(box.conf[0]) >= confidence_threshold:
                        class_name = self.yolo_model.names[int(box.cls[0])]
                        object_counts[class_name] = object_counts.get(class_name, 0) + 1
        finally:
            cap.release()

        return object_counts

# ==========================================
# USAGE EXAMPLES
# ==========================================

# Initialize the module-level detector used by the processing script below.
# The constructor calls load_models(), so this line loads the YOLO and BLIP
# models from the default model_path (falling back to a download when the
# cached copies are not found).
detector = VideoObjectDetector()

# ==========================================
# VIDEO FILE VERIFICATION AND PROCESSING
# ==========================================

def verify_video_file(video_path):
    """Check that a video exists, opens, and reports at least one frame.

    Prints the frame count, FPS and resolution reported by OpenCV.

    Args:
        video_path: Path of the video file to inspect.

    Returns:
        ``True`` when the file opens and its reported frame count is positive,
        ``False`` otherwise.
    """
    import os
    if not os.path.exists(video_path):
        print(f"Error: Video file not found at {video_path}")
        return False

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"Error: Cannot open video file {video_path}")
            return False

        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        print("Video Info:")
        print(f"  Path: {video_path}")
        print(f"  Frames: {frame_count}")
        print(f"  FPS: {fps}")
        print(f"  Resolution: {width}x{height}")

        return frame_count > 0
    finally:
        # Release on every path, including the failed-open early return.
        cap.release()

# Check if video file exists and get correct path
import os
import glob

# Candidate video locations, checked in priority order. Entries containing
# '*' are glob patterns; the first entry is a literal path.
possible_paths = [
    '/content/drive/MyDrive/TIM/input_video.mp4',
    '/content/drive/MyDrive/TIM/*.mp4',
    '/content/drive/MyDrive/TIM/*.avi',
    '/content/drive/MyDrive/TIM/*.mov',
    '/content/drive/MyDrive/*.mp4',
    '/content/drive/MyDrive/*.avi',
    '/content/drive/MyDrive/*.mov'
]

# glob.glob() also handles literal paths (it returns the path when it exists),
# so a single lookup covers both kinds of entry. Matches are sorted because
# glob order is arbitrary and the chosen input should be reproducible.
input_video = None
for pattern in possible_paths:
    matches = sorted(glob.glob(pattern))
    if matches:
        input_video = matches[0]
        break

if input_video is None:
    print("No video file found. Please upload a video file to one of these locations:")
    for path in possible_paths[:4]:  # the entries under the TIM folder
        print(f"  {path}")
    print("\nTo upload a video file in Colab:")
    print("1. Click on the folder icon in the left sidebar")
    print("2. Navigate to /content/drive/MyDrive/TIM/")
    print("3. Click upload and select your video file")
else:
    print(f"Found video file: {input_video}")

    if verify_video_file(input_video):
        output_video = '/content/drive/MyDrive/TIM/output/annotated_video.mp4'

        # Pass 1: basic detector, audio preserved, followed by a summary.
        try:
            success = detector.process_video_with_audio(input_video, output_video, confidence_threshold=0.5)

            if success:
                summary = detector.get_detection_summary(input_video, confidence_threshold=0.5)
                print("\nObjects detected in video:")
                for obj, count in summary.items():
                    print(f"  {obj}: {count} instances")
            else:
                print("Failed to process video")

        except Exception as e:
            print(f"Error processing video: {e}")

        # Pass 2: advanced detector with per-class colours (audio preserved).
        try:
            advanced_detector = AdvancedVideoObjectDetector()
            success = advanced_detector.process_video_with_audio_enhanced(
                input_video,
                '/content/drive/MyDrive/TIM/output/enhanced_video.mp4',
                confidence_threshold=0.5
            )
            if success:
                print("Advanced processing completed successfully!")
            else:
                print("Advanced processing failed")
        except Exception as e:
            print(f"Error with advanced processing: {e}")
            import traceback
            traceback.print_exc()
    else:
        # Without this branch a file that fails verification is skipped silently.
        print("Video file verification failed.")

# ==========================================
# ADDITIONAL AUDIO UTILITIES
# ==========================================

def extract_audio_info(video_path):
    """Print the audio track's duration, sample rate and channel count.

    Args:
        video_path: Path of the video file to inspect.

    Returns:
        ``True`` when the video has an audio track, ``False`` when it has
        none or when the file cannot be inspected (the error is printed).
    """
    try:
        from moviepy.editor import VideoFileClip
        clip = VideoFileClip(video_path)
        try:
            audio = clip.audio
            if audio is None:
                print(f"No audio track found in {video_path}")
                return False

            print(f"Audio Info for {video_path}:")
            print(f"  Duration: {audio.duration:.2f} seconds")
            print(f"  FPS: {audio.fps}")
            print(f"  Channels: {audio.nchannels}")
            return True
        finally:
            # Close on every path, including when an attribute read fails.
            clip.close()
    except Exception as e:
        print(f"Error extracting audio info: {e}")
        return False

def merge_video_audio_separately(video_path, audio_path, output_path):
    """Write ``output_path`` from the frames of one file and the audio of another.

    Args:
        video_path: Path of the file providing the video frames.
        audio_path: Path of the file providing the audio track.
        output_path: Path of the merged video to create.

    Returns:
        ``True`` on success, ``False`` when loading or writing fails (the
        error is printed).
    """
    try:
        from moviepy.editor import VideoFileClip, AudioFileClip

        video_clip = None
        audio_clip = None
        final_clip = None
        try:
            video_clip = VideoFileClip(video_path)
            audio_clip = AudioFileClip(audio_path)

            final_clip = video_clip.set_audio(audio_clip)
            final_clip.write_videofile(output_path, codec='libx264', audio_codec='aac')
        finally:
            # Close whatever was opened, also when a later step failed.
            for clip in (video_clip, audio_clip, final_clip):
                if clip is not None:
                    clip.close()

        print(f"Merged video and audio saved to: {output_path}")
        return True

    except Exception as e:
        print(f"Error merging video and audio: {e}")
        return False

# ==========================================
# ADVANCED FEATURES
# ==========================================

    # NOTE(review): this definition is indented one level and takes ``self``,
    # but no class header precedes it here. It follows the module-level
    # function merge_video_audio_separately, so Python parses it as a function
    # nested in that function's body, placed after the return statements,
    # which means it is never defined at runtime and never called.
    # AdvancedVideoObjectDetector defines a method with the same name that
    # additionally rejects 0-frame videos and guards each frame with
    # try/except; this copy looks like a stale duplicate - confirm and remove.
    def process_video_with_audio_enhanced(self, video_path, output_path, confidence_threshold=0.5):
        """Process video with enhanced object detection while preserving audio.

        Renders annotated frames to a temporary file, then writes
        ``output_path`` with the source audio attached (or copies the silent
        file when the source has no audio). Returns True on success and False
        when the input is missing, cannot be opened, or an exception occurs.
        """

        # Verify video file first
        if not os.path.exists(video_path):
            print(f"Error: Video file not found at {video_path}")
            return False

        # Create temporary file for video without audio
        # (mktemp only returns a name; nothing is created until the writer opens it)
        temp_video_path = tempfile.mktemp(suffix='.mp4')

        try:
            # Process video frames with enhanced detection
            cap = cv2.VideoCapture(video_path)

            if not cap.isOpened():
                print(f"Error: Cannot open video file {video_path}")
                return False

            # Get video properties (int() drops any fractional part of the FPS)
            fps = int(cap.get(cv2.CAP_PROP_FPS))
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            print(f"Processing {total_frames} frames with enhanced detection...")

            # Set up video writer
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(temp_video_path, fourcc, fps, (width, height))

            frame_count = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # Detect objects and draw enhanced outlines
                detections = self.detect_objects(frame)
                annotated_frame = self.draw_enhanced_outlines(frame, detections, confidence_threshold)

                out.write(annotated_frame)
                frame_count += 1

                # Progress report every 30 frames; total_frames is not checked
                # for zero before this division.
                if frame_count % 30 == 0:
                    print(f"Processed {frame_count}/{total_frames} frames ({frame_count/total_frames*100:.1f}%)")

            cap.release()
            out.release()

            # Merge with audio using MoviePy
            print("Merging with audio...")
            from moviepy.editor import VideoFileClip
            original_clip = VideoFileClip(video_path)
            processed_clip = VideoFileClip(temp_video_path)

            if original_clip.audio is not None:
                final_clip = processed_clip.set_audio(original_clip.audio)
                final_clip.write_videofile(
                    output_path,
                    codec='libx264',
                    audio_codec='aac',
                    verbose=False,
                    logger=None
                )
                final_clip.close()
                print(f"Enhanced video with audio saved to: {output_path}")
            else:
                # No audio track: the silent render is the final result
                print("No audio found, saving video only")
                import shutil
                shutil.copy2(temp_video_path, output_path)

            original_clip.close()
            processed_clip.close()
            return True

        except Exception as e:
            print(f"Error in enhanced processing: {e}")
            return False

        finally:
            # Remove the intermediate render whether or not processing succeeded
            if os.path.exists(temp_video_path):
                os.remove(temp_video_path)


# ==========================================
# TROUBLESHOOTING AND TESTING
# ==========================================

def test_with_sample_video():
    """Write a 100-frame synthetic clip to Drive and return its path.

    Each 640x480 frame is black with a filled green rectangle that moves
    5 pixels to the right per frame and a white "Frame N" caption.
    """
    import numpy as np

    frame_width, frame_height = 640, 480
    test_path = '/content/drive/MyDrive/TIM/test_video.mp4'

    codec = cv2.VideoWriter_fourcc(*'mp4v')
    writer = cv2.VideoWriter(test_path, codec, 20.0, (frame_width, frame_height))

    frame_index = 0
    while frame_index < 100:
        canvas = np.zeros((frame_height, frame_width, 3), dtype=np.uint8)

        # Moving rectangle: its left edge advances 5 px per frame
        left_edge = frame_index * 5
        cv2.rectangle(canvas, (left_edge, 100), (left_edge + 100, 200), (0, 255, 0), -1)

        caption = f'Frame {frame_index}'
        cv2.putText(canvas, caption, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

        writer.write(canvas)
        frame_index += 1

    writer.release()
    print(f"Test video created: {test_path}")
    return test_path

# Test with webcam (if available)
def test_with_webcam():
    """Run live detection on camera 0 and show it until 'q' or end of stream.

    A fresh VideoObjectDetector is built first; if the camera cannot be
    opened a message is printed and the function returns.
    """
    webcam_detector = VideoObjectDetector()

    camera = cv2.VideoCapture(0)  # device index 0
    if camera.isOpened():
        print("Press 'q' to quit webcam test")
        quit_key = ord('q')

        grabbed, image = camera.read()
        while grabbed:
            results = webcam_detector.detect_objects(image)
            overlay = webcam_detector.draw_outlines(image, results, confidence_threshold=0.5)

            cv2.imshow('Webcam Object Detection', overlay)

            if cv2.waitKey(1) & 0xFF == quit_key:
                break

            grabbed, image = camera.read()

        camera.release()
        cv2.destroyAllWindows()
    else:
        print("Webcam not available")

# Alternative: Download a sample video for testing
def download_sample_video():
    """Download a sample video into the TIM folder on Drive.

    Returns:
        The path of the saved file, or ``None`` when the download or the
        write fails (the error is printed).
    """
    import requests

    # Sample video URL (replace with actual URL)
    sample_url = "https://sample-videos.com/zip/10/mp4/SampleVideo_1280x720_1mb.mp4"
    output_path = "/content/drive/MyDrive/TIM/sample_video.mp4"

    try:
        print("Downloading sample video...")
        # A timeout keeps a stalled connection from hanging the cell forever.
        response = requests.get(sample_url, timeout=30)
        # Fail on 4xx/5xx instead of saving an error page as an .mp4 file.
        response.raise_for_status()
        with open(output_path, 'wb') as f:
            f.write(response.content)
        print(f"Sample video downloaded to: {output_path}")
        return output_path
    except Exception as e:
        print(f"Error downloading sample video: {e}")
        return None

# Test with audio preservation
# if test_video:
#     detector = VideoObjectDetector()
#     detector.process_video_with_audio(test_video, '/content/drive/MyDrive/TIM/output/test_output_with_audio.mp4')

# ==========================================
# QUICK AUDIO CHECK AND FIX
# ==========================================

def quick_audio_check(video_path):
    """Print basic information about a video and report whether it has audio.

    Args:
        video_path: Path of the video file to inspect.

    Returns:
        True when the clip exposes an audio track, False when it does not
        or when the file could not be inspected (the error is printed).
    """
    clip = None
    try:
        from moviepy.editor import VideoFileClip
        clip = VideoFileClip(video_path)

        print(f"Video: {video_path}")
        print(f"Duration: {clip.duration:.2f} seconds")
        print(f"Video FPS: {clip.fps}")
        print(f"Resolution: {clip.w}x{clip.h}")

        # Record the answer while the clip is still open: the result must not
        # depend on what close() does to the clip's audio attribute.
        has_audio = clip.audio is not None

        if has_audio:
            print(f"✓ Audio found - Duration: {clip.audio.duration:.2f}s, FPS: {clip.audio.fps}")
        else:
            print("✗ No audio track found")

        return has_audio

    except Exception as e:
        print(f"Error checking video: {e}")
        return False
    finally:
        # Always release the underlying readers, including on error paths.
        if clip is not None:
            clip.close()

# ==========================================
# ADVANCED OBJECT TRACKING AND FEATURES
# ==========================================

import json
from collections import defaultdict, deque
import time

class VideoObjectTracker(AdvancedVideoObjectDetector):
    """Video processor with simple distance-based tracking and analytics.

    On top of the parent detector this class assigns integer IDs to
    detections by nearest-centre matching between consecutive frames, draws
    labelled boxes and movement trails, records per-frame analytics and
    timing, and finally merges the annotated video with the original audio.
    """

    # Single palette shared by boxes and trails so that an object's trail is
    # always drawn in the same colour as its bounding box.
    _TRACK_COLORS = (
        (255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0),
        (255, 0, 255), (0, 255, 255), (128, 0, 128), (255, 165, 0),
        (255, 192, 203), (0, 128, 0), (128, 128, 0), (0, 0, 128),
    )

    def __init__(self, model_path='/content/drive/MyDrive/TIM/models/', tracking_enabled=True):
        """Initialise the tracker.

        Args:
            model_path: Forwarded unchanged to the parent constructor.
            tracking_enabled: When False, ``process_video_with_tracking``
                falls back to plain per-frame detection drawing.
        """
        super().__init__(model_path)
        self.tracking_enabled = tracking_enabled
        self.object_trajectories = defaultdict(list)
        self.frame_analytics = []
        self.processing_times = []
        # Next never-used track ID; only ever increases within one video.
        self._next_object_id = 0

    @staticmethod
    def _safe_stat(func, values):
        """Return ``float(func(values))``, or 0.0 when ``values`` is empty."""
        return float(func(values)) if values else 0.0

    def calculate_object_center(self, box):
        """Calculate center point of bounding box.

        Args:
            box: Detection box exposing ``xyxy`` (first row is read as
                ``x1, y1, x2, y2``).

        Returns:
            Tuple ``(center_x, center_y)``.
        """
        x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
        center_x = (x1 + x2) / 2
        center_y = (y1 + y2) / 2
        return (center_x, center_y)

    def track_objects_simple(self, current_detections, previous_centers, max_distance=100):
        """Simple object tracking based on distance.

        Each detection is matched to the closest centre from the previous
        frame that lies strictly within ``max_distance`` and has not already
        been matched in this frame. Unmatched detections receive a fresh ID
        that has never been used before for the current video.

        Args:
            current_detections: Detection result exposing ``boxes`` (may be
                ``None``).
            previous_centers: Mapping ``{object_id: (x, y)}`` as returned by
                the previous call (IDs are expected to be ints).
            max_distance: Maximum centre distance for a match.

        Returns:
            Tuple ``(tracked_objects, current_centers)`` where
            ``tracked_objects`` is a list of dicts with keys ``id``, ``box``,
            ``center``, ``confidence``, ``class_id`` and ``class_name``, and
            ``current_centers`` maps each assigned ID to its centre.
        """
        tracked_objects = []
        current_centers = {}

        boxes = current_detections.boxes
        if boxes is None:
            return tracked_objects, current_centers

        # Fresh IDs come from a monotonically increasing counter; it is also
        # kept above every ID in previous_centers so a new object can never
        # collide with (or steal the trajectory of) an existing one.
        next_id = getattr(self, '_next_object_id', 0)
        if previous_centers:
            next_id = max(next_id, max(previous_centers) + 1)

        claimed_ids = set()  # previous IDs already matched in this frame

        for box in boxes:
            center = self.calculate_object_center(box)
            confidence = float(box.conf[0].cpu().numpy())
            class_id = int(box.cls[0].cpu().numpy())

            # Find closest previous object that is still unclaimed
            min_distance = max_distance
            best_match_id = None

            for prev_id, prev_center in previous_centers.items():
                if prev_id in claimed_ids:
                    continue
                distance = np.hypot(center[0] - prev_center[0], center[1] - prev_center[1])
                if distance < min_distance:
                    min_distance = distance
                    best_match_id = prev_id

            # Assign ID
            if best_match_id is not None:
                object_id = best_match_id
                claimed_ids.add(best_match_id)
            else:
                object_id = next_id
                next_id += 1

            current_centers[object_id] = center
            tracked_objects.append({
                'id': object_id,
                'box': box,
                'center': center,
                'confidence': confidence,
                'class_id': class_id,
                'class_name': self.yolo_model.names[class_id]
            })

        self._next_object_id = next_id
        return tracked_objects, current_centers

    def draw_tracked_objects(self, frame, tracked_objects, confidence_threshold=0.5):
        """Draw tracked objects with IDs and record their positions.

        Objects below ``confidence_threshold`` are neither drawn nor added to
        ``self.object_trajectories``.

        Args:
            frame: Image to annotate; it is copied, not modified.
            tracked_objects: List produced by ``track_objects_simple``.
            confidence_threshold: Minimum confidence for an object to be drawn.

        Returns:
            The annotated copy of ``frame``.
        """
        annotated_frame = frame.copy()
        colors = self._TRACK_COLORS

        for obj in tracked_objects:
            if obj['confidence'] < confidence_threshold:
                continue

            box = obj['box']
            x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()

            # Get color based on object ID
            color = colors[obj['id'] % len(colors)]

            # Draw bounding box
            cv2.rectangle(annotated_frame,
                          (int(x1), int(y1)),
                          (int(x2), int(y2)),
                          color, 3)

            # Draw center point
            center = obj['center']
            cv2.circle(annotated_frame, (int(center[0]), int(center[1])), 5, color, -1)

            # Draw label with ID
            label = f"ID:{obj['id']} {obj['class_name']}: {obj['confidence']:.2f}"
            label_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)[0]

            # Label background
            cv2.rectangle(annotated_frame,
                          (int(x1), int(y1) - label_size[1] - 15),
                          (int(x1) + label_size[0] + 10, int(y1)),
                          color, -1)

            # Label text
            cv2.putText(annotated_frame, label,
                        (int(x1) + 5, int(y1) - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

            # Store trajectory (consumed by draw_object_trails / save_analytics)
            self.object_trajectories[obj['id']].append({
                'center': center,
                'timestamp': time.time(),
                'class_name': obj['class_name']
            })

        return annotated_frame

    def draw_object_trails(self, frame, trail_length=10):
        """Draw movement trails for tracked objects.

        For every recorded trajectory with more than one point, the last
        ``trail_length`` positions are joined by line segments whose
        thickness grows towards the most recent position.

        Args:
            frame: Image to annotate; it is copied, not modified.
            trail_length: Number of most recent positions to connect.

        Returns:
            The annotated copy of ``frame``.
        """
        annotated_frame = frame.copy()
        colors = self._TRACK_COLORS

        for obj_id, trajectory in self.object_trajectories.items():
            if len(trajectory) <= 1:
                continue

            # Get recent positions
            recent_positions = trajectory[-trail_length:]
            color = colors[obj_id % len(colors)]

            # Draw trail
            for i in range(1, len(recent_positions)):
                prev_center = recent_positions[i - 1]['center']
                curr_center = recent_positions[i]['center']
                pt1 = (int(prev_center[0]), int(prev_center[1]))
                pt2 = (int(curr_center[0]), int(curr_center[1]))

                # Fade trail: older segments are thinner
                alpha = i / len(recent_positions)
                thickness = max(1, int(3 * alpha))
                cv2.line(annotated_frame, pt1, pt2, color, thickness)

        return annotated_frame

    def process_video_with_tracking(self, video_path, output_path, confidence_threshold=0.5,
                                  show_trails=True, save_analytics=True):
        """Process video with object tracking and analytics.

        Frames are annotated into a temporary file, analytics are optionally
        saved next to ``output_path``, and the result is merged with the
        original audio via ``merge_with_audio``.

        Args:
            video_path: Input video file.
            output_path: Destination of the final video.
            confidence_threshold: Minimum confidence for drawing an object.
            show_trails: Draw movement trails (tracking mode only).
            save_analytics: Write the analytics JSON after processing.

        Returns:
            True on success, False when the input is missing or cannot be
            opened, the writer cannot be created, or any step raises.
        """

        if not os.path.exists(video_path):
            print(f"Error: Video file not found at {video_path}")
            return False

        # Reset tracking data
        self.object_trajectories.clear()
        self.frame_analytics.clear()
        self.processing_times.clear()
        self._next_object_id = 0

        # Create temporary file for video processing. mkstemp creates the
        # file atomically (mktemp is deprecated and race-prone); only the
        # path is needed, so the descriptor is closed right away.
        temp_fd, temp_video_path = tempfile.mkstemp(suffix='.mp4')
        os.close(temp_fd)

        cap = None
        out = None
        try:
            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                print(f"Error: Could not open video {video_path}")
                return False

            # Video properties. FPS is kept as a float: truncating e.g.
            # 29.97 to 29 changes the video duration and desynchronises it
            # from the original audio added afterwards.
            fps = cap.get(cv2.CAP_PROP_FPS)
            if not (fps > 0):  # also catches NaN
                fps = 30.0
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            print(f"Processing {total_frames} frames with object tracking...")

            # Video writer
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(temp_video_path, fourcc, fps, (width, height))
            if not out.isOpened():
                print(f"Error: Could not create video writer for {temp_video_path}")
                return False

            previous_centers = {}
            frame_count = 0

            while True:
                start_time = time.time()
                ret, frame = cap.read()
                if not ret:
                    break

                # Detect objects
                detections = self.detect_objects(frame)

                # Track objects
                if self.tracking_enabled:
                    tracked_objects, current_centers = self.track_objects_simple(
                        detections, previous_centers
                    )

                    # Draw tracked objects
                    annotated_frame = self.draw_tracked_objects(
                        frame, tracked_objects, confidence_threshold
                    )

                    # Draw trails if enabled
                    if show_trails:
                        annotated_frame = self.draw_object_trails(annotated_frame)

                    previous_centers = current_centers

                    # Store analytics
                    self.frame_analytics.append({
                        'frame': frame_count,
                        'objects_detected': len(tracked_objects),
                        'object_classes': list({obj['class_name'] for obj in tracked_objects}),
                        'timestamp': time.time()
                    })

                else:
                    # Simple detection without tracking
                    annotated_frame = self.draw_enhanced_outlines(
                        frame, detections, confidence_threshold
                    )

                # Write frame
                out.write(annotated_frame)

                # Performance tracking
                processing_time = time.time() - start_time
                self.processing_times.append(processing_time)

                frame_count += 1
                if frame_count % 30 == 0:
                    avg_time = np.mean(self.processing_times[-30:])
                    fps_actual = 1.0 / avg_time if avg_time > 0 else 0
                    # Some containers report a frame count of 0; skip the
                    # percentage instead of dividing by zero.
                    if total_frames > 0:
                        progress = (f"{frame_count}/{total_frames} frames "
                                    f"({frame_count/total_frames*100:.1f}%)")
                    else:
                        progress = f"{frame_count} frames"
                    print(f"Processed {progress} - "
                          f"Processing FPS: {fps_actual:.1f}")

            # The writer must be released before the temp file is read back.
            cap.release()
            out.release()
            cap = out = None

            # Save analytics
            if save_analytics:
                self.save_analytics(video_path, output_path)

            # Merge with audio
            print("Adding audio track...")
            return self.merge_with_audio(video_path, temp_video_path, output_path)

        except Exception as e:
            print(f"Error in tracking processing: {e}")
            return False
        finally:
            # Release handles on every exit path, then drop the temp file.
            for handle in (cap, out):
                if handle is not None:
                    handle.release()
            if os.path.exists(temp_video_path):
                os.remove(temp_video_path)

    def save_analytics(self, input_path, output_path):
        """Save detailed analytics to JSON file.

        The file is written next to ``output_path`` with the extension
        replaced by ``_analytics.json``. Statistics over empty data (no
        frames processed, or tracking disabled) are reported as 0.0 instead
        of raising or producing NaN.

        Args:
            input_path: Path of the source video (stored in the report).
            output_path: Path of the output video; also determines the
                analytics file name.
        """
        times = self.processing_times
        object_counts = [f['objects_detected'] for f in self.frame_analytics]
        safe = self._safe_stat

        analytics_data = {
            'input_video': input_path,
            'output_video': output_path,
            'processing_stats': {
                # One timing entry is recorded per processed frame, also when
                # tracking is disabled and frame_analytics stays empty.
                'total_frames': len(times),
                'avg_processing_time': safe(np.mean, times),
                'total_processing_time': float(sum(times)),
                'avg_objects_per_frame': safe(np.mean, object_counts)
            },
            'object_trajectories': {
                str(obj_id): [
                    {
                        'center': [float(p['center'][0]), float(p['center'][1])],
                        'class_name': p['class_name']
                    } for p in trajectory
                ] for obj_id, trajectory in self.object_trajectories.items()
            },
            'frame_analytics': self.frame_analytics,
            'performance_metrics': {
                'min_processing_time': safe(min, times),
                'max_processing_time': safe(max, times),
                'std_processing_time': safe(np.std, times)
            }
        }

        # Derive the name from the extension rather than str.replace('.mp4'):
        # with a non-.mp4 output the replace was a no-op and the JSON would
        # have been written over the video path itself.
        base_path, _ = os.path.splitext(output_path)
        analytics_path = f"{base_path}_analytics.json"
        with open(analytics_path, 'w') as f:
            json.dump(analytics_data, f, indent=2)

        print(f"Analytics saved to: {analytics_path}")

        # Print summary
        print("\n--- Processing Summary ---")
        print(f"Total frames processed: {analytics_data['processing_stats']['total_frames']}")
        print(f"Average processing time per frame: {analytics_data['processing_stats']['avg_processing_time']:.3f}s")
        print(f"Average objects per frame: {analytics_data['processing_stats']['avg_objects_per_frame']:.1f}")
        print(f"Unique objects tracked: {len(self.object_trajectories)}")

    def merge_with_audio(self, original_path, processed_path, output_path):
        """Merge processed video with original audio.

        When the original has no audio track the processed file is copied to
        ``output_path`` unchanged.

        Args:
            original_path: Video that supplies the audio track.
            processed_path: Annotated (silent) video.
            output_path: Destination file.

        Returns:
            True on success, False if any step raised (the error is printed).
        """
        original_clip = None
        processed_clip = None
        final_clip = None
        try:
            from moviepy.editor import VideoFileClip

            original_clip = VideoFileClip(original_path)
            processed_clip = VideoFileClip(processed_path)

            if original_clip.audio is not None:
                final_clip = processed_clip.set_audio(original_clip.audio)
                final_clip.write_videofile(
                    output_path,
                    codec='libx264',
                    audio_codec='aac',
                    verbose=False,
                    logger=None
                )
                print(f"Video with tracking and audio saved to: {output_path}")
            else:
                import shutil
                shutil.copy2(processed_path, output_path)
                print(f"Video with tracking (no audio) saved to: {output_path}")

            return True

        except Exception as e:
            print(f"Error merging audio: {e}")
            return False
        finally:
            # Close every clip that was opened, also when merging failed, so
            # no reader keeps the temporary/processed files open.
            for clip in (final_clip, original_clip, processed_clip):
                if clip is None:
                    continue
                try:
                    clip.close()
                except Exception as close_error:
                    print(f"Warning: could not close clip: {close_error}")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
0: 384x640 1 frisbee, 2 sports balls, 4 kites, 10.5ms
Speed: 2.3ms preprocess, 10.5ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 frisbee, 2 sports balls, 4 kites, 10.2ms
Speed: 2.3ms preprocess, 10.2ms inference, 2.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 frisbee, 2 sports balls, 4 kites, 9.6ms
Speed: 2.3ms preprocess, 9.6ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 frisbee, 2 sports balls, 4 kites, 9.8ms
Speed: 2.3ms preprocess, 9.8ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 frisbee, 2 sports balls, 4 kites, 8.7ms
Speed: 2.4ms preprocess, 8.7ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 frisbee, 2 sports balls, 4 kites, 9.6ms
Speed: 2.2ms preprocess, 9.6ms inference, 4.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 frisbee, 2 sports 




Enhanced video with audio saved to: /content/drive/MyDrive/TIM/output/enhanced_video.mp4
Advanced processing completed successfully!
