In [1]:
!pip install opencv-python-headless



In [2]:
# Install PyTorch and torchvision using pip
!pip install torch torchvision



In [3]:
# Record the Python interpreter version this notebook was run with
import sys
print(sys.version)

3.10.9 | packaged by Anaconda, Inc. | (main, Mar  8 2023, 10:42:25) [MSC v.1916 64 bit (AMD64)]


In [4]:
import cv2
import os
import torch
from torchvision import models, transforms
from PIL import Image, ImageDraw, ImageFont

In [5]:
def get_video_fps(video_path):
    """
    Look up the frame rate that OpenCV reports for a video file.

    Parameters:
    video_path (str): Path to the video file.

    Returns:
    The value of the cv2.CAP_PROP_FPS property for the video, or None
    (after printing an error message) when the video cannot be opened.
    """
    capture = cv2.VideoCapture(video_path)

    if capture.isOpened():
        # Read the property first, then free the capture before returning
        frame_rate = capture.get(cv2.CAP_PROP_FPS)
        capture.release()
        return frame_rate

    print(f"Error: Could not open video {video_path}")
    return None


In [6]:
def extract_frames(video_path, output_folder, n):
    """
    Extracts every Nth frame from a video and saves them as JPEG images.

    Frames are written to output_folder as frame_0000.jpg, frame_0001.jpg, ...
    numbered in extraction order, starting with the first frame of the video.
    If the video cannot be opened, an error is printed and nothing is extracted.

    Parameters:
    video_path (str): Path to the video file.
    output_folder (str): Folder where extracted images will be saved
        (created if it does not exist).
    n (int or float): Extract every Nth frame. Non-integer values (for example
        a frame rate of 29.97) are rounded to the nearest whole number of
        frames, with a minimum of 1.

    Raises:
    ValueError: If n is not a positive, finite number.
    """
    # Make sure the output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Open the video
    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        print(f"Error: Could not open video {video_path}")
        return

    try:
        # A float step would make `frame_count % n == 0` true for frame 0 only,
        # so normalise n to a whole number of frames before sampling.
        try:
            step = max(1, int(round(n))) if n > 0 else 0
        except (TypeError, ValueError, OverflowError):
            step = 0
        if step < 1:
            raise ValueError(f"n must be a positive, finite number, got {n!r}")

        frame_count = 0
        extracted_count = 0

        while True:
            # Read the next frame
            success, frame = video.read()
            if not success:
                break  # No more frames or error

            # Check if this frame is the Nth frame
            if frame_count % step == 0:
                output_path = os.path.join(output_folder, f"frame_{extracted_count:04d}.jpg")
                # imwrite signals failure through its return value, not an exception
                if not cv2.imwrite(output_path, frame):
                    print(f"Error: Could not write frame to {output_path}")
                extracted_count += 1

            frame_count += 1
    finally:
        # Release the video even if validation or processing fails
        video.release()

# # Example usage
# video_path = r"C:\Users\ravin\Downloads\Project\VideoData\k643b705c-30000157781315_009_mezzCrop.mp4"
# output_folder = r"C:\Users\ravin\Downloads\Project\framesForVideos\k643b705c-30000157781315_009_mezzCrop"
# n = get_video_fps(video_path)  # Change this based on your video's frame rate and required detail

# extract_frames(video_path, output_folder, n)


In [7]:
def process_video_folder(input_folder, output_base_folder, frame_extraction_rate_func):
    """
    Processes each video in the input folder, extracting frames according to the specified frame rate.

    Files are handled in sorted name order. A file counts as a video when its
    extension is .mp4, .avi or .mov, compared case-insensitively. Frames for
    each video go into a subfolder of output_base_folder named after the video
    file without its extension.

    Parameters:
    input_folder (str): Folder containing the video files.
    output_base_folder (str): Base output folder where extracted frames for each video will be saved.
    frame_extraction_rate_func (function): Called with the video path; returns the
        frame extraction rate (n). Videos for which it returns None or a
        non-positive value are reported and skipped.
    """
    # Ensure the output base folder exists
    os.makedirs(output_base_folder, exist_ok=True)

    video_extensions = ('.mp4', '.avi', '.mov')

    # Sorted so that the processing order is deterministic across runs
    for file in sorted(os.listdir(input_folder)):
        video_path = os.path.join(input_folder, file)
        # Lower-case the name so files such as CLIP.MP4 are not silently ignored
        if not os.path.isfile(video_path) or not file.lower().endswith(video_extensions):
            continue

        n = frame_extraction_rate_func(video_path)
        # `not n > 0` also rejects NaN; one unusable video must not abort the batch
        if n is None or not n > 0:
            print(f"Error: Could not determine a frame extraction rate for {video_path}; skipping")
            continue

        # Create a dedicated folder for this video's frames
        output_folder = os.path.join(output_base_folder, os.path.splitext(file)[0])
        extract_frames(video_path, output_folder, n)


In [26]:
# Example usage: extract frames from every video in VideoData into per-video
# subfolders of framesForVideos. get_video_fps is passed as the rate function,
# so n for each video is that video's reported frame rate.
# NOTE(review): these are absolute paths on one machine — adjust before running elsewhere.
input_folder = r"C:\Users\ravin\Downloads\Project\VideoData"
output_base_folder = r"C:\Users\ravin\Downloads\Project\framesForVideos"
process_video_folder(input_folder, output_base_folder, get_video_fps)

In [9]:
!pip install matplotlib



In [25]:
# Preprocessing pipeline: turn a PIL image into a tensor
transform = transforms.Compose([transforms.ToTensor()])

# Pre-trained Faster R-CNN detector, switched to evaluation mode for inference.
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favour of
# `weights=` — confirm against the installed version.
model = models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
model.eval()

# Label ids mapped to human-readable class names.
# The ids are not contiguous: 12, 26, 29, 30, 45, 66, 68, 69, 71 and 83 have no entry.
COCO_CATEGORIES = {
    # People and vehicles
    1: "person", 2: "bicycle", 3: "car", 4: "motorcycle", 5: "airplane",
    6: "bus", 7: "train", 8: "truck", 9: "boat",
    # Street objects
    10: "traffic light", 11: "fire hydrant", 13: "stop sign",
    14: "parking meter", 15: "bench",
    # Animals
    16: "bird", 17: "cat", 18: "dog", 19: "horse", 20: "sheep",
    21: "cow", 22: "elephant", 23: "bear", 24: "zebra", 25: "giraffe",
    # Bags and accessories
    27: "backpack", 28: "umbrella", 31: "handbag", 32: "tie", 33: "suitcase",
    # Sports equipment
    34: "frisbee", 35: "skis", 36: "snowboard", 37: "sports ball", 38: "kite",
    39: "baseball bat", 40: "baseball glove", 41: "skateboard",
    42: "surfboard", 43: "tennis racket",
    # Tableware
    44: "bottle", 46: "wine glass", 47: "cup", 48: "fork", 49: "knife",
    50: "spoon", 51: "bowl",
    # Food
    52: "banana", 53: "apple", 54: "sandwich", 55: "orange", 56: "broccoli",
    57: "carrot", 58: "hot dog", 59: "pizza", 60: "donut", 61: "cake",
    # Furniture
    62: "chair", 63: "couch", 64: "potted plant", 65: "bed",
    67: "dining table", 70: "toilet",
    # Electronics
    72: "tv", 73: "laptop", 74: "mouse", 75: "remote", 76: "keyboard",
    77: "cell phone",
    # Kitchen appliances
    78: "microwave", 79: "oven", 80: "toaster", 81: "sink", 82: "refrigerator",
    # Household items
    84: "book", 85: "clock", 86: "vase", 87: "scissors", 88: "teddy bear",
    89: "hair drier", 90: "toothbrush",
}

# Load one extracted frame. convert("RGB") guarantees a 3-channel image even if
# the file is grayscale, palettised or carries an alpha channel, so the tensor
# matches the model's 3-channel normalisation.
img_path = r'C:\Users\ravin\Downloads\Project\framesForVideos\rx913t447-30000157760400_25\frame_0025.jpg'  # Update this path
img = Image.open(img_path).convert("RGB")

# Transform the image
img_tensor = transform(img)

# Inference only, so gradient tracking is switched off
with torch.no_grad():
    prediction = model([img_tensor])

# Overlay every sufficiently confident detection on the image, then show it
detections = prediction[0]
draw = ImageDraw.Draw(img)

for box, label_id, score in zip(
    detections['boxes'].tolist(),
    detections['labels'].tolist(),
    detections['scores'].tolist(),
):
    # Keep only detections whose score is above the 0.5 threshold
    if not score > 0.5:
        continue

    # Ids missing from the category table are labelled 'Unknown'
    label = COCO_CATEGORIES.get(label_id, 'Unknown')
    x0, y0, x1, y1 = box
    draw.rectangle([(x0, y0), (x1, y1)], outline="red", width=3)
    draw.text((x0, y0), f"{label} {score:.3f}", fill="red")

img.show()


In [27]:
# Print the detector's module hierarchy (layer-by-layer architecture summary)
print(model)

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(