In [3]:
import os
import cv2
import numpy as np
from skimage.metrics import structural_similarity as ssim

def extract_frames(video_path, output_dir, interval_sec=3):
    """Save one frame of a video every ``interval_sec`` seconds as a PNG.

    Sampled frames are written to ``output_dir`` (created if missing) as
    ``frame_0.png``, ``frame_1.png``, ... in the order they are read.

    Parameters:
    - video_path: Path of the video file to read.
    - output_dir: Directory that receives the extracted PNG frames.
    - interval_sec: Seconds between two saved frames (may be fractional).

    If the video cannot be opened, or it reports no usable frame rate, a
    message is printed and no frames are written.
    """
    os.makedirs(output_dir, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"Could not open {video_path}; no frames extracted.")
            return

        fps = cap.get(cv2.CAP_PROP_FPS)
        # `not fps > 0` also rejects NaN, which some containers report.
        if not fps > 0:
            print(f"No valid frame rate for {video_path}; no frames extracted.")
            return

        # Round the product instead of truncating fps first, so fractional
        # rates such as 29.97 keep the requested spacing; never drop below 1
        # frame, which would make the modulo below divide by zero.
        interval_frames = max(1, int(round(interval_sec * fps)))

        frames_read = 0  # frames decoded so far (1-based after each read)
        count = 0        # frames written so far; also the output file index

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frames_read += 1
            if frames_read % interval_frames == 0:
                frame_filename = os.path.join(output_dir, f"frame_{count}.png")
                cv2.imwrite(frame_filename, frame)
                count += 1
    finally:
        # Release the capture even if decoding or writing raised.
        cap.release()

    print(f"Frames extracted from {video_path} to {output_dir}.")

# import cv2
# import numpy as np
# from skimage.metrics import structural_similarity as ssim

# def remove_similar_frames(frame_dir, threshold=0.9, win_size=3):
#     def compare_images(img1, img2):
#         # Ensure images are at least win_size x win_size pixels
#         if img1.shape[0] < win_size or img1.shape[1] < win_size:
#             return 0
        
#         # Compute SSIM between two images
#         return ssim(img1, img2, multichannel=True, win_size=win_size)
    
#     frames = [f for f in os.listdir(frame_dir) if f.endswith('.png')]
#     frame_paths = [os.path.join(frame_dir, f) for f in frames]
    
#     # Load all frames
#     images = [cv2.imread(p) for p in frame_paths]
    
#     # Ensure all images are at least win_size x win_size
#     images = [img for img in images if img.shape[0] >= win_size and img.shape[1] >= win_size]
    
#     to_remove = set()
#     for i in range(len(images)):
#         for j in range(i + 1, len(images)):
#             if i not in to_remove and j not in to_remove:
#                 similarity = compare_images(images[i], images[j])
#                 if similarity > threshold:
#                     to_remove.add(j)
    
#     # Remove similar frames
#     for i in sorted(to_remove, reverse=True):
#         os.remove(frame_paths[i])
    
#     print(f"Removed similar frames from {frame_dir}.")


def process_videos(video_dir='videos', frame_dir='frames', interval_sec=3, threshold=0.9):
    """Extract frames from every video in ``video_dir`` into ``frame_dir``.

    Each ``.mp4`` / ``.avi`` / ``.mov`` file (extension matched
    case-insensitively) gets its own sub-directory of ``frame_dir`` named
    after the video without its extension. Videos are processed in sorted
    name order so repeated runs behave the same way.

    Parameters:
    - video_dir: Directory containing the input videos.
    - frame_dir: Root directory for the per-video frame folders.
    - interval_sec: Seconds between extracted frames, passed to extract_frames.
    - threshold: Similarity threshold passed to remove_similar_frames.
    """
    os.makedirs(frame_dir, exist_ok=True)

    # remove_similar_frames is not defined in this cell, so it only exists if
    # another cell defined it earlier. Look it up instead of calling it
    # blindly, so a fresh kernel skips de-duplication rather than raising
    # NameError after the first video.
    dedupe = globals().get("remove_similar_frames")
    if not callable(dedupe):
        print("remove_similar_frames is not defined; skipping similar-frame removal.")

    for filename in sorted(os.listdir(video_dir)):
        if not filename.lower().endswith(('.mp4', '.avi', '.mov')):
            continue

        video_path = os.path.join(video_dir, filename)
        video_output_dir = os.path.join(frame_dir, os.path.splitext(filename)[0])

        # Extract frames every `interval_sec` seconds
        extract_frames(video_path, video_output_dir, interval_sec=interval_sec)

        # Remove similar frames (only when the helper is available)
        if callable(dedupe):
            dedupe(video_output_dir, threshold=threshold)

# Run the pipeline with the default 'videos' -> 'frames' directories.
# The guard is true when this code runs as the main module (including a
# notebook cell), so executing the cell starts the extraction immediately.
if __name__ == "__main__":
    process_videos()


Frames extracted from videos/2.mp4 to frames/2.
Removed similar frames from frames/2.


In [6]:
import os
import cv2
import shutil
from skimage.metrics import structural_similarity as ssim
import numpy as np

def _natural_sort_key(name):
    """Sort key that orders embedded numbers numerically (frame_2 < frame_10)."""
    import re

    # re.split with a capturing group alternates text / digit runs, so the
    # int and str entries always line up position-wise between two keys.
    return [int(part) if part.isdigit() else part.lower()
            for part in re.split(r'(\d+)', name)]


def consolidate_frames(source_dirs, target_dir, image_prefix="image"):
    """
    Consolidate frames from multiple directories into a single directory and rename them.

    Images are numbered ``<image_prefix>1.jpg``, ``<image_prefix>2.jpg``, ...
    following the order of ``source_dirs`` and, inside each directory, the
    natural (numeric-aware) order of the file names, so the numbering is
    reproducible between runs.

    ``.jpg`` sources are copied unchanged. ``.png`` sources are re-encoded
    with OpenCV so that the ``.jpg`` target really contains JPEG data; a
    source that cannot be read or written is reported and skipped without
    consuming a number.

    Parameters:
    - source_dirs: List of source directories containing frames.
    - target_dir: Directory where consolidated and renamed frames will be saved.
    - image_prefix: Prefix for naming images in the target directory.
    """
    os.makedirs(target_dir, exist_ok=True)
    image_count = 1

    for source_dir in source_dirs:
        for filename in sorted(os.listdir(source_dir), key=_natural_sort_key):
            extension = os.path.splitext(filename)[1].lower()
            if extension not in ('.png', '.jpg'):
                continue

            source_path = os.path.join(source_dir, filename)
            target_path = os.path.join(target_dir, f"{image_prefix}{image_count}.jpg")

            if extension == '.jpg':
                # Already JPEG: plain copy avoids a lossy re-encode.
                shutil.copy(source_path, target_path)
            else:
                # PNG source: convert so the file content matches its .jpg name.
                image = cv2.imread(source_path)
                if image is None or not cv2.imwrite(target_path, image):
                    print(f"Skipping {source_path}: could not convert to JPEG.")
                    continue

            image_count += 1

    print(f"Consolidated and renamed frames into {target_dir}.")

# def remove_similar_frames(frame_dir, threshold=0.9, win_size=3):
#     """
#     Remove similar frames based on SSIM.

#     Parameters:
#     - frame_dir: Directory containing frames.
#     - threshold: SSIM similarity threshold for considering frames as similar.
#     - win_size: Window size for SSIM computation.
#     """
#     def compare_images(img1, img2):
#         # Ensure images are at least win_size x win_size
#         if img1.shape[0] < win_size or img1.shape[1] < win_size:
#             return 0
        
#         # Compute SSIM between two images
#         return ssim(img1, img2, multichannel=True, win_size=win_size)
    
#     frames = [f for f in os.listdir(frame_dir) if f.endswith('.jpg')]
#     frame_paths = [os.path.join(frame_dir, f) for f in frames]
    
#     # Load all frames
#     images = [cv2.imread(p) for p in frame_paths]
    
#     # Ensure all images are at least win_size x win_size
#     images = [img for img in images if img.shape[0] >= win_size and img.shape[1] >= win_size]
    
#     to_remove = set()
#     for i in range(len(images)):
#         for j in range(i + 1, len(images)):
#             if i not in to_remove and j not in to_remove:
#                 similarity = compare_images(images[i], images[j])
#                 if similarity > threshold:
#                     to_remove.add(j)
    
#     # Remove similar frames
#     for i in sorted(to_remove, reverse=True):
#         os.remove(frame_paths[i])
    
#     print(f"Removed similar frames from {frame_dir}.")

# Example usage
# NOTE(review): range(1, 2) yields only 1, so only 'frames/1' is consolidated;
# widen the range if frames from other videos should be included.
source_dirs = [f'frames/{i}' for i in range(1, 2)]  # Directories containing frames
target_dir = 'consolidated_frames'  # Directory to save consolidated frames

# Consolidate and rename frames (copies files; the source directories are left untouched)
consolidate_frames(source_dirs, target_dir, image_prefix="image")

# Remove similar frames
# remove_similar_frames(target_dir, threshold=0.9, win_size=3)


Consolidated and renamed frames into consolidated_frames.


In [8]:
import os
import cv2
import numpy as np
from skimage.metrics import structural_similarity as ssim

def load_images_from_folder(folder):
    """Load every readable ``.jpg`` / ``.png`` image found directly in ``folder``.

    Files are visited in sorted name order and the extension is matched
    case-insensitively. Order matters to callers that keep the first image
    of a group of near-duplicates: with the arbitrary order of os.listdir,
    which file survives could change from run to run.

    Parameters:
    - folder: Directory to scan (not recursive).

    Returns:
    - (images, filenames): two parallel lists, the decoded images as returned
      by cv2.imread and the full path each one was loaded from. Files that
      cv2.imread cannot decode (it returns None) are left out of both lists.
    """
    images = []
    filenames = []
    for filename in sorted(os.listdir(folder)):
        if not filename.lower().endswith(('.jpg', '.png')):
            continue
        img_path = os.path.join(folder, filename)
        img = cv2.imread(img_path)
        if img is not None:
            images.append(img)
            filenames.append(img_path)
    return images, filenames

def compare_images(img1, img2, win_size=3):
    """Return the SSIM score of two BGR images, computed on grayscale copies.

    Parameters:
    - img1, img2: Images as loaded by cv2.imread (converted with COLOR_BGR2GRAY).
    - win_size: Window size passed to the SSIM computation.

    Returns:
    - The value returned by ``ssim`` for the two grayscale images, or 0 when
      the pair cannot be compared: the images have different height/width,
      or they are smaller than ``win_size`` in either dimension.
    """
    # SSIM requires both inputs to have identical dimensions; treat a size
    # mismatch as "not similar" instead of letting ssim raise and abort the
    # caller's whole comparison loop.
    if img1.shape[:2] != img2.shape[:2]:
        return 0

    # Ensure images are at least win_size x win_size (img2 has the same size
    # as img1 at this point, so one check covers both).
    if img1.shape[0] < win_size or img1.shape[1] < win_size:
        return 0

    # Convert to grayscale for SSIM comparison
    img1_gray = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
    img2_gray = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)

    # Compute SSIM between two images
    return ssim(img1_gray, img2_gray, win_size=win_size)

def remove_duplicate_images(folder, threshold=0.9, win_size=3):
    """Delete images in ``folder`` that are near-duplicates of an earlier image.

    Images are loaded with load_images_from_folder and compared pairwise with
    compare_images. An image is deleted from disk when its score against an
    earlier image that is being kept exceeds ``threshold``. Prints how many
    files were removed.

    Parameters:
    - folder: Directory whose images are de-duplicated in place.
    - threshold: Score above which two images count as duplicates.
    - win_size: Window size forwarded to compare_images.
    """
    images, filenames = load_images_from_folder(folder)
    total = len(images)
    duplicates = set()

    for anchor in range(total):
        # An image already flagged as a duplicate is never used as a reference.
        if anchor not in duplicates:
            for candidate in range(anchor + 1, total):
                if candidate not in duplicates:
                    score = compare_images(images[anchor], images[candidate], win_size)
                    if score > threshold:
                        duplicates.add(candidate)

    # Delete the flagged files, highest index first.
    for index in sorted(duplicates, reverse=True):
        os.remove(filenames[index])

    print(f"Removed {len(duplicates)} duplicate images from {folder}.")

# Example usage
image_folder = '../labeled_image'  # Folder containing labeled images

# Remove duplicate images
# NOTE: this deletes the matching files from disk with os.remove; re-running
# the cell operates on whatever is left in the folder.
remove_duplicate_images(image_folder, threshold=0.9, win_size=3)


Removed 7 duplicate images from ../labeled_image.
