In [1]:
!pip install opencv-python-headless pillow fpdf


[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting opencv-python-headless
  Downloading opencv_python_headless-4.10.0.84-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting fpdf
  Downloading fpdf-1.7.2.tar.gz (39 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Downloading opencv_python_headless-4.10.0.84-cp37-abi3-win_amd64.whl (38.8 MB)
   ---------------------------------------- 0.0/38.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/38.8 MB 1.4 MB/s eta 0:00:29
   ---------------------------------------- 0.1/38.8 MB 1.1 MB/s eta 0:00:36
   ---------------------------------------- 0.1/38.8 MB 871.5 kB/s eta 0:00:45
   ---------------------------------------- 0.2/38.8 MB 833.5 kB/s eta 0:00:47
   ----------------------------

In [2]:
import cv2
from PIL import Image
from fpdf import FPDF
import os

def extract_frames(video_path, output_folder):
    """
    Save every frame of a video as a numbered JPEG file.

    Frames are written to ``output_folder`` as ``frame_0.jpg``,
    ``frame_1.jpg``, ... in the order they are read from the video.

    Parameters:
        video_path (str): Path to the video file.
        output_folder (str): Directory to save the frames in; created if missing.

    Returns:
        List of paths to the saved frames, in frame order.

    Raises:
        IOError: If the video cannot be opened or a frame cannot be written.
    """
    video_capture = cv2.VideoCapture(video_path)
    frame_paths = []
    try:
        # An unopened capture would otherwise just yield zero frames and the
        # caller would go on to report success for an empty PDF.
        if not video_capture.isOpened():
            raise IOError(f"Could not open video: {video_path}")

        # exist_ok avoids the separate exists() check and its race
        os.makedirs(output_folder, exist_ok=True)

        success, frame = video_capture.read()
        while success:
            frame_path = os.path.join(output_folder, f"frame_{len(frame_paths)}.jpg")
            # cv2.imwrite signals failure through its return value, not an exception
            if not cv2.imwrite(frame_path, frame):
                raise IOError(f"Could not write frame: {frame_path}")
            frame_paths.append(frame_path)
            success, frame = video_capture.read()
    finally:
        # Release the capture even when reading or writing fails part-way
        video_capture.release()

    return frame_paths

def create_pdf_from_images(image_paths, pdf_path):
    """
    Build a PDF with one page per image, with pages sized to the images.

    Pixel dimensions are converted to millimetres assuming 96 pixels per inch.
    The page size is taken from the first image; a later image that is larger
    than the page is scaled down proportionally so that nothing is cropped.

    Parameters:
        image_paths (list of str): Paths to images to be added to the PDF.
        pdf_path (str): Path where the PDF should be saved.
    """
    mm_per_pixel = 25.4 / 96  # 25.4 mm per inch at 96 pixels per inch

    pdf = None
    page_width_mm = page_height_mm = 0.0

    for image_path in image_paths:
        # Only the dimensions are needed here: FPDF reads the file by path
        # itself, so the PIL handle is closed straight away.
        with Image.open(image_path) as image:
            width_px, height_px = image.size

        width_mm = width_px * mm_per_pixel
        height_mm = height_px * mm_per_pixel

        if pdf is None:
            # FPDF() defaults to A4 portrait, which crops anything larger.
            # Size the page to the image instead: the format is given short
            # side first and the orientation picks which side is the width.
            short_mm, long_mm = sorted((width_mm, height_mm))
            orientation = "L" if width_mm > height_mm else "P"
            pdf = FPDF(orientation=orientation, unit="mm", format=(short_mm, long_mm))
            page_width_mm, page_height_mm = width_mm, height_mm

        # Never enlarge; shrink only when the image would overflow the page
        scale = min(1.0, page_width_mm / width_mm, page_height_mm / height_mm)

        pdf.add_page()
        pdf.image(image_path, 0, 0, width_mm * scale, height_mm * scale)

    if pdf is None:
        # No images supplied: still write a default document, as before
        pdf = FPDF()

    pdf.output(pdf_path, "F")

def main(video_path, output_folder, pdf_path):
    """
    Turn a video into a PDF: dump its frames to images, then bind them.

    Parameters:
        video_path (str): Path to the video file.
        output_folder (str): Directory that receives the extracted frames.
        pdf_path (str): Path where the PDF should be saved.
    """
    # Frames first, then the document built from them
    frames = extract_frames(video_path, output_folder)
    create_pdf_from_images(frames, pdf_path)

    print(f"PDF created successfully: {pdf_path}")

# Input video and output locations for this run.
# NOTE(review): video_path is an absolute, machine-specific path; adjust it
# before running elsewhere.
video_path = "Z:\\IMPORTANT\\local H\\GENERAL STUDY\\ONLINE LEARNING MODULE\\HCL_CERTIFICATE_ONBOARD\\SNOWFLAKE TRAINING\\lms_notes\\video_to_pdf\\LMS-SNOW-L1.mp4"
output_folder = "extracted_images"
pdf_path = "output.pdf"

# Kick off the conversion
main(video_path=video_path, output_folder=output_folder, pdf_path=pdf_path)


PDF created successfully: output.pdf


In [2]:
from skimage.metrics import structural_similarity as ssim

In [5]:
import cv2
from PIL import Image
from fpdf import FPDF
import os
from skimage.metrics import structural_similarity as ssim
import numpy as np
import logging

# Configure the root logger: INFO and above, each message prefixed with a timestamp
logging.basicConfig(
    format="%(asctime)s - %(message)s",
    level=logging.INFO,
)

def extract_unique_frames(video_path, output_folder, similarity_threshold=0.95):
    """
    Extracts unique frames from a video file based on structural similarity.

    Each frame is compared (in grayscale) with the most recently *saved*
    frame, not with its immediate predecessor. A frame whose similarity score
    reaches the threshold is treated as a duplicate and skipped. The first
    frame is always saved. Saved files are named after the frame's index in
    the video, so the numbering has gaps where frames were skipped.

    Parameters:
        video_path (str): Path to the video file.
        output_folder (str): Directory to save the extracted images.
        similarity_threshold (float): Threshold for similarity between frames (0-1).

    Returns:
        List of paths to the unique image frames.

    Raises:
        IOError: If the video cannot be opened or a frame cannot be written.
    """
    video_capture = cv2.VideoCapture(video_path)
    unique_images = []
    try:
        # An unopened capture would otherwise silently yield zero frames
        if not video_capture.isOpened():
            raise IOError(f"Could not open video: {video_path}")

        # exist_ok avoids the separate exists() check and its race
        os.makedirs(output_folder, exist_ok=True)

        prev_gray = None  # grayscale of the last saved frame; None until one is saved
        count = 0
        success, frame = video_capture.read()

        while success:
            # Convert once per frame; the result is reused as the reference
            # for later comparisons if this frame ends up being saved.
            curr_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Only the mean score is needed, so the full similarity map is
            # not requested.
            score = None if prev_gray is None else ssim(prev_gray, curr_gray)

            if score is not None and score >= similarity_threshold:
                logging.info(f"Duplicate frame {count} detected (similarity {score:.2f}). Skipping...")
            else:
                frame_path = os.path.join(output_folder, f"frame_{count}.jpg")
                # cv2.imwrite signals failure through its return value
                if not cv2.imwrite(frame_path, frame):
                    raise IOError(f"Could not write frame: {frame_path}")
                unique_images.append(frame_path)
                logging.info(f"Unique frame {count} saved: {frame_path}")
                prev_gray = curr_gray

            success, frame = video_capture.read()
            count += 1
    finally:
        # Release the capture even when processing fails part-way
        video_capture.release()

    return unique_images



def create_pdf_from_images(image_paths, pdf_path):
    """
    Create a PDF from a list of image paths, one image per page.

    Pixel dimensions are converted to millimetres assuming 96 pixels per inch.
    The page size is taken from the first image; a later image that is larger
    than the page is scaled down proportionally so that nothing is cropped.

    Parameters:
        image_paths (list of str): Paths to images to be added to the PDF.
        pdf_path (str): Path where the PDF should be saved.
    """
    mm_per_pixel = 25.4 / 96  # 25.4 mm per inch at 96 pixels per inch

    pdf = None
    page_width_mm = page_height_mm = 0.0

    for image_path in image_paths:
        # Only the dimensions are needed here: FPDF reads the file by path
        # itself, so the PIL handle is closed straight away.
        with Image.open(image_path) as image:
            width_px, height_px = image.size

        width_mm = width_px * mm_per_pixel
        height_mm = height_px * mm_per_pixel

        if pdf is None:
            # FPDF() defaults to A4 portrait, which crops anything larger.
            # Size the page to the image instead: the format is given short
            # side first and the orientation picks which side is the width.
            short_mm, long_mm = sorted((width_mm, height_mm))
            orientation = "L" if width_mm > height_mm else "P"
            pdf = FPDF(orientation=orientation, unit="mm", format=(short_mm, long_mm))
            page_width_mm, page_height_mm = width_mm, height_mm

        # Never enlarge; shrink only when the image would overflow the page
        scale = min(1.0, page_width_mm / width_mm, page_height_mm / height_mm)

        # Add image to PDF as a new page
        pdf.add_page()
        pdf.image(image_path, 0, 0, width_mm * scale, height_mm * scale)

    if pdf is None:
        # No images supplied: still write a default document, as before
        pdf = FPDF()

    pdf.output(pdf_path, "F")
    logging.info(f"PDF created successfully: {pdf_path}")

def clean_up_images(image_paths):
    """
    Deletes images from disk after creating the PDF.

    A path that no longer exists is logged and skipped, so one missing file
    does not leave the remaining images behind. Other OS errors (for example
    permission problems) still propagate.

    Parameters:
        image_paths (list of str): Paths to images to be deleted.
    """
    for image_path in image_paths:
        try:
            os.remove(image_path)
        except FileNotFoundError:
            # Already gone: nothing to clean for this path, carry on with the rest
            logging.warning(f"Image already missing, skipped: {image_path}")
            continue
        logging.info(f"Deleted image: {image_path}")

def main(video_path, output_folder, pdf_path):
    """
    Run the whole pipeline: extract unique frames, build the PDF, delete the frames.

    Parameters:
        video_path (str): Path to the video file.
        output_folder (str): Directory that receives the extracted frames.
        pdf_path (str): Path where the PDF should be saved.
    """
    # Stage 1: pull the distinct frames out of the video
    logging.info("Starting to extract unique frames from the video...")
    frames = extract_unique_frames(video_path, output_folder)

    # Stage 2: bind those frames into a single PDF
    logging.info("Creating PDF from unique images...")
    create_pdf_from_images(frames, pdf_path)

    # Stage 3: the frame files are only intermediates, so remove them
    logging.info("Cleaning up temporary images...")
    clean_up_images(frames)

    logging.info("Process completed successfully!")

# Input video and output locations for this run.
# NOTE(review): video_path is an absolute, machine-specific path; adjust it
# before running elsewhere.
video_path = "Z:\\IMPORTANT\\local H\\GENERAL STUDY\\ONLINE LEARNING MODULE\\HCL_CERTIFICATE_ONBOARD\\SNOWFLAKE TRAINING\\lms_notes\\video_to_pdf\\LMS-SNOW-L1.mp4"
output_folder = "extracted_images"
pdf_path = "output.pdf"

# Kick off the pipeline
main(video_path=video_path, output_folder=output_folder, pdf_path=pdf_path)


2024-11-11 01:41:05,375 - Starting to extract unique frames from the video...


2024-11-11 01:41:05,416 - Unique frame 0 saved: extracted_images\frame_0.jpg
2024-11-11 01:41:05,675 - Duplicate frame 1 detected (similarity 0.96). Skipping...
2024-11-11 01:41:05,864 - Duplicate frame 2 detected (similarity 0.96). Skipping...
2024-11-11 01:41:06,057 - Duplicate frame 3 detected (similarity 0.96). Skipping...
2024-11-11 01:41:06,243 - Duplicate frame 4 detected (similarity 0.96). Skipping...
2024-11-11 01:41:06,440 - Duplicate frame 5 detected (similarity 0.96). Skipping...
2024-11-11 01:41:06,734 - Duplicate frame 6 detected (similarity 0.96). Skipping...
2024-11-11 01:41:06,918 - Duplicate frame 7 detected (similarity 0.96). Skipping...
2024-11-11 01:41:07,131 - Unique frame 8 saved: extracted_images\frame_8.jpg
2024-11-11 01:41:07,317 - Duplicate frame 9 detected (similarity 1.00). Skipping...
2024-11-11 01:41:07,495 - Duplicate frame 10 detected (similarity 0.99). Skipping...
2024-11-11 01:41:07,680 - Duplicate frame 11 detected (similarity 0.99). Skipping...
2024

In [9]:
from fpdf import FPDF
from PIL import Image
import cv2
import os
from skimage.metrics import structural_similarity as ssim
import numpy as np
import logging

# Root logger configuration: show INFO and above as "<timestamp> - <message>"
logging.basicConfig(
    format="%(asctime)s - %(message)s",
    level=logging.INFO,
)

def extract_unique_frames(video_path, output_folder, similarity_threshold=0.95):
    """
    Extracts unique frames from a video file based on structural similarity.

    Every frame is converted to grayscale and compared with the last frame
    that was saved (not simply the previous frame). Frames scoring at or above
    the threshold are skipped as duplicates; the first frame is always kept.
    Output files are named by the frame's index in the video, so skipped
    frames leave gaps in the numbering.

    Parameters:
        video_path (str): Path to the video file.
        output_folder (str): Directory to save the extracted images.
        similarity_threshold (float): Threshold for similarity between frames (0-1).

    Returns:
        List of paths to the unique image frames.

    Raises:
        IOError: If the video cannot be opened or a frame cannot be written.
    """
    video_capture = cv2.VideoCapture(video_path)
    unique_images = []
    try:
        # Fail loudly rather than returning an empty list for a bad path
        if not video_capture.isOpened():
            raise IOError(f"Could not open video: {video_path}")

        os.makedirs(output_folder, exist_ok=True)

        reference_gray = None  # grayscale of the last saved frame
        count = 0
        success, frame = video_capture.read()

        while success:
            # One conversion per frame; kept as the new reference if saved
            curr_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            is_duplicate = False
            if reference_gray is not None:
                # The scalar score is all that is used, so the per-pixel
                # similarity image is not requested.
                score = ssim(reference_gray, curr_gray)
                is_duplicate = score >= similarity_threshold

            if is_duplicate:
                logging.info(f"Duplicate frame {count} detected (similarity {score:.2f}). Skipping...")
            else:
                frame_path = os.path.join(output_folder, f"frame_{count}.jpg")
                # cv2.imwrite returns False on failure instead of raising
                if not cv2.imwrite(frame_path, frame):
                    raise IOError(f"Could not write frame: {frame_path}")
                unique_images.append(frame_path)
                logging.info(f"Unique frame {count} saved: {frame_path}")
                reference_gray = curr_gray

            success, frame = video_capture.read()
            count += 1
    finally:
        # Always hand the capture back, including on errors
        video_capture.release()

    return unique_images

def create_pdf_from_images(image_paths, pdf_path):
    """
    Create a PDF from a list of image paths in landscape mode.

    Each image gets its own A4 landscape page. The image is scaled uniformly
    to the largest size that fits the page and centred on it, so its aspect
    ratio is preserved rather than stretched to the page proportions.

    Parameters:
        image_paths (list of str): Paths to images to be added to the PDF.
        pdf_path (str): Path where the PDF should be saved.
    """
    # A4 landscape page size in mm
    a4_width_mm = 297
    a4_height_mm = 210

    pdf = FPDF(orientation="L", unit="mm", format="A4")

    for image_path in image_paths:
        # Only the pixel dimensions are needed; FPDF reads the file by path,
        # so the PIL handle is closed straight away.
        with Image.open(image_path) as image:
            width_px, height_px = image.size

        # Largest uniform scale (mm per pixel) at which the whole image fits
        scale = min(a4_width_mm / width_px, a4_height_mm / height_px)
        width_mm = width_px * scale
        height_mm = height_px * scale

        # Add image as a new page, centred in whichever direction has slack
        pdf.add_page()
        pdf.image(
            image_path,
            x=(a4_width_mm - width_mm) / 2,
            y=(a4_height_mm - height_mm) / 2,
            w=width_mm,
            h=height_mm,
        )

    pdf.output(pdf_path, "F")
    logging.info(f"PDF created successfully: {pdf_path}")

def main(video_path, output_folder, pdf_path):
    """
    Run the pipeline: extract the unique frames of a video and bind them into a PDF.

    Parameters:
        video_path (str): Path to the video file.
        output_folder (str): Directory that receives the extracted frames.
        pdf_path (str): Path where the PDF should be saved.
    """
    # Stage 1: pull the distinct frames out of the video
    logging.info("Starting to extract unique frames from the video...")
    frames = extract_unique_frames(video_path, output_folder)

    # Stage 2: bind those frames into a single PDF (the frame files are kept)
    logging.info("Creating PDF from unique images...")
    create_pdf_from_images(frames, pdf_path)

    logging.info("Process completed successfully!")

# Input video and output locations for this run.
# NOTE(review): video_path is an absolute, machine-specific path; adjust it
# before running elsewhere.
video_path = "Z:\\IMPORTANT\\local H\\GENERAL STUDY\\ONLINE LEARNING MODULE\\HCL_CERTIFICATE_ONBOARD\\SNOWFLAKE TRAINING\\lms_notes\\video_to_pdf\\lms_snow_v4.mp4"
output_folder = "extracted_images_v4"
pdf_path = "output_lms_v4.pdf"

# Kick off the pipeline
main(video_path=video_path, output_folder=output_folder, pdf_path=pdf_path)


2024-11-11 01:55:42,686 - Starting to extract unique frames from the video...
2024-11-11 01:55:42,721 - Unique frame 0 saved: extracted_images_v4\frame_0.jpg
2024-11-11 01:55:42,849 - Duplicate frame 1 detected (similarity 0.96). Skipping...
2024-11-11 01:55:43,005 - Duplicate frame 2 detected (similarity 0.96). Skipping...
2024-11-11 01:55:43,145 - Duplicate frame 3 detected (similarity 0.96). Skipping...
2024-11-11 01:55:43,278 - Duplicate frame 4 detected (similarity 0.96). Skipping...
2024-11-11 01:55:43,402 - Duplicate frame 5 detected (similarity 0.96). Skipping...
2024-11-11 01:55:43,532 - Duplicate frame 6 detected (similarity 0.96). Skipping...
2024-11-11 01:55:43,670 - Duplicate frame 7 detected (similarity 0.96). Skipping...
2024-11-11 01:55:43,817 - Unique frame 8 saved: extracted_images_v4\frame_8.jpg
2024-11-11 01:55:43,948 - Duplicate frame 9 detected (similarity 1.00). Skipping...
2024-11-11 01:55:44,087 - Duplicate frame 10 detected (similarity 0.98). Skipping...
2024-