In [2]:
%pip install transformers torchvision imagehash

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.2.2 -> 23.3.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [6]:
import os
import tempfile

import torch
from fpdf import FPDF
from imagehash import phash
from moviepy.editor import VideoFileClip
from PIL import Image
from transformers import ViTFeatureExtractor, ViTForImageClassification


# Load the preprocessing step and the classifier from the same pretrained
# checkpoint (extractor first, then model).
text_feature_extractor, text_model = (
    loader.from_pretrained("JuanMa360/text-in-image-detection")
    for loader in (ViTFeatureExtractor, ViTForImageClassification)
)

# No scene-classification model is loaded.
scene_model = None

def compute_text_probability(frame, text_class_index=2):
    """Return the classifier's softmax probability of one class for ``frame``.

    Args:
        frame: Image handed to ``text_feature_extractor`` as ``images=``.
        text_class_index: Column of the softmax output to return. Defaults
            to 2. NOTE(review): presumably the "contains text" label of the
            checkpoint; confirm against ``text_model.config.id2label``.

    Returns:
        float: Probability of class ``text_class_index`` for the single
        image in the batch.
    """
    inputs_text = text_feature_extractor(images=frame, return_tensors="pt")

    # Inference only: disable autograd so no computation graph is built
    # (and kept alive) for every frame that gets scored.
    with torch.no_grad():
        logits_text = text_model(**inputs_text).logits

    probabilities = torch.nn.functional.softmax(logits_text, dim=1)
    return probabilities[0, text_class_index].item()

def hash_image(image):
    """Compute the perceptual hash (``phash``) of ``image`` and return it."""
    perceptual_hash = phash(image)
    return perceptual_hash

def classify_frames(frames, confidence_threshold_text=0.985, hash_threshold=10, confidence_threshold_scene=0.5, similarity_threshold=0.5):
    """Select the frames that contain text, skipping near-duplicate frames.

    A frame is kept when its text probability is strictly greater than
    ``confidence_threshold_text`` and its perceptual hash differs from the
    hash of every frame kept so far by more than ``hash_threshold``.
    Each kept frame appears exactly once, in its original order.

    Args:
        frames: Iterable of images accepted by ``compute_text_probability``
            and ``hash_image``.
        confidence_threshold_text: Minimum (exclusive) text probability for
            a frame to be considered.
        hash_threshold: Largest hash difference at which two frames are
            still treated as the same content (for imagehash hashes the
            difference is the number of differing bits). ``0`` drops exact
            hash matches only.
        confidence_threshold_scene: Accepted for backward compatibility;
            unused because no scene model is applied here.
        similarity_threshold: Accepted for backward compatibility; unused,
            de-duplication is governed by ``hash_threshold``.

    Returns:
        list: The selected frames (empty if none qualify).
    """
    important_frames = []
    kept_hashes = []

    for frame in frames:
        # Strict comparison: a probability equal to the threshold is rejected.
        if compute_text_probability(frame) <= confidence_threshold_text:
            continue

        frame_hash = hash_image(frame)

        # Compare against every frame already kept so that content seen
        # earlier (not just in the immediately preceding frame) is skipped.
        is_near_duplicate = any(
            frame_hash - kept_hash <= hash_threshold for kept_hash in kept_hashes
        )
        if is_near_duplicate:
            continue

        important_frames.append(frame)
        kept_hashes.append(frame_hash)

    return important_frames

def extract_frames(video_path, num_frames=5):
    """Sample frames from a video at a regular interval as RGB PIL images.

    The sampling step is ``total_frames // num_frames`` but never less than
    5 frames, so the number of returned images is approximately (not
    exactly) ``num_frames`` and can be smaller for short clips.

    Args:
        video_path: Path of the video opened with ``VideoFileClip``.
        num_frames: Target number of frames; must be positive.

    Returns:
        list: PIL images in chronological order.

    Raises:
        ValueError: If ``num_frames`` is not positive.
    """
    if num_frames <= 0:
        raise ValueError(f"num_frames must be a positive integer, got {num_frames!r}")

    clip = VideoFileClip(video_path)
    try:
        fps = clip.fps
        total_frames = int(fps * clip.duration)
        frame_interval = max(total_frames // num_frames, 5)

        frames = []
        for frame_index in range(0, total_frames, frame_interval):
            # get_frame takes a timestamp in seconds, not a frame index.
            frame = clip.get_frame(frame_index / fps)
            frames.append(Image.fromarray(frame.astype('uint8'), mode='RGB'))
    finally:
        # Always release the underlying video reader, even if decoding fails.
        clip.close()

    return frames

def _fit_image_size(pixel_width, pixel_height, max_width, max_height):
    """Return (width, height) that fits the box while keeping the aspect ratio.

    The width is capped at ``min(max_width, pixel_width)``; if the resulting
    height exceeds ``max_height`` the image is shrunk further so it fits.
    """
    aspect_ratio = pixel_width / pixel_height
    width = min(max_width, pixel_width)
    height = width / aspect_ratio
    if height > max_height:
        height = max_height
        width = height * aspect_ratio
    return width, height


def generate_pdf(frames, output_pdf_path):
    """Write ``frames`` to a PDF, two images per page stacked vertically.

    Each image keeps its own aspect ratio and is scaled to fit inside the
    page margins, using at most half of the usable page height so that two
    images always fit on one page.

    Args:
        frames: Sequence of PIL images (objects with ``width``, ``height``
            and ``save(path, format=...)``).
        output_pdf_path: Destination path passed to ``FPDF.output``.
    """
    margin = 15  # page margin on every side, in PDF units
    frames_per_page = 2

    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=margin)
    pdf.set_font("Arial", size=12)

    available_width = pdf.w - 2 * margin
    slot_height = (pdf.h - 2 * margin) / frames_per_page

    # Intermediate PNGs live in a private temp directory so they cannot
    # overwrite files in the working directory and are removed on any exit.
    with tempfile.TemporaryDirectory() as tmp_dir:
        for page_start in range(0, len(frames), frames_per_page):
            pdf.add_page()
            y = margin

            page_end = min(page_start + frames_per_page, len(frames))
            for index in range(page_start, page_end):
                frame = frames[index]
                image_path = os.path.join(tmp_dir, f"frame_{index + 1}.png")
                frame.save(image_path, format='PNG')

                width, height = _fit_image_size(
                    frame.width, frame.height, available_width, slot_height
                )
                pdf.image(image_path, x=margin, y=y, w=width, h=height)

                # The next image on this page starts right below this one.
                y += height

    pdf.output(output_pdf_path)



In [7]:
# Input video and destination PDF.
video_path = "video.mp4"
output_pdf_path = 'pdf.pdf'

# Sample frames from the video, then keep the ones selected by classify_frames.
frames = extract_frames(video_path, num_frames=20)
important_frames = classify_frames(frames)

# Only write a PDF when at least one frame was selected.
if not important_frames:
    print("No important frames found.")
else:
    generate_pdf(important_frames, output_pdf_path)
    print(f"PDF generated successfully at {output_pdf_path}")

PDF generated successfully at pdf.pdf
