In [2]:
%pip install transformers

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.2.2 -> 23.3.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [6]:
from PIL import Image
from fpdf import FPDF
from moviepy.editor import VideoFileClip
from transformers import ViTFeatureExtractor, ViTForImageClassification
import torch
import os

def classify_frames(frames, confidence_threshold=0.985):
    # Load the ViT model and feature extractor
    feature_extractor = ViTFeatureExtractor.from_pretrained("JuanMa360/text-in-image-detection")
    model = ViTForImageClassification.from_pretrained("JuanMa360/text-in-image-detection")

    important_frames = []

    for i, frame in enumerate(frames):
        # Preprocess the image
        inputs = feature_extractor(images=frame, return_tensors="pt")

        # Perform image classification
        outputs = model(**inputs)
        logits = outputs.logits

        # Get the probability for the 'image_with_text' class
        probability = torch.nn.functional.softmax(logits, dim=1)[0, 2].item()

        # Check if the probability is above the threshold
        if probability > confidence_threshold:
            important_frames.append(frame)

    return important_frames


def extract_frames(video_path, num_frames=5):
    clip = VideoFileClip(video_path)
    frames = []

    frame_interval = max(int(clip.fps * clip.duration) // num_frames, 5)

    for i in range(0, int(clip.fps * clip.duration), int(clip.duration)):
        frame = clip.get_frame(i / clip.fps)
        pil_image = Image.fromarray((frame * 255).astype('uint8'))
        frames.append(pil_image)
    print("len" + str(len(frames)))
    return frames

def generate_pdf(frames, output_pdf_path):
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.set_font("Arial", size=12)

    for i, frame in enumerate(frames):
        image_filename = f"frame_{i + 1}.png"
        frame.save(image_filename, format='PNG')
        pdf.add_page()
        pdf.image(image_filename, x=15, y=15, w=180)
        os.remove(image_filename)

    pdf.output(output_pdf_path)

if __name__ == "__main__":
    video_path = 'video.mp4'
    output_pdf_path = 'output_pdf.pdf'

    frames = extract_frames(video_path, num_frames=5)
    
    # Classify frames to select only important ones
    
    important_frames = classify_frames(frames)

    if important_frames:
        generate_pdf(important_frames, output_pdf_path)
        print(f"PDF generated successfully at {output_pdf_path}")
    else:
        print("No important frames found.")


len30
PDF generated successfully at output_pdf.pdf
