In [None]:
!pip install super_image

Collecting super_image
  Downloading super_image-0.1.7-py3-none-any.whl.metadata (14 kB)
Downloading super_image-0.1.7-py3-none-any.whl (91 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.0/91.0 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: super_image
Successfully installed super_image-0.1.7


In [None]:
import cv2
from PIL import Image
import torch
from super_image import CarnModel, ImageLoader
import numpy as np
import torchvision.transforms as transforms
import os


In [None]:
# Face-focused video super-resolution.
#
# For every frame of the input video: detect faces with a Haar cascade; if at
# least one is found, crop the first detection, upscale it with CarnModel, and
# write that upscaled crop (resized to the original frame size) to the output.
# Frames without a detected face are written unchanged.

# Load the CarnModel for super-resolution with scale 4
model = CarnModel.from_pretrained('eugenesiow/carn-bam', scale=4)  # You can adjust the scale as needed

# Input / output video locations
input_video_path = "D02_20240705162958 (online-video-cutter.com) (8).mp4"
output_video_path = "D02_20240705162958 (online-video-cutter.com) (8) output.mp4"

# Load the pre-trained face detector (Haar Cascade)
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

# Define a tensor-to-PIL conversion utility
to_pil_image = transforms.ToPILImage()

# Open the input video
video = cv2.VideoCapture(input_video_path)
output_video = None

try:
    # VideoCapture does not raise on a bad path; it just reports "not opened".
    if not video.isOpened():
        raise FileNotFoundError(f"Could not open input video: {input_video_path}")

    # Get the frame width, height, and FPS
    frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Keep FPS as a float: int() would truncate e.g. 29.97 to 29 and make the
    # output play at a slightly different speed than the input.
    fps = video.get(cv2.CAP_PROP_FPS)
    if not fps > 0:
        # Some containers report 0 (or NaN) for FPS; fall back to a sane default
        # so the writer still produces a playable file.
        fps = 30.0

    # Prepare the output video writer (same frame size as the input)
    output_video = cv2.VideoWriter(
        output_video_path,
        cv2.VideoWriter_fourcc(*'mp4v'),
        fps,
        (frame_width, frame_height),
    )
    if not output_video.isOpened():
        raise RuntimeError(f"Could not open output video for writing: {output_video_path}")

    # Loop through each frame in the video
    while True:
        ret, frame = video.read()
        if not ret:
            break  # Exit loop if there are no more frames

        # Convert the frame to grayscale for face detection
        gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Detect faces in the frame
        faces = face_cascade.detectMultiScale(gray_frame, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

        if len(faces) == 0:
            # Write the original frame if no face is detected
            output_video.write(frame)
            continue

        # Crop the first detected face
        x, y, w, h = faces[0]  # Assuming the first detected face is the one we want to crop
        face_frame = frame[y:y+h, x:x+w]

        # Convert the cropped face to a PIL Image for processing (BGR -> RGB for PIL)
        pil_image = Image.fromarray(cv2.cvtColor(face_frame, cv2.COLOR_BGR2RGB))

        # Load the image as input for the model
        inputs = ImageLoader.load_image(pil_image)

        # Apply super resolution using CarnModel
        with torch.no_grad():
            preds = model(inputs)

        # Clamp to [0, 1] before converting: ToPILImage scales floats by 255 and
        # casts to uint8, so out-of-range values would wrap around and show up
        # as speckle artefacts in bright/dark regions.
        sr_image = to_pil_image(preds.squeeze(0).clamp(0.0, 1.0))

        # Convert the PIL image back to OpenCV format (RGB -> BGR)
        sr_image = cv2.cvtColor(np.array(sr_image), cv2.COLOR_RGB2BGR)

        # The writer only accepts frames of the size it was created with, so the
        # upscaled face is resized to the original frame dimensions.
        resized_face_frame = cv2.resize(sr_image, (frame_width, frame_height))

        # Write the resized face frame to the output video
        output_video.write(resized_face_frame)
finally:
    # Release the video objects even if processing failed part-way, so the
    # file handles are closed and the output container is finalised.
    video.release()
    if output_video is not None:
        output_video.release()

print("Video processing complete with CarnModel. The enhanced video is saved as:", output_video_path)


https://huggingface.co/eugenesiow/carn-bam/resolve/main/pytorch_model_4x.pt
Video processing complete with CarnModel. The enhanced video is saved as: D02_20240705162958 (online-video-cutter.com) (8) output.mp4
