# ENHANCE! 

## Super Resolution with OpenVINO

WORK IN PROGRESS: THIS NOTEBOOK IS NOT READY FOR PUBLIC RELEASE

[Super Resolution Model description](https://github.com/openvinotoolkit/open_model_zoo/blob/develop/models/intel/single-image-super-resolution-1032/description/single-image-super-resolution-1032.md)

## Preparation

### Install requirements

In [None]:
# ! pip install --quiet --index-url https://test.pypi.org/simple --extra-index-url https://pypi.org/simple openvino-dev
# ! pip install matplotlib youtube_dl Pillow

### Imports

In [None]:
import os
import urllib
from base64 import b64encode
from pathlib import Path, PurePosixPath

import cv2
import matplotlib.pyplot as plt
import numpy as np
import youtube_dl
from IPython.display import HTML
from IPython.display import Image as DisplayImage
from openvino.inference_engine import IECore
from PIL import Image

### Settings

In [None]:
# Location of the model's .xml file in the Open Model Zoo storage, and the device to run inference on
model_url = "https://storage.openvinotoolkit.org/repositories/open_model_zoo/2021.3/models_bin/2/single-image-super-resolution-1032/FP16/single-image-super-resolution-1032.xml"
device = "CPU"

# The local copy keeps the file name from the URL and lives in the models directory
model_name = model_url.rsplit("/", 1)[-1]
model_xml = "models/" + model_name

### Functions


In [None]:
# TODO: clean up, add comments and make more robust!
def resize_and_pad(image, input_shape, interpolation=None):
    """
    Resize image to input_shape (width, height), preserving aspect ratio, adding padding where necessary

    The image is scaled by the largest factor for which it still fits inside the target size.
    Whatever space remains is filled with zeros, split evenly over both sides of the axis
    (the extra pixel of an odd remainder goes to the bottom/right).

    :param image: array with shape (height, width) or (height, width, channels)
    :param input_shape: target size as (width, height), the order `cv2.resize` expects
    :param interpolation: interpolation flag forwarded to `cv2.resize`. If None, the
                          argument is not passed, so OpenCV's default is used
    :return: `image` itself if it already has the target size, otherwise a new array
             with shape (target_height, target_width[, channels])
    """
    target_width, target_height = input_shape
    image_height, image_width = image.shape[:2]
    if (image_height, image_width) == (target_height, target_width):
        return image

    # Use the smaller of the two scale factors, so that the scaled image fits in both dimensions
    scale = min(target_width / image_width, target_height / image_height)
    new_width = min(target_width, max(1, int(round(image_width * scale))))
    new_height = min(target_height, max(1, int(round(image_height * scale))))

    resize_kwargs = {} if interpolation is None else {"interpolation": interpolation}
    resized_image = cv2.resize(image, (new_width, new_height), **resize_kwargs)

    # Pad based on the size that is actually missing, instead of comparing aspect ratios.
    # This avoids float comparisons and works for images without a channel axis.
    missing_height = target_height - resized_image.shape[0]
    missing_width = target_width - resized_image.shape[1]
    if missing_height == 0 and missing_width == 0:
        return resized_image

    top = missing_height // 2
    left = missing_width // 2
    pad = [(top, missing_height - top), (left, missing_width - left)]
    # Never pad the channel axis (if there is one)
    pad += [(0, 0)] * (resized_image.ndim - 2)
    return np.pad(resized_image, pad, mode="constant")

In [None]:
def write_text_on_image(image, text):
    """
    Write the specified text, on a filled black box, in the top left corner of the image

    Both drawing calls are given `image` directly; the value returned by `cv2.putText` is returned.
    """
    typeface = cv2.FONT_HERSHEY_PLAIN
    scale = 4
    thickness = 2
    line_kind = 1
    white = (255, 255, 255)
    black = (0, 0, 0)
    box_origin = (20, 20)
    left, top = box_origin

    # Size of the rendered text, used to size the background box
    (label_width, label_height), _baseline = cv2.getTextSize(text, typeface, scale, thickness)

    # A thickness of -1 draws a filled rectangle
    box_corner = (left + label_width, top + label_height)
    cv2.rectangle(image, box_origin, box_corner, black, -1)

    text_origin = (left, top + label_height + scale - 1)
    return cv2.putText(image, text, text_origin, typeface, scale, white, thickness, line_kind)

## Download and load model

In [None]:
# `import urllib` alone does not guarantee that the `urllib.request` submodule is loaded
import urllib.request

# urlretrieve does not create missing directories
os.makedirs("models", exist_ok=True)

# The model consists of an .xml file and a .bin file with the same base name.
# Swap the extension instead of slicing off a fixed number of characters.
model_bin_url = os.path.splitext(model_url)[0] + ".bin"
model_bin_name = os.path.splitext(model_name)[0] + ".bin"

urllib.request.urlretrieve(model_url, f"models/{model_name}")
urllib.request.urlretrieve(model_bin_url, f"models/{model_bin_name}");

Load the model in Inference Engine with `ie.read_network` and load it to the specified device with `ie.load_network`

The Super Resolution model expects two inputs: 1) the input image, 2) a bicubic interpolation of the input image to a size of 1920x1080. It returns the super resolution version of the image in 1920x1080.

In [None]:
ie = IECore()

# The weights file has the same path as the .xml file, with a .bin extension. Only the
# extension is swapped: `str.replace("xml", "bin")` would also rewrite any other "xml" in the path.
model_bin = os.path.splitext(model_xml)[0] + ".bin"

# Read the network and load it to the specified device
net = ie.read_network(model=model_xml, weights=model_bin)
exec_net = ie.load_network(network=net, device_name=device)

In [None]:
# Network inputs and outputs are dictionaries. Get the keys for the dictionaries.
# NOTE: this assumes the first input is the original (low resolution) image and the second
# input is the bicubic interpolation - verify against the model description.
input_keys = list(exec_net.input_info)
original_image_key, bicubic_image_key = input_keys[0], input_keys[1]
output_key = next(iter(exec_net.outputs))

# Get the expected input and target shape. For the (N,C,H,W) inputs of this network,
# `.dims[2:]` returns the height and width (in that order). The shapes are looked up with the
# same keys that are used to feed the network, so that shapes and inputs cannot get out of sync.
input_height, input_width = tuple(exec_net.input_info[original_image_key].tensor_desc.dims[2:])
target_height, target_width = tuple(exec_net.input_info[bicubic_image_key].tensor_desc.dims[2:])

## Single Image Super Resolution

### Download, load, resize and reshape input image

The input image is read with OpenCV, resized to network input size, and reshaped to (N,C,H,W) (H=height, W=width, C=number of channels, N=number of images). The image is also resized to network output size, with bicubic interpolation. This bicubic image is the second input to the network.

In [None]:
# image source: https://www.flickr.com/people/roland/ via https://storage.googleapis.com/openimages/web/visualizer/index.html?set=train&type=segmentation&r=false&c=%2Fm%2F0k4j&id=531b67238c25813b CC BY 2.0
image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/b/bd/Bled_(9783636305).jpg/170px-Bled_(9783636305).jpg"

# Directory and file name for the local copy of the image
image_dir, image_filename = "images", "image.jpg"
image_path = os.path.join(image_dir, image_filename)

In [None]:
# `import urllib` alone does not guarantee that the `urllib.request` submodule is loaded
import urllib.request

# urlretrieve does not create missing directories
os.makedirs(image_dir, exist_ok=True)
urllib.request.urlretrieve(image_url, image_path);

In [None]:
# Read the image. `cv2.imread` returns None instead of raising an error when the file
# cannot be read, so check explicitly to fail here and not somewhere further down.
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# The network expects landscape images. If the input image is portrait, rotate it before
# propagating through the network
portrait = image.shape[0] > image.shape[1]
if portrait:
    image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)

# Resize the image to network input shape
resized_image = resize_and_pad(image, (input_width, input_height))
# Reshape the image from (H,W,C) to (N,C,H,W)
input_image_original = np.expand_dims(resized_image.transpose(2, 0, 1), axis=0)

# Resize the image to the target shape with bicubic interpolation
bicubic_image = resize_and_pad(image, (target_width, target_height), interpolation=cv2.INTER_CUBIC)
input_image_bicubic = np.expand_dims(bicubic_image.transpose(2, 0, 1), axis=0)

### Do inference

In [None]:
# Propagate both versions of the image through the network
network_inputs = {original_image_key: input_image_original, bicubic_image_key: input_image_bicubic}
network_result = exec_net.infer(inputs=network_inputs)

# Turn the network output into an image: drop the first axis, move the first remaining axis
# to the end, scale by 255, limit the values to the 0-255 range and convert to uint8
scaled_output = network_result[output_key].squeeze(0).transpose(1, 2, 0) * 255
result = np.clip(scaled_output, 0, 255).astype(np.uint8)

In [None]:
if portrait:
    # The input was rotated before inference: undo that for both the result and the bicubic image
    result, bicubic_image = (
        cv2.rotate(landscape_image, cv2.ROTATE_90_CLOCKWISE) for landscape_image in (result, bicubic_image)
    )

### Show result

DEBUG: Showing subtraction of bicubic and super resolution version for testing purposes
TODO: remove padding from visualization

In [None]:
# Show the bicubic image, the super resolution image and their difference side by side.
# The images are indexed with (2,1,0) to reverse the channel order for matplotlib.
fig, ax = plt.subplots(1, 3, figsize=(30, 15))
ax[0].imshow(bicubic_image[:, :, (2, 1, 0)])
ax[1].imshow(result[:, :, (2, 1, 0)])

# Both images are uint8: subtracting them directly wraps around for negative values
# (e.g. 1 - 2 becomes 255). Compute the absolute difference in a signed type instead.
difference = np.abs(bicubic_image.astype(np.int16) - result.astype(np.int16)).astype(np.uint8)
ax[2].imshow(difference[:, :, (2, 1, 0)])

### Write animated gif with bicubic/superresolution comparison

In [None]:
# Label both versions of the image. The super resolution result is wrapped in a cv2.UMat
# before the text is drawn on it.
image_super = write_text_on_image(cv2.UMat(result), "SUPER")
image_bicubic = write_text_on_image(bicubic_image, "BICUBIC")

# Save the labeled images next to the input image
output_stem = image_path[:-4]
for name_suffix, labeled_image in (("enhanced", image_super), ("bicubic", image_bicubic)):
    cv2.imwrite(f"{output_stem}_{name_suffix}.jpg", labeled_image)

In [None]:
# Reverse the order of the channels (presumably BGR to RGB) and convert to PIL images.
# `image_super` is a cv2.UMat: `.get()` returns its contents as an array.
reversed_channels = (2, 1, 0)
result_pil = Image.fromarray(image_super.get()[..., reversed_channels])
bicubic_pil = Image.fromarray(image_bicubic[..., reversed_channels])

In [None]:
# Write a two-frame GIF: the super resolution image, followed by the bicubic image
gif_filename = image_path[:-4] + "_comparison.gif"
gif_options = {
    "format": "GIF",
    "append_images": [bicubic_pil],
    "save_all": True,
    "duration": 1000,
    "loop": 0,
}
result_pil.save(fp=gif_filename, **gif_options)

In [None]:
# Show the comparison GIF stored at `gif_filename` as the output of this cell
DisplayImage(gif_filename)

## Superresolution on Video

DEBUG: Uses `youtube_dl` to quickly download a video from YouTube.

Reads first 1200 frames from video. Change NUM_FRAMES below to modify this.

In [None]:
NUM_FRAMES = 1200

# The video is stored in the videos subdirectory. A local video can be used instead by placing
# it at video_path and commenting out the download at the end of this cell.
video_dir, video_name = "videos", "pat.mp4"
video_path = os.path.join(video_dir, video_name)

# Download the video with youtube_dl.
# Comment this out if the video in video_path already exists
video_url = "https://www.youtube.com/watch?v=V8yS3WIkOrA"
download_options = {"outtmpl": video_path}
with youtube_dl.YoutubeDL(download_options) as ydl:
    ydl.download([video_url])

In [None]:
portrait = False
frames_processed = 0
# The video writers are created once the size of the first output frame is known
superres_video = bicubic_video = stacked_video = None

# Read up to NUM_FRAMES video frames and ENHANCE them. The bicubic upsampled frames, the
# super resolution frames and a side-by-side comparison are each written to their own video.
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise RuntimeError(f"Could not open video: {video_path}")

try:
    while frames_processed < NUM_FRAMES:
        ret, image = cap.read()
        if not ret:
            # No more frames
            break

        if frames_processed == 0:
            # Get video dimensions and determine if video is in portrait mode
            frame_height, frame_width = image.shape[:2]
            portrait = frame_height > frame_width

        if portrait:
            # The network expects landscape images: rotate to landscape
            image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)

        # Resize the image to network input shape
        resized_image = resize_and_pad(image, (input_width, input_height))
        # Reshape the image from (H,W,C) to (N,C,H,W)
        input_image_original = np.expand_dims(resized_image.transpose(2, 0, 1), axis=0)

        # Resize the image to the target shape with bicubic interpolation
        bicubic_image = resize_and_pad(image, (target_width, target_height), interpolation=cv2.INTER_CUBIC)
        input_image_bicubic = np.expand_dims(bicubic_image.transpose(2, 0, 1), axis=0)

        # Do inference
        network_result = exec_net.infer(
            inputs={original_image_key: input_image_original, bicubic_image_key: input_image_bicubic}
        )

        # Transform inference result into frame
        result = network_result[output_key].squeeze(0).transpose(1, 2, 0) * 255
        result = np.clip(result, 0, 255).astype(np.uint8)

        if portrait:
            # Rotate the frames back to the orientation of the source video
            result = cv2.rotate(result, cv2.ROTATE_90_CLOCKWISE)
            bicubic_image = cv2.rotate(bicubic_image, cv2.ROTATE_90_CLOCKWISE)

        if superres_video is None:
            # Create the videos to write the results to. The frame size of a writer must match the
            # size of the frames that are written to it, so it is taken from the first output
            # frame and not from the source video. The videos are written at half the source FPS.
            output_height, output_width = result.shape[:2]
            fourcc = cv2.VideoWriter_fourcc("M", "J", "P", "G")
            output_fps = cap.get(cv2.CAP_PROP_FPS) / 2
            superres_video = cv2.VideoWriter(
                f"{video_path[:-4]}_superres.avi", fourcc, output_fps, (output_width, output_height)
            )
            bicubic_video = cv2.VideoWriter(
                f"{video_path[:-4]}_bicubic.avi", fourcc, output_fps, (output_width, output_height)
            )
            stacked_video = cv2.VideoWriter(
                f"{video_path[:-4]}_stacked.avi", fourcc, output_fps, (output_width * 2, output_height)
            )

        # Write result frame, bicubic frame and side-by-side frame to video
        superres_video.write(result)
        bicubic_video.write(bicubic_image)
        stacked_frame = np.hstack((bicubic_image, result))
        stacked_video.write(stacked_frame)
        frames_processed += 1
finally:
    # Release the capture and the writers, also when processing stopped early or failed
    cap.release()
    for video_writer in (superres_video, bicubic_video, stacked_video):
        if video_writer is not None:
            video_writer.release()

### Compress and show video

DEBUG: remove ffmpeg

In [None]:
# Path of the side-by-side comparison video
stacked_video_path = f"{video_path[:-4]}_stacked.avi"

# The compressed file name is the full stacked file name (including ".avi") with "_compressed.mp4" appended
compressed_video_path = stacked_video_path + "_compressed.mp4"
# IPython shell escape (not plain Python): run ffmpeg with the libx264 video codec to convert the
# stacked video to the compressed path. NOTE(review): the paths are passed to the shell unquoted.
! ffmpeg -i $stacked_video_path -vcodec libx264 $compressed_video_path -hide_banner -loglevel error -y

In [None]:
# Read the compressed video. The context manager closes the file after reading.
with open(compressed_video_path, "rb") as video_file:
    mp4 = video_file.read()

# Embed the video in the notebook as a base64 encoded data URL
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML(
    """
<video width=1200 controls>
      <source src="%s" type="video/mp4">
</video>
"""
    % data_url
)