# Dependencies

In [4]:
from pathlib import Path

import cv2
import matplotlib.animation as animation
import matplotlib.pyplot as plt
import numpy as np

# local dependencies
from utils.mpeg import MPEG
from utils.quality_assessment import mse

In [5]:
# Folder that receives the rendered videos (relative to the working directory).
output_path = Path('../output/videos')
# Create it together with any missing parents; an existing folder is not an error.
output_path.mkdir(exist_ok=True, parents=True)

# Load a Video

In [6]:
# Source clip: the "suzie_qcif" test sequence hosted on media.xiph.org.
cap = cv2.VideoCapture("https://media.xiph.org/video/derf/y4m/suzie_qcif.y4m")

try:
    # Fail early: an unreachable or unsupported source would otherwise produce an
    # empty frame list and only break much later with a confusing indexing error.
    if not cap.isOpened():
        raise RuntimeError("Could not open the video source")

    # frame rate reported by the capture backend, rounded to an integer
    fps = round(cap.get(cv2.CAP_PROP_FPS))
    frames = []

    # read() returns (success flag, frame); the flag becomes falsy at end of stream
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frames.append(frame)
finally:
    # always hand the capture handle back, even if reading failed midway
    cap.release()

if not frames:
    raise RuntimeError("The video source was opened but no frame could be decoded")

In [7]:
# Stack the decoded frames into one array; the channel order stays BGR.
frames = np.array(frames)

# Summarize the loaded clip (one property per output line).
print(
    f"video.shape: {frames.shape}",
    f"video.dtype: {frames.dtype}",
    f"type(video): {type(frames)}",
    f"frame per second (fps): {fps}",
    sep="\n",
)

video.shape: (150, 144, 176, 3)
video.dtype: uint8
type(video): <class 'numpy.ndarray'>
frame per second (fps): 30


# Moving Picture Experts Group (MPEG)

In [8]:
# Encode pattern : IPPPP....
# -> the first frame is the I-frame, every following frame is a P-frame
i_frame, p_frames = frames[0], frames[1:]

# compression rate (one setting per encoder)
scale_1, scale_2, scale_3 = 1, 4, 16

# Three encoders over the same clip; they differ only in `scale`.
mpeg_1, mpeg_2, mpeg_3 = (
    MPEG(frames, scale=scale, search_area=4)
    for scale in (scale_1, scale_2, scale_3)
)

### Encode I-frame
   - Returns quantized 2D DCT Coefficients of the `i-frame` [YCrCb color space]
   - `tuple[y_dct, cr_dct, cb_dct]`

In [9]:
# Encode the same I-frame once with each encoder.
i_encoded_1, i_encoded_2, i_encoded_3 = (
    codec.i_encode(frame=i_frame)
    for codec in (mpeg_1, mpeg_2, mpeg_3)
)

### Encode P-frame
   - Returns the `Motion Vectors`, the quantized 2D DCT coefficients of the `Diffs`, and the `Residuals` for all frames
   - `tuple[motion_vectors, tuple[diffs_y_dct, diffs_cr_dct, diffs_cb_dct], residuals]`

In [10]:
# Each encoder receives its own encoded I-frame and yields the triple
# (motion vectors, encoded diffs, residuals).
(
    (mv_1, diff_encoded_1, residuals_1),
    (mv_2, diff_encoded_2, residuals_2),
    (mv_3, diff_encoded_3, residuals_3),
) = (
    codec.encode(i=i_encoded)
    for codec, i_encoded in (
        (mpeg_1, i_encoded_1),
        (mpeg_2, i_encoded_2),
        (mpeg_3, i_encoded_3),
    )
)

### Decode frames
   - Returns `decoded video`

In [11]:
# Rebuild one video per encoder from its encoded I-frame, motion vectors and diffs.
# color space of the reconstructed videos: BGR
reconstructed_video_1, reconstructed_video_2, reconstructed_video_3 = (
    codec.decode(i=i_encoded, mv=motion_vectors, diff=diff_encoded)
    for codec, i_encoded, motion_vectors, diff_encoded in (
        (mpeg_1, i_encoded_1, mv_1, diff_encoded_1),
        (mpeg_2, i_encoded_2, mv_2, diff_encoded_2),
        (mpeg_3, i_encoded_3, mv_3, diff_encoded_3),
    )
)

In [12]:
# plot
# Black strip stacked on top of every frame; it only holds the caption text.
pad = np.zeros(shape=(20, frames.shape[2], frames.shape[3]), dtype=np.uint8)
font = cv2.FONT_HERSHEY_SIMPLEX

# Delay between frames in milliseconds. waitKey(0) would block until a key is
# pressed, so the delay is kept at 1 ms or more.
frame_delay_ms = max(1, int(1000 / fps))


def _captioned(image, text):
    """Return a new image: the caption strip with `text` drawn in it, stacked above `image`."""
    # np.vstack allocates a fresh array, so drawing on it never touches the source frame
    canvas = np.vstack((pad, image))
    cv2.putText(canvas, text, (2, 15), font, .5, (255, 255, 255), 1, cv2.LINE_AA)
    return canvas


try:
    for i in range(len(frames)):
        panels = [_captioned(frames[i], 'Original Video')]

        for scale, video in (
            (scale_1, reconstructed_video_1),
            (scale_2, reconstructed_video_2),
            (scale_3, reconstructed_video_3),
        ):
            # Measure the error on the raw frames only. Measuring after padding and
            # captioning would mix the caption strip and the drawn text into the MSE.
            error = mse(frames[i], video[i])
            panels.append(_captioned(video[i], f"scale:{scale} , mse:{error:.2f}"))

        # original and the three reconstructions side by side
        concatenated_frame = np.concatenate(panels, axis=1)

        cv2.imshow('Suzie Video', concatenated_frame)
        cv2.waitKey(frame_delay_ms)
finally:
    # close the preview window even if the loop is interrupted or raises
    cv2.destroyAllWindows()

## More details on reconstructed_video_1

### Reconstructed I-frame

In [None]:
nrows, ncols = 1, 2

fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(ncols * 4, nrows * 4), layout='compressed')
fig.suptitle('I-frame')

# left: first frame of the source clip, right: first frame of the reconstruction
panels = (
    (frames[0], "original i-frame"),
    (reconstructed_video_1[0], "reconstructed i-frame"),
)

for ax, (bgr_image, title) in zip(axs, panels):
    # the videos are stored as BGR, matplotlib expects RGB
    ax.imshow(cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB), cmap='gray')
    ax.set_title(title)
    ax.axis('off')

plt.show()

### Residuals from frame 10 to 15

In [None]:
nrows = 2
ncols = 3
start_frame = 9

fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(ncols * 4, nrows * 4), layout='compressed')
fig.suptitle('Residuals')

# Fill the grid row by row. Iterating over the axes themselves keeps the loop in
# sync with nrows/ncols, so resizing the grid cannot leave panels empty or
# index past the grid.
for offset, ax in enumerate(axs.flat):
    frame_index = start_frame + offset
    ax.imshow(residuals_1[frame_index], cmap='gray')
    # titles are 1-based, array indices are 0-based
    ax.set_title(f"frame {frame_index + 1}")
    ax.axis('off')

plt.show()

### Enhance residuals

In [15]:
# Use the frame rate measured when the clip was loaded instead of a hard-coded value.
video_fps = fps
# Half-width of the temporal window: a tenth of a second of frames on each side.
num_neighbours = video_fps // 10

# Work on float magnitudes so that the running means are not truncated when the
# residuals are stored with an integer dtype.
residual_magnitude = np.abs(residuals_1).astype(float)

# The smoothed result goes into a separate buffer. Averaging in place would feed
# already-smoothed earlier frames into the window of every later frame.
enhanced_residuals = np.empty_like(residual_magnitude)

for i in range(len(residual_magnitude)):
    lower_bound = max(i - num_neighbours, 0)
    upper_bound = i + num_neighbours + 1  # slicing clips this at the last frame
    enhanced_residuals[i] = residual_magnitude[lower_bound:upper_bound].mean(axis=0)

# Min-max normalize to [0, 1]; a constant input has no range and maps to all zeros.
lowest = enhanced_residuals.min()
value_range = enhanced_residuals.max() - lowest
if value_range > 0:
    enhanced_residuals = (enhanced_residuals - lowest) / value_range
else:
    enhanced_residuals = np.zeros_like(enhanced_residuals)

# suppress weak responses, then brighten what is left (exponent < 1 lifts mid-tones)
enhanced_residuals[enhanced_residuals < .1] = 0
enhanced_residuals **= .8

In [None]:
nrows, ncols = 2, 6
start_frame = 9

fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(ncols * 3, nrows * 3), layout='compressed')
fig.suptitle('Enhancing Residuals')

# top row: residuals as produced by the encoder, bottom row: enhanced version of the same frame
for column in range(ncols):
    frame_index = start_frame + column
    for row, stack in enumerate((residuals_1, enhanced_residuals)):
        axs[row, column].imshow(stack[frame_index], cmap='gray')
        axs[row, column].set_title(f"frame {frame_index + 1}")

# hide ticks and frames on every panel
for ax in fig.axes:
    ax.axis('off')

plt.show()

### Motion Vectors from frame 10 to 15
   - Motion vector: the direction of motion from the `Reference` frame to the `Current` frame
   - Motion vectors are stored in array order (row, column), so a motion of [1, 2] is plotted on the x-y axes as (x, y) = (2, 1)
   - The y-axis is also inverted so that the plot follows array indexing (row 0 at the top)
   - The `scale` of the `quiver` function should be the size of the macroblock, but it is set to 1 for better visualization

In [None]:
nrows, ncols = 2, 3
start_frame = 9

fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(ncols * 4, nrows * 4), layout='compressed')
fig.suptitle('Motion Vectors')

# size of the motion-vector grid (axes 1 and 2 of mv_1)
height, width = mv_1.shape[1:3]

# one arrow origin per grid position
x, y = np.meshgrid(range(width), range(height))

for offset, ax in enumerate(axs.flat):
    frame_index = start_frame + offset
    field = mv_1[frame_index]

    # component 1 is drawn along x and component 0 along y (array order is swapped for plotting)
    ax.quiver(x, y, field[:, :, 1], field[:, :, 0], angles='xy', scale_units='xy', scale=1, color='k')
    ax.set_title(f"frame {frame_index + 1}")

    # put row 0 at the top, as in array indexing
    ax.invert_yaxis()

plt.show()

### Save the reconstructed frames as a video file

In [None]:
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(8, 4), layout='compressed')


def update(i):
    """Redraw the figure for frame `i`: original on the left, reconstruction on the right."""
    panels = (
        (frames[i], 'Original frame'),
        (reconstructed_video_1[i], 'Reconstructed frame'),
    )

    for ax, (bgr_image, title) in zip(axs, panels):
        # wipe the previous frame before drawing the new one
        ax.cla()
        ax.imshow(cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB))
        ax.set_title(title)
        ax.axis('off')


# one animation step per video frame
ani = animation.FuncAnimation(fig, update, frames=len(frames))

# write the animation to disk with the ffmpeg writer at the clip's frame rate
ani.save(filename=f"{output_path}/output1.mp4", writer='ffmpeg', fps=fps)

In [None]:
# Render one figure pixel per video pixel. figsize is given in inches, so the pixel
# size is figsize * dpi; the dpi is pinned here because the default figure dpi
# depends on the active backend / rcParams and is not guaranteed to be 100.
video_dpi = 100
video_height, video_width = reconstructed_video_1.shape[1:3]

fig = plt.figure(figsize=(video_width / video_dpi, video_height / video_dpi), dpi=video_dpi)
# a single axes that fills the whole figure (no margins around the image)
ax = fig.add_axes([0, 0, 1, 1])


# NOTE: this reuses the name `update` and therefore replaces the two-panel callback
# defined earlier in the notebook.
def update(i):
    """Draw reconstructed frame `i` (converted from BGR to RGB) without axes decorations."""
    ax.cla()
    ax.imshow(cv2.cvtColor(reconstructed_video_1[i], cv2.COLOR_BGR2RGB))
    ax.axis('off')


# The frame count comes from the array that the callback actually indexes.
ani = animation.FuncAnimation(fig, update, frames=len(reconstructed_video_1))
ani.save(filename=f"{output_path}/output2.mp4", writer='ffmpeg', fps=fps, dpi=video_dpi)