### A notebook to compare videos of the avatar using different models for animation

In [4]:
import cv2
import numpy as np
from skimage.metrics import structural_similarity

video1 = cv2.VideoCapture('videos/low_9_gt.mkv')
video2 = cv2.VideoCapture('videos/model0.1_low_9_pred.mkv')
FHD = (1920, 1080)
out_gt = cv2.VideoWriter('out_gt.mp4', -1, 60, FHD) #name, codec, framerate, resolution
out_pred = cv2.VideoWriter('out_pred.mp4', -1, 60, FHD)
diff_vid = cv2.VideoWriter('diff.mp4', -1, 60, FHD)
diff_box_vid = cv2.VideoWriter('diff_box.mp4', -1, 60, FHD)
mask_vid = cv2.VideoWriter('mask.mp4', -1, 60, FHD)
filled_vid = cv2.VideoWriter('filled.mp4', -1, 60, FHD)

psnr_array = []
ssim_array = []

def calculate_psnr(original, prediction):
    mse = np.mean((original - prediction) ** 2)
    max_pixel_value = 255.0
    psnr = 20 * np.log10(max_pixel_value / np.sqrt(mse))
    return psnr

while(video1.isOpened()):
    ret1, original = video1.read()
    ret2, prediction = video2.read()

    if not ret1 or not ret2:
        break
    
    original_g = cv2.cvtColor(original, cv2.COLOR_BGR2GRAY)
    prediction_g = cv2.cvtColor(prediction, cv2.COLOR_BGR2GRAY)

    #frame_diff = cv2.absdiff(original_g, prediction_g)
    psnr = calculate_psnr(original, prediction)
    (score, diff) = structural_similarity(original_g, prediction_g, full=True) #ssim between -1 et 1, can be represented as a %
    diff = (diff * 255).astype("uint8")
    diff_box = cv2.merge([diff, diff, diff])

    # Threshold the difference image, followed by finding contours to
    # obtain the regions of the two input images that differ
    thresh = cv2.threshold(diff, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
    contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = contours[0] if len(contours) == 2 else contours[1]

    mask = np.zeros(original.shape, dtype='uint8')
    filled_after = prediction.copy() #filled is the predicted frame where filled section are differences with the ground truth frame
    
    for c in contours:
        area = cv2.contourArea(c)
        if area > 40:
            x,y,w,h = cv2.boundingRect(c)
            cv2.rectangle(original, (x, y), (x + w, y + h), (36,255,12), 2)
            cv2.rectangle(prediction, (x, y), (x + w, y + h), (36,255,12), 2)
            cv2.rectangle(diff_box, (x, y), (x + w, y + h), (36,255,12), 2)
            cv2.drawContours(mask, [c], 0, (255,255,255), -1)
            cv2.drawContours(filled_after, [c], 0, (0,255,0), -1)

    # cv2.imshow('original', original)
    # cv2.imshow('prediction', prediction)
    # cv2.imshow('diff', diff)
    # cv2.imshow('diff_box', diff_box)
    # cv2.imshow('mask', mask)
    # cv2.imshow('filled after', filled_after)
    out_gt.write(original)
    out_pred.write(prediction)
    diff_vid.write(diff)
    diff_box_vid.write(diff_box)
    mask_vid.write(mask)
    filled_vid.write(filled_after)

    psnr_array.append(psnr)
    ssim_array.append(score*100)

print(f"mean ssim {np.mean(ssim_array)}%")
print(f"mean psnr {np.mean(psnr_array)}")
video1.release()
video2.release()
out_gt.release()
out_pred.release()
diff_vid.release()
diff_box_vid.release()
mask_vid.release()
filled_vid.release()
cv2.destroyAllWindows()

mean ssim 97.32744856895044%
mean psnr 41.894250215994646


In [9]:
import torch
#test how the padding affect the loss computation
array = [1, 1, 1, 0, 0, 0]
array2 = [1, 1, 0, 0, 0, 0]
torch.nn.functional.mse_loss(torch.FloatTensor(array), torch.FloatTensor(array2)).item()

0.1666666716337204