In [1]:
import cv2
from abc import ABCMeta, abstractmethod, ABC



class LandmarkDetectorAbstract(ABC):
    @abstractmethod
    def get_68_landmarks(image):
        """
        In here you have to implement everything you need to return 68 landmark coordimation
        given an image. Including face detector + landmark detector
        At the end of this method, we will get 2D numpy array with len==68
        """
        raise NotImplementedError("You have to implement this method. \
                                   Input is image, output are 2D numpy array representing coordination of landmarks ")


class PFLDLandmarkDetector(LandmarkDetectorAbstract):
    import dlib
    from imutils import face_utils

    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor("/home/vuthede/VinAI/mydeformation/model.dat")

    def get_rect_and_keypoints(image):
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        rects = detector(gray, 1)
        kps_list = []
        for rect in rects:
            kps = predictor(gray, rect)
            kps = face_utils.shape_to_np(kps)
            kps_list.append(kps)
        
        if len(kps_list):
            return kps_list[0]
    
        return []

    def get_68_landmarks(self, image):
        return get_rect_and_keypoints(image)



In [3]:
import glob
import cv2
import numpy as np






def calculateLSEInOneVideo(lmdetector, videopath, annodir):
    def get_gt_landmark_from_file(anno):
        file1 = open(anno, 'r') 
        ls = file1.readlines() 
        ls = ls[3:-1] # Get onlu lines that contain landmarks. 68 lines

        lm = []
        for l in ls:
            l = l.replace("\n","")
            a = l.split(" ")
            a = [float(i) for i in a]
            lm.append(a)
        
        lm = np.array(lm)
        assert len(lm)==68, "There should be 68 landmarks. Get {len(lm)}"
        return lm

    anno_files = glob.glob(annodir + "/*.pts")
    cap = cv2.VideoCapture(videopath)
    num_frame = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    assert len(anno_files) == num_frame, f"Number of annotation files {len(anno_files)} \
                                         is not equal to number of frames {num_frame} "
    
    lse_list = [] # List losses in all frames
    pre_gt_landmark = None
    pre_pred_landmark  = None
    for i, anno in enumerate(anno_files):
        ret, frame = cap.read()

    
        gt_landmark = get_gt_landmark_from_file(anno)
        pred_landmark = lmdetector.get_68_landmarks(frame)

        assert gt_landmark.shape == pred_landmark.shape, f"Shape of pred landmark is \
                                                            different from gt landmark {gt_landmark.shape}"
        
        # Calculate LSE for this frame
        N=68
        interocular = np.linalg.norm(gt_landmark[36] - gt_landmark[45])
        if i==0: # The first frame
            sum_delta = 0
        else:
            sum_delta = np.sum(np.linalg.norm((gt_landmark-pre_gt_landmark) - (pred_landmark-pre_pred_landmark), axis=1))
        lse_one_frame = sum_delta/(interocular*N)
        lse_list.append(lse_one_frame)

        print(f"LSE frame {i}: {lse_one_frame}")

        # Cache the precious predicted and gt landmark for later use in the next frame
        pre_gt_landmark = gt_landmark
        pre_pred_landmark = pred_landmark


    lse_video = sum(lse_list)/len(lse_list)

    return lse_video


pdld_lm_detector =  PFLDLandmarkDetector()
video1 = "/hdd/data/VinAI/300VW_Dataset_2015_12_14/007/vid.avi"
anno1 = "/hdd/data/VinAI/300VW_Dataset_2015_12_14/007/annot"
lse = calculateLSEInOneVideo(pdld_lm_detector, videopath=video1, annodir=anno1)
print("LSE error: ", lse)

LSE frame 0: 0.0
LSE frame 1: 0.15632874756086546
LSE frame 2: 0.12572033832431345
LSE frame 3: 0.11924433469623062
LSE frame 4: 0.04220068994452393
LSE frame 5: 0.10704500080761749
LSE frame 6: 0.047332489832071736
LSE frame 7: 0.273954910657909
LSE frame 8: 0.2494086175462714
LSE frame 9: 0.3346240394465178
LSE frame 10: 0.787426366478185
LSE frame 11: 0.7071279132879125
LSE frame 12: 0.10235348534327308
LSE frame 13: 0.2823417810422266
LSE frame 14: 0.29605979336852156
LSE frame 15: 0.30469564866130167
LSE frame 16: 0.24246243041132667
LSE frame 17: 0.34493080789321845
LSE frame 18: 0.12709722199935847
LSE frame 19: 0.13582506320727616
LSE frame 20: 0.04460755611466434
LSE frame 21: 0.09572130734861813
LSE frame 22: 0.28577374214029533
LSE frame 23: 0.04395814078304099
LSE frame 24: 0.07546223840712528
LSE frame 25: 0.20718549646293857
LSE frame 26: 0.08143050931356569
LSE frame 27: 0.37219435454028554
LSE frame 28: 0.3406964015628131
LSE frame 29: 0.08981059143657331
LSE frame 30: 

In [18]:

a = np.array([[1,1],[2,2],[3,3]])
b = np.array([[3,3],[3,3],[3,3]])
# a-b
np.linalg.norm(a-b, axis=1)

array([2.82842712, 1.41421356, 0.        ])

In [47]:
import ast



file1 = open(anno1+"/000001.pts", 'r') 
ls = file1.readlines() 
lm = []
ls = ls[3:-1]
for l in ls:
    l = l.replace("\n","")
    a = l.split(" ")
    a = [float(i) for i in a]
    lm.append(a)
print(np.array(lm))

[[741.178 194.555]
 [742.129 212.826]
 [745.478 230.57 ]
 [750.403 248.113]
 [757.5   263.799]
 [768.059 277.73 ]
 [780.72  289.206]
 [794.731 298.483]
 [810.153 301.851]
 [826.669 299.532]
 [842.467 289.884]
 [856.362 277.209]
 [866.998 261.862]
 [873.249 244.143]
 [876.722 225.768]
 [878.995 206.899]
 [879.264 187.83 ]
 [750.478 173.087]
 [758.796 167.175]
 [769.46  166.239]
 [780.359 168.875]
 [790.426 173.932]
 [815.385 174.126]
 [826.828 169.674]
 [838.791 167.536]
 [850.519 169.543]
 [859.711 175.768]
 [803.612 192.672]
 [803.972 205.263]
 [804.145 217.782]
 [804.469 230.466]
 [793.658 239.254]
 [799.224 241.462]
 [805.13  242.954]
 [811.065 241.72 ]
 [816.778 240.212]
 [763.832 192.615]
 [770.783 188.75 ]
 [779.952 189.178]
 [787.906 195.37 ]
 [779.458 197.647]
 [770.294 197.474]
 [821.803 196.244]
 [829.284 190.377]
 [838.73  190.538]
 [846.55  194.646]
 [839.567 198.979]
 [830.165 199.056]
 [785.014 263.   ]
 [792.308 257.522]
 [799.939 254.421]
 [805.419 256.344]
 [811.039 25

In [30]:

import torch
import numpy as np
import math
def wing_loss(y_true, y_pred, w=10.0, epsilon=2.0, N_LANDMARK = 106):
    y_pred = y_pred.reshape(-1, N_LANDMARK, 2)
    y_true = y_true.reshape(-1, N_LANDMARK, 2) 
    
    x = y_true - y_pred
    c = w * (1.0 - math.log(1.0 + w / epsilon))
    absolute_x = torch.abs(x)
    losses = torch.where(w > absolute_x, w * torch.log(1.0 + absolute_x/epsilon), absolute_x - c)
    loss = torch.mean(torch.sum(losses, axis=[1, 2]), axis=0)
    return loss


def custom_wing_loss(y_true, y_pred, w=10.0, epsilon=2.0, N_LANDMARK = 98):
    c = w * (1.0 - math.log(1.0 + w / epsilon))
    x = y1 - y
    absolute_x = torch.abs(x)
    print("Loss beforr:",torch.sum(absolute_x, axis=[1,2]))

    losses = torch.where(w > absolute_x, w * torch.log(1.0 + absolute_x/epsilon), absolute_x - c)
    print("loss hsape:", losses.shape)
    losses = torch.sum(losses, axis=[1,2])

    return losses # Mean wingloss for each sample in batch


y = torch.rand((3, 98,2))
y1 = torch.rand((3, 98,2))
custom_wing_loss(y, y1)

Loss beforr: tensor([59.5201, 67.6106, 62.0940])
loss hsape: torch.Size([3, 98, 2])


tensor([268.1287, 301.2068, 279.6934])