In [None]:
import os
import numpy as np
from glob import glob
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
import json
import pandas as pd

- keypoint 는 [17, 3] 의 2차원 배열
- 17는 키포인트의 개수이고 3은 [x, y, visibility] 이다
- visibility {0:안보여서 알수없음, 1:가려짐, 2:보임)

In [None]:
NUM_KEYPOINT = 17 # 키포인트 개수
NUM_KEYPOINT_CH = 3 # x, y, visibility
NUM_KEYPOINT_CLASS = 3
thresh_keypoint_count = 10 # 데이터 선별시 최소 개수
VISIBILITY_NOT_EXIST = 0
VISIBILITY_OCCLUDED = 1
VISIBILITY_VISIBLE = 2
MASK_NEGATIVE = 0
MASK_IGNORE = 1
MASK_POSITIVE = 2

In [None]:
max_data_m = 100
channel_label = 4 + 1 + NUM_KEYPOINT * NUM_KEYPOINT_CH #(box:4, cls:1, keypoint:17*3)
channel_label

In [None]:
folder_COCO = '/media/mvlab/469B5B3C650FBA77/data/COCO/'
folder_COCO_annotation = folder_COCO + 'annotations_trainval2017/annotations/'
folder_img = 'val2017'
#folder_img = 'train2017'

In [None]:
path_annotation_keypoints_val2017 = folder_COCO_annotation + 'person_keypoints_'+folder_img + '.json'

In [None]:
os.path.isdir(folder_COCO), os.path.isdir(folder_COCO_annotation)

In [None]:
os.path.isfile(path_annotation_keypoints_val2017)

In [None]:
with open(path_annotation_keypoints_val2017) as json_file:
    json_data = json.load(json_file)
    
len(json_data), json_data.keys()

In [None]:
class_keypoints = json_data['categories'][0]['keypoints']
images = json_data['images']
annotations = json_data['annotations']

len(class_keypoints), len(images), len(annotations)

In [None]:
class_keypoints

#### AIHub 사람 동작 영상 AI 데이터는 키포인트 16개
- 추가 4 head, neck, chest, hip
- 제거 5 nose, left_eye, right_eye, left_ear, right_ear

##### COCO VS AIHub
- COCO 얼굴 중요(눈, 코, 귀)
- AIHub 동작 중요(목, 가슴, 골반 중심)|

In [None]:
images[0]

In [None]:
annotations[0].keys()

In [None]:
annotations[0]['id']

In [None]:
annotations[0]['bbox']

In [None]:
dict_id_image_info = dict()
image_w = []
image_h = []
for image_info in images:
    #print('image_info', image_info)    
    
    image_w.append(image_info['width'])
    image_h.append(image_info['height'])
    
    image_id = image_info['id']    
    dict_id_image_info[image_id] = image_info
        
len(dict_id_image_info)

In [None]:
np.min(image_w), np.min(image_h), np.max(image_w), np.max(image_h)

In [None]:
image_w_max = 640
image_h_max = 640

In [None]:
(np.array(image_w) < 200).mean(), (np.array(image_h) < 200).mean()

In [None]:
plt.title('w, h')
plt.scatter(image_w, image_h, s=2)
plt.xlabel('width')
plt.ylabel('height')

In [None]:
# get image_filename from image_id
list_id = []
list_category_id = []
for annotation in annotations:
    
    image_id = annotation['image_id']
    category_id = annotation['category_id']    
    
    image_info = dict_id_image_info[image_id]
    if len(list_id) == 0:
        print('category_id', category_id)
        print('annotation', annotation)
        print('image_info', image_info)
        
    file_name = image_info['file_name']
    height = image_info['height']
    width = image_info['width']
    
    file_full_path = folder_COCO + folder_img + os.sep + file_name
    
    if not os.path.isfile(file_full_path):
        print('not exist', file_full_path)
        continue
        
    annotation['file_name'] = file_full_path
    annotation['height'] = height
    annotation['width'] = width
    
    #print(annotation)
    list_id.append(image_id)
    list_category_id.append(category_id)
    #break

In [None]:
# 전부 사람(1) 카테고리
np.unique(list_category_id)

In [None]:
#generate dictionary(key:filename, value:annotation list)
#박스와 클래스, 키포인트를 통합
dict_annotation_coco = dict()
for annotation in annotations:
    #print(annotation)
    segmentation = annotation['segmentation']
    bbox = annotation['bbox']
    category_id = annotation['category_id']
    keypoints = annotation['keypoints']    
    file_name = annotation['file_name']
    
    #x0, y0, x0 + w, y0 + h
    x0, y0, w, h = bbox
    b = np.array([x0, y0, x0 + w, y0 + h]).flatten()
    c = np.array(category_id).flatten()
    k = np.array(keypoints).flatten()
    #print(len(b), 'box', bbox, len(c), len(k))
    cbox = np.concatenate((b, c, k)).reshape((1, -1))
    
    if len(dict_annotation_coco) == 0:        
        print('file_name', file_name)
        print('bbox', bbox)
        print('segmentation', len(segmentation), len(segmentation[0]))
        print('keypoints', np.array(keypoints).reshape((-1, 3)))    
        print('cbox', len(cbox), cbox)
    
    if file_name in dict_annotation_coco.keys():
        pre_annotation = dict_annotation_coco[file_name]
        new_bbox = np.concatenate((pre_annotation, cbox), axis=0)        
        dict_annotation_coco[file_name] = new_bbox
    else:
        dict_annotation_coco[file_name] = cbox
    #break

In [None]:
len(dict_annotation_coco), list(dict_annotation_coco.keys())[0]

In [None]:
len(list_id), len(set(list_id))

In [None]:
box_keypoint_label = np.concatenate(list(dict_annotation_coco.values()), 0)
box_keypoint_label.shape

In [None]:
x0 = box_keypoint_label[:, 0]
y0 = box_keypoint_label[:, 1]
x1 = box_keypoint_label[:, 2]
y1 = box_keypoint_label[:, 3]
box_w = x1-x0
box_h = y1-y0
plt.title('h / w = ' +  str(np.mean(box_h/box_w)))
ax = plt.hist(box_w )
ax = plt.hist(box_h, alpha=0.5)

데이터 선별
- 한사람의 키포인트가 일정 개수 이상 있음

In [None]:
len(dict_annotation_coco)

In [None]:
dict_annotation_coco_select = dict()

for key in dict_annotation_coco:
    value = dict_annotation_coco[key]
    #print('key', key)
    #print('value', type(value), value.shape, value)
    
    box = value[:, :4]
    cls = value[:, 4]
    keypoint = value[:, 5:]
    keypoint = keypoint.reshape((-1, NUM_KEYPOINT, NUM_KEYPOINT_CH))
    visibility = keypoint[:, :, 2]
    visibility_count_per_person = np.sum(visibility > VISIBILITY_NOT_EXIST, -1)
    visibility_max_count_per_person = np.max(visibility_count_per_person)
        
    if visibility_max_count_per_person >= thresh_keypoint_count:                
        dict_annotation_coco_select[key] = value

len(dict_annotation_coco), len(dict_annotation_coco_select)
#train2017:(64115, 45900), val2017:(2693, 2012)

In [None]:
dict_annotation_coco = dict_annotation_coco_select

In [None]:
def draw_cbox(image, cbox):
    draw = ImageDraw.Draw(image)

    x0, y0, x1, y1, cls = cbox
    draw.text((x0+2, y0), str(int(cls)))
    draw.rectangle((x0, y0, x1, y1), fill=None, outline=(0,255,0), width=2)

In [None]:
def draw_keypoint(image, keypoints, is_show_class=True):
    draw = ImageDraw.Draw(image)
    
    #print('draw_keypoint', keypoints.shape, keypoints[:,2])
    #[0. 0. 0. 0. 0. 1. 2. 2. 2. 2. 2. 2. 2. 0. 2. 0. 2.]
    for i in range(len(keypoints)):
        point = keypoints[i]
        cls_name = str(i)+':'+ class_keypoints[i] if is_show_class else ''                
        cx = point[0]
        cy = point[1]
        state = point[2]
        circle_radius = 2
        if state > 1:
            draw.ellipse((cx-circle_radius, cy-circle_radius, cx+circle_radius, cy+circle_radius),fill=None, width=1)
            draw.text((cx, cy-10), cls_name)    

In [None]:
def draw_valid_line(draw, p0, p1, color):
    #point : [x, y, state]
    state0 = p0[2]
    state1 = p1[2]
    if state0 * state1 > 0:
        draw.line((tuple(p0[:2]), tuple(p1[:2])), fill=color)
    

def draw_keypoint_line(image, keypoints):
    draw = ImageDraw.Draw(image)
        
    nose = keypoints[0]
    left_eye = keypoints[1]
    right_eye = keypoints[2]
    left_ear = keypoints[3]
    right_ear = keypoints[4]            
    left_shoulder = keypoints[5]
    right_shoulder = keypoints[6]
    left_elbow = keypoints[7]
    right_elbow = keypoints[8]
    left_wrist = keypoints[9]
    right_wrist = keypoints[10]
    left_hip = keypoints[11]
    right_hip = keypoints[12]
    left_knee = keypoints[13]
    right_knee = keypoints[14]
    left_ankle = keypoints[15]
    right_ankle = keypoints[16]        
    
    color_center = (255, 255, 255)
    color_left = (255,255,0)
    color_right = (0,255,255)
    
    draw_valid_line(draw, left_shoulder, right_shoulder, color_center)
    draw_valid_line(draw, left_hip, right_hip, color_center)
    
    draw_valid_line(draw, nose, left_eye, color_left)
    draw_valid_line(draw, left_eye, left_ear, color_left)
    
    draw_valid_line(draw, nose, right_eye, color_right)
    draw_valid_line(draw, right_eye, right_ear, color_right)
        
    draw_valid_line(draw, left_shoulder, left_ear, color_left)    
    draw_valid_line(draw, left_shoulder, left_elbow, color_left)
    draw_valid_line(draw, left_shoulder, left_hip, color_left)
    draw_valid_line(draw, left_elbow, left_wrist, color_left)
    draw_valid_line(draw, left_hip, left_knee, color_left)
    draw_valid_line(draw, left_knee, left_ankle, color_left)
    
    draw_valid_line(draw, right_shoulder, right_ear, color_right)
    draw_valid_line(draw, right_shoulder, right_elbow, color_right)
    draw_valid_line(draw, right_shoulder, right_hip, color_right)
    draw_valid_line(draw, right_elbow, right_wrist, color_right)    
    draw_valid_line(draw, right_hip, right_knee, color_right)
    draw_valid_line(draw, right_knee, right_ankle, color_right)
    

In [None]:
def draw_box_keypoint(image, keypoint_box_label, is_show_class=False):
    #keypoint_box_label : (m, 17*3+5)
    box_label = keypoint_box_label[:, :5]
    keypoint_label = keypoint_box_label[:,5:].reshape((-1, NUM_KEYPOINT, NUM_KEYPOINT_CH))
    #box_label : (m, 5)
    #keypoint_label : (m, 17, 3)
    
    if np.mean(box_label[:, :4]) < 2:
        print('unnormalize')
        img_w, img_h = image.width, image.height        
        box_label[:, :4] *= np.array((img_w, img_h, img_w, img_h)).reshape((1, 4))
        keypoint_label[:, :, :2] *= np.array((img_w, img_h)).reshape((1, 1, 2))
    
    for keypoints in keypoint_label:
        draw_keypoint(image, keypoints, is_show_class=is_show_class)
        draw_keypoint_line(image, keypoints)

    for cbox in box_label:
        draw_cbox(image, cbox)    

#### test

In [None]:
sample_i = 9

In [None]:
sample_image_path = list(dict_annotation_coco.keys())[sample_i]
keypoint_box_label_sample = dict_annotation_coco[sample_image_path]

box_label_sample = keypoint_box_label_sample[:, :5]
keypoint_label_sample = keypoint_box_label_sample[:,5:].reshape((-1, NUM_KEYPOINT, NUM_KEYPOINT_CH))
sample_image_path, len(keypoint_label_sample), keypoint_label_sample

In [None]:
Image.open(sample_image_path)

In [None]:
pd.DataFrame(keypoint_label_sample[0].astype(np.int), class_keypoints, ['x','y','visibility'])

In [None]:
box_label_sample.shape

In [None]:
sample_image = Image.open(sample_image_path)
print('sample_image.size', sample_image.size)
draw_box_keypoint(sample_image, keypoint_box_label_sample, is_show_class=True)

In [None]:
sample_image

- 학습 데이터 구성
- X : 이미지 (h,w,3)
- Y : 박스 + 클래스 + 키포인트 (box:4, class:1, keypoint:17*3(x,y,visibility)))

keypoint 는 영상에서의 좌표가 아닌 박스에서의 상대좌표? NO, 박스가 부정확하면 keypoint 까지 망가진다
- 박스 검출 후 그 내부에서 keypoint 의 상대좌표를 regression 하면 안됨

keypoint 를 검출해서 그것으로부터 박스를 그리는 것이 안전
- keypoint 의 visibility 까지 학습시켜서 keypoint를 그리는데 사용하자
- 그려진 keypoint를 이용해서 box 를 그리자

- box & keypoint = positive
- no box * no keypoint = background
- box & no keypoint = ignore (New!)
- no box & keypoint = not exist

1. keypoint는 groundtruth 로 클래스별 hitmap으로 변환해서 traget 이 되어 학습된다.
    - hitmap gt 는 좌표맵과 키포인트 좌표의 차이를 이용해 생성한다
      - 차이가 가장 작은 위치는 positive + (VISIBLE, OCCLUDED)
      - 차이가 일정 범위 이하는 ignore
      - 차이가 일정 범위 이상은 negative + (NOT_EXIST)
    - hitmap_visibility_gt 는 hitmap_coord_gt 와 별도로 생성한다
      - hitmap의 할당값은 VISIBLE(2), OCCLUDED(1), NOT_EXIST(0) 로 맵핑
      - hitmap_gt는 class별로 존재하나 visibility 는 class와 독립적인 변수이다.
      - hitmap_gt는 keypoint를 찾는 용도이며 visibility는 hitmap_gt로 찾은 keypoint의 상태이다.
    - hitmap gt 는 multi-scale 일 필요가 있다
    - hitmap gt 는 anchor별로 생성될 필요가 있다. 그것이 구현이 더 편리하다
    - 모델은 클래스별 hitmap을 출력한다
    - hitmap loss 적용
1. 모델은 검출기와 같은 anchor를 사용한다
   - 모델은 objectness와 박스 regression을 출력한다
1. positive objectness를 갖는 anchor 는 모델이 예측한 box 변환 값을 이용해 오브젝트 후보의 경계 박스(ROI)를 생성한다
   1. ROI 내부에 존재하는 최대 확률의 keypoint (ROI에 대한 상대)좌표를 구한다
      - keypoint 간의 연결성에 대해서는 무시하는 꼴이다.
      - 따라서 주위에서 침입한 keypoint 가 있는 경우 문제가 발생한다. 이 문제는 hierarchical keypoint 나 affinity vector를 이용하면 해결 가능하다.
      - 일단은 연결성은 무시해서 구현해보자
   1. ROI 정보를 이용해 상대좌표를 이미지에 대한 상대 좌표로 변환한다

In [None]:
def load_xy(annotation, stride=1):
    input_list = []
    bbox_list = []
    path_list = []
    i = 0
    
    for path_image in annotation:
        i+=1
        if stride!=1 and np.random.randint(1, 1+stride)%stride!=0:
            continue
            
        cls_bbox = annotation[path_image]                
        bbox = np.array(cls_bbox[:, :4])
        cls = cls_bbox[:, 4:5]
        keypoint = cls_bbox[:, 5:].astype(np.float)
        keypoint_3d = np.reshape(keypoint, (-1, NUM_KEYPOINT, NUM_KEYPOINT_CH))
        
        img = Image.open(path_image)    
        scale = np.array((img.width, img.height, img.width, img.height))
        scale = np.reshape(scale, (1, 4)).astype(np.float)
        scale_keypoint = np.array((img.width, img.height, 1)).reshape((1,1,3))

        img_arr = np.array(img)
        if img_arr.ndim < 3: 
            print('gray image skip')
            continue #gray image skip
        
        try:
            std_v = np.std(img_arr)
            if std_v < 3:
                print('std_v', std_v)
                continue
        except:
            print('error', path_image)
            continue
        
        box_width = bbox[:, 2] - bbox[:, 0]
        box_height = bbox[:, 3] - bbox[:, 1]
        
        if np.min(box_width) < 1 or np.min(box_height) < 1:
            print('box_size < 1', box_width, box_height)#check
            continue
            
        bbox_norm = bbox.astype(np.float) / scale        
        keypoint_norm = keypoint_3d / scale_keypoint        
        keypoint_norm_2d = np.reshape(keypoint_norm, (-1, NUM_KEYPOINT * NUM_KEYPOINT_CH))
        
        cls_bbox_norm = np.concatenate((bbox_norm, cls, keypoint_norm_2d), axis=1)

        input_list.append(img_arr)
        bbox_list.append(cls_bbox_norm)
        path_list.append(path_image)
        if len(input_list)%100==0:        
            print(len(annotation), i, len(input_list))   
        if len(input_list) >= max_data_m:
            break       

    print(len(input_list), len(bbox_list))
    return input_list, bbox_list

In [None]:
len(dict_annotation_coco)

In [None]:
list_x, list_y = load_xy(dict_annotation_coco, stride=1)

In [None]:
list_x[0].shape, list_y[0].shape

In [None]:
plt.imshow(list_x[0])

In [None]:
list_y[0].shape

In [None]:
list_y[0][:, :4]

In [None]:
list_y[0][:, 5:].reshape((-1, NUM_KEYPOINT, NUM_KEYPOINT_CH))

### box check

In [None]:
for y in list_y:
    box = y[:, :4]
    box_wh = box[:, 2:4] - box[:, 0:2]
    print('y', y.shape)
    print('box_wh', box_wh)

## bg_mask

In [None]:
def coordinate_map_np(h, w):
    #return (h, w, 2)    
    x = np.arange(w)
    y = np.arange(h)
    X, Y = np.meshgrid(x, y)
    xy = np.stack((X, Y), -1)
    xy = (xy).astype(np.float32) + 0.5
    return xy 

## debug

In [None]:
list_y[0][:, 5:].reshape((-1, NUM_KEYPOINT, NUM_KEYPOINT_CH))[:,:,-1]

In [None]:
#list_y[0][:, 5:].reshape((-1, NUM_KEYPOINT, NUM_KEYPOINT_CH))[:,:,-1] = 1+np.arange(NUM_KEYPOINT)

In [None]:
list_y[0][:, 5:].reshape((-1, NUM_KEYPOINT, NUM_KEYPOINT_CH))[:,:,-1]

In [None]:
def display_data(list_x, list_y, stride=1):
    
    for i in range(0, len(list_x), stride):
        x = list_x[i]
        y = list_y[i]
       
        img = Image.fromarray(x)
        kp = y[:, -NUM_KEYPOINT * NUM_KEYPOINT_CH:]        
        kp_cls = kp[:, 2::3]
        print(i, 'y', 'box', y[:, :4])
        #print('kp', kp)
        draw_box_keypoint(img, y, is_show_class=False)
        display(img)    

In [None]:
display_data(list_x, list_y, stride=1)

## preprocessing for model

In [None]:
#https://towardsdatascience.com/building-a-resnet-in-keras-e8f1322a49ba
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import Tensor
import tensorflow.keras as keras
from tensorflow.keras.layers import Input, Conv2D, ReLU, BatchNormalization,\
                                    Add, AveragePooling2D, Flatten, Dense, MaxPool2D
from tensorflow.keras.models import Model

In [None]:
#https://github.com/tensorflow/addons/blob/v0.11.2/tensorflow_addons/image/__init__.py
from tensorflow_addons.image.color_ops import sharpness
from tensorflow_addons.image.filters import gaussian_filter2d
from tensorflow_addons.image.dense_image_warp import dense_image_warp

In [None]:
tf.__version__, tf.test.is_gpu_available()

In [None]:
4 * 2 ** 7, 3 * 2 ** 7

In [None]:
padded_image_shape = (384, 384)#384, 512, 640
anchor_k = 9
num_classes = 3
level_start = 4
level_end = 8
l1 = 1e-8
activation = 'swish'#'selu' is not converted to tflite
kernel_init = tf.initializers.he_normal()
edgecolors = np.random.rand(num_classes, 3) 
edgecolors = np.minimum(edgecolors+0.1, 1.0)
path_weight = "weight/keypoint_efficientDet-D2"

In [None]:
m = len(list_x)
m_train = m//2
list_x_train = list_x[:m_train]
list_x_test = list_x[m_train:]
list_y_train = list_y[:m_train]
list_y_test = list_y[m_train:]
m, m_train, len(list_x), len(list_x_train), len(list_y_train)

#### model function

In [None]:
def swap_xy(boxes):
    """Swaps order the of x and y coordinates of the boxes.
    Arguments:
      boxes: A tensor with shape `(num_boxes, 4)` representing bounding boxes.
    Returns:
      swapped boxes with shape same as that of boxes.
    """
    return tf.stack([boxes[:, 1], boxes[:, 0], boxes[:, 3], boxes[:, 2]], axis=-1)


def convert_to_xywh(boxes):
    """Changes the box format to center, width and height.
    Arguments:
      boxes: A tensor of rank 2 or higher with a shape of `(..., num_boxes, 4)`
        representing bounding boxes where each box is of the format
        `[xmin, ymin, xmax, ymax]`.
    Returns:
      converted boxes with shape same as that of boxes.
    """
    return tf.concat(
        [(boxes[..., :2] + boxes[..., 2:]) / 2.0, boxes[..., 2:] - boxes[..., :2]],
        axis=-1,
    )


def convert_to_corners(boxes):
    """Changes the box format to corner coordinates
    Arguments:
      boxes: A tensor of rank 2 or higher with a shape of `(..., num_boxes, 4)`
        representing bounding boxes where each box is of the format
        `[x, y, width, height]`.
    Returns:
      converted boxes with shape same as that of boxes.
    """
    return tf.concat(
        [boxes[..., :2] - boxes[..., 2:] / 2.0, boxes[..., :2] + boxes[..., 2:] / 2.0],
        axis=-1,
    )


"""
## Computing pairwise Intersection Over Union (IOU)
As we will see later in the example, we would be assigning ground truth boxes
to anchor boxes based on the extent of overlapping. This will require us to
calculate the Intersection Over Union (IOU) between all the anchor
boxes and ground truth boxes pairs.
"""

def compute_iou(boxes1, boxes2):#compute_iou(anchor_boxes, gt_boxes)
    """Computes pairwise IOU matrix for given two sets of boxes
    Arguments:
      boxes1: A tensor with shape `(N, 4)` representing bounding boxes
        where each box is of the format `[x, y, width, height]`.
        boxes2: A tensor with shape `(M, 4)` representing bounding boxes
        where each box is of the format `[x, y, width, height]`.
    Returns:
      pairwise IOU matrix with shape `(N, M)`, where the value at ith row
        jth column holds the IOU between ith box and jth box from
        boxes1 and boxes2 respectively.
    """
    boxes1_corners = convert_to_corners(boxes1)
    boxes2_corners = convert_to_corners(boxes2)
    lu = tf.maximum(boxes1_corners[:, None, :2], boxes2_corners[:, :2])
    rd = tf.minimum(boxes1_corners[:, None, 2:], boxes2_corners[:, 2:])
    intersection = tf.maximum(0.0, rd - lu)
    intersection_area = intersection[:, :, 0] * intersection[:, :, 1]
    boxes1_area = boxes1[:, 2] * boxes1[:, 3]
    boxes2_area = boxes2[:, 2] * boxes2[:, 3]
    union_area = tf.maximum(
        boxes1_area[:, None] + boxes2_area - intersection_area, 1e-8
    )
    return tf.clip_by_value(intersection_area / union_area, 0.0, 1.0)

In [None]:

"""
## Implementing Anchor generator
Anchor boxes are fixed sized boxes that the model uses to predict the bounding
box for an object. It does this by regressing the offset between the location
of the object's center and the center of an anchor box, and then uses the width
and height of the anchor box to predict a relative scale of the object. In the
case of RetinaNet, each location on a given feature map has nine anchor boxes
(at three scales and three ratios).
"""
class AnchorBox:
    """Generates anchor boxes.
    This class has operations to generate anchor boxes for feature maps at
    strides `[8, 16, 32, 64, 128]`. Where each anchor each box is of the
    format `[x, y, width, height]`.
    Attributes:
      aspect_ratios: A list of float values representing the aspect ratios of
        the anchor boxes at each location on the feature map
      scales: A list of float values representing the scale of the anchor boxes
        at each location on the feature map.
      num_anchors: The number of anchor boxes at each location on feature map
      areas: A list of float values representing the areas of the anchor
        boxes for each feature map in the feature pyramid.
      strides: A list of float value representing the strides for each feature
        map in the feature pyramid.
    """

    def __init__(self):
        self.level_start = level_start
        self.level_end = level_end
        
        if anchor_k==9:
            self.aspect_ratios = [0.5, 1.0, 2.0]        
            self.scales = [2 ** x for x in [0, 1 / 3, 2 / 3]]
        else:
            self.aspect_ratios = [1.0]        
            self.scales = [2 ** x for x in [0]]
                
        self._num_anchors = len(self.aspect_ratios) * len(self.scales)
        self._strides = [2 ** i for i in range(self.level_start, self.level_end)]
        self._areas = [x ** 2 for x in [32.0, 64.0, 128.0, 196.0, 256.0]]                        
        self._areas = self._areas[:level_end - level_start]
        
        self._anchor_dims = self._compute_dims()

    def _compute_dims(self):
        """Computes anchor box dimensions for all ratios and scales at all levels
        of the feature pyramid.
        """
        anchor_dims_all = []
        for area in self._areas:
            anchor_dims = []
            for ratio in self.aspect_ratios:
                anchor_height = tf.math.sqrt(area / ratio)
                anchor_width = area / anchor_height
                dims = tf.reshape(
                    tf.stack([anchor_width, anchor_height], axis=-1), [1, 1, 2]
                )
                for scale in self.scales:
                    anchor_dims.append(scale * dims)
            anchor_dims_all.append(tf.stack(anchor_dims, axis=-2))
        return anchor_dims_all

    def _get_anchors(self, feature_height, feature_width, level):
        """Generates anchor boxes for a given feature map size and level
        Arguments:
          feature_height: An integer representing the height of the feature map.
          feature_width: An integer representing the width of the feature map.
          level: An integer representing the level of the feature map in the
            feature pyramid.
        Returns:
          anchor boxes with the shape
          `(feature_height * feature_width * num_anchors, 4)`
        """
        rx = tf.range(feature_width, dtype=tf.float32) + 0.5
        ry = tf.range(feature_height, dtype=tf.float32) + 0.5
        centers = tf.stack(tf.meshgrid(rx, ry), axis=-1) * self._strides[level - self.level_start]
        centers = tf.expand_dims(centers, axis=-2)
        centers = tf.tile(centers, [1, 1, self._num_anchors, 1])
        dims = tf.tile(
            self._anchor_dims[level - self.level_start], [feature_height, feature_width, 1, 1]
        )
        anchors = tf.concat([centers, dims], axis=-1)
        return tf.reshape(
            anchors, [feature_height * feature_width * self._num_anchors, 4]
        )

    def get_anchors(self, image_height, image_width):
        """Generates anchor boxes for all the feature maps of the feature pyramid.
        Arguments:
          image_height: Height of the input image.
          image_width: Width of the input image.
        Returns:
          anchor boxes for all the feature maps, stacked as a single tensor
            with shape `(total_anchors, 4)`
        """
        anchors = [
            self._get_anchors(
                tf.math.ceil(image_height / 2 ** i),
                tf.math.ceil(image_width / 2 ** i),
                i,
            )
            for i in range(self.level_start, self.level_end)
        ]
        return tf.concat(anchors, axis=0)
    
    def get_anchors_check(self, image_height, image_width):
        """Generates anchor boxes for all the feature maps of the feature pyramid.
        Arguments:
          image_height: Height of the input image.
          image_width: Width of the input image.
        Returns:
          anchor boxes for all the feature maps, stacked as a single tensor
            with shape `(total_anchors, 4)`
        """
        anchors = [
            self._get_anchors(
                tf.math.ceil(image_height / 2 ** i),
                tf.math.ceil(image_width / 2 ** i),
                i,
            )
            for i in range(self.level_start, self.level_end)
        ]
        return anchors

In [None]:
padded_image_shape

In [None]:
def shuffle_pixel_lr(net, h, w, c):
    # net # (m, h, w, c)    
    n0, n1 = tf.split(net, 2, -1)
    n01 = tf.stack((n0, n1), 3) # (m, h, w, 2, c)    
    out = tf.reshape(n01, [-1, h, w*2, c//2])
    return out

def shuffle_pixel_2x(net, h, w, c):
    # net # (m, h, w, c)
    #n0, n1 = tf.split(net, 2, -1)
    n0, n1, n2, n3 = tf.split(net, 4, -1)
    n01 = tf.stack((n0, n1), 3) # (m, h, w, 2, c)
    n23 = tf.stack((n2, n3), 3) # (m, h, w, 2, c)

    n01234 = tf.stack((n01, n23), 2)# (m, h, 2, w, 2, c)
    out = tf.reshape(n01234, [-1, h*2, w*2, c//4])
    return out


def shuffle_pixel_3x(net, h, w):
    # net # (m, h, w, c)
    #n0, n1 = tf.split(net, 2, -1)
    n0, n1, n2, n3, n4, n5, n6, n7, n8 = tf.split(net, 9, -1)
    r0 = tf.stack((n0, n4, n1), 3) # (m, h, w, 2, c)
    r1 = tf.stack((n5, n6, n7), 3) # (m, h, w, 2, c)
    r2 = tf.stack((n2, n8, n3), 3) # (m, h, w, 2, c)

    r = tf.stack((r0, r1, r2), 2)# (m, h, 2, w, 2, c)
    out = tf.reshape(r, [-1, h*3, w*3, 1])
    return out


def shuffle_pixel_4x(net):
    # net # (m, h, w, c)
    #n0, n1 = tf.split(net, 2, -1)
    net_split = tf.split(net, 16, -1)
    r0 = tf.stack(net_split[0:4], 3) # (m, h, w, 2, c)
    r1 = tf.stack(net_split[4:8], 3) # (m, h, w, 2, c)
    r2 = tf.stack(net_split[8:12], 3)
    r3 = tf.stack(net_split[12:16], 3)

    r = tf.stack((r0, r1, r2, r3), 2)# (m, h, 4, w, 2, c)
    #out = tf.reshape(r, [-1, h*4, w*4, 1])
    out = tf.reshape(r, [-1])
    return out


def shuffle_pixel_5x(net, h, w, c=1):
    # net # (m, h, w, c)
    k = 5
    net_split = tf.split(net, k*k, -1)
    r0 = tf.stack(net_split[k * 0:k * 1], 3)
    r1 = tf.stack(net_split[k * 1:k * 2], 3)
    r2 = tf.stack(net_split[k * 2:k * 3], 3)
    r3 = tf.stack(net_split[k * 3:k * 4], 3)
    r4 = tf.stack(net_split[k * 4:k * 5], 3)

    r = tf.stack((r0, r1, r2, r3, r4), 2)
    out = tf.reshape(r, [-1, h*k, w*k, c])
    return out

In [None]:
def image_color_augment(x):
    if tf.random.uniform(()) < -0.5:
        x_max = tf.reduce_max(x, [1, 2], True)
        x = x_max - x
    if tf.random.uniform(()) < -0.2:
        r, g, b = tf.split(x, 3, axis=-1)
        x = tf.concat((r, b, g), -1)
    elif tf.random.uniform(()) < -0.4:
        r, g, b = tf.split(x, 3, axis=-1)
        x = tf.concat((b, r, g), -1)
    if tf.random.uniform(()) < 0.2:
        x = tf.image.random_hue(x, 0.08/2)
        x = tf.image.random_saturation(x, 0.7, 1.3)
    if tf.random.uniform(()) < 0.2:
        x = tf.image.random_brightness(x, 0.05)
        x = tf.image.random_contrast(x, 0.8, 1.2)
    if tf.random.uniform(()) < 0.2:
        gray = tf.image.rgb_to_grayscale(x)
        x = tf.concat((gray, gray, gray), -1)        
   
    if tf.random.uniform(()) < -0.2:
        x = gaussian_filter2d(x, filter_shape=tuple(np.random.randint(1, 10, (2))), sigma=10)
        #x = gaussian_filter2d(x, filter_shape=np.random.randint(3, 10, (2)), sigma=10)
    if tf.random.uniform(()) < 0.2:        
        x = sharpness(x, factor=10)    
    
    return x

In [None]:
padded_image_shape

In [None]:
def random_flip_horizontal(image, boxes):
    """Flips image and boxes horizontally with 50% chance
    Arguments:
      image: A 3-D tensor of shape `(height, width, channels)` representing an
        image.
      boxes: A tensor with shape `(num_boxes, 4)` representing bounding boxes,
        having normalized coordinates.
    Returns:
      Randomly flipped image and boxes
    """
    if tf.random.uniform(()) > 0.5:
        image = tf.image.flip_left_right(image)
        boxes = tf.stack([1 - boxes[:, 2], boxes[:, 1], 1 - boxes[:, 0], boxes[:, 3]], axis=-1)
   
    return image, boxes


def resize_and_pad_image(
    image, min_side=512.0, jitter=[128*4, 128*4+1], stride=128.0):
   
    image_shape = tf.cast(tf.shape(image)[:2], dtype=tf.float32)
    
    ratio = min_side / tf.reduce_min(image_shape)
    
    image_shape = ratio * image_shape
    image = tf.image.resize(image, tf.cast(image_shape, dtype=tf.int32))
    
    image = tf.image.pad_to_bounding_box(image, 0, 0, padded_image_shape[0], padded_image_shape[1]) 
    
    return image, image_shape, ratio

def resize_and_pad_image_input(
    image, bbox, kp, jitter=[padded_image_shape[0]-64, padded_image_shape[0]+1]):
    
    image_shape = tf.cast(tf.shape(image)[:2], dtype=tf.float32)    
    dst_h = tf.random.uniform((), jitter[0], jitter[1], dtype=tf.float32)
    dst_w = tf.random.uniform((), jitter[0], jitter[1], dtype=tf.float32)
    dst_shape = tf.cast((dst_h, dst_w), tf.int32)
    ratio_h = dst_h / image_shape[0]
    ratio_w = dst_w / image_shape[1]    
    
    image = tf.image.resize(image, dst_shape)        
    image = tf.image.pad_to_bounding_box(image, 0, 0, padded_image_shape[0], padded_image_shape[1])
            
    bbox_padded = tf.stack(
        [
            bbox[:, 0] * ratio_w,
            bbox[:, 1] * ratio_h,
            bbox[:, 2] * ratio_w,
            bbox[:, 3] * ratio_h,
        ], axis=-1,
    )
    
    kp_padded = tf.stack(
        [
            kp[:, :, 0] * ratio_w,
            kp[:, :, 1] * ratio_h,
            kp[:, :, 2] * 1            
        ], axis=-1,
    )
    
    return image, bbox_padded, kp_padded

In [None]:
"""
## Preprocessing data
Preprocessing the images involves two steps:
- Resizing the image: Images are resized such that the shortest size is equal
to 800 px, after resizing if the longest side of the image exceeds 1333 px,
the image is resized such that the longest size is now capped at 1333 px.
- Applying augmentation: Random scale jittering  and random horizontal flipping
are the only augmentations applied to the images.
Along with the images, bounding boxes are rescaled and flipped if required.
"""

def unnormalize_box(bbox, image_shape):
    img_h = image_shape[0]
    img_w = image_shape[1]
    bbox = tf.stack(
        [
            bbox[:, 0] * img_w,
            bbox[:, 1] * img_h,
            bbox[:, 2] * img_w,
            bbox[:, 3] * img_h,
        ],
        axis=-1,
    )
    bbox = convert_to_xywh(bbox)    
    return bbox    


def unnormalize_keypoint(keypoint, image_shape):
    img_h = image_shape[0]
    img_w = image_shape[1]
    keypoint_un = tf.stack(
        [
            keypoint[:, :, 0] * img_w,
            keypoint[:, :, 1] * img_h,
            keypoint[:, :, 2]
        ],
        axis=-1,
    )    
    
    keypoint_un = tf.cast(keypoint_un, tf.float32)
    return keypoint_un


def resize_image(image, method='bilinear'):
    image_shape = tf.cast(tf.shape(image)[0:2], tf.float32)    
    resized_ratio = tf.cast(padded_image_shape / image_shape, tf.float32)
    
    image = tf.image.resize(image, padded_image_shape, method)    
    return image, resized_ratio

def flip_keypoint(kp, img_w):
    #keypoint_3d (n, 17, 3)
    kp_nose = kp[:, 0:1]
    kp_left = kp[:, 1::2]
    kp_right = kp[:, 2::2]        
    kp_nose = tf.stack((img_w - kp_nose[:, :, 0], kp_nose[:, :, 1], kp_nose[:, :, 2]), -1)
    kp_left = tf.stack((img_w - kp_left[:, :, 0], kp_left[:, :, 1], kp_left[:, :, 2]), -1)
    kp_right = tf.stack((img_w - kp_right[:, :, 0], kp_right[:, :, 1], kp_right[:, :, 2]), -1)
    
    kp_rl = tf.stack((kp_right, kp_left), 2)
    kp_rl = tf.reshape(kp_rl, [-1, NUM_KEYPOINT-1, NUM_KEYPOINT_CH])
    kp_flip = tf.concat((kp_nose, kp_rl), 1)
    return kp_flip

def flip_displace(displace):
    displace_lr = displace[:, ::-1]
    dr_x, dr_y, dg_x, dg_y = tf.split(displace_lr, 4, -1)
    displace_out = tf.concat((-1 * dr_x, dr_y, -1 * dg_x, dg_y), -1)    
    return displace_out

def random_flip_horizontal_inputs(image, boxes, kp):

    image = tf.image.flip_left_right(image)
    img_h = tf.shape(image)[0]
    img_w = tf.shape(image)[1]
    img_h = tf.cast(img_h, tf.float32)
    img_w = tf.cast(img_w, tf.float32)
    boxes = tf.stack([img_w - boxes[:, 2], boxes[:, 1], img_w - boxes[:, 0], boxes[:, 3]], axis=-1)
    kp = flip_keypoint(kp, img_w)    

    return image, boxes, kp

def random_attach(image, boxes, cls, kp, displace):
    
    boxes_w = boxes[:, 2] - boxes[:, 0]
    boxes_h = boxes[:, 3] - boxes[:, 1]
    boxes_diag = tf.sqrt(tf.square(boxes_w) + tf.square(boxes_h))
    if tf.reduce_max(boxes_diag) > 128:
        image_shape = tf.cast(tf.shape(image)[0:2]//2, tf.int32)
        image_h2 = tf.cast(image_shape[0], tf.float32)
        image_w2 = tf.cast(image_shape[1], tf.float32)
        image_s = tf.image.resize(image, image_shape)
        image_color_shuffle = tf.stack((image_s[:,:,0],image_s[:,:,2],image_s[:,:,1]), -1)
        image_s2 = tf.concat((image_s, image_color_shuffle), axis=0)
        
        boxes_0 = boxes/2
        boxes_1 = tf.stack((boxes_0[:, 0], image_h2+boxes_0[:, 1],boxes_0[:, 2],image_h2+boxes_0[:, 3]), -1)
       
        boxes_01 = tf.concat((boxes_0, boxes_1), 0)
                
        kp0 = tf.stack((kp[:,:,0]/2, kp[:,:,1]/2, kp[:,:,2]), -1)
        kp1 = tf.stack((kp0[:,:,0], image_h2 + kp0[:,:,1], kp0[:,:,2]), -1)
       
        kp01 = tf.concat((kp0, kp1), 0)
        
        displace_s = tf.image.resize(displace, image_shape, 'nearest')   
        displace_s2 = tf.concat((displace_s, displace_s), axis=0)
              
        image_s2_flip, boxes_01_flip, kp01_flip, displace_s2_flip = random_flip_horizontal_inputs(image_s2, boxes_01, kp01, displace_s2)
        
        boxes_01_flip = tf.stack((image_w2 + boxes_01_flip[:, 0],boxes_01_flip[:, 1],image_w2 +boxes_01_flip[:, 2],boxes_01_flip[:, 3]), -1)
        kp01_flip = tf.stack((image_w2 + kp01_flip[:,:,0], kp01_flip[:,:,1], kp01_flip[:,:,2]), -1)
        image = tf.concat((image_s2, image_s2_flip), axis=1)
        image = tf.cast(image, tf.uint8)
        boxes = tf.concat((boxes_01, boxes_01_flip), 0)
        kp = tf.concat((kp01, kp01_flip), 0)
        displace = tf.concat((displace_s2, displace_s2_flip), axis=1)
        cls = tf.repeat(cls, 4, axis=0)
        
    return image, boxes, cls, kp, displace

def preprocess_data(image, label):     
     
    #label (m,56)
    bbox_norm = label[:, :4]    
    cls = label[:, 4]
    keypoints = label[:, 5:]
    keypoint_3d = tf.reshape(keypoints, [-1, NUM_KEYPOINT, NUM_KEYPOINT_CH])    
    keypoint_3d = tf.cast(keypoint_3d, tf.float32)
    
    #if tf.random.uniform(()) > 10.5: image, bbox_norm, cls, keypoint_3d, displace = random_attach(image, bbox_norm, cls, keypoint_3d, displace)
        
    image, bbox_norm, keypoint_3d = resize_and_pad_image_input(image, bbox_norm, keypoint_3d)
    if tf.random.uniform(()) > 0.5:
        image, bbox_norm, keypoint_3d = random_flip_horizontal_inputs(image, bbox_norm, keypoint_3d)             
    
    #image, image_shape, _, bbox = resize_and_pad_image_bbox(image, bbox)    
    #image, image_shape, _ = resize_and_pad_image(image)
        
    image, image_shape = resize_image(image)        
    
    bbox_unnorm = unnormalize_box(bbox_norm, image_shape)
    keypoints_unnorm = unnormalize_keypoint(keypoint_3d, image_shape)
    #print('keypoints_unnorm', keypoints_unnorm)
    
    return image, bbox_unnorm, cls, keypoints_unnorm
    

In [None]:
tf.reduce_mean(tf.random.uniform((100,10)))

In [None]:
NUM_KEYPOINT, NUM_KEYPOINT_CH

In [None]:
channel_label, NUM_KEYPOINT, NUM_KEYPOINT_CH

In [None]:
y0 = list_y_train[0]
y0.shape, y0

In [None]:
def angle_to_radian(angle):
    return angle * np.pi/180

def box_convert_to_xywh(boxes):
    return np.concatenate(
        [(boxes[..., :2] + boxes[..., 2:]) / 2.0, boxes[..., 2:] - boxes[..., :2]], axis=-1,)

def box_convert_to_corners(boxes):    
    return np.concatenate(
        [boxes[..., :2] - boxes[..., 2:] / 2.0, boxes[..., :2] + boxes[..., 2:] / 2.0], axis=-1,)

def convert_theta(box_xy, angle, img_h, img_w):
    img_size = np.array([[img_w, img_h]]).astype(np.float)
    box_xy_norm = box_xy / img_size    
    box_uv = (np.reshape(box_xy_norm, [-1, 2]) - 0.5) * 2
    
    scale_mat = np.array([1, 0, 0, 1.0*img_h/img_w]).reshape((2,2))
    scale_mat_rev = np.array([1, 0, 0, 1.0*img_w/img_h]).reshape((2,2))
    
    radian = angle_to_radian(angle)        
    rotate_mat = np.array([np.cos(radian), -np.sin(radian), np.sin(radian), np.cos(radian)])        
    rotate_mat = np.reshape(rotate_mat, (2, 2))
    
    box_uv_trans = np.matmul(box_uv, scale_mat)
    box_uv_trans = np.matmul(box_uv_trans, rotate_mat)
    box_uv_trans = np.matmul(box_uv_trans, scale_mat_rev)
    box_trans = (box_uv_trans + 1)/2
    box_trans_xy = np.reshape(box_trans, [-1, 2]) * img_size
    return box_trans_xy

def rotate_box(box, angle, img_h, img_w):
    box_other = box[:, 4:]
    box_xywh = box_convert_to_xywh(box[:, :4])
    box_xy = box_xywh[:, :2] 
    box_wh = box_xywh[:, 2:4]
    
    box_trans_xy = convert_theta(box_xy, angle, img_h, img_w)
    
    box_trans_xywh = np.concatenate((box_trans_xy, box_wh), axis=1)
    box_trans = box_convert_to_corners(box_trans_xywh)
    out = np.concatenate((box_trans, box_other), -1)
    return out

def rotate_keypoint(kp, angle, img_h, img_w):
    
    kp_xy = kp[:, :, :2] 
    kp_vis = kp[:, :, 2:]     

    kp_xy = convert_theta(kp_xy, angle, img_h, img_w)
    kp_xy = np.reshape(kp_xy, [-1, NUM_KEYPOINT, 2])
    kp_3d = np.concatenate((kp_xy, kp_vis), axis=-1)    
    return kp_3d

In [None]:
list_y_train[0][:,:4], rotate_box(list_y_train[0], 10, 10, 10)[:, :4]

In [None]:
keypoints = list_y_train[0][:, 5:]
keypoint_3d = np.reshape(keypoints, [-1, NUM_KEYPOINT, NUM_KEYPOINT_CH])    
keypoint_3d

In [None]:
rotate_keypoint(keypoint_3d, 1, 512, 512)

In [None]:
list_x_train[0].shape

In [None]:
is_rotate_aug = True
def generator():
    #ind = np.arange((len(list_x_train)//2)*2)
    #ind = np.arange(len(list_x_train))    
    for i in range(len(list_x_train)):
        x = list_x_train[i]
        y = list_y_train[i]        
        if is_rotate_aug:            
            angle = 20 * (np.random.rand() - 0.5) #0~1 > -0.5~0.5
            if np.abs(angle) > 0.1:                
                x_rotated = Image.fromarray(x).rotate(angle)
                x = np.array(x_rotated)
                box = y[:, :5]
                #cls = y[:, 4:5]
                keypoints = y[:, 5:]
                keypoint_3d = np.reshape(keypoints, [-1, NUM_KEYPOINT, NUM_KEYPOINT_CH])                                                    
                img_h, img_w, img_c = x.shape
                box_trans = rotate_box(box, angle, img_h, img_w)            
                keypoint_trans = rotate_keypoint(keypoint_3d, angle, img_h, img_w)
                keypoint_trans = np.reshape(keypoint_trans, [-1, NUM_KEYPOINT * NUM_KEYPOINT_CH])
                y = np.concatenate((box_trans, keypoint_trans), -1)
        yield (x, y)

def generator_test():    
    for i in range(len(list_x_test)):
        x = list_x_test[i]
        y = list_y_test[i]
        yield (x, y)

dataset = tf.data.Dataset.from_generator(
    generator, 
    output_types=(tf.uint8, tf.float32), 
    output_shapes=(tf.TensorShape([None, None, 3]), tf.TensorShape([None, channel_label])))
dataset_test = tf.data.Dataset.from_generator(
    generator_test, 
    output_types=(tf.uint8, tf.float32), 
    output_shapes=(tf.TensorShape([None, None, 3]), tf.TensorShape([None, channel_label])))

np.set_printoptions(precision=2)
for example in tfds.as_numpy(dataset):
    image = example[0]
    label = example[1]
    
    print(image.dtype, image.shape, label.shape)
    print(label[0])
    #print(label[0][-NUM_KEYPOINT*3::3])
    break

In [None]:
#전처리 확인
train_dataset = dataset.map(preprocess_data)
for x, yb, yc, yk in train_dataset:
    print('yb', yb)
    print('yc', yc)
    print('yk', yk)    
    break

In [None]:
def coordinate_map_uv(h, w):
    #return (6, 18, 256)
    x = tf.range(0.5, w, 1) / tf.cast(w, tf.float32) * 2.0 -1
    y = tf.range(0.5, h, 1) / tf.cast(h, tf.float32) * 2.0 -1    
    X, Y = tf.meshgrid(x, y)
    xy = tf.stack((X, Y), -1)
    xy = tf.expand_dims(xy, axis=0)   
    return xy 

def coordinate_map(h, w):
    #return (h, w, 2)    
    x = tf.range(w)
    y = tf.range(h)
    X, Y = tf.meshgrid(x, y)
    xy = tf.stack((X, Y), -1)
    xy = tf.cast(xy, tf.float32) + 0.5
    return xy 


def coordinate_map_norm(h, w):
    #return (h, w, 2)    
    x = (tf.range(w, dtype=tf.float32) + 0.5) / w
    y = (tf.range(h, dtype=tf.float32) + 0.5) / h
    X, Y = tf.meshgrid(x, y)
    xy = tf.stack((X, Y), -1)
    xy = tf.cast(xy, tf.float32)
    return xy 


def add_map(net):
    shape = tf.shape(net)
    map_norm = coordinate_map(shape[1], shape[2])            
    map_norm = tf.expand_dims(map_norm, 0)
    #net = tf.concat((map_norm + net[:, :, :, :2], net[:, :, :, 2:]), -1)    
    net = tf.concat((net[:, :, :, :-2], net[:, :, :, -2:] + map_norm), -1)    
    return net

In [None]:
def affine_grid_generator(height, width, theta):
    num_batch = tf.shape(theta)[0]
    
    x = tf.linspace(-1.0, 1.0, width)
    y = tf.linspace(-1.0, 1.0, height)
    x_t, y_t = tf.meshgrid(x, y)
    x_t_flat = tf.reshape(x_t, [-1])
    y_t_flat = tf.reshape(y_t, [-1])

    ones = tf.ones_like(x_t_flat)
    sampling_grid = tf.stack([x_t_flat, y_t_flat, ones])  # (3, h*w)
    sampling_grid = tf.expand_dims(sampling_grid, axis=0)
    # sampling_grid = tf.tile(sampling_grid, tf.stack([num_batch, 1, 1]))#(num_batch, 3, h*w)
    sampling_grid = tf.tile(sampling_grid, [num_batch, 1, 1])  # (num_batch, 3, h*w)
    theta = tf.cast(theta, tf.float32)
    sampling_grid = tf.cast(sampling_grid, tf.float32)

    batch_grids = tf.matmul(theta, sampling_grid)  # (m, 2, 3)@(m, 3, h*w)=(m,2,h*w)
    batch_grids = tf.reshape(batch_grids, [num_batch, 2, height, width])
    return batch_grids


def get_pixel_value(img, x, y):
    # img (m,h,w,c)
    # x,y (m,h,w)
    shape = tf.shape(x)
    m = shape[0]
    h = shape[1]
    w = shape[2]
    batch_idx = tf.range(0, m)
    batch_idx = tf.reshape(batch_idx, [m, 1, 1])
    b = tf.tile(batch_idx, [1, h, w])

    indices = tf.stack([b, y, x], axis=3)  # (m,h,w,3)

    return tf.gather_nd(img, indices)


def bilinear_sampler(img, batch_grids):
    # batch_grids (m, 2, h, w)
    # img (m,h,w,c)
    uv_x = batch_grids[:, 0]
    uv_y = batch_grids[:, 1]
    H = tf.shape(img)[1]
    W = tf.shape(img)[2]
    max_y = tf.cast(H - 1, tf.float32)
    max_x = tf.cast(W - 1, tf.float32)
    # x [-1, 1]
    x = 0.5 * ((uv_x + 1.0) * max_x)
    y = 0.5 * ((uv_y + 1.0) * max_y)

    # grab 4 nearest corner points for each (x_i, y_i)
    x0 = tf.floor(x)  # precision bad?
    x1 = x0 + 1
    y0 = tf.floor(y)
    y1 = y0 + 1

    # clip out of boundary index
    x0 = tf.clip_by_value(x0, 0, max_x)
    x1 = tf.clip_by_value(x1, 0, max_x)
    y0 = tf.clip_by_value(y0, 0, max_y)
    y1 = tf.clip_by_value(y1, 0, max_y)

    # deltas
    wa = (x1 - x) * (y1 - y)
    wb = (x1 - x) * (y - y0)
    wc = (x - x0) * (y1 - y)
    wd = (x - x0) * (y - y0)

    wa = tf.expand_dims(wa, -1)
    wb = tf.expand_dims(wb, -1)
    wc = tf.expand_dims(wc, -1)
    wd = tf.expand_dims(wd, -1)

    x0 = tf.cast(x0, tf.int32)
    x1 = tf.cast(x1, tf.int32)
    y0 = tf.cast(y0, tf.int32)
    y1 = tf.cast(y1, tf.int32)

    Ia = get_pixel_value(img, x0, y0)
    Ib = get_pixel_value(img, x0, y1)
    Ic = get_pixel_value(img, x1, y0)
    Id = get_pixel_value(img, x1, y1)

    out = tf.add_n([wa*Ia, wb*Ib, wc*Ic, wd*Id])

    return out

def grid_generator(height, width, theta):
    num_batch = tf.shape(theta)[0]
    
    x = tf.linspace(-1.0, 1.0, width)
    y = tf.linspace(-1.0, 1.0, height)
    x_t, y_t = tf.meshgrid(x, y)
    x_t_flat = tf.reshape(x_t, [-1])
    y_t_flat = tf.reshape(y_t, [-1])

    ones = tf.ones_like(x_t_flat)
    z = tf.ones_like(x_t_flat)
    sampling_grid = tf.stack([x_t_flat, y_t_flat, ones])  # (3, h*w)
    sampling_grid = tf.expand_dims(sampling_grid, axis=0)    
    sampling_grid = tf.tile(sampling_grid, [num_batch, 1, 1])  # (num_batch, 3, h*w)
    theta = tf.cast(theta, tf.float32)
    sampling_grid = tf.cast(sampling_grid, tf.float32)

    batch_grids = tf.matmul(theta, sampling_grid)  # (m, 3, 3)@(m, 4, h*w)=(m,3,h*w)
    #print('batch_grids', batch_grids)
    w = batch_grids[:, -1:]    
    #print('w','mean', tf.reduce_mean(w), w)
    
    batch_grids = batch_grids[:, :2]/w
    batch_grids = tf.reshape(batch_grids, [num_batch, 2, height, width])
    return batch_grids


def sampling(net, theta, dst_h, dst_w):
    
    #theta = tf.reshape(theta, [-1, 3, 3])
    #theta = theta[:, :2]
    #h = tf.shape(net)[1]
    #w = tf.shape(net)[2]
    #batch_grids = affine_grid_generator(dst_h, dst_w, theta)
    batch_grids = grid_generator(dst_h, dst_w, theta)
    out = bilinear_sampler(net, batch_grids)
    return out

In [None]:
def generate_displace_map(boxes, keypoints, x_h, x_w):
    #gt_boxes[i], keypoints[i], images_shape[1], images_shape[2]  
    #boxes = y[:, :4]
    #keypoints = y[:, 5:].reshape((-1, NUM_KEYPOINT, NUM_KEYPOINT_CH)).astype(np.int)# 키포인트 별로      
    keypoints_xy = keypoints[:, :, :2]
    keypoints_cls = keypoints[:, :, 2]
    visible = keypoints_cls > VISIBILITY_OCCLUDED
    #print('keypoints', keypoints)

    #boxes_wh = boxes[:, 2:4] - boxes[:, 0:2]
    #boxes_cxy = (boxes[:, 2:4] + boxes[:, 0:2]) / 2    
    boxes_cxy = boxes[:, 0:2]
    boxes_wh = boxes[:, 2:4]

    #box_cxy:(n, 2), kp_xy:(n, k, 2), k = 17
    #displace : (n, k, 2)
    #displace_kp_to_root : (n, k, 2) - (n, 1, 2) = (n, k, 2)
    #displace_kp_norm : (n, k, 2) / (n, 1, 2) = (n, k, 2)        
    roots = tf.expand_dims(boxes_cxy, 1)
    x_size = tf.cast(tf.reshape([x_w, x_h], [1, 1, 2]), tf.float32)
    displace_kp_to_root_norm = (keypoints_xy - roots) / tf.expand_dims(boxes_wh, 1) #(n, k, 2)
    displace_kp_to_root_norm_global = (keypoints_xy - roots) / x_size #(n, k, 2)

    #heatmap_mask = |coord_map - kp| < disk radius : (h, w, 1, 2) - (1, 1, nk, 2)      
    coord_map = coordinate_map(x_h, x_w)#(h, w, 2)
    #kp_coord_distance = coord_map - kp : (h, w, 1, 2) - (1, 1, nk, 2) = (h, w, nk, 2)
    kp_coord_distance_xy = tf.expand_dims(coord_map, 2) - tf.reshape(keypoints_xy, [1, 1, -1, 2])
    kp_coord_distance = tf.sqrt(tf.reduce_sum(tf.square(kp_coord_distance_xy), -1))#(h, w, nk)
    #print('kp_coord_distance', kp_coord_distance.shape)
    disk_radius_rel = tf.reshape(tf.reduce_min(boxes_wh, -1), [1, 1, -1, 1])*0.05
    #disk_radius = 10

    kp_coord_distance_4d = tf.reshape(kp_coord_distance, [x_h, x_w, -1, NUM_KEYPOINT])#(h,w,n,k)
    heatmap_mask_kp = kp_coord_distance_4d < disk_radius_rel
    heatmap_mask_kp = tf.expand_dims(heatmap_mask_kp, -1)
    heatmap_mask_kp = tf.reshape(heatmap_mask_kp, [x_h, x_w, -1, 1])

    #heatmap_mask_kp = np.expand_dims((kp_coord_distance < disk_radius).astype(np.float32), -1)
    visible_4d = tf.reshape(visible, [1, 1, -1, 1])

    heatmap_mask_kp_valid = tf.cast(heatmap_mask_kp, tf.float32) * tf.cast(visible_4d, tf.float32)
    #(h, w, nk, 1) * (1, 1, nk, 1) * (1, 1, nk, 2)
    displace_kp = heatmap_mask_kp_valid * tf.reshape(displace_kp_to_root_norm, [1, 1, -1, 2])#(h, w, nk, 2)
    displace_kp_global = heatmap_mask_kp_valid * tf.reshape(displace_kp_to_root_norm_global, [1, 1, -1, 2])#(h, w, nk, 2)
    #print('displace_kp', displace_kp.shape)        

    displace_kp_max = tf.reduce_max(displace_kp, 2)
    displace_kp_min = tf.reduce_min(displace_kp, 2)
    displace_kp = tf.where(tf.abs(displace_kp_min) > displace_kp_max, displace_kp_min, displace_kp_max)

    displace_kp_global_max = tf.reduce_max(displace_kp_global, 2)
    displace_kp_global_min = tf.reduce_min(displace_kp_global, 2)
    displace_kp_global = tf.where(tf.abs(displace_kp_global_min) > displace_kp_global_max, displace_kp_global_min, displace_kp_global_max)
    
    heatmap_mask = tf.reduce_any(heatmap_mask_kp_valid > 0, 2)        
    heatmap_mask = tf.cast(heatmap_mask, tf.float32)

    displace_map = tf.concat((displace_kp, displace_kp_global), -1) * heatmap_mask
    return displace_map

In [None]:
"""
## Encoding labels
The raw labels, consisting of bounding boxes and class ids need to be
transformed into targets for training. This transformation consists of
the following steps:
- Generating anchor boxes for the given image dimensions
- Assigning ground truth boxes to the anchor boxes
- The anchor boxes that are not assigned any objects, are either assigned the
background class or ignored depending on the IOU
- Generating the classification and regression targets using anchor boxes
"""

class LabelEncoder:
    """Transforms the raw labels into targets for training.
    This class has operations to generate targets for a batch of samples which
    is made up of the input images, bounding boxes for the objects present and
    their class ids.
    Attributes:
      anchor_box: Anchor box generator to encode the bounding boxes.
      box_variance: The scaling factors used to scale the bounding box targets.
    """

    def __init__(self):
        self._anchor_box = AnchorBox()
        self._box_variance = tf.convert_to_tensor(
            [0.1, 0.1, 0.2, 0.2], dtype=tf.float32
        )           
    
    def _match_anchor_boxes(
        self, anchor_boxes, gt_boxes, match_iou=0.3, ignore_iou=0.1
    ):
        # anchor : (anchor_k, 4), gt_boxes : (box_m, 4)
        iou_matrix = compute_iou(anchor_boxes, gt_boxes)#(anchor_k, box_m)        
        max_iou = tf.reduce_max(iou_matrix, axis=1)#from anchor to object-box        
        matched_gt_idx = tf.argmax(iou_matrix, axis=1)    
        positive_mask = tf.greater_equal(max_iou, match_iou)# not only this, but also need max iou cell
        
        positive_proposal_mask = tf.greater_equal(iou_matrix, match_iou)
        positive_mask = tf.reduce_any(positive_proposal_mask, axis=1)        
        negative_mask = tf.less(max_iou, ignore_iou)
        
        max_iou_anchor = tf.reduce_max(iou_matrix, axis=0, keepdims=True)
        max_iou_anchor_mask = tf.greater_equal(iou_matrix, max_iou_anchor)
        
        positive_max_mask = tf.reduce_any(max_iou_anchor_mask, axis=1)
        positive_mask = tf.logical_or(positive_mask, positive_max_mask)#new      
        
        negative_mask = tf.logical_and(negative_mask, tf.logical_not(positive_mask))
        ignore_mask = tf.logical_not(tf.logical_or(positive_mask, negative_mask))        
        
        return (
            matched_gt_idx,            
            tf.cast(positive_mask, dtype=tf.float32),
            tf.cast(positive_max_mask, dtype=tf.float32),            
            tf.cast(ignore_mask, dtype=tf.float32),
        )
    
    
    def _compute_box_target(self, anchor_boxes, matched_gt_boxes):
        """Transforms the ground truth boxes into targets for training"""
        target = tf.concat(
            [
                (matched_gt_boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:],
                tf.math.log(matched_gt_boxes[:, 2:] / anchor_boxes[:, 2:]),
            ],
            axis=-1,
        )
        target = target / self._box_variance
        return target
    
    def _compute_keypoint_target(self, anchor_boxes, matched_gt_points):
        """Transforms the ground truth boxes into targets for training"""
        target = (matched_gt_points[:, :, :2] - anchor_boxes[:, None, :2]) / anchor_boxes[:, None, 2:]        
        target = target / self._box_variance[:2]
        return target    
      
    
    def _encode_sample(self, image_shape, gt_boxes, cls_ids, gt_keypoints):
        """Creates box and classification targets for a single sample"""
        
        anchor_boxes = self._anchor_box.get_anchors(image_shape[1], image_shape[2])
        
        matched_gt_idx, positive_mask, positive_max_mask, ignore_mask = self._match_anchor_boxes(
            anchor_boxes, gt_boxes)
        
        gt_keypoints_xy = gt_keypoints[:, :, :2]
        gt_keypoints_cls = gt_keypoints[:, :, 2]       
        
        print('gt_keypoints_xy', gt_keypoints_xy)
        
        matched_gt_boxes = tf.gather(gt_boxes, matched_gt_idx)
        matched_gt_keypoint_xy = tf.gather(gt_keypoints_xy, matched_gt_idx)
        
        box_target = self._compute_box_target(anchor_boxes, matched_gt_boxes)
        keypoint_xy_target = self._compute_keypoint_target(anchor_boxes, matched_gt_keypoint_xy)
                
        matched_gt_cls_ids = tf.gather(cls_ids, matched_gt_idx)
        matched_gt_keypoint_cls = tf.gather(gt_keypoints_cls, matched_gt_idx)
                
        cls_target = tf.where(tf.not_equal(positive_mask, 1.0), 0.0, matched_gt_cls_ids)        
        cls_target = tf.where(tf.equal(ignore_mask, 1.0), -1.0, cls_target)
                
        #[?,4], [?,?], [?,17,2], []
        keypoint_xy_target = tf.reshape(keypoint_xy_target, [-1, NUM_KEYPOINT * 2])
        keypoint_cls_target = tf.reshape(matched_gt_keypoint_cls, [-1, NUM_KEYPOINT])
                
        cls_target = tf.expand_dims(cls_target, axis=-1)
        targets = tf.concat([box_target, cls_target, keypoint_xy_target, keypoint_cls_target], axis=-1)
        return targets
    
    
    def _encode_keypoint_heatmap(self, img_h, img_w, keypoints):
        #keypoints (n, 17, 3)
        #return gt_map (h, w)        
        neighbor_k = 1
        keypoints_xy = keypoints[:, :, :2]
        keypoints_visible = keypoints[:, :, 2]
        keypoints_cls = tf.zeros_like(keypoints_visible) + tf.range(NUM_KEYPOINT, dtype=tf.float32)
                
        xy_map = coordinate_map(img_h, img_w)           
        
        keypoints_xy_5d = tf.reshape(keypoints_xy, [1, 1, -1, NUM_KEYPOINT, 2])#(1, 1, n, 17, 2)        
        keypoints_visible_5d = tf.reshape(keypoints_visible, [1, 1, -1, NUM_KEYPOINT, 1])
        keypoints_visible_4d_mask = tf.cast(keypoints_visible_5d[:,:,:,:,0] > VISIBILITY_NOT_EXIST, tf.float32)
                
        keypoints_xy_5d_to_find_min = keypoints_xy_5d + 5000 * tf.cast(keypoints_visible_5d < 1,tf.float32)
        visible_max_xy = tf.reduce_max(keypoints_xy_5d, 3, True)
        visible_min_xy = tf.reduce_min(keypoints_xy_5d_to_find_min, 3, True)
        visible_xy_range = tf.abs(visible_max_xy - visible_min_xy)#(1, 1, n, 1, 2)
        
        #neighbor_k_relative = neighbor_k + 0.03 * tf.reduce_mean(visible_xy_range, -1)#(1, 1, n, 1)
        neighbor_k_relative = neighbor_k + 0.02 * tf.reduce_mean(visible_xy_range, -1) * keypoints_visible_4d_mask
        #neighbor_k_relative = neighbor_k_relative + (2 * keypoints_visible_4d_mask)
        
        xy_map_exp = tf.expand_dims(tf.expand_dims(xy_map, 2), 2)
        distance_xy = xy_map_exp - keypoints_xy_5d#(h, w, 1, 1, 2) - (1, 1, n, 17, 2)               
        distance = tf.sqrt(tf.reduce_sum(tf.square(distance_xy), -1))#(h, w, n, 17)
        
        #(h, w, n, 17)
        heatmap_visible = tf.cast(distance < neighbor_k_relative, tf.float32) * keypoints_visible_5d[:,:,:,:,0]
        heatmap_visible = tf.cast(tf.reduce_max(heatmap_visible, [-2, -1]), tf.int32)
        
        disk_mask = tf.cast(distance < neighbor_k_relative, tf.float32) * keypoints_visible_4d_mask
        bg_zero = tf.zeros_like(distance[:,:,:,:1])
                
        disk_exist = tf.reduce_sum(disk_mask, [2, 3], True) > 0
        disk_exist_mask = tf.cast(disk_exist, tf.float32)
        bg_map = disk_exist_mask * 10000 + (1 - disk_exist_mask) * bg_zero
        
        distance_map_with_bg = tf.concat((bg_map, distance), -1)        
        distance_map_with_bg = tf.reshape(distance_map_with_bg, [img_h, img_w, -1]) 
        
        cls_map = tf.argmin(distance_map_with_bg, -1, output_type=tf.int32)#(h, w, n)                
        cls_k = NUM_KEYPOINT + 1
        cls_map = cls_map % cls_k        
        
        out = tf.stack((cls_map, heatmap_visible), -1)
        return out
    
    def convert_box_to_map(self, boxes, kps, img_h, img_w):
        #keypoints (n, 17, 3), box:convert_to_xywh
        #box : xywh[315.802  77.022  16.874  13.665]
        #sampling(net, theta, dst_h, dst_w) #를 이용해서 heatmap 라벨에 -1을 만들기 #todo
        
        visibile = kps[:, :, 2]
        has_kp = tf.reduce_any(visibile > 0, 1, True)
        has_kp = tf.cast(has_kp, tf.float32)
        z = tf.zeros_like(boxes[:, 0])
        boxes_dummy = tf.stack((z+1, z+1, z+3, z+3), -1)
        box_margin = 5
        boxes_wide = tf.concat((boxes[:, 0:2], boxes[:, 2:4] + box_margin), -1)
        boxes = has_kp * boxes_dummy + (1 - has_kp) * boxes_wide
        
        img_wf = tf.cast(img_w, tf.float32)
        img_hf = tf.cast(img_h, tf.float32)
        boxes_norm_xywh = tf.stack((boxes[:, 0]/img_wf, boxes[:, 1]/img_hf, boxes[:, 2]/img_wf, boxes[:, 3]/img_hf), -1)
        
        sx = 1.0 / boxes_norm_xywh[:, 2]
        sy = 1.0 / boxes_norm_xywh[:, 3]        
        tx = (0.5 - boxes_norm_xywh[:, 0]) * 2 # +:left, -:right
        ty = (0.5 - boxes_norm_xywh[:, 1]) * 2# +:top, -:down      
        
        theta_scale = tf.stack((sx, z, z, z, sy, z, z, z, z+1), -1)
        theta_scale = tf.reshape(theta_scale, [-1, 3, 3])         
        theta_translate = tf.stack((z+1, z, tx, z, z+1, ty, z, z, z+1), -1)
        theta_translate = tf.reshape(theta_translate, [-1, 3, 3])         
        m = tf.shape(boxes)[0]
        
        h = img_h
        net = tf.ones((m, h, h, 1), tf.float32)        
        net_transformed = sampling(net, theta_scale, h, h)
        net_transformed = sampling(net_transformed, theta_translate, h, h)        
        net_transformed = tf.squeeze(net_transformed, -1)
        out = tf.reduce_max(net_transformed, 0)        
        return out
    
    def encode_batch(self, batch_images, gt_boxes, cls_ids, keypoints):
        """Creates box and classification targets for a batch"""
        #keypoints(batch_m, n, 17, 3)
        images_shape = tf.shape(batch_images)
        batch_size = images_shape[0]
        
        labels = tf.TensorArray(dtype=tf.float32, size=batch_size, dynamic_size=True)
        labels_heatmap = tf.TensorArray(dtype=tf.int32, size=batch_size, dynamic_size=True)
        labels_heatmap_box = tf.TensorArray(dtype=tf.float32, size=batch_size, dynamic_size=True)
        label_displace = tf.TensorArray(dtype=tf.float32, size=batch_size, dynamic_size=True)
        
        for i in range(batch_size):
            label = self._encode_sample(images_shape, gt_boxes[i], cls_ids[i], keypoints[i])
            labels = labels.write(i, label)
            label_heat = self._encode_keypoint_heatmap(images_shape[1], images_shape[2], keypoints[i])        
            labels_heatmap = labels_heatmap.write(i, label_heat)
            
            heat_box = self.convert_box_to_map(gt_boxes[i], keypoints[i], images_shape[1], images_shape[2])
            labels_heatmap_box = labels_heatmap_box.write(i, heat_box)
            
            _displace = generate_displace_map(gt_boxes[i], keypoints[i], images_shape[1], images_shape[2])
            label_displace = label_displace.write(i, _displace)
        
        batch_images = tf.cast(batch_images, tf.float32)
        labels = labels.stack()
        labels_heatmap = labels_heatmap.stack()
        if True:
            labels_no_kp_mask = labels_heatmap_box.stack() # test
            labels_no_kp_mask = tf.cast(labels_no_kp_mask, tf.int32)
            labels_no_kp_mask = tf.expand_dims(labels_no_kp_mask, -1)
            labels_heatmap = (1 - labels_no_kp_mask) * labels_heatmap + labels_no_kp_mask * -1
        
        label_displace = label_displace.stack()
        labels_heatmap_exp = tf.cast(labels_heatmap, tf.float32)
        heatmap_dispalce = tf.concat((labels_heatmap_exp, label_displace), -1)                
        
        gt = {"detect": labels, "heatmap": heatmap_dispalce, 'heatmap_coord':labels}#dual
        return batch_images, gt
     
    def encode_batch_train(self, batch_images, gt_boxes, cls_ids, keypoints):
        
        batch_images = image_color_augment(batch_images)
        
        return self.encode_batch(batch_images, gt_boxes, cls_ids, keypoints)    

In [None]:
level_start, 2**level_start

In [None]:
label_encoder = LabelEncoder()
anchor_k = len(label_encoder._anchor_box.aspect_ratios)*len(label_encoder._anchor_box.scales)
anchor_k

In [None]:
strategy = tf.distribute.MirroredStrategy()
#strategy = tf.distribute.OneDeviceStrategy(device='/gpu:0')
print("Number of devices: {}".format(strategy.num_replicas_in_sync))

In [None]:
batch_size = 1#min(2 * strategy.num_replicas_in_sync, len(list_x_train))
autotune = tf.data.experimental.AUTOTUNE
batch_size

In [None]:
check_dataset = dataset.map(preprocess_data, num_parallel_calls=autotune)
print('check_dataset', check_dataset)
i = 0
for image, bbox_unnorm, cls, keypoints_unnorm in check_dataset:
    i += 1
    print(image.shape, bbox_unnorm.shape, cls.shape, keypoints_unnorm.shape)
    print('bbox_unnorm', bbox_unnorm)
    print('cls', cls)
    print('keypoints_unnorm', keypoints_unnorm.shape)    
    if i>0:
        break

In [None]:
train_dataset = dataset.map(preprocess_data, num_parallel_calls=autotune)
#train_dataset = train_dataset.shuffle(8 * batch_size)
#train_dataset = train_dataset.padded_batch(batch_size=batch_size, padding_values=(0.0, 1e-8, -1), drop_remainder=False)
train_dataset = train_dataset.padded_batch(batch_size=batch_size)
train_dataset = train_dataset.map(label_encoder.encode_batch_train, num_parallel_calls=autotune)
train_dataset = train_dataset.prefetch(autotune)

In [None]:
def split_gt_heatmap_displace(gt):    
    heatmap = gt[:, :, :, 0]
    heatmap_visible = gt[:, :, :, 1] #{-1:unknown, 0:not visible, 1:occluded, 2:visible}
    displace = gt[:, :, :, 2:]
    return heatmap, heatmap_visible, displace

In [None]:
def split_heatmap_displace(heatmap):
    k = NUM_KEYPOINT + 1
    heatmap_score = heatmap[:, :, :, :k]
    visible_score = heatmap[:, :, :, k:k+3]
    displace = heatmap[:, :, :, -4:]
    
    return heatmap_score, visible_score, displace

In [None]:
i = 0
for x, multi_y in train_dataset:
    i += 1
    #print('multi_y', type(multi_y), multi_y.keys())
    y = multi_y['detect']
    y_heatmap_displace = multi_y['heatmap']    
    y_heatmap, heatmap_visible, y_distance = split_gt_heatmap_displace(y_heatmap_displace)
    print('y_distance', y_distance.shape, tf.reduce_sum(tf.abs(y_distance)))#(2, 512, 512, 2)
    print('heatmap_visible', heatmap_visible.shape, np.unique(np.array(heatmap_visible)))
    
    #print('y_heatmap', tf.shape(y_heatmap), tf.reduce_max(y_heatmap), tf.reduce_max(y_distance))
    #print('y_heatmap', tf.reduce_mean(y_heatmap))
    
    img_map_xy = tf.cast(y_heatmap[0], tf.uint8)
    alpha = 255//(NUM_KEYPOINT+1)
    hitmap_h = img_map_xy * alpha

    ignore_heatmap = y_heatmap[0] < 0
        
    hitmap_img = tf.stack((hitmap_h, hitmap_h, hitmap_h), -1)
    distance_img = tf.abs(y_distance[0,:,:,0])*100
    distance_global_img = tf.abs(y_distance[0,:,:,-1])*100
    fig, ax = plt.subplots(1, 5)    
    ax[0].imshow(tf.cast(x[0], tf.uint8))
    ax[1].imshow(hitmap_img, cmap='gray')
    ax[2].imshow(tf.cast(distance_img,tf.uint8), cmap='gray')
    ax[3].imshow(tf.cast(distance_global_img,tf.uint8), cmap='gray')
    ax[4].imshow(heatmap_visible[0], cmap='gray')
    if i > 2:break
        

In [None]:
show_img = np.stack((y_heatmap[0,:,:], y_distance[0,:,:,1]*2, x[0,:,:,0]/355.0), -1)
plt.imshow(show_img)

In [None]:
plt.imshow(np.array(y_distance[0,:,:,2]))

In [None]:
plt.imshow(np.array(heatmap_visible[0])+1)

In [None]:
plt.imshow(np.array(heatmap_visible[0])+1, cmap='gray')

In [None]:
np.unique(y_heatmap_displace[0,:,:,0])

In [None]:
val_dataset = dataset_test.map(preprocess_data, num_parallel_calls=autotune)
#val_dataset = val_dataset.padded_batch(batch_size=batch_size, padding_values=(0.0, 1e-8, -1), drop_remainder=False)
val_dataset = val_dataset.padded_batch(batch_size=1)
val_dataset = val_dataset.map(label_encoder.encode_batch, num_parallel_calls=autotune)
val_dataset = val_dataset.prefetch(autotune)

In [None]:
def split_gt(y_true):
    #targets = tf.concat([box_target, cls_target, keypoint_xy_target, keypoint_cls_target], axis=-1)
    y_box = y_true[:, :, :4]
    y_cls = y_true[:, :, 4]
    y_keypoint = y_true[:, :, 5:]
    return y_box, y_cls, y_keypoint

In [None]:
#filters = 4 + 1 + num_classes + num_keypoints * (2 + num_keypoint_classes)       
def split_hyperthesis(y_pred):
    h_box = y_pred[:, :, :4]
    h_obj = y_pred[:, :, 4]
    h_cls = y_pred[:, :, 5:5+num_classes]        
    h_keypoint = y_pred[:, :, 5+num_classes:]       
    
    h_obj = tf.nn.sigmoid(h_obj)
    return h_box, h_obj, h_cls, h_keypoint

In [None]:
def split_gt_keypoint(y_keypoint):
    xy = y_keypoint[:, :, :NUM_KEYPOINT*2]
    cls = y_keypoint[:, :, -NUM_KEYPOINT:]
        
    x = xy[:, :, 0::2]
    y = xy[:, :, 1::2]    
    return x, y, cls    

In [None]:
def split_h_keypoint(h_keypoint):    
    h_x = h_keypoint[:, :, :NUM_KEYPOINT]
    h_y = h_keypoint[:, :, NUM_KEYPOINT:NUM_KEYPOINT * 2]
    h_cls_score = h_keypoint[:, :, NUM_KEYPOINT * 2:]
    
    #h_x = tf.tanh(h_x)
    #h_y = tf.tanh(h_y)
    
    keypoints_cls = tf.round(tf.sigmoid(h_cls_score) * 2)
    return h_x, h_y, h_cls_score, keypoints_cls

In [None]:
def _decode_box_predictions(anchor_boxes, box_predictions):        
    boxes = box_predictions * label_encoder._box_variance
    boxes = tf.concat(
        [
            boxes[:, :, :2] * anchor_boxes[:, :, 2:] + anchor_boxes[:, :, :2],
            tf.math.exp(boxes[:, :, 2:]) * anchor_boxes[:, :, 2:],
        ],
        axis=-1,
    )
    boxes_transformed = convert_to_corners(boxes)
    return boxes_transformed

def _decode_keypoint_predictions(anchor_boxes, keypoint_predictions):        
    repeat = tf.constant([NUM_KEYPOINT], tf.int32)
    box_variance_rep = tf.tile(label_encoder._box_variance[:2], repeat)
    keypoints = keypoint_predictions * box_variance_rep
    
    batch_m = tf.shape(keypoints)[0]
    repeat = tf.constant((1, 1, NUM_KEYPOINT))
    
    anchor_xy_tile = tf.tile(anchor_boxes[:, :, :2], repeat)
    anchor_wh_tile = tf.tile(anchor_boxes[:, :, 2:], repeat)
    
    keypoint = keypoints * anchor_wh_tile + anchor_xy_tile
    return keypoint


def decode_debug(images, predictions, 
                      num_classes=num_classes,
                      confidence_threshold=0.5,
                      nms_iou_threshold=0.2,
                      max_detections_per_class=1000,
                      max_detections=1500,
                      box_variance=[0.1, 0.1, 0.2, 0.2]):
    
    _anchor_box = AnchorBox()    
        
    image_shape = tf.cast(tf.shape(images), dtype=tf.float32)
    #anchor_boxes = _anchor_box.get_anchors(image_shape[1], image_shape[2])#free size        
    image_h = padded_image_shape[0]
    image_w = padded_image_shape[1]
    anchor_boxes = _anchor_box.get_anchors(image_h, image_w)
    
    box_predictions, objectness, keypoint_predictions = split_gt(predictions)
    x, y, keypoints_cls = split_gt_keypoint(keypoint_predictions)
    
    m = tf.shape(images)[0]
    keypoints_xy = tf.stack((x, y), -1)
    keypoints_xy = tf.reshape(keypoints_xy, (m, -1, NUM_KEYPOINT * 2))
    
    cls = objectness * 0 + 1
            
    boxes = _decode_box_predictions(anchor_boxes[None, ...], box_predictions)
    keypoints_xy = _decode_keypoint_predictions(anchor_boxes[None, ...], keypoints_xy)
    
    boxes_2d = tf.reshape(boxes, [-1, 4])    
    scores = tf.reshape(objectness, [-1, 1])
    
    cls = tf.reshape(cls, [-1, 1])
    
    keypoints_xy_3d = tf.reshape(keypoints_xy, [-1, NUM_KEYPOINT, 2])
    keypoints_cls_3d = tf.reshape(keypoints_cls, [-1, NUM_KEYPOINT, 1])    
    keypoints_3d = tf.concat((keypoints_xy_3d, keypoints_cls_3d), -1)
    keypoints_2d = tf.reshape(keypoints_3d, [-1, NUM_KEYPOINT * 3])
    
    ccbox = tf.concat((cls, scores, boxes_2d, keypoints_2d), -1)
    ccbox_check = tf.concat((boxes_2d, cls, keypoints_2d), -1)
    
    selected_indices, selected_scores = tf.image.non_max_suppression_with_scores(    
        ccbox[:, 2:2+4],
        ccbox[:, 1],        
        max_detections,
        nms_iou_threshold,
        confidence_threshold,        
    )
    output = tf.gather(ccbox_check, selected_indices)        
    return output   

In [None]:
def decodePredictions(images, predictions, 
                      num_classes=num_classes,
                      confidence_threshold=0.5,
                      nms_iou_threshold=0.2,
                      max_detections_per_class=100,
                      max_detections=150,
                      box_variance=[0.1, 0.1, 0.2, 0.2]):
    
    _anchor_box = AnchorBox()    
        
    image_shape = tf.cast(tf.shape(images), dtype=tf.float32)
    anchor_boxes = _anchor_box.get_anchors(image_shape[1], image_shape[2])#free size        
    #image_h = padded_image_shape[0]
    #image_w = padded_image_shape[1]
    #anchor_boxes = _anchor_box.get_anchors(image_h, image_w)
   
    h_box, objectness, h_cls, h_keypoint = split_hyperthesis(predictions)    
    #h_x, h_y, h_cls_score, keypoints_cls = split_h_keypoint(h_keypoint)
    
    keypoints_xy = tf.stack((h_x, h_y), -1) #keypoint_predictions[:, :, :NUM_KEYPOINT * 2]
    m = tf.shape(images)[0]
    
    keypoints_xy = tf.reshape(keypoints_xy, [m, -1, NUM_KEYPOINT * 2])
    
    cls_score = predictions[:, :, 5:5+num_classes]    
    cls_prob = tf.nn.softmax(cls_score)
    cls_prob_max = tf.reduce_max(cls_prob, -1)
    cls = tf.argmax(cls_score, -1)
    cls = tf.cast(cls, tf.float32)
    cls = predictions[:, :, 4]
            
    boxes = _decode_box_predictions(anchor_boxes[None, ...], h_box)
    keypoints_xy = _decode_keypoint_predictions(anchor_boxes[None, ...], keypoints_xy)
    
    boxes_2d = tf.reshape(boxes, [-1, 4])    
    scores = tf.reshape(objectness, [-1, 1])
    
    cls = tf.reshape(cls, [-1, 1])
    
    keypoints_xy_3d = tf.reshape(keypoints_xy, [-1, NUM_KEYPOINT, 2])
    keypoints_cls_3d = tf.reshape(keypoints_cls, [-1, NUM_KEYPOINT, 1])    
    keypoints_3d = tf.concat((keypoints_xy_3d, keypoints_cls_3d), -1)
    keypoints_2d = tf.reshape(keypoints_3d, [-1, NUM_KEYPOINT * 3])
    
    ccbox = tf.concat((cls, scores, boxes_2d, keypoints_2d), -1)
    ccbox_check = tf.concat((boxes_2d, cls, keypoints_2d), -1)
    
    selected_indices, selected_scores = tf.image.non_max_suppression_with_scores(    
        ccbox[:, 2:2+4],
        ccbox[:, 1],        
        max_detections,
        nms_iou_threshold,
        confidence_threshold,        
    )
    output = tf.gather(ccbox_check, selected_indices)        
    return output   

In [None]:
anchor_check = AnchorBox()
anchors = anchor_check.get_anchors_check(512,512)
anchor_sum = 0
for anchor in anchors:
    print(anchor.shape, anchor[-1], 'sqrt', np.sqrt(anchor.shape[0]/anchor_k))
    anchor_sum +=anchor.shape[0]
print('anchor_sum', anchor_sum)

In [None]:
channel_label

In [None]:
for image, multi_y in val_dataset:
    target = multi_y['detect']
    heatmap = multi_y['heatmap']
    print('image', image.shape)    
    print('target', target.shape)# (1, 12096, 56)    
    detections = decode_debug(image, target, confidence_threshold=0.15, nms_iou_threshold=0.2)
    print('detections', detections.shape)
    print(detections[0])
    target_cp = target[:, :, 4+1:]
    target_cp_xy = target_cp[:, :, :NUM_KEYPOINT*2]
    print('target_cp', target_cp.shape)
    #max tf.Tensor(187.73685, shape=(), dtype=float32) tf.Tensor(-219.20311, shape=(), dtype=float32)
    print('max', tf.reduce_max(target_cp_xy), tf.reduce_min(target_cp_xy))#
    break    

In [None]:
print(list_y_train[0][0].shape)
print(list_y_train[0][0])

# 인코딩 라벨 어디감?
- 65번 가운데 여자(iou thresh 높이면 나옴)
- 73번 가운데 남자
- 76번 할아버지(label positive 낮추면 나옴)


In [None]:
i = 0
for image, multi_y in val_dataset:
    i += 1
    target = multi_y['detect']
    heatmap_dis = multi_y['heatmap']        
    detections = decode_debug(image, target, confidence_threshold=0.25, nms_iou_threshold=0.9)
    print(i, 'detections', detections.shape, 'target', target.shape)    
    kp = detections[:, -NUM_KEYPOINT * NUM_KEYPOINT_CH:]
    kp_cls = kp[:, 2::3]
    print(detections.shape) #양복 2명 할아버지 (7, 56)
    print(detections[:, :5])
    x = np.array(image[0], np.uint8)
    h = np.array(detections)
    
    img = Image.fromarray(x)        
    draw_box_keypoint(img, h, is_show_class=False)
    display(img)    
    

In [None]:
level_start, level_end, 2**level_start, 2**level_end

In [None]:
4 * 2**(level_end-1), 3 * 2**(level_end-1)


### model

In [None]:
netbase = keras.applications.EfficientNetB4(include_top=False, input_shape=[384, 512, 3])
netbase.summary()

In [None]:
def get_backbone():
    backbone = keras.applications.EfficientNetB2(include_top=False, input_shape=[None, None, 3])
    c2_output, c3_output, c4_output, c5_output = [
        backbone.get_layer(layer_name).output
        for layer_name in ["block2c_add", "block3c_add", "block5d_add", "block6d_add"]]#block5c_add, block6d_add    
    return keras.Model(
        inputs=[backbone.inputs], outputs=[c2_output, c3_output, c4_output, c5_output]
    )

#D0 for layer_name in ["block2b_add", "block3b_add", "block5c_add", "block6d_add"]]
#B2 for layer_name in ["block2c_add", "block3c_add", "block5d_add", "top_activation"]]
#B3 for layer_name in ["block2c_add", "block3c_add", "block5e_add", "top_activation"]]
#B4 for layer_name in ["block2d_add", "block3d_add", "block5f_add", "block6h_add"]]
#D7 for layer_name in ["block2f_add", "block3g_add", "block5j_add", "block6d_add"]]

In [None]:
def softmax_scale(a,b,c):
    a0 = tf.exp(a)
    a1 = tf.exp(b)
    a2 = tf.exp(c)
    a_sum = a0 + a1 + a2 + 1e-5
    a0 = a0 / a_sum
    a1 = a1 / a_sum
    a2 = a2 / a_sum
    return a0, a1, a2    

In [None]:
def softmax_scale_from_last_ch(A,B,C, index=-1):
    a = A[:, :, :, index:index+1]
    b = B[:, :, :, index:index+1]
    c = C[:, :, :, index:index+1]
    a0 = tf.exp(a)
    a1 = tf.exp(b)
    a2 = tf.exp(c)
    a_sum = a0 + a1 + a2
    a0 = a0 / a_sum
    a1 = a1 / a_sum
    a2 = a2 / a_sum
    return a0, a1, a2    

In [None]:
def Bifeature(c345, filters):    
    a2 = c345[0]
    a3 = c345[1]
    a4 = c345[2]
    a5 = c345[3]
    
    regulizer  = tf.keras.regularizers.L2(l1)
    dil = 2
    group = 2
    a2_0 = Conv2D(filters, 3, 1, "same", groups=group, activation=activation, dilation_rate=dil, kernel_regularizer=regulizer)(a2)    
    a33 = Conv2D(filters*2, 3, 1, "same", groups=group, activation=activation, dilation_rate=dil, kernel_regularizer=regulizer)(a3)
    a44 = Conv2D(filters*2, 3, 1, "same", groups=group, activation=activation, dilation_rate=dil, kernel_regularizer=regulizer)(a4)
    a55 = Conv2D(filters*2, 3, 1, "same", groups=group, activation=activation, dilation_rate=dil, kernel_regularizer=regulizer)(a5)
    a66 = Conv2D(filters*2, 3, 2, "same", groups=group, activation=activation, kernel_regularizer=regulizer)(a5)
    
    a3_0, a3_1 = tf.split(a33, 2, -1)
    a4_0, a4_1 = tf.split(a44, 2, -1)
    a5_0, a5_1 = tf.split(a55, 2, -1)
    a6_0, a6_1 = tf.split(a66, 2, -1)
    
    a7_1 = Conv2D(filters, 3, 2, "same", groups=group, activation=activation, kernel_regularizer=regulizer)(a6_0)     
    b7 = Conv2D(filters, 3, 2, "same", groups=group, activation=activation, kernel_regularizer=regulizer)(a6_0)    
        
    a6_up = keras.layers.UpSampling2D(2)(a6_1)    
    b5 = keras.layers.Add()([a5_0, a6_up])  
        
    a5_up = keras.layers.UpSampling2D(2)(a5_1)    
    b4 = keras.layers.Add()([a4_0, a5_up])  
    
    b4_up = keras.layers.UpSampling2D(2)(b4)
    b3 = keras.layers.Add()([a3_0, b4_up])  
    
    b3_up = keras.layers.UpSampling2D(2)(b3)
    b2 = keras.layers.Add()([a2_0, b3_up])
    
    b2_down = Conv2D(filters, 3, 2, "same", groups=group, activation=activation, kernel_regularizer=regulizer)(b2)
    b3_1 = Conv2D(filters, 3, 1, "same", groups=1, activation=activation, dilation_rate=dil, kernel_regularizer=regulizer)(b3)    
    c3 = keras.layers.Add()([a3_1, b3_1, b2_down])    
    
    c3_down = Conv2D(filters, 3, 2, "same", groups=group, activation=activation, kernel_regularizer=regulizer)(c3)
    b4_1 = Conv2D(filters, 3, 1, "same", groups=1, activation=activation, dilation_rate=dil, kernel_regularizer=regulizer)(b4)    
    c4 = keras.layers.Add()([a4_1, b4_1, c3_down])    
    
    c4_down = Conv2D(filters, 3, 2, "same", groups=group, activation=activation, kernel_regularizer=regulizer)(c4)
    b5_1 = Conv2D(filters, 3, 1, "same", groups=1, activation=activation, dilation_rate=dil, kernel_regularizer=regulizer)(b5)    
    c5 = keras.layers.Add()([a5_1, b5_1, c4_down])    
    
    return b2, c3, c4, c5    

In [None]:
def BifeaturePyramidNet_dilate(c345, filters):    
    a2 = c345[0]
    a3 = c345[1]
    a4 = c345[2]
    a5 = c345[3]
    
    regulizer  = tf.keras.regularizers.L2(l1)
    dil = 2
    group = 2
    a2_0 = Conv2D(filters, 3, 1, "same", groups=group, activation=activation, dilation_rate=dil, kernel_regularizer=regulizer)(a2)    
    a33 = Conv2D(filters*2, 3, 1, "same", groups=group, activation=activation, dilation_rate=dil, kernel_regularizer=regulizer)(a3)
    a44 = Conv2D(filters*2, 3, 1, "same", groups=group, activation=activation, dilation_rate=dil, kernel_regularizer=regulizer)(a4)
    a55 = Conv2D(filters*2, 3, 1, "same", groups=group, activation=activation, dilation_rate=dil, kernel_regularizer=regulizer)(a5)
    a66 = Conv2D(filters*2, 3, 2, "same", groups=group, activation=activation, kernel_regularizer=regulizer)(a5)
    
    a3_0, a3_1 = tf.split(a33, 2, -1)
    a4_0, a4_1 = tf.split(a44, 2, -1)
    a5_0, a5_1 = tf.split(a55, 2, -1)
    a6_0, a6_1 = tf.split(a66, 2, -1)
    
    a7_1 = Conv2D(filters, 3, 2, "same", groups=group, activation=activation, kernel_regularizer=regulizer)(a6_0)     
    b7 = Conv2D(filters, 3, 2, "same", groups=group, activation=activation, kernel_regularizer=regulizer)(a6_0)    
    b6 = a6_0
    
    a6_up = keras.layers.UpSampling2D(2)(a6_1)    
    b5 = keras.layers.Add()([a5_0, a6_up])  
        
    a5_up = keras.layers.UpSampling2D(2)(a5_1)    
    b4 = keras.layers.Add()([a4_0, a5_up])  
    
    b4_up = keras.layers.UpSampling2D(2)(b4)
    b3 = keras.layers.Add()([a3_0, b4_up])  
    
    b3_up = keras.layers.UpSampling2D(2)(b3)
    b2 = keras.layers.Add()([a2_0, b3_up])
    
    b2_down = Conv2D(filters, 3, 2, "same", groups=group, activation=activation, kernel_regularizer=regulizer)(b2)
    b3_1 = Conv2D(filters, 3, 1, "same", groups=1, activation=activation, dilation_rate=dil, kernel_regularizer=regulizer)(b3)    
    c3 = keras.layers.Add()([a3_1, b3_1, b2_down])    
    
    c3_down = Conv2D(filters, 3, 2, "same", groups=group, activation=activation, kernel_regularizer=regulizer)(c3)
    b4_1 = Conv2D(filters, 3, 1, "same", groups=1, activation=activation, dilation_rate=dil, kernel_regularizer=regulizer)(b4)    
    c4 = keras.layers.Add()([a4_1, b4_1, c3_down])    
    
    c4_down = Conv2D(filters, 3, 2, "same", groups=group, activation=activation, kernel_regularizer=regulizer)(c4)
    b5_1 = Conv2D(filters, 3, 1, "same", groups=1, activation=activation, dilation_rate=dil, kernel_regularizer=regulizer)(b5)    
    c5 = keras.layers.Add()([a5_1, b5_1, c4_down])    
    
    c5_down = Conv2D(filters, 3, 2, "same", groups=group, activation=activation, kernel_regularizer=regulizer)(c5)
    b6_1 = Conv2D(filters, 3, 1, "same", groups=1, activation=activation, dilation_rate=dil, kernel_regularizer=regulizer)(b6)    
    c6 = keras.layers.Add()([a6_1, b6_1, c5_down])
    
    c6_down = Conv2D(filters, 3, 2, "same", groups=group, activation=activation, kernel_regularizer=regulizer)(c6)
    b7_1 = Conv2D(filters, 3, 1, "same", groups=1, activation=activation, dilation_rate=dil, kernel_regularizer=regulizer)(b7)
    c7 = keras.layers.Add()([a7_1, b7_1, c6_down])
    return a2 + b2, a3 + c3, a4 + c4, a5 + c5, c6, c7

In [None]:
2**level_start

In [None]:
def get_heatmap(nets, img_h, img_w, ch, alphas):
    #p234567
    is_shuffle_pixel = True
    ch = ch + 1 + 2 + 2 #(alpha, displace_rel, displace_global)
    if is_shuffle_pixel:
        ch *= 2
    heatmap_0 = nets[0][:, :, :, :ch]
    heatmap_1 = nets[1][:, :, :, :ch]
    heatmap_2 = nets[2][:, :, :, :ch]
    heatmap_3 = nets[3][:, :, :, :ch]
    
    if True:
        visible_0 = nets[0][:, :, :, ch+1:ch+3]
        visible_1 = nets[1][:, :, :, ch+1:ch+3]
        visible_2 = nets[2][:, :, :, ch+1:ch+3]
        visible_3 = nets[3][:, :, :, ch+1:ch+3]
        visible_0 = tf.image.resize(visible_0, [img_h, img_w])
        visible_1 = tf.image.resize(visible_1, [img_h, img_w])
        visible_2 = tf.image.resize(visible_2, [img_h, img_w])
        visible_3 = tf.image.resize(visible_3, [img_h, img_w])
        visible = (visible_0 + visible_1 + visible_2 + visible_3)/4
    
    if is_shuffle_pixel:
        heatmap_0 = shuffle_pixel_lr(heatmap_0, img_h//4, img_w//4, ch)        
        heatmap_1 = shuffle_pixel_lr(heatmap_1, img_h//8, img_w//8, ch)
        heatmap_2 = shuffle_pixel_lr(heatmap_2, img_h//16, img_w//16, ch)
        heatmap_3 = shuffle_pixel_lr(heatmap_3, img_h//32, img_w//32, ch)
    
    heatmap_fullsize_0 = tf.image.resize(heatmap_0, [img_h, img_w])
    heatmap_fullsize_1 = tf.image.resize(heatmap_1, [img_h, img_w])
    heatmap_fullsize_2 = tf.image.resize(heatmap_2, [img_h, img_w])
    heatmap_fullsize_3 = tf.image.resize(heatmap_3, [img_h, img_w])
  
    a0, a1, a2 = softmax_scale_from_last_ch(heatmap_fullsize_0, heatmap_fullsize_1, heatmap_fullsize_2, -5)                
    heatmap = a0 * heatmap_fullsize_0 + a1 * heatmap_fullsize_1 + a2 * heatmap_fullsize_2 + .2 * heatmap_fullsize_3
    #heatmap = heatmap_fullsize_0 + heatmap_fullsize_1 + heatmap_fullsize_2 + heatmap_fullsize_3
        
    heatmap_kp = heatmap[:, :, :, :-5]
    displace_kp = tf.nn.sigmoid(heatmap[:, :, :, -4:], name='displace') - 0.5            
    visible = tf.concat((heatmap_kp[:, :, :, :1], visible), -1)
            
    heatmap_displace = tf.concat((heatmap_kp, visible, displace_kp), -1)        
    return heatmap_displace

In [None]:
def add_position_encoding(nets):
    list_net = []
    for net in nets:
        #net_h = tf.shape(net)[1]
        #net_w = tf.shape(net)[2]
        #coord_xy = coordinate_map_norm(net_h, net_w)#(h, w, 2)
        net_emb = add_map(net)
        #net_emb = tf.concat((net[:, :, :, :2] + tf.expand_dims(coord_xy), net[:, :, :, 2:]), -1)
        list_net.append(net_emb)
    return list_net

In [None]:
def createNet(num_classes, num_keypoints, num_keypoint_classes, anchor_k, is_train=False, is_backbone_train=True):    
    inputs = Input(shape=(None, None, 3))            
    prior_probability = tf.constant_initializer(-np.log((1 - 0.01) / 0.01))
    kernel_init = tf.initializers.he_normal()
    
    backbone = get_backbone()
    backbone.trainable = is_backbone_train
    nets_3 = backbone(inputs, training=is_train)
    #nets_3 = add_position_encoding(nets_3)
    ch = 256
    p34567 = Bifeature(nets_3, ch)
    p234567 = BifeaturePyramidNet_dilate(p34567, ch)
    #p34567 = BifeaturePyramidNet(nets_3, ch)
    img_h = tf.shape(inputs)[1]
    img_w = tf.shape(inputs)[2]
    
    cls_outputs = []
    box_outputs = []
    
    kernel_init = tf.initializers.he_normal()
    regulizer = tf.keras.regularizers.L2(l1)
    
    filters = 4 + 1 + num_classes# + num_keypoints * (2 + num_keypoint_classes)
    print('filters', filters)
    
    #conv_inter = keras.layers.Conv2D(ch, 3, activation=activation, padding="same", kernel_regularizer=regulizer, name='head_inter_0')
    conv_h0 = keras.layers.Conv2D(anchor_k * filters, 3, padding="same", name='head_0', kernel_initializer=kernel_init)   
    conv_h1 = keras.layers.Conv2D(anchor_k * filters, 3, padding="same", name='head_1', kernel_initializer=kernel_init)   
    conv_h2 = keras.layers.Conv2D(anchor_k * filters, 3, padding="same", name='head_2', kernel_initializer=kernel_init)
    conv_h3 = keras.layers.Conv2D(anchor_k * filters, 3, padding="same", name='head_3', kernel_initializer=kernel_init)
    
    conv_kernels = [conv_h0, conv_h0, conv_h0, conv_h1, conv_h2, conv_h3]
    
    drop = keras.layers.Dropout(0.1)
    N = tf.shape(nets_3[0])[0]
    
    cbox_outputs = []    
    alphas = []
    for i in range(2, len(p234567)):            
        feature = p234567[i]
        feature = tf.concat((feature[:,:,:,:60], drop(feature[:,:,:,60:])), -1)
        conv_kernel = conv_kernels[i]
        net_out = conv_kernel(feature)
        cbox_out = tf.reshape(net_out, [N, -1, filters])                
        cbox_outputs.append(cbox_out)
    
    cbox_all = tf.concat(cbox_outputs, axis=1)
    heatmap_displace = get_heatmap(p234567, img_h, img_w, num_keypoints + 1, alphas)    
    
    outputs = {'detect':cbox_all, 'heatmap':heatmap_displace, 'heatmap_coord':heatmap_displace}        
    return keras.Model(inputs=inputs, outputs=outputs)

In [None]:
class BoxLoss(tf.losses.Loss):

    def __init__(self, delta):
        super(BoxLoss, self).__init__(reduction="none", name="BoxLoss")
        self._delta = delta

    def call(self, y_true, y_pred):    
        
        difference = y_true - y_pred
        absolute_difference = tf.abs(difference)        
        loss = tf.where(
            tf.less(absolute_difference, self._delta),
            0.5 * (difference ** 2),
            absolute_difference - 0.5,
        )
        loss = tf.where(loss < 0.05, 0.0, loss)#new marginal loss        
        return tf.reduce_sum(loss, axis=-1)

class ClassificationLoss(tf.losses.Loss):

    def __init__(self, alpha, gamma, num_classes):
        super(ClassificationLoss, self).__init__(reduction="none", name="ClassificationLoss")
        self._alpha = alpha
        self._gamma = gamma
        self._num_classes = num_classes
        
        
    def call(self, y_cls, y_pred):
        y_cls_int = tf.cast(y_cls, dtype=tf.int32)
        y_hot = tf.one_hot(y_cls_int, depth=self._num_classes, dtype=tf.float32,)
        
        y_positive = tf.cast(y_cls > 0, tf.float32)#finetune, 1:unknown
        y_positive_identity = tf.cast(y_cls > 1, tf.float32)# 1:unknown
        
        obj_score = tf.identity(y_pred[:, :], name='obj_score')
        #cls_score = y_pred[:, :, 1:1+self._num_classes]
        
        pt = tf.clip_by_value(obj_score, 1e-7, 1.0 - 1e-7)
                
        loss_p = - (1.0 - self._alpha) * tf.pow(1.0 - pt, self._gamma) * y_positive * tf.math.log(pt)        
        loss_f = - self._alpha * tf.pow(pt, self._gamma) * (1 - y_positive) * tf.math.log(1 - pt)
        loss_obj = loss_p + loss_f
          
        if False:
            cls_pt = tf.nn.softmax(cls_score)        
            cls_pt = tf.clip_by_value(cls_pt, 1e-7, 1.0 - 1e-7)
            loss_cls_p = - tf.pow(1.0 - cls_pt, self._gamma) * y_hot * tf.math.log(cls_pt)
            loss_cls_f = - tf.pow(cls_pt, self._gamma) * (1 - y_hot) * tf.math.log(1 - cls_pt)
            loss_cls = tf.reduce_sum(loss_cls_p + loss_cls_f, axis=-1)        
            is_various_cls_exist = tf.cast(tf.math.reduce_std(y_cls) > 0, tf.float32)                
        #loss_cls = is_various_cls_exist * y_positive * loss_cls
                        
        #normalizer = tf.reduce_sum(y_positive_identity, axis=-1)
        #loss_cls = tf.math.divide_no_nan(tf.reduce_sum(loss_cls, axis=-1), normalizer)                        
        
        loss = loss_obj# + .1*loss_cls#when not stable                
        return loss   

In [None]:
class KeypointLoss(tf.losses.Loss):
    """Implements Smooth L1 loss"""

    def __init__(self, delta, gamma, num_classes):
        super(KeypointLoss, self).__init__(reduction="none", name="KeypointLoss")
        self._delta = delta
        self._gamma = gamma
        self._num_classes = num_classes
        
    def get_xy_loss(self, y_true, y_pred):        
        difference = y_true - y_pred
        absolute_difference = tf.abs(difference)        
        loss = tf.where(
            tf.less(absolute_difference, self._delta),
            0.5 * (difference ** 2),
            absolute_difference - 0.5,
        )
        #loss_xy = tf.where(loss < 0.05, 0.0, loss)#new marginal loss        
        loss = tf.reduce_mean(loss, axis=-1)
        return loss
    
    def get_cls_loss(self, y_cls, y_pred):        
        loss = tf.square(y_cls - y_pred)
        return loss    

    def call(self, y_true, y_pred):    
        #y: [m, anchors, NUM_KEYPOINT * (2 + classes)]        
        gt_x, gt_y, gt_cls = split_gt_keypoint(y_true)        
        h_x, h_y, h_cls, h_kp_cls = split_h_keypoint(y_pred)
        
        gt_xy = tf.stack((gt_x, gt_y), -1)
        h_xy = tf.stack((h_x, h_y), -1) 
        m = tf.shape(y_true)[0]
        #h_cls_score = tf.reshape(h_cls, [m, -1, NUM_KEYPOINT, NUM_KEYPOINT_CLASS])
        
        loss_cls = self.get_cls_loss(gt_cls, h_cls)#(m, anchor)
        loss_xy = self.get_xy_loss(gt_xy, h_xy)#(m, anchor, NUM_KEYPOINT)        
           
        is_exist_kp_label = tf.cast(tf.reduce_any(gt_cls > 0, -1, True), tf.float32)
        
        loss_xy = gt_cls * loss_xy 
        loss_cls = loss_cls * is_exist_kp_label
        loss = tf.reduce_mean(loss_xy + 0.1 * loss_cls, axis=-1) #(m, anchor)         
        return loss

In [None]:
NUM_KEYPOINT, NUM_KEYPOINT_CLASS

In [None]:
class NetLoss(tf.losses.Loss):
    """Wrapper to combine both the losses"""

    def __init__(self, num_classes=80, num_keypoint_classes=3, alpha=0.3, gamma=3.0, delta=1.0):#alpha=0.25
        super(NetLoss, self).__init__(reduction="auto", name="NetLoss")
        self._clf_loss = ClassificationLoss(alpha, gamma, num_classes)
        self._box_loss = BoxLoss(delta)        
        self._keypoint_loss = KeypointLoss(delta, gamma, num_keypoint_classes)                

    def call(self, y_true, y_pred):
        
        y_box, y_cls, y_keypoint = split_gt(y_true)
        h_box, h_obj, h_cls, h_keypoint = split_hyperthesis(y_pred)
        gt_kp_x, gt_kp_y, gt_kp_cls = split_gt_keypoint(y_true)
        
        positive_mask = tf.greater(y_cls, 0.0)
        ignore_mask = tf.less(y_cls, 0.0)
        
        clf_loss = self._clf_loss(y_cls, h_obj)
        box_loss = self._box_loss(y_box, h_box) 
        #keypoint_loss = self._keypoint_loss(y_keypoint, h_keypoint) 
        
        kp_cls_sum_per_anchor = 1 + tf.reduce_sum(gt_kp_cls, -1)
        weight_kp = kp_cls_sum_per_anchor / tf.reduce_max(kp_cls_sum_per_anchor, -1, True)
        
        clf_loss = tf.where(ignore_mask, 0.0, clf_loss)                
        box_loss = tf.where(positive_mask, box_loss, 0.0)
        #keypoint_loss = tf.where(positive_mask, keypoint_loss, 0.0)
        loss = clf_loss + 1 * box_loss# + 0.0000000 * keypoint_loss)
        
        positive_mask = tf.cast(positive_mask, tf.float32)        
        normalizer = tf.reduce_sum(positive_mask, axis=-1)
        normalizer = tf.sqrt(normalizer)
        loss = tf.math.divide_no_nan(tf.reduce_sum(loss, axis=-1), normalizer)        
        return loss 

In [None]:
def focal_loss(y_cls, score, class_k, gamma):
    y_cls_int = tf.cast(y_cls, dtype=tf.int32)
    y_hot = tf.one_hot(y_cls_int, depth=class_k, dtype=tf.float32)
        
    cls_pt = tf.nn.softmax(score)        
    cls_pt = tf.clip_by_value(cls_pt, 1e-7, 1.0 - 1e-7)
    loss_cls_p = - tf.pow(1.0 - cls_pt, gamma) * y_hot * tf.math.log(cls_pt)
    loss_cls_f = - tf.pow(cls_pt, gamma) * (1 - y_hot) * tf.math.log(1 - cls_pt)
    loss_cls = tf.reduce_sum(loss_cls_p + loss_cls_f, axis=-1) 
    return loss_cls

In [None]:
a_w = 7
a = tf.reshape(tf.range(a_w*a_w, dtype=tf.float32), [1, a_w, a_w, 1])
a = tf.where(tf.logical_and(a>10, a<14), a*0+1, 0)
filters = tf.ones((5, 5, 1), tf.float32)
a_dil = tf.nn.dilation2d(a, filters, [1,1,1,1], 'SAME', 'NHWC', dilations=(1,1,1,1))
a_ero = tf.nn.erosion2d(a, filters, [1,1,1,1], 'SAME', 'NHWC', dilations=(1,1,1,1))

In [None]:
a[0,:,:,0]

In [None]:
a_dil[0,:,:,0]# - a[0,:,:,0] > 1

In [None]:
a_ero[0,:,:,0]

In [None]:
def get_displace_loss(gt, h):            
    # gt : (m, h, w, 2) float32
    # h :  (m, h, w, 2) sigmoid - 0.5             
    diff = gt - h
    loss = tf.square(diff) + tf.abs(diff)# * tf.cast(tf.abs(gt)>0.01, tf.float32)
    loss = tf.boolean_mask(loss, tf.abs(gt) > 0.001)
    #loss = tf.reduce_sum(loss, -1)
    return loss

In [None]:
class HeatmapLoss(tf.losses.Loss):
    """Wrapper to combine both the losses"""

    def __init__(self, num_classes, alpha=0.25, gamma=3.0, delta=1.0):#alpha=0.25
        super(HeatmapLoss, self).__init__(reduction="auto", name="HeatmapLoss")
        self._delta = delta
        self._gamma = gamma
        self._num_classes = num_classes        
        
    def focal_loss(self, y_cls, score, class_k, gamma, alpha=0.5):
        #눈코입 라벨이 없으면 눈코입 히트맵은 살려놓자        
        y_cls_int = tf.cast(y_cls, dtype=tf.int32)
        y_hot = tf.one_hot(y_cls_int, depth=class_k, dtype=tf.float32)
        
        k_cls = tf.reduce_sum(y_hot[:,:,:,1:], [1, 2], True)#(m,1,1,k)
        k_cls_norm = k_cls / (0.01 + tf.reduce_max(k_cls, -1, True))
        k_cls_norm = tf.square(k_cls_norm)
        
        #cls 가 없는 채널은 손실을 0.x 적게 적용 > 손실 없음
        weight_cls = tf.cast(k_cls_norm, tf.float32)
        weight_cls = tf.concat((tf.reduce_min(weight_cls, -1, True), weight_cls), -1)
        
        cls_pt = tf.nn.softmax(score)        
        cls_pt = tf.clip_by_value(cls_pt, 1e-7, 1.0 - 1e-7)
        loss_cls_p = - alpha * tf.pow(1.0 - cls_pt, gamma) * y_hot * tf.math.log(cls_pt)
        loss_cls_f = - (1 - alpha) * tf.pow(cls_pt, gamma) * (1 - y_hot) * tf.math.log(1 - cls_pt)
        loss_cls = tf.reduce_sum(loss_cls_p + loss_cls_f, axis=-1) 
                
        loss_cls_exist_cls = tf.reduce_sum(weight_cls * (loss_cls_p + loss_cls_f), axis=-1)
        return loss_cls, loss_cls_exist_cls

    def call(self, y_true, y_pred):
        #(m, h, w, m_classes)
        
        y_heatmap, y_visible, y_displace = split_gt_heatmap_displace(y_true)
        h_heatmap, h_visible, h_displace = split_heatmap_displace(y_pred)        
        y_positive = tf.cast(y_heatmap > 0, tf.float32)
        y_ignore = tf.cast(y_heatmap < 0, tf.float32)
                
        #[1,32,32,18] vs. [1,32,32,7,18] mul        
        loss, loss_weighted = self.focal_loss(y_heatmap, h_heatmap, self._num_classes, self._gamma)
        loss_visible, loss_visible_w = self.focal_loss(y_visible, h_visible, 3, 3.0)        
        loss_visible = tf.where(y_visible < 0, 0.0, loss_visible)
        loss_visible = tf.where(y_visible < 1, 0.001 * loss_visible, loss_visible)
                
        filters = tf.ones((70, 70, 1), tf.float32)
        y_positive_dil = tf.nn.dilation2d(tf.expand_dims(y_positive, -1), filters, [1,1,1,1], 'SAME', 'NHWC', dilations=(1,1,1,1))
        y_positive_dil = tf.cast(y_positive_dil > 0, tf.float32)
        y_positive_dil = tf.squeeze(y_positive_dil, -1)
        area_near = y_positive_dil
        area_far = 1 - y_positive_dil        
        w_near = 2/3
                
        loss_kp = loss * tf.cast(y_heatmap > 0, tf.float32)        
        loss_bg = (1 - y_ignore)  * tf.cast(y_heatmap < 1, tf.float32) * loss_weighted
        loss_bg = w_near * area_near * loss_bg + (1 - w_near) * area_far * loss_bg
        loss_displace = get_displace_loss(y_displace, h_displace)        
        
        #오토바이 남녀 발이 보이는데 kp 표시가 없음        
        loss_heatmap = tf.reduce_mean(loss_kp, [1,2]) + 1e-2 * tf.reduce_mean(loss_bg, [1,2]) 
        loss = tf.reduce_mean(loss_heatmap) + 10 * tf.reduce_mean(loss_displace) + tf.reduce_mean(loss_visible)
        return loss 

In [None]:
def crop_image_by_box(images, img_h, img_w, boxes, crop_size_hw):# single batch only
    
    x1 = boxes[:, 0] / img_w
    y1 = boxes[:, 1] / img_h
    x2 = boxes[:, 2] / img_w
    y2 = boxes[:, 3] / img_h
    boxes_norm = tf.stack((y1, x1, y2, x2), -1)    #[num_boxes, 4], normalized coordinates ` [y1, x1, y2, x2]
    box_indices = tf.zeros_like(boxes_norm[:, 0], tf.int32)
    
    crop_image = tf.image.crop_and_resize(images, boxes_norm, box_indices, crop_size_hw)
    return crop_image

In [None]:
def get_kp_from_heatmap(crop_heatmap_score, crop_size):    
    #(m==1, h, w, ch+1)
    cls_idx = tf.range(NUM_KEYPOINT+1)    
    heamap_logit = tf.nn.softmax(crop_heatmap_score)
    heamap_logit_kp = heamap_logit[:,:,:,1:]
    
    #visible_1 = tf.reduce_any(heamap_logit_kp > 0.5, [1, 2])
    #visible_2 = tf.reduce_any(heamap_logit_kp > 0.9, [1, 2])
    
    thresh_k = crop_size[0] * 0.05
    k_1 = tf.reduce_sum(tf.cast(heamap_logit_kp > 0.5, tf.float32), [1, 2])
    k_2 = tf.reduce_sum(tf.cast(heamap_logit_kp > 0.9, tf.float32), [1, 2])
    visible_1 = k_1 > thresh_k
    visible_2 = k_2 > thresh_k
    
    visible = tf.cast(visible_1, tf.float32) + tf.cast(visible_2, tf.float32)
    
    crop_heatmap_score = crop_heatmap_score[:,:,:,1:]
    crop_heatmap_exp = tf.exp(crop_heatmap_score)
        
    crop_heatmap_norm = crop_heatmap_exp / (0 + tf.reduce_sum(crop_heatmap_exp, [1, 2], True))
    coord_xy = coordinate_map_norm(crop_size[0], crop_size[1])#(h, w, 2)
    coord_xy_5d = tf.reshape(coord_xy, [1, crop_size[0], crop_size[1], 1, 2])
    crop_heatmap_norm_5d = tf.expand_dims(crop_heatmap_norm, -1)
    #(1, h, w, 1, 2) * (None, 100, 200, 18, 1) = (None, h, w, 18, 2), sum() > (None, 18, 2)
    kp_xy = tf.reduce_sum(coord_xy_5d * crop_heatmap_norm_5d, [1, 2])
    return kp_xy, visible

In [None]:
def get_kp_from_heatmap_displace(crop_heatmap_score, displace, crop_size):
    
    #(m==1, h, w, ch+1)
    cls_idx = tf.range(NUM_KEYPOINT+1)    
    heamap_logit = tf.nn.softmax(crop_heatmap_score)
    heamap_logit_kp = heamap_logit[:,:,:,1:]
    
    #visible_1 = tf.reduce_any(heamap_logit_kp > 0.5, [1, 2])
    #visible_2 = tf.reduce_any(heamap_logit_kp > 0.9, [1, 2])
    
    thresh_k = crop_size[0] * 0.1
    k_1 = tf.reduce_sum(tf.cast(heamap_logit_kp > 0.5, tf.float32), [1, 2])
    k_2 = tf.reduce_sum(tf.cast(heamap_logit_kp > 0.9, tf.float32), [1, 2])
    visible_1 = k_1 > thresh_k
    visible_2 = k_2 > thresh_k
    
    visible = tf.cast(visible_1, tf.float32) + tf.cast(visible_2, tf.float32)    
    
    positive_kp_mask = tf.cast(tf.argmax(crop_heatmap_score, -1) > 0, tf.float32)
    crop_heatmap_score = crop_heatmap_score[:,:,:,1:]    
        
    coord_xy = coordinate_map_norm(crop_size[0], crop_size[1])#(h, w, 2), [0, 1]
    coord_xy_4d = tf.reshape(coord_xy, [1, crop_size[0], crop_size[1], 2])
    coord_xy_5d = tf.reshape(coord_xy, [1, crop_size[0], crop_size[1], 1, 2])
    coord_xy_uv = coord_xy - 0.5
    
    #gt : keypoints_xy - roots
    #distance와 kp의 거리
    #print('coord_xy_uv', coord_xy_uv)#(100, 200, 2)
    #print('displace', displace)#(None, 100, 200, 2)
    displace_rel = displace[:, :, :, :2]
    displace_global = displace[:, :, :, 2:4]
    distance_error = tf.sqrt(tf.reduce_sum(tf.square(tf.expand_dims(coord_xy_uv, 0) - displace_rel), -1, True))    
    distance_show = tf.cast(distance_error*255, tf.uint8)
    distance_show = tf.concat((distance_show,distance_show, distance_show), -1)
    
    #선별 (m,h,w,k) * (m,h,w,1)
    
    #positive kp내부에서 k개의 최소 에러와 최대 에러, 평균 에러
    if False:
        # p-kp 내부의 distance_error 의 평균 이하인 스코어를 선택
        distance_error_on_positive_kp = tf.expand_dims(positive_kp_mask, -1) * distance_error
        crop_heatmap_score_on_positive_kp = tf.expand_dims(positive_kp_mask, -1) * crop_heatmap_score
        distance_error_mean = tf.reduce_mean(distance_error, [1,2], True)
        crop_heatmap_score_mean = tf.reduce_mean(crop_heatmap_score, [1,2], True)        
        cond = tf.logical_and(crop_heatmap_score > crop_heatmap_score_mean, distance_error<distance_error_mean)
        crop_heatmap_score = tf.where(cond, crop_heatmap_score, crop_heatmap_score*0.1)#todo test
        #crop_heatmap_exp_group = tf.where(distance_error < distance_error_mean, tf.exp(crop_heatmap_score), 0)#todo test
        
    #crop_heatmap_exp_group = crop_heatmap_exp * tf.cast(valid_group, tf.float32)
    crop_heatmap_exp_group = tf.exp(crop_heatmap_score) * tf.pow(1 - distance_error, 5)        
    
    crop_heatmap_norm = crop_heatmap_exp_group / (0 + tf.reduce_sum(crop_heatmap_exp_group, [1, 2], True))
    crop_heatmap_norm_5d = tf.expand_dims(crop_heatmap_norm, -1)
    
    #(1, h, w, 1, 2) * (None, 100, 200, 18, 1) = (None, h, w, 18, 2), sum() > (None, 18, 2)
    kp_xy = tf.reduce_sum(coord_xy_5d * crop_heatmap_norm_5d, [1, 2])
    
    #실제 좌표맵과 kp의 거리 , (1, h, w, 2 ) - (m, h, w, 2) = (m, h, w, 2)
    
    return kp_xy, visible, distance_show

In [None]:
def get_kp_from_heatmap_displacex2(crop_heatmap_score, displace, crop_size, crop_coord_map):
    
    #(m==1, h, w, ch+1)
    cls_idx = tf.range(NUM_KEYPOINT+1)    
    heamap_logit = tf.nn.softmax(crop_heatmap_score)
    heamap_logit_kp = heamap_logit[:,:,:,1:]
    
    thresh_k = crop_size[0] * 0.03
    k_1 = tf.reduce_sum(tf.cast(heamap_logit_kp > 0.4, tf.float32), [1, 2])
    k_2 = tf.reduce_sum(tf.cast(heamap_logit_kp > 0.9, tf.float32), [1, 2])
    visible_1 = k_1 > thresh_k
    visible_2 = k_2 > thresh_k
    
    visible = tf.cast(visible_1, tf.float32) + tf.cast(visible_2, tf.float32)    
    
    positive_kp_mask = tf.cast(tf.argmax(crop_heatmap_score, -1) > 0, tf.float32)
    crop_heatmap_score = crop_heatmap_score[:,:,:,1:] 
        
    coord_xy = coordinate_map_norm(crop_size[0], crop_size[1])#(h, w, 2), [0, 1]
    coord_xy_4d = tf.reshape(coord_xy, [1, crop_size[0], crop_size[1], 2])
    coord_xy_5d = tf.reshape(coord_xy, [1, crop_size[0], crop_size[1], 1, 2])
    coord_xy_uv = coord_xy - 0.5
    
    #gt : keypoints_xy - roots
    #distance와 kp의 거리
    #print('coord_xy_uv', coord_xy_uv)#(100, 200, 2)
    #print('displace', displace)#(None, 100, 200, 2)
    displace_rel = displace[:, :, :, :2]
    displace_global = displace[:, :, :, 2:4]
    
    #print('crop_coord_map', crop_coord_map)#(m, 100, 100, 2)
    # h_displace_global = kp_center - box_center 
    #displace_kp_to_root_norm_global = (keypoints_xy - roots) / np.reshape([x_w, x_h], [1, 1, 2]) #(n, k, 2)
    
    distance_rel_error = tf.sqrt(tf.reduce_sum(tf.square(tf.expand_dims(coord_xy_uv, 0) - displace_rel), -1, True))    
    displace_global_gt = crop_coord_map - tf.reduce_mean(crop_coord_map, [1, 2], True)
    displace_global_error = tf.sqrt(tf.reduce_sum(tf.square(displace_global_gt - displace_global), -1, True))
  
    displace_rel_kp = tf.expand_dims(positive_kp_mask, -1) * displace_rel
    displace_rel_error_self = tf.sqrt(tf.reduce_sum(tf.square(displace_rel_kp - tf.reduce_mean(displace_rel_kp, [1,2], True)), -1, True))
    
    #선별 (m,h,w,k) * (m,h,w,1)    
    #crop_heatmap_exp_group = crop_heatmap_exp * tf.cast(valid_group, tf.float32)    
    displace_error = distance_rel_error + displace_global_error
    w = tf.pow(tf.maximum(0.0, 1 - displace_error), 2)    
    
    crop_heatmap_exp_group = heamap_logit_kp * w * tf.expand_dims(positive_kp_mask, -1)
    crop_heatmap_exp_group = 1e-7 + tf.where(crop_heatmap_exp_group >= 0.9 * tf.reduce_max(crop_heatmap_exp_group, [1, 2], True), crop_heatmap_exp_group, crop_heatmap_exp_group*0.00)
    
    crop_heatmap_norm = crop_heatmap_exp_group / tf.reduce_sum(crop_heatmap_exp_group, [1, 2], True)
    crop_heatmap_norm_5d = tf.expand_dims(crop_heatmap_norm, -1)
    
    #(1, h, w, 1, 2) * (None, 100, 200, 18, 1) = (None, h, w, 18, 2), sum() > (None, 18, 2)
    kp_xy = tf.reduce_sum(coord_xy_5d * crop_heatmap_norm_5d, [1, 2])
    
    #실제 좌표맵과 kp의 거리 , (1, h, w, 2 ) - (m, h, w, 2) = (m, h, w, 2)
    
    return kp_xy, visible

In [None]:
coordinate_map_norm(4,4) - tf.reduce_mean(coordinate_map_norm(4,4), [0, 1], True)

In [None]:
np.power(1 - 0.1, 2), np.power(1 - 0.1 - 0.1, 2)

In [None]:
coordinate_map_np(9, 19).mean(axis=0).mean(axis=0)

In [None]:
np.power(4, 2), np.power(0.5, 2), np.power(0.5, 3), np.power(0.5, 4), np.power(0.5, 5)

In [None]:
def nms_kp_dist(gt, h_heatmap):
    
    _num_classes = NUM_KEYPOINT + 1
    _anchor_ch = anchor_ch
    _anchor_box = AnchorBox()
    
    m = tf.shape(gt)[0]
    image_h = padded_image_shape[0]
    image_w = padded_image_shape[1]
    
    anchor_boxes = _anchor_box.get_anchors(image_h, image_w)

    box_predictions, objectness, keypoint_gt = split_gt(gt)
    x, y, keypoints_cls = split_gt_keypoint(keypoint_gt)

    keypoints_xy = tf.stack((x, y), -1)
    keypoints_xy = tf.reshape(keypoints_xy, (m, -1, NUM_KEYPOINT * 2))

    boxes = _decode_box_predictions(anchor_boxes[None, ...], box_predictions)
    keypoints_xy = _decode_keypoint_predictions(anchor_boxes[None, ...], keypoints_xy)
    boxes_2d = tf.reshape(boxes, [-1, 4])    
    scores = tf.reshape(objectness, [-1, 1])

    cls = objectness * 0 + 1
    cls = tf.reshape(cls, [-1, 1])

    keypoints_xy_3d = tf.reshape(keypoints_xy, [-1, NUM_KEYPOINT, 2])
    keypoints_cls_3d = tf.reshape(keypoints_cls, [-1, NUM_KEYPOINT, 1])    
    keypoints_3d = tf.concat((keypoints_xy_3d, keypoints_cls_3d), -1)
    keypoints_2d = tf.reshape(keypoints_3d, [-1, NUM_KEYPOINT * 3])

    ccbox = tf.concat((cls, scores, boxes_2d, keypoints_2d), -1)
    ccbox_check = tf.concat((boxes_2d, cls, keypoints_2d), -1)

    confidence_threshold=0.5
    nms_iou_threshold=0.2
    max_detections_per_class=100
    max_detections=150

    selected_indices, selected_scores = tf.image.non_max_suppression_with_scores(    
        ccbox[:, 2:2+4],
        ccbox[:, 1],        
        max_detections,
        nms_iou_threshold,
        confidence_threshold,        
    )
        
    positive_box = tf.gather(ccbox_check, selected_indices)
    
    positive_keypoints = positive_box[:, 5:]
    positive_keypoints_x = positive_keypoints[:, 0::3]
    positive_keypoints_y = positive_keypoints[:, 1::3]
    positive_keypoints_cls = positive_keypoints[:, 2::3]
    positive_keypoints_xy = tf.stack((positive_keypoints_x, positive_keypoints_y), -1)
    
    cls_sum_max = tf.reduce_max(tf.reduce_sum(positive_keypoints_cls, -1))
    cond_select = tf.reduce_sum(positive_keypoints_cls, -1) > cls_sum_max / 2
    positive_box = tf.gather_nd(ccbox_check, tf.where(cond_select))#todo fix. without, Nan  
    
    positive_kp = positive_box[:, 5:]
    positive_kp_x = positive_kp[:, 0::3]
    positive_kp_y = positive_kp[:, 1::3]
    positive_kp_cls = positive_kp[:, 2::3]
    positive_kp_xy = tf.stack((positive_kp_x, positive_kp_y), -1)
        
    x1 = positive_box[:, 0] / image_w
    y1 = positive_box[:, 1] / image_h
    x2 = positive_box[:, 2] / image_w
    y2 = positive_box[:, 3] / image_h
    boxes = tf.stack((y1, x1, y2, x2), -1)    #[num_boxes, 4], normalized coordinates ` [y1, x1, y2, x2]
    box_indices = tf.zeros_like(boxes[:, 0], tf.int32)
    crop_size = [100, 100]#h, w        
    crop_heatmap_score = tf.image.crop_and_resize(h_heatmap, boxes, box_indices, crop_size)#not None

    kp_xy_norm, visible = get_kp_from_heatmap(crop_heatmap_score, crop_size)#None

    kp_offset = positive_box[:, None, :2]    
    kp_scale = positive_box[:, None, 2:4] - positive_box[:, None, 0:2]
    kp_xy = kp_scale * kp_xy_norm + kp_offset
    positive_kp_xy_norm = (positive_kp_xy - kp_offset) / kp_scale

    #loss_kp_xy = tf.reduce_mean(tf.abs(positive_kp_xy - kp_xy), -1)
    loss_kp_xy_norm = tf.reduce_sum(tf.abs(positive_kp_xy_norm - kp_xy_norm), -1)
    loss_kp_xy = tf.reduce_sum(tf.sqrt(tf.abs(positive_kp_xy - kp_xy)), -1)
    dist = (loss_kp_xy + loss_kp_xy_norm) * tf.cast(positive_kp_cls > 1, tf.float32)

    return tf.reduce_mean(dist)        

In [None]:
class HeatmapCoordLoss(tf.losses.Loss):   
    
    def __init__(self, num_classes, anchor_ch):
        super(HeatmapCoordLoss, self).__init__(reduction="auto", name="HeatmapCoordLoss")
        self._num_classes = num_classes
        self._anchor_ch = anchor_ch
        self._anchor_box = AnchorBox()   

    def call(self, gt, h_heatmap_displace):
       
        m = tf.shape(gt)[0]
        image_h = padded_image_shape[0]
        image_w = padded_image_shape[1]                
        anchor_boxes = self._anchor_box.get_anchors(image_h, image_w)
            
        h_heatmap, h_visible, h_displace = split_heatmap_displace(h_heatmap_displace)
        box_predictions, objectness, keypoint_gt = split_gt(gt)
        x, y, keypoints_cls = split_gt_keypoint(keypoint_gt)
        
        keypoints_xy = tf.stack((x, y), -1)
        keypoints_xy = tf.reshape(keypoints_xy, (m, -1, NUM_KEYPOINT * 2))

        boxes = _decode_box_predictions(anchor_boxes[None, ...], box_predictions)
        keypoints_xy = _decode_keypoint_predictions(anchor_boxes[None, ...], keypoints_xy)
        boxes_2d = tf.reshape(boxes, [-1, 4])    
        scores = tf.reshape(objectness, [-1, 1])
        cls = objectness * 0 + 1
        cls = tf.reshape(cls, [-1, 1])

        keypoints_xy_3d = tf.reshape(keypoints_xy, [-1, NUM_KEYPOINT, 2])
        keypoints_cls_3d = tf.reshape(keypoints_cls, [-1, NUM_KEYPOINT, 1])    
        keypoints_3d = tf.concat((keypoints_xy_3d, keypoints_cls_3d), -1)
        keypoints_2d = tf.reshape(keypoints_3d, [-1, NUM_KEYPOINT * 3])

        ccbox = tf.concat((cls, scores, boxes_2d, keypoints_2d), -1)
        ccbox_check = tf.concat((boxes_2d, cls, keypoints_2d), -1)
        
        confidence_threshold=0.5
        nms_iou_threshold=0.2
        max_detections_per_class=100
        max_detections=150

        selected_indices, selected_scores = tf.image.non_max_suppression_with_scores(    
            ccbox[:, 2:2+4], ccbox[:, 1], max_detections, nms_iou_threshold, confidence_threshold)
        
        positive_box = tf.gather(ccbox_check, selected_indices)
        positive_keypoints = positive_box[:, 5:]        
        positive_keypoints_cls = positive_keypoints[:, 2::3]
                        
        cls_sum_max = tf.reduce_max(tf.reduce_sum(positive_keypoints_cls, -1))
        cond_select = tf.reduce_sum(positive_keypoints_cls, -1) > cls_sum_max / 2
        positive_box = tf.gather_nd(ccbox_check, tf.where(cond_select))#todo fix. without, Nan  
       
        positive_kp = positive_box[:, 5:]
        positive_kp_x = positive_kp[:, 0::3]
        positive_kp_y = positive_kp[:, 1::3]
        positive_kp_cls = positive_kp[:, 2::3]
        positive_kp_xy = tf.stack((positive_kp_x, positive_kp_y), -1)        
        
        coord_map = coordinate_map_norm(image_h, image_w)
        coord_map = tf.expand_dims(coord_map, 0) + tf.zeros_like(h_heatmap[:,:,:,:2])        
        crop_size = [100, 100]#h, w
        
        #h_heatmap_displace_coord = tf.concat((h_heatmap_displace, coord_map), -1)
        crop_heatmap_score = crop_image_by_box(h_heatmap, image_h, image_w, positive_box, crop_size)
        crop_displace = crop_image_by_box(h_displace, image_h, image_w, positive_box, crop_size)
        crop_coord_map = crop_image_by_box(coord_map, image_h, image_w, positive_box, crop_size)
                
        kp_xy_norm, visible = get_kp_from_heatmap_displacex2(crop_heatmap_score, crop_displace, crop_size, crop_coord_map)
                
        kp_offset = positive_box[:, None, :2]    
        kp_scale = positive_box[:, None, 2:4] - positive_box[:, None, 0:2]
        kp_xy = kp_scale * kp_xy_norm + kp_offset
        positive_kp_xy_norm = (positive_kp_xy - kp_offset) / kp_scale
        
        weight_scale =  (1 + tf.reduce_max(kp_scale, -1, True)/image_h)
        loss_weight = tf.reduce_sum(positive_kp_cls, -1, True) + weight_scale
        loss_kp_xy_abs = tf.reduce_mean(tf.abs(positive_kp_xy - kp_xy)/crop_size[0], -1)
        loss_kp_xy_abs = loss_weight * loss_kp_xy_abs * tf.cast(positive_kp_cls > VISIBILITY_OCCLUDED, tf.float32)#박스 내부에 나타난 kp만 학습        
        
        loss_kp_xy_norm = positive_kp_xy_norm - kp_xy_norm
        loss_kp_xy_norm = tf.where(tf.abs(loss_kp_xy_norm) < 0.05, 0.0, loss_kp_xy_norm)#marginal loss        
        loss_kp = tf.reduce_mean(tf.abs(loss_kp_xy_norm) + tf.square(loss_kp_xy_norm), -1)        
        loss_kp = loss_weight * loss_kp * tf.cast(positive_kp_cls > VISIBILITY_OCCLUDED, tf.float32)#박스 내부에 나타난 kp만 학습
        loss_kp = tf.reduce_sum(loss_kp, -1)
        
        # visibility 0 이 나오면 안됨
        positive_kp_cls_4d = tf.reshape(positive_kp_cls, [-1, 1, 1, NUM_KEYPOINT])
        crop_heatmap_logit = tf.nn.softmax(crop_heatmap_score)
        crop_heatmap_logit_no_bg = crop_heatmap_logit[:, :, :, 1:]
        
        loss = tf.reduce_mean(loss_kp) + tf.reduce_mean(loss_kp_xy_abs)
        return loss         

In [None]:
class DisplaceLoss(tf.losses.Loss):   
    
    def __init__(self):
        super(DisplaceLoss, self).__init__(reduction="auto", name="DisplaceLoss")
        pass
    
    def call(self, gt, h):            
        # gt : (m, h, w, 2) float32
        # h :  (m, h, w, 2) sigmoid - 0.5             
        diff = gt - h
        loss = tf.square(diff) + tf.abs(diff)# * tf.cast(tf.abs(gt)>0.01, tf.float32)
        loss = tf.boolean_mask(loss, tf.abs(gt) > 0.01)
        return loss

In [None]:
anchor_ch = 4+1+num_classes+NUM_KEYPOINT*(2+1)
anchor_ch

In [None]:
def recall(y_true, y_pred):    
    y_cls = tf.cast(y_true[:, :, 4], tf.int32)
    y_positive = tf.cast(y_cls > 0, tf.int32)
    y_bg = tf.cast(tf.abs(y_cls)==0, tf.int32)
    h_score = y_pred[:, :, 4]
    h_prob = tf.nn.sigmoid(h_score)    
    h_postive = tf.cast(tf.round(h_prob), tf.int32)
    
    true_positives = tf.cast(tf.logical_and(y_cls > 0, h_postive>0), tf.float32)
    false_negative = y_positive * (1 - h_postive)
                
    tp = tf.reduce_sum(true_positives, axis=1)# + 0.01
    fn = tf.reduce_sum(false_negative, axis=1)
    tp = tf.cast(tp, tf.float32)
    fn = tf.cast(fn, tf.float32)
    
    rec = tp / (tp + fn + 1e-8)
    return rec

def precision(y_true, y_pred):
    
    y_cls_symbol = tf.cast(y_true[:, :, 4], dtype=tf.int32)    
    y_cls_symbol = tf.cast(y_cls_symbol != 0, tf.int32)
    h_obj_prob = tf.nn.sigmoid(y_pred[:, :, 4])
    h_cls_symbol = tf.round(h_obj_prob)    
    h_cls_symbol = tf.cast(h_cls_symbol, tf.int32)
    
    true_positives = y_cls_symbol * h_cls_symbol
    false_positive = (1 - y_cls_symbol) * h_cls_symbol
    
    ones = tf.ones_like(true_positives)
    zeeros = tf.zeros_like(true_positives)
    true_positives = tf.cast(tf.equal(true_positives, ones), tf.float32)
    false_positive = tf.cast(tf.equal(false_positive, ones), tf.float32)
    
    tp = tf.reduce_sum(true_positives, axis=1)# + 0.01
    fp = tf.reduce_sum(false_positive, axis=1)
    tp = tf.cast(tp, tf.float32)
    fp = tf.cast(fp, tf.float32)
    prec = tp / (tp + fp + 1e-8)
    return prec

def acc(y_true, y_pred):    
    y_cls = tf.cast(y_true[:, :, 4], tf.int32)
    y_positive = y_cls > 0    
    h_cls = tf.math.argmax(y_pred[:, :, 5:5+num_classes], -1, output_type=tf.int32)        
    acc = tf.boolean_mask(tf.equal(y_cls, h_cls), y_positive)    
    #acc = tf.equal(y_cls, h_cls)
    return acc

In [None]:
def kp_cls_acc(y_true, y_pred):#keypoint
    y_box, y_cls, y_keypoint = split_gt(y_true)    
    h_box, h_obj, h_cls, h_keypoint = split_hyperthesis(y_pred)
    
    gt_x, gt_y, gt_cls = split_gt_keypoint(y_keypoint)
    h_x, h_y, h_kp_cls_score, h_kp_cls = split_h_keypoint(h_keypoint)

    gt_xy = tf.concat((gt_x, gt_y), -1)
    h_xy = tf.concat((h_x, h_y), -1) 
    m = tf.shape(y_true)[0]
        
    #[1,14160,17] vs. [1,12240,17]    
    gt_cls = tf.cast(gt_cls, tf.int32)
    h_kp_cls = tf.cast(h_kp_cls, tf.int32)
    acc = tf.equal(gt_cls, h_kp_cls)
    #is_exist_kp_label = tf.cast(tf.reduce_any(gt_cls > 0, -1, True), tf.float32)
    cond = tf.logical_and(y_cls > 0, tf.reduce_any(gt_cls > 0, -1))
    acc = tf.boolean_mask(acc, cond)
    return acc   

In [None]:
def heatmap_acc(y_true, y_pred):#keypoint
    
    y_heatmap, heatmap_visible, y_displace = split_gt_heatmap_displace(y_true)
    h_heatmap, h_visible, h_displace = split_heatmap_displace(y_pred)
    
    h_cls = tf.argmax(h_heatmap, -1)    
    h_cls = tf.cast(h_cls, tf.int32)
    y_cls = tf.cast(y_heatmap, tf.int32)
    
    acc = tf.equal(y_cls, h_cls) 
    acc = tf.boolean_mask(acc, y_heatmap > 0)
    return acc

def visible_acc(y_true, y_pred):
    y_heatmap, y_visible, y_displace = split_gt_heatmap_displace(y_true)
    h_heatmap, h_visible, h_displace = split_heatmap_displace(y_pred)
    
    h_cls = tf.argmax(h_visible, -1)    
    h_cls = tf.cast(h_cls, tf.int32)
    y_cls = tf.cast(y_visible, tf.int32)
    
    acc = tf.equal(y_cls, h_cls) 
    acc = tf.boolean_mask(acc, y_visible > 0)
    return acc    

In [None]:
np.set_printoptions(precision=3, linewidth=200)
image_height, image_width = padded_image_shape

img_check = 0
for image, multi_y in val_dataset:
    output_map = multi_y['detect']
    heatmap = multi_y['heatmap']
    print('output_map', output_map.shape)
    cbbox = output_map    
    bbox = cbbox[:, :, :4]
    cls_gt = cbbox[:,:,4]
    img_m, image_height, image_width, image_ch = image.shape
    anchor_feature_size = [(np.ceil(image_height / 2 ** i), np.ceil(image_width / 2 ** i)) 
                           for i in range(level_start, level_end)]
    print('anchor_feature_size', anchor_feature_size)    
    m = len(cbbox)    
    positive_count = np.sum(cls_gt>0)
    print('cbbox', cbbox.shape)
    print('cls_sum',np.sum(cls_gt < 0.0), np.sum(cls_gt == 0.0), 
          np.sum(cls_gt == 1.0), np.sum(cls_gt > 1.0))
    print('cls_mean',np.mean(cls_gt < 0.0), np.mean(cls_gt == 0.0), 
          np.mean(cls_gt == 1.0), np.mean(cls_gt > 0.0))
    print('shape',image.shape, cbbox.shape,'unique', np.unique(cls_gt))
    print('anchor_feature_size', anchor_feature_size)
    offset = 0
    positive_maps = []
    for anchor_feature_size_1 in anchor_feature_size:        
        fm_h, fm_w = anchor_feature_size_1
        fm_h = int(fm_h)
        fm_w = int(fm_w)        
        fm_wh = int(fm_h * fm_w * anchor_k)
        cbbox_anchor = cbbox[:, offset:offset+fm_wh, 4]
        cbbox_anchor = np.reshape(cbbox_anchor, [m, fm_h, fm_w, anchor_k])
        coount_m1 = np.count_nonzero(cbbox_anchor==-1)
        coount_0 = np.count_nonzero(cbbox_anchor==0)
        coount_1 = np.count_nonzero(cbbox_anchor==1)
        coount_1_over = np.count_nonzero(cbbox_anchor>1)
        positive_ratio = np.mean(cbbox_anchor>0)
        positive_maps.append(cbbox_anchor>0)
        print('cbbox_anchor', cbbox_anchor.shape, coount_m1, coount_0, coount_1, coount_1_over, 'ratio', positive_ratio)
        sample_0_cbbox = cbbox_anchor[0]
        sample_0_cbbox_sum = np.max(sample_0_cbbox, -1).astype(np.int)       
      
        offset += fm_wh
        if False:            
            file_name = str(fm_h)+ '_' + str(fm_w)+ '.txt'
            np.savetxt(file_name,sample_0_cbbox_sum, fmt='%d',delimiter='')
    img_check = image
    break

In [None]:
pmap0 = np.array(Image.fromarray(np.max(positive_maps[0][0],-1)).resize((image_width, image_height)))
pmap1 = np.array(Image.fromarray(np.max(positive_maps[1][0],-1)).resize((image_width, image_height)))
pmap2 = np.array(Image.fromarray(np.max(positive_maps[2][0],-1)).resize((image_width, image_height)))
#pmap3 = np.array(Image.fromarray(np.max(positive_maps[3][0],-1)).resize((image_width, image_height)))
#pmap4 = np.array(Image.fromarray(np.max(positive_maps[4][0],-1)).resize((image_width, image_height)))
pmap0 = pmap0.astype(np.uint8)
pmap1 = pmap1.astype(np.uint8)
pmap2 = pmap2.astype(np.uint8)
pmap3 = 0#pmap3.astype(np.uint8)
pmap4 = 0#pmap4.astype(np.uint8)

In [None]:
pmap_with_img = np.array(img_check)[0]#*255
pmap_with_img = pmap_with_img.astype(np.uint8)
pmap_add = np.expand_dims(pmap0+pmap1+pmap2+pmap3+pmap4, -1)
pmap = pmap_add*(255//np.max(pmap_add))
mix_rgb = np.concatenate((pmap, pmap_with_img[:,:,1:]),-1)
plt.figure(figsize=(10,10))
plt.imshow(mix_rgb)
plt.title(str(np.mean(pmap_add)))

In [None]:
def stack_image(img0, img1):
    a = np.array(img0)
    b = np.array(img1)
    return Image.fromarray(np.concatenate((a, b), axis=1))

def stack_image_3(img0, img1, img2):
    a = np.array(img0)
    b = np.array(img1)
    c = np.array(img2)
    return Image.fromarray(np.concatenate((a, b, c), axis=1))

In [None]:
def compare_gt_h(check_dataset, stride=1, is_show_class=False):
    i = 0
    for image, target_dict in check_dataset:        
        i += 1
        if stride > 1 and i%stride!=0:
            continue

        x = np.array(image[0], np.uint8)
        y_detect = target_dict['detect']
        y_heatmap_displace = target_dict['heatmap']
        y_heatmap, heatmap_visible, y_displace = split_gt_heatmap_displace(y_heatmap_displace)
        
        detections_gt = decode_debug(image, y_detect, confidence_threshold=0.15, nms_iou_threshold=0.2)    
        model_out = inference_model.predict(image)  
        detections = model_out['detect']
        heatmap = model_out['heatmap']
        #heatmap, displace = split_heatmap_displace(heatmap_displace)
        #displace = model_out['person_mask']
        y_heatmap = y_heatmap[0]
        heatmap = heatmap[0]        
        #displace = displace[0]
        
        #print('x', x.shape, 'detections', detections.shape, 'heatmap', heatmap.shape, 'displace.max()', np.max(displace))
        
        h = np.array(detections)  
        gt = np.array(detections_gt)
        print(i, 'gt', gt.shape, 'h', h.shape)
        print('y_heatmap', y_heatmap.shape, 'heatmap', heatmap.shape, 'max', np.max(heatmap), 'min', np.min(heatmap))
        
        heatmap_scale = (255//(1+NUM_KEYPOINT))
        if len(np.array(heatmap).shape) < 3:            
            heatmap_rev = (256 - heatmap.astype(np.float)*heatmap_scale).astype(np.uint8)
            heatmap_arr = np.array(heatmap).astype(np.uint8) 
            heatmap_arr = np.stack((heatmap_arr* heatmap_scale, heatmap_rev, np.power(heatmap_arr*5, 2)), -1)
        else:
            heatmap_arr = np.array(heatmap).astype(np.uint8) 
                
        img_heatmap = Image.fromarray(heatmap_arr)
        
        img = Image.fromarray(x)
        img_copy = x.copy()
        img_copy[:, :, 0] = tf.cast(np.array(y_heatmap*heatmap_scale), tf.uint8)
        
        img_gt = Image.fromarray(img_copy)
        draw_box_keypoint(img, h, is_show_class=is_show_class)
        draw_box_keypoint(img_gt, gt, is_show_class=is_show_class)
        draw_box_keypoint(img_heatmap, gt, is_show_class=is_show_class)        
        
        #print('heatmap_arr', heatmap_arr.shape)
        display(stack_image_3(img, img_gt, img_heatmap))    
        #display(stack_image(img, img_gt))    

In [None]:
len(np.array([1,2]).shape)

In [None]:
def load_weight():           
    print('latest_checkpoint', path_weight)
    model.load_weights(path_weight)

In [None]:
num_classes, NUM_KEYPOINT_CLASS

In [None]:
#filters
anchor_ch = 4+1+num_classes+NUM_KEYPOINT*(2+1)
anchor_ch

In [None]:
#with strategy.scope():
optimizer = tf.optimizers.SGD(learning_rate=1e-2, momentum=0.25)
loss_detect = NetLoss(num_classes, NUM_KEYPOINT_CLASS)
loss_heatmap = HeatmapLoss(NUM_KEYPOINT + 1)
loss_heatmap_coord = HeatmapCoordLoss(NUM_KEYPOINT + 1, anchor_ch)
model = createNet(num_classes, NUM_KEYPOINT, 1, anchor_k, is_backbone_train=True)
metric_det = [recall, precision]

losses = {"detect": loss_detect, 'heatmap': loss_heatmap, 'heatmap_coord':loss_heatmap_coord}
metrics = {"detect": metric_det, 'heatmap': [heatmap_acc, visible_acc]}#, 'heatmap_coord':nms_kp_dist} 
loss_weights = {"detect": 0.1, 'heatmap': 1.0, 'heatmap_coord':0.2}

model.compile(loss=losses, optimizer=optimizer, metrics=metrics, loss_weights=loss_weights)
callbacks_list = [
    tf.keras.callbacks.ModelCheckpoint(
        filepath=path_weight,
        monitor="loss",
        save_best_only=False,
        save_weights_only=True,
        verbose=0,
        save_freq=200
    )
]

In [None]:
model.summary()#b1:13,906,006, b2:15,431,112, b3t:25,731,444, b4:25M

In [None]:
print(len(list_x_train), len(list_x_test), batch_size)

In [None]:
#with strategy.scope():
load_weight()

In [None]:
out = model.evaluate(train_dataset.take(30))

In [None]:
#3s 116ms/step - loss: 23.5998 - tf_op_layer_concat_loss: 44.9855 - tf_op_layer_concat_3_loss: 0.9294 - tf_op_layer_concat_3_1_loss: 72.6875 - tf_op_layer_concat_recall: 0.6485 - tf_op_layer_concat_precision: 0.8724 - tf_op_layer_concat_3_heatmap_acc: 0.1274 - tf_op_layer_concat_3_visible_acc: 0.1581
out = model.evaluate(val_dataset.take(30))

In [None]:
'''
2s 177ms/step - loss: 0.5565 - tf_op_layer_concat_283_loss: 0.4345 - tf_op_layer_ResizeBilinear_16_loss: 0.0122 - tf_op_layer_concat_283_recall: 1.0000 - tf_op_layer_concat_283_precision: 1.0000 - tf_op_layer_concat_283_kp_L1: 0.7677 - tf_op_layer_concat_283_kp_cls_acc: 0.5468 - tf_op_layer_ResizeBilinear_16_hitmap_acc: 0.9748
4s 182ms/step - loss: 0.1565 - tf_op_layer_concat_51_loss: 0.0050 - tf_op_layer_concat_52_loss: 0.1560 - tf_op_layer_concat_52_1_loss: 20.4920 - tf_op_layer_concat_51_recall: 1.0000 - tf_op_layer_concat_51_precision: 1.0000 - tf_op_layer_concat_51_kp_L1: 4.4291 - tf_op_layer_concat_51_kp_cls_acc: 0.3286 - tf_op_layer_concat_52_heatmap_acc: 0.9500
4s 185ms/step - loss: 0.1053 - tf_op_layer_concat_449_loss: 0.0402 - tf_op_layer_concat_450_loss: 0.1013 - tf_op_layer_concat_450_1_loss: 20.1714 - tf_op_layer_concat_449_recall: 1.0000 - tf_op_layer_concat_449_precision: 1.0000 - tf_op_layer_concat_449_kp_L1: 4.4300 - tf_op_layer_concat_449_kp_cls_acc: 0.3288 - tf_op_layer_concat_450_heatmap_acc: 0.9571
0s 45ms/step - loss: 0.0947 - tf_op_layer_concat_loss: 1.7922e-04 - tf_op_layer_concat_1_loss: 0.5477 - tf_op_layer_concat_1_1_loss: 0.3989 - tf_op_layer_concat_recall: 1.0000 - tf_op_layer_concat_precision: 1.0000 - tf_op_layer_concat_1_heatmap_acc: 0.8923
2s 93ms/step - loss: 0.0963 - tf_op_layer_concat_80_loss: 0.0200 - tf_op_layer_concat_81_loss: 0.4499 - tf_op_layer_concat_81_1_loss: 0.4922 - tf_op_layer_concat_80_recall: 1.0000 - tf_op_layer_concat_80_precision: 1.0000 - tf_op_layer_concat_81_heatmap_acc: 0.7975
3s 95ms/step - loss: 0.2461 - tf_op_layer_concat_343_loss: 0.3998 - tf_op_layer_concat_344_loss: 0.0961 - tf_op_layer_concat_344_1_loss: 0.3665 - tf_op_layer_concat_343_recall: 0.9974 - tf_op_layer_concat_343_precision: 0.9998 - tf_op_layer_concat_344_heatmap_acc: 0.9175
3s 98ms/step - loss: 0.1898 - tf_op_layer_concat_1016_loss: 0.3246 - tf_op_layer_concat_1017_loss: 0.0708 - tf_op_layer_concat_1017_1_loss: 0.1729 - tf_op_layer_concat_1016_recall: 0.9996 - tf_op_layer_concat_1016_precision: 0.9986 - tf_op_layer_concat_1017_heatmap_acc: 0.8858
4s 101ms/step - loss: 0.1934 - tf_op_layer_concat_63_loss: 0.2979 - tf_op_layer_concat_64_loss: 0.0594 - tf_op_layer_concat_64_1_loss: 0.2083 - tf_op_layer_concat_63_recall: 0.9989 - tf_op_layer_concat_63_precision: 0.9991 - tf_op_layer_concat_64_heatmap_acc: 0.9230
7s 145ms/step - loss: 0.1202 - tf_op_layer_concat_272_loss: 0.3086 - tf_op_layer_concat_273_loss: 0.0521 - tf_op_layer_concat_273_1_loss: 0.0745 - tf_op_layer_concat_272_recall: 0.9996 - tf_op_layer_concat_272_precision: 0.9986 - tf_op_layer_concat_273_heatmap_acc: 0.9329 - tf_op_layer_concat_273_visible_acc: 0.8672
8s 160ms/step - loss: 0.1351 - tf_op_layer_concat_52_loss: 0.4764 - tf_op_layer_concat_55_loss: 0.0610 - tf_op_layer_concat_55_1_loss: 0.1054 - tf_op_layer_concat_52_recall: 0.9983 - tf_op_layer_concat_52_precision: 0.9995 - tf_op_layer_concat_55_heatmap_acc: 0.9215 - tf_op_layer_concat_55_visible_acc: 0.8757
'''
epochs = 100000
hist = model.fit(train_dataset, epochs=epochs, callbacks=callbacks_list, verbose=1)

In [None]:
model.save_weights(path_weight)
path_weight

In [None]:
def decode_heatmap(images, predictions, h_heatmap_score, displace,
                      num_classes=num_classes,
                      confidence_threshold=0.5,
                      nms_iou_threshold=0.2,
                      max_detections_per_class=100,
                      max_detections=150,
                      box_variance=[0.1, 0.1, 0.2, 0.2]):
    
    _anchor_box = AnchorBox()   
    m = tf.shape(images)[0]    
    image_shape = tf.cast(tf.shape(images), dtype=tf.float32)
    img_h = image_shape[1]
    img_w = image_shape[2]
    anchor_boxes = _anchor_box.get_anchors(image_shape[1], image_shape[2])   
    h_box, objectness, h_cls, h_keypoint = split_hyperthesis(predictions) 
    
    boxes = _decode_box_predictions(anchor_boxes[None, ...], h_box)     
    boxes_2d = tf.reshape(boxes, [-1, 4])    
    scores = tf.reshape(objectness, [-1, 1])
    cls = objectness * 0 + 1
    cls = tf.reshape(cls, [-1, 1])
    
    ccbox = tf.concat((cls, scores, boxes_2d), -1)
    ccbox_check = tf.concat((boxes_2d, cls), -1)
    
    selected_indices, selected_scores = tf.image.non_max_suppression_with_scores(    
        ccbox[:, 2:2+4],
        ccbox[:, 1],        
        max_detections,
        nms_iou_threshold,
        confidence_threshold,        
    )
    output = tf.gather(ccbox_check, selected_indices)    
    
    x1 = output[:, 0] / img_w
    y1 = output[:, 1] / img_h
    x2 = output[:, 2] / img_w
    y2 = output[:, 3] / img_h
    boxes = tf.stack((y1, x1, y2, x2), -1)    #[num_boxes, 4], normalized coordinates ` [y1, x1, y2, x2]
    box_indices = tf.zeros_like(boxes[:, 0], tf.int32)
    crop_size = [100, 100]#h, w
    crop_image = tf.image.crop_and_resize(images, boxes, box_indices, crop_size)
    crop_heatmap_score = tf.image.crop_and_resize(h_heatmap_score, boxes, box_indices, crop_size)
    crop_displace = tf.image.crop_and_resize(displace, boxes, box_indices, crop_size)    
    
    h_heatmap_cls = tf.argmax(crop_heatmap_score, -1) * 12
    h_heatmap_cls = tf.cast(h_heatmap_cls, tf.float32)    
    
    coord_map = coordinate_map_norm(img_h, img_w)
    coord_map = tf.expand_dims(coord_map, 0) + tf.zeros_like(h_heatmap_score[:,:,:,:2])    
    crop_coord_map = tf.image.crop_and_resize(coord_map, boxes, box_indices, crop_size)

    kp_xy, visible = get_kp_from_heatmap_displacex2(crop_heatmap_score, crop_displace, crop_size, crop_coord_map)
    #kp_xy, visible, valid_group_show = get_kp_from_heatmap_displace(crop_heatmap_score, crop_displace, crop_size)
    
    kp_offset = output[:, None, :2]    
    kp_scale = output[:, None, 2:4] - output[:, None, 0:2]
    kp_xy = kp_scale * kp_xy + kp_offset
    kp_cls = tf.expand_dims(visible, -1)
    kp = tf.concat((kp_xy, kp_cls), -1)
    kp = tf.reshape(kp, [-1, NUM_KEYPOINT * NUM_KEYPOINT_CH])
    output = tf.concat((output, kp), -1)
    
    return output, crop_image

In [None]:
image = tf.keras.Input(shape=[None, None, 3], name="image")
predictions = model(image, training=False)
h_detect = predictions['detect']
h_heatmap_score_displace = predictions['heatmap']
h_heatmap_score, h_visible_score, displace = split_heatmap_displace(h_heatmap_score_displace)
h_heatmap = tf.argmax(h_heatmap_score, -1)
detections, crop_image = decode_heatmap(image, h_detect, h_heatmap_score, displace, confidence_threshold=0.85, nms_iou_threshold=0.15)
model_out = {'detect':detections, 'heatmap':h_heatmap, 'crop_image':crop_image, 'crop_heatmap':crop_image}
inference_model = tf.keras.Model(inputs=image, outputs=model_out)

In [None]:
compare_gt_h(train_dataset, stride=1, is_show_class=False)

In [None]:
compare_gt_h(val_dataset, stride=1, is_show_class=False)

In [None]:
compare_gt_h(val_dataset, stride=1, is_show_class=False)

In [None]:
compare_gt_h(val_dataset, stride=1, is_show_class=False)

In [None]:
compare_gt_h(val_dataset, stride=1, is_show_class=False)

In [None]:
compare_gt_h(val_dataset, stride=1, is_show_class=False)

세그멘테이션 마스크 레이블이 있다면 사람이 겹치는 상황에서 세그멘테이션 내부에서 키포인트 위치 추정를 추정해서 다른 사람의 신체에 혼동없이 위치를 학습시킬 수있다.

i = 0
for image, target_dict in val_dataset:        
    i += 1   
    x = np.array(image[0], np.uint8)
    y_detect = target_dict['detect']
    y_heatmap = target_dict['heatmap']    
    model_out = inference_model.predict(image)  
    crop_img = model_out['crop_image']
    crop_heatmap = model_out['crop_heatmap']
    heatmap = model_out['heatmap']
    y_heatmap = y_heatmap[0]
    heatmap = heatmap[0]
    
    print('crop_img', crop_img.shape, np.max(crop_img))
    print('crop_heatmap', crop_heatmap.shape, np.max(crop_heatmap))
    crop_img_concat = np.concatenate(crop_img, 1)
    
    crop_heatmap_rgb = np.stack((crop_heatmap, crop_heatmap, crop_heatmap), -1)
    crop_heatmap_concat = np.concatenate(crop_heatmap_rgb, 1)
    print('crop_heatmap_concat', crop_heatmap_concat.shape)
    
    crop_img_concat = crop_img_concat.astype(np.uint8)
    crop_heatmap_concat = crop_heatmap_concat.astype(np.uint8)
    crop_img_show = np.concatenate((crop_img_concat, crop_heatmap_concat), 0)
    
    #plt.figure(figsize=(10, 10))
    plt.imshow(crop_img_show)
    plt.show()