In [1]:
import os
import time
import json
import math
from tqdm import tqdm

import cv2
import numpy as np
import tensorflow as tf

# 데이터 전처리

In [2]:
def getUnionBBox(aBB, bBB, ih, iw):
    """Return the padded bounding box enclosing both `aBB` and `bBB`.

    Boxes are indexed [0..3]; entries 0 and 2 are clipped against `iw`
    and entries 1 and 3 against `ih`. A fixed 10-unit margin is added on
    every side before clipping to the range [0, iw] x [0, ih].
    """
    margin = 10
    left = min(aBB[0], bBB[0]) - margin
    top = min(aBB[1], bBB[1]) - margin
    right = max(aBB[2], bBB[2]) + margin
    bottom = max(aBB[3], bBB[3]) + margin
    return [max(0, left), max(0, top), min(iw, right), min(ih, bottom)]

In [3]:
def getAppr(im, bb):
    """Crop box `bb` out of `im`, rescale the crop and apply VGG16 preprocessing.

    The crop is `im[bb[1]:bb[3], bb[0]:bb[2], :]`. It is scaled by
    224/width and 224/height with bilinear interpolation and then passed
    through `tf.keras.applications.vgg16.preprocess_input`.
    """
    x1, y1, x2, y2 = bb[0], bb[1], bb[2], bb[3]
    crop = im[y1 : y2, x1 : x2, :]
    crop_h, crop_w = crop.shape[0], crop.shape[1]
    # Scale factors (fx, fy) are given instead of an explicit output size.
    resized = cv2.resize(crop, None, None, 224.0 / crop_w, 224.0 / crop_h,
                         interpolation=cv2.INTER_LINEAR)
    return tf.keras.applications.vgg16.preprocess_input(resized)

In [4]:
def getDualMask(ih, iw, bb):
    """Rasterise box `bb` onto a 32x32 grid of zeros and ones.

    Box coordinates are scaled by 32/iw (entries 0 and 2) and 32/ih
    (entries 1 and 3); the start edge is floored, the end edge is ceiled,
    and both are clamped to [0, 32]. Cells inside the scaled box are 1.
    """
    scale_y = 32.0 / ih
    scale_x = 32.0 / iw
    col_start = max(0, int(math.floor(bb[0] * scale_x)))
    col_stop = min(32, int(math.ceil(bb[2] * scale_x)))
    row_start = max(0, int(math.floor(bb[1] * scale_y)))
    row_stop = min(32, int(math.ceil(bb[3] * scale_y)))

    grid = np.zeros((32, 32))
    grid[row_start : row_stop, col_start : col_stop] = 1

    # Sanity check: the filled region must match the computed extent.
    expected_cells = (row_stop - row_start) * (col_stop - col_start)
    assert(grid.sum() == expected_cells)
    return grid

In [5]:
def forward_batch(model, ims, poses, qas, qbs):
    """Run `model` on one batch of already-stacked inputs and return its output.

    Args:
        model: callable invoked as `model(qa, qb, im, posdata)`.
        ims: batch of image crops; cast to float32.
        poses: batch of position masks; cast to float32.
        qas, qbs: batches of one-hot label vectors; cast to int32.

    Returns:
        Whatever `model` returns for the batch.

    The inputs are cast to the same dtypes the previous tf.data-based
    implementation declared (int32 for qa/qb, float32 for im/posdata) and
    fed to the model directly. This avoids building and caching a
    throwaway dataset on every call and removes the dependency on the
    module-level AUTOTUNE constant.
    """
    qa = tf.cast(qas, tf.int32)
    qb = tf.cast(qbs, tf.int32)
    im = tf.cast(ims, tf.float32)
    posdata = tf.cast(poses, tf.float32)
    return model(qa, qb, im, posdata)

In [6]:
def _to_object_array(items):
    """Pack `items` into a 1-D object array with one element per item."""
    packed = np.empty(len(items), dtype=object)
    for idx, item in enumerate(items):
        packed[idx] = item
    return packed


def test_model(model, out_path):
    """Score every ground-truth pair of every test image and save the results.

    Reads the module-level `image_paths`, `all_gts` and `all_gt_bboxes`.
    For each image, every ground-truth (subject, object) pair is turned
    into model inputs, run through `forward_batch` in chunks of 20, and
    every output column whose score is not below 0.05 is recorded as
    `[score, 1, 1, gts[j, 0], k, gts[j, 2]]` together with the pair's boxes.

    Args:
        model: model passed through to `forward_batch`.
        out_path: destination of the `.npz` archive; it holds the keys
            `pred` and `pred_bboxes`, each a 1-D object array with one
            entry per image.

    Raises:
        FileNotFoundError: if an image cannot be read by `cv2.imread`.
    """
    num_img = len(image_paths)
    num_class = 101
    thresh = 0.05
    batch_size = 20
    pred = []
    pred_bboxes = []

    for i in range(num_img):
        raw = cv2.imread(image_paths[i])
        if raw is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError("could not read image: {}".format(image_paths[i]))
        im = raw.astype(np.float32, copy=False)
        ih = im.shape[0]
        iw = im.shape[1]
        gts = np.array(all_gts[i])
        gt_bboxes = np.array(all_gt_bboxes[i])
        num_gts = gts.shape[0]
        pred.append([])
        pred_bboxes.append([])
        if num_gts == 0:
            # Nothing to score; the empty lists stay as placeholders.
            continue

        ims = []
        poses = []
        qas = []
        qbs = []
        for j in range(num_gts):
            sub = gt_bboxes[j, 0, :]
            obj = gt_bboxes[j, 1, :]
            rBB = getUnionBBox(sub, obj, ih, iw)
            ims.append(getAppr(im, rBB))
            poses.append(np.array([getDualMask(ih, iw, sub), getDualMask(ih, iw, obj)]))
            # One-hot vectors of length num_class - 1; labels are shifted by one.
            qa = np.zeros(num_class - 1)
            qa[gts[j, 0] - 1] = 1
            qb = np.zeros(num_class - 1)
            qb[gts[j, 2] - 1] = 1
            qas.append(qa)
            qbs.append(qb)
        ims = np.array(ims)
        # Move the two-mask axis last.
        poses = np.array(poses).transpose((0, 2, 3, 1))
        qas = np.array(qas)
        qbs = np.array(qbs)

        # Collect per-chunk outputs as NumPy arrays and stack once.
        chunks = []
        for start in range(0, num_gts, batch_size):
            stop = min(start + batch_size, num_gts)
            chunk = forward_batch(model, ims[start:stop], poses[start:stop],
                                  qas[start:stop], qbs[start:stop])
            chunks.append(np.asarray(chunk))
        itr_pred = np.vstack(chunks)

        for j in range(num_gts):
            sub = gt_bboxes[j, 0, :]
            obj = gt_bboxes[j, 1, :]
            # Keep every column that is not below the threshold (same rule
            # as skipping `score < thresh`), in ascending column order.
            for k in np.flatnonzero(~(itr_pred[j] < thresh)):
                pred[i].append([itr_pred[j, k], 1, 1, gts[j, 0], k, gts[j, 2]])
                pred_bboxes[i].append([sub, obj])
        pred[i] = np.array(pred[i])
        pred_bboxes[i] = np.array(pred_bboxes[i])

    print("writing file..")
    # Per-image results have different lengths; store them explicitly as
    # object arrays so NumPy does not try to build a rectangular array.
    np.savez(out_path,
             pred=_to_object_array(pred),
             pred_bboxes=_to_object_array(pred_bboxes))

In [7]:
def computeArea(bb):
    """Return the area of box `bb` with inclusive edges (+1 per side).

    A side whose end coordinate lies more than one unit before its start
    contributes 0, so such boxes have area 0.
    """
    width = max(0, bb[2] - bb[0] + 1)
    height = max(0, bb[3] - bb[1] + 1)
    return width * height

In [8]:
def computeIoU(bb1, bb2):
    """Return the intersection-over-union of two boxes.

    Areas are computed with `computeArea`. When the union area is zero
    (both boxes degenerate) the result is 0.0 instead of dividing by zero.
    """
    ibb = [max(bb1[0], bb2[0]),
           max(bb1[1], bb2[1]),
           min(bb1[2], bb2[2]),
           min(bb1[3], bb2[3])]
    iArea = computeArea(ibb)
    uArea = computeArea(bb1) + computeArea(bb2) - iArea
    if uArea <= 0:
        return 0.0
    # Adding 0.0 forces floating-point division.
    return (iArea + 0.0) / uArea

In [9]:
def computeOverlap(detBBs, gtBBs):
    """Return the smaller of the two row-wise IoUs between `detBBs` and `gtBBs`.

    Row 0 of `detBBs` is compared with row 0 of `gtBBs`, and row 1 with
    row 1, using `computeIoU`.
    """
    pair_ious = [computeIoU(detBBs[row, :], gtBBs[row, :]) for row in (0, 1)]
    return min(pair_ious)

In [10]:
def eval_recall(det_file_path, num_dets=50, ov_thresh=0.5):
    """Compute and print recall of saved detections against the ground truth.

    Reads the module-level `all_gts` and `all_gt_bboxes`. For each image
    the detections are ranked by the sum of the logs of their first three
    columns, the top `num_dets` are kept (all of them if `num_dets <= 0`),
    and each is greedily matched to the not-yet-matched ground truth with
    identical label triple and the highest `computeOverlap` that is at
    least `ov_thresh`.

    Args:
        det_file_path: `.npz` archive with keys `pred` and `pred_bboxes`.
        num_dets: maximum number of detections considered per image.
        ov_thresh: minimum overlap for a detection to count as a match.

    Returns:
        Matched ground truths divided by total ground truths, as a float;
        0.0 when there are no ground truths or no detections at all.
    """
    # Read both arrays inside the context manager so the archive is closed.
    with np.load(det_file_path, allow_pickle=True) as det_file:
        dets = det_file['pred']
        det_bboxes = det_file['pred_bboxes']

    num_tp = 0
    total_num_gts = 0
    for i in range(len(dets)):
        gts = np.array(all_gts[i])
        gt_bboxes = np.array(all_gt_bboxes[i])
        num_gts = gts.shape[0]
        total_num_gts += num_gts
        # Images without detections are stored as empty placeholders.
        if not (isinstance(dets[i], np.ndarray) and dets[i].shape[0] > 0):
            continue

        gt_detected = np.zeros(num_gts)
        det_score = np.log(dets[i][:, 0]) + np.log(dets[i][:, 1]) + np.log(dets[i][:, 2])
        inds = np.argsort(det_score)[::-1]
        if num_dets > 0 and num_dets < len(inds):
            inds = inds[:num_dets]
        top_dets = dets[i][inds, 3:]
        top_det_bboxes = det_bboxes[i][inds, :]

        for j in range(len(inds)):
            ov_max = 0
            arg_max = -1
            for k in range(num_gts):
                same_triple = (top_dets[j, 0] == gts[k, 0]
                               and top_dets[j, 1] == gts[k, 1]
                               and top_dets[j, 2] == gts[k, 2])
                if gt_detected[k] == 0 and same_triple:
                    ov = computeOverlap(top_det_bboxes[j, :, :], gt_bboxes[k, :, :])
                    if ov >= ov_thresh and ov > ov_max:
                        ov_max = ov
                        arg_max = k
            if arg_max != -1:
                # Each ground truth can be matched at most once.
                gt_detected[arg_max] = 1
                num_tp += 1

    # Recall over all kept detections equals matched / total; guard the
    # empty cases that previously raised.
    if total_num_gts == 0:
        top_recall = 0.0
    else:
        top_recall = (num_tp + 0.0) / total_num_gts
    print('Recall:', top_recall)
    return top_recall

In [11]:
# Training annotation file and the length of the one-hot vectors built
# from the aLabel / bLabel fields.
dataset = './reltrain.json'
nclass = 100

# Use a context manager so the file handle is closed after parsing.
with open(dataset) as annotation_file:
    samples = json.load(annotation_file)
num_instance = len(samples)

In [12]:
# Per-sample model inputs, accumulated in annotation order.
qas, qbs, ims, poses, labels = [], [], [], [], []

for i, sample in enumerate(samples):
    im = cv2.imread(sample["imPath"]).astype(np.float32, copy=False)
    ih, iw = im.shape[0], im.shape[1]

    # One-hot vectors of length nclass; labels are shifted by one when
    # used as indices.
    qa = np.zeros(nclass)
    qb = np.zeros(nclass)
    qa[sample["aLabel"] - 1] = 1
    qb[sample["bLabel"] - 1] = 1

    # Two 32x32 masks, one per box.
    masks = [getDualMask(ih, iw, sample["aBBox"]),
             getDualMask(ih, iw, sample["bBBox"])]

    qas.append(qa)
    qbs.append(qb)
    ims.append(getAppr(im, sample["rBBox"]))
    poses.append(masks)
    labels.append(sample["rLabel"])

In [13]:
# Move the two-mask axis last: (N, 2, 32, 32) -> (N, 32, 32, 2).
# NOTE: this rebinds `poses`, so re-running the cell without re-running
# the previous one would transpose the axes a second time.
poses = np.transpose(np.array(poses), (0, 2, 3, 1))

# Stack the per-sample lists into arrays.
qa, qb, im = np.array(qas), np.array(qbs), np.array(ims)
posdata = np.array(poses)
labels = np.array(labels)

In [14]:
# One dict per sample, keyed the same way the datasets are declared below.
train_set = [
    {'qa': qa[i], 'qb': qb[i], 'im': im[i], 'posdata': posdata[i], 'labels': labels[i]}
    for i in range(num_instance)
]

# Hold out the last 7500 samples for validation.
valid_set = train_set[-7500:]
train_set = train_set[:-7500]
train_elements, valid_elements = tuple(train_set), tuple(valid_set)

In [15]:
# Wrap the in-memory training samples as a tf.data pipeline; the declared
# types convert the one-hot vectors to int32 and the rest to float32/int32.
train_dataset = tf.data.Dataset.from_generator(
    lambda: train_elements,
    output_types={
        'qa': tf.int32,
        'qb': tf.int32,
        'im': tf.float32,
        'posdata': tf.float32,
        'labels': tf.int32,
    },
)

In [16]:
# Same element structure as the training pipeline, over the held-out samples.
valid_dataset = tf.data.Dataset.from_generator(
    lambda: valid_elements,
    output_types={
        'qa': tf.int32,
        'qb': tf.int32,
        'im': tf.float32,
        'posdata': tf.float32,
        'labels': tf.int32,
    },
)

In [17]:
# Print the first three training elements, numbered from 1.
for idx, sample in enumerate(train_dataset.take(3), start=1):
    print(idx, ':', sample)

1 : {'qa': <tf.Tensor: id=73, shape=(100,), dtype=int32, numpy=
array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])>, 'qb': <tf.Tensor: id=74, shape=(100,), dtype=int32, numpy=
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0])>, 'im': <tf.Tensor: id=70, shape=(224, 224, 3), dtype=float32, numpy=
array([[[127.84672  , 124.00672  , 131.32     ],
        [130.03198  , 127.19198  , 130.29099  ],
        [133.05893  , 129.12054  , 

In [18]:
# Print the first three validation elements, numbered from 1.
for idx, sample in enumerate(valid_dataset.take(3), start=1):
    print(idx, ':', sample)

1 : {'qa': <tf.Tensor: id=95, shape=(100,), dtype=int32, numpy=
array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])>, 'qb': <tf.Tensor: id=96, shape=(100,), dtype=int32, numpy=
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])>, 'im': <tf.Tensor: id=92, shape=(224, 224, 3), dtype=float32, numpy=
array([[[ -31.561043 ,  -72.40103  , -103.30203  ],
        [ -34.46836  ,  -75.26817  , -106.161194 ],
        [ -36.227394 ,  -75.0

In [19]:
# Training configuration.
BUFFER_SIZE = num_instance  # shuffle buffer size: one slot per loaded sample
BATCH_SIZE = 32  # batch size of the training and validation pipelines
AUTOTUNE = tf.data.experimental.AUTOTUNE  # lets tf.data choose the prefetch buffer size
EPOCHS = 100  # upper bound on the number of training epochs

In [20]:
# NOTE: both names are rebound from their previous values, so this cell
# must be run exactly once after the datasets are created.
train_dataset = (
    train_dataset
    .cache()
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(buffer_size=AUTOTUNE)
)
valid_dataset = (
    valid_dataset
    .cache()
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=AUTOTUNE)
)

In [21]:
# Test annotation file (note: `test_dataset` holds a path string here).
test_dataset = './reltest.json'
nclass = 100

# Use a context manager so the file handle is closed after parsing.
with open(test_dataset) as annotation_file:
    test_samples = json.load(annotation_file)
test_num_instance = len(test_samples)

In [22]:
# Group consecutive test annotations that share an image path.
# image_paths[n] is the n-th distinct run's path, gt_label[n] its list of
# [aLabel, rLabel, bLabel] triples and gt_box[n] its [aBBox, bBBox] pairs.
image_paths = []
gt_label = []
gt_box = []

for entry in test_samples:
    img_path = entry['imPath']
    # A new group starts at the first entry and whenever the path changes.
    if not image_paths or image_paths[-1] != img_path:
        image_paths.append(img_path)
        gt_label.append([])
        gt_box.append([])
    gt_label[-1].append([entry['aLabel'], entry['rLabel'], entry['bLabel']])
    gt_box[-1].append([entry['aBBox'], entry['bBBox']])

In [23]:
# Images have different numbers of annotations, so store one Python list
# per image in a 1-D object array. Calling np.array() directly on the
# ragged nested lists raises ValueError on recent NumPy releases.
all_gts = np.empty(len(gt_label), dtype=object)
all_gt_bboxes = np.empty(len(gt_box), dtype=object)
for idx in range(len(gt_label)):
    all_gts[idx] = gt_label[idx]
    all_gt_bboxes[idx] = gt_box[idx]

# 모델

In [24]:
class AppearanceSubnet(tf.keras.layers.Layer):
    """VGG16 convolutional base followed by a three-layer dense head.

    The head flattens the VGG16 feature map and applies
    Dense(4096)-ReLU, Dense(4096)-ReLU and Dense(256)-ReLU.
    """

    def __init__(self, input_shape):
        super().__init__()

        # ImageNet-initialised VGG16 without its original classifier.
        self.vgg16 = tf.keras.applications.vgg16.VGG16(
            include_top=False, weights='imagenet', input_shape=input_shape)

        head_layers = [
            tf.keras.layers.Flatten(name='flat'),
            tf.keras.layers.Dense(4096, name='fc6'),
            tf.keras.layers.ReLU(name='relu6'),
            tf.keras.layers.Dense(4096, name='fc7'),
            tf.keras.layers.ReLU(name='relu7'),
            tf.keras.layers.Dense(256, kernel_initializer='glorot_normal', name='fc8'),
            tf.keras.layers.ReLU(name='relu8'),
        ]
        self.fc = tf.keras.Sequential(head_layers)

    def call(self, x):
        """Return the 256-unit head output for image batch `x`."""
        return self.fc(self.vgg16(x))

In [25]:
class SpatialSubnet(tf.keras.layers.Layer):
    """Three-convolution stack applied to the position masks.

    Two 5x5 stride-2 'same' convolutions are followed by an 8x8
    convolution with default padding; for a 32x32 input this reduces the
    spatial extent 32 -> 16 -> 8 -> 1, leaving 64 channels.
    """

    def __init__(self, input_shape):
        super().__init__()

        conv_layers = [
            tf.keras.layers.Conv2D(96, 5, 2, padding='same', input_shape=input_shape, name='conv1_p'),
            tf.keras.layers.ReLU(name='relu1_p'),
            tf.keras.layers.Conv2D(128, 5, 2, padding='same', name='conv2_p'),
            tf.keras.layers.Conv2D(64, 8, name='conv3_p'),
            tf.keras.layers.ReLU(name='relu3_p'),
        ]
        self.conv = tf.keras.Sequential(conv_layers)

    def call(self, x):
        """Return the convolutional feature map for mask batch `x`."""
        return self.conv(x)

In [26]:
class CombineSubnets(tf.keras.layers.Layer):
    """Fuse the appearance vector and the spatial feature map.

    The spatial map is reduced to its [0, 0] position, concatenated with
    the appearance vector, and passed through Dense(128)-ReLU and
    Dense(70)-ReLU.
    """

    def __init__(self):
        super().__init__()

        self.concat1_c = tf.keras.layers.Concatenate(name='concat1_c')

        fusion_layers = [
            tf.keras.layers.Dense(128, name='fc2_c'),
            tf.keras.layers.ReLU(name='relu2_c'),
            tf.keras.layers.Dense(70, kernel_initializer='glorot_normal', name='PhiR_0'),
            tf.keras.layers.ReLU(name='relu_0'),
        ]
        self.fc = tf.keras.Sequential(fusion_layers)

    def call(self, x1, x2):
        """Return the fused 70-unit vector (qr0) for `x1` and feature map `x2`."""
        spatial_vec = x2[:, 0, 0, :]
        fused = self.concat1_c([x1, spatial_vec])
        return self.fc(fused)

In [27]:
class DRLayer(tf.keras.layers.Layer):
    """One refinement step: qr <- [ReLU](PhiA(qa) + PhiB(qb) + PhiR(qr)).

    Args:
        i: index used in the names of the contained layers.
        activation: whether a ReLU is applied to the summed projections.
            The flag is normalised with bool() so that the constructor and
            `call` always agree on whether the ReLU layer exists.
    """

    def __init__(self, i, activation=True):
        super().__init__()

        self.activation = bool(activation)

        # Three 70-unit projections, one per input.
        self.PhiA = tf.keras.layers.Dense(70, kernel_initializer='glorot_normal', name='PhiA_%d'%(i)) # qar_i
        self.PhiB = tf.keras.layers.Dense(70, kernel_initializer='glorot_normal', name='PhiB_%d'%(i)) # qbr_i
        self.PhiR = tf.keras.layers.Dense(70, kernel_initializer='glorot_normal', name='PhiR_%d'%(i)) # q_i_r
        self.QSum = tf.keras.layers.Add(name='QSum_%d'%(i)) # qr_i_un
        if self.activation:
            self.relu = tf.keras.layers.ReLU(name='relu_%d'%(i)) # qr_i

    def call(self, qa, qb, qr):
        """Return the updated `qr` given `qa`, `qb` and the current `qr`."""
        qar = self.PhiA(qa)
        qbr = self.PhiB(qb)
        qrr = self.PhiR(qr)
        qrun = self.QSum([qar, qbr, qrr])
        if self.activation:
            return self.relu(qrun)
        return qrun

In [28]:
class DRModule(tf.keras.layers.Layer):
    """Chain of `num_layers` DRLayer steps, numbered from 1.

    Every step applies its ReLU except the last one.
    """

    def __init__(self, num_layers):
        super().__init__()

        self.num_layers = num_layers

        # activation is False only for the final step (index == num_layers).
        self.dr_layers = [DRLayer(step, activation=(step != num_layers))
                          for step in range(1, num_layers + 1)]

    def call(self, qa, qb, qr):
        """Run `qr` through every step in order and return the result."""
        for layer in self.dr_layers:
            qr = layer(qa, qb, qr)
        return qr

In [29]:
class DRNet(tf.keras.Model):
    """Full model: appearance + spatial subnets, fusion, DR module, softmax.

    Note: later cells of this notebook define further classes that are
    also named `DRNet`; each new definition rebinds the name.
    """

    def __init__(self, num_layers=8, im_shape=(224, 224, 3), posdata_shape=(32, 32, 2)):
        super().__init__()

        self.appr = AppearanceSubnet(input_shape=im_shape)
        self.spatial = SpatialSubnet(input_shape=posdata_shape)
        self.combine = CombineSubnets()
        self.dr = DRModule(num_layers=num_layers)
        self.softmax = tf.keras.layers.Softmax()

    def call(self, qa, qb, im, posdata):
        """Return softmax scores for the given one-hot labels, image and masks."""
        appearance = self.appr(im)
        spatial = self.spatial(posdata)
        qr0 = self.combine(appearance, spatial)
        refined = self.dr(qa, qb, qr0)
        return self.softmax(refined)

# 학습

## Appearance + Spatial

In [30]:
class DRNet(tf.keras.Model):
    """Appearance-only variant: AppearanceSubnet -> Dense(70) -> softmax.

    The spatial, combine and DR stages are left out, so `num_layers`,
    `posdata_shape` and the `qa` / `qb` / `posdata` call inputs are
    accepted but not used.
    """

    def __init__(self, num_layers=8, im_shape=(224, 224, 3), posdata_shape=(32, 32, 2)):
        super().__init__()

        self.appr = AppearanceSubnet(input_shape=im_shape)
        # Temporary 70-way layer standing in for the omitted stages.
        self.temp_fc = tf.keras.layers.Dense(70)
        self.softmax = tf.keras.layers.Softmax()

    def call(self, qa, qb, im, posdata):
        """Return softmax scores computed from the image only."""
        appearance = self.appr(im)
        scores = self.temp_fc(appearance)
        return self.softmax(scores)

In [31]:
# Instantiate the appearance-only DRNet variant defined in the previous cell.
model_A = DRNet()

In [32]:
# Run one training batch through model_A and print its output.
for sample in train_dataset.take(1):
    print(model_A(sample['qa'], sample['qb'], sample['im'], sample['posdata']))

tf.Tensor(
[[2.91619767e-06 4.44451360e-13 1.05399537e-04 ... 2.86573765e-09
  4.12932966e-10 4.26556687e-16]
 [2.48048978e-04 6.38475376e-06 2.83182773e-04 ... 1.40345469e-09
  1.00871875e-04 8.52990290e-10]
 [9.66729799e-15 2.57160092e-18 8.01315438e-12 ... 1.08908086e-23
  5.76295129e-14 1.56510139e-25]
 ...
 [1.46318115e-02 2.91954738e-10 3.20148183e-06 ... 3.61358161e-07
  9.69319262e-06 7.58933751e-08]
 [1.79659866e-04 2.97755653e-10 1.12504184e-08 ... 3.84406293e-13
  3.37353434e-08 2.91698123e-14]
 [4.72869033e-08 7.48415652e-13 9.07016563e-12 ... 8.92637310e-13
  4.71332923e-15 8.43971975e-21]], shape=(32, 70), dtype=float32)


In [33]:
# Checkpointing for model_A: keep at most five checkpoints in the directory.
checkpoint_path = "./checkpoints/model_A"
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
ckpt = tf.train.Checkpoint(model_A=model_A, optimizer=optimizer)
ckpt_manager = tf.train.CheckpointManager(
    checkpoint=ckpt, directory=checkpoint_path, max_to_keep=5)

# Restore the newest checkpoint when one exists; otherwise do nothing.
if ckpt_manager.latest_checkpoint:
    ckpt.restore(ckpt_manager.latest_checkpoint)
    print ('Latest checkpoint restored!!')

Latest checkpoint restored!!


In [34]:
# Display the weights of model_A's first sub-layer (index 0).
model_A.get_layer(index=0).weights

[<tf.Variable 'block1_conv1/kernel:0' shape=(3, 3, 3, 64) dtype=float32, numpy=
 array([[[[ 0.43049687,  0.11179236,  0.03296695, ..., -0.12999196,
           -0.05383801,  0.00646047],
          [ 0.5505768 ,  0.01797121,  0.09722268, ..., -0.08226837,
           -0.05173847,  0.0351446 ],
          [ 0.4801737 , -0.17290743,  0.03527067, ..., -0.12403691,
           -0.05083949,  0.03282929]],
 
         [[ 0.37458295,  0.15592553,  0.00062949, ..., -0.14577243,
           -0.2357477 , -0.06394353],
          [ 0.44038996,  0.04339506,  0.0497102 , ..., -0.09614545,
           -0.2967169 , -0.0725975 ],
          [ 0.40881994, -0.17108168, -0.00731635, ..., -0.11916465,
           -0.27686924, -0.04094899]],
 
         [[-0.06025299,  0.1292552 , -0.11693046, ..., -0.13795918,
           -0.37816146, -0.301669  ],
          [-0.0809814 ,  0.03768076, -0.10365398, ..., -0.09174562,
           -0.5062025 , -0.38599432],
          [-0.06475799, -0.15498775, -0.1406253 , ..., -0.12631123

In [35]:
class DRNet(tf.keras.Model):
    """Spatial-only variant: SpatialSubnet -> Dense(70) -> softmax.

    The appearance, combine and DR stages are left out, so `num_layers`,
    `im_shape` and the `qa` / `qb` / `im` call inputs are accepted but
    not used.
    """

    def __init__(self, num_layers=8, im_shape=(224, 224, 3), posdata_shape=(32, 32, 2)):
        super().__init__()

        self.spatial = SpatialSubnet(input_shape=posdata_shape)
        # Temporary 70-way layer standing in for the omitted stages.
        self.temp_fc = tf.keras.layers.Dense(70)
        self.softmax = tf.keras.layers.Softmax()

    def call(self, qa, qb, im, posdata):
        """Return softmax scores computed from the position masks only."""
        spatial_map = self.spatial(posdata)
        # Only the [0, 0] position of the feature map is used.
        scores = self.temp_fc(spatial_map[:, 0, 0, :])
        return self.softmax(scores)

In [36]:
# Instantiate the spatial-only DRNet variant defined in the previous cell.
model_S = DRNet()

In [37]:
# Run one training batch through model_S and print its output.
for sample in train_dataset.take(1):
    print(model_S(sample['qa'], sample['qb'], sample['im'], sample['posdata']))

tf.Tensor(
[[0.01472906 0.01452271 0.01434179 ... 0.01411616 0.01448155 0.01432762]
 [0.01422785 0.01433527 0.01430521 ... 0.0143196  0.01418864 0.01430278]
 [0.01386235 0.01423891 0.01702301 ... 0.01353547 0.01550338 0.01526098]
 ...
 [0.01457525 0.01405028 0.01513151 ... 0.01367773 0.01550545 0.01444594]
 [0.01399999 0.01500814 0.0159215  ... 0.01415354 0.01476369 0.01508181]
 [0.01438598 0.01452217 0.01454745 ... 0.01284428 0.01511166 0.0139914 ]], shape=(32, 70), dtype=float32)


In [38]:
# Checkpointing for model_S: keep at most five checkpoints in the directory.
checkpoint_path = "./checkpoints/model_S"
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
ckpt = tf.train.Checkpoint(model_S=model_S, optimizer=optimizer)
ckpt_manager = tf.train.CheckpointManager(
    checkpoint=ckpt, directory=checkpoint_path, max_to_keep=5)

# Restore the newest checkpoint when one exists; otherwise do nothing.
if ckpt_manager.latest_checkpoint:
    ckpt.restore(ckpt_manager.latest_checkpoint)
    print ('Latest checkpoint restored!!')

Latest checkpoint restored!!


In [39]:
# Display the weights of model_S's first sub-layer (index 0).
model_S.get_layer(index=0).weights

[<tf.Variable 'conv1_p/kernel:0' shape=(5, 5, 2, 96) dtype=float32, numpy=
 array([[[[-0.1241697 , -0.05270788, -0.1267548 , ...,  0.01479302,
            0.04633494, -0.00020846],
          [-0.04180038, -0.00129113, -0.05814721, ..., -0.047959  ,
           -0.11852884, -0.04021626]],
 
         [[-0.14611076, -0.02750257, -0.11035122, ..., -0.06599113,
            0.02868353,  0.04737476],
          [-0.06107435, -0.00265562, -0.07090953, ..., -0.12397142,
           -0.08573793,  0.05248689]],
 
         [[-0.06805864,  0.00125905, -0.0479855 , ..., -0.07684041,
            0.05089528,  0.07406264],
          [-0.08589292, -0.02652778, -0.04420991, ..., -0.10174345,
           -0.06137142, -0.02470316]],
 
         [[-0.05319491, -0.03154482,  0.02615462, ..., -0.08565231,
            0.02068939,  0.01484833],
          [-0.05418264, -0.09602941, -0.04432062, ..., -0.06760663,
           -0.01870468,  0.01105661]],
 
         [[-0.05224829, -0.04173286,  0.02709271, ..., -0.0950200

In [40]:
class DRNet(tf.keras.Model):
    """Appearance + spatial variant without the DR module.

    Pipeline: AppearanceSubnet and SpatialSubnet -> CombineSubnets ->
    Dense(70) -> softmax. `num_layers` and the `qa` / `qb` call inputs
    are accepted but not used.
    """

    def __init__(self, num_layers=8, im_shape=(224, 224, 3), posdata_shape=(32, 32, 2)):
        super().__init__()

        self.appr = AppearanceSubnet(input_shape=im_shape)
        self.spatial = SpatialSubnet(input_shape=posdata_shape)
        self.combine = CombineSubnets()
        # Temporary 70-way layer standing in for the omitted DR module.
        self.temp_fc = tf.keras.layers.Dense(70)
        self.softmax = tf.keras.layers.Softmax()

    def call(self, qa, qb, im, posdata):
        """Return softmax scores computed from the image and position masks."""
        appearance = self.appr(im)
        spatial_map = self.spatial(posdata)
        qr0 = self.combine(appearance, spatial_map)
        scores = self.temp_fc(qr0)
        return self.softmax(scores)

In [41]:
# Instantiate the appearance + spatial DRNet variant defined in the previous cell.
model_AS = DRNet()

In [42]:
# Run one training batch through model_AS and print its output.
for sample in train_dataset.take(1):
    print(model_AS(sample['qa'], sample['qb'], sample['im'], sample['posdata']))

tf.Tensor(
[[1.8362303e-03 7.9515978e-04 9.4688672e-05 ... 4.0749641e-04
  7.7613797e-03 7.3067960e-04]
 [4.1122566e-05 1.2189728e-05 4.4219632e-05 ... 3.4638913e-05
  2.7193928e-06 5.8435416e-04]
 [1.3452189e-03 9.5784519e-04 1.5676432e-04 ... 1.0474960e-02
  5.6122214e-04 3.7816674e-02]
 ...
 [5.2263799e-06 1.1744506e-05 2.6231105e-04 ... 1.0142261e-06
  9.2683067e-06 1.5035406e-02]
 [2.6174667e-04 3.6457498e-03 6.5644151e-03 ... 1.2867734e-03
  2.8744531e-03 1.7081503e-02]
 [2.9629712e-06 2.2019110e-05 1.5312384e-05 ... 2.3659684e-06
  1.2626136e-05 1.6489839e-03]], shape=(32, 70), dtype=float32)


In [43]:
# Copy model_A's first sub-layer weights into model_AS's first sub-layer,
# position by position.
dst_weights = model_AS.get_layer(index=0).weights
src_weights = model_A.get_layer(index=0).weights
for i, dst in enumerate(dst_weights):
    dst.assign(src_weights[i])

In [44]:
# Display the weights of model_AS's first sub-layer (index 0) after the copy.
model_AS.get_layer(index=0).weights

[<tf.Variable 'block1_conv1_1/kernel:0' shape=(3, 3, 3, 64) dtype=float32, numpy=
 array([[[[ 0.43049687,  0.11179236,  0.03296695, ..., -0.12999196,
           -0.05383801,  0.00646047],
          [ 0.5505768 ,  0.01797121,  0.09722268, ..., -0.08226837,
           -0.05173847,  0.0351446 ],
          [ 0.4801737 , -0.17290743,  0.03527067, ..., -0.12403691,
           -0.05083949,  0.03282929]],
 
         [[ 0.37458295,  0.15592553,  0.00062949, ..., -0.14577243,
           -0.2357477 , -0.06394353],
          [ 0.44038996,  0.04339506,  0.0497102 , ..., -0.09614545,
           -0.2967169 , -0.0725975 ],
          [ 0.40881994, -0.17108168, -0.00731635, ..., -0.11916465,
           -0.27686924, -0.04094899]],
 
         [[-0.06025299,  0.1292552 , -0.11693046, ..., -0.13795918,
           -0.37816146, -0.301669  ],
          [-0.0809814 ,  0.03768076, -0.10365398, ..., -0.09174562,
           -0.5062025 , -0.38599432],
          [-0.06475799, -0.15498775, -0.1406253 , ..., -0.126311

In [45]:
# Copy model_S's first sub-layer weights into model_AS's second sub-layer,
# position by position.
dst_weights = model_AS.get_layer(index=1).weights
src_weights = model_S.get_layer(index=0).weights
for i, dst in enumerate(dst_weights):
    dst.assign(src_weights[i])

In [46]:
# Display the weights of model_AS's second sub-layer (index 1) after the copy.
model_AS.get_layer(index=1).weights

[<tf.Variable 'conv1_p_1/kernel:0' shape=(5, 5, 2, 96) dtype=float32, numpy=
 array([[[[-0.1241697 , -0.05270788, -0.1267548 , ...,  0.01479302,
            0.04633494, -0.00020846],
          [-0.04180038, -0.00129113, -0.05814721, ..., -0.047959  ,
           -0.11852884, -0.04021626]],
 
         [[-0.14611076, -0.02750257, -0.11035122, ..., -0.06599113,
            0.02868353,  0.04737476],
          [-0.06107435, -0.00265562, -0.07090953, ..., -0.12397142,
           -0.08573793,  0.05248689]],
 
         [[-0.06805864,  0.00125905, -0.0479855 , ..., -0.07684041,
            0.05089528,  0.07406264],
          [-0.08589292, -0.02652778, -0.04420991, ..., -0.10174345,
           -0.06137142, -0.02470316]],
 
         [[-0.05319491, -0.03154482,  0.02615462, ..., -0.08565231,
            0.02068939,  0.01484833],
          [-0.05418264, -0.09602941, -0.04432062, ..., -0.06760663,
           -0.01870468,  0.01105661]],
 
         [[-0.05224829, -0.04173286,  0.02709271, ..., -0.09502

In [47]:
# Run one training batch through model_AS again, now with the copied weights.
for sample in train_dataset.take(1):
    print(model_AS(sample['qa'], sample['qb'], sample['im'], sample['posdata']))

tf.Tensor(
[[0.0116085  0.01360103 0.01614402 ... 0.00376284 0.01437064 0.01363365]
 [0.00851723 0.00636575 0.00387807 ... 0.00708488 0.00524758 0.01022758]
 [0.00672731 0.00382333 0.00435723 ... 0.00351955 0.01020344 0.00959444]
 ...
 [0.00534426 0.00431713 0.01370706 ... 0.00372187 0.01510412 0.01131842]
 [0.01438593 0.01157646 0.01442169 ... 0.00414957 0.01877781 0.01608297]
 [0.01106332 0.00730403 0.01339084 ... 0.00577279 0.00848817 0.01411255]], shape=(32, 70), dtype=float32)


In [48]:
# The models end in a Softmax layer, so the loss receives probabilities
# (from_logits=False) together with integer class labels.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
# This rebinds the module-level `optimizer` that train_step reads.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

# Running metrics; the training loop resets them at the start of each epoch.
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
eval_loss = tf.keras.metrics.Mean(name='eval_loss')
eval_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='eval_accuracy')

In [49]:
# Checkpointing for model_AS with the current optimizer: keep at most five
# checkpoints in the directory.
checkpoint_path = "./checkpoints/model_AS"
ckpt = tf.train.Checkpoint(model_AS=model_AS, optimizer=optimizer)
ckpt_manager = tf.train.CheckpointManager(
    checkpoint=ckpt, directory=checkpoint_path, max_to_keep=5)

# Restore the newest checkpoint when one exists; otherwise do nothing.
if ckpt_manager.latest_checkpoint:
    ckpt.restore(ckpt_manager.latest_checkpoint)
    print ('Latest checkpoint restored!!')



In [50]:
@tf.function
def train_step(model, qa, qb, im, posdata, label):
    """Run one optimisation step on a batch and update the training metrics.

    Uses the module-level `loss_object`, `optimizer`, `train_loss` and
    `train_accuracy`.
    """
    with tf.GradientTape() as tape:
        y = model(qa, qb, im, posdata)
        loss = loss_object(label, y)

    variables = model.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))

    train_loss(loss)
    train_accuracy(label, y)

In [51]:
@tf.function
def eval_step(model, qa, qb, im, posdata, label):
    """Evaluate one batch and update the validation metrics.

    Uses the module-level `loss_object`, `eval_loss` and `eval_accuracy`;
    no gradients are computed.
    """
    y = model(qa, qb, im, posdata)
    batch_loss = loss_object(label, y)

    eval_loss(batch_loss)
    eval_accuracy(label, y)

In [52]:
def test_step(model, out_path='temp.npz', num_dets=50, ov_thresh=0.5):
    """Run the test pipeline for ``model`` and return its recall.

    Calls ``test_model(model, out_path)`` first, then passes the same
    ``out_path`` to ``eval_recall`` together with ``num_dets`` and
    ``ov_thresh``.

    Args:
        model: Model forwarded to ``test_model``.
        out_path: Path shared by ``test_model`` and ``eval_recall``
            (presumably the file the predictions are written to and read
            back from; confirm against those helpers).
        num_dets: Forwarded to ``eval_recall`` as its second argument.
        ov_thresh: Forwarded to ``eval_recall`` as its third argument.

    Returns:
        Whatever ``eval_recall`` returns for the given arguments.
    """
    test_model(model, out_path)
    return eval_recall(out_path, num_dets, ov_thresh)

In [53]:
# Early-stopping configuration and bookkeeping.
# EARLY_STOP_PATIENCE: number of consecutive epochs without a new best
# validation accuracy after which training stops.
EARLY_STOP_PATIENCE = 20
max_acc = 0          # best validation accuracy seen so far in this run
early_stop_cnt = 0   # consecutive epochs that did not beat max_acc

for epoch in tqdm(range(EPOCHS)):
    start = time.time()

    # Metrics accumulate across calls, so clear them before every epoch to
    # report per-epoch values.
    train_loss.reset_states()
    train_accuracy.reset_states()
    eval_loss.reset_states()
    eval_accuracy.reset_states()

    for sample in train_dataset:
        train_step(model_AS, sample['qa'], sample['qb'], sample['im'], sample['posdata'], sample['labels'])

    for sample in valid_dataset:
        eval_step(model_AS, sample['qa'], sample['qb'], sample['im'], sample['posdata'], sample['labels'])

    end = time.time()
    # Fetch the validation accuracy once; it is used both for logging and
    # for the early-stopping decision below.
    eval_acc = eval_accuracy.result()
    print ('Epoch {} Loss {:.4f} Accuracy {:.4f} Eval_Loss {:.4f} Eval_Accuracy {:.4f}'.format(
        epoch + 1,
        train_loss.result(),
        train_accuracy.result(),
        eval_loss.result(),
        eval_acc))

    print ('Time taken for 1 epoch: {} secs\n'.format(end - start))

    if eval_acc > max_acc:
        # New best: remember it, reset the patience counter and save. A
        # checkpoint is written only here, so the newest checkpoint on disk
        # is always the best-scoring epoch of this run.
        max_acc = eval_acc
        early_stop_cnt = 0
        ckpt_save_path = ckpt_manager.save()
        print ('Saving checkpoint for epoch {} at {}'.format(epoch+1,
                                                             ckpt_save_path))
    else:
        early_stop_cnt += 1

    # ">=" rather than "==" so the loop still stops if the counter is ever
    # advanced by more than one step.
    if early_stop_cnt >= EARLY_STOP_PATIENCE:
        break

  0%|                                                                                          | 0/100 [00:00<?, ?it/s]

Epoch 1 Loss 2.4273 Accuracy 0.3995 Eval_Loss 2.1663 Eval_Accuracy 0.4247
Time taken for 1 epoch: 151.7528555393219 secs



  1%|▊                                                                              | 1/100 [02:34<4:15:06, 154.61s/it]

Saving checkpoint for epoch 1 at ./checkpoints/model_AS\ckpt-1
Epoch 2 Loss 1.9708 Accuracy 0.4679 Eval_Loss 2.0733 Eval_Accuracy 0.4280
Time taken for 1 epoch: 148.8979959487915 secs



  2%|█▌                                                                             | 2/100 [05:06<4:11:07, 153.75s/it]

Saving checkpoint for epoch 2 at ./checkpoints/model_AS\ckpt-2
Epoch 3 Loss 1.8322 Accuracy 0.4913 Eval_Loss 2.0493 Eval_Accuracy 0.4425
Time taken for 1 epoch: 150.19229698181152 secs



  3%|██▎                                                                            | 3/100 [07:39<4:08:10, 153.51s/it]

Saving checkpoint for epoch 3 at ./checkpoints/model_AS\ckpt-3


  4%|███▏                                                                           | 4/100 [10:09<4:04:06, 152.57s/it]

Epoch 4 Loss 1.7215 Accuracy 0.5170 Eval_Loss 2.0676 Eval_Accuracy 0.4251
Time taken for 1 epoch: 150.34867429733276 secs



  5%|███▉                                                                           | 5/100 [12:37<3:59:09, 151.04s/it]

Epoch 5 Loss 1.6065 Accuracy 0.5426 Eval_Loss 2.0993 Eval_Accuracy 0.4241
Time taken for 1 epoch: 147.46659326553345 secs



  6%|████▋                                                                          | 6/100 [15:04<3:54:57, 149.97s/it]

Epoch 6 Loss 1.4568 Accuracy 0.5787 Eval_Loss 2.1547 Eval_Accuracy 0.4223
Time taken for 1 epoch: 147.43310379981995 secs



  7%|█████▌                                                                         | 7/100 [17:30<3:50:24, 148.65s/it]

Epoch 7 Loss 1.2916 Accuracy 0.6137 Eval_Loss 2.2347 Eval_Accuracy 0.4209
Time taken for 1 epoch: 145.5341625213623 secs



  8%|██████▎                                                                        | 8/100 [19:56<3:46:50, 147.94s/it]

Epoch 8 Loss 1.1038 Accuracy 0.6639 Eval_Loss 2.3988 Eval_Accuracy 0.4007
Time taken for 1 epoch: 146.27276420593262 secs



  9%|███████                                                                        | 9/100 [22:23<3:44:07, 147.78s/it]

Epoch 9 Loss 0.9463 Accuracy 0.7083 Eval_Loss 2.5586 Eval_Accuracy 0.4024
Time taken for 1 epoch: 147.35860347747803 secs



 10%|███████▊                                                                      | 10/100 [24:50<3:41:20, 147.56s/it]

Epoch 10 Loss 0.7969 Accuracy 0.7442 Eval_Loss 2.6350 Eval_Accuracy 0.4024
Time taken for 1 epoch: 147.02576518058777 secs



 11%|████████▌                                                                     | 11/100 [27:21<3:40:19, 148.53s/it]

Epoch 11 Loss 0.7025 Accuracy 0.7692 Eval_Loss 2.7359 Eval_Accuracy 0.3724
Time taken for 1 epoch: 150.7631540298462 secs



 12%|█████████▎                                                                    | 12/100 [29:51<3:38:26, 148.94s/it]

Epoch 12 Loss 0.6289 Accuracy 0.7877 Eval_Loss 2.7078 Eval_Accuracy 0.3868
Time taken for 1 epoch: 149.86667704582214 secs



 13%|██████████▏                                                                   | 13/100 [32:19<3:35:19, 148.50s/it]

Epoch 13 Loss 0.5676 Accuracy 0.8002 Eval_Loss 2.7634 Eval_Accuracy 0.4036
Time taken for 1 epoch: 147.44915771484375 secs



 14%|██████████▉                                                                   | 14/100 [34:44<3:31:38, 147.65s/it]

Epoch 14 Loss 0.5245 Accuracy 0.8125 Eval_Loss 2.8464 Eval_Accuracy 0.3991
Time taken for 1 epoch: 145.64879727363586 secs



 15%|███████████▋                                                                  | 15/100 [37:11<3:28:46, 147.37s/it]

Epoch 15 Loss 0.4994 Accuracy 0.8188 Eval_Loss 2.8403 Eval_Accuracy 0.3947
Time taken for 1 epoch: 146.69392108917236 secs



 16%|████████████▍                                                                 | 16/100 [39:36<3:25:31, 146.81s/it]

Epoch 16 Loss 0.4677 Accuracy 0.8244 Eval_Loss 2.8280 Eval_Accuracy 0.4137
Time taken for 1 epoch: 145.4681134223938 secs



 17%|█████████████▎                                                                | 17/100 [42:02<3:22:31, 146.40s/it]

Epoch 17 Loss 0.4465 Accuracy 0.8300 Eval_Loss 3.1298 Eval_Accuracy 0.4081
Time taken for 1 epoch: 145.43352937698364 secs



 18%|██████████████                                                                | 18/100 [44:34<3:22:28, 148.15s/it]

Epoch 18 Loss 0.4323 Accuracy 0.8301 Eval_Loss 3.0195 Eval_Accuracy 0.3901
Time taken for 1 epoch: 152.1855547428131 secs



 19%|██████████████▊                                                               | 19/100 [47:07<3:21:53, 149.55s/it]

Epoch 19 Loss 0.4127 Accuracy 0.8331 Eval_Loss 3.0889 Eval_Accuracy 0.4093
Time taken for 1 epoch: 152.78071308135986 secs



 20%|███████████████▌                                                              | 20/100 [49:39<3:20:33, 150.42s/it]

Epoch 20 Loss 0.4021 Accuracy 0.8357 Eval_Loss 3.0359 Eval_Accuracy 0.3908
Time taken for 1 epoch: 152.42431259155273 secs



 21%|████████████████▍                                                             | 21/100 [52:12<3:18:55, 151.09s/it]

Epoch 21 Loss 0.3909 Accuracy 0.8388 Eval_Loss 3.2648 Eval_Accuracy 0.3883
Time taken for 1 epoch: 152.61852979660034 secs



 22%|█████████████████▏                                                            | 22/100 [54:45<3:17:01, 151.56s/it]

Epoch 22 Loss 0.3797 Accuracy 0.8408 Eval_Loss 3.2099 Eval_Accuracy 0.4065
Time taken for 1 epoch: 152.64733600616455 secs



 22%|█████████████████▏                                                            | 22/100 [57:18<3:23:09, 156.28s/it]

Epoch 23 Loss 0.3664 Accuracy 0.8411 Eval_Loss 3.3408 Eval_Accuracy 0.4055
Time taken for 1 epoch: 152.81140327453613 secs






In [54]:
# Reload weights from disk before evaluation. The training loop saves only
# when validation accuracy improves, so the newest checkpoint holds the
# best-scoring weights rather than those of the final epoch.
checkpoint_path = "./checkpoints/model_AS"
ckpt = tf.train.Checkpoint(model_AS=model_AS, optimizer=optimizer)
ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=5)

if ckpt_manager.latest_checkpoint:
    ckpt.restore(ckpt_manager.latest_checkpoint)
    print ('Latest checkpoint restored!!')
else:
    # Make the fallback visible: without a checkpoint the evaluation that
    # follows runs on whatever weights model_AS currently holds.
    print ('No checkpoint found in {}; model_AS keeps its current weights.'.format(checkpoint_path))

Latest checkpoint restored!!


In [55]:
# Evaluate the model tracked by the checkpoint object; test_step returns the
# recall, which is displayed as this cell's output.
test_step(ckpt.model_AS)

writing file..
Recall: 0.7344077568134172


0.7344077568134172