In [1]:
import tensorflow as tf
from tensorflow.keras import layers
from PIL import Image
print(tf.VERSION)
print(tf.keras.__version__)
import numpy as np

1.12.0
2.1.6-tf


## section 1
firstly, a list which contains weights is given. You need to load the weight into the model correctly. Then test the model on the given dataset.

In [2]:
'''
Weights_list is the parameter sets of the networks

It's structure is like:
[
[],
[layer a's weights, layer a's bias,...],
[layer b's weights]
]
'''
import pickle
with open ('params_sets', 'rb') as fp:
    weights_list = pickle.load(fp,encoding="bytes")

In [3]:
#this is the model for the testing part
model = tf.keras.models.Sequential(
    [
    #first dw module
    layers.DepthwiseConv2D((3, 3),padding='same',depth_multiplier=1,strides=(1,1),use_bias=False),
    layers.BatchNormalization(momentum=0.1,
    epsilon=1e-5,trainable=False),
    layers.ReLU(4.0),
    layers.Conv2D(48,(1,1), padding='same',use_bias=False,strides=(1, 1)),
    layers.BatchNormalization(momentum=0.1,
    epsilon=1e-5,trainable=False),
    layers.ReLU(4.0),
    #maxpooling
    layers.MaxPool2D(strides =(2,2)),
    #second dw module
    layers.DepthwiseConv2D((3, 3),padding='same',depth_multiplier=1,strides=(1,1),use_bias=False),
    layers.BatchNormalization(momentum=0.1,
    epsilon=1e-5,trainable=False),
    layers.ReLU(4.0),
    layers.Conv2D(96,(1,1), padding='same',use_bias=False,strides=(1, 1)),
    layers.BatchNormalization(momentum=0.1,
    epsilon=1e-5,trainable=False),
    layers.ReLU(4.0),
    #maxpooling
    layers.MaxPool2D(strides =(2,2)),
    #third dw module
    layers.DepthwiseConv2D((3, 3),padding='same',depth_multiplier=1,strides=(1,1),use_bias=False),
    layers.BatchNormalization(momentum=0.1,
    epsilon=1e-5,trainable=False),
    layers.ReLU(4.0),
    layers.Conv2D(192,(1,1), padding='same',use_bias=False,strides=(1, 1)),
    layers.BatchNormalization(momentum=0.1,
    epsilon=1e-5,trainable=False),
    layers.ReLU(4.0),
    #maxpooling
    layers.MaxPool2D(strides =(2,2)),
    #fourth dw module
    layers.DepthwiseConv2D((3, 3),padding='same',depth_multiplier=1,strides=(1,1),use_bias=False),
    layers.BatchNormalization(momentum=0.1,
    epsilon=1e-5,trainable=False),
    layers.ReLU(4.0),
    layers.Conv2D(384,(1,1), padding='same',use_bias=False,strides=(1, 1)),
    layers.BatchNormalization(momentum=0.1,
    epsilon=1e-5,trainable=False),
    layers.ReLU(4.0),
    #fifth dw module
    layers.DepthwiseConv2D((3, 3),padding='same',depth_multiplier=1,strides=(1,1),use_bias=False),
    layers.BatchNormalization(momentum=0.1,
    epsilon=1e-5,trainable=False),
    layers.ReLU(4.0),
    layers.Conv2D(512,(1,1), padding='same',use_bias=False,strides=(1, 1)),
    layers.BatchNormalization(momentum=0.1,
    epsilon=1e-5,trainable=False),
    layers.ReLU(4.0),
    #output
    layers.Conv2D(10,(1,1), padding='same',use_bias=False,strides=(1, 1)),
    ]
)

In [4]:
model.trainable = False

In [5]:
'''
The function is to convert the image into the input type.
'''
def load_input(path):
    img = Image.open(path)
    img = img.resize((320,160))
    input_img = np.asarray(img).astype(np.float32)
    input_img = (input_img/255 - 0.5)/0.25
    return input_img[np.newaxis,:]

In [6]:
'''
This is the function to get the predict box (x,y,w,h)
'''
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
def get_box(output):
    anchors = [1.4940052559648322, 2.3598481287086823, 4.0113013115312155, 5.760873975661669]
    h = output.shape[2]
    w = output.shape[3]
    output = output.reshape(2,5,800).transpose(1,0,2).flatten().reshape(5,1600)
    grid_x = np.tile(np.tile(np.linspace(0,w-1,w),h).reshape(h,w),(2,1,1)).flatten()
    grid_y =np.tile(np.tile(np.linspace(0,h-1,h),w).reshape(w,h).T,(2,1,1)).flatten()
    xs = sigmoid(output[0]) + grid_x
    ys = sigmoid(output[1]) + grid_y
    anchor_w = np.zeros(1600)
    anchor_h = np.zeros(1600)
    anchor_w[0:800] = anchors[0]
    anchor_w[800:1600] = anchors[2]
    anchor_h[0:800] = anchors[1]
    anchor_h[800:1600] = anchors[3]
    ws = np.exp(output[2]) * anchor_w
    hs = np.exp(output[3]) * anchor_h
    ind = np.argmax(output[4])
    bcx = xs[ind]
    bcy = ys[ind]
    bw = ws[ind]
    bh = hs[ind]
    box = [bcx/w, bcy/h, bw/w, bh/h]
    return box

In [7]:
input_img = load_input('images/2.jpg')
output = model.predict(input_img).transpose(0,3,1,2)
count = 1
for i in model.layers:
    if i.get_config()['name'].startswith('re_lu') or i.get_config()['name'].startswith('max_pooling'):
        continue
    else:
        i.set_weights(weights_list[count])
        count += 1

In [8]:
'''
This is the cell to test your weights correctness.

The output should be :
[0.8880645155906677, 0.6772263944149017, 0.02124013871572325, 0.058586649582813566]
'''
input_img = load_input('images/2.jpg')
output = model.predict(input_img).transpose(0,3,1,2)
print (get_box(output))

[0.8880645155906677, 0.6772263944149017, 0.02124013871572325, 0.058586649582813566]


In [9]:
'''
Now finish the function to compute the iou between two given box.

You can refer to the website: https://www.pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/

'''

def bbox_iou(box1, box2):
    '''your code here'''
    
    
    
    
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])
    
    # compute the area of intersection rectangle
    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
    
    # compute the area of both the prediction and ground-truth
    # rectangles
    boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
    
    # compute the intersection over union by taking the intersection
    # area and dividing it by the sum of prediction + ground-truth
    # areas - the interesection area
    iou = interArea / float(boxAArea + boxBArea - interArea)
    
    # return the intersection over union value
    return iou

In [23]:
'''
Given dataset compute the iou
'''
import json
with open('groundtruth.txt', 'r') as outfile:
    lines = json.load(outfile)

In [20]:
import cv2
image = cv2.imread(lines[0][0])
 
# draw the ground-truth bounding box along with the predicted
# bounding box
detectiongt = lines[0][1]
cv2.rectangle(image, tuple(detectiongt[:2]), 
    tuple(detectiongt[2:]), (0, 255, 0), 2)
detectionpred = get_box(model.predict(input_img).transpose(0,3,1,2))
cv2.rectangle(image, tuple(detectionpred[:2]), 
    tuple(detectionpred[2:]), (0, 0, 255), 2)

# compute the intersection over union and display it
iou = bbox_iou(detectiongt, detectionpred)
cv2.putText(image, "IoU: {:.4f}".format(iou), (10, 30),
    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
print("{}: {:.4f}".format(image, iou))

# show the output image
cv2.imshow("Image", image)
cv2.waitKey(0)

TypeError: integer argument expected, got float

In [27]:
'''
The iou should be about 67%
'''
avg_iou = 0
for line in lines:
    input_img = load_input(line[0])
    output = model.predict(input_img).transpose(0,3,1,2)
    cur_iou = bbox_iou(get_box(output),line[1])
    print(cur_iou)
    avg_iou+= cur_iou
avg_iou = avg_iou/len(lines)
print(avg_iou)

0.951736822400643
0.94857064779409
0.9731527700341455
0.9750984926117859
0.9905525351305893
0.954411112492113
0.9794809880013137
0.97068124313231
0.9813808443399756
0.9884230068533355
0.9165642034672067
0.9827304611644302
0.9608683145972255
0.9724563039712526
0.9592606062752358
0.9602515675961311
0.8370722604767621
0.9464989204087023
0.8796097533518571
0.8887995804726144
0.9630095437439465
0.9197719179452705
0.979321499880439
0.9800708111798213
0.986229694298195
0.8488353508130956
0.9728128619208227
0.5541060130224168
0.9834891662016207
0.977670216276692
0.8898355358128887
0.9749107408432032
0.9702762821334907
0.4924885855838093
0.8030788886829884
0.9098996133582014
0.9596904537985328
0.9605413735970415
0.6093458582085839
0.19051099006511052
0.9703568882910256
0.43623071390306173
0.9280485307980458
0.9353907888918831
0.8265670151638929
0.9740813591506317
0.9035519237618067
0.921936466263269
0.9395662787803205
0.7184306878686398
0.8494773136398805
0.3731649707201902
0.9544141991661504
0

0.9721788948653858
0.6412786473523674
0.8712519881504597
0.9812635044222847
0.9635622636974553
0.9614945019763463
0.9489137590427311
0.9457191419584171
0.9567697672560623
0.9322479295648619
0.9584117915213979
0.2880343182689145
0.975559522832453
0.985055877893315
0.9798406202723013
0.9823604327684624
0.9660781780116827
0.9294249726435984
0.9842730899119315
0.9409003406441222
0.9667366322764708
0.876502904925786
0.9695944897249075
0.9846655364493687
0.941608069801656
0.9586075543564752
0.9604897575291624
0.9784252123657409
0.9636077689650908
0.19590255476226817
0.9260396590840859
0.9620875424852185
0.94725382752145
0.9866804036455173
0.9244108407353578
0.9556003609612739
0.9903430178678292
0.958157402669828
0.9842082197293406
0.9721621052064755
0.9059229614768518
0.9603495893951778
0.9674615310569428
0.960832366294982
0.9915002993022131
0.984956250583533
0.9717782365565569
0.30610771698978506
0.9258556694749203
0.8915899689351864
0.5808709779785043
0.907926692597654
0.9512942469199335
0

0.6989810895570707
0.9698829090804983
0.9446901953643277
0.8850169149024024
0.7535910605639887
0.6588624858639909
0.9667470497081498
0.2569778320991741
0.11739378664311746
0.9802154481940043
0.16716541877347413
0.9557291072218297
0.9388984089695135
0.1832644186215246
0.9354466941147261
0.986536646361984
0.8336245178042997
0.9410148410601996
0.934907004708337
0.8848566700066961
0.9579251540398613
0.9318106316090231
0.8837153575193523
0.9243296100033457
0.9466360455643366
0.9863338825999626
0.9154852865750407
0.9591993281114072
0.9820021769541818
0.9698308465014125
0.9676320643426416
0.9474478801379278
0.9813258925783586
0.9143617023117515
0.37769349147352393
0.9777075195490021
0.9319479292562984
0.31181770231656897
0.8542546640692179
0.9707569550211104
0.985061990606539
0.9264141541980637
0.9578263502443928
0.2826467697842283
0.9202394429812841
0.2899886431942622
0.9245417475387677
0.9756758434131297
0.9832345861076061
0.9446512486644988
0.9707752983638954
0.9263776539166743
0.966450930

## section 2
In this section, you need to convert the model into a model without batch normalization layers. The output of two model should be the same. Then you are required to quantize the model without batch normalization.

In [None]:
model_no_bn = tf.keras.models.Sequential(
    [
    #first dw module
    layers.DepthwiseConv2D((3, 3),padding='same',depth_multiplier=1,strides=(1,1),use_bias=True),
    layers.ReLU(4.0),
    layers.Conv2D(48,(1,1), padding='same',use_bias=True,strides=(1, 1)),
    layers.ReLU(4.0),
    #maxpooling
    layers.MaxPool2D(strides =(2,2)),
    #second dw module
    layers.DepthwiseConv2D((3, 3),padding='same',depth_multiplier=1,strides=(1,1),use_bias=True),
    layers.ReLU(4.0),
    layers.Conv2D(96,(1,1), padding='same',use_bias=True,strides=(1, 1)),
    layers.ReLU(4.0),
    #maxpooling
    layers.MaxPool2D(strides =(2,2)),
    #third dw module
    layers.DepthwiseConv2D((3, 3),padding='same',depth_multiplier=1,strides=(1,1),use_bias=True),
    layers.ReLU(4.0),
    layers.Conv2D(192,(1,1), padding='same',use_bias=True,strides=(1, 1)),
    layers.ReLU(4.0),
    #maxpooling
    layers.MaxPool2D(strides =(2,2)),
    #fourth dw module
    layers.DepthwiseConv2D((3, 3),padding='same',depth_multiplier=1,strides=(1,1),use_bias=True),
    layers.ReLU(4.0),
    layers.Conv2D(384,(1,1), padding='same',use_bias=True,strides=(1, 1)),
    layers.ReLU(4.0),
    #fifth dw module
    layers.DepthwiseConv2D((3, 3),padding='same',depth_multiplier=1,strides=(1,1),use_bias=True),
    layers.ReLU(4.0),
    layers.Conv2D(512,(1,1), padding='same',use_bias=True,strides=(1, 1)),
    layers.ReLU(4.0),
    #output
    layers.Conv2D(10,(1,1), padding='same',use_bias=False,strides=(1, 1)),
    ]
)

In [None]:
'''
Write down the code to absorb bn layer into conv layer and maintain the same output as the original model. (please refer to HW2 Q4)
'''

In [None]:
model_no_bn.trainable = False

In [None]:
input_img = load_input('images/2.jpg')
output = model_no_bn.predict(input_img).transpose(0,3,1,2)
get_box(output)

In [None]:
'''
Based on the model_no_bn quantize the weights to 16 bits, 8 bits respectively.

The requirement of quantization is given below:

* For each layer's weights, set the upper bound as the minimum 2^n which is larger than the maximum value of unsigned weights. (eg: if the maximum value is 4.2375 and the minimum value is -7.83421, then the upper bound is 2^3 = 8)

* Note that for each layer, the distribution of weights could be different.

* The sign takes one bit. For example, if the upper bound is 8 and 5 bits is given for floating part, it actually takes 9 bits.

* Do not quantize the bias!

and get the accuracy report
'''

In [None]:
'''
You should report the average IoU for each quantized model you get
'''
avg_iou = 0
for line in lines:
    input_img = load_input(line[0])
    output = model_no_bn.predict(input_img).transpose(0,3,1,2)
    avg_iou+= bbox_iou(get_box(output),line[1])
avg_iou = avg_iou/1000
print (avg_iou)

In [None]:
'''
Based on the model_no_bn

Now you can quantize both weights and bias parts.

Explore eight different combination of weights and parts and specify your methods' details, and get the accuracy report
'''