In [1]:
import tensorflow as tf
from tensorflow.keras import layers
from PIL import Image
# tf.__version__ is available in both TensorFlow 1.x and 2.x, whereas the
# tf.VERSION alias only exists in 1.x.
print(tf.__version__)
print(tf.keras.__version__)
import numpy as np

ModuleNotFoundError: No module named 'PIL'

## section 1
First, a list containing the weights is given. You need to load the weights into the model correctly, then test the model on the given dataset.

In [None]:
'''
Weights_list is the parameter sets of the networks

Its structure is like:
[
[],
[layer a's weights, layer a's bias,...],
[layer b's weights]
]
'''
import pickle
# Load the pickled parameter sets into `weights_list`.
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only open a 'params_sets' file that comes from a trusted source.
with open ('params_sets', 'rb') as fp:
    weights_list = pickle.load(fp)

In [None]:
#this is the model for the testing part
def _dw_bn_module(filters):
    """Return the layers of one depthwise-separable module with batch norm.

    The module is a 3x3 depthwise convolution followed by a 1x1 pointwise
    convolution producing `filters` channels. Both convolutions are bias-free
    and each is followed by a non-trainable BatchNormalization layer and a
    ReLU whose output is capped at 4.0.
    """
    return [
        layers.DepthwiseConv2D((3, 3), padding='same', depth_multiplier=1,
                               strides=(1, 1), use_bias=False),
        layers.BatchNormalization(momentum=0.1, epsilon=1e-5, trainable=False),
        layers.ReLU(4.0),
        layers.Conv2D(filters, (1, 1), padding='same', use_bias=False,
                      strides=(1, 1)),
        layers.BatchNormalization(momentum=0.1, epsilon=1e-5, trainable=False),
        layers.ReLU(4.0),
    ]


# The pieces are concatenated left to right, so the layers are created in
# exactly the order in which they appear in the network.
model = tf.keras.models.Sequential(
    # first dw module
    _dw_bn_module(48)
    # maxpooling
    + [layers.MaxPool2D(strides=(2, 2))]
    # second dw module
    + _dw_bn_module(96)
    # maxpooling
    + [layers.MaxPool2D(strides=(2, 2))]
    # third dw module
    + _dw_bn_module(192)
    # maxpooling
    + [layers.MaxPool2D(strides=(2, 2))]
    # fourth dw module
    + _dw_bn_module(384)
    # fifth dw module
    + _dw_bn_module(512)
    # output: 10 channels, bias-free 1x1 convolution
    + [layers.Conv2D(10, (1, 1), padding='same', use_bias=False, strides=(1, 1))]
)

In [None]:
# Freeze the model: this notebook only uses it for inference via model.predict.
model.trainable = False

In [None]:
'''
write down your code to load the model
'''

In [None]:
'''
The function is to convert the image into the input type.
'''
def load_input(path):
    """Load an image file and convert it into a normalized model input batch.

    Args:
        path: Path of the image file to open with PIL.

    Returns:
        A float32 array with a leading batch axis of size 1. The image is
        resized to 320 pixels wide by 160 pixels high (PIL sizes are given as
        (width, height)) and every value is rescaled with
        (x / 255 - 0.5) / 0.25, which maps 8-bit pixel values to [-2, 2].
    """
    # The context manager releases the underlying file handle as soon as the
    # resized copy has been produced.
    # NOTE(review): the image mode is not normalized here; presumably all
    # inputs are RGB files - confirm before feeding other formats.
    with Image.open(path) as img:
        resized = img.resize((320, 160))
    input_img = np.asarray(resized).astype(np.float32)
    input_img = (input_img / 255 - 0.5) / 0.25
    return input_img[np.newaxis, :]

In [None]:
'''
This is the function to get the predict box (x,y,w,h)
'''
def sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x))."""
    return 1 / (1 + np.exp(-x))


def get_box(output):
    """Decode the raw network output into the single most confident box.

    Args:
        output: Array of shape (1, 10, h, w): the prediction for one image
            with the channel axis ahead of the two grid axes. The 10 channels
            hold, for each of the two anchors in turn, the five values
            (x, y, w, h, confidence).

    Returns:
        [cx, cy, bw, bh]: centre coordinates and size of the box with the
        highest confidence value, with x/width divided by the grid width and
        y/height divided by the grid height.
    """
    # Anchor priors stored as (w0, h0, w1, h1).
    anchors = [1.4940052559648322, 2.3598481287086823, 4.0113013115312155, 5.760873975661669]
    num_anchors = len(anchors) // 2
    h = output.shape[2]
    w = output.shape[3]
    # Derive the number of grid cells from the input instead of hard-coding
    # 800 (= 20 * 40), so any grid size can be decoded.
    cells = h * w
    # (anchor * field, h, w) -> (anchor, field, cell) -> (field, anchor * cell)
    output = output.reshape(num_anchors, 5, cells).transpose(1, 0, 2).reshape(5, num_anchors * cells)
    # Column / row index of every cell in row-major order, repeated per anchor.
    col_idx = np.arange(w, dtype=np.float64)
    row_idx = np.arange(h, dtype=np.float64)
    grid_x = np.tile(col_idx, h * num_anchors)
    grid_y = np.tile(np.repeat(row_idx, w), num_anchors)
    # Box centre = cell index + sigmoid offset inside the cell.
    xs = sigmoid(output[0]) + grid_x
    ys = sigmoid(output[1]) + grid_y
    # Each anchor's prior width / height applies to its own run of `cells` entries.
    anchor_w = np.repeat(anchors[0::2], cells)
    anchor_h = np.repeat(anchors[1::2], cells)
    ws = np.exp(output[2]) * anchor_w
    hs = np.exp(output[3]) * anchor_h
    # Keep only the prediction with the largest raw confidence value.
    ind = np.argmax(output[4])
    bcx = xs[ind]
    bcy = ys[ind]
    bw = ws[ind]
    bh = hs[ind]
    box = [bcx/w, bcy/h, bw/w, bh/h]
    return box

In [None]:
'''
This is the cell to test your weights correctness.

The output should be :
[0.8880645155906677, 0.6772263944149017, 0.02124013871572325, 0.058586649582813566]
'''
input_img = load_input('images/2.jpg')
# Reorder the prediction axes with transpose(0,3,1,2) so that axes 2 and 3 are
# the ones get_box reads as grid height and width.
# NOTE(review): presumably the model emits channels-last (N, H, W, C) - confirm.
output = model.predict(input_img).transpose(0,3,1,2)
print (get_box(output))

In [None]:
'''
Now finish the function to compute the IoU between two given boxes.

You can refer to the website: https://www.pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/

'''

def bbox_iou(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Args:
        box1: Sequence whose first four values are (cx, cy, w, h): the box
            centre followed by its width and height. This is the layout
            returned by get_box.
        box2: Second box in the same layout.
            NOTE(review): assumes the ground-truth boxes use the same
            centre-based (cx, cy, w, h) layout and units - confirm against
            groundtruth.txt.

    Returns:
        Intersection area divided by union area, in [0, 1]. Returns 0.0 when
        the union has no area.
    """
    cx1, cy1, w1, h1 = box1[0], box1[1], box1[2], box1[3]
    cx2, cy2, w2, h2 = box2[0], box2[1], box2[2], box2[3]

    # Edges of the overlap rectangle, derived from centre +/- half extent.
    left = max(cx1 - w1 / 2.0, cx2 - w2 / 2.0)
    right = min(cx1 + w1 / 2.0, cx2 + w2 / 2.0)
    top = max(cy1 - h1 / 2.0, cy2 - h2 / 2.0)
    bottom = min(cy1 + h1 / 2.0, cy2 + h2 / 2.0)

    # Clamp at zero so disjoint boxes contribute no intersection.
    inter = max(0.0, right - left) * max(0.0, bottom - top)
    union = w1 * h1 + w2 * h2 - inter
    if union <= 0:
        return 0.0
    return inter / union

In [None]:
'''
Given dataset compute the iou
'''
import json
# Read the ground-truth records; the evaluation loops in this notebook use
# line[0] as the image path and line[1] as the reference box of each record.
# NOTE(review): despite its name, `outfile` is opened read-only as the input file.
with open('groundtruth.txt', 'r') as outfile:
    lines = json.load(outfile)

In [None]:
'''
The iou should be about 67%
'''
# Accumulate the IoU between the box predicted by `model` and the reference
# box of every record, then average.
avg_iou = 0
for line in lines:
    input_img = load_input(line[0])
    # Move the channel axis ahead of the grid axes, as get_box expects.
    output = model.predict(input_img).transpose(0,3,1,2)
    avg_iou += bbox_iou(get_box(output), line[1])
# Divide by the actual number of records rather than a hard-coded 1000 so the
# mean stays correct for any dataset size; an empty dataset still yields 0.
avg_iou = avg_iou / max(len(lines), 1)
print (avg_iou)

## section 2
In this section, you need to convert the model into a model without batch normalization layers. The outputs of the two models should be the same. Then you are required to quantize the model without batch normalization.

In [None]:
def _dw_module_no_bn(filters):
    """Return the layers of one depthwise-separable module without batch norm.

    The module is a 3x3 depthwise convolution followed by a 1x1 pointwise
    convolution producing `filters` channels. Both convolutions carry a bias
    term and each is followed by a ReLU whose output is capped at 4.0.
    """
    return [
        layers.DepthwiseConv2D((3, 3), padding='same', depth_multiplier=1,
                               strides=(1, 1), use_bias=True),
        layers.ReLU(4.0),
        layers.Conv2D(filters, (1, 1), padding='same', use_bias=True,
                      strides=(1, 1)),
        layers.ReLU(4.0),
    ]


# The pieces are concatenated left to right, so the layers are created in
# exactly the order in which they appear in the network.
model_no_bn = tf.keras.models.Sequential(
    # first dw module
    _dw_module_no_bn(48)
    # maxpooling
    + [layers.MaxPool2D(strides=(2, 2))]
    # second dw module
    + _dw_module_no_bn(96)
    # maxpooling
    + [layers.MaxPool2D(strides=(2, 2))]
    # third dw module
    + _dw_module_no_bn(192)
    # maxpooling
    + [layers.MaxPool2D(strides=(2, 2))]
    # fourth dw module
    + _dw_module_no_bn(384)
    # fifth dw module
    + _dw_module_no_bn(512)
    # output: 10 channels, bias-free 1x1 convolution
    + [layers.Conv2D(10, (1, 1), padding='same', use_bias=False, strides=(1, 1))]
)

In [None]:
'''
Write down the code to absorb bn layer into conv layer and maintain the same output as the original model. (please refer to HW2 Q4)
'''

In [None]:
# Freeze the BN-free model: this notebook only uses it for inference via predict.
model_no_bn.trainable = False

In [None]:
input_img = load_input('images/2.jpg')
# Reorder the prediction axes with transpose(0,3,1,2) so that axes 2 and 3 are
# the ones get_box reads as grid height and width.
# NOTE(review): presumably the model emits channels-last (N, H, W, C) - confirm.
output = model_no_bn.predict(input_img).transpose(0,3,1,2)
# Left as the cell's last expression so the notebook displays the decoded box.
get_box(output)

In [None]:
'''
Based on the model_no_bn quantize the weights to 16 bits, 8 bits respectively.

The requirement of quantization is given below:

* For each layer's weights, set the upper bound to the smallest 2^n that is larger than the maximum absolute value of the weights. (e.g., if the maximum value is 4.2375 and the minimum value is -7.83421, the largest absolute value is 7.83421, so the upper bound is 2^3 = 8)

* Note that for each layer, the distribution of weights could be different.

* The sign takes one bit. For example, if the upper bound is 8 and 5 bits are given for the fractional part, the value actually takes 9 bits (1 sign bit + 3 integer bits + 5 fractional bits).

* Do not quantize the bias!

and get the accuracy report
'''

In [None]:
'''
You should report the average IoU for each quantized model you get
'''
# Accumulate the IoU between the box predicted by `model_no_bn` and the
# reference box of every record, then average.
avg_iou = 0
for line in lines:
    input_img = load_input(line[0])
    # Move the channel axis ahead of the grid axes, as get_box expects.
    output = model_no_bn.predict(input_img).transpose(0,3,1,2)
    avg_iou += bbox_iou(get_box(output), line[1])
# Divide by the actual number of records rather than a hard-coded 1000 so the
# mean stays correct for any dataset size; an empty dataset still yields 0.
avg_iou = avg_iou / max(len(lines), 1)
print (avg_iou)

In [None]:
'''
Based on the model_no_bn

Now you can quantize both weights and bias parts.

Explore eight different combinations of quantization settings for the weights and bias parts, specify the details of your methods, and get the accuracy report
'''