In [1]:
from nms import cuda_nms
from nms import template
import numpy as np
from pycuda.compiler import SourceModule
import pycuda.autoinit
import pycuda.driver as drv
import string
import tensorflow as tf
import time
import pandas as pd

THETA=0.7
    
#y1, x1, y2, x2
boxes = np.loadtxt('boxes.txt', dtype=np.float32)
scores = np.loadtxt('scores.txt', dtype=np.float32)

boxes = boxes[:1000]
scores = scores[:1000]

template = string.Template(template)
template = template.substitute(THETA=THETA)
modules = SourceModule(template) 
# python function will change array's value, so use .copy()

cuda_times = []
for _ in range(50):
    cuda_start = time.time()
    cuda_result = cuda_nms(modules, boxes.copy(), scores.copy())
    cuda_end = time.time()
    cuda_times.append(cuda_end-cuda_start)

print('Statistics on PyCUDA version of NMS running time:')
cuda_times = pd.Series(cuda_times)
print(cuda_times.describe())
print()

tf_times = []
with tf.Session() as sess:
     for _ in range(50):
        nms = tf.image.non_max_suppression(boxes, scores, max_output_size=boxes.shape[0],iou_threshold=0.7)
        tf_start = time.time()
        tf_result = sess.run(nms)
        tf_end = time.time()
        tf_times.append(tf_end-tf_start)

print('Statistics on tf version of NMS running time:')
tf_times = pd.Series(tf_times)
print(tf_times.describe())
print()

print('cuda - tf:', set(cuda_result)-set(tf_result))
print('tf - cuda:', set(tf_result)-set(cuda_result))

print('cuda version runs %f times faster as tf version!'%(tf_times.mean(axis=0)/cuda_times.mean(axis=0)))

Statistics on PyCUDA version of NMS running time:
count    50.000000
mean      0.002375
std       0.000168
min       0.002307
25%       0.002315
50%       0.002325
75%       0.002371
max       0.003174
dtype: float64

Statistics on tf version of NMS running time:
count    50.000000
mean      0.011008
std       0.001389
min       0.008626
25%       0.009733
50%       0.010893
75%       0.012536
max       0.013332
dtype: float64

cuda - tf: set()
tf - cuda: {913, 74}
cuda version runs 4.635274 times faster as tf version!
