In [1]:
from nms import cuda_nms
from nms import template
import numpy as np
from pycuda.compiler import SourceModule
import pycuda.autoinit
import pycuda.driver as drv
import string
import tensorflow as tf
import time
import pandas as pd

# Benchmark a PyCUDA NMS kernel against tf.image.non_max_suppression on the
# same boxes/scores, then compare which box ids each implementation returns.

# IoU threshold. Single source of truth: it is substituted into the CUDA
# kernel source AND passed to the TensorFlow op below, so both sides always
# run with the same value.
THETA = 0.7

# Each row of boxes is y1, x1, y2, x2.
boxes = np.loadtxt('boxes.txt', dtype=np.float32)
scores = np.loadtxt('scores.txt', dtype=np.float32)

# Only the first `count` entries take part in the comparison.
count = 6000
boxes = boxes[:count]
scores = scores[:count]

# Fill the THETA placeholder of the kernel source template and compile it.
template = string.Template(template).substitute(THETA=THETA)
modules = SourceModule(template)

# Per the original author's note, cuda_nms changes the values of the arrays
# it is given, so it receives copies and the TF run sees the untouched data.
print('cuda starts...')
# perf_counter is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted.
cuda_start = time.perf_counter()
cuda_result = cuda_nms(modules, boxes.copy(), scores.copy())
cuda_end = time.perf_counter()
cuda_time = cuda_end - cuda_start

print('tf starts...')
with tf.Session() as sess:
    # The op is built outside the timed region; only sess.run is measured.
    nms = tf.image.non_max_suppression(
        boxes, scores, max_output_size=boxes.shape[0], iou_threshold=THETA)
    # NOTE(review): this is the first sess.run of the session, so the
    # measurement presumably includes one-off TF start-up cost -- confirm
    # with a warm-up run before trusting the speed-up figure.
    tf_start = time.perf_counter()
    tf_result = sess.run(nms)
    tf_end = time.perf_counter()
    tf_time = tf_end - tf_start

tf.reset_default_graph()

# Ids returned by one implementation but not the other.
# NOTE(review): in the recorded run the second set is non-empty (TF returns
# ids the CUDA version does not), so the two are not exactly equivalent.
print('cuda - tf:', set(cuda_result)-set(tf_result))
print('tf - cuda:', set(tf_result)-set(cuda_result))

print('cuda version runs %f times faster as tf version!'%(tf_time/cuda_time))

cuda starts...
tf starts...
cuda - tf: set()
tf - cuda: {2595, 74, 1388, 2349, 5708, 913, 1362, 1621, 1048, 5181}
cuda version runs 34.434866 times faster as tf version!


In [2]:
# Ids in range(count) that are absent from the PyCUDA result, ascending.
print('Suppressed box id in PyCUDA version:')
cuda_suppressed = sorted(set(range(count)).difference(cuda_result))
print(cuda_suppressed)

Suppressed box id in PyCUDA version:
[16, 27, 49, 54, 59, 61, 74, 159, 199, 229, 232, 233, 238, 244, 246, 249, 251, 254, 267, 272, 276, 286, 292, 302, 320, 330, 334, 336, 337, 342, 358, 371, 374, 404, 411, 414, 419, 425, 430, 438, 440, 443, 445, 458, 472, 496, 507, 511, 527, 536, 540, 562, 565, 577, 581, 629, 630, 637, 650, 692, 698, 718, 720, 768, 779, 794, 805, 810, 816, 819, 823, 826, 830, 834, 868, 872, 876, 880, 882, 890, 913, 915, 936, 939, 951, 980, 982, 988, 989, 990, 991, 998, 1002, 1027, 1035, 1048, 1056, 1058, 1066, 1093, 1098, 1114, 1123, 1129, 1184, 1187, 1189, 1191, 1197, 1205, 1206, 1217, 1232, 1257, 1278, 1286, 1299, 1303, 1306, 1311, 1320, 1321, 1327, 1343, 1361, 1362, 1376, 1377, 1388, 1407, 1418, 1443, 1460, 1468, 1475, 1483, 1493, 1514, 1515, 1516, 1520, 1525, 1531, 1551, 1552, 1554, 1572, 1573, 1575, 1621, 1622, 1624, 1627, 1644, 1647, 1664, 1667, 1681, 1685, 1709, 1721, 1731, 1737, 1751, 1754, 1758, 1761, 1762, 1769, 1773, 1788, 1796, 1817, 1826, 1830, 1843, 1846,

In [3]:
# Ids in range(count) that are absent from the TensorFlow result, ascending.
print('Suppressed box id in TF version:')
tf_suppressed = sorted(set(range(count)).difference(tf_result))
print(tf_suppressed)

Suppressed box id in TF version:
[16, 27, 49, 54, 59, 61, 159, 199, 229, 232, 233, 238, 244, 246, 249, 251, 254, 267, 272, 276, 286, 292, 302, 320, 330, 334, 336, 337, 342, 358, 371, 374, 404, 411, 414, 419, 425, 430, 438, 440, 443, 445, 458, 472, 496, 507, 511, 527, 536, 540, 562, 565, 577, 581, 629, 630, 637, 650, 692, 698, 718, 720, 768, 779, 794, 805, 810, 816, 819, 823, 826, 830, 834, 868, 872, 876, 880, 882, 890, 915, 936, 939, 951, 980, 982, 988, 989, 990, 991, 998, 1002, 1027, 1035, 1056, 1058, 1066, 1093, 1098, 1114, 1123, 1129, 1184, 1187, 1189, 1191, 1197, 1205, 1206, 1217, 1232, 1257, 1278, 1286, 1299, 1303, 1306, 1311, 1320, 1321, 1327, 1343, 1361, 1376, 1377, 1407, 1418, 1443, 1460, 1468, 1475, 1483, 1493, 1514, 1515, 1516, 1520, 1525, 1531, 1551, 1552, 1554, 1572, 1573, 1575, 1622, 1624, 1627, 1644, 1647, 1664, 1667, 1681, 1685, 1709, 1721, 1731, 1737, 1751, 1754, 1758, 1761, 1762, 1769, 1773, 1788, 1796, 1817, 1826, 1830, 1843, 1846, 1851, 1854, 1868, 1872, 1874, 1887, 