-
Notifications
You must be signed in to change notification settings - Fork 0
/
count_books.py
193 lines (149 loc) · 6.18 KB
/
count_books.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import json
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import cv2
# Prompt until the user supplies an image path that can actually be opened.
while True:
    try:
        print('Image should be 4:3 or 3:4')
        path = input("Please enter image path from current directory (ex ./assets/IMG_8393.JPG): ")
        # Probe the file for readability; the context manager closes it immediately.
        with open(path, 'r'):
            pass
        break
    except IOError:
        # Fixed message typo: original printed "Couldn't file file".
        print("Couldn't find file")
        continue
# Quiet TensorFlow's info/warning chatter; only errors surface.
tf.get_logger().setLevel('ERROR')
print('Must use TensorFlow v2.2.0 or higher')
print(f'Using TensorFlow v{tf.__version__}')

# Load COCO category metadata used later to map detection class ids to names.
with open('./assets/instances_val2017.json') as ann_file:
    coco_meta = json.load(ann_file)
cats = coco_meta['categories']

# Pixel tolerance when merging nearby Hough lines into one line;
# a higher tolerance merges more lines, so fewer books are counted.
tolerance_hough = 8
# Read the image and normalize its size so the Hough parameters below behave
# consistently: portrait 4:3 -> 606x808, landscape 3:4 -> 808x606.
img = cv2.imread(path)
asp_ratio = img.shape[0] / img.shape[1]
# NOTE(review): exact float comparison — only images whose pixel dimensions
# form a perfect 4:3 / 3:4 ratio get resized; others pass through unresized.
if asp_ratio == 4 / 3:
    img = cv2.resize(img, (606, 808))
elif asp_ratio == 3 / 4:
    img = cv2.resize(img, (808, 606))
# Add a batch dimension for the TF detector; keep a copy for drawing on.
img_tensor = np.expand_dims(img, axis=0)
im_draw = img.copy()
# Blur before Canny so HoughLinesP sees clean, low-noise edges.
# (Removed dead `kernel = np.ones((3, 3), np.uint8)` — it was never used.)
img_blur = cv2.GaussianBlur(img, (15, 15), 0)
edges = cv2.Canny(img_blur, 10, 20, apertureSize=3)
img_lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 10, minLineLength=100, maxLineGap=5)
# State for the Hough pass: y-anchors of lines seen so far, the accepted
# (deduplicated, near-horizontal) segments, and a running line count.
inits_so_far, all_lines = [], []
hough_count = 0
# checks if a line is close enough to other lines to be considered the same line
def check_same(new_y, pnts_so_far, tol=None) -> bool:
    """Return True when new_y is within tol pixels of the y-anchor of any
    previously seen line.

    pnts_so_far holds [x, y] pairs; only the y component is compared.
    tol defaults to the module-level tolerance_hough, so existing
    check_same(y, pts) call sites behave exactly as before.
    """
    if tol is None:
        tol = tolerance_hough
    return any(abs(new_y - pnt[1]) <= tol for pnt in pnts_so_far)
# Keep the near-horizontal Hough segments that are not duplicates of a line
# already recorded, draw them in red, and count them.
if img_lines is not None:  # HoughLinesP returns None when no lines are found
    for line in img_lines:
        x1, y1, x2, y2 = line[0]
        midpoint_y = (y1 + y2) / 2
        # angle of hough line; near-horizontal segments with x1 < x2 give
        # theta close to +/- pi
        theta = np.arctan2(y1 - y2, x1 - x2)
        same_line = check_same(midpoint_y, inits_so_far)
        inits_so_far.append([x1, midpoint_y])
        # makes sure hough lines aren't too close and horizontal enough.
        # BUGFIX: the original second clause was `-3.08 < theta < -3.15`,
        # an empty range that could never be true, so negative-angle
        # horizontals were silently dropped.
        if not same_line and (3.08 < theta < 3.15 or -3.15 < theta < -3.08):
            all_lines.append([(x1, y1), (x2, y2)])
            hough_count += 1
            _ = cv2.line(im_draw, (x1, y1), (x2, y2), (0, 0, 255), 2)
# account for top and bottom line double counting
hough_count = hough_count - 1
# Download (first run) and load the CenterNet ResNet50-v2 object detector
# from TensorFlow Hub; requires network access the first time.
print('Loading model...')
detector = hub.load("https://tfhub.dev/tensorflow/centernet/resnet50v2_512x512_kpts/1")
print('Model loaded')
# draws bounding rectangle and labels image for book detector
def bound_label(width, height, det_box, det_idx) -> None:
    """Draw det_box (normalized [ymin, xmin, ymax, xmax]) on im_draw in green,
    put its class name above the box, and record the top-left corner in
    box_pts_so_far for later duplicate suppression."""
    ymin, xmin = det_box[0][0], det_box[0][1]
    ymax, xmax = det_box[0][2], det_box[0][3]
    top_left = (int(xmin * width), int(ymin * height))
    bot_right = (int(xmax * width), int(ymax * height))
    _ = cv2.rectangle(im_draw, top_left, bot_right, (0, 255, 0), thickness=2)
    # Center the label horizontally over the box, just above its top edge.
    label_anchor = (int((bot_right[0] + top_left[0]) / 2) - 30, top_left[1] - 10)
    _ = cv2.putText(im_draw, get_lbl(det_idx), label_anchor,
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
    box_pts_so_far.append([top_left[0], top_left[1]])
# gets the text label for a current index from json dict
def get_lbl(curr) -> str:
    """Return the COCO category name for the detection at index curr of the
    global model output `result`, looked up in the global `cats` list."""
    class_id = result['detection_classes'].numpy()[0][curr]
    matches = [cat for cat in cats if cat['id'] == class_id]
    return matches[0]['name']
# check if a bounding box is within a certain x, y tolerance to be counted as the same book
def good_box(bx, *, pts=None, tol=None, w=None, h=None) -> bool:
    """Return True when bx's top-left corner is at least tol pixels away,
    in both x and y, from every previously accepted box corner.

    bx is a normalized [ymin, xmin, ymax, xmax] box (batch dim first).
    The keyword-only arguments default to the module globals
    (box_pts_so_far, tolerance, width, height), so the original
    good_box(box) call sites behave exactly as before.
    """
    pts = box_pts_so_far if pts is None else pts
    tol = tolerance if tol is None else tol
    w = width if w is None else w
    h = height if h is None else h
    x1, y1 = int(bx[0][1] * w), int(bx[0][0] * h)
    return all(abs(x1 - pt[0]) >= tol and abs(y1 - pt[1]) >= tol for pt in pts)
# check if any line segments intersect a given box
def bx_line_intersec(top_left: tuple, bot_right: tuple) -> bool:
    """Return True when any recorded Hough line in the global all_lines
    crosses the box's perimeter or lies entirely inside the box.

    top_left and bot_right are (x, y) pixel corners of the detection box.
    BUGFIX: the original fell off the end and returned None on a miss
    (callers only used it in a boolean context); now returns False explicitly.
    """
    # box segments, going counterclockwise from top left
    seg_left = (top_left, (top_left[0], bot_right[1]))
    seg_bot = ((top_left[0], bot_right[1]), bot_right)
    seg_right = (bot_right, (bot_right[0], top_left[1]))
    seg_top = ((bot_right[0], top_left[1]), top_left)
    box_segments = [seg_left, seg_bot, seg_right, seg_top]
    for line in all_lines:
        # Containment doesn't depend on the box side, so test it once per line
        # (the original re-evaluated it for every segment).
        inside = (top_left[0] < line[0][0] < bot_right[0] and
                  top_left[1] < line[0][1] < bot_right[1] and
                  top_left[0] < line[1][0] < bot_right[0] and
                  top_left[1] < line[1][1] < bot_right[1])
        if inside or any(pass_through(line[0], line[1], seg[0], seg[1])
                         for seg in box_segments):
            return True
    return False
# check if line intersects another line
def pass_through(a: tuple, b: tuple, c: tuple, d: tuple) -> bool:
    """True when segment a-b properly crosses segment c-d.

    Standard orientation test: the segments cross iff a and b lie on
    opposite sides of line c-d AND c and d lie on opposite sides of
    line a-b. Collinear overlaps count as no crossing.
    """
    ab_straddles_cd = pt_help(a, c, d) != pt_help(b, c, d)
    cd_straddles_ab = pt_help(a, b, c) != pt_help(a, b, d)
    return ab_straddles_cd and cd_straddles_ab
# helper for pass through
def pt_help(a: tuple, b: tuple, c: tuple) -> bool:
    """True when a, b, c make a counterclockwise turn (2D cross-product sign)."""
    lhs = (c[1] - a[1]) * (b[0] - a[0])
    rhs = (b[1] - a[1]) * (c[0] - a[0])
    return lhs > rhs
# threshold (only take classifications with this or greater confidence)
thresh = 0.18
# tolerance (pixels) between top left detections
# higher tolerance is less books counted
tolerance = 0
# initialize vars for cn model
cn_count = 0
box_pts_so_far = []
# Run the detector on the batched image, then cache frame dimensions
# for converting the model's normalized box coordinates to pixels.
result = detector(img_tensor)
height, width = im_draw.shape[0], im_draw.shape[1]
# only classify confident images
res_scores = result['detection_scores'].numpy()[0]
res_scores_confident = np.extract(res_scores >= thresh, res_scores)
boxes = result['detection_boxes'].numpy()
# Count every confident detection labelled "book" that survives the
# duplicate filters (hough-line overlap, proximity to a prior box).
for det_idx, _score in enumerate(res_scores_confident):
    box = boxes[:, det_idx, :]
    # get box bounds from model (normalized [ymin, xmin, ymax, xmax])
    top_left = (int(box[0][1] * width), int(box[0][0] * height))
    bot_right = (int(box[0][3] * width), int(box[0][2] * height))
    # ensure box isn't double counted with a hough line or too close to another box
    no_line_overlap = not bx_line_intersec(top_left, bot_right)
    if no_line_overlap and good_box(box) and get_lbl(det_idx) == "book":
        bound_label(width, height, box, det_idx)
        cn_count += 1
# ensure hough count isn't -1 (it was decremented once even when no lines hit)
book_ct = cn_count + hough_count if hough_count > 0 else cn_count
print(f'Final count: {book_ct}')
# Stamp the total in the bottom-right corner and show the annotated image.
count_anchor = (im_draw.shape[1] - 180, im_draw.shape[0] - 30)
cv2.putText(im_draw, f'Count: {book_ct}', count_anchor,
            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
cv2.imshow("Analyzed books", im_draw)
print('See popup window and press any key to exit')
cv2.waitKey(0)