-
Notifications
You must be signed in to change notification settings - Fork 0
/
6.crop_image_by_SSD512.py
198 lines (164 loc) · 6.79 KB
/
6.crop_image_by_SSD512.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
# Object Detection using SSD512 (Single Shot Multibox Detector).
# Builds an SSD512 model pretrained on Pascal VOC and uses it below to
# detect dogs in images and crop them out.
# Libraries
from keras import backend as K
from keras.preprocessing import image
from keras.optimizers import Adam
import imageio
import numpy as np
import os
import cv2
# Dependencies (local ssd512 package: model builder + training loss)
from ssd512.keras_ssd512 import ssd_512
from ssd512.keras_ssd_loss import SSDLoss
# Fixed network input size for SSD512 (height x width)
height = 512
width = 512
# Minimum class confidence for a detection to be kept (see detect_object)
confidence_threshold = 0.5
# The 21 Pascal VOC classes; index 0 is the background class.
# Predicted class ids index into this list.
classes = ['background',
           'aeroplane', 'bicycle', 'bird', 'boat',
           'bottle', 'bus', 'car', 'cat',
           'chair', 'cow', 'diningtable', 'dog',
           'horse', 'motorbike', 'person', 'pottedplant',
           'sheep', 'sofa', 'train', 'tvmonitor']
K.clear_session()  # Clear previous models from memory.
# Creating the model and loading pretrained weights.
# NOTE(review): these anchor/scale/step settings match the stock SSD512
# VOC configuration shipped with the weights below — do not change one
# without the other.
model = ssd_512(image_size=(height, width, 3),  # dimensions of the input images (fixed for SSD512)
                n_classes=20,  # Number of classes in VOC 2007 & 2012 dataset
                mode='inference',
                l2_regularization=0.0005,
                scales=[0.07, 0.15, 0.3, 0.45, 0.6, 0.75, 0.9, 1.05],
                aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                         [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                                         [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                                         [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                                         [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                                         [1.0, 2.0, 0.5],
                                         [1.0, 2.0, 0.5]],
                two_boxes_for_ar1=True,
                steps=[8, 16, 32, 64, 128, 256, 512],
                offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
                clip_boxes=False,
                variances=[0.1, 0.1, 0.2, 0.2],
                normalize_coords=True,
                subtract_mean=[123, 117, 104],
                swap_channels=[2, 1, 0],
                confidence_thresh=0.5,
                iou_threshold=0.45,
                top_k=200,
                nms_max_output_size=400)
# Path of the pretrained model weights (SSD512 fine-tuned on VOC07+12)
weights_path = 'ssd512/VGG_VOC0712Plus_SSD_512x512_ft_iter_160000.h5'
model.load_weights(weights_path, by_name=True)
# Compiling the model (required by Keras even for inference-only use)
adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
model.compile(optimizer=adam, loss=ssd_loss.compute_loss)
def transform(input_image):
    """Resize an image to the 512x512 input resolution expected by SSD512."""
    target_size = (512, 512)
    return cv2.resize(input_image, target_size, interpolation=cv2.INTER_CUBIC)
# Function to detect dogs in one image and save square crops of each detection.
def detect_object(copy_image, dog_name, file):
    """Run SSD512 on one RGB image, crop every detected dog, and save the crops.

    Args:
        copy_image: RGB image array of shape (H, W, 3).
        dog_name: breed/class folder name used in the output path.
        file: source file name, used to build the crop file names.

    Side effects: writes one JPEG per detected dog to new_dir/dog_name/.
    Reads module-level globals: model, classes, width, height,
    confidence_threshold, new_dir.
    """
    original_image_height, original_image_width = copy_image.shape[:2]
    input_image = transform(copy_image)
    input_image = np.reshape(input_image, (1, 512, 512, 3))
    y_pred = model.predict(input_image)
    # Keep only detections whose confidence exceeds the threshold.
    actual_prediction = [y_pred[k][y_pred[k, :, 1] > confidence_threshold]
                         for k in range(y_pred.shape[0])]
    for i, box in enumerate(actual_prediction[0]):
        # Scale box corners from 512x512 network space back to the original image.
        left = box[-4] * original_image_width / width
        top = box[-3] * original_image_height / height
        right = box[-2] * original_image_width / width
        bottom = box[-1] * original_image_height / height
        detect_width = right - left
        detect_height = bottom - top
        # Center of the detection box.
        # BUG FIX: original used (right - left)/2 and (bottom - top)/2,
        # which is half the box SIZE, not the midpoint between the sides.
        x = (right + left) / 2
        y = (bottom + top) / 2
        if classes[int(box[0])] == 'dog':
            # Make the crop square, sized by the larger side of the detection.
            # BUG FIX: original tested the global network input size
            # (width >= height, always 512 >= 512) instead of comparing the
            # detection's own width and height.
            half = max(detect_width, detect_height) / 2
            left = int(x - half)
            top = int(y - half)
            right = int(x + half)
            bottom = int(y + half)
            # Add a fixed 100 px margin on every side.
            left -= 100
            right += 100
            top -= 100
            bottom += 100
            # If the box runs off the left/top edge, shrink the opposite axis
            # symmetrically to roughly preserve the square aspect.
            if left < 0:
                margin = np.abs(left)
                left = 0
                top = int(top + margin / 2)
                bottom = int(bottom - margin / 2)
            if top < 0:
                margin = np.abs(top)
                top = 0
                left = int(left + margin / 2)
                right = int(right - margin / 2)
            # FIX: clamp the far edges so the slice never exceeds image bounds.
            right = min(right, original_image_width)
            bottom = min(bottom, original_image_height)
            crop_image = copy_image[int(top):int(bottom), int(left):int(right)]
            crop_image = cv2.cvtColor(crop_image, cv2.COLOR_RGB2BGR)
            # BUG FIX: original did file.replace('./jpeg', ''), which never
            # matches a bare file name; strip the real extension instead.
            stem = os.path.splitext(file)[0]
            save_path = f"{new_dir}/{dog_name}/{stem}_{i}.jpeg"
            cv2.imwrite(save_path, crop_image)
# Detecting objects in images: walk the merged dataset, mirror its directory
# structure under new_dir, and save a cropped image per detected dog.
base_path = '../data/'
old_dir = os.path.join(base_path, '5.merge(stanfold,identification,crawling)_120').replace('\\', '/')
new_dir = '../data/7.image crop by ssd512'
for root, dirs, files in os.walk(old_dir):
    dog_name = os.path.split(root)[1]
    # Create the per-breed output directory.
    # FIX: os.mkdir fails when new_dir itself does not exist yet and raises
    # on reruns; makedirs with exist_ok=True handles both. Best-effort: log
    # and continue on other OS errors (e.g. permissions), as before.
    new_dog_dir = os.path.join(new_dir, dog_name)
    try:
        os.makedirs(new_dog_dir, exist_ok=True)
    except OSError as err:
        print(err)
    # Crop and save each image in this directory.
    for file in files:
        input_image_path = os.path.join(root, file)
        original_image = cv2.imread(input_image_path, 1)
        if original_image is not None:
            # cv2 loads BGR; the detection pipeline works in RGB.
            original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
            copy_image = original_image.copy()
            detect_object(copy_image, dog_name, file)  # detecting objects
# Detecting objects in video
# for file in os.listdir(input_video_path):
# print('Reading', file)
# video_reader = imageio.get_reader(os.path.join(input_video_path, file)) # Reading video
# fps = video_reader.get_meta_data()['fps'] # getting fps of the video
# video_writer = imageio.get_writer(os.path.join(output_video_path, file), fps = fps) # Writing back output image
# for i, frame in enumerate(video_reader):
# output_frame = detect_object(frame) # detecting objects frame by frame
# video_writer.append_data(output_frame) # appending frame to video
# print('frame ', i, 'done')
# video_writer.close()
# This section is for realtime object detection on any video or movie or through webcam or any camera.
# If you want to do, follow these steps
# 1. Uncomment this part.
# 2. Put the path of video or movie here. You can also use your webcam or other camera(if there).
# 3. for webcam use just put 0 in path (without quotes)
# 4. for secondary camera, use 1 in path.
# 5. Once the video started, you can see the realtime object detection
# 6. Press 'q' button on the keyboard to exit.
"""
video_capture = cv2.VideoCapture(0) #Put path in bracket here
while video_capture.isOpened():
_, frame = video_capture.read()
canvas = detect_object(frame)
cv2.imshow('Video', canvas)
if cv2.waitKey(1) & 0xFF == ord('q'): # To stop the loop.
break
video_capture.release() # We turn the webcam/video off.
cv2.destroyAllWindows() # We destroy all the windows inside which the images were displayed.
"""