diff --git a/examples/classification_sample.py b/examples/classification_sample.py
index e82e995dc..d76ff5dfb 100644
--- a/examples/classification_sample.py
+++ b/examples/classification_sample.py
@@ -32,7 +32,7 @@
 import tensorflow as tf
 import openvino_tensorflow as ovtf
 import time
-from subprocess import check_output, call
+import cv2
 
 
 def load_graph(model_file):
@@ -47,37 +47,24 @@ def load_graph(model_file):
     return graph
 
 
-def read_tensor_from_image_file(file_name,
+def read_tensor_from_image_file(image_file,
                                 input_height=299,
                                 input_width=299,
                                 input_mean=0,
                                 input_std=255):
-    input_name = "file_reader"
-    output_name = "normalized"
-    file_reader = tf.io.read_file(file_name, input_name)
-    if file_name.endswith(".png"):
-        image_reader = tf.image.decode_png(
-            file_reader, channels=3, name="png_reader")
-    elif file_name.endswith(".gif"):
-        image_reader = tf.squeeze(
-            tf.image.decode_gif(file_reader, name="gif_reader"))
-    elif file_name.endswith(".bmp"):
-        image_reader = tf.image.decode_bmp(file_reader, name="bmp_reader")
-    else:
-        image_reader = tf.image.decode_jpeg(
-            file_reader, channels=3, name="jpeg_reader")
-    float_caster = tf.cast(image_reader, tf.float32)
-    dims_expander = tf.expand_dims(float_caster, 0)
-    resized = tf.compat.v1.image.resize_bilinear(dims_expander,
-                                                 [input_height, input_width])
-    normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std])
-    result = normalized.eval()
-
+    assert os.path.exists(image_file), "Could not find image file path"
+    image = cv2.imread(image_file)
+    resized = cv2.resize(image, (input_height, input_width))
+    img = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
+    resized_image = img.astype(np.float32)
+    normalized_image = (resized_image - input_mean) / input_std
+    result = np.expand_dims(normalized_image, 0)
     return result
 
 
 def load_labels(label_file):
     label = []
+    assert os.path.exists(label_file), "Could not find label file path"
     proto_as_ascii_lines = tf.io.gfile.GFile(label_file).readlines()
     for l in proto_as_ascii_lines:
         label.append(l.rstrip())
@@ -97,16 +84,33 @@ def load_labels(label_file):
     backend_name = "CPU"
 
     parser = argparse.ArgumentParser()
-    parser.add_argument("--graph", help="graph/model to be executed")
-    parser.add_argument("--input_layer", help="name of input layer")
-    parser.add_argument("--output_layer", help="name of output layer")
-    parser.add_argument("--labels", help="name of file containing labels")
-    parser.add_argument("--image", help="image to be processed")
-    parser.add_argument("--input_height", type=int, help="input height")
-    parser.add_argument("--input_width", type=int, help="input width")
-    parser.add_argument("--input_mean", type=int, help="input mean")
-    parser.add_argument("--input_std", type=int, help="input std")
-    parser.add_argument("--backend", help="backend option. Default is CPU")
+    parser.add_argument(
+        "--graph", help="Optional. Path to graph/model to be executed.")
+    parser.add_argument("--input_layer", help="Optional. Name of input layer.")
+    parser.add_argument(
+        "--output_layer", help="Optional. Name of output layer.")
+    parser.add_argument(
+        "--labels", help="Optional. Path to labels mapping file.")
+    parser.add_argument(
+        "--image", help="Optional. Input image to be processed. ")
+    parser.add_argument(
+        "--input_height",
+        type=int,
+        help="Optional. Specify input height value. ")
+    parser.add_argument(
+        "--input_width", type=int, help="Optional. Specify input width value.")
+    parser.add_argument(
+        "--input_mean", type=int, help="Optional. Specify input mean value.")
+    parser.add_argument(
+        "--input_std", type=int, help="Optional. Specify input std value.")
+    parser.add_argument(
+        "--backend",
+        help="Optional. Specify the target device to infer on;"
+        "CPU, GPU, MYRIAD, or VAD-M is acceptable. Default value is CPU.")
+    parser.add_argument(
+        "--disable_ovtf",
+        help="Optional. Disable openvino_tensorflow pass and run on stock TF.",
+        action='store_true')
     args = parser.parse_args()
 
     if args.graph:
@@ -143,12 +147,15 @@ def load_labels(label_file):
     input_operation = graph.get_operation_by_name(input_name)
     output_operation = graph.get_operation_by_name(output_name)
 
-    #Print list of available backends
-    print('Available Backends:')
-    backends_list = ovtf.list_backends()
-    for backend in backends_list:
-        print(backend)
-    ovtf.set_backend(backend_name)
+    if not args.disable_ovtf:
+        #Print list of available backends
+        print('Available Backends:')
+        backends_list = ovtf.list_backends()
+        for backend in backends_list:
+            print(backend)
+        ovtf.set_backend(backend_name)
+    else:
+        ovtf.disable()
 
     # Initialize session and run
     config = tf.compat.v1.ConfigProto()
@@ -169,7 +176,7 @@ def load_labels(label_file):
         results = sess.run(output_operation.outputs[0],
                            {input_operation.outputs[0]: t})
         elapsed = time.time() - start
-        print('Inference time in ms: %f' % (elapsed * 1000))
+        print('Inference time in ms: %.2f' % (elapsed * 1000))
         results = np.squeeze(results)
 
         # print labels
diff --git a/examples/classification_sample_video.py b/examples/classification_sample_video.py
index 85f94dd21..a3656abaf 100644
--- a/examples/classification_sample_video.py
+++ b/examples/classification_sample_video.py
@@ -51,13 +51,11 @@ def read_tensor_from_image_file(frame,
                                 input_width=299,
                                 input_mean=0,
                                 input_std=255):
-    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
-    float_caster = tf.cast(frame, tf.float32)
-    dims_expander = tf.expand_dims(float_caster, 0)
-    resized = tf.compat.v1.image.resize_bilinear(dims_expander,
-                                                 [input_height, input_width])
-    normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std])
-    result = normalized.eval()
+    resized = cv2.resize(frame, (input_height, input_width))
+    img = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
+    resized_image = img.astype(np.float32)
+    normalized_image = (resized_image - input_mean) / input_std
+    result = np.expand_dims(normalized_image, 0)
 
     return result
 
@@ -88,19 +86,35 @@ def load_labels(label_file):
     font_thickness = 2
 
     parser = argparse.ArgumentParser()
-    parser.add_argument("--graph", help="graph/model to be executed")
-    parser.add_argument("--input_layer", help="name of input layer")
-    parser.add_argument("--output_layer", help="name of output layer")
-    parser.add_argument("--labels", help="name of file containing labels")
     parser.add_argument(
-        "--input",
-        help="input (0 - for camera / absolute video file path) to be processed"
-    )
-    parser.add_argument("--input_height", type=int, help="input height")
-    parser.add_argument("--input_width", type=int, help="input width")
-    parser.add_argument("--input_mean", type=int, help="input mean")
-    parser.add_argument("--input_std", type=int, help="input std")
-    parser.add_argument("--backend", help="backend option. Default is CPU")
+        "--graph", help="Optional. Path to graph/model to be executed.")
+    parser.add_argument("--input_layer", help="Optional. Name of input layer.")
+    parser.add_argument(
+        "--output_layer", help="Optional. Name of output layer.")
+    parser.add_argument(
+        "--labels", help="Optional. Path to labels mapping file.")
+    parser.add_argument(
+        "--input", help="Optional. An input video file to process.")
+    parser.add_argument(
+        "--input_height",
+        type=int,
+        help="Optional. Specify input height value.")
+    parser.add_argument(
+        "--input_width", type=int, help="Optional. Specify input width value.")
+    parser.add_argument(
+        "--input_mean", type=int, help="Optional. Specify input mean value.")
+    parser.add_argument(
+        "--input_std", type=int, help="Optional. Specify input std value.")
+    parser.add_argument(
+        "--backend",
+        help="Optional. Specify the target device to infer on; "
+        "CPU, GPU, MYRIAD or VAD-M is acceptable. Default value is CPU.")
+    parser.add_argument(
+        "--no_show", help="Optional. Don't show output.", action='store_true')
+    parser.add_argument(
+        "--disable_ovtf",
+        help="Optional. Disable openvino_tensorflow pass and run on stock TF.",
+        action='store_true')
    args = parser.parse_args()
 
     if args.graph:
@@ -137,17 +151,22 @@ def load_labels(label_file):
     input_operation = graph.get_operation_by_name(input_name)
     output_operation = graph.get_operation_by_name(output_name)
 
-    #Print list of available backends
-    print('Available Backends:')
-    backends_list = ovtf.list_backends()
-    for backend in backends_list:
-        print(backend)
-    ovtf.set_backend(backend_name)
+    if not args.disable_ovtf:
+        #Print list of available backends
+        print('Available Backends:')
+        backends_list = ovtf.list_backends()
+        for backend in backends_list:
+            print(backend)
+        ovtf.set_backend(backend_name)
+    else:
+        ovtf.disable()
 
     #Load the labels
     if label_file:
         labels = load_labels(label_file)
 
+    # Read input video file
+    assert os.path.exists(input_file), "Could not find input video file path"
     cap = cv2.VideoCapture(input_file)
 
     # Initialize session and run
@@ -156,6 +175,7 @@ def load_labels(label_file):
     while cap.isOpened():
         ret, frame = cap.read()
         if ret is True:
+            # preprocessing
             t = read_tensor_from_image_file(
                 frame,
                 input_height=input_height,
@@ -163,13 +183,13 @@ def load_labels(label_file):
                 input_mean=input_mean,
                 input_std=input_std)
 
-            # Run
+            # run
             start = time.time()
             results = sess.run(output_operation.outputs[0],
                                {input_operation.outputs[0]: t})
             elapsed = time.time() - start
             fps = 1 / elapsed
-            print('Inference time in ms: %f' % (elapsed * 1000))
+            print('Inference time in ms: %.2f' % (elapsed * 1000))
             results = np.squeeze(results)
 
             # print labels
@@ -194,10 +214,12 @@ def load_labels(label_file):
                 print(
                     "No label file provided. Cannot print classification results"
                 )
-            cv2.imshow("results", frame)
-            if cv2.waitKey(1) & 0XFF == ord('q'):
-                break
+            if not args.no_show:
+                cv2.imshow("results", frame)
+                if cv2.waitKey(1) & 0XFF == ord('q'):
+                    break
         else:
-            print("Completed")
+            break
+    sess.close()
     cap.release()
     cv2.destroyAllWindows()
diff --git a/examples/object_detection_sample.py b/examples/object_detection_sample.py
index 7dd355067..a0c5c9f15 100644
--- a/examples/object_detection_sample.py
+++ b/examples/object_detection_sample.py
@@ -48,6 +48,7 @@ def load_graph(model_file):
 
 def letter_box_image(image_path, input_height, input_width,
                      fill_value) -> np.ndarray:
+    assert os.path.exists(image_path), "Could not find image path"
     image = Image.open(image_path)
     height_ratio = float(input_height) / image.size[1]
     width_ratio = float(input_width) / image.size[0]
@@ -67,10 +68,10 @@ def letter_box_image(image_path, input_height, input_width,
     return to_return, image
 
 
-def load_coco_names(file_name):
+def load_coco_names(label_file):
     names = {}
-    assert os.path.exists(file_name), "path doesn't exist {0}".format(file_name)
-    with open(file_name) as f:
+    assert os.path.exists(label_file), "could not find label file path"
+    with open(label_file) as f:
         for id, name in enumerate(f):
             names[id] = name
     return names
@@ -195,27 +196,46 @@ def non_max_suppression(predictions_with_boxes,
     iou_threshold = 0.5
 
     parser = argparse.ArgumentParser()
-    parser.add_argument("--graph", help="graph/model to be executed")
-    parser.add_argument("--input_layer", help="name of input layer")
-    parser.add_argument("--output_layer", help="name of output layer")
-    parser.add_argument("--labels", help="name of file containing labels")
-    parser.add_argument("--image", help="image to be processed")
-    parser.add_argument("--input_height", type=int, help="input height")
-    parser.add_argument("--input_width", type=int, help="input width")
-    parser.add_argument("--input_mean", type=int, help="input mean")
-    parser.add_argument("--input_std", type=int, help="input std")
-    parser.add_argument("--backend", help="name of backend. Default is CPU")
+    parser.add_argument(
+        "--graph", help="Optional. Path to graph/model to be executed.")
+    parser.add_argument("--input_layer", help="Optional. Name of input layer.")
+    parser.add_argument(
+        "--output_layer", help="Optional. Name of output layer.")
+    parser.add_argument(
+        "--labels", help="Optional. Path to labels mapping file.")
+    parser.add_argument(
+        "--image", help="Optional. Input image to be processed.")
+    parser.add_argument(
+        "--input_height",
+        type=int,
+        help="Optional. Specify input height value.")
+    parser.add_argument(
+        "--input_width", type=int, help="Optional. Specify input width value.")
+    parser.add_argument(
+        "--input_mean", type=int, help="Optional. Specify input mean value.")
+    parser.add_argument(
+        "--input_std", type=int, help="Optional. Specify input std value.")
+    parser.add_argument(
+        "--backend",
+        help="Optional. Specify the target device to infer on;"
+        "CPU, GPU, MYRIAD, or VAD-M is acceptable. Default value is CPU.")
     parser.add_argument(
         "--output_dir",
-        help="Directory that stores updated image."
-        " Default is directory from where this sample is launched.")
+        help="Optional. Directory that stores the output"
+        " image with bounding boxes. Default is directory from where this sample is launched."
+    )
     parser.add_argument(
         "--conf_threshold",
         type=float,
-        help="confidence threshold. Default is 0.6")
+        help="Optional. Specify confidence threshold. Default is 0.6.")
     parser.add_argument(
-        "--iou_threshold", type=float, help="iou threshold. Default is 0.5")
-
+        "--iou_threshold",
+        type=float,
+        help="Optional. Specify iou threshold. Default is 0.5.")
+    parser.add_argument(
+        "--disable_ovtf",
+        help="Optional. Disable openvino_tensorflow pass and run on stock TF",
+        action='store_true')
     args = parser.parse_args()
     if args.graph:
         model_file = args.graph
@@ -265,12 +285,15 @@ def non_max_suppression(predictions_with_boxes,
     input_operation = graph.get_operation_by_name(input_name)
     output_operation = graph.get_operation_by_name(output_name)
 
-    #Print list of available backends
-    print('Available Backends:')
-    backends_list = ovtf.list_backends()
-    for backend in backends_list:
-        print(backend)
-    ovtf.set_backend(backend_name)
+    if not args.disable_ovtf:
+        #Print list of available backends
+        print('Available Backends:')
+        backends_list = ovtf.list_backends()
+        for backend in backends_list:
+            print(backend)
+        ovtf.set_backend(backend_name)
+    else:
+        ovtf.disable()
 
     # Initialize session and run
     config = tf.compat.v1.ConfigProto()
@@ -284,7 +307,7 @@ def non_max_suppression(predictions_with_boxes,
        detected_boxes = sess.run(output_operation.outputs[0],
                                  {input_operation.outputs[0]: [img_resized]})
        elapsed = time.time() - start
-       print('Inference time in ms: %f' % (elapsed * 1000))
+       print('Inference time in ms: %.2f' % (elapsed * 1000))
 
        # apply non max suppresion, draw boxes and save updated image
        filtered_boxes = non_max_suppression(detected_boxes, conf_threshold,
diff --git a/examples/object_detection_sample_video.py b/examples/object_detection_sample_video.py
index 4c40f2024..f02f1fbc1 100644
--- a/examples/object_detection_sample_video.py
+++ b/examples/object_detection_sample_video.py
@@ -47,30 +47,9 @@ def load_graph(model_file):
     return graph
 
 
-def letter_box_image(image_path, input_height, input_width,
-                     fill_value) -> np.ndarray:
-    image = Image.open(image_path)
-    height_ratio = float(input_height) / image.size[1]
-    width_ratio = float(input_width) / image.size[0]
-    fit_ratio = min(width_ratio, height_ratio)
-    fit_height = int(image.size[1] * fit_ratio)
-    fit_width = int(image.size[0] * fit_ratio)
-    fit_image = np.asarray(
-        image.resize((fit_width, fit_height), resample=Image.BILINEAR))
-
-    fill_value = np.full(fit_image.shape[2], fill_value, fit_image.dtype)
-    to_return = np.tile(fill_value, (input_height, input_width, 1))
-    pad_top = int(0.5 * (input_height - fit_height))
-    pad_left = int(0.5 * (input_width - fit_width))
-    to_return[pad_top:pad_top + fit_height, pad_left:pad_left +
-              fit_width] = fit_image
-
-    return to_return, image
-
-
 def load_coco_names(file_name):
     names = {}
-    assert os.path.exists(file_name), "path doesn't exist {0}".format(file_name)
+    assert os.path.exists(file_name), "could not find label file path"
     with open(file_name) as f:
         for coco_id, name in enumerate(f):
             names[coco_id] = name
@@ -116,7 +95,7 @@ def draw_boxes(boxes, img, cls_names, detection_size, is_letter_box_image):
                 box[:2],
                 '{} {:.2f}%'.format(cls_names[cls], score * 100),
                 fill=color)
-
+            print('{},{:.2f}'.format(cls_names[cls].rstrip(), score * 100))
     # converting PIL image back to OpenCV format
     im_np = np.asarray(img)
     im_np = cv2.cvtColor(im_np, cv2.COLOR_RGB2BGR)
@@ -207,30 +186,44 @@ def non_max_suppression(predictions_with_boxes,
     font_thickness = 2
 
     parser = argparse.ArgumentParser()
-    parser.add_argument("--graph", help="graph/model to be executed")
-    parser.add_argument("--input_layer", help="name of input layer")
-    parser.add_argument("--output_layer", help="name of output layer")
-    parser.add_argument("--labels", help="name of file containing labels")
     parser.add_argument(
-        "--input",
-        help="input (0 - for camera / absolute video file path) to be processed"
-    )
-    parser.add_argument("--input_height", type=int, help="input height")
-    parser.add_argument("--input_width", type=int, help="input width")
-    parser.add_argument("--input_mean", type=int, help="input mean")
-    parser.add_argument("--input_std", type=int, help="input std")
-    parser.add_argument("--backend", help="name of backend. Default is CPU")
+        "--graph", help="Optional. Path to graph/model to be executed.")
+    parser.add_argument("--input_layer", help="Optional. Name of input layer.")
+    parser.add_argument(
+        "--output_layer", help="Optional. Name of output layer.")
+    parser.add_argument(
+        "--labels", help="Optional. Path to labels mapping file.")
+    parser.add_argument(
+        "--input", help="Optional. An input video file to be processed.")
+    parser.add_argument(
+        "--input_height",
+        type=int,
+        help="Optional. Specify input height value.")
+    parser.add_argument(
+        "--input_width", type=int, help="Optional. Specify input width value.")
+    parser.add_argument(
+        "--input_mean", type=int, help="Optional. Specify input mean value.")
     parser.add_argument(
-        "--output_dir",
-        help="Directory that stores updated image."
-        " Default is directory from where this sample is launched.")
+        "--input_std", type=int, help="Optional. Specify input std value.")
+    parser.add_argument(
+        "--backend",
+        help="Optional. Specify the target device to infer on; "
+        "CPU, GPU, MYRIAD, or VAD-M is acceptable. Default value is CPU.")
+    parser.add_argument(
+        "--no_show", help="Optional. Don't show output.", action='store_true')
     parser.add_argument(
         "--conf_threshold",
         type=float,
-        help="confidence threshold. Default is 0.6")
+        help="Optional. Specify confidence threshold. Default is 0.6.")
     parser.add_argument(
-        "--iou_threshold", type=float, help="iou threshold. Default is 0.5")
-
+        "--iou_threshold",
+        type=float,
+        help="Optional. Specify iou threshold. Default is 0.5.")
+    parser.add_argument(
+        "--disable_ovtf",
+        help="Optional. "
+        "Disable openvino_tensorflow pass and run on stock TF.",
+        action='store_true')
     args = parser.parse_args()
     if args.graph:
         model_file = args.graph
@@ -258,8 +251,6 @@ def non_max_suppression(predictions_with_boxes,
         input_std = args.input_std
     if args.backend:
         backend_name = args.backend
-    if args.output_dir:
-        output_dir = args.output_dir
     if args.conf_threshold:
         conf_threshold = args.conf_threshold
     if args.iou_threshold:
@@ -275,14 +266,18 @@ def non_max_suppression(predictions_with_boxes,
     input_operation = graph.get_operation_by_name(input_name)
     output_operation = graph.get_operation_by_name(output_name)
 
-    # Print list of available backends
-    print('Available Backends:')
-    backends_list = ovtf.list_backends()
-    for backend in backends_list:
-        print(backend)
-    ovtf.set_backend(backend_name)
+    if not args.disable_ovtf:
+        # Print list of available backends
+        print('Available Backends:')
+        backends_list = ovtf.list_backends()
+        for backend in backends_list:
+            print(backend)
+        ovtf.set_backend(backend_name)
+    else:
+        ovtf.disable()
 
     # open capturing device
+    assert os.path.exists(input_file), "Could not find input video file path"
     cap = cv2.VideoCapture(input_file)
 
     # Initialize session and run
@@ -294,6 +289,7 @@ def non_max_suppression(predictions_with_boxes,
             # pre-processing steps
             img = frame
             img_resized = cv2.resize(frame, (input_height, input_width))
+
             # Run
             frameID = cap.get(cv2.CAP_PROP_POS_FRAMES)
             start = time.time()
@@ -302,7 +298,7 @@ def non_max_suppression(predictions_with_boxes,
             detected_boxes = sess.run(output_operation.outputs[0],
                                       {input_operation.outputs[0]: [img_resized]})
             elapsed = time.time() - start
             fps = 1 / elapsed
-
+            print('Inference time in ms: %.2f' % (elapsed * 1000))
             # post-processing - apply non max suppression, draw boxes and save updated image
             filtered_boxes = non_max_suppression(
                 detected_boxes, conf_threshold, iou_threshold)
@@ -323,10 +319,12 @@ def non_max_suppression(predictions_with_boxes,
                 img_bbox, 'FPS : {0} | Inference Time : {1}ms'.format(
                     int(fps), round((elapsed * 1000), 2)), (30, 80), font,
                 font_size, color, font_thickness)
-            cv2.imshow("detections", img_bbox)
-            if cv2.waitKey(1) & 0XFF == ord('q'):
-                break
+            if not args.no_show:
+                cv2.imshow("detections", img_bbox)
+                if cv2.waitKey(1) & 0XFF == ord('q'):
+                    break
         else:
-            print("Completed")
+            break
+    sess.close()
     cap.release()
     cv2.destroyAllWindows()
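Note: all four samples now gate backend selection behind the new --disable_ovtf flag in the same way. The sketch below restates that shared pattern as a standalone helper for reference; configure_backend is a hypothetical name, not part of this change, and the only openvino_tensorflow calls assumed are the ones already used above (list_backends, set_backend, disable).

import openvino_tensorflow as ovtf

def configure_backend(backend_name="CPU", disable_ovtf=False):
    # Mirrors the toggle added to the samples: either pick an OpenVINO backend
    # or fall back to stock TensorFlow by disabling the openvino_tensorflow pass.
    if not disable_ovtf:
        print('Available Backends:')
        for backend in ovtf.list_backends():
            print(backend)
        ovtf.set_backend(backend_name)
    else:
        ovtf.disable()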