From 7cecee805eea5fb4944f4a0433ac05a2744a4df0 Mon Sep 17 00:00:00 2001 From: khalid-davis Date: Thu, 28 Jan 2021 15:51:29 +0800 Subject: [PATCH] Update the lib code and example for review comment Signed-off-by: khalid-davis --- .../tensorflow/tensorflow-2.3.Dockerfile | 2 +- examples/helmet_detection/training/train.py | 70 ------ .../README.md | 77 +++++-- .../training/data_gen.py | 25 ++- .../training/eval.py | 11 +- .../training/inference.py | 2 +- .../training/interface.py | 4 +- .../training/resnet18.py | 38 ++-- .../training/train.py | 87 ++++++++ .../training/validate_utils.py | 32 +-- .../training/yolo3_multiscale.py | 208 +++--------------- .../incremental_learning.py | 15 +- lib/requirements.txt | 2 - 13 files changed, 234 insertions(+), 339 deletions(-) delete mode 100644 examples/helmet_detection/training/train.py rename examples/{helmet_detection => helmet_detection_incremental_train}/README.md (59%) rename examples/{helmet_detection => helmet_detection_incremental_train}/training/data_gen.py (93%) rename examples/{helmet_detection => helmet_detection_incremental_train}/training/eval.py (61%) rename examples/{helmet_detection => helmet_detection_incremental_train}/training/inference.py (100%) rename examples/{helmet_detection => helmet_detection_incremental_train}/training/interface.py (98%) rename examples/{helmet_detection => helmet_detection_incremental_train}/training/resnet18.py (83%) create mode 100644 examples/helmet_detection_incremental_train/training/train.py rename examples/{helmet_detection => helmet_detection_incremental_train}/training/validate_utils.py (85%) rename examples/{helmet_detection => helmet_detection_incremental_train}/training/yolo3_multiscale.py (79%) diff --git a/build/worker/base_images/tensorflow/tensorflow-2.3.Dockerfile b/build/worker/base_images/tensorflow/tensorflow-2.3.Dockerfile index 9bb5217a9..4927e12c1 100644 --- a/build/worker/base_images/tensorflow/tensorflow-2.3.Dockerfile +++ 
b/build/worker/base_images/tensorflow/tensorflow-2.3.Dockerfile @@ -10,4 +10,4 @@ ENV PYTHONPATH "/home/lib" WORKDIR /home/work COPY ./lib /home/lib -ENTRYPOINT ["python"] \ No newline at end of file +ENTRYPOINT ["python"] diff --git a/examples/helmet_detection/training/train.py b/examples/helmet_detection/training/train.py deleted file mode 100644 index 85c01d3d4..000000000 --- a/examples/helmet_detection/training/train.py +++ /dev/null @@ -1,70 +0,0 @@ -import logging - -import tensorflow as tf - -import neptune -from interface import Interface -from neptune.incremental_learning.incremental_learning import IncrementalConfig - -LOG = logging.getLogger(__name__) -MODEL_URL = IncrementalConfig().model_url - - -def main(): - tf.set_random_seed(22) - - class_names = neptune.context.get_parameters("class_names") - - # load dataset. - train_data = neptune.load_train_dataset(data_format='txt', with_image=False) - - # read parameters from deployment config. - obj_threshold = neptune.context.get_parameters("obj_threshold") - nms_threshold = neptune.context.get_parameters("nms_threshold") - input_shape = neptune.context.get_parameters("input_shape") - epochs = neptune.context.get_parameters('epochs') - batch_size = neptune.context.get_parameters('batch_size') - - tf.flags.DEFINE_string('train_url', default=MODEL_URL, help='train url for model') - tf.flags.DEFINE_string('log_url', default=None, help='log url for model') - tf.flags.DEFINE_string('checkpoint_url', default=None, help='checkpoint url for model') - tf.flags.DEFINE_string('model_name', default=None, help='url for train annotation files') - tf.flags.DEFINE_list('class_names', default=class_names.split(','), # 'helmet,helmet-on,person,helmet-off' - help='label names for the training datasets') - tf.flags.DEFINE_list('input_shape', default=[int(x) for x in input_shape.split(',')], - help='input_shape') # [352, 640] - tf.flags.DEFINE_integer('max_epochs', default=epochs, help='training number of epochs') - 
tf.flags.DEFINE_integer('batch_size', default=batch_size, help='training batch size') - tf.flags.DEFINE_boolean('load_imagenet_weights', default=False, help='if load imagenet weights or not') - tf.flags.DEFINE_string('inference_device', - default='GPU', - help='which type of device is used to do inference, only CPU, GPU or 310D') - tf.flags.DEFINE_boolean('copy_to_local', default=True, help='if load imagenet weights or not') - tf.flags.DEFINE_integer('num_gpus', default=1, help='use number of gpus') - tf.flags.DEFINE_boolean('finetuning', default=False, help='use number of gpus') - tf.flags.DEFINE_boolean('label_changed', default=False, help='whether number of labels is changed or not') - tf.flags.DEFINE_string('learning_rate', default='0.001', help='label names for the training datasets') - tf.flags.DEFINE_string('obj_threshold', default=obj_threshold, help='label names for the training datasets') - tf.flags.DEFINE_string('nms_threshold', default=nms_threshold, help='label names for the training datasets') - tf.flags.DEFINE_string('net_type', default='resnet18', help='resnet18 or resnet18_nas') - tf.flags.DEFINE_string('nas_sequence', default='64_1-2111-2-1112', help='resnet18 or resnet18_nas') - tf.flags.DEFINE_string('deploy_model_format', default=None, help='the format for the converted model') - tf.flags.DEFINE_string('result_url', default=None, help='result url for training') - - model = Interface() - - model = neptune.incremental_learning.train(model=model, - train_data=train_data, - epochs=epochs, - batch_size=batch_size, - class_names=class_names, - input_shape=input_shape, - obj_threshold=obj_threshold, - nms_threshold=nms_threshold) - - # Save the model based on the config. 
- # neptune.save_model(model) - - -if __name__ == '__main__': - main() diff --git a/examples/helmet_detection/README.md b/examples/helmet_detection_incremental_train/README.md similarity index 59% rename from examples/helmet_detection/README.md rename to examples/helmet_detection_incremental_train/README.md index 89382b782..6a9397c84 100644 --- a/examples/helmet_detection/README.md +++ b/examples/helmet_detection_incremental_train/README.md @@ -1,34 +1,51 @@ # Using Incremental Learning Job in Helmet Detection Scenario -This document introduces how to use incremental learning job in helmet detectioni scenario. Using the incremental learning job, our application can automatically retrains, evaluates, and updates models based on the data generated at the edge. +This document introduces how to use the incremental learning job in the helmet detection scenario. +Using the incremental learning job, our application can automatically retrain, evaluate, +and update models based on the data generated at the edge. ## Helmet Detection Experiment +### Prepare Worker Image +Build the worker image by referring to the [dockerfile](/build/worker/base_images/tensorflow/tensorflow-1.15.Dockerfile) +and put the image to the `gm-config.yaml`'s `imageHub` in [Install Neptune](#install-neptune) +In this demo, we need to replace the contents of requirements.txt with +``` +flask==1.1.2 +keras==2.4.3 +opencv-python==4.4.0.44 +websockets==8.1 +Pillow==8.0.1 +requests==2.24.0 +tqdm==4.56.0 +matplotlib==3.3.3 +``` ### Install Neptune Follow the [Neptune installation document](/docs/setup/install.md) to install Neptune. 
### Prepare Data and Model -Download dataset and model to your node: -* step 1: download [dataset](https://edgeai-neptune.obs.cn-north-1.myhuaweicloud.com/examples/helmet-detection/dataset.tar.gz) +* step 1: create dataset directory: ``` mkdir -p /data/helmet_detection -cd /data/helmet_detection -tar -zxvf dataset.tar.gz ``` + * step 2: download [base model](https://edgeai-neptune.obs.cn-north-1.myhuaweicloud.com/examples/helmet-detection/model.tar.gz) ``` mkdir /model cd /model +wget https://edgeai-neptune.obs.cn-north-1.myhuaweicloud.com/examples/helmet-detection/model.tar.gz tar -zxvf model.tar.gz ``` ### Prepare Script -Download the [scripts](/examples/helmet_detection/training) to the path `code` of your node +Download the [scripts](/examples/helmet_detection_incremental_train/training) to the path `code` of your node ### Create Incremental Job +Create Namespace `kubectl create ns neptune-test` + Create Dataset ``` @@ -45,7 +62,7 @@ spec: EOF ``` -Create Initial Model +Create Initial Model to simulate the initial model in incremental learning scenario. ``` kubectl create -f - <" + threshold: 500 + metric: num_of_samples +``` +In a real-world scenario, we need to label the hard examples in `HE_SAVED_URL` with annotation tools and then put the examples into the `Dataset`'s url. +Without annotation tools, we can simulate the condition of `num_of_samples` in the following ways: +Download [dataset](https://edgeai-neptune.obs.cn-north-1.myhuaweicloud.com/examples/helmet-detection/dataset.tar.gz) to our cloud0 node. +``` +cd /data/helmet_detection +wget https://edgeai-neptune.obs.cn-north-1.myhuaweicloud.com/examples/helmet-detection/dataset.tar.gz +tar -zxvf dataset.tar.gz +``` +The LocalController component will check the number of samples, detect that the trigger condition is met and notify the GlobalManager component to start the train worker. +When the train worker finishes, we can view the updated model in the `/output` directory on the cloud0 node. 
+Then the eval worker will start to evaluate the model that train worker generated. -after the job completed, we can view the updated model in the /output directory in cloud0 node - +If the eval result satisfy the `deploySpec`'s trigger +``` +trigger: + condition: + operator: ">" + threshold: 0.1 + metric: precision_delta +``` +the deploy worker will load the new model and provide service. diff --git a/examples/helmet_detection/training/data_gen.py b/examples/helmet_detection_incremental_train/training/data_gen.py similarity index 93% rename from examples/helmet_detection/training/data_gen.py rename to examples/helmet_detection_incremental_train/training/data_gen.py index ed74c69c8..0aa2abf9a 100644 --- a/examples/helmet_detection/training/data_gen.py +++ b/examples/helmet_detection_incremental_train/training/data_gen.py @@ -215,13 +215,10 @@ def read_data(self, annotation_line, input_shape=416, random=True, max_boxes=50, return image_data, box_data def preprocess_true_boxes(self, true_boxes, in_shape=416): - """ - Introduction - ------------ - 对训练数据的ground truth box进行预处理 - Parameters - ---------- - true_boxes: ground truth box 形状为[boxes, 5], x_min, y_min, x_max, y_max, class_id + """Preprocesses the ground truth box of the training data + + :param true_boxes: ground truth box shape is [boxes, 5], x_min, y_min, + x_max, y_max, class_id """ num_layers = self.anchors.shape[0] // 3 @@ -238,20 +235,21 @@ def preprocess_true_boxes(self, true_boxes, in_shape=416): grid_shapes = [input_shape // 32, input_shape // 16, input_shape // 8] y_true = [np.zeros((m, grid_shapes[l][0], grid_shapes[l][1], len(anchor_mask[l]), 5 + self.num_classes), dtype='float32') for l in range(num_layers)] - # 这里扩充维度是为了后面应用广播计算每个图中所有box的anchor互相之间的iou + # The dimension is expanded to calculate the IOU between the + # anchors of all boxes in each graph by broadcasting anchors = np.expand_dims(self.anchors, 0) anchors_max = anchors / 2. 
anchors_min = -anchors_max - # 因为之前对box做了padding, 因此需要去除全0行 + # Because we padded the boxes before, we need to remove the all-zero rows valid_mask = boxes_wh[..., 0] > 0 for b in range(m): wh = boxes_wh[b, valid_mask[b]] if len(wh) == 0: continue - # 为了应用广播扩充维度 + # Expand dimensions so that broadcasting can be applied wh = np.expand_dims(wh, -2) - # wh 的shape为[box_num, 1, 2] + # wh shape is [box_num, 1, 2] boxes_max = wh / 2. boxes_min = -boxes_max @@ -263,7 +261,10 @@ def preprocess_true_boxes(self, true_boxes, in_shape=416): anchor_area = anchors[..., 0] * anchors[..., 1] iou = intersect_area / (box_area + anchor_area - intersect_area) - # 找出和ground truth box的iou最大的anchor box, 然后将对应不同比例的负责该ground turth box 的位置置为ground truth box坐标 + # Find the anchor box that has the largest IOU with the ground truth + # box, and then set the corresponding positions of different + # proportions responsible for the ground truth box as the + # coordinates of the ground truth box best_anchor = np.argmax(iou, axis=-1) for t, n in enumerate(best_anchor): for l in range(num_layers): diff --git a/examples/helmet_detection/training/eval.py b/examples/helmet_detection_incremental_train/training/eval.py similarity index 61% rename from examples/helmet_detection/training/eval.py rename to examples/helmet_detection_incremental_train/training/eval.py index 1aa0ad5f4..04dc99c7f 100644 --- a/examples/helmet_detection/training/eval.py +++ b/examples/helmet_detection_incremental_train/training/eval.py @@ -19,13 +19,10 @@ def main(): model = validate - model = neptune.incremental_learning.evaluate(model=model, - test_data=test_data, - class_names=class_names, - input_shape=input_shape) - - # Save the model based on the config. 
- # kubeedge_ai.incremental_learning.save_model(model) + neptune.incremental_learning.evaluate(model=model, + test_data=test_data, + class_names=class_names, + input_shape=input_shape) if __name__ == '__main__': diff --git a/examples/helmet_detection/training/inference.py b/examples/helmet_detection_incremental_train/training/inference.py similarity index 100% rename from examples/helmet_detection/training/inference.py rename to examples/helmet_detection_incremental_train/training/inference.py index 0975b3ea6..7ca784fef 100644 --- a/examples/helmet_detection/training/inference.py +++ b/examples/helmet_detection_incremental_train/training/inference.py @@ -1,9 +1,9 @@ import logging +import os import time import cv2 import numpy as np -import os import neptune from neptune.incremental_learning import InferenceResult diff --git a/examples/helmet_detection/training/interface.py b/examples/helmet_detection_incremental_train/training/interface.py similarity index 98% rename from examples/helmet_detection/training/interface.py rename to examples/helmet_detection_incremental_train/training/interface.py index 3ce96a46d..e730f3d1c 100644 --- a/examples/helmet_detection/training/interface.py +++ b/examples/helmet_detection_incremental_train/training/interface.py @@ -165,7 +165,7 @@ def avg_checkpoints(self): logging.info("average checkpoints end .......") - def save_model_pb(self): + def save_model_pb(self, saved_model_name): """ save model as a single pb file from checkpoint """ @@ -189,6 +189,6 @@ def save_model_pb(self): print('output_tensors : ', output_tensors) output_tensors = [t.op.name for t in output_tensors] graph = tf.graph_util.convert_variables_to_constants(sess, input_graph_def, output_tensors) - tf.train.write_graph(graph, model.model_dir, 'model.pb', False) + tf.train.write_graph(graph, model.model_dir, saved_model_name, False) logging.info("save model as .pb end .......") diff --git a/examples/helmet_detection/training/resnet18.py 
b/examples/helmet_detection_incremental_train/training/resnet18.py similarity index 83% rename from examples/helmet_detection/training/resnet18.py rename to examples/helmet_detection_incremental_train/training/resnet18.py index 9724bc5bd..24fce80fb 100644 --- a/examples/helmet_detection/training/resnet18.py +++ b/examples/helmet_detection_incremental_train/training/resnet18.py @@ -19,7 +19,8 @@ def _residual_block_first(x, is_training, out_channel, strides, name="unit"): if strides == 1: shortcut = tf.identity(x) else: - shortcut = tf.nn.max_pool(x, [1, strides, strides, 1], [1, strides, strides, 1], 'VALID') + shortcut = tf.nn.max_pool(x, [1, strides, strides, 1], + [1, strides, strides, 1], 'VALID') else: shortcut = _conv(x, 1, out_channel, strides, name='shortcut') # Residual @@ -58,7 +59,6 @@ def _residual_block(x, is_training, name="unit"): return x -# def _conv(x, filter_size, out_channel, strides, name="conv"): """ Helper functions(counts FLOPs and number of weights) @@ -66,20 +66,26 @@ def _conv(x, filter_size, out_channel, strides, name="conv"): in_shape = x.get_shape() with tf.variable_scope(name): # Main operation: conv2d - kernel = tf.get_variable('kernel', [filter_size, filter_size, in_shape[3], out_channel], tf.float32, + kernel = tf.get_variable('kernel', + [filter_size, filter_size, in_shape[3], + out_channel], tf.float32, initializer=tf.random_normal_initializer( - stddev=np.sqrt(2.0 / filter_size / filter_size / out_channel))) + stddev=np.sqrt( + 2.0 / filter_size / filter_size / out_channel))) if kernel not in tf.get_collection(WEIGHT_DECAY_KEY): tf.add_to_collection(WEIGHT_DECAY_KEY, kernel) if strides == 1: - conv = tf.nn.conv2d(x, kernel, [1, strides, strides, 1], padding='SAME') + conv = tf.nn.conv2d(x, kernel, [1, strides, strides, 1], + padding='SAME') else: kernel_size_effective = filter_size pad_total = kernel_size_effective - 1 pad_beg = pad_total // 2 pad_end = pad_total - pad_beg - x = tf.pad(x, [[0, 0], [pad_beg, pad_end], [pad_beg, 
pad_end], [0, 0]]) - conv = tf.nn.conv2d(x, kernel, [1, strides, strides, 1], padding='VALID') + x = tf.pad(x, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], + [0, 0]]) + conv = tf.nn.conv2d(x, kernel, [1, strides, strides, 1], + padding='VALID') return conv @@ -88,8 +94,9 @@ def _fc(x, out_dim, name="fc"): # Main operation: fc with tf.device('/CPU:0'): w = tf.get_variable('weights', [x.get_shape()[1], out_dim], - tf.float32, initializer=tf.random_normal_initializer( - stddev=np.sqrt(1.0 / out_dim))) + tf.float32, + initializer=tf.random_normal_initializer( + stddev=np.sqrt(1.0 / out_dim))) b = tf.get_variable('biases', [out_dim], tf.float32, initializer=tf.constant_initializer(0.0)) if w not in tf.get_collection(WEIGHT_DECAY_KEY): @@ -100,7 +107,9 @@ def _fc(x, out_dim, name="fc"): def _bn(x, is_training, name="bn"): bn = tf.layers.batch_normalization(inputs=x, momentum=0.99, epsilon=1e-5, - center=True, scale=True, training=is_training, name=name, fused=True) + center=True, scale=True, + training=is_training, name=name, + fused=True) return bn @@ -140,17 +149,20 @@ def _build_network(self, images, is_training, num_classes=None): self.end_points['conv2_output'] = x # conv3_x - x = _residual_block_first(x, is_training, filters[2], strides[2], name='conv3_1') + x = _residual_block_first(x, is_training, filters[2], strides[2], + name='conv3_1') x = _residual_block(x, is_training, name='conv3_2') self.end_points['conv3_output'] = x # conv4_x - x = _residual_block_first(x, is_training, filters[3], strides[3], name='conv4_1') + x = _residual_block_first(x, is_training, filters[3], strides[3], + name='conv4_1') x = _residual_block(x, is_training, name='conv4_2') self.end_points['conv4_output'] = x # conv5_x - x = _residual_block_first(x, is_training, filters[4], strides[4], name='conv5_1') + x = _residual_block_first(x, is_training, filters[4], strides[4], + name='conv5_1') x = _residual_block(x, is_training, name='conv5_2') self.end_points['conv5_output'] = x diff 
--git a/examples/helmet_detection_incremental_train/training/train.py b/examples/helmet_detection_incremental_train/training/train.py new file mode 100644 index 000000000..cd01ad270 --- /dev/null +++ b/examples/helmet_detection_incremental_train/training/train.py @@ -0,0 +1,87 @@ +import logging + +import tensorflow as tf + +import neptune +from interface import Interface +from neptune.incremental_learning import IncrementalConfig + +LOG = logging.getLogger(__name__) +MODEL_URL = IncrementalConfig().model_url + + +def main(): + tf.set_random_seed(22) + + class_names = neptune.context.get_parameters("class_names") + + # load dataset. + train_data = neptune.load_train_dataset(data_format='txt', + with_image=False) + + # read parameters from deployment config. + obj_threshold = neptune.context.get_parameters("obj_threshold") + nms_threshold = neptune.context.get_parameters("nms_threshold") + input_shape = neptune.context.get_parameters("input_shape") + epochs = neptune.context.get_parameters('epochs') + batch_size = neptune.context.get_parameters('batch_size') + + tf.flags.DEFINE_string('train_url', default=MODEL_URL, + help='train url for model') + tf.flags.DEFINE_string('log_url', default=None, help='log url for model') + tf.flags.DEFINE_string('checkpoint_url', default=None, + help='checkpoint url for model') + tf.flags.DEFINE_string('model_name', default=None, + help='url for train annotation files') + tf.flags.DEFINE_list('class_names', default=class_names.split(','), + # 'helmet,helmet-on,person,helmet-off' + help='label names for the training datasets') + tf.flags.DEFINE_list('input_shape', + default=[int(x) for x in input_shape.split(',')], + help='input_shape') # [352, 640] + tf.flags.DEFINE_integer('max_epochs', default=epochs, + help='training number of epochs') + tf.flags.DEFINE_integer('batch_size', default=batch_size, + help='training batch size') + tf.flags.DEFINE_boolean('load_imagenet_weights', default=False, + help='if load imagenet weights or not') 
+ tf.flags.DEFINE_string('inference_device', + default='GPU', + help='which type of device is used to do inference,' + ' only CPU, GPU or 310D') + tf.flags.DEFINE_boolean('copy_to_local', default=True, + help='if load imagenet weights or not') + tf.flags.DEFINE_integer('num_gpus', default=1, help='use number of gpus') + tf.flags.DEFINE_boolean('finetuning', default=False, + help='use number of gpus') + tf.flags.DEFINE_boolean('label_changed', default=False, + help='whether number of labels is changed or not') + tf.flags.DEFINE_string('learning_rate', default='0.001', + help='label names for the training datasets') + tf.flags.DEFINE_string('obj_threshold', default=obj_threshold, + help='label names for the training datasets') + tf.flags.DEFINE_string('nms_threshold', default=nms_threshold, + help='label names for the training datasets') + tf.flags.DEFINE_string('net_type', default='resnet18', + help='resnet18 or resnet18_nas') + tf.flags.DEFINE_string('nas_sequence', default='64_1-2111-2-1112', + help='resnet18 or resnet18_nas') + tf.flags.DEFINE_string('deploy_model_format', default=None, + help='the format for the converted model') + tf.flags.DEFINE_string('result_url', default=None, + help='result url for training') + + model = Interface() + + neptune.incremental_learning.train(model=model, + train_data=train_data, + epochs=epochs, + batch_size=batch_size, + class_names=class_names, + input_shape=input_shape, + obj_threshold=obj_threshold, + nms_threshold=nms_threshold) + + +if __name__ == '__main__': + main() diff --git a/examples/helmet_detection/training/validate_utils.py b/examples/helmet_detection_incremental_train/training/validate_utils.py similarity index 85% rename from examples/helmet_detection/training/validate_utils.py rename to examples/helmet_detection_incremental_train/training/validate_utils.py index 9d086db09..354cdc090 100644 --- a/examples/helmet_detection/training/validate_utils.py +++ 
b/examples/helmet_detection_incremental_train/training/validate_utils.py @@ -1,18 +1,3 @@ -# -*- coding: utf-8 -*- -# Copyright 2019 ModelArts Service of Huawei Cloud. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -40,14 +25,14 @@ def add_path(path): def init_yolo(model_path, input_shape): print('model_path : ', model_path) - # 初始化session,需绑定对应的Graph + # initialize the session and bind the corresponding graph yolo_graph = tf.Graph() config = tf.ConfigProto(allow_soft_placement=True) config.gpu_options.allow_growth = True config.gpu_options.per_process_gpu_memory_fraction = 0.1 yolo_session = tf.Session(graph=yolo_graph, config=config) - # 初始化yoloInference对象 + # initialize yoloInference object yolo_infer = YOLOInference(yolo_session, model_path, input_shape) return yolo_infer, yolo_session @@ -148,8 +133,6 @@ def validate_img_file(yolo_infer, yolo_session, img_file, bbox_list_ground, fold img_file = img_file.split("/")[-1] cv2.imwrite(os.path.join(folder_out, img_file), img) - # print ('\tbbox_list_pred : ', bbox_list_pred) - # print ('\tbbox_list_ground : ', bbox_list_ground) count_correct = [0 for ix in range(class_num)] count_ground = [0 for ix in range(class_num)] count_pred = [0 for ix in range(class_num)] @@ -163,15 +146,12 @@ def validate_img_file(yolo_infer, yolo_session, img_file, bbox_list_ground, fold for iy in 
range(count_pred_all): bbox_pred = [bbox_list_pred[iy][1], bbox_list_pred[iy][0], bbox_list_pred[iy][3], bbox_list_pred[iy][2]] - # bbox_draw_on_img_cv(img_data, bbox_pred, colors[labels[iy]]) - # if bbox_pred[2]-bbox_pred[0] > 30 and bbox_pred[3]-bbox_pred[1] > 30: LOG.debug(f'count_pred={count_pred}, labels[iy]={labels[iy]}') count_pred[labels[iy]] += 1 for ix in range(count_ground_all): bbox_ground = [int(x) for x in bbox_list_ground[ix].split(',')] class_ground = bbox_ground[4] - # bbox_draw_on_img_cv(img_data, bbox_ground, (40, 39, 214)) if labels[iy] == class_ground: iou = calc_iou(bbox_pred, bbox_ground) @@ -225,8 +205,8 @@ def draw_boxes(img, labels, scores, bboxes, class_names, colors): def calc_iou(bbox_pred, bbox_ground): - """ - 自定义函数,计算两矩形 IOU,传入为均为矩形对角线,(x,y) 坐标。 + """User-defined function for calculating the IOU of two rectangles. The + input parameters are the (x, y) coordinates of the rectangle diagonals """ x1 = bbox_pred[0] y1 = bbox_pred[1] @@ -247,9 +227,9 @@ def calc_iou(bbox_pred, bbox_ground): height = height1 + height2 - (endy - starty) if width <= 0 or height <= 0: - iou = 0 # 重叠率为 0 + iou = 0 else: - area = width * height # 两矩形相交面积 + area = width * height area1 = width1 * height1 area2 = width2 * height2 iou = area * 1. 
/ (area1 + area2 - area) diff --git a/examples/helmet_detection/training/yolo3_multiscale.py b/examples/helmet_detection_incremental_train/training/yolo3_multiscale.py similarity index 79% rename from examples/helmet_detection/training/yolo3_multiscale.py rename to examples/helmet_detection_incremental_train/training/yolo3_multiscale.py index 81760a4ff..b73ba20dd 100644 --- a/examples/helmet_detection/training/yolo3_multiscale.py +++ b/examples/helmet_detection_incremental_train/training/yolo3_multiscale.py @@ -14,13 +14,6 @@ class Yolo3: def __init__(self, sess, is_training, config): - """ - Introduction - ------------ - 初始化函数 - ---------- - """ - LOG.info('is_training: %s' % is_training) LOG.info('model dir: %s' % flags.train_url) LOG.info('input_shape: (%d, %d)' % (flags.input_shape[0], flags.input_shape[1])) @@ -112,8 +105,7 @@ def load_weights(self, sess, fpath): sess.run(v.assign(data[vname])) def step(self, sess, batch_data, is_training): - """ - step, read one batch, generate gradients + """step, read one batch, generate gradients """ # Input feed @@ -132,21 +124,19 @@ def step(self, sess, batch_data, is_training): return outputs[0] # loss def _batch_normalization_layer(self, input_layer, name=None, training=True, norm_decay=0.997, norm_epsilon=1e-5): - ''' - Introduction - ------------ - 对卷积层提取的feature map使用batch normalization - Parameters - ---------- - input_layer: 输入的四维tensor - name: batchnorm层的名字 - trainging: 是否为训练过程 - norm_decay: 在预测时计算moving average时的衰减率 - norm_epsilon: 方差加上极小的数,防止除以0的情况 - Returns - ------- - bn_layer: batch normalization处理之后的feature map - ''' + """Batch normalization is used for feature map extracted from + convolution layer + + :param input_layer: four dimensional tensor of input + :param name: the name of batchnorm layer + :param training: is training or not + :param norm_decay: The decay rate of moving average is calculated + during prediction + :param norm_epsilon: Variance plus a minimal number to prevent + division by 0 + + 
:return bn_layer: feature map after batch normalization + """ bn_layer = tf.layers.batch_normalization(inputs=input_layer, momentum=norm_decay, epsilon=norm_epsilon, center=True, scale=True, training=training, name=name, fused=True) @@ -154,29 +144,20 @@ def _batch_normalization_layer(self, input_layer, name=None, training=True, norm # return tf.nn.leaky_relu(bn_layer, alpha = 0.1) def _conv2d_layer(self, inputs, filters_num, kernel_size, name, use_bias=False, strides=1): - """ - Introduction - ------------ - 使用tf.layers.conv2d减少权重和偏置矩阵初始化过程,以及卷积后加上偏置项的操作 - 经过卷积之后需要进行batch norm,最后使用leaky ReLU激活函数 - 根据卷积时的步长,如果卷积的步长为2,则对图像进行降采样 - 比如,输入图片的大小为416*416,卷积核大小为3,若stride为2时,(416 - 3 + 2)/ 2 + 1, 计算结果为208,相当于做了池化层处理 - 因此需要对stride大于1的时候,先进行一个padding操作, 采用四周都padding一维代替'same'方式 - Parameters - ---------- - inputs: 输入变量 - filters_num: 卷积核数量 - strides: 卷积步长 - name: 卷积层名字 - trainging: 是否为训练过程 - use_bias: 是否使用偏置项 - kernel_size: 卷积核大小 - Returns - ------- - conv: 卷积之后的feature map + """Use tf.layers.conv2d to reduce the weight and bias matrix + initialization process, as well as convolution plus bias operation + + :param inputs: Input variables + :param filters_num: Number of convolution kernels + :param strides: Convolution step + :param name: Convolution layer name + :param training: is a training process or not + :param use_bias: use bias or not + :param kernel_size: the kernels size + + :return conv: Feature map after convolution """ if strides > 1: # modified 0327 - # 在输入feature map的长宽维度进行padding inputs = tf.pad(inputs, paddings=[[0, 0], [1, 0], [1, 0], [0, 0]], mode='CONSTANT') conv = tf.layers.conv2d(inputs=inputs, filters=filters_num, kernel_size=kernel_size, strides=[strides, strides], @@ -187,25 +168,6 @@ def _conv2d_layer(self, inputs, filters_num, kernel_size, name, use_bias=False, def _Residual_block(self, inputs, filters_num, blocks_num, conv_index, training=True, norm_decay=0.997, norm_epsilon=1e-5): - """ - Introduction - ------------ - 
Darknet的残差block,类似resnet的两层卷积结构,分别采用1x1和3x3的卷积核,使用1x1是为了减少channel的维度 - Parameters - ---------- - inputs: 输入变量 - filters_num: 卷积核数量 - trainging: 是否为训练过程 - blocks_num: block的数量 - conv_index: 为了方便加载预训练权重,统一命名序号 - weights_dict: 加载预训练模型的权重 - norm_decay: 在预测时计算moving average时的衰减率 - norm_epsilon: 方差加上极小的数,防止除以0的情况 - Returns - ------- - inputs: 经过残差网络处理后的结果 - """ - layer = self._conv2d_layer(inputs, filters_num, kernel_size=3, strides=2, name="conv2d_" + str(conv_index)) layer = self._batch_normalization_layer(layer, name="batch_normalization_" + str(conv_index), training=training, norm_decay=norm_decay, norm_epsilon=norm_epsilon) @@ -237,25 +199,6 @@ def _resnet18(self, inputs, training=True): def _yolo_block(self, inputs, filters_num, out_filters, conv_index, training=True, norm_decay=0.997, norm_epsilon=1e-5): - """ - Introduction - ------------ - yolo3在Darknet53提取的特征层基础上,又加了针对3种不同比例的feature map的block,这样来提高对小物体的检测率 - Parameters - ---------- - inputs: 输入特征 - filters_num: 卷积核数量 - out_filters: 最后输出层的卷积核数量 - conv_index: 卷积层数序号,方便根据名字加载预训练权重 - training: 是否为训练 - norm_decay: 在预测时计算moving average时的衰减率 - norm_epsilon: 方差加上极小的数,防止除以0的情况 - Returns - ------- - route: 返回最后一层卷积的前一层结果 - conv: 返回最后一层卷积的结果 - conv_index: conv层计数 - """ conv = self._conv2d_layer(inputs, filters_num=filters_num, kernel_size=1, strides=1, name="conv2d_" + str(conv_index)) conv = self._batch_normalization_layer(conv, name="batch_normalization_" + str(conv_index), training=training, @@ -293,18 +236,6 @@ def _yolo_block(self, inputs, filters_num, out_filters, conv_index, training=Tru return route, conv, conv_index def yolo_inference(self, features_out, filters_yolo_block, conv_index, num_anchors, num_classes, training=True): - """ - Introduction - ------------ - 构建yolo模型结构 - Parameters - ---------- - inputs: 模型的输入变量 - num_anchors: 每个grid cell负责检测的anchor数量 - num_classes: 类别数量 - training: 是否为训练模式 - """ - conv = features_out[0] conv2d_45 = features_out[1] conv2d_26 = features_out[2] @@ -368,36 +299,15 @@ def 
yolo_inference(self, features_out, filters_yolo_block, conv_index, num_ancho return [conv2d_59, conv2d_67, conv2d_75] def yolo_head(self, feats, anchors, num_classes, input_shape, training=True): - """ - Introduction - ------------ - 根据不同大小的feature map做多尺度的检测,三种feature map大小分别为13x13x1024, 26x26x512, 52x52x256 - Parameters - ---------- - feats: 输入的特征feature map - anchors: 针对不同大小的feature map的anchor - num_classes: 类别的数量 - input_shape: 图像的输入大小,一般为416 - trainging: 是否训练,用来控制返回不同的值 - Returns - ------- - """ - print('feats : ', feats) - print('anchors : ', anchors) - print('input_shape : ', input_shape) - num_anchors = len(anchors) anchors_tensor = tf.reshape(tf.constant(anchors, dtype=tf.float32), [1, 1, 1, num_anchors, 2]) grid_size = tf.shape(feats)[1:3] predictions = tf.reshape(feats, [-1, grid_size[0], grid_size[1], num_anchors, num_classes + 5]) - # 这里构建13*13*1*2的矩阵,对应每个格子加上对应的坐标 grid_y = tf.tile(tf.reshape(tf.range(grid_size[0]), [-1, 1, 1, 1]), [1, grid_size[1], 1, 1]) grid_x = tf.tile(tf.reshape(tf.range(grid_size[1]), [1, -1, 1, 1]), [grid_size[0], 1, 1, 1]) grid = tf.concat([grid_x, grid_y], axis=-1) grid = tf.cast(grid, tf.float32) - # 将x,y坐标归一化为占416的比例 box_xy = (tf.sigmoid(predictions[..., :2]) + grid) / tf.cast(grid_size[::-1], tf.float32) - # 将w,h也归一化为占416的比例 box_wh = tf.exp(predictions[..., 2:4]) * anchors_tensor / input_shape[::-1] box_confidence = tf.sigmoid(predictions[..., 4:5]) box_class_probs = tf.sigmoid(predictions[..., 5:]) @@ -406,18 +316,6 @@ def yolo_head(self, feats, anchors, num_classes, input_shape, training=True): return box_xy, box_wh, box_confidence, box_class_probs def yolo_boxes_scores(self, feats, anchors, num_classes, input_shape, image_shape): - """ - Introduction - ------------ - 该函数是将box的坐标修正,除去之前按照长宽比缩放填充的部分,最后将box的坐标还原成相对原始图片的 - Parameters - ---------- - feats: 模型输出feature map - anchors: 模型anchors - num_classes: 数据集类别数 - input_shape: 训练输入图片大小 - image_shape: 原始图片的大小 - """ input_shape = tf.cast(input_shape, tf.float32) image_shape = 
tf.cast(image_shape, tf.float32) box_xy, box_wh, box_confidence, box_class_probs = self.yolo_head(feats, anchors, num_classes, input_shape, @@ -446,18 +344,6 @@ def yolo_boxes_scores(self, feats, anchors, num_classes, input_shape, image_shap return boxes, boxes_scores def box_iou(self, box1, box2): - """ - Introduction - ------------ - 计算box tensor之间的iou - Parameters - ---------- - box1: shape=[grid_size, grid_size, anchors, xywh] - box2: shape=[box_num, xywh] - Returns - ------- - iou: - """ box1 = tf.expand_dims(box1, -2) box1_xy = box1[..., :2] box1_wh = box1[..., 2:4] @@ -480,35 +366,16 @@ def box_iou(self, box1, box2): return iou def yolo_loss(self, yolo_output, y_true, anchors, num_classes, ignore_thresh=.5): - """ - Introduction - ------------ - yolo模型的损失函数 - Parameters - ---------- - yolo_output: yolo模型的输出 - y_true: 经过预处理的真实标签,shape为[batch, grid_size, grid_size, 5 + num_classes] - anchors: yolo模型对应的anchors - num_classes: 类别数量 - ignore_thresh: 小于该阈值的box我们认为没有物体 - Returns - ------- - loss: 每个batch的平均损失值 - accuracy - """ loss = 0.0 anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] input_shape = tf.shape(yolo_output[0])[1: 3] * 32 input_shape = tf.cast(input_shape, tf.float32) grid_shapes = [tf.cast(tf.shape(yolo_output[l])[1:3], tf.float32) for l in range(3)] for index in range(3): - # 只有负责预测ground truth box的grid对应的为1, 才计算相对应的loss - # object_mask的shape为[batch_size, grid_size, grid_size, 3, 1] object_mask = y_true[index][..., 4:5] class_probs = y_true[index][..., 5:] grid, predictions, pred_xy, pred_wh = self.yolo_head(yolo_output[index], anchors[anchor_mask[index]], num_classes, input_shape, training=True) - # pred_box的shape为[batch, box_num, 4] pred_box = tf.concat([pred_xy, pred_wh], axis=-1) raw_true_xy = y_true[index][..., :2] * grid_shapes[index][::-1] - grid object_mask_bool = tf.cast(object_mask, dtype=tf.bool) @@ -516,16 +383,13 @@ def yolo_loss(self, yolo_output, y_true, anchors, num_classes, ignore_thresh=.5) tf.where(tf.equal(y_true[index][..., 2:4] / 
anchors[anchor_mask[index]] * input_shape[::-1], 0), tf.ones_like(y_true[index][..., 2:4]), y_true[index][..., 2:4] / anchors[anchor_mask[index]] * input_shape[::-1])) - # 该系数是用来调整box坐标loss的系数 box_loss_scale = 2 - y_true[index][..., 2:3] * y_true[index][..., 3:4] ignore_mask = tf.TensorArray(dtype=tf.float32, size=1, dynamic_size=True) def loop_body(internal_index, ignore_mask): - # true_box的shape为[box_num, 4] true_box = tf.boolean_mask(y_true[index][internal_index, ..., 0:4], object_mask_bool[internal_index, ..., 0]) iou = self.box_iou(pred_box[internal_index], true_box) - # 计算每个true_box对应的预测的iou最大的box best_iou = tf.reduce_max(iou, axis=-1) ignore_mask = ignore_mask.write(internal_index, tf.cast(best_iou < ignore_thresh, tf.float32)) return internal_index + 1, ignore_mask @@ -535,7 +399,6 @@ def loop_body(internal_index, ignore_mask): [0, ignore_mask]) ignore_mask = ignore_mask.stack() ignore_mask = tf.expand_dims(ignore_mask, axis=-1) - # 计算四个部分的loss xy_loss = object_mask * box_loss_scale * tf.nn.sigmoid_cross_entropy_with_logits( labels=raw_true_xy, logits=predictions[..., 0:2]) @@ -557,27 +420,11 @@ def loop_body(internal_index, ignore_mask): return loss def yolo_eval(self, yolo_outputs, image_shape, max_boxes=20): - """ - Introduction - ------------ - 根据Yolo模型的输出进行非极大值抑制,获取最后的物体检测框和物体检测类别 - Parameters - ---------- - yolo_outputs: yolo模型输出 - image_shape: 图片的大小 - max_boxes: 最大box数量 - Returns - ------- - boxes_: 物体框的位置 - scores_: 物体类别的概率 - classes_: 物体类别 - """ with tf.variable_scope('boxes_scores'): anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] boxes = [] box_scores = [] input_shape = tf.shape(yolo_outputs[0])[1: 3] * 32 - # 对三个尺度的输出获取每个预测box坐标和box的分数,score计算为置信度x类别概率 for i in range(len(yolo_outputs)): _boxes, _box_scores = self.yolo_boxes_scores(yolo_outputs[i], self.anchors[anchor_mask[i]], len(self.class_names), input_shape, image_shape) @@ -627,9 +474,6 @@ class YoloConfig: norm_decay = 0.99 norm_epsilon = 1e-5 ignore_thresh = 0.5 - # learning_rate = 1e-3 - 
# obj_threshold = 0.3 - # nms_threshold = 0.4 class YOLOInference(object): diff --git a/lib/neptune/incremental_learning/incremental_learning.py b/lib/neptune/incremental_learning/incremental_learning.py index ef7e2607d..1cfd39d13 100644 --- a/lib/neptune/incremental_learning/incremental_learning.py +++ b/lib/neptune/incremental_learning/incremental_learning.py @@ -20,6 +20,7 @@ def __init__(self): BaseConfig.__init__(self) self.model_urls = os.getenv("MODEL_URLS") self.base_model_url = os.getenv("BASE_MODEL_URL") + self.saved_model_name = "model.pb" def train(model, train_data, epochs, batch_size, class_names, input_shape, @@ -40,12 +41,12 @@ def train(model, train_data, epochs, batch_size, class_names, input_shape, clean_folder(il_config.model_url) model.train(train_data, []) # validation data is empty. tf.reset_default_graph() - model.save_model_pb() + model.save_model_pb(il_config.saved_model_name) ckpt_model_url = remove_path_prefix(il_config.model_url, il_config.data_path_prefix) pb_model_url = remove_path_prefix( - os.path.join(il_config.model_url, 'model.pb'), + os.path.join(il_config.model_url, il_config.saved_model_name), il_config.data_path_prefix) # TODO delete metrics whether affect lc @@ -156,8 +157,8 @@ def __init__(self, model: TSModel, hard_example_mining_algorithm=None): def inference(self, img_data) -> InferenceResult: result = self.model.inference(img_data) - bboxes = deal_infer_rsl(result) - is_hard_example = self.hard_example_mining_algorithm.hard_judge(bboxes) + rsl = deal_infer_rsl(result) + is_hard_example = self.hard_example_mining_algorithm.hard_judge(rsl) if is_hard_example: return InferenceResult(True, result) else: @@ -166,9 +167,9 @@ def inference(self, img_data) -> InferenceResult: def deal_infer_rsl(model_output): all_classes, all_scores, all_bboxes = model_output - bboxes = [] + rsl = [] for c, s, bbox in zip(all_classes, all_scores, all_bboxes): bbox[0], bbox[1], bbox[2], bbox[3] = bbox[1], bbox[0], bbox[3], bbox[2] - 
bboxes.append(bbox.tolist() + [s, c]) + rsl.append(bbox.tolist() + [s, c]) - return bboxes + return rsl diff --git a/lib/requirements.txt b/lib/requirements.txt index 43a662c15..26d1cf3b6 100644 --- a/lib/requirements.txt +++ b/lib/requirements.txt @@ -4,5 +4,3 @@ opencv-python==4.4.0.44 websockets==8.1 Pillow==8.0.1 requests==2.24.0 -tqdm==4.56.0 -matplotlib==3.3.3 \ No newline at end of file