| @@ -0,0 +1,34 @@ | ||
| COPYRIGHT | ||
|
|
||
| All contributions by Allan Zelener: | ||
| Copyright (c) 2017, Allan Zelener. | ||
| All rights reserved. | ||
|
|
||
| All other contributions: | ||
| Copyright (c) 2017, the respective contributors. | ||
| All rights reserved. | ||
|
|
||
| Each contributor holds copyright over their respective contributions. | ||
| The project versioning (Git) records all such contribution source information. | ||
|
|
||
| LICENSE | ||
|
|
||
| The MIT License (MIT) | ||
|
|
||
| Permission is hereby granted, free of charge, to any person obtaining a copy | ||
| of this software and associated documentation files (the "Software"), to deal | ||
| in the Software without restriction, including without limitation the rights | ||
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
| copies of the Software, and to permit persons to whom the Software is | ||
| furnished to do so, subject to the following conditions: | ||
|
|
||
| The above copyright notice and this permission notice shall be included in all | ||
| copies or substantial portions of the Software. | ||
|
|
||
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
| SOFTWARE. |
| @@ -0,0 +1,89 @@ | ||
| # YAD2K: Yet Another Darknet 2 Keras | ||
|
|
||
| [](LICENSE) | ||
|
|
||
| ## Welcome to YAD2K | ||
|
|
||
| You only look once, but you reimplement neural nets over and over again. | ||
|
|
||
| YAD2K is a 90% Keras/10% Tensorflow implementation of YOLO_v2. | ||
|
|
||
| Original paper: [YOLO9000: Better, Faster, Stronger](https://arxiv.org/abs/1612.08242) by Joseph Redmon and Ali Farhadi. | ||
|
|
||
|  | ||
|
|
||
| -------------------------------------------------------------------------------- | ||
|
|
||
| ## Requirements | ||
|
|
||
| - [Keras](https://github.com/fchollet/keras) | ||
| - [Tensorflow](https://www.tensorflow.org/) | ||
| - [Numpy](http://www.numpy.org/) | ||
| - [h5py](http://www.h5py.org/) (For Keras model serialization.) | ||
| - [Pillow](https://pillow.readthedocs.io/) (For rendering test results.) | ||
| - [Python 3](https://www.python.org/) | ||
| - [pydot-ng](https://github.com/pydot/pydot-ng) (Optional for plotting model.) | ||
|
|
||
| ### Installation | ||
| ```bash | ||
| git clone https://github.com/allanzelener/yad2k.git | ||
| cd yad2k | ||
| # [Option 1] To replicate the conda environment: | ||
| conda env create -f environment.yml | ||
| source activate yad2k | ||
| # [Option 2] Install everything globally. | ||
| pip install numpy h5py pillow | ||
| pip install tensorflow-gpu # CPU-only: conda install -c conda-forge tensorflow | ||
| pip install keras # Possibly older release: conda install keras | ||
| ``` | ||
|
|
||
| ## Quick Start | ||
|
|
||
| - Download Darknet model cfg and weights from the [official YOLO website](http://pjreddie.com/darknet/yolo/). | ||
| - Convert the Darknet YOLO_v2 model to a Keras model. | ||
| - Test the converted model on the small test set in `images/`. | ||
|
|
||
| ```bash | ||
| wget http://pjreddie.com/media/files/yolo.weights | ||
| wget https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolo.cfg | ||
| ./yad2k.py yolo.cfg yolo.weights model_data/yolo.h5 | ||
| ./test_yolo.py model_data/yolo.h5 # output in images/out/ | ||
| ``` | ||
|
|
||
| See `./yad2k.py --help` and `./test_yolo.py --help` for more options. | ||
|
|
||
| -------------------------------------------------------------------------------- | ||
|
|
||
| ## More Details | ||
|
|
||
| The YAD2K converter currently only supports YOLO_v2 style models, this includes the following configurations: `darknet19_448`, `tiny-yolo-voc`, `yolo-voc`, and `yolo`. | ||
|
|
||
| `yad2k.py -p` will produce a plot of the generated Keras model. For example see [yolo.png](etc/yolo.png). | ||
|
|
||
| YAD2K assumes the Keras backend is Tensorflow. In particular for YOLO_v2 models with a passthrough layer, YAD2K uses `tf.space_to_depth` to implement the passthrough layer. The evaluation script also directly uses Tensorflow tensors and uses `tf.non_max_suppression` for the final output. | ||
|
|
||
| `voc_conversion_scripts` contains two scripts for converting the Pascal VOC image dataset with XML annotations to either HDF5 or TFRecords format for easier training with Keras or Tensorflow. | ||
|
|
||
| `yad2k/models` contains reference implementations of Darknet-19 and YOLO_v2. | ||
|
|
||
| `train_overfit` is a sample training script that overfits a YOLO_v2 model to a single image from the Pascal VOC dataset. | ||
|
|
||
| ## Known Issues and TODOs | ||
|
|
||
| - Expand sample training script to train YOLO_v2 reference model on full dataset. | ||
| - Support for additional Darknet layer types. | ||
| - Tuck away the Tensorflow dependencies with Keras wrappers where possible. | ||
| - YOLO_v2 model does not support fully convolutional mode. Current implementation assumes 1:1 aspect ratio images. | ||
|
|
||
| ## Darknets of Yore | ||
|
|
||
| YAD2K stands on the shoulders of giants. | ||
|
|
||
| - :fire: [Darknet](https://github.com/pjreddie/darknet) :fire: | ||
| - [Darknet.Keras](https://github.com/sunshineatnoon/Darknet.keras) - The original D2K for YOLO_v1. | ||
| - [Darkflow](https://github.com/thtrieu/darkflow) - Darknet directly to Tensorflow. | ||
| - [caffe-yolo](https://github.com/xingwangsfu/caffe-yolo) - YOLO_v1 to Caffe. | ||
| - [yolo2-pytorch](https://github.com/longcw/yolo2-pytorch) - YOLO_v2 in PyTorch. | ||
|
|
||
| -------------------------------------------------------------------------------- |
| @@ -0,0 +1,58 @@ | ||
| name: yad2k | ||
| channels: | ||
| - defaults | ||
| dependencies: | ||
| - cycler=0.10.0=py36_0 | ||
| - dbus=1.10.10=0 | ||
| - expat=2.1.0=0 | ||
| - fontconfig=2.12.1=3 | ||
| - freetype=2.5.5=2 | ||
| - glib=2.50.2=1 | ||
| - gst-plugins-base=1.8.0=0 | ||
| - gstreamer=1.8.0=0 | ||
| - h5py=2.7.0=np112py36_0 | ||
| - hdf5=1.8.17=1 | ||
| - icu=54.1=0 | ||
| - jbig=2.1=0 | ||
| - jpeg=9b=0 | ||
| - libffi=3.2.1=1 | ||
| - libgcc=5.2.0=0 | ||
| - libgfortran=3.0.0=1 | ||
| - libiconv=1.14=0 | ||
| - libpng=1.6.27=0 | ||
| - libtiff=4.0.6=3 | ||
| - libxcb=1.12=1 | ||
| - libxml2=2.9.4=0 | ||
| - matplotlib=2.0.0=np112py36_0 | ||
| - mkl=2017.0.1=0 | ||
| - numpy=1.12.1=py36_0 | ||
| - olefile=0.44=py36_0 | ||
| - openssl=1.0.2k=1 | ||
| - pcre=8.39=1 | ||
| - pillow=4.1.0=py36_0 | ||
| - pip=9.0.1=py36_1 | ||
| - pyparsing=2.1.4=py36_0 | ||
| - pyqt=5.6.0=py36_2 | ||
| - python=3.6.1=0 | ||
| - python-dateutil=2.6.0=py36_0 | ||
| - pytz=2017.2=py36_0 | ||
| - pyyaml=3.12=py36_0 | ||
| - qt=5.6.2=3 | ||
| - readline=6.2=2 | ||
| - scipy=0.19.0=np112py36_0 | ||
| - setuptools=27.2.0=py36_0 | ||
| - sip=4.18=py36_0 | ||
| - six=1.10.0=py36_0 | ||
| - sqlite=3.13.0=0 | ||
| - tk=8.5.18=0 | ||
| - wheel=0.29.0=py36_0 | ||
| - xz=5.2.2=1 | ||
| - yaml=0.1.6=0 | ||
| - zlib=1.2.8=3 | ||
| - pip: | ||
| - keras==2.0.3 | ||
| - protobuf==3.2.0 | ||
| - pydot-ng==1.0.0 | ||
| - tensorflow-gpu==1.0.1 | ||
| - theano==0.9.0 | ||
| prefix: /home/allan/anaconda3/envs/yad2k |
| @@ -0,0 +1,345 @@ | ||
| """ | ||
| This is a script that can be used to retrain the YOLOv2 model for your own dataset. | ||
| """ | ||
| import argparse | ||
|
|
||
| import os | ||
|
|
||
| import matplotlib.pyplot as plt | ||
| import numpy as np | ||
| import PIL | ||
| import tensorflow as tf | ||
| from keras import backend as K | ||
| from keras.layers import Input, Lambda, Conv2D | ||
| from keras.models import load_model, Model | ||
| from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping | ||
|
|
||
| from yad2k.models.keras_yolo import (preprocess_true_boxes, yolo_body, | ||
| yolo_eval, yolo_head, yolo_loss) | ||
| from yad2k.utils.draw_boxes import draw_boxes | ||
|
|
||
# Args
argparser = argparse.ArgumentParser(
    description="Retrain or 'fine-tune' a pretrained YOLOv2 model for your own data.")

argparser.add_argument(
    '-d',
    '--data_path',
    help="path to numpy data file (.npz) containing np.object array 'boxes' and np.uint8 array 'images'",
    default=os.path.join('..', 'DATA', 'underwater_data.npz'))

argparser.add_argument(
    '-a',
    '--anchors_path',
    help='path to anchors file, defaults to yolo_anchors.txt',
    default=os.path.join('model_data', 'yolo_anchors.txt'))

argparser.add_argument(
    '-c',
    '--classes_path',
    help='path to classes file, defaults to pascal_classes.txt',
    default=os.path.join('..', 'DATA', 'underwater_classes.txt'))

# Default anchor boxes: five (width, height) anchor priors used when no
# anchors file is available. Presumably expressed in 13x13-grid-cell units
# (matches the detectors_mask_shape used below) -- TODO confirm.
YOLO_ANCHORS = np.array(
    ((0.57273, 0.677385), (1.87446, 2.06253), (3.33843, 5.47434),
     (7.88282, 3.52778), (9.77052, 9.16828)))
|
|
||
def _main(args):
    """Load the dataset, build the model, train it, then render predictions.

    Expects args.data_path to point at an .npz with two arrays: an object
    array 'boxes' (variable number of boxes per image) and a uint8 image
    array 'images'.
    """
    data_path = os.path.expanduser(args.data_path)
    classes_path = os.path.expanduser(args.classes_path)
    anchors_path = os.path.expanduser(args.anchors_path)

    class_names = get_classes(classes_path)
    # Bug fix: the original later overwrote this with YOLO_ANCHORS
    # unconditionally, so the --anchors_path argument had no effect.
    anchors = get_anchors(anchors_path)

    # Custom data saved as a numpy file.
    data = np.load(data_path)

    image_data, boxes = process_data(data['images'], data['boxes'])

    detectors_mask, matching_true_boxes = get_detector_mask(boxes, anchors)

    model_body, model = create_model(anchors, class_names)

    train(
        model,
        class_names,
        anchors,
        image_data,
        boxes,
        detectors_mask,
        matching_true_boxes
    )

    draw(model_body,
         class_names,
         anchors,
         image_data,
         image_set='val',  # assumes training/validation split is 0.9
         weights_name='trained_stage_3_best.h5',
         save_all=False)
|
|
||
|
|
||
def get_classes(classes_path):
    '''Read class names from a text file, one name per line.'''
    with open(classes_path) as handle:
        return [line.strip() for line in handle]
|
|
||
def get_anchors(anchors_path):
    '''Load anchor boxes from a comma-separated text file.

    Returns an (N, 2) float array of (width, height) anchor priors.
    Falls back to YOLO_ANCHORS with a warning if the file is missing.
    '''
    if os.path.isfile(anchors_path):
        with open(anchors_path) as f:
            anchors = [float(x) for x in f.readline().split(',')]
        return np.array(anchors).reshape(-1, 2)
    # Bug fix: the original built a Warning object without issuing it,
    # so the fallback was completely silent.
    import warnings
    warnings.warn("Could not open anchors file, using default.")
    return YOLO_ANCHORS
|
|
||
def process_data(images, boxes=None):
    '''Resize images to the 416x416 model input and normalize boxes.

    images: iterable of uint8 arrays, all assumed the same original size
        (only the first image's size is used for normalization) -- TODO confirm.
    boxes: optional iterable of flat arrays of
        (class, x_min, y_min, x_max, y_max) pixel-coordinate boxes.

    Returns processed images scaled to [0, 1], and, when boxes is given,
    a zero-padded box array in relative
    (x_center, y_center, width, height, class) form.
    '''
    images = [PIL.Image.fromarray(i) for i in images]
    orig_size = np.array([images[0].width, images[0].height])
    orig_size = np.expand_dims(orig_size, axis=0)

    # Image preprocessing: resize to model input size, scale to [0, 1].
    # Bug fix: np.float was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin float is the exact equivalent (float64).
    processed_images = [i.resize((416, 416), PIL.Image.BICUBIC) for i in images]
    processed_images = [np.array(image, dtype=float) for image in processed_images]
    processed_images = [image / 255. for image in processed_images]

    if boxes is None:
        return np.array(processed_images)

    # Box preprocessing.
    # Original boxes stored as 1D list of class, x_min, y_min, x_max, y_max.
    boxes = [box.reshape((-1, 5)) for box in boxes]
    # (The original also computed unused corner extents here; removed.)

    # Get box parameters as x_center, y_center, box_width, box_height, class,
    # all relative to the original image size.
    boxes_xy = [0.5 * (box[:, 3:5] + box[:, 1:3]) / orig_size for box in boxes]
    boxes_wh = [(box[:, 3:5] - box[:, 1:3]) / orig_size for box in boxes]
    boxes = [np.concatenate((boxes_xy[i], boxes_wh[i], box[:, 0:1]), axis=1)
             for i, box in enumerate(boxes)]

    # Zero-pad every image's box array to the max box count so the result
    # stacks into one rectangular array for batched training.
    max_boxes = max((box.shape[0] for box in boxes), default=0)
    for i, box in enumerate(boxes):
        if box.shape[0] < max_boxes:
            zero_padding = np.zeros((max_boxes - box.shape[0], 5),
                                    dtype=np.float32)
            boxes[i] = np.vstack((box, zero_padding))

    return np.array(processed_images), np.array(boxes)
|
|
||
def get_detector_mask(boxes, anchors):
    '''
    Precompute detectors_mask and matching_true_boxes for training.
    Detectors mask is 1 for each spatial position in the final conv layer and
    anchor that should be active for the given boxes and 0 otherwise.
    Matching true boxes gives the regression targets for the ground truth box
    that caused a detector to be active or 0 otherwise.
    '''
    masks = []
    matches = []
    for box in boxes:
        mask, matched = preprocess_true_boxes(box, anchors, [416, 416])
        masks.append(mask)
        matches.append(matched)
    return np.array(masks), np.array(matches)
|
|
||
def create_model(anchors, class_names, load_pretrained=True, freeze_body=True):
    '''
    returns the body of the model and the model

    # Params:

    load_pretrained: whether or not to load the pretrained model or initialize all weights

    freeze_body: whether or not to freeze all weights except for the last layer's

    # Returns:

    model_body: YOLOv2 with new output layer

    model: YOLOv2 with custom loss Lambda layer
    '''

    # 13x13 output grid with 5 anchors; the last dims are (1 mask flag) and
    # (5 regression targets) per anchor.
    detectors_mask_shape = (13, 13, 5, 1)
    matching_boxes_shape = (13, 13, 5, 5)

    # Create model input layers.
    image_input = Input(shape=(416, 416, 3))
    boxes_input = Input(shape=(None, 5))
    detectors_mask_input = Input(shape=detectors_mask_shape)
    matching_boxes_input = Input(shape=matching_boxes_shape)

    # Create model body, then drop its last layer ("topless") so a fresh
    # final conv layer sized for this class count can be attached.
    yolo_model = yolo_body(image_input, len(anchors), len(class_names))
    topless_yolo = Model(yolo_model.input, yolo_model.layers[-2].output)

    if load_pretrained:
        # Save topless yolo: cache headless pretrained weights so the full
        # model only has to be loaded and stripped once.
        topless_yolo_path = os.path.join('model_data', 'yolo_topless.h5')
        if not os.path.exists(topless_yolo_path):
            print("CREATING TOPLESS WEIGHTS FILE")
            yolo_path = os.path.join('model_data', 'yolo.h5')
            model_body = load_model(yolo_path)
            model_body = Model(model_body.inputs, model_body.layers[-2].output)
            model_body.save_weights(topless_yolo_path)
        topless_yolo.load_weights(topless_yolo_path)

    if freeze_body:
        # Train only the new final layer; keep the pretrained body fixed.
        for layer in topless_yolo.layers:
            layer.trainable = False
    # One 1x1 conv producing (5 box params + class scores) per anchor.
    final_layer = Conv2D(len(anchors)*(5+len(class_names)), (1, 1), activation='linear')(topless_yolo.output)

    model_body = Model(image_input, final_layer)

    # Place model loss on CPU to reduce GPU memory usage.
    with tf.device('/cpu:0'):
        # TODO: Replace Lambda with custom Keras layer for loss.
        model_loss = Lambda(
            yolo_loss,
            output_shape=(1, ),
            name='yolo_loss',
            arguments={'anchors': anchors,
                       'num_classes': len(class_names)})([
                           model_body.output, boxes_input,
                           detectors_mask_input, matching_boxes_input
                       ])

    # The training model's "prediction" is the scalar loss itself; see the
    # pass-through loss hack in train().
    model = Model(
        [model_body.input, boxes_input, detectors_mask_input,
         matching_boxes_input], model_loss)

    return model_body, model
|
|
||
def train(model, class_names, anchors, image_data, boxes, detectors_mask,
          matching_true_boxes, validation_split=0.1):
    '''
    Retrain/fine-tune the model in three stages.

    Stage 1 trains only the new final layer (the body is frozen by
    create_model); stages 2 and 3 rebuild the model fully trainable and
    fine-tune it. Logs training with TensorBoard and saves weights in the
    current directory; the best weights according to val_loss are saved as
    trained_stage_3_best.h5.
    '''
    # The 'yolo_loss' Lambda layer already outputs the loss, so the Keras
    # loss function just passes the prediction through.
    model.compile(
        optimizer='adam', loss={
            'yolo_loss': lambda y_true, y_pred: y_pred
        })  # This is a hack to use the custom loss function in the last layer.

    logging = TensorBoard()
    checkpoint = ModelCheckpoint("trained_stage_3_best.h5", monitor='val_loss',
                                 save_weights_only=True, save_best_only=True)
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0,
                                   patience=15, verbose=1, mode='auto')

    # Stage 1: warm up the randomly-initialized final layer.
    # Targets are dummy zeros; the real loss comes from the Lambda layer.
    model.fit([image_data, boxes, detectors_mask, matching_true_boxes],
              np.zeros(len(image_data)),
              validation_split=validation_split,
              batch_size=32,
              epochs=5,
              callbacks=[logging])
    model.save_weights('trained_stage_1.h5')

    # Rebuild with every layer trainable for fine-tuning.
    model_body, model = create_model(anchors, class_names,
                                     load_pretrained=False, freeze_body=False)
    model.load_weights('trained_stage_1.h5')

    model.compile(
        optimizer='adam', loss={
            'yolo_loss': lambda y_true, y_pred: y_pred
        })  # This is a hack to use the custom loss function in the last layer.

    # Stage 2. Bug fix: stages 2 and 3 previously hardcoded
    # validation_split=0.1, silently ignoring the function argument.
    model.fit([image_data, boxes, detectors_mask, matching_true_boxes],
              np.zeros(len(image_data)),
              validation_split=validation_split,
              batch_size=8,
              epochs=30,
              callbacks=[logging])
    model.save_weights('trained_stage_2.h5')

    # Stage 3: as stage 2, with checkpointing and early stopping enabled.
    model.fit([image_data, boxes, detectors_mask, matching_true_boxes],
              np.zeros(len(image_data)),
              validation_split=validation_split,
              batch_size=8,
              epochs=30,
              callbacks=[logging, checkpoint, early_stopping])
    model.save_weights('trained_stage_3.h5')
|
|
||
def draw(model_body, class_names, anchors, image_data, image_set='val',
         weights_name='trained_stage_3_best.h5', out_path="output_images",
         save_all=True):
    '''
    Draw bounding boxes predicted by model_body on image data and save the
    rendered images under out_path.

    image_set selects the slice of image_data to render: 'train' (first
    90%), 'val' (last 10%) or 'all'; assumes the training/validation split
    was 0.9. When save_all is False, only images with at least one detected
    box are written.
    '''
    if image_set == 'train':
        image_data = np.array([np.expand_dims(image, axis=0)
                               for image in image_data[:int(len(image_data)*.9)]])
    elif image_set == 'val':
        image_data = np.array([np.expand_dims(image, axis=0)
                               for image in image_data[int(len(image_data)*.9):]])
    elif image_set == 'all':
        image_data = np.array([np.expand_dims(image, axis=0)
                               for image in image_data])
    else:
        # Bug fix: the original constructed this ValueError without raising
        # it, so an invalid image_set silently used the unfiltered data.
        raise ValueError("draw argument image_set must be 'train', 'val', or 'all'")
    # model.load_weights(weights_name)
    print(image_data.shape)
    model_body.load_weights(weights_name)

    # Create output variables for prediction.
    yolo_outputs = yolo_head(model_body.output, anchors, len(class_names))
    input_image_shape = K.placeholder(shape=(2, ))
    boxes, scores, classes = yolo_eval(
        yolo_outputs, input_image_shape, score_threshold=0.07, iou_threshold=0)

    # Run prediction on overfit image.
    sess = K.get_session()  # TODO: Remove dependence on Tensorflow session.

    if not os.path.exists(out_path):
        os.makedirs(out_path)
    for i in range(len(image_data)):
        out_boxes, out_scores, out_classes = sess.run(
            [boxes, scores, classes],
            feed_dict={
                model_body.input: image_data[i],
                input_image_shape: [image_data.shape[2], image_data.shape[3]],
                K.learning_phase(): 0
            })
        print('Found {} boxes for image.'.format(len(out_boxes)))
        print(out_boxes)

        # Plot image with predicted boxes.
        image_with_boxes = draw_boxes(image_data[i][0], out_boxes, out_classes,
                                      class_names, out_scores)
        # Save the image:
        if save_all or (len(out_boxes) > 0):
            image = PIL.Image.fromarray(image_with_boxes)
            image.save(os.path.join(out_path, str(i)+'.png'))

        # To display (pauses the program):
        # plt.imshow(image_with_boxes, interpolation='nearest')
        # plt.show()
|
|
||
|
|
||
|
|
||
| if __name__ == '__main__': | ||
| args = argparser.parse_args() | ||
| _main(args) |
| @@ -0,0 +1,194 @@ | ||
| #! /usr/bin/env python | ||
| """Run a YOLO_v2 style detection model on test images.""" | ||
| import argparse | ||
| import colorsys | ||
| import imghdr | ||
| import os | ||
| import random | ||
|
|
||
| import numpy as np | ||
| from keras import backend as K | ||
| from keras.models import load_model | ||
| from PIL import Image, ImageDraw, ImageFont | ||
|
|
||
| from yad2k.models.keras_yolo import yolo_eval, yolo_head | ||
|
|
||
| parser = argparse.ArgumentParser( | ||
| description='Run a YOLO_v2 style detection model on test images..') | ||
| parser.add_argument( | ||
| 'model_path', | ||
| help='path to h5 model file containing body' | ||
| 'of a YOLO_v2 model') | ||
| parser.add_argument( | ||
| '-a', | ||
| '--anchors_path', | ||
| help='path to anchors file, defaults to yolo_anchors.txt', | ||
| default='model_data/yolo_anchors.txt') | ||
| parser.add_argument( | ||
| '-c', | ||
| '--classes_path', | ||
| help='path to classes file, defaults to coco_classes.txt', | ||
| default='model_data/coco_classes.txt') | ||
| parser.add_argument( | ||
| '-t', | ||
| '--test_path', | ||
| help='path to directory of test images, defaults to images/', | ||
| default='images') | ||
| parser.add_argument( | ||
| '-o', | ||
| '--output_path', | ||
| help='path to output test images, defaults to images/out', | ||
| default='images/out') | ||
| parser.add_argument( | ||
| '-s', | ||
| '--score_threshold', | ||
| type=float, | ||
| help='threshold for bounding box scores, default .3', | ||
| default=.3) | ||
| parser.add_argument( | ||
| '-iou', | ||
| '--iou_threshold', | ||
| type=float, | ||
| help='threshold for non max suppression IOU, default .5', | ||
| default=.5) | ||
|
|
||
|
|
||
def _main(args):
    """Run YOLO_v2 detection on every image in a directory.

    Loads a converted Keras model plus anchors/classes files, runs
    detection on each image found under args.test_path, draws labeled
    boxes, and writes annotated copies to args.output_path.
    """
    model_path = os.path.expanduser(args.model_path)
    assert model_path.endswith('.h5'), 'Keras model must be a .h5 file.'
    anchors_path = os.path.expanduser(args.anchors_path)
    classes_path = os.path.expanduser(args.classes_path)
    test_path = os.path.expanduser(args.test_path)
    output_path = os.path.expanduser(args.output_path)

    if not os.path.exists(output_path):
        print('Creating output path {}'.format(output_path))
        os.mkdir(output_path)

    sess = K.get_session()  # TODO: Remove dependence on Tensorflow session.

    # One class name per line.
    with open(classes_path) as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]

    # Anchors file is a single comma-separated line of (w, h) pairs.
    with open(anchors_path) as f:
        anchors = f.readline()
        anchors = [float(x) for x in anchors.split(',')]
        anchors = np.array(anchors).reshape(-1, 2)

    yolo_model = load_model(model_path)

    # Verify model, anchors, and classes are compatible: the final layer
    # must emit (5 box params + num_classes) channels per anchor.
    num_classes = len(class_names)
    num_anchors = len(anchors)
    # TODO: Assumes dim ordering is channel last
    model_output_channels = yolo_model.layers[-1].output_shape[-1]
    assert model_output_channels == num_anchors * (num_classes + 5), \
        'Mismatch between model and given anchor and class sizes. ' \
        'Specify matching anchors and classes with --anchors_path and ' \
        '--classes_path flags.'
    print('{} model, anchors, and classes loaded.'.format(model_path))

    # Check if model is fully convolutional, assuming channel last order.
    model_image_size = yolo_model.layers[0].input_shape[1:3]
    is_fixed_size = model_image_size != (None, None)

    # Generate colors for drawing bounding boxes: evenly spaced hues,
    # converted to 0-255 RGB tuples.
    hsv_tuples = [(x / len(class_names), 1., 1.)
                  for x in range(len(class_names))]
    colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
    colors = list(
        map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
            colors))
    random.seed(10101)  # Fixed seed for consistent colors across runs.
    random.shuffle(colors)  # Shuffle colors to decorrelate adjacent classes.
    random.seed(None)  # Reset seed to default.

    # Generate output tensor targets for filtered bounding boxes.
    # TODO: Wrap these backend operations with Keras layers.
    yolo_outputs = yolo_head(yolo_model.output, anchors, len(class_names))
    input_image_shape = K.placeholder(shape=(2, ))
    boxes, scores, classes = yolo_eval(
        yolo_outputs,
        input_image_shape,
        score_threshold=args.score_threshold,
        iou_threshold=args.iou_threshold)

    for image_file in os.listdir(test_path):
        # Skip non-image files and subdirectories.
        # NOTE(review): imghdr is deprecated since Python 3.11 and removed
        # in 3.13; fine for the pinned Python 3.6 environment.
        try:
            image_type = imghdr.what(os.path.join(test_path, image_file))
            if not image_type:
                continue
        except IsADirectoryError:
            continue

        image = Image.open(os.path.join(test_path, image_file))
        if is_fixed_size:  # TODO: When resizing we can use minibatch input.
            resized_image = image.resize(
                tuple(reversed(model_image_size)), Image.BICUBIC)
            image_data = np.array(resized_image, dtype='float32')
        else:
            # Due to skip connection + max pooling in YOLO_v2, inputs must have
            # width and height as multiples of 32.
            new_image_size = (image.width - (image.width % 32),
                              image.height - (image.height % 32))
            resized_image = image.resize(new_image_size, Image.BICUBIC)
            image_data = np.array(resized_image, dtype='float32')
            print(image_data.shape)

        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

        # Run the filtered-detection graph; feed (height, width) of the
        # original image so boxes are scaled back to its coordinates.
        out_boxes, out_scores, out_classes = sess.run(
            [boxes, scores, classes],
            feed_dict={
                yolo_model.input: image_data,
                input_image_shape: [image.size[1], image.size[0]],
                K.learning_phase(): 0
            })
        print('Found {} boxes for {}'.format(len(out_boxes), image_file))

        # Font size and box thickness scale with the image dimensions.
        font = ImageFont.truetype(
            font='font/FiraMono-Medium.otf',
            size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
        thickness = (image.size[0] + image.size[1]) // 300

        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = class_names[c]
            box = out_boxes[i]
            score = out_scores[i]

            label = '{} {:.2f}'.format(predicted_class, score)

            draw = ImageDraw.Draw(image)
            # NOTE(review): ImageDraw.textsize was removed in Pillow 10;
            # fine for the pinned Pillow 4.1 environment.
            label_size = draw.textsize(label, font)

            # Clamp box corners to the image bounds.
            top, left, bottom, right = box
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
            print(label, (left, top), (right, bottom))

            # Place the label above the box when it fits, else inside it.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # My kingdom for a good redistributable image drawing library.
            # Draw nested 1px rectangles to fake a thick outline.
            # (NOTE: this inner loop reuses/shadows the outer loop index i.)
            for i in range(thickness):
                draw.rectangle(
                    [left + i, top + i, right - i, bottom - i],
                    outline=colors[c])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=colors[c])
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw

        image.save(os.path.join(output_path, image_file), quality=90)
    sess.close()
|
|
||
|
|
||
| if __name__ == '__main__': | ||
| _main(parser.parse_args()) |
| @@ -0,0 +1,185 @@ | ||
| #! /usr/bin/env python | ||
| """Overfit a YOLO_v2 model to a single image from the Pascal VOC dataset. | ||
| This is a sample training script used to test the implementation of the | ||
| YOLO localization loss function. | ||
| """ | ||
| import argparse | ||
| import io | ||
| import os | ||
|
|
||
| import h5py | ||
| import matplotlib.pyplot as plt | ||
| import numpy as np | ||
| import PIL | ||
| import tensorflow as tf | ||
| from keras import backend as K | ||
| from keras.layers import Input, Lambda | ||
| from keras.models import Model | ||
|
|
||
| from yad2k.models.keras_yolo import (preprocess_true_boxes, yolo_body, | ||
| yolo_eval, yolo_head, yolo_loss) | ||
| from yad2k.utils.draw_boxes import draw_boxes | ||
|
|
||
| YOLO_ANCHORS = np.array( | ||
| ((0.57273, 0.677385), (1.87446, 2.06253), (3.33843, 5.47434), | ||
| (7.88282, 3.52778), (9.77052, 9.16828))) | ||
|
|
||
| argparser = argparse.ArgumentParser( | ||
| description='Train YOLO_v2 model to overfit on a single image.') | ||
|
|
||
| argparser.add_argument( | ||
| '-d', | ||
| '--data_path', | ||
| help='path to HDF5 file containing pascal voc dataset', | ||
| default='~/datasets/VOCdevkit/pascal_voc_07_12.hdf5') | ||
|
|
||
| argparser.add_argument( | ||
| '-a', | ||
| '--anchors_path', | ||
| help='path to anchors file, defaults to yolo_anchors.txt', | ||
| default='model_data/yolo_anchors.txt') | ||
|
|
||
| argparser.add_argument( | ||
| '-c', | ||
| '--classes_path', | ||
| help='path to classes file, defaults to pascal_classes.txt', | ||
| default='model_data/pascal_classes.txt') | ||
|
|
||
|
|
||
| def _main(args): | ||
| voc_path = os.path.expanduser(args.data_path) | ||
| classes_path = os.path.expanduser(args.classes_path) | ||
| anchors_path = os.path.expanduser(args.anchors_path) | ||
|
|
||
| with open(classes_path) as f: | ||
| class_names = f.readlines() | ||
| class_names = [c.strip() for c in class_names] | ||
|
|
||
| if os.path.isfile(anchors_path): | ||
| with open(anchors_path) as f: | ||
| anchors = f.readline() | ||
| anchors = [float(x) for x in anchors.split(',')] | ||
| anchors = np.array(anchors).reshape(-1, 2) | ||
| else: | ||
| anchors = YOLO_ANCHORS | ||
|
|
||
| voc = h5py.File(voc_path, 'r') | ||
| image = PIL.Image.open(io.BytesIO(voc['train/images'][28])) | ||
| orig_size = np.array([image.width, image.height]) | ||
| orig_size = np.expand_dims(orig_size, axis=0) | ||
|
|
||
| # Image preprocessing. | ||
| image = image.resize((416, 416), PIL.Image.BICUBIC) | ||
| image_data = np.array(image, dtype=np.float) | ||
| image_data /= 255. | ||
|
|
||
| # Box preprocessing. | ||
| # Original boxes stored as 1D list of class, x_min, y_min, x_max, y_max. | ||
| boxes = voc['train/boxes'][28] | ||
| boxes = boxes.reshape((-1, 5)) | ||
| # Get extents as y_min, x_min, y_max, x_max, class for comparision with | ||
| # model output. | ||
| boxes_extents = boxes[:, [2, 1, 4, 3, 0]] | ||
|
|
||
| # Get box parameters as x_center, y_center, box_width, box_height, class. | ||
| boxes_xy = 0.5 * (boxes[:, 3:5] + boxes[:, 1:3]) | ||
| boxes_wh = boxes[:, 3:5] - boxes[:, 1:3] | ||
| boxes_xy = boxes_xy / orig_size | ||
| boxes_wh = boxes_wh / orig_size | ||
| boxes = np.concatenate((boxes_xy, boxes_wh, boxes[:, 0:1]), axis=1) | ||
|
|
||
| # Precompute detectors_mask and matching_true_boxes for training. | ||
| # Detectors mask is 1 for each spatial position in the final conv layer and | ||
| # anchor that should be active for the given boxes and 0 otherwise. | ||
| # Matching true boxes gives the regression targets for the ground truth box | ||
| # that caused a detector to be active or 0 otherwise. | ||
| detectors_mask_shape = (13, 13, 5, 1) | ||
| matching_boxes_shape = (13, 13, 5, 5) | ||
| detectors_mask, matching_true_boxes = preprocess_true_boxes(boxes, anchors, | ||
| [416, 416]) | ||
|
|
||
| # Create model input layers. | ||
| image_input = Input(shape=(416, 416, 3)) | ||
| boxes_input = Input(shape=(None, 5)) | ||
| detectors_mask_input = Input(shape=detectors_mask_shape) | ||
| matching_boxes_input = Input(shape=matching_boxes_shape) | ||
|
|
||
| print('Boxes:') | ||
| print(boxes) | ||
| print('Box corners:') | ||
| print(boxes_extents) | ||
| print('Active detectors:') | ||
| print(np.where(detectors_mask == 1)[:-1]) | ||
| print('Matching boxes for active detectors:') | ||
| print(matching_true_boxes[np.where(detectors_mask == 1)[:-1]]) | ||
|
|
||
| # Create model body. | ||
| model_body = yolo_body(image_input, len(anchors), len(class_names)) | ||
| model_body = Model(image_input, model_body.output) | ||
| # Place model loss on CPU to reduce GPU memory usage. | ||
| with tf.device('/cpu:0'): | ||
| # TODO: Replace Lambda with custom Keras layer for loss. | ||
| model_loss = Lambda( | ||
| yolo_loss, | ||
| output_shape=(1, ), | ||
| name='yolo_loss', | ||
| arguments={'anchors': anchors, | ||
| 'num_classes': len(class_names)})([ | ||
| model_body.output, boxes_input, | ||
| detectors_mask_input, matching_boxes_input | ||
| ]) | ||
| model = Model( | ||
| [image_input, boxes_input, detectors_mask_input, | ||
| matching_boxes_input], model_loss) | ||
| model.compile( | ||
| optimizer='adam', loss={ | ||
| 'yolo_loss': lambda y_true, y_pred: y_pred | ||
| }) # This is a hack to use the custom loss function in the last layer. | ||
|
|
||
| # Add batch dimension for training. | ||
| image_data = np.expand_dims(image_data, axis=0) | ||
| boxes = np.expand_dims(boxes, axis=0) | ||
| detectors_mask = np.expand_dims(detectors_mask, axis=0) | ||
| matching_true_boxes = np.expand_dims(matching_true_boxes, axis=0) | ||
|
|
||
| num_steps = 1000 | ||
| # TODO: For full training, put preprocessing inside training loop. | ||
| # for i in range(num_steps): | ||
| # loss = model.train_on_batch( | ||
| # [image_data, boxes, detectors_mask, matching_true_boxes], | ||
| # np.zeros(len(image_data))) | ||
| model.fit([image_data, boxes, detectors_mask, matching_true_boxes], | ||
| np.zeros(len(image_data)), | ||
| batch_size=1, | ||
| epochs=num_steps) | ||
| model.save_weights('overfit_weights.h5') | ||
|
|
||
| # Create output variables for prediction. | ||
| yolo_outputs = yolo_head(model_body.output, anchors, len(class_names)) | ||
| input_image_shape = K.placeholder(shape=(2, )) | ||
| boxes, scores, classes = yolo_eval( | ||
| yolo_outputs, input_image_shape, score_threshold=.3, iou_threshold=.9) | ||
|
|
||
| # Run prediction on overfit image. | ||
| sess = K.get_session() # TODO: Remove dependence on Tensorflow session. | ||
| out_boxes, out_scores, out_classes = sess.run( | ||
| [boxes, scores, classes], | ||
| feed_dict={ | ||
| model_body.input: image_data, | ||
| input_image_shape: [image.size[1], image.size[0]], | ||
| K.learning_phase(): 0 | ||
| }) | ||
| print('Found {} boxes for image.'.format(len(out_boxes))) | ||
| print(out_boxes) | ||
|
|
||
| # Plot image with predicted boxes. | ||
| image_with_boxes = draw_boxes(image_data[0], out_boxes, out_classes, | ||
| class_names, out_scores) | ||
| plt.imshow(image_with_boxes, interpolation='nearest') | ||
| plt.show() | ||
|
|
||
|
|
||
# Script entry point: parse CLI args and run the overfit-training demo.
if __name__ == '__main__':
    args = argparser.parse_args()
    _main(args)
| @@ -0,0 +1,213 @@ | ||
| #! /usr/bin/env python | ||
| """Run a YOLO_v2 style detection model on test images.""" | ||
| import argparse | ||
| import colorsys | ||
| import imghdr | ||
| import os | ||
| import random | ||
|
|
||
| import numpy as np | ||
| from keras import backend as K | ||
| from keras.models import load_model | ||
| from PIL import Image, ImageDraw, ImageFont | ||
|
|
||
| from yad2k.models.keras_yolo import yolo_eval, yolo_head | ||
|
|
||
| import cv2 | ||
|
|
||
# Command-line interface for the detection script below.
parser = argparse.ArgumentParser(
    description='Run a YOLO_v2 style detection model on test images..')
parser.add_argument(
    'model_path',
    help='path to h5 model file containing body'
    'of a YOLO_v2 model')
parser.add_argument(
    '-a',
    '--anchors_path',
    help='path to anchors file, defaults to yolo_anchors.txt',
    default='model_data/yolo_anchors.txt')
parser.add_argument(
    '-c',
    '--classes_path',
    help='path to classes file, defaults to coco_classes.txt',
    default='model_data/coco_classes.txt')
parser.add_argument(
    '-t',
    '--test_path',
    help='path to directory of test images, defaults to images/',
    default='images')
parser.add_argument(
    '-o',
    '--output_path',
    help='path to output test images, defaults to images/out',
    default='images/out')
parser.add_argument(
    '-s',
    '--score_threshold',
    type=float,
    help='threshold for bounding box scores, default .3',
    default=.3)
parser.add_argument(
    '-iou',
    '--iou_threshold',
    type=float,
    help='threshold for non max suppression IOU, default .5',
    default=.5)
|
|
||
|
|
||
def _main(args):
    """Run YOLO_v2 person detection on webcam frames.

    Loads a Keras YOLO_v2 model with its anchors and class names, builds
    the box-decoding graph, then reads frames from the default camera,
    draws rectangles around detected 'person' boxes and displays the
    annotated stream in an OpenCV window until 'q' is pressed.

    Returns -1 if the camera stops delivering frames.
    """
    model_path = os.path.expanduser(args.model_path)
    assert model_path.endswith('.h5'), 'Keras model must be a .h5 file.'
    anchors_path = os.path.expanduser(args.anchors_path)
    classes_path = os.path.expanduser(args.classes_path)
    test_path = os.path.expanduser(args.test_path)
    output_path = os.path.expanduser(args.output_path)

    if not os.path.exists(output_path):
        print('Creating output path {}'.format(output_path))
        os.mkdir(output_path)

    sess = K.get_session()  # TODO: Remove dependence on Tensorflow session.

    with open(classes_path) as f:
        class_names = [c.strip() for c in f.readlines()]

    with open(anchors_path) as f:
        anchors = f.readline()
        anchors = [float(x) for x in anchors.split(',')]
        anchors = np.array(anchors).reshape(-1, 2)

    yolo_model = load_model(model_path)

    # Verify model, anchors, and classes are compatible.
    num_classes = len(class_names)
    num_anchors = len(anchors)
    # TODO: Assumes dim ordering is channel last.
    model_output_channels = yolo_model.layers[-1].output_shape[-1]
    assert model_output_channels == num_anchors * (num_classes + 5), \
        'Mismatch between model and given anchor and class sizes. ' \
        'Specify matching anchors and classes with --anchors_path and ' \
        '--classes_path flags.'
    print('{} model, anchors, and classes loaded.'.format(model_path))

    # Check if model is fully convolutional, assuming channel last order.
    model_image_size = yolo_model.layers[0].input_shape[1:3]
    is_fixed_size = model_image_size != (None, None)

    # Generate colors for drawing bounding boxes (one hue per class).
    hsv_tuples = [(x / len(class_names), 1., 1.)
                  for x in range(len(class_names))]
    colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
    colors = list(
        map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
            colors))
    random.seed(10101)  # Fixed seed for consistent colors across runs.
    random.shuffle(colors)  # Shuffle colors to decorrelate adjacent classes.
    random.seed(None)  # Reset seed to default.

    # Generate output tensor targets for filtered bounding boxes.
    # TODO: Wrap these backend operations with Keras layers.
    yolo_outputs = yolo_head(yolo_model.output, anchors, len(class_names))
    input_image_shape = K.placeholder(shape=(2, ))
    boxes, scores, classes = yolo_eval(
        yolo_outputs,
        input_image_shape,
        score_threshold=args.score_threshold,
        iou_threshold=args.iou_threshold)

    cap = cv2.VideoCapture(0)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # Camera failed or the stream ended.
                return -1

            image = Image.fromarray(frame)
            if is_fixed_size:  # TODO: When resizing we can use minibatch input.
                resized_image = image.resize(
                    tuple(reversed(model_image_size)), Image.BICUBIC)
                image_data = np.array(resized_image, dtype='float32')
            else:
                # Due to skip connection + max pooling in YOLO_v2, inputs must
                # have width and height as multiples of 32.
                new_image_size = (image.width - (image.width % 32),
                                  image.height - (image.height % 32))
                resized_image = image.resize(new_image_size, Image.BICUBIC)
                image_data = np.array(resized_image, dtype='float32')
                print(image_data.shape)

            image_data /= 255.
            image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

            out_boxes, out_scores, out_classes = sess.run(
                [boxes, scores, classes],
                feed_dict={
                    yolo_model.input: image_data,
                    input_image_shape: [image.size[1], image.size[0]],
                    K.learning_phase(): 0
                })

            font = ImageFont.truetype(
                font='font/FiraMono-Medium.otf',
                size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
            thickness = (image.size[0] + image.size[1]) // 300

            for i, c in reversed(list(enumerate(out_classes))):
                # Only draw boxes for the 'person' class.
                if not class_names[c] == "person":
                    continue

                predicted_class = class_names[c]
                box = out_boxes[i]
                score = out_scores[i]

                label = '{} {:.2f}'.format(predicted_class, score)

                draw = ImageDraw.Draw(image)
                label_size = draw.textsize(label, font)

                # Clamp box corners to the image bounds.
                top, left, bottom, right = box
                top = max(0, np.floor(top + 0.5).astype('int32'))
                left = max(0, np.floor(left + 0.5).astype('int32'))
                bottom = min(image.size[1],
                             np.floor(bottom + 0.5).astype('int32'))
                right = min(image.size[0],
                            np.floor(right + 0.5).astype('int32'))

                if top - label_size[1] >= 0:
                    text_origin = np.array([left, top - label_size[1]])
                else:
                    text_origin = np.array([left, top + 1])

                # Draw a rectangle `thickness` pixels thick.
                # FIX: the inner loop previously reused `i`, shadowing the
                # enumerate index bound above.
                for t in range(thickness):
                    draw.rectangle(
                        [left + t, top + t, right - t, bottom - t],
                        outline=(50, 200, 50))
                del draw

            cv_img = np.asarray(image)
            cv2.imshow("Stream Video", cv_img)
            key = cv2.waitKey(30) & 0xff
            if key == ord('q'):
                break
    finally:
        # FIX: release the camera on every exit path; it previously leaked
        # when the user quit with 'q' (release happened only on read failure).
        cap.release()
        sess.close()
|
|
||
|
|
||
# Script entry point: parse CLI args and run the detector.
if __name__ == '__main__':
    _main(parser.parse_args())
| @@ -0,0 +1,199 @@ | ||
| """ | ||
| Convert Pascal VOC 2007+2012 detection dataset to HDF5. | ||
| Does not preserve full XML annotations. | ||
| Combines all VOC subsets (train, val test) with VOC2012 train for full | ||
| training set as done in Faster R-CNN paper. | ||
| Code based on: | ||
| https://github.com/pjreddie/darknet/blob/master/scripts/voc_label.py | ||
| """ | ||
|
|
||
| import argparse | ||
| import os | ||
| import xml.etree.ElementTree as ElementTree | ||
|
|
||
| import h5py | ||
| import numpy as np | ||
|
|
||
# (year, image_set) pairs naming the VOC ImageSets/Main id lists to combine.
sets_from_2007 = [('2007', 'train'), ('2007', 'val')]
train_set = [('2012', 'train')]
val_set = [('2012', 'val')]
test_set = [('2007', 'test')]

# The 20 Pascal VOC object classes; position in this list is the class id.
classes = [
    "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
    "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person",
    "pottedplant", "sheep", "sofa", "train", "tvmonitor"
]

# Command-line interface for the HDF5 conversion script.
parser = argparse.ArgumentParser(
    description='Convert Pascal VOC 2007+2012 detection dataset to HDF5.')
parser.add_argument(
    '-p',
    '--path_to_voc',
    help='path to VOCdevkit directory',
    default='~/data/PascalVOC/VOCdevkit')
|
||
|
|
||
def get_boxes_for_id(voc_path, year, image_id):
    """Get object bounding box annotations for the given image.

    Parameters
    ----------
    voc_path : str
        Path to VOCdevkit directory.
    year : str
        Year of dataset containing image. Either '2007' or '2012'.
    image_id : str
        Pascal VOC identifier for given image.

    Returns
    -------
    boxes : array of int
        Flat array of (class, xmin, ymin, xmax, ymax) annotations, five
        entries per object.
    """
    anno_file = os.path.join(
        voc_path, 'VOC{}/Annotations/{}.xml'.format(year, image_id))
    with open(anno_file) as in_file:
        root = ElementTree.parse(in_file).getroot()

    flat_boxes = []
    for obj in root.iter('object'):
        label = obj.find('name').text
        # Skip classes outside the VOC list and boxes marked 'difficult'.
        if label not in classes or int(obj.find('difficult').text) == 1:
            continue
        corners = obj.find('bndbox')
        flat_boxes.extend((classes.index(label),
                           int(corners.find('xmin').text),
                           int(corners.find('ymin').text),
                           int(corners.find('xmax').text),
                           int(corners.find('ymax').text)))
    return np.array(flat_boxes)
|
|
||
|
|
||
def get_image_for_id(voc_path, year, image_id):
    """Get image data as uint8 array for given image.

    Parameters
    ----------
    voc_path : str
        Path to VOCdevkit directory.
    year : str
        Year of dataset containing image. Either '2007' or '2012'.
    image_id : str
        Pascal VOC identifier for given image.

    Returns
    -------
    image_data : array of uint8
        Compressed JPEG byte string represented as array of uint8.
    """
    fname = os.path.join(voc_path, 'VOC{}/JPEGImages/{}.jpg'.format(year,
                                                                    image_id))
    with open(fname, 'rb') as in_file:
        data = in_file.read()
    # Use of encoding based on: https://github.com/h5py/h5py/issues/745
    # FIX: np.fromstring is deprecated for binary data; np.frombuffer is the
    # supported, copy-free equivalent.
    return np.frombuffer(data, dtype='uint8')
|
|
||
|
|
||
def get_ids(voc_path, datasets):
    """Collect image identifiers for the given list of dataset identifiers.

    Parameters
    ----------
    voc_path : str
        Path to VOCdevkit directory.
    datasets : list of str tuples
        List of dataset identifiers in the form of (year, dataset) pairs.

    Returns
    -------
    ids : list of str
        List of all image identifiers for given datasets.
    """
    all_ids = []
    for year, image_set in datasets:
        list_path = os.path.join(
            voc_path, 'VOC{}/ImageSets/Main/{}.txt'.format(year, image_set))
        with open(list_path, 'r') as id_file:
            # One whitespace-trimmed id per line.
            all_ids.extend(line.strip() for line in id_file)
    return all_ids
|
|
||
|
|
||
def add_to_dataset(voc_path, year, ids, images, boxes, start=0):
    """Process all given ids and add them to the given datasets.

    Parameters
    ----------
    voc_path : str
        Path to VOCdevkit directory.
    year : str
        Dataset year, '2007' or '2012'.
    ids : list of str
        Image identifiers to process.
    images, boxes : h5py datasets (or any index-assignable sequences)
        Destination containers written at positions start..start+len(ids)-1.
    start : int
        Offset into the destination datasets.

    Returns
    -------
    int
        Index (relative to `ids`, not including `start`) of the last id
        processed, or -1 if `ids` is empty.
    """
    # FIX: initialize the index so an empty `ids` list no longer raises
    # UnboundLocalError at the return statement.
    last_idx = -1
    for last_idx, voc_id in enumerate(ids):
        images[start + last_idx] = get_image_for_id(voc_path, year, voc_id)
        boxes[start + last_idx] = get_boxes_for_id(voc_path, year, voc_id)
    return last_idx
|
|
||
|
|
||
def _main(args):
    """Build pascal_voc_07_12.hdf5 inside the VOCdevkit directory.

    Training set = VOC2007 train+val followed by VOC2012 train; val set =
    VOC2012 val; test set = VOC2007 test. Images are stored as raw JPEG
    bytes (variable-length uint8), boxes as flat variable-length int
    arrays of (class, xmin, ymin, xmax, ymax) per object.
    """
    voc_path = os.path.expanduser(args.path_to_voc)
    train_ids = get_ids(voc_path, train_set)  # VOC2012 train
    val_ids = get_ids(voc_path, val_set)  # VOC2012 val
    test_ids = get_ids(voc_path, test_set)  # VOC2007 test
    train_ids_2007 = get_ids(voc_path, sets_from_2007)  # VOC2007 train+val
    total_train_ids = len(train_ids) + len(train_ids_2007)

    # Create HDF5 dataset structure
    print('Creating HDF5 dataset structure.')
    fname = os.path.join(voc_path, 'pascal_voc_07_12.hdf5')
    voc_h5file = h5py.File(fname, 'w')
    uint8_dt = h5py.special_dtype(
        vlen=np.dtype('uint8'))  # variable length uint8
    vlen_int_dt = h5py.special_dtype(
        vlen=np.dtype(int))  # variable length default int
    train_group = voc_h5file.create_group('train')
    val_group = voc_h5file.create_group('val')
    test_group = voc_h5file.create_group('test')

    # store class list for reference class ids as csv fixed-length numpy string
    voc_h5file.attrs['classes'] = np.string_(str.join(',', classes))

    # store images as variable length uint8 arrays
    train_images = train_group.create_dataset(
        'images', shape=(total_train_ids, ), dtype=uint8_dt)
    val_images = val_group.create_dataset(
        'images', shape=(len(val_ids), ), dtype=uint8_dt)
    test_images = test_group.create_dataset(
        'images', shape=(len(test_ids), ), dtype=uint8_dt)

    # store boxes as class_id, xmin, ymin, xmax, ymax
    train_boxes = train_group.create_dataset(
        'boxes', shape=(total_train_ids, ), dtype=vlen_int_dt)
    val_boxes = val_group.create_dataset(
        'boxes', shape=(len(val_ids), ), dtype=vlen_int_dt)
    test_boxes = test_group.create_dataset(
        'boxes', shape=(len(test_ids), ), dtype=vlen_int_dt)

    # process all ids and add to datasets
    # 2007 ids fill slots [0, last_2007]; 2012 ids continue at last_2007 + 1.
    print('Processing Pascal VOC 2007 datasets for training set.')
    last_2007 = add_to_dataset(voc_path, '2007', train_ids_2007, train_images,
                               train_boxes)
    print('Processing Pascal VOC 2012 training set.')
    add_to_dataset(
        voc_path,
        '2012',
        train_ids,
        train_images,
        train_boxes,
        start=last_2007 + 1)
    print('Processing Pascal VOC 2012 val set.')
    add_to_dataset(voc_path, '2012', val_ids, val_images, val_boxes)
    print('Processing Pascal VOC 2007 test set.')
    add_to_dataset(voc_path, '2007', test_ids, test_images, test_boxes)

    print('Closing HDF5 file.')
    voc_h5file.close()
    print('Done.')
|
|
||
|
|
||
# Script entry point: parse CLI args and run the HDF5 conversion.
if __name__ == '__main__':
    _main(parser.parse_args())
| @@ -0,0 +1,244 @@ | ||
| """Convert Pascal VOC 2007+2012 detection dataset to TFRecords. | ||
| Does not preserve full XML annotations. | ||
| Combines all VOC 2007 subsets (train, val) with VOC2012 for training. | ||
| Uses VOC2012 val for val and VOC2007 test for test. | ||
| Code based on: | ||
| https://github.com/pjreddie/darknet/blob/master/scripts/voc_label.py | ||
| https://github.com/tensorflow/models/blob/master/inception/inception/data/build_image_data.py | ||
| """ | ||
|
|
||
| import argparse | ||
| import os | ||
| import xml.etree.ElementTree as ElementTree | ||
| from datetime import datetime | ||
|
|
||
| import numpy as np | ||
| import tensorflow as tf | ||
|
|
||
| from voc_to_hdf5 import get_ids | ||
|
|
||
# (year, image_set) pairs naming the VOC ImageSets/Main id lists to combine.
sets_from_2007 = [('2007', 'train'), ('2007', 'val')]
train_set = [('2012', 'train'), ('2012', 'val')]
test_set = [('2007', 'test')]

# The 20 Pascal VOC object classes; position in this list is the class id.
classes = [
    "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
    "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person",
    "pottedplant", "sheep", "sofa", "train", "tvmonitor"
]

# Command-line interface for the TFRecords conversion script.
parser = argparse.ArgumentParser(
    description='Convert Pascal VOC 2007+2012 detection dataset to TFRecords.')
parser.add_argument(
    '-p',
    '--path_to_voc',
    help='path to Pascal VOC dataset',
    default='~/data/PascalVOC/VOCdevkit')

# Small graph for image decoding
# NOTE(review): module-level TF session created once at import time and
# reused by process_image() for every JPEG decode.
decoder_sess = tf.Session()
image_placeholder = tf.placeholder(dtype=tf.string)
decoded_jpeg = tf.image.decode_jpeg(image_placeholder, channels=3)
|
|
||
|
|
||
def process_image(image_path):
    """Decode the JPEG at the given path.

    Returns
    -------
    (image_data, height, width) : (bytes, int, int)
        The raw encoded bytes plus the decoded image's height and width.
    """
    with open(image_path, 'rb') as f:
        image_data = f.read()
    image = decoder_sess.run(decoded_jpeg,
                             feed_dict={image_placeholder: image_data})
    assert len(image.shape) == 3
    height = image.shape[0]
    # FIX: was image.shape[2], which is the channel count (always 3),
    # not the image width.
    width = image.shape[1]
    assert image.shape[2] == 3
    return image_data, height, width
|
|
||
|
|
||
def process_anno(anno_path):
    """Parse a Pascal VOC XML annotation into normalized box dictionaries.

    Each returned dict has keys 'class' (index into `classes`) and
    'y_min'/'x_min'/'y_max'/'x_max' as fractions of the image size.
    """
    with open(anno_path) as f:
        root = ElementTree.parse(f).getroot()

    size_node = root.find('size')
    img_height = float(size_node.find('height').text)
    img_width = float(size_node.find('width').text)

    annotations = []
    for obj in root.iter('object'):
        name = obj.find('name').text
        # Skip classes outside the VOC list and boxes marked 'difficult'.
        if name not in classes or int(obj.find('difficult').text) == 1:
            continue
        corners = obj.find('bndbox')
        annotations.append({
            'class': classes.index(name),
            'y_min': float(corners.find('ymin').text) / img_height,
            'x_min': float(corners.find('xmin').text) / img_width,
            'y_max': float(corners.find('ymax').text) / img_height,
            'x_max': float(corners.find('xmax').text) / img_width
        })
    return annotations
|
|
||
|
|
||
def convert_to_example(image_data, boxes, filename, height, width):
    """Convert Pascal VOC ground truth to TFExample protobuf.

    Parameters
    ----------
    image_data : bytes
        Encoded image bytes.
    boxes : dict
        Bounding box corners and class labels.
    filename : string
        Path to image file.
    height : int
        Image height.
    width : int
        Image width.

    Returns
    -------
    example : protobuf
        Tensorflow Example protobuf containing image and bounding boxes.
    """
    # Small builders for the three TF feature kinds used below.
    def _bytes_feature(values):
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=values))

    def _int64_feature(values):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=values))

    def _float_feature(values):
        return tf.train.Feature(float_list=tf.train.FloatList(value=values))

    feature = {
        'filename':
        _bytes_feature([tf.compat.as_bytes(os.path.basename(filename))]),
        'height': _int64_feature([height]),
        'width': _int64_feature([width]),
        'classes': _int64_feature([b['class'] for b in boxes]),
        'y_mins': _float_feature([b['y_min'] for b in boxes]),
        'x_mins': _float_feature([b['x_min'] for b in boxes]),
        'y_maxes': _float_feature([b['y_max'] for b in boxes]),
        'x_maxes': _float_feature([b['x_max'] for b in boxes]),
        'encoded': _bytes_feature([tf.compat.as_bytes(image_data)])
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
|
|
||
|
|
||
def get_image_path(voc_path, year, image_id):
    """Return the JPEG path for the given dataset year and image id."""
    relative = 'VOC{}/JPEGImages/{}.jpg'.format(year, image_id)
    return os.path.join(voc_path, relative)
|
|
||
|
|
||
def get_anno_path(voc_path, year, image_id):
    """Return the XML annotation path for the given year and image id."""
    relative = 'VOC{}/Annotations/{}.xml'.format(year, image_id)
    return os.path.join(voc_path, relative)
|
|
||
|
|
||
def process_dataset(name, image_paths, anno_paths, result_path, num_shards):
    """Process selected Pascal VOC dataset to generate TFRecords files.
    Parameters
    ----------
    name : string
        Name of resulting dataset 'train' or 'test'.
    image_paths : list
        List of paths to images to include in dataset.
    anno_paths : list
        List of paths to corresponding image annotations.
    result_path : string
        Path to put resulting TFRecord files.
    num_shards : int
        Number of shards to split TFRecord files into.
    """
    # Evenly partition example indices into num_shards contiguous ranges.
    shard_ranges = np.linspace(0, len(image_paths), num_shards + 1).astype(int)
    counter = 0  # total examples written across all shards
    for shard in range(num_shards):
        # Generate shard file name, e.g. 'train-00003-of-00060'.
        output_filename = '{}-{:05d}-of-{:05d}'.format(name, shard, num_shards)
        output_file = os.path.join(result_path, output_filename)
        # NOTE(review): writer is not closed if an exception is raised in the
        # loop below; consider contextlib.closing or try/finally.
        writer = tf.python_io.TFRecordWriter(output_file)

        shard_counter = 0  # examples written to this shard
        files_in_shard = range(shard_ranges[shard], shard_ranges[shard + 1])
        for i in files_in_shard:
            image_file = image_paths[i]
            anno_file = anno_paths[i]

            # processes image + anno
            image_data, height, width = process_image(image_file)
            boxes = process_anno(anno_file)

            # convert to example
            example = convert_to_example(image_data, boxes, image_file, height,
                                         width)

            # write to writer
            writer.write(example.SerializeToString())

            shard_counter += 1
            counter += 1

            # Progress log every 1000 examples overall.
            if not counter % 1000:
                print('{} : Processed {:d} of {:d} images.'.format(
                    datetime.now(), counter, len(image_paths)))
        writer.close()
        print('{} : Wrote {} images to {}'.format(
            datetime.now(), shard_counter, output_filename))

    print('{} : Wrote {} images to {} shards'.format(datetime.now(), counter,
                                                     num_shards))
|
|
||
|
|
||
def _main(args):
    """Locate files for train and test sets and then generate TFRecords."""
    devkit = os.path.expanduser(args.path_to_voc)
    records_dir = os.path.join(devkit, 'TFRecords')
    print('Saving results to {}'.format(records_dir))

    train_dir = os.path.join(records_dir, 'train')
    test_dir = os.path.join(records_dir, 'test')

    # Training set is VOC2012 trainval plus VOC2007 trainval; test is
    # VOC2007 test, as in the Faster R-CNN setup.
    ids_2012 = get_ids(devkit, train_set)  # 2012 trainval
    ids_test = get_ids(devkit, test_set)  # 2007 test
    ids_2007 = get_ids(devkit, sets_from_2007)  # 2007 trainval
    print('{} train examples and {} test examples'.format(
        len(ids_2012) + len(ids_2007), len(ids_test)))

    train_image_files = [get_image_path(devkit, '2012', i) for i in ids_2012]
    train_image_files += [get_image_path(devkit, '2007', i) for i in ids_2007]
    test_image_files = [get_image_path(devkit, '2007', i) for i in ids_test]

    train_anno_files = [get_anno_path(devkit, '2012', i) for i in ids_2012]
    train_anno_files += [get_anno_path(devkit, '2007', i) for i in ids_2007]
    test_anno_files = [get_anno_path(devkit, '2007', i) for i in ids_test]

    process_dataset(
        'train',
        train_image_files,
        train_anno_files,
        train_dir,
        num_shards=60)
    process_dataset(
        'test', test_image_files, test_anno_files, test_dir, num_shards=20)
|
|
||
|
|
||
# Script entry point.
if __name__ == '__main__':
    # FIX: original called parser.parse_args(args) with an undefined
    # module-level name `args`, raising NameError before parsing;
    # parse from sys.argv instead.
    _main(parser.parse_args())
| @@ -0,0 +1,270 @@ | ||
| #! /usr/bin/env python | ||
| """ | ||
| Reads Darknet19 config and weights and creates Keras model with TF backend. | ||
| Currently only supports layers in Darknet19 config. | ||
| """ | ||
|
|
||
| import argparse | ||
| import configparser | ||
| import io | ||
| import os | ||
| from collections import defaultdict | ||
|
|
||
| import numpy as np | ||
| from keras import backend as K | ||
| from keras.layers import (Conv2D, GlobalAveragePooling2D, Input, Lambda, | ||
| MaxPooling2D) | ||
| from keras.layers.advanced_activations import LeakyReLU | ||
| from keras.layers.merge import concatenate | ||
| from keras.layers.normalization import BatchNormalization | ||
| from keras.models import Model | ||
| from keras.regularizers import l2 | ||
| from keras.utils.vis_utils import plot_model as plot | ||
|
|
||
| from yad2k.models.keras_yolo import (space_to_depth_x2, | ||
| space_to_depth_x2_output_shape) | ||
|
|
||
# Command-line interface for the Darknet -> Keras converter.
parser = argparse.ArgumentParser(
    description='Yet Another Darknet To Keras Converter.')
parser.add_argument('config_path', help='Path to Darknet cfg file.')
parser.add_argument('weights_path', help='Path to Darknet weights file.')
parser.add_argument('output_path', help='Path to output Keras model file.')
parser.add_argument(
    '-p',
    '--plot_model',
    help='Plot generated Keras model and save as image.',
    action='store_true')
parser.add_argument(
    '-flcl',
    '--fully_convolutional',
    help='Model is fully convolutional so set input shape to (None, None, 3). '
    'WARNING: This experimental option does not work properly for YOLO_v2.',
    action='store_true')
|
|
||
|
|
||
def unique_config_sections(config_file):
    """Rewrite a Darknet cfg so every section name is unique.

    configparser rejects duplicate section names, but Darknet configs
    repeat them (e.g. many [convolutional] blocks), so a running per-name
    index is appended as a suffix. Returns a StringIO positioned at the
    start of the rewritten config.
    """
    seen = defaultdict(int)
    rewritten = io.StringIO()
    with open(config_file) as src:
        for raw_line in src:
            if raw_line.startswith('['):
                original = raw_line.strip().strip('[]')
                renamed = original + '_' + str(seen[original])
                seen[original] += 1
                raw_line = raw_line.replace(original, renamed)
            rewritten.write(raw_line)
    rewritten.seek(0)
    return rewritten
|
|
||
|
|
||
| # %% | ||
| def _main(args): | ||
| config_path = os.path.expanduser(args.config_path) | ||
| weights_path = os.path.expanduser(args.weights_path) | ||
| assert config_path.endswith('.cfg'), '{} is not a .cfg file'.format( | ||
| config_path) | ||
| assert weights_path.endswith( | ||
| '.weights'), '{} is not a .weights file'.format(weights_path) | ||
|
|
||
| output_path = os.path.expanduser(args.output_path) | ||
| assert output_path.endswith( | ||
| '.h5'), 'output path {} is not a .h5 file'.format(output_path) | ||
| output_root = os.path.splitext(output_path)[0] | ||
|
|
||
| # Load weights and config. | ||
| print('Loading weights.') | ||
| weights_file = open(weights_path, 'rb') | ||
| weights_header = np.ndarray( | ||
| shape=(4, ), dtype='int32', buffer=weights_file.read(16)) | ||
| print('Weights Header: ', weights_header) | ||
| # TODO: Check transpose flag when implementing fully connected layers. | ||
| # transpose = (weight_header[0] > 1000) or (weight_header[1] > 1000) | ||
|
|
||
| print('Parsing Darknet config.') | ||
| unique_config_file = unique_config_sections(config_path) | ||
| cfg_parser = configparser.ConfigParser() | ||
| cfg_parser.read_file(unique_config_file) | ||
|
|
||
| print('Creating Keras model.') | ||
| if args.fully_convolutional: | ||
| image_height, image_width = None, None | ||
| else: | ||
| image_height = int(cfg_parser['net_0']['height']) | ||
| image_width = int(cfg_parser['net_0']['width']) | ||
| prev_layer = Input(shape=(image_height, image_width, 3)) | ||
| all_layers = [prev_layer] | ||
|
|
||
| weight_decay = float(cfg_parser['net_0']['decay'] | ||
| ) if 'net_0' in cfg_parser.sections() else 5e-4 | ||
| count = 0 | ||
| for section in cfg_parser.sections(): | ||
| print('Parsing section {}'.format(section)) | ||
| if section.startswith('convolutional'): | ||
| filters = int(cfg_parser[section]['filters']) | ||
| size = int(cfg_parser[section]['size']) | ||
| stride = int(cfg_parser[section]['stride']) | ||
| pad = int(cfg_parser[section]['pad']) | ||
| activation = cfg_parser[section]['activation'] | ||
| batch_normalize = 'batch_normalize' in cfg_parser[section] | ||
|
|
||
| # padding='same' is equivalent to Darknet pad=1 | ||
| padding = 'same' if pad == 1 else 'valid' | ||
|
|
||
| # Setting weights. | ||
| # Darknet serializes convolutional weights as: | ||
| # [bias/beta, [gamma, mean, variance], conv_weights] | ||
| prev_layer_shape = K.int_shape(prev_layer) | ||
|
|
||
| # TODO: This assumes channel last dim_ordering. | ||
| weights_shape = (size, size, prev_layer_shape[-1], filters) | ||
| darknet_w_shape = (filters, weights_shape[2], size, size) | ||
| weights_size = np.product(weights_shape) | ||
|
|
||
| print('conv2d', 'bn' | ||
| if batch_normalize else ' ', activation, weights_shape) | ||
|
|
||
| conv_bias = np.ndarray( | ||
| shape=(filters, ), | ||
| dtype='float32', | ||
| buffer=weights_file.read(filters * 4)) | ||
| count += filters | ||
|
|
||
| if batch_normalize: | ||
| bn_weights = np.ndarray( | ||
| shape=(3, filters), | ||
| dtype='float32', | ||
| buffer=weights_file.read(filters * 12)) | ||
| count += 3 * filters | ||
|
|
||
| # TODO: Keras BatchNormalization mistakenly refers to var | ||
| # as std. | ||
| bn_weight_list = [ | ||
| bn_weights[0], # scale gamma | ||
| conv_bias, # shift beta | ||
| bn_weights[1], # running mean | ||
| bn_weights[2] # running var | ||
| ] | ||
|
|
||
| conv_weights = np.ndarray( | ||
| shape=darknet_w_shape, | ||
| dtype='float32', | ||
| buffer=weights_file.read(weights_size * 4)) | ||
| count += weights_size | ||
|
|
||
| # DarkNet conv_weights are serialized Caffe-style: | ||
| # (out_dim, in_dim, height, width) | ||
| # We would like to set these to Tensorflow order: | ||
| # (height, width, in_dim, out_dim) | ||
| # TODO: Add check for Theano dim ordering. | ||
| conv_weights = np.transpose(conv_weights, [2, 3, 1, 0]) | ||
| conv_weights = [conv_weights] if batch_normalize else [ | ||
| conv_weights, conv_bias | ||
| ] | ||
|
|
||
| # Handle activation. | ||
| act_fn = None | ||
| if activation == 'leaky': | ||
| pass # Add advanced activation later. | ||
| elif activation != 'linear': | ||
| raise ValueError( | ||
| 'Unknown activation function `{}` in section {}'.format( | ||
| activation, section)) | ||
|
|
||
| # Create Conv2D layer | ||
| conv_layer = (Conv2D( | ||
| filters, (size, size), | ||
| strides=(stride, stride), | ||
| kernel_regularizer=l2(weight_decay), | ||
| use_bias=not batch_normalize, | ||
| weights=conv_weights, | ||
| activation=act_fn, | ||
| padding=padding))(prev_layer) | ||
|
|
||
| if batch_normalize: | ||
| conv_layer = (BatchNormalization( | ||
| weights=bn_weight_list))(conv_layer) | ||
| prev_layer = conv_layer | ||
|
|
||
| if activation == 'linear': | ||
| all_layers.append(prev_layer) | ||
| elif activation == 'leaky': | ||
| act_layer = LeakyReLU(alpha=0.1)(prev_layer) | ||
| prev_layer = act_layer | ||
| all_layers.append(act_layer) | ||
|
|
||
| elif section.startswith('maxpool'): | ||
| size = int(cfg_parser[section]['size']) | ||
| stride = int(cfg_parser[section]['stride']) | ||
| all_layers.append( | ||
| MaxPooling2D( | ||
| padding='same', | ||
| pool_size=(size, size), | ||
| strides=(stride, stride))(prev_layer)) | ||
| prev_layer = all_layers[-1] | ||
|
|
||
| elif section.startswith('avgpool'): | ||
| if cfg_parser.items(section) != []: | ||
| raise ValueError('{} with params unsupported.'.format(section)) | ||
| all_layers.append(GlobalAveragePooling2D()(prev_layer)) | ||
| prev_layer = all_layers[-1] | ||
|
|
||
| elif section.startswith('route'): | ||
| ids = [int(i) for i in cfg_parser[section]['layers'].split(',')] | ||
| layers = [all_layers[i] for i in ids] | ||
| if len(layers) > 1: | ||
| print('Concatenating route layers:', layers) | ||
| concatenate_layer = concatenate(layers) | ||
| all_layers.append(concatenate_layer) | ||
| prev_layer = concatenate_layer | ||
| else: | ||
| skip_layer = layers[0] # only one layer to route | ||
| all_layers.append(skip_layer) | ||
| prev_layer = skip_layer | ||
|
|
||
| elif section.startswith('reorg'): | ||
| block_size = int(cfg_parser[section]['stride']) | ||
| assert block_size == 2, 'Only reorg with stride 2 supported.' | ||
| all_layers.append( | ||
| Lambda( | ||
| space_to_depth_x2, | ||
| output_shape=space_to_depth_x2_output_shape, | ||
| name='space_to_depth_x2')(prev_layer)) | ||
| prev_layer = all_layers[-1] | ||
|
|
||
| elif section.startswith('region'): | ||
| with open('{}_anchors.txt'.format(output_root), 'w') as f: | ||
| print(cfg_parser[section]['anchors'], file=f) | ||
|
|
||
| elif (section.startswith('net') or section.startswith('cost') or | ||
| section.startswith('softmax')): | ||
| pass # Configs not currently handled during model definition. | ||
|
|
||
| else: | ||
| raise ValueError( | ||
| 'Unsupported section header type: {}'.format(section)) | ||
|
|
||
| # Create and save model. | ||
| model = Model(inputs=all_layers[0], outputs=all_layers[-1]) | ||
| print(model.summary()) | ||
| model.save('{}'.format(output_path)) | ||
| print('Saved Keras model to {}'.format(output_path)) | ||
| # Check to see if all weights have been read. | ||
| remaining_weights = len(weights_file.read()) / 4 | ||
| weights_file.close() | ||
| print('Read {} of {} from Darknet weights.'.format(count, count + | ||
| remaining_weights)) | ||
| if remaining_weights > 0: | ||
| print('Warning: {} unused weights'.format(remaining_weights)) | ||
|
|
||
| if args.plot_model: | ||
| plot(model, to_file='{}.png'.format(output_root), show_shapes=True) | ||
| print('Saved model plot to {}.png'.format(output_root)) | ||
|
|
||
|
|
||
# Script entry point: convert a Darknet config/weights pair to a Keras
# model using the CLI arguments defined by the module-level `parser`.
if __name__ == '__main__':
    _main(parser.parse_args())
| @@ -0,0 +1,71 @@ | ||
| """Darknet19 Model Defined in Keras.""" | ||
| import functools | ||
| from functools import partial | ||
|
|
||
| from keras.layers import Conv2D, MaxPooling2D | ||
| from keras.layers.advanced_activations import LeakyReLU | ||
| from keras.layers.normalization import BatchNormalization | ||
| from keras.models import Model | ||
| from keras.regularizers import l2 | ||
|
|
||
| from ..utils import compose | ||
|
|
||
# Partial wrapper for Convolution2D with static default argument.
# Darknet convolutions always use 'same' padding (Darknet pad=1).
_DarknetConv2D = partial(Conv2D, padding='same')
|
|
||
|
|
||
@functools.wraps(Conv2D)
def DarknetConv2D(*args, **kwargs):
    """Convolution2D with the Darknet L2 weight regularizer applied.

    Caller-supplied keyword arguments take precedence over the default
    regularizer.
    """
    conv_kwargs = dict(kwargs)
    conv_kwargs.setdefault('kernel_regularizer', l2(5e-4))
    return _DarknetConv2D(*args, **conv_kwargs)
|
|
||
|
|
||
def DarknetConv2D_BN_Leaky(*args, **kwargs):
    """Darknet Convolution2D followed by BatchNormalization and LeakyReLU."""
    # Bias is redundant before batch normalization; callers may still
    # override `use_bias` explicitly.
    conv_kwargs = dict(kwargs)
    conv_kwargs.setdefault('use_bias', False)
    return compose(
        DarknetConv2D(*args, **conv_kwargs),
        BatchNormalization(),
        LeakyReLU(alpha=0.1))
|
|
||
|
|
||
def bottleneck_block(outer_filters, bottleneck_filters):
    """Bottleneck block of 3x3, 1x1, 3x3 convolutions."""
    layers = [
        DarknetConv2D_BN_Leaky(outer_filters, (3, 3)),
        DarknetConv2D_BN_Leaky(bottleneck_filters, (1, 1)),
        DarknetConv2D_BN_Leaky(outer_filters, (3, 3)),
    ]
    return compose(*layers)
|
|
||
|
|
||
def bottleneck_x2_block(outer_filters, bottleneck_filters):
    """Bottleneck block of 3x3, 1x1, 3x3, 1x1, 3x3 convolutions."""
    # A plain bottleneck followed by one more squeeze/expand pair.
    tail = [
        DarknetConv2D_BN_Leaky(bottleneck_filters, (1, 1)),
        DarknetConv2D_BN_Leaky(outer_filters, (3, 3)),
    ]
    return compose(bottleneck_block(outer_filters, bottleneck_filters), *tail)
|
|
||
|
|
||
def darknet_body():
    """Generate first 18 conv layers of Darknet-19."""
    # Each MaxPooling2D halves the spatial resolution between stages.
    stages = [
        DarknetConv2D_BN_Leaky(32, (3, 3)),
        MaxPooling2D(),
        DarknetConv2D_BN_Leaky(64, (3, 3)),
        MaxPooling2D(),
        bottleneck_block(128, 64),
        MaxPooling2D(),
        bottleneck_block(256, 128),
        MaxPooling2D(),
        bottleneck_x2_block(512, 256),
        MaxPooling2D(),
        bottleneck_x2_block(1024, 512),
    ]
    return compose(*stages)
|
|
||
|
|
||
def darknet19(inputs):
    """Generate Darknet-19 model for Imagenet classification."""
    # Feature extractor followed by a 1x1 conv producing 1000-way softmax.
    classifier = compose(
        darknet_body(),
        DarknetConv2D(1000, (1, 1), activation='softmax'))
    return Model(inputs, classifier(inputs))
| @@ -0,0 +1 @@ | ||
| from .utils import * |
| @@ -0,0 +1,88 @@ | ||
| """Draw predicted or ground truth boxes on input image.""" | ||
|
|
||
| import colorsys | ||
| import random | ||
|
|
||
| import numpy as np | ||
| from PIL import Image, ImageDraw, ImageFont | ||
|
|
||
|
|
||
def get_colors_for_classes(num_classes):
    """Return list of random colors for number of classes given."""
    # Reuse the memoized palette when the class count hasn't changed.
    cached = getattr(get_colors_for_classes, 'colors', None)
    if cached is not None and len(cached) == num_classes:
        return cached

    # Evenly spaced hues at full saturation/value, scaled to 0-255 RGB.
    colors = []
    for index in range(num_classes):
        red, green, blue = colorsys.hsv_to_rgb(index / num_classes, 1., 1.)
        colors.append((int(red * 255), int(green * 255), int(blue * 255)))

    random.seed(10101)  # Fixed seed for consistent colors across runs.
    random.shuffle(colors)  # Shuffle colors to decorrelate adjacent classes.
    random.seed(None)  # Reset seed to default.
    get_colors_for_classes.colors = colors  # Save colors for future calls.
    return colors
|
|
||
|
|
||
def draw_boxes(image, boxes, box_classes, class_names, scores=None):
    """Draw bounding boxes on image.

    Draw bounding boxes with class name and optional box score on image.

    Args:
        image: An `array` of shape (width, height, 3) with values in [0, 1].
        boxes: An `array` of shape (num_boxes, 4) containing box corners as
            (y_min, x_min, y_max, x_max).
        box_classes: A `list` of indices into `class_names`.
        class_names: A `list` of `string` class names.
        scores: A `list` of scores for each box.

    Returns:
        A copy of `image` modified with given bounding boxes.
    """
    image = Image.fromarray(np.floor(image * 255 + 0.5).astype('uint8'))

    font = ImageFont.truetype(
        font='font/FiraMono-Medium.otf',
        size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
    thickness = (image.size[0] + image.size[1]) // 300

    colors = get_colors_for_classes(len(class_names))

    # Create one drawing context for the whole image instead of a new one
    # per box.
    draw = ImageDraw.Draw(image)

    for i, c in enumerate(box_classes):
        box_class = class_names[c]
        box = boxes[i]
        # NOTE(review): only ndarray scores are rendered; a plain list is
        # silently ignored despite the docstring -- confirm intended.
        if isinstance(scores, np.ndarray):
            score = scores[i]
            label = '{} {:.2f}'.format(box_class, score)
        else:
            label = '{}'.format(box_class)

        label_size = draw.textsize(label, font)

        # Round corners to integers and clamp them to the image bounds.
        top, left, bottom, right = box
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
        right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
        print(label, (left, top), (right, bottom))

        # Place the label above the box unless it would fall off the image.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # My kingdom for a good redistributable image drawing library.
        # Simulate line thickness with nested 1px rectangles.  (The loop
        # variable previously reused `i`, shadowing the enumerate index.)
        for offset in range(thickness):
            draw.rectangle(
                [left + offset, top + offset, right - offset, bottom - offset],
                outline=colors[c])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=colors[c])
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)
    del draw

    return np.array(image)
| @@ -0,0 +1,15 @@ | ||
| """Miscellaneous utility functions.""" | ||
|
|
||
| from functools import reduce | ||
|
|
||
|
|
||
def compose(*funcs):
    """Compose arbitrarily many functions, evaluated left to right.

    Reference: https://mathieularose.com/function-composition-in-python/
    """
    if not funcs:
        raise ValueError('Composition of empty sequence not supported.')

    def composed(*args, **kwargs):
        # The first function receives the original arguments; each
        # subsequent function consumes the previous result.
        result = funcs[0](*args, **kwargs)
        for func in funcs[1:]:
            result = func(result)
        return result

    return composed
| @@ -0,0 +1,71 @@ | ||
| """Darknet19 Model Defined in Keras.""" | ||
| import functools | ||
| from functools import partial | ||
|
|
||
| from keras.layers import Conv2D, MaxPooling2D | ||
| from keras.layers.advanced_activations import LeakyReLU | ||
| from keras.layers.normalization import BatchNormalization | ||
| from keras.models import Model | ||
| from keras.regularizers import l2 | ||
|
|
||
| from ..utils import compose | ||
|
|
||
# Partial wrapper for Convolution2D with static default argument.
# Darknet convolutions always use 'same' padding (Darknet pad=1).
_DarknetConv2D = partial(Conv2D, padding='same')
|
|
||
|
|
||
@functools.wraps(Conv2D)
def DarknetConv2D(*args, **kwargs):
    """Convolution2D with the Darknet L2 weight regularizer applied.

    Caller-supplied keyword arguments take precedence over the default
    regularizer.
    """
    conv_kwargs = dict(kwargs)
    conv_kwargs.setdefault('kernel_regularizer', l2(5e-4))
    return _DarknetConv2D(*args, **conv_kwargs)
|
|
||
|
|
||
def DarknetConv2D_BN_Leaky(*args, **kwargs):
    """Darknet Convolution2D followed by BatchNormalization and LeakyReLU."""
    # Bias is redundant before batch normalization; callers may still
    # override `use_bias` explicitly.
    conv_kwargs = dict(kwargs)
    conv_kwargs.setdefault('use_bias', False)
    return compose(
        DarknetConv2D(*args, **conv_kwargs),
        BatchNormalization(),
        LeakyReLU(alpha=0.1))
|
|
||
|
|
||
def bottleneck_block(outer_filters, bottleneck_filters):
    """Bottleneck block of 3x3, 1x1, 3x3 convolutions."""
    layers = [
        DarknetConv2D_BN_Leaky(outer_filters, (3, 3)),
        DarknetConv2D_BN_Leaky(bottleneck_filters, (1, 1)),
        DarknetConv2D_BN_Leaky(outer_filters, (3, 3)),
    ]
    return compose(*layers)
|
|
||
|
|
||
def bottleneck_x2_block(outer_filters, bottleneck_filters):
    """Bottleneck block of 3x3, 1x1, 3x3, 1x1, 3x3 convolutions."""
    # A plain bottleneck followed by one more squeeze/expand pair.
    tail = [
        DarknetConv2D_BN_Leaky(bottleneck_filters, (1, 1)),
        DarknetConv2D_BN_Leaky(outer_filters, (3, 3)),
    ]
    return compose(bottleneck_block(outer_filters, bottleneck_filters), *tail)
|
|
||
|
|
||
def darknet_body():
    """Generate first 18 conv layers of Darknet-19."""
    # Each MaxPooling2D halves the spatial resolution between stages.
    stages = [
        DarknetConv2D_BN_Leaky(32, (3, 3)),
        MaxPooling2D(),
        DarknetConv2D_BN_Leaky(64, (3, 3)),
        MaxPooling2D(),
        bottleneck_block(128, 64),
        MaxPooling2D(),
        bottleneck_block(256, 128),
        MaxPooling2D(),
        bottleneck_x2_block(512, 256),
        MaxPooling2D(),
        bottleneck_x2_block(1024, 512),
    ]
    return compose(*stages)
|
|
||
|
|
||
def darknet19(inputs):
    """Generate Darknet-19 model for Imagenet classification."""
    # Feature extractor followed by a 1x1 conv producing 1000-way softmax.
    classifier = compose(
        darknet_body(),
        DarknetConv2D(1000, (1, 1), activation='softmax'))
    return Model(inputs, classifier(inputs))
| @@ -0,0 +1 @@ | ||
| from .utils import * |
| @@ -0,0 +1,88 @@ | ||
| """Draw predicted or ground truth boxes on input image.""" | ||
|
|
||
| import colorsys | ||
| import random | ||
|
|
||
| import numpy as np | ||
| from PIL import Image, ImageDraw, ImageFont | ||
|
|
||
|
|
||
def get_colors_for_classes(num_classes):
    """Return list of random colors for number of classes given."""
    # Reuse the memoized palette when the class count hasn't changed.
    cached = getattr(get_colors_for_classes, 'colors', None)
    if cached is not None and len(cached) == num_classes:
        return cached

    # Evenly spaced hues at full saturation/value, scaled to 0-255 RGB.
    colors = []
    for index in range(num_classes):
        red, green, blue = colorsys.hsv_to_rgb(index / num_classes, 1., 1.)
        colors.append((int(red * 255), int(green * 255), int(blue * 255)))

    random.seed(10101)  # Fixed seed for consistent colors across runs.
    random.shuffle(colors)  # Shuffle colors to decorrelate adjacent classes.
    random.seed(None)  # Reset seed to default.
    get_colors_for_classes.colors = colors  # Save colors for future calls.
    return colors
|
|
||
|
|
||
def draw_boxes(image, boxes, box_classes, class_names, scores=None):
    """Draw bounding boxes on image.

    Draw bounding boxes with class name and optional box score on image.

    Args:
        image: An `array` of shape (width, height, 3) with values in [0, 1].
        boxes: An `array` of shape (num_boxes, 4) containing box corners as
            (y_min, x_min, y_max, x_max).
        box_classes: A `list` of indices into `class_names`.
        class_names: A `list` of `string` class names.
        scores: A `list` of scores for each box.

    Returns:
        A copy of `image` modified with given bounding boxes.
    """
    image = Image.fromarray(np.floor(image * 255 + 0.5).astype('uint8'))

    font = ImageFont.truetype(
        font='font/FiraMono-Medium.otf',
        size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
    thickness = (image.size[0] + image.size[1]) // 300

    colors = get_colors_for_classes(len(class_names))

    # Create one drawing context for the whole image instead of a new one
    # per box.
    draw = ImageDraw.Draw(image)

    for i, c in enumerate(box_classes):
        box_class = class_names[c]
        box = boxes[i]
        # NOTE(review): only ndarray scores are rendered; a plain list is
        # silently ignored despite the docstring -- confirm intended.
        if isinstance(scores, np.ndarray):
            score = scores[i]
            label = '{} {:.2f}'.format(box_class, score)
        else:
            label = '{}'.format(box_class)

        label_size = draw.textsize(label, font)

        # Round corners to integers and clamp them to the image bounds.
        top, left, bottom, right = box
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
        right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
        print(label, (left, top), (right, bottom))

        # Place the label above the box unless it would fall off the image.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # My kingdom for a good redistributable image drawing library.
        # Simulate line thickness with nested 1px rectangles.  (The loop
        # variable previously reused `i`, shadowing the enumerate index.)
        for offset in range(thickness):
            draw.rectangle(
                [left + offset, top + offset, right - offset, bottom - offset],
                outline=colors[c])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=colors[c])
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)
    del draw

    return np.array(image)
| @@ -0,0 +1,15 @@ | ||
| """Miscellaneous utility functions.""" | ||
|
|
||
| from functools import reduce | ||
|
|
||
|
|
||
def compose(*funcs):
    """Compose arbitrarily many functions, evaluated left to right.

    Reference: https://mathieularose.com/function-composition-in-python/
    """
    if not funcs:
        raise ValueError('Composition of empty sequence not supported.')

    def composed(*args, **kwargs):
        # The first function receives the original arguments; each
        # subsequent function consumes the previous result.
        result = funcs[0](*args, **kwargs)
        for func in funcs[1:]:
            result = func(result)
        return result

    return composed