In [2]:
import numpy as np
import os
from ssd_encoder_decoder.ssd_input_encoder import SSDInputEncoder
from ssd_encoder_decoder.ssd_output_decoder import decode_detections, decode_detections_fast

from data_generator.object_detection_2d_data_generator import DataGenerator
from data_generator.object_detection_2d_geometric_ops import Resize
from data_generator.object_detection_2d_photometric_ops import ConvertTo3Channels

from data_generator.data_augmentation_chain_original_ssd import SSDDataAugmentation
from data_generator.object_detection_2d_misc_utils import apply_inverse_transforms

### Create h5 dataset

In [9]:
#################################################################################
# # # # # # # # # # #        DATA PREPARATION          # # # # # # # # # # # # #
#################################################################################
# Data generator reference:
# https://github.com/pierluigiferrari/data_generator_object_detection_2d

# 1: Instantiate two `DataGenerator` objects: one for training, one for validation.
#    Both are created with `load_images_into_memory=False` and without an
#    existing HDF5 file (`hdf5_dataset_path=None`).

train_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)
val_dataset   = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)

# 2: Set the paths that are used when the image and label lists of the training
#    and validation datasets are parsed.

# Root directory of the dataset tree. To run the notebook on another machine,
# export FISH_TRAINING_DATA_DIR before starting the kernel instead of editing
# this cell; when the variable is unset or empty the original path is used.
data_path = (os.environ.get('FISH_TRAINING_DATA_DIR')
             or '/media/auv/DATA/Data/Fish_Training_Data/')

# The directories that contain the images.
pacstorm_training_images_dir   = os.path.join(data_path, "Training/Pacstorm", "images")
pacstorm_validation_images_dir = os.path.join(data_path, "Validation/Pacstorm", "images")
pmfs_training_images_dir       = os.path.join(data_path, "Training/PMFS", "images")
pmfs_validation_images_dir     = os.path.join(data_path, "Validation/PMFS", "images")

# The directories that contain the annotations.
pacstorm_training_annotations_dir   = os.path.join(data_path, "Training", "Pacstorm_Annotations")
pacstorm_validation_annotations_dir = os.path.join(data_path, "Validation", "Pacstorm_Annotations")
pmfs_training_annotations_dir       = os.path.join(data_path, "Training", "PMFS_Annotations")
pmfs_validation_annotations_dir     = os.path.join(data_path, "Validation", "PMFS_Annotations")

# The paths to the image sets (text files listing the images of each split).
pacstorm_training_image_set_filename   = os.path.join(data_path, "Training/Pacstorm", 'images.txt')
pmfs_training_image_set_filename       = os.path.join(data_path, "Training/PMFS", 'images.txt')
pacstorm_validation_image_set_filename = os.path.join(data_path, "Validation/Pacstorm", 'images.txt')
pmfs_validation_image_set_filename     = os.path.join(data_path, "Validation/PMFS", 'images.txt')

# TODO: Specify the names of the classes for which the data is being prepared.
# The XML parser needs to know what object class names to look for and in which
# order to map them to integers.
classes = ['background', 'fish', 'starfish', 'sponge']

here
done


In [12]:
# TODO: Set include_classes based on the number of classes.
# NOTE(review): `classes` has four entries, but only IDs 0-2 are listed in
# `include_classes` below, so presumably ID 3 ('sponge') is left out of both
# datasets - confirm that this is intentional.

# Parser options that are the same for the training and the validation split.
_common_parse_options = {
    'classes': classes,
    'exclude_truncated': False,
    'ret': False,
}

# Training split: Pacstorm + PMFS; "difficult" objects are kept.
train_dataset.parse_xml(
    images_dirs=[pacstorm_training_images_dir, pmfs_training_images_dir],
    image_set_filenames=[pacstorm_training_image_set_filename,
                         pmfs_training_image_set_filename],
    annotations_dirs=[pacstorm_training_annotations_dir,
                      pmfs_training_annotations_dir],
    include_classes=[0, 1, 2],
    exclude_difficult=False,
    **_common_parse_options
)

# Validation split: Pacstorm + PMFS; here `exclude_difficult` is switched on.
val_dataset.parse_xml(
    images_dirs=[pacstorm_validation_images_dir, pmfs_validation_images_dir],
    image_set_filenames=[pacstorm_validation_image_set_filename,
                         pmfs_validation_image_set_filename],
    annotations_dirs=[pacstorm_validation_annotations_dir,
                      pmfs_validation_annotations_dir],
    include_classes=[0, 1, 2],
    exclude_difficult=True,
    **_common_parse_options
)

# Optional: Convert each dataset into an HDF5 dataset. This requires more disk
# space, but speeds up the training. It is not relevant if the
# `load_images_into_memory` option was activated in the constructor, because in
# that case the images are in memory already anyway. If you don't want to create
# HDF5 datasets, comment out the loop below.

# Settings shared by both HDF5 files: images keep their original (variable) size.
_hdf5_options = {
    'resize': False,
    'variable_image_size': True,
    'verbose': True,
}

# Training file first, validation file second.
for _dataset, _h5_file in ((train_dataset, 'dataset_fish_train.h5'),
                           (val_dataset, 'dataset_fish_val.h5')):
    _dataset.create_hdf5_dataset(file_path=_h5_file, **_hdf5_options)





Processing image set 'images.txt':   0%|          | 0/1330 [00:00<?, ?it/s][A[A[A


Processing image set 'images.txt':   4%|▎         | 47/1330 [00:00<00:02, 459.21it/s][A[A[A


Processing image set 'images.txt':   8%|▊         | 112/1330 [00:00<00:02, 503.05it/s][A[A[A


Processing image set 'images.txt':  12%|█▏        | 163/1330 [00:00<00:02, 505.07it/s][A[A[A


Processing image set 'images.txt':  17%|█▋        | 230/1330 [00:00<00:02, 543.92it/s][A[A[A


Processing image set 'images.txt':  23%|██▎       | 307/1330 [00:00<00:01, 594.11it/s][A[A[A


Processing image set 'images.txt':  29%|██▉       | 385/1330 [00:00<00:01, 639.36it/s][A[A[A


Processing image set 'images.txt':  35%|███▍      | 465/1330 [00:00<00:01, 679.90it/s][A[A[A


Processing image set 'images.txt':  41%|████      | 540/1330 [00:00<00:01, 699.40it/s][A[A[A


Processing image set 'images.txt':  46%|████▌     | 612/1330 [00:00<00:01, 698.94it/s][A[A[A


Processing image set 'images.

Creating HDF5 dataset:   1%|          | 39/3596 [00:03<04:54, 12.06it/s][A[A[A


Creating HDF5 dataset:   1%|          | 41/3596 [00:03<04:58, 11.90it/s][A[A[A


Creating HDF5 dataset:   1%|          | 43/3596 [00:03<04:59, 11.87it/s][A[A[A


Creating HDF5 dataset:   1%|▏         | 45/3596 [00:03<04:58, 11.91it/s][A[A[A


Creating HDF5 dataset:   1%|▏         | 47/3596 [00:03<05:04, 11.67it/s][A[A[A


Creating HDF5 dataset:   1%|▏         | 49/3596 [00:04<04:55, 12.00it/s][A[A[A


Creating HDF5 dataset:   1%|▏         | 51/3596 [00:04<04:55, 11.98it/s][A[A[A


Creating HDF5 dataset:   1%|▏         | 53/3596 [00:04<05:01, 11.76it/s][A[A[A


Creating HDF5 dataset:   2%|▏         | 55/3596 [00:04<04:54, 12.03it/s][A[A[A


Creating HDF5 dataset:   2%|▏         | 57/3596 [00:04<05:04, 11.62it/s][A[A[A


Creating HDF5 dataset:   2%|▏         | 59/3596 [00:04<05:07, 11.50it/s][A[A[A


Creating HDF5 dataset:   2%|▏         | 61/3596 [00:05<05:02, 11.69it/s][A

Creating HDF5 dataset:  12%|█▏        | 423/3596 [00:37<04:46, 11.09it/s][A[A[A


Creating HDF5 dataset:  12%|█▏        | 425/3596 [00:37<04:41, 11.25it/s][A[A[A


Creating HDF5 dataset:  12%|█▏        | 427/3596 [00:37<04:41, 11.26it/s][A[A[A


Creating HDF5 dataset:  12%|█▏        | 429/3596 [00:37<04:47, 11.02it/s][A[A[A


Creating HDF5 dataset:  12%|█▏        | 431/3596 [00:38<04:52, 10.82it/s][A[A[A


Creating HDF5 dataset:  12%|█▏        | 433/3596 [00:38<04:47, 11.02it/s][A[A[A


Creating HDF5 dataset:  12%|█▏        | 435/3596 [00:38<04:50, 10.90it/s][A[A[A


Creating HDF5 dataset:  12%|█▏        | 437/3596 [00:38<04:56, 10.66it/s][A[A[A


Creating HDF5 dataset:  12%|█▏        | 439/3596 [00:38<04:47, 11.00it/s][A[A[A


Creating HDF5 dataset:  12%|█▏        | 441/3596 [00:39<04:44, 11.08it/s][A[A[A


Creating HDF5 dataset:  12%|█▏        | 443/3596 [00:39<04:59, 10.54it/s][A[A[A


Creating HDF5 dataset:  12%|█▏        | 445/3596 [00:39<04:50, 10

Creating HDF5 dataset:  22%|██▏       | 807/3596 [01:10<03:52, 11.98it/s][A[A[A


Creating HDF5 dataset:  22%|██▏       | 809/3596 [01:10<03:52, 12.00it/s][A[A[A


Creating HDF5 dataset:  23%|██▎       | 811/3596 [01:10<03:50, 12.10it/s][A[A[A


Creating HDF5 dataset:  23%|██▎       | 813/3596 [01:10<03:50, 12.08it/s][A[A[A


Creating HDF5 dataset:  23%|██▎       | 815/3596 [01:10<03:51, 11.99it/s][A[A[A


Creating HDF5 dataset:  23%|██▎       | 817/3596 [01:11<03:47, 12.22it/s][A[A[A


Creating HDF5 dataset:  23%|██▎       | 819/3596 [01:11<03:47, 12.19it/s][A[A[A


Creating HDF5 dataset:  23%|██▎       | 821/3596 [01:11<03:49, 12.10it/s][A[A[A


Creating HDF5 dataset:  23%|██▎       | 823/3596 [01:11<03:48, 12.14it/s][A[A[A


Creating HDF5 dataset:  23%|██▎       | 825/3596 [01:11<03:46, 12.26it/s][A[A[A


Creating HDF5 dataset:  23%|██▎       | 827/3596 [01:11<03:48, 12.13it/s][A[A[A


Creating HDF5 dataset:  23%|██▎       | 829/3596 [01:12<03:41, 12

Creating HDF5 dataset:  33%|███▎      | 1189/3596 [01:42<03:22, 11.91it/s][A[A[A


Creating HDF5 dataset:  33%|███▎      | 1191/3596 [01:42<03:20, 12.00it/s][A[A[A


Creating HDF5 dataset:  33%|███▎      | 1193/3596 [01:42<03:22, 11.85it/s][A[A[A


Creating HDF5 dataset:  33%|███▎      | 1195/3596 [01:42<03:15, 12.28it/s][A[A[A


Creating HDF5 dataset:  33%|███▎      | 1197/3596 [01:42<03:14, 12.34it/s][A[A[A


Creating HDF5 dataset:  33%|███▎      | 1199/3596 [01:43<03:19, 12.03it/s][A[A[A


Creating HDF5 dataset:  33%|███▎      | 1201/3596 [01:43<03:22, 11.85it/s][A[A[A


Creating HDF5 dataset:  33%|███▎      | 1203/3596 [01:43<03:21, 11.87it/s][A[A[A


Creating HDF5 dataset:  34%|███▎      | 1205/3596 [01:43<03:27, 11.55it/s][A[A[A


Creating HDF5 dataset:  34%|███▎      | 1207/3596 [01:43<03:23, 11.74it/s][A[A[A


Creating HDF5 dataset:  34%|███▎      | 1209/3596 [01:43<03:22, 11.80it/s][A[A[A


Creating HDF5 dataset:  34%|███▎      | 1211/3596 [01:

Creating HDF5 dataset:  54%|█████▎    | 1925/3596 [02:08<00:48, 34.80it/s][A[A[A


Creating HDF5 dataset:  54%|█████▎    | 1931/3596 [02:08<00:43, 38.53it/s][A[A[A


Creating HDF5 dataset:  54%|█████▍    | 1936/3596 [02:09<00:41, 39.95it/s][A[A[A


Creating HDF5 dataset:  54%|█████▍    | 1941/3596 [02:09<00:41, 39.48it/s][A[A[A


Creating HDF5 dataset:  54%|█████▍    | 1946/3596 [02:09<00:41, 39.83it/s][A[A[A


Creating HDF5 dataset:  54%|█████▍    | 1951/3596 [02:09<00:39, 41.40it/s][A[A[A


Creating HDF5 dataset:  54%|█████▍    | 1957/3596 [02:09<00:38, 42.55it/s][A[A[A


Creating HDF5 dataset:  55%|█████▍    | 1962/3596 [02:09<00:37, 43.40it/s][A[A[A


Creating HDF5 dataset:  55%|█████▍    | 1967/3596 [02:09<00:42, 38.22it/s][A[A[A


Creating HDF5 dataset:  55%|█████▍    | 1972/3596 [02:09<00:41, 39.54it/s][A[A[A


Creating HDF5 dataset:  55%|█████▍    | 1977/3596 [02:10<00:41, 38.98it/s][A[A[A


Creating HDF5 dataset:  55%|█████▌    | 1982/3596 [02:

Creating HDF5 dataset:  74%|███████▍  | 2674/3596 [02:32<00:37, 24.81it/s][A[A[A


Creating HDF5 dataset:  74%|███████▍  | 2677/3596 [02:32<00:37, 24.77it/s][A[A[A


Creating HDF5 dataset:  75%|███████▍  | 2680/3596 [02:32<00:35, 26.10it/s][A[A[A


Creating HDF5 dataset:  75%|███████▍  | 2683/3596 [02:33<00:35, 25.63it/s][A[A[A


Creating HDF5 dataset:  75%|███████▍  | 2687/3596 [02:33<00:33, 27.48it/s][A[A[A


Creating HDF5 dataset:  75%|███████▍  | 2691/3596 [02:33<00:29, 30.27it/s][A[A[A


Creating HDF5 dataset:  75%|███████▍  | 2695/3596 [02:33<00:32, 27.44it/s][A[A[A


Creating HDF5 dataset:  75%|███████▌  | 2699/3596 [02:33<00:31, 28.54it/s][A[A[A


Creating HDF5 dataset:  75%|███████▌  | 2702/3596 [02:33<00:31, 28.66it/s][A[A[A


Creating HDF5 dataset:  75%|███████▌  | 2706/3596 [02:33<00:30, 29.26it/s][A[A[A


Creating HDF5 dataset:  75%|███████▌  | 2709/3596 [02:33<00:33, 26.49it/s][A[A[A


Creating HDF5 dataset:  75%|███████▌  | 2712/3596 [02:

Creating HDF5 dataset:  92%|█████████▏| 3307/3596 [02:56<00:09, 31.16it/s][A[A[A


Creating HDF5 dataset:  92%|█████████▏| 3311/3596 [02:56<00:10, 28.33it/s][A[A[A


Creating HDF5 dataset:  92%|█████████▏| 3315/3596 [02:56<00:09, 29.27it/s][A[A[A


Creating HDF5 dataset:  92%|█████████▏| 3319/3596 [02:57<00:09, 29.38it/s][A[A[A


Creating HDF5 dataset:  92%|█████████▏| 3323/3596 [02:57<00:09, 29.77it/s][A[A[A


Creating HDF5 dataset:  93%|█████████▎| 3327/3596 [02:57<00:08, 30.15it/s][A[A[A


Creating HDF5 dataset:  93%|█████████▎| 3331/3596 [02:57<00:09, 28.72it/s][A[A[A


Creating HDF5 dataset:  93%|█████████▎| 3334/3596 [02:57<00:09, 28.03it/s][A[A[A


Creating HDF5 dataset:  93%|█████████▎| 3337/3596 [02:57<00:09, 27.24it/s][A[A[A


Creating HDF5 dataset:  93%|█████████▎| 3341/3596 [02:57<00:09, 27.82it/s][A[A[A


Creating HDF5 dataset:  93%|█████████▎| 3345/3596 [02:57<00:08, 29.59it/s][A[A[A


Creating HDF5 dataset:  93%|█████████▎| 3349/3596 [02:

Creating HDF5 dataset:  20%|██        | 207/1027 [00:19<01:14, 10.97it/s][A[A[A


Creating HDF5 dataset:  20%|██        | 209/1027 [00:19<01:12, 11.30it/s][A[A[A


Creating HDF5 dataset:  21%|██        | 211/1027 [00:20<01:12, 11.32it/s][A[A[A


Creating HDF5 dataset:  21%|██        | 213/1027 [00:20<01:12, 11.26it/s][A[A[A


Creating HDF5 dataset:  21%|██        | 215/1027 [00:20<01:12, 11.13it/s][A[A[A


Creating HDF5 dataset:  21%|██        | 217/1027 [00:20<01:13, 11.04it/s][A[A[A


Creating HDF5 dataset:  21%|██▏       | 219/1027 [00:20<01:11, 11.28it/s][A[A[A


Creating HDF5 dataset:  22%|██▏       | 221/1027 [00:20<01:11, 11.32it/s][A[A[A


Creating HDF5 dataset:  22%|██▏       | 223/1027 [00:21<01:12, 11.02it/s][A[A[A


Creating HDF5 dataset:  22%|██▏       | 225/1027 [00:21<01:14, 10.71it/s][A[A[A


Creating HDF5 dataset:  22%|██▏       | 227/1027 [00:21<01:11, 11.12it/s][A[A[A


Creating HDF5 dataset:  22%|██▏       | 229/1027 [00:21<01:10, 11

Creating HDF5 dataset:  69%|██████▉   | 711/1027 [00:48<00:11, 27.51it/s][A[A[A


Creating HDF5 dataset:  70%|██████▉   | 714/1027 [00:48<00:11, 28.18it/s][A[A[A


Creating HDF5 dataset:  70%|██████▉   | 717/1027 [00:48<00:10, 28.53it/s][A[A[A


Creating HDF5 dataset:  70%|███████   | 720/1027 [00:49<00:14, 21.23it/s][A[A[A


Creating HDF5 dataset:  70%|███████   | 723/1027 [00:49<00:14, 21.40it/s][A[A[A


Creating HDF5 dataset:  71%|███████   | 726/1027 [00:49<00:13, 21.77it/s][A[A[A


Creating HDF5 dataset:  71%|███████   | 729/1027 [00:49<00:12, 23.02it/s][A[A[A


Creating HDF5 dataset:  71%|███████▏  | 733/1027 [00:49<00:11, 25.11it/s][A[A[A


Creating HDF5 dataset:  72%|███████▏  | 736/1027 [00:49<00:11, 24.67it/s][A[A[A


Creating HDF5 dataset:  72%|███████▏  | 739/1027 [00:49<00:11, 25.52it/s][A[A[A


Creating HDF5 dataset:  72%|███████▏  | 742/1027 [00:49<00:11, 25.39it/s][A[A[A


Creating HDF5 dataset:  73%|███████▎  | 746/1027 [00:50<00:10, 27