// research/object_detection/protos/preprocessor.proto

syntax = "proto2";

package object_detection.protos;

// Message for defining a preprocessing operation on input data.
// The operations are members of a single oneof, so at most one of them is set
// in any given PreprocessingStep.
// See: //third_party/tensorflow_models/object_detection/core/preprocessor.py
// Next ID: 41
// NOTE(review): the highest field number visible in this oneof is 39. Confirm
// whether 40 is reserved elsewhere before changing the Next ID marker above.
message PreprocessingStep {
  oneof preprocessing_step {
    NormalizeImage normalize_image = 1;
    RandomHorizontalFlip random_horizontal_flip = 2;
    RandomPixelValueScale random_pixel_value_scale = 3;
    RandomImageScale random_image_scale = 4;
    RandomRGBtoGray random_rgb_to_gray = 5;
    RandomAdjustBrightness random_adjust_brightness = 6;
    RandomAdjustContrast random_adjust_contrast = 7;
    RandomAdjustHue random_adjust_hue = 8;
    RandomAdjustSaturation random_adjust_saturation = 9;
    RandomDistortColor random_distort_color = 10;
    RandomJitterBoxes random_jitter_boxes = 11;
    RandomCropImage random_crop_image = 12;
    RandomPadImage random_pad_image = 13;
    RandomCropPadImage random_crop_pad_image = 14;
    RandomCropToAspectRatio random_crop_to_aspect_ratio = 15;
    RandomBlackPatches random_black_patches = 16;
    RandomResizeMethod random_resize_method = 17;
    ScaleBoxesToPixelCoordinates scale_boxes_to_pixel_coordinates = 18;
    ResizeImage resize_image = 19;
    SubtractChannelMean subtract_channel_mean = 20;
    SSDRandomCrop ssd_random_crop = 21;
    SSDRandomCropPad ssd_random_crop_pad = 22;
    SSDRandomCropFixedAspectRatio ssd_random_crop_fixed_aspect_ratio = 23;
    SSDRandomCropPadFixedAspectRatio ssd_random_crop_pad_fixed_aspect_ratio =
        24;
    RandomVerticalFlip random_vertical_flip = 25;
    RandomRotation90 random_rotation90 = 26;
    RGBtoGray rgb_to_gray = 27;
    ConvertClassLogitsToSoftmax convert_class_logits_to_softmax = 28;
    RandomAbsolutePadImage random_absolute_pad_image = 29;
    RandomSelfConcatImage random_self_concat_image = 30;
    AutoAugmentImage autoaugment_image = 31;
    DropLabelProbabilistically drop_label_probabilistically = 32;
    RemapLabels remap_labels = 33;
    RandomJpegQuality random_jpeg_quality = 34;
    RandomDownscaleToTargetPixels random_downscale_to_target_pixels = 35;
    RandomPatchGaussian random_patch_gaussian = 36;
    RandomSquareCropByScale random_square_crop_by_scale = 37;
    RandomScaleCropAndPadToSquare random_scale_crop_and_pad_to_square = 38;
    AdjustGamma adjust_gamma = 39;
  }
}

// Normalizes pixel values in an image.
// For every channel in the image, moves the pixel values from the range
// [original_minval, original_maxval] to [target_minval, target_maxval].
message NormalizeImage {
  // Lower bound of the pixel value range of the input image. No default.
  optional float original_minval = 1;
  // Upper bound of the pixel value range of the input image. No default.
  optional float original_maxval = 2;
  // Lower bound of the pixel value range after normalization.
  optional float target_minval = 3 [default = 0];
  // Upper bound of the pixel value range after normalization.
  optional float target_maxval = 4 [default = 1];
}

// Randomly horizontally flips the image and detections with the specified
// probability, defaulting to 50% of the time.
message RandomHorizontalFlip {
  // Specifies a mapping from the original keypoint indices to horizontally
  // flipped indices. This is used in the event that keypoints are specified,
  // in which case when the image is horizontally flipped the keypoints will
  // need to be permuted. E.g. for keypoints representing left_eye, right_eye,
  // and nose (in that order), one might specify the keypoint_flip_permutation
  // below:
  // keypoint_flip_permutation: 1
  // keypoint_flip_permutation: 0
  // keypoint_flip_permutation: 2
  //
  //         original image         horizontally flipped image
  //  ___________________________   ___________________________
  //        /      /      /       |       \      \      \
  //  left eye, 0    right eye, 1 | left eye, 1    right eye, 0
  //            nose, 2           |            nose, 2
  //  ___________________________   ___________________________
  // If nothing is specified, the order of keypoints will be maintained.
  repeated int32 keypoint_flip_permutation = 1;

  // The probability of running this augmentation for each image.
  optional float probability = 2 [default = 0.5];
}

// Randomly vertically flips the image and detections with the specified
// probability, defaulting to 50% of the time.
message RandomVerticalFlip {
  // Specifies a mapping from the original keypoint indices to vertically
  // flipped indices. This is used in the event that keypoints are specified,
  // in which case when the image is vertically flipped the keypoints will
  // need to be permuted. E.g. for keypoints representing left_eye, right_eye,
  // and nose (in that order), one might specify:
  // keypoint_flip_permutation: 1
  // keypoint_flip_permutation: 0
  // keypoint_flip_permutation: 2
  //  ___________________________
  //        /      /      /       |  original image
  //  left eye, 0    right eye, 1 |
  //            nose, 2           |
  //  ___________________________
  //        \      \      \       |  vertically flipped image
  //            nose, 2           |
  //  right eye, 0    left eye, 1 |
  //  ___________________________
  // If nothing is specified, the order of keypoints will be maintained.
  repeated int32 keypoint_flip_permutation = 1;

  // The probability of running this augmentation for each image.
  optional float probability = 2 [default = 0.5];
}

// Randomly rotates the image and detections by 90 degrees counter-clockwise
// with the specified probability, defaulting to 50% of the time.
message RandomRotation90 {
  // Specifies a mapping from the original keypoint indices to the indices
  // after the 90 degree counter-clockwise rotation. This is used in the event
  // that keypoints are specified, in which case when the image is rotated the
  // keypoints might need to be permuted.
  repeated int32 keypoint_rot_permutation = 1;

  // The probability of running this augmentation for each image.
  optional float probability = 2 [default = 0.5];
}

// Randomly scales the values of all pixels in the image by some constant value
// between [minval, maxval], then clips the values to the range [0, 1.0].
message RandomPixelValueScale {
  // Bounds of the range the scaling constant is drawn from.
  optional float minval = 1 [default = 0.9];
  optional float maxval = 2 [default = 1.1];
}

// Randomly enlarges or shrinks the image (keeping aspect ratio).
message RandomImageScale {
  // Bounds on the scale ratio; the defaults give the range [0.5, 2.0].
  optional float min_scale_ratio = 1 [default = 0.5];
  optional float max_scale_ratio = 2 [default = 2.0];
}

// Randomly converts the entire image to grayscale.
message RandomRGBtoGray {
  // Probability of performing the conversion (presumably evaluated once per
  // image -- confirm against preprocessor.py).
  optional float probability = 1 [default = 0.1];
}

// Randomly changes image brightness by up to max_delta. Image outputs will be
// saturated between 0 and 1.
message RandomAdjustBrightness {
  // Largest brightness change that may be applied.
  optional float max_delta = 1 [default = 0.2];
}

// Randomly scales contrast by a value between [min_delta, max_delta].
message RandomAdjustContrast {
  // Bounds of the range the contrast scaling value is drawn from.
  optional float min_delta = 1 [default = 0.8];
  optional float max_delta = 2 [default = 1.25];
}

// Randomly alters hue by a value of up to max_delta.
message RandomAdjustHue {
  // Largest hue change that may be applied.
  optional float max_delta = 1 [default = 0.02];
}

// Randomly changes saturation by a value between [min_delta, max_delta].
message RandomAdjustSaturation {
  // Bounds of the range the saturation change value is drawn from.
  optional float min_delta = 1 [default = 0.8];
  optional float max_delta = 2 [default = 1.25];
}

// Performs a random color distortion. color_ordering should be either 0 or 1.
message RandomDistortColor {
  // Selects the distortion ordering; no explicit default is declared.
  optional int32 color_ordering = 1;
}

// Randomly jitters corners of boxes in the image determined by ratio.
// i.e. if a box is [100, 200] and ratio is 0.02, the corners can move by
// [1, 4].
// NOTE(review): 0.02 * [100, 200] is [2, 4]; confirm whether [1, 4] above is
// intended.
message RandomJitterBoxes {
  // Jitter amount expressed as a fraction of the box dimensions.
  optional float ratio = 1 [default = 0.05];

  // NOTE(review): value 3 is not assigned in this enum; confirm it is
  // intentionally unused before giving it to a new mode.
  enum JitterMode {
    DEFAULT = 0;
    EXPAND = 1;
    SHRINK = 2;
    EXPAND_SYMMETRIC = 4;
    SHRINK_SYMMETRIC = 5;
    EXPAND_SYMMETRIC_XY = 6;
    SHRINK_SYMMETRIC_XY = 7;
  }
  // The mode of jittering:
  // EXPAND - Only expands boxes.
  // SHRINK - Only shrinks boxes.
  // EXPAND_SYMMETRIC - Expands the boxes symmetrically along height and width
  //   dimensions without changing the box center. The ratios of expansion along
  //   X, Y dimensions are independent.
  // SHRINK_SYMMETRIC - Shrinks the boxes symmetrically along height and width
  //   dimensions without changing the box center. The ratios of shrinking along
  //   X, Y dimensions are independent.
  // EXPAND_SYMMETRIC_XY - Expands the boxes symmetrically along height and
  //   width dimensions and the ratio of expansion is the same for both.
  // SHRINK_SYMMETRIC_XY - Shrinks the boxes symmetrically along height and
  //   width dimensions and the ratio of shrinking is the same for both.
  // DEFAULT - Jitters each box boundary independently.
  optional JitterMode jitter_mode = 2 [default = DEFAULT];
}

// Randomly crops the image and bounding boxes.
message RandomCropImage {
  // The cropped image must cover at least this fraction of at least one box.
  optional float min_object_covered = 1 [default = 1.0];

  // Aspect ratio bounds of the cropped image.
  optional float min_aspect_ratio = 2 [default = 0.75];
  optional float max_aspect_ratio = 3 [default = 1.33];

  // Allowed area ratio of the cropped image to the original image.
  optional float min_area = 4 [default = 0.1];
  optional float max_area = 5 [default = 1.0];

  // Minimum overlap threshold of cropped boxes to keep in new image. If the
  // ratio between a cropped bounding box and the original is less than this
  // value, it is removed from the new image.
  optional float overlap_thresh = 6 [default = 0.3];

  // Whether to clip the boxes to the cropped image.
  optional bool clip_boxes = 8 [default = true];

  // Probability of keeping the original image.
  optional float random_coef = 7 [default = 0.0];
}

// Randomly adds padding to the image.
message RandomPadImage {
  // Minimum dimensions for padded image. If unset, will use original image
  // dimension as a lower bound.
  optional int32 min_image_height = 1;
  optional int32 min_image_width = 2;

  // Maximum dimensions for padded image. If unset, will use double the original
  // image dimension as an upper bound.
  optional int32 max_image_height = 3;
  optional int32 max_image_width = 4;

  // Color of the padding. If unset, will pad using average color of the input
  // image.
  repeated float pad_color = 5;
}

// Randomly pads the image by an absolute amount: the height padding is drawn
// from [0, max_height_padding) and the width padding from
// [0, max_width_padding).
message RandomAbsolutePadImage {
  // Height will be padded uniformly at random from [0, max_height_padding).
  optional int32 max_height_padding = 1;
  // Width will be padded uniformly at random from [0, max_width_padding).
  optional int32 max_width_padding = 2;

  // Color of the padding. If unset, will pad using average color of the input
  // image.
  repeated float pad_color = 3;
}

// Randomly crops an image followed by a random pad.
message RandomCropPadImage {
  // Cropping operation must cover at least this fraction of at least one box.
  optional float min_object_covered = 1 [default = 1.0];

  // Aspect ratio bounds of image after cropping operation.
  optional float min_aspect_ratio = 2 [default = 0.75];
  optional float max_aspect_ratio = 3 [default = 1.33];

  // Allowed area ratio of image after cropping operation.
  optional float min_area = 4 [default = 0.1];
  optional float max_area = 5 [default = 1.0];

  // Minimum overlap threshold of cropped boxes to keep in new image. If the
  // ratio between a cropped bounding box and the original is less than this
  // value, it is removed from the new image.
  optional float overlap_thresh = 6 [default = 0.3];

  // Whether to clip the boxes to the cropped image.
  optional bool clip_boxes = 11 [default = true];

  // Probability of keeping the original image during the crop operation.
  optional float random_coef = 7 [default = 0.0];

  // Minimum and maximum dimensions for the padded image, given as ratios
  // (presumably relative to the input image size -- confirm). If the maximum
  // is unset, will use double the original image dimension as an upper bound.
  // Both of the following fields should be of length 2.
  repeated float min_padded_size_ratio = 8;
  repeated float max_padded_size_ratio = 9;

  // Color of the padding. If unset, will pad using average color of the input
  // image. This field should be of length 3.
  repeated float pad_color = 10;
}

// Randomly crops an image to a given aspect ratio.
message RandomCropToAspectRatio {
  // Target aspect ratio of the crop.
  optional float aspect_ratio = 1 [default = 1.0];

  // Minimum overlap threshold of cropped boxes to keep in new image. If the
  // ratio between a cropped bounding box and the original is less than this
  // value, it is removed from the new image.
  optional float overlap_thresh = 2 [default = 0.3];

  // Whether to clip the boxes to the cropped image.
  optional bool clip_boxes = 3 [default = true];
}

// Randomly adds black square patches to an image.
message RandomBlackPatches {
  // The maximum number of black patches to add.
  optional int32 max_black_patches = 1 [default = 10];

  // The probability of a black patch being added to an image.
  optional float probability = 2 [default = 0.5];

  // Ratio of the dimension of the black patch to the minimum dimension of
  // the image, i.e.
  // patch_width = patch_height =
  //     size_to_image_ratio * min(image_height, image_width).
  optional float size_to_image_ratio = 3 [default = 0.1];
}

// Randomly resizes the image up to [target_height, target_width].
// NOTE(review): the message name suggests that it is the resize *method* that
// is chosen at random, with [target_height, target_width] as the output size;
// confirm against preprocessor.py and reword the line above if so.
message RandomResizeMethod {
  // Target size of the resize; no defaults are declared.
  optional int32 target_height = 1;
  optional int32 target_width = 2;
}

// Converts the RGB image to a grayscale image. This also converts the image
// depth from 3 to 1, unlike RandomRGBtoGray which does not change the image
// depth. This step takes no options, so the message has no fields.
message RGBtoGray {}

// Scales boxes from normalized coordinates to pixel coordinates.
// This step takes no options, so the message has no fields.
message ScaleBoxesToPixelCoordinates {}

// Resizes images to [new_height, new_width].
message ResizeImage {
  // Output size of the resized image; no defaults are declared.
  optional int32 new_height = 1;
  optional int32 new_width = 2;
  // Resize method to use. Values start at 1; no value is assigned to 0.
  enum Method {
    AREA = 1;
    BICUBIC = 2;
    BILINEAR = 3;
    NEAREST_NEIGHBOR = 4;
  }
  // Defaults to BILINEAR when unset.
  optional Method method = 3 [default = BILINEAR];
}

// Normalizes an image by subtracting a mean from each channel.
message SubtractChannelMean {
  // The mean to subtract from each channel. Should have the same length as the
  // number of channels in the input image.
  repeated float means = 1;
}

// One candidate crop configuration for SSDRandomCrop. Apart from clip_boxes,
// no defaults are declared for the fields of this message.
message SSDRandomCropOperation {
  // Cropped image must cover at least this fraction of one original bounding
  // box.
  optional float min_object_covered = 1;

  // The aspect ratio of the cropped image must be within the range of
  // [min_aspect_ratio, max_aspect_ratio].
  optional float min_aspect_ratio = 2;
  optional float max_aspect_ratio = 3;

  // The area of the cropped image must be within the range of
  // [min_area, max_area].
  optional float min_area = 4;
  optional float max_area = 5;

  // Cropped box area ratio must be above this threshold to be kept.
  optional float overlap_thresh = 6;

  // Whether to clip the boxes to the cropped image.
  optional bool clip_boxes = 8 [default = true];

  // Probability a crop operation is skipped.
  optional float random_coef = 7;
}

// Randomly crops an image according to:
//     Liu et al., SSD: Single shot multibox detector.
// This preprocessing step defines multiple SSDRandomCropOperations. Only one
// operation (chosen at random) is actually performed on an image.
message SSDRandomCrop {
  // Candidate crop operations, one of which is picked per image.
  repeated SSDRandomCropOperation operations = 1;
}

// One candidate crop-and-pad configuration for SSDRandomCropPad. Apart from
// clip_boxes, no defaults are declared for the fields of this message.
message SSDRandomCropPadOperation {
  // Cropped image must cover at least this fraction of one original bounding
  // box.
  optional float min_object_covered = 1;

  // The aspect ratio of the cropped image must be within the range of
  // [min_aspect_ratio, max_aspect_ratio].
  optional float min_aspect_ratio = 2;
  optional float max_aspect_ratio = 3;

  // The area of the cropped image must be within the range of
  // [min_area, max_area].
  optional float min_area = 4;
  optional float max_area = 5;

  // Cropped box area ratio must be above this threshold to be kept.
  optional float overlap_thresh = 6;

  // Whether to clip the boxes to the cropped image.
  optional bool clip_boxes = 13 [default = true];

  // Probability a crop operation is skipped.
  optional float random_coef = 7;

  // Min ratio of padded image height and width to the input image's height and
  // width. Two entries per operation.
  repeated float min_padded_size_ratio = 8;

  // Max ratio of padded image height and width to the input image's height and
  // width. Two entries per operation.
  repeated float max_padded_size_ratio = 9;

  // Padding color, given as separate R, G and B components.
  optional float pad_color_r = 10;
  optional float pad_color_g = 11;
  optional float pad_color_b = 12;
}

// Randomly crops and pads an image according to:
//     Liu et al., SSD: Single shot multibox detector.
// This preprocessing step defines multiple SSDRandomCropPadOperations. Only one
// operation (chosen at random) is actually performed on an image.
message SSDRandomCropPad {
  // Candidate crop-and-pad operations, one of which is picked per image.
  repeated SSDRandomCropPadOperation operations = 1;
}

// One candidate crop configuration for SSDRandomCropFixedAspectRatio. Apart
// from clip_boxes, no defaults are declared for the fields of this message.
// NOTE(review): field numbers 2 and 3 are not used in this message (they hold
// min/max_aspect_ratio in SSDRandomCropOperation); confirm they are
// intentionally skipped before assigning them to new fields.
message SSDRandomCropFixedAspectRatioOperation {
  // Cropped image must cover at least this fraction of one original bounding
  // box.
  optional float min_object_covered = 1;

  // The area of the cropped image must be within the range of
  // [min_area, max_area].
  optional float min_area = 4;
  optional float max_area = 5;

  // Cropped box area ratio must be above this threshold to be kept.
  optional float overlap_thresh = 6;

  // Whether to clip the boxes to the cropped image.
  optional bool clip_boxes = 8 [default = true];

  // Probability a crop operation is skipped.
  optional float random_coef = 7;
}

// Randomly crops an image to a fixed aspect ratio according to:
//     Liu et al., SSD: Single shot multibox detector.
// Multiple SSDRandomCropFixedAspectRatioOperations are defined by this
// preprocessing step. Only one operation (chosen at random) is actually
// performed on an image.
message SSDRandomCropFixedAspectRatio {
  // Candidate crop operations, one of which is picked per image.
  repeated SSDRandomCropFixedAspectRatioOperation operations = 1;

  // Aspect ratio to crop to. This value is used for all crop operations.
  optional float aspect_ratio = 2 [default = 1.0];
}

// One candidate crop configuration for SSDRandomCropPadFixedAspectRatio. Apart
// from clip_boxes, no defaults are declared for the fields of this message.
message SSDRandomCropPadFixedAspectRatioOperation {
  // Cropped image must cover at least this fraction of one original bounding
  // box.
  optional float min_object_covered = 1;

  // The aspect ratio of the cropped image must be within the range of
  // [min_aspect_ratio, max_aspect_ratio].
  optional float min_aspect_ratio = 2;
  optional float max_aspect_ratio = 3;

  // The area of the cropped image must be within the range of
  // [min_area, max_area].
  optional float min_area = 4;
  optional float max_area = 5;

  // Cropped box area ratio must be above this threshold to be kept.
  optional float overlap_thresh = 6;

  // Whether to clip the boxes to the cropped image.
  optional bool clip_boxes = 8 [default = true];

  // Probability a crop operation is skipped.
  optional float random_coef = 7;
}

// Randomly crops and pads an image to a fixed aspect ratio according to:
//     Liu et al., SSD: Single shot multibox detector.
// Multiple SSDRandomCropPadFixedAspectRatioOperations are defined by this
// preprocessing step. Only one operation (chosen at random) is actually
// performed on an image.
message SSDRandomCropPadFixedAspectRatio {
  // Candidate crop operations, one of which is picked per image.
  repeated SSDRandomCropPadFixedAspectRatioOperation operations = 1;

  // Aspect ratio to pad to. This value is used for all crop and pad operations.
  optional float aspect_ratio = 2 [default = 1.0];

  // Min ratio of padded image height and width to the input image's height and
  // width. Two entries per operation.
  repeated float min_padded_size_ratio = 3;

  // Max ratio of padded image height and width to the input image's height and
  // width. Two entries per operation.
  repeated float max_padded_size_ratio = 4;
}

// Converts class logits to softmax, optionally scaling the values by
// temperature first.
message ConvertClassLogitsToSoftmax {
  // Scale to use on logits before applying softmax.
  optional float temperature = 1 [default = 1.0];
}

// Randomly concatenates the image with itself horizontally and/or vertically.
// Each direction has its own probability, both defaulting to 0.1.
message RandomSelfConcatImage {
  // Probability of concatenating the image vertically.
  optional float concat_vertical_probability = 1 [default = 0.1];
  // Probability of concatenating the image horizontally.
  optional float concat_horizontal_probability = 2 [default = 0.1];
}

// Applies an AutoAugment policy to the image and bounding boxes.
message AutoAugmentImage {
  // Name of the AutoAugment policy to apply to the image.
  optional string policy_name = 1 [default = "v0"];
}

// Randomly drops ground truth boxes for a label with some probability.
message DropLabelProbabilistically {
  // The label that should be dropped. This corresponds to one of the entries
  // in the label map.
  optional int32 label = 1;
  // Probability of dropping the label. The default of 1.0 always drops it.
  optional float drop_probability = 2 [default = 1.0];
}

// Remaps a set of labels to a new label.
message RemapLabels {
  // Labels to be remapped.
  repeated int32 original_labels = 1;
  // Label that every entry of original_labels is mapped to.
  optional int32 new_label = 2;
}

// Applies a JPEG encoding with a random quality factor.
message RandomJpegQuality {
  // Probability of keeping the original image.
  optional float random_coef = 1 [default = 0.0];

  // Minimum JPEG quality to use.
  optional int32 min_jpeg_quality = 2 [default = 0];

  // Maximum JPEG quality to use.
  optional int32 max_jpeg_quality = 3 [default = 100];
}

// Randomly shrinks image (keeping aspect ratio) to a target number of pixels.
// If the image contains fewer than the chosen target number of pixels, it will
// not be changed.
message RandomDownscaleToTargetPixels {
  // Probability of keeping the original image.
  optional float random_coef = 1 [default = 0.0];

  // The target number of pixels will be chosen to be in the range
  // [min_target_pixels, max_target_pixels].
  optional int32 min_target_pixels = 2 [default = 300000];
  optional int32 max_target_pixels = 3 [default = 500000];
}

// Applies gaussian noise within a patch of the image. The patch size and the
// standard deviation of the noise are each chosen from the ranges below; the
// original image is kept with probability random_coef.
message RandomPatchGaussian {
  // Probability of keeping the original image.
  optional float random_coef = 1 [default = 0.0];

  // The patch size will be chosen to be in the range
  // [min_patch_size, max_patch_size).
  optional int32 min_patch_size = 2 [default = 1];
  optional int32 max_patch_size = 3 [default = 250];

  // The standard deviation of the gaussian noise applied within the patch will
  // be chosen to be in the range [min_gaussian_stddev, max_gaussian_stddev).
  optional float min_gaussian_stddev = 4 [default = 0.0];
  optional float max_gaussian_stddev = 5 [default = 1.0];
}

// Extracts a square crop from an image whose side length is sampled by
// randomly scaling the maximum spatial dimension of the image. If part of the
// crop falls outside the image, it is filled with zeros.
// The augmentation is borrowed from [1]
// [1]: https://arxiv.org/abs/1904.07850
message RandomSquareCropByScale {
  // The maximum size of the border. The border defines distance in pixels to
  // the image boundaries that will not be considered as a center of a crop.
  // To make sure that the border does not go over the center of the image,
  // we choose the border value by computing the minimum k, such that
  // (max_border / (2**k)) < image_dimension/2
  optional int32 max_border = 1 [default = 128];

  // The minimum and maximum values of scale.
  optional float scale_min = 2 [default = 0.6];
  optional float scale_max = 3 [default = 1.3];

  // The number of discrete scale values to randomly sample between
  // [scale_min, scale_max].
  optional int32 num_scales = 4 [default = 8];
}

// Randomly scales, crops, and then pads an image to the desired square output
// dimensions. Specifically, this method first samples a random_scale factor
// from a uniform distribution between scale_min and scale_max, and then resizes
// the image such that its maximum dimension is (output_size * random_scale).
// Secondly, a square output_size crop is extracted from the resized image, and
// finally the cropped region is padded to the desired square output_size.
// The augmentation is borrowed from [1]
// [1]: https://arxiv.org/abs/1911.09070
message RandomScaleCropAndPadToSquare {
  // The (square) output image size.
  optional int32 output_size = 1 [default = 512];

  // The minimum and maximum values from which to sample the random scale.
  optional float scale_min = 2 [default = 0.1];
  optional float scale_max = 3 [default = 2.0];
}

// Adjusts the gamma of the image using the specified gamma and gain values.
message AdjustGamma {
  // Gamma value to apply; defaults to 1.0.
  optional float gamma = 1 [default = 1.0];
  // Gain value to apply; defaults to 1.0.
  optional float gain = 2 [default = 1.0];
}