This repository has been archived by the owner on Dec 31, 2020. It is now read-only.
forked from NVIDIA/caffe
-
Notifications
You must be signed in to change notification settings - Fork 35
/
caffe.proto
2460 lines (2197 loc) · 98.8 KB
/
caffe.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
syntax = "proto2";
package caffe;
// Math and storage types
// Numeric type used for blob storage and, where supported, math.
enum Type {
  DOUBLE = 0;
  FLOAT = 1;
  FLOAT16 = 2;
  INT = 3;  // math not supported
  UINT = 4; // math not supported
  BOOL = 5; // math not supported
}
// Memory layout of 4D tensors (N = num, C = channels, H = height, W = width).
enum Packing {
  NCHW = 0; // channels vary slowest within an image
  NHWC = 1; // channels vary fastest (interleaved)
}
// Specifies the shape (dimensions) of a Blob.
message BlobShape {
  // One entry per axis, outermost first (e.g. N, C, H, W for a 4D blob).
  repeated int64 dim = 1 [packed = true];
}
// An N-D tensor holding data and/or gradients (diff), in one of several
// storage formats: legacy float/double repeated fields, or raw bytes typed
// by raw_data_type / raw_diff_type.
message BlobProto {
  optional BlobShape shape = 7;
  repeated float data = 5 [packed = true];
  repeated float diff = 6 [packed = true];
  repeated double double_data = 8 [packed = true];
  repeated double double_diff = 9 [packed = true];
  // New raw storage (faster and takes 1/2 of space for FP16)
  optional Type raw_data_type = 10;
  optional Type raw_diff_type = 11;
  // NOTE: the former "[packed = false]" annotation on the two fields below
  // was a no-op ([packed] only applies to repeated numeric fields) and has
  // been removed; the wire format is unchanged.
  optional bytes raw_data = 12;
  optional bytes raw_diff = 13;
  // 4D dimensions -- deprecated. Use "shape" instead.
  optional int32 num = 1 [default = 0];
  optional int32 channels = 2 [default = 0];
  optional int32 height = 3 [default = 0];
  optional int32 width = 4 [default = 0];
}
// The BlobProtoVector is simply a way to pass multiple blobproto instances
// around.
message BlobProtoVector {
  repeated BlobProto blobs = 1;
}
// A single data item (typically one image) plus an optional label.
message Datum {
  optional int32 channels = 1;
  optional int32 height = 2;
  optional int32 width = 3;
  // the actual image data, in bytes
  optional bytes data = 4;
  optional int32 label = 5;
  // Optionally, the datum could also hold float data.
  repeated float float_data = 6;
  // If true data contains an encoded image that need to be decoded
  optional bool encoded = 7 [default = false];
  // Unique record index assigned by Reader
  optional uint32 record_id = 8 [default = 0];
}
// The label (display) name and label id.
message LabelMapItem {
  // Both name and label are required.
  // NOTE(review): the fields are declared proto2 optional, so the
  // "required" contract is presumably enforced by consuming code — verify.
  optional string name = 1;
  optional int32 label = 2;
  // display_name is optional.
  optional string display_name = 3;
}
// A collection of label/name mappings (see LabelMapItem).
message LabelMap {
  repeated LabelMapItem item = 1;
}
// Sample a bbox in the normalized space [0, 1] with provided constraints.
message Sampler {
  // Minimum scale of the sampled bbox.
  optional float min_scale = 1 [default = 1.];
  // Maximum scale of the sampled bbox.
  optional float max_scale = 2 [default = 1.];
  // Minimum aspect ratio of the sampled bbox.
  optional float min_aspect_ratio = 3 [default = 1.];
  // Maximum aspect ratio of the sampled bbox.
  optional float max_aspect_ratio = 4 [default = 1.];
}
// Constraints for selecting sampled bbox.
// An unset field presumably imposes no bound — confirm in the sampler code.
message SampleConstraint {
  // Minimum Jaccard overlap between sampled bbox and all bboxes in
  // AnnotationGroup.
  optional float min_jaccard_overlap = 1;
  // Maximum Jaccard overlap between sampled bbox and all bboxes in
  // AnnotationGroup.
  optional float max_jaccard_overlap = 2;
  // Minimum coverage of sampled bbox by all bboxes in AnnotationGroup.
  optional float min_sample_coverage = 3;
  // Maximum coverage of sampled bbox by all bboxes in AnnotationGroup.
  optional float max_sample_coverage = 4;
  // Minimum coverage of all bboxes in AnnotationGroup by sampled bbox.
  optional float min_object_coverage = 5;
  // Maximum coverage of all bboxes in AnnotationGroup by sampled bbox.
  optional float max_object_coverage = 6;
}
// Sample a batch of bboxes with provided constraints.
message BatchSampler {
  // Use original image as the source for sampling.
  optional bool use_original_image = 1 [default = true];
  // Constraints for sampling bbox.
  optional Sampler sampler = 2;
  // Constraints for determining if a sampled bbox is positive or negative.
  optional SampleConstraint sample_constraint = 3;
  // If provided, break when found certain number of samples satisfying the
  // sample_constraint.
  optional uint32 max_sample = 4;
  // Maximum number of trials for sampling to avoid infinite loop.
  optional uint32 max_trials = 5 [default = 100];
}
// Condition for emitting annotations.
message EmitConstraint {
  enum EmitType {
    // Emit if the annotation center is inside the region.
    CENTER = 0;
    // Emit if the overlap with the region reaches emit_overlap.
    MIN_OVERLAP = 1;
  }
  optional EmitType emit_type = 1 [default = CENTER];
  // If emit_type is MIN_OVERLAP, provide the emit_overlap.
  optional float emit_overlap = 2;
}
// The normalized bounding box [0, 1] w.r.t. the input image size.
message NormalizedBBox {
  optional float xmin = 1;
  optional float ymin = 2;
  optional float xmax = 3;
  optional float ymax = 4;
  // Class label of the object inside this box.
  optional int32 label = 5;
  // Whether the instance is marked "difficult" (e.g. in VOC-style data).
  optional bool difficult = 6;
  // Detection confidence score.
  optional float score = 7;
  // Normalized box area; presumably cached by consumers — verify.
  optional float size = 8;
}
// Annotation for each object instance.
message Annotation {
  optional int32 instance_id = 1 [default = 0];
  optional NormalizedBBox bbox = 2;
}
// Group of annotations for a particular label.
message AnnotationGroup {
  optional int32 group_label = 1;
  repeated Annotation annotation = 2;
}
// An extension of Datum which contains "rich" annotations.
message AnnotatedDatum {
  enum AnnotationType {
    BBOX = 0;
  }
  optional Datum datum = 1;
  // If there are "rich" annotations, specify the type of annotation.
  // Currently it only supports bounding box.
  // If there are no "rich" annotations, use label in datum instead.
  optional AnnotationType type = 2;
  // Each group contains annotation for a particular class.
  repeated AnnotationGroup annotation_group = 3;
  // Unique record index assigned by Reader
  optional uint32 record_id = 4 [default = 0];
}
// Discriminator for the kind of record stored in a dataset.
enum DatumTypeInfo {
  DatumTypeInfo_DATUM = 0;
  DatumTypeInfo_ANNOTATED_DATUM = 1;
}
// Caffe 2 datasets support
message C2TensorProto {
  // The dimensions in the tensor.
  repeated int64 dims = 1;
  enum DataType {
    UNDEFINED = 0;
    FLOAT = 1; // float
    INT32 = 2; // int
    BYTE = 3; // BYTE, when deserialized, is going to be restored as uint8.
    STRING = 4; // string
    // Less-commonly used data types.
    BOOL = 5; // bool
    UINT8 = 6; // uint8_t
    INT8 = 7; // int8_t
    UINT16 = 8; // uint16_t
    INT16 = 9; // int16_t
    INT64 = 10; // int64_t
    // Value 11 is intentionally unassigned, presumably to stay aligned
    // with the upstream caffe2 TensorProto definition — do not reuse it.
    FLOAT16 = 12; // caffe2::__f16, caffe2::float16
    DOUBLE = 13; // double
  }
  optional DataType data_type = 2 [default = FLOAT];
  // For float
  repeated float float_data = 3 [packed = true];
  // For int32, uint8, int8, uint16, int16, bool, and float16
  // Note about float16: in storage we will basically convert float16 byte-wise
  // to unsigned short and then store them in the int32_data field.
  repeated int32 int32_data = 4 [packed = true];
  // For bytes
  optional bytes byte_data = 5;
  // For strings
  repeated bytes string_data = 6;
  // For double
  repeated double double_data = 9 [packed = true];
  // For int64
  repeated int64 int64_data = 10 [packed = true];
  // Optionally, a name for the tensor.
  optional string name = 7;
}
// A list of Caffe 2 tensors (see C2TensorProto).
message C2TensorProtos {
  repeated C2TensorProto protos = 1;
}
// Configuration of a weight/bias filler (random or constant initializer).
message FillerParameter {
  // The filler type. Can be one of the following:
  // constant, gaussian, positive_unitball, uniform, xavier,
  // msra, bilinear, or their static versions:
  // gaussianstatic, positive_unitballstatic, uniformstatic,
  // xavierstatic, msrastatic, bilinearstatic
  // In the static version the random number generator is only
  // called once, and subsequent calls to fill blob, will write
  // the same random numbers as in the first call. This is done
  // to save time in cases where having same random numbers does
  // not make a difference.
  optional string type = 1 [default = 'constant'];
  optional float value = 2 [default = 0]; // the value in constant filler
  optional float min = 3 [default = 0]; // the min value in uniform filler
  optional float max = 4 [default = 1]; // the max value in uniform filler
  optional float mean = 5 [default = 0]; // the mean value in Gaussian filler
  optional float std = 6 [default = 1]; // the std value in Gaussian filler
  // The expected number of non-zero output weights for a given input in
  // Gaussian filler -- the default -1 means don't perform sparsification.
  optional int32 sparse = 7 [default = -1];
  // Normalize the filler variance by fan_in, fan_out, or their average.
  // Applies to 'xavier' and 'msra' fillers.
  enum VarianceNorm {
    FAN_IN = 0;
    FAN_OUT = 1;
    AVERAGE = 2;
  }
  optional VarianceNorm variance_norm = 8 [default = FAN_IN];
}
// Top-level description of a network: its inputs, layers, default numeric
// types, and multi-GPU/quantization settings.
message NetParameter {
  optional string name = 1; // consider giving the network a name
  // DEPRECATED. See InputParameter. The input blobs to the network.
  repeated string input = 3;
  // DEPRECATED. See InputParameter. The shape of the input blobs.
  repeated BlobShape input_shape = 8;
  // 4D input dimensions -- deprecated. Use "input_shape" instead.
  // If specified, for each input blob there should be four
  // values specifying the num, channels, height and width of the input blob.
  // Thus, there should be a total of (4 * #input) numbers.
  repeated int32 input_dim = 4;
  // Whether the network will force every layer to carry out backward operation.
  // If set False, then whether to carry out backward is determined
  // automatically according to the net structure and learning rates.
  optional bool force_backward = 5 [default = false];
  // The current "state" of the network, including the phase, level, and stage.
  // Some layers may be included/excluded depending on this state and the states
  // specified in the layers' include and exclude fields.
  optional NetState state = 6;
  // Print debugging information about results while running Net::Forward,
  // Net::Backward, and Net::Update.
  optional bool debug_info = 7 [default = false];
  // The layers that make up the net. Each of their configurations, including
  // connectivity and behavior, is specified as a LayerParameter.
  repeated LayerParameter layer = 100; // ID 100 so layers are printed last.
  // DEPRECATED: use 'layer' instead.
  repeated V1LayerParameter layers = 2;
  // Default types for all layers
  // These work only when layer-specific ones are omitted
  optional Type default_forward_type = 11 [default = FLOAT];
  optional Type default_backward_type = 12 [default = FLOAT];
  optional Type default_forward_math = 13 [default = FLOAT];
  optional Type default_backward_math = 14 [default = FLOAT];
  // Global gradient scaling coefficient K (default - no scaling)
  //
  // Scenario 1: global_grad_scale_adaptive: true
  // If positive, gradients scaled by K*L where L is L_2 norm of all gradients in a Net.
  // This helps to improve accuracy of reduced precision training.
  //
  // Scenario 2: global_grad_scale_adaptive: false
  // If positive, gradients scaled by K.
  optional float global_grad_scale = 15 [default = 1.];
  optional bool global_grad_scale_adaptive = 16 [default = false];
  // Sets the default "conv_algos_override" value for every convolution layer
  optional string default_conv_algos_override = 17 [default = "-1,-1,-1"];
  // While using multiple GPUs we have to run reduction process after every iteration.
  // For better performance we unify multiple layers in buckets.
  // This parameter sets approximate number of buckets to combine layers to.
  // Default value is good for majority of nets.
  optional int32 reduce_buckets = 18 [default = 3];
  // Sets the default "cudnn_math_override" value for every layer
  optional int32 default_cudnn_math_override = 19 [default = -1];
  optional bool quantize = 200 [default = false];
  optional NetQuantizationParameter net_quantization_param = 201;
}
// NOTE
// Update the next available ID when you add a new SolverParameter field.
//
// SolverParameter next available ID: 55 (last added: test_and_snapshot_last_epochs)
message SolverParameter {
  //////////////////////////////////////////////////////////////////////////////
  // Specifying the train and test networks
  //
  // Exactly one train net must be specified using one of the following fields:
  //     train_net_param, train_net, net_param, net
  // One or more test nets may be specified using any of the following fields:
  //     test_net_param, test_net, net_param, net
  // If more than one test net field is specified (e.g., both net and
  // test_net are specified), they will be evaluated in the field order given
  // above: (1) test_net_param, (2) test_net, (3) net_param/net.
  // A test_iter must be specified for each test_net.
  // A test_level and/or a test_stage may also be specified for each test_net.
  //////////////////////////////////////////////////////////////////////////////
  // Proto filename for the train net, possibly combined with one or more
  // test nets.
  optional string net = 24;
  // Inline train net param, possibly combined with one or more test nets.
  optional NetParameter net_param = 25;
  optional string train_net = 1; // Proto filename for the train net.
  repeated string test_net = 2; // Proto filenames for the test nets.
  optional NetParameter train_net_param = 21; // Inline train net params.
  repeated NetParameter test_net_param = 22; // Inline test net params.
  // The states for the train/test nets. Must be unspecified or
  // specified once per net.
  //
  // By default, all states will have solver = true;
  // train_state will have phase = TRAIN,
  // and all test_state's will have phase = TEST.
  // Other defaults are set according to the NetState defaults.
  optional NetState train_state = 26;
  repeated NetState test_state = 27;
  // Evaluation type.
  optional string eval_type = 241 [default = "classification"];
  // ap_version: different ways of computing Average Precision.
  //    Check https://sanchom.wordpress.com/tag/average-precision/ for details.
  //    11point: the 11-point interpolated average precision. Used in VOC2007.
  //    MaxIntegral: maximally interpolated AP. Used in VOC2012/ILSVRC.
  //    Integral: the natural integral of the precision-recall curve.
  optional string ap_version = 242 [default = "Integral"];
  // If true, display per class result.
  optional bool show_per_class_result = 244 [default = false];
  // The number of iterations for each test net.
  repeated int32 test_iter = 3;
  // The number of iterations between two testing phases.
  optional int32 test_interval = 4 [default = 0];
  optional bool test_compute_loss = 19 [default = false];
  // If true, run an initial test pass before the first iteration,
  // ensuring memory availability and printing the starting value of the loss.
  optional bool test_initialization = 32 [default = false];
  optional int32 rampup_interval = 41 [default = 0];
  optional float rampup_lr = 42 [default = 0.];
  optional float min_lr = 43 [default = 0.];
  optional float base_lr = 5; // The base learning rate
  // the number of iterations between displaying info. If display = 0, no info
  // will be displayed.
  optional int32 display = 6;
  // Display the loss averaged over the last average_loss iterations
  optional int32 average_loss = 33 [default = 1];
  optional int32 max_iter = 7; // the maximum number of iterations
  // accumulate gradients over `iter_size` x `batch_size` instances
  optional int32 iter_size = 36 [default = 1];
  // The learning rate decay policy. The currently implemented learning rate
  // policies are as follows:
  //    - fixed: always return base_lr.
  //    - step: return base_lr * gamma ^ (floor(iter / step))
  //    - exp: return base_lr * gamma ^ iter
  //    - inv: return base_lr * (1 + gamma * iter) ^ (- power)
  //    - multistep: similar to step but it allows non uniform steps defined by
  //      stepvalue
  //    - poly: the effective learning rate follows a polynomial decay, to be
  //      zero by the max_iter. return base_lr (1 - iter/max_iter) ^ (power)
  //    - sigmoid: the effective learning rate follows a sigmoid decay
  //      return base_lr ( 1/(1 + exp(-gamma * (iter - stepsize))))
  //    - plateau: decreases lr
  //      if the minimum loss isn't updated for 'plateau_winsize' iters
  //
  // where base_lr, max_iter, gamma, step, stepvalue and power are defined
  // in the solver parameter protocol buffer, and iter is the current iteration.
  optional string lr_policy = 8;
  optional float gamma = 9; // The parameter to compute the learning rate.
  optional float power = 10; // The parameter to compute the learning rate.
  optional float momentum = 11; // The momentum value.
  optional string momentum_policy = 46 [default = "fixed"];
  optional float max_momentum = 47 [default = 0.99];
  optional float momentum_power = 48 [default = 1.];
  // LARC - Layer-wise Adaptive Rate Control
  optional bool larc = 49 [default = false];
  optional string larc_policy = 50 [default = "scale"];
  optional float larc_eta = 51 [default = 0.001];
  optional float weight_decay = 12; // The weight decay.
  optional string weight_decay_policy = 52 [default = "fixed"];
  optional float weight_decay_power = 53 [default = 0.5];
  // regularization types supported: L1 and L2
  // controlled by weight_decay
  optional string regularization_type = 29 [default = "L2"];
  // the stepsize for learning rate policy "step"
  optional int32 stepsize = 13;
  // the stepsize for learning rate policy "multistep"
  repeated int32 stepvalue = 34;
  // the stepsize for learning rate policy "plateau"
  repeated int32 plateau_winsize = 243;
  // Set clip_gradients to >= 0 to clip parameter gradients to that L2 norm,
  // whenever their actual L2 norm is larger.
  optional float clip_gradients = 35 [default = -1];
  optional int32 snapshot = 14 [default = 0]; // The snapshot interval
  optional string snapshot_prefix = 15; // The prefix for the snapshot.
  // whether to snapshot diff in the results or not. Snapshotting diff will help
  // debugging but the final protocol buffer size will be much larger.
  optional bool snapshot_diff = 16 [default = false];
  enum SnapshotFormat {
    HDF5 = 0;
    BINARYPROTO = 1;
  }
  optional SnapshotFormat snapshot_format = 37 [default = BINARYPROTO];
  // the mode solver will use: 0 for CPU and 1 for GPU. Use GPU in default.
  enum SolverMode {
    CPU = 0;
    GPU = 1;
  }
  optional SolverMode solver_mode = 17 [default = GPU];
  // the device_id will that be used in GPU mode. Use device_id = 0 in default.
  optional int32 device_id = 18 [default = 0];
  // If non-negative, the seed with which the Solver will initialize the Caffe
  // random number generator -- useful for reproducible results. Otherwise,
  // (and by default) initialize using a seed derived from the system clock.
  optional int64 random_seed = 20 [default = -1];
  // type of the solver
  optional string type = 40 [default = "SGD"];
  // numerical stability for RMSProp, AdaGrad and AdaDelta and Adam
  optional float delta = 31 [default = 1e-8];
  // parameters for the Adam solver
  optional float momentum2 = 39 [default = 0.999];
  // RMSProp decay value
  // MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t)
  optional float rms_decay = 38 [default = 0.99];
  // If true, print information about the state of the net that may help with
  // debugging learning problems.
  optional bool debug_info = 23 [default = false];
  // If false, don't save a snapshot after training finishes.
  optional bool snapshot_after_train = 28 [default = true];
  // DEPRECATED: old solver enum types, use string instead
  enum SolverType {
    SGD = 0;
    NESTEROV = 1;
    ADAGRAD = 2;
    RMSPROP = 3;
    ADADELTA = 4;
    ADAM = 5;
  }
  // DEPRECATED: use type instead of solver_type
  optional SolverType solver_type = 30 [default = SGD];
  // Type used for storing weights and history
  optional Type solver_data_type = 44 [default = FLOAT];
  // If true:
  //  * Stores blobs in old (less efficient) BVLC-compatible format.
  //  * FP16 blobs are converted to FP32 and stored in 'data' container.
  //  * FP32 blobs are stored in 'data' container.
  //  * FP64 blobs are stored in 'double_data' container.
  optional bool store_blobs_in_old_format = 45 [default = false];
  // If set to N>0, makes Caffe to test and snapshot last N epochs
  optional int32 test_and_snapshot_last_epochs = 54 [default = 0];
  // Ignore mismatching blobs and continue while loading weights
  optional bool ignore_shape_mismatch = 150 [default = true];
  // Write additional snapshot in txt format
  optional bool snapshot_log = 151 [default = false];
  // Sparsity params
  optional int32 display_sparsity = 152 [default = 0];
  optional SparseMode sparse_mode = 153 [default = SPARSE_NONE];
  optional float sparsity_target = 154 [default = 0.0]; // desired sparsity as a fraction
  optional float sparsity_step_factor = 155 [default = 0.01]; // sparsity step factor. 0.01 is 1%
  optional int32 sparsity_step_iter = 156 [default = 1000]; // sparsity step increment iterations
  optional int32 sparsity_start_iter = 157 [default = 0]; // sparsity start iteration
  optional float sparsity_start_factor = 158 [default = 0.0]; // sparsity start factor
  optional float sparsity_threshold_maxratio = 159 [default = 0.2]; // default ratio of the max threshold allowed to the max weight
  optional bool sparsity_itr_increment_bfr_applying = 160 [default = true]; // true: itr increment before applying sparsity (old behaviour), false: after applying sparsity
  optional float sparsity_threshold_value_max = 161 [default = 0.2]; // threshold_value_max
}
// A message that stores the solver snapshots
message SolverState {
  optional int32 iter = 1; // The current iteration
  optional string learned_net = 2; // The file that stores the learned net.
  repeated BlobProto history = 3; // The history for sgd solvers
  optional int32 current_step = 4 [default = 0]; // The current step for learning rate
  optional float minimum_loss = 5 [default = 1E38]; // Historical minimum loss
  optional int32 iter_last_event = 6 [default = 0]; // The iteration when last lr-update or min_loss-update happened
}
// Execution phase of a net or layer.
enum Phase {
  TRAIN = 0;
  TEST = 1;
}
// Runtime state of a net (phase/level/stages); matched against NetStateRule
// to include or exclude individual layers.
message NetState {
  optional Phase phase = 1 [default = TEST];
  optional int32 level = 2 [default = 0];
  repeated string stage = 3;
}
// A rule that a NetState must satisfy for a layer to be included/excluded.
message NetStateRule {
  // Set phase to require the NetState have a particular phase (TRAIN or TEST)
  // to meet this rule.
  optional Phase phase = 1;
  // Set the minimum and/or maximum levels in which the layer should be used.
  // Leave undefined to meet the rule regardless of level.
  optional int32 min_level = 2;
  optional int32 max_level = 3;
  // Customizable sets of stages to include or exclude.
  // The net must have ALL of the specified stages and NONE of the specified
  // "not_stage"s to meet the rule.
  // (Use multiple NetStateRules to specify conjunctions of stages.)
  repeated string stage = 4;
  repeated string not_stage = 5;
}
// Specifies training parameters (multipliers on global learning constants,
// and the name and other settings used for weight sharing).
message ParamSpec {
  // The names of the parameter blobs -- useful for sharing parameters among
  // layers, but never required otherwise.  To share a parameter between two
  // layers, give it a (non-empty) name.
  optional string name = 1;
  // Whether to require shared weights to have the same shape, or just the same
  // count -- defaults to STRICT if unspecified.
  optional DimCheckMode share_mode = 2;
  enum DimCheckMode {
    // STRICT (default) requires that num, channels, height, width each match.
    STRICT = 0;
    // PERMISSIVE requires only the count (num*channels*height*width) to match.
    PERMISSIVE = 1;
  }
  // The multiplier on the global learning rate for this parameter.
  optional float lr_mult = 3 [default = 1.0];
  // The multiplier on the global weight decay for this parameter.
  optional float decay_mult = 4 [default = 1.0];
}
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available layer-specific ID: 152 (last added: recurrent_param)
message LayerParameter {
  optional string name = 1; // the layer name
  optional string type = 2; // the layer type
  repeated string bottom = 3; // the name of each bottom blob
  repeated string top = 4; // the name of each top blob
  // Type returned by Forward routines of a particular layer (aka 'Ftype')
  optional Type forward_type = 145 [default = FLOAT];
  // Type returned by Backward routines of a particular layer (aka 'Btype')
  optional Type backward_type = 146 [default = FLOAT];
  // Internal math types. Works for those layers where internal math type
  // could be different compared to Ftype or Btype. For example, so called
  // "pseudo fp32 mode" in convolution layers. For other layers has no meaning.
  optional Type forward_math = 147 [default = FLOAT];
  optional Type backward_math = 148 [default = FLOAT];
  optional bool debug = 149 [default = false];
  // Sets the default cudnnMathType_t value for all cuDNN-based
  // computations in current layer, if applicable. Ignored otherwise.
  // If negative or omitted, assumes implicit default and allows
  // optimizers like cudnnFindConvolution*AlgorithmEx to choose the best type.
  // If set to zero, enforces using CUDNN_DEFAULT_MATH everywhere in current layer.
  // If set to one, enforces using CUDNN_TENSOR_OP_MATH everywhere in current layer.
  optional int32 cudnn_math_override = 150 [default = -1];
  // The train / test phase for computation.
  optional Phase phase = 10;
  // The amount of weight to assign each top blob in the objective.
  // Each layer assigns a default value, usually of either 0 or 1,
  // to each top blob.
  repeated float loss_weight = 5;
  // Specifies training parameters (multipliers on global learning constants,
  // and the name and other settings used for weight sharing).
  repeated ParamSpec param = 6;
  // The blobs containing the numeric parameters of the layer.
  repeated BlobProto blobs = 7;
  // Specifies whether to backpropagate to each bottom. If unspecified,
  // Caffe will automatically infer whether each input needs backpropagation
  // to compute parameter gradients. If set to true for some inputs,
  // backpropagation to those inputs is forced; if set false for some inputs,
  // backpropagation to those inputs is skipped.
  //
  // The size must be either 0 or equal to the number of bottoms.
  repeated bool propagate_down = 11;
  // Rules controlling whether and when a layer is included in the network,
  // based on the current NetState. You may specify a non-zero number of rules
  // to include OR exclude, but not both. If no include or exclude rules are
  // specified, the layer is always included. If the current NetState meets
  // ANY (i.e., one or more) of the specified rules, the layer is
  // included/excluded.
  repeated NetStateRule include = 8;
  repeated NetStateRule exclude = 9;
  // Parameters for data pre-processing.
  optional TransformationParameter transform_param = 100;
  // Parameters shared by loss layers.
  optional LossParameter loss_param = 101;
  // Layer type-specific parameters.
  //
  // Note: certain layers may have more than one computational engine
  // for their implementation. These layers include an Engine type and
  // engine parameter for selecting the implementation.
  // The default for the engine is set by the ENGINE switch at compile-time.
  optional AccuracyParameter accuracy_param = 102;
  optional AnnotatedDataParameter annotated_data_param = 200;
  optional ArgMaxParameter argmax_param = 103;
  optional BatchNormParameter batch_norm_param = 139;
  optional BiasParameter bias_param = 141;
  optional ConcatParameter concat_param = 104;
  optional ContrastiveLossParameter contrastive_loss_param = 105;
  optional ConvolutionParameter convolution_param = 106;
  optional CropParameter crop_param = 144;
  optional DataParameter data_param = 107;
  optional DetectionEvaluateParameter detection_evaluate_param = 205;
  optional DetectionOutputParameter detection_output_param = 204;
  optional DropoutParameter dropout_param = 108;
  optional DummyDataParameter dummy_data_param = 109;
  optional EltwiseParameter eltwise_param = 110;
  optional ELUParameter elu_param = 140;
  optional EmbedParameter embed_param = 137;
  optional ExpParameter exp_param = 111;
  optional FlattenParameter flatten_param = 135;
  optional HDF5DataParameter hdf5_data_param = 112;
  optional HDF5OutputParameter hdf5_output_param = 113;
  optional HingeLossParameter hinge_loss_param = 114;
  optional ImageDataParameter image_data_param = 115;
  optional InfogainLossParameter infogain_loss_param = 116;
  optional InnerProductParameter inner_product_param = 117;
  optional InputParameter input_param = 143;
  optional LogParameter log_param = 134;
  optional LRNParameter lrn_param = 118;
  optional MemoryDataParameter memory_data_param = 119;
  optional MultiBoxLossParameter multibox_loss_param = 201;
  optional MVNParameter mvn_param = 120;
  optional NormalizeParameter norm_param = 206;
  optional PermuteParameter permute_param = 202;
  optional PoolingParameter pooling_param = 121;
  optional PowerParameter power_param = 122;
  optional PReLUParameter prelu_param = 131;
  optional PriorBoxParameter prior_box_param = 203;
  optional PythonParameter python_param = 130;
  optional ReductionParameter reduction_param = 136;
  optional ReLUParameter relu_param = 123;
  optional ReshapeParameter reshape_param = 133;
  optional ScaleParameter scale_param = 142;
  optional SigmoidParameter sigmoid_param = 124;
  optional SoftmaxParameter softmax_param = 125;
  optional SPPParameter spp_param = 132;
  optional SliceParameter slice_param = 126;
  optional TanHParameter tanh_param = 127;
  optional ThresholdParameter threshold_param = 128;
  optional TileParameter tile_param = 138;
  optional VideoDataParameter video_data_param = 207;
  optional WindowDataParameter window_data_param = 129;
  optional RecurrentParameter recurrent_param = 151;
  // NVIDIA PARAMETERS (Start with 68 because NV is 68 on an old-style phone)
  optional DetectNetGroundTruthParameter detectnet_groundtruth_param = 6801;
  optional DetectNetAugmentationParameter detectnet_augmentation_param = 6802;
  // TI PARAMETERS (Start with 84 because TI is 84 on an old-style phone)
  optional ImageLabelDataParameter image_label_data_param = 8403;
  optional QuantizationParameter quantization_param = 8404;
}
// Message that stores parameters used to apply transformation
// to the data layer's data (scaling, mean subtraction, cropping,
// mirroring, and variable-sized image resizing).
message TransformationParameter {
  // Interpolation algorithms used when resizing images; the values mirror
  // the corresponding OpenCV interpolation flags.
  enum InterpolationAlgo {
    INTER_NEAREST = 0; //!< nearest neighbor interpolation
    INTER_LINEAR = 1; //!< bilinear interpolation
    INTER_CUBIC = 2; //!< bicubic interpolation
    INTER_AREA = 3; //!< area-based (or super) interpolation
  }
  // When the images in a batch are of different shapes, we need to preprocess
  // them into the same fixed shape, as downstream operations in caffe require
  // images within a batch to be of the same shape.
  //
  // To transform one image of arbitrary shape into an image of fixed shape,
  // we allow specifying a sequence of "variable-sized image transforms."
  // There are three possible transforms, and it is possible for _all of them_
  // to be enabled at the same time. They are always applied in the same order:
  // (1) first random resize, (2) then random crop, (3) finally center crop.
  // The last transform must be either a random crop or a center crop.
  //
  // The three supported transforms are as follows:
  //
  // 1. Random resize. This takes two parameters, "lower" and "upper," or
  // "L" and "U" for short. If the original image has shape (oldW, oldH),
  // the shorter side, D = min(oldW, oldH), is calculated. Then a resize
  // target size R is chosen uniformly from the interval [L, U], and both
  // sides of the original image are resized by a scaling factor R/D to yield
  // a new image with shape (R/D * oldW, R/D * oldH).
  //
  // 2. Random crop. This takes one 'crop_size' parameter. A square region is randomly
  // chosen from the image for cropping. Works in TRAIN phase only.
  //
  // 3. Center crop. This takes one 'crop_size' parameter. A square region is chosen
  // from the center of the image for cropping. Works in TEST phase only.
  //
  // Bounds "L" and "U" for the random-resize transform (1) above.
  // Both default to 0, which leaves random resize disabled.
  optional uint32 img_rand_resize_lower = 10 [default = 0];
  optional uint32 img_rand_resize_upper = 11 [default = 0];
  // Limits for randomly generated ratio R so that longer side
  // length would be set to shorter side length multiplied by R.
  // If applied to square, the shorter side is chosen randomly.
  // Pair {1,1} means "resize image to square (by shortest side)".
  // Values less than 1 are ignored.
  optional float rand_resize_ratio_lower = 12 [default = 0];
  optional float rand_resize_ratio_upper = 13 [default = 0];
  // Limits for randomly generated vertical stretch, i.e.
  // "height" *= "vertical_stretch" where
  // "vertical_stretch" = Rand(vertical_stretch_lower,vertical_stretch_upper).
  // Pair {1,1} means "do nothing".
  optional float vertical_stretch_lower = 14 [default = 1];
  optional float vertical_stretch_upper = 15 [default = 1];
  // Limits for randomly generated horizontal stretch, i.e.
  // "width" *= "horizontal_stretch" where
  // "horizontal_stretch" = Rand(horizontal_stretch_lower,horizontal_stretch_upper).
  // Pair {1,1} means "do nothing".
  optional float horizontal_stretch_lower = 16 [default = 1];
  optional float horizontal_stretch_upper = 17 [default = 1];
  // OpenCV algorithm used for downsampling.
  optional InterpolationAlgo interpolation_algo_down = 18 [default = INTER_NEAREST];
  // OpenCV algorithm used for upsampling.
  optional InterpolationAlgo interpolation_algo_up = 19 [default = INTER_CUBIC];
  // No upscale by default no matter what resize parameters are chosen.
  optional bool allow_upscale = 20 [default = false];
  // Memory layout of the transformer's output.
  // If followed by CuDNN, set to NHWC for better performance.
  optional Packing forward_packing = 21 [default = NCHW];
  // For data pre-processing, we can do simple scaling and subtracting the
  // data mean, if provided. Note that the mean subtraction is always carried
  // out before scaling.
  optional float scale = 1 [default = 1];
  // Specify if we want to randomly mirror data.
  optional bool mirror = 2 [default = false];
  // Specify if we would like to randomly crop an image.
  // 0 (the default) disables cropping.
  optional uint32 crop_size = 3 [default = 0];
  // Rectangular crop dimensions. NOTE(review): presumably non-zero values
  // take precedence over the square 'crop_size' above — confirm against the
  // transformer implementation.
  optional uint32 crop_h = 211 [default = 0];
  optional uint32 crop_w = 212 [default = 0];
  // mean_file and mean_value cannot be specified at the same time.
  optional string mean_file = 4;
  // if specified can be repeated once (would subtract it from all the channels)
  // or can be repeated the same number of times as channels
  // (would subtract them from the corresponding channel)
  repeated float mean_value = 5;
  // Force the decoded image to have 3 color channels.
  optional bool force_color = 6 [default = false];
  // Force the decoded image to have 1 color channel.
  optional bool force_gray = 7 [default = false];
  // Run the transform (synchronously) on the GPU.
  // False if omitted when Forward Type is float/double.
  // True otherwise (float16 doesn't work well on CPU).
  optional bool use_gpu_transform = 8 [default = false];
  // If non-negative, the seed with which the transformer's
  // random number generator would be initialized -- useful for reproducible results.
  // Otherwise, (and by default) initialize using a seed derived from the system clock.
  optional int64 random_seed = 9 [default = -1];
  // Enable diagnostic output. NOTE(review): exact effect depends on the
  // transformer implementation — confirm before relying on it.
  optional bool display = 22 [default = false];
  // Number of labels accompanying each datum; 0 means unspecified.
  // NOTE(review): semantics depend on the data layer using this message.
  optional int32 num_labels = 23 [default = 0];
  // Resize policy.
  optional ResizeParameter resize_param = 208;
  // Noise policy.
  optional NoiseParameter noise_param = 209;
  // Distortion policy.
  optional DistortionParameter distort_param = 213;
  // Expand policy.
  optional ExpansionParameter expand_param = 214;
  // Constraint for emitting the annotation after transformation.
  optional EmitConstraint emit_constraint = 210;
}
// Message that stores parameters used to create gridbox ground truth
// (coverage / bounding-box targets) for NVIDIA's DetectNet pipeline.
message DetectNetGroundTruthParameter {
  // stride of gridbox with respect to image size
  optional uint32 stride = 1 [default = 4];
  // coverage region scale with respect to bounding box size
  optional float scale_cvg = 2 [default = 0.5];
  // Selects which bound on the coverage-region dimensions applies:
  // GRIDBOX_MAX uses max_cvg_len, GRIDBOX_MIN uses min_cvg_len.
  enum GridboxType {
    GRIDBOX_MAX = 0;
    GRIDBOX_MIN = 1;
  }
  // determines coverage region's maximum and minimum dimensions
  optional GridboxType gridbox_type = 3 [default = GRIDBOX_MAX];
  // if gridbox_type is equal to GRIDBOX_MAX, the maximum size a given coverage
  // region may take.
  optional uint32 max_cvg_len = 4 [default = 50];
  // if gridbox_type is equal to GRIDBOX_MIN, the minimum size a given coverage
  // region may take.
  optional uint32 min_cvg_len = 5 [default = 50];
  // NOTE(review): field numbers 6 and 10 are unused in this message; if they
  // were ever released, consider declaring them 'reserved' to prevent reuse.
  // Shape of the coverage geometry.
  enum CoverageType {
    RECTANGULAR = 0;
  }
  // shape of the coverage geometry.
  optional CoverageType coverage_type = 7 [default = RECTANGULAR];
  // Size that incoming images are cropped / scaled to during training / test
  // time. The network will only see images of this size.
  optional uint32 image_size_x = 8 [default = 1248];
  optional uint32 image_size_y = 9 [default = 384];
  // If true, normalize object coverage by the size of the object (coverage
  // proportional to the size of the gridbox region overlapping the object).
  optional bool obj_norm = 11 [default = false];
  // crop incoming bboxes such that their bounds remain inside the gridbox.
  optional bool crop_bboxes = 12 [default = true];
  // Maps a source class index from the label data (src) to the target class
  // index used for training (dst).
  message ClassMapping {
    required uint32 src = 1;
    required uint32 dst = 2;
  }
  // Integer and target index of classes to be included; classes without a
  // mapping are presumably excluded — TODO confirm against the layer code.
  repeated ClassMapping object_class = 13;
}
// Message that stores parameters used to apply image and label augmentations
// specific to NVDataLayer's online augmentation module.
// Every *_prob field is a probability in [0, 1]; 0 disables that
// augmentation entirely.
message DetectNetAugmentationParameter {
  // probability that a random crop of the training image will be taken at test
  // time. If image dimensions are less than input size and random cropping is
  // off, then the image will be scaled deterministically to the input size.
  // NOTE(review): "at test time" above likely means "at training time" —
  // confirm against the augmentation code.
  optional float crop_prob = 1 [default = 1.0];
  // number of pixels to shift the image by. If cropping is enabled, this number
  // is added to the range of possible crop offset values.
  optional uint32 shift_x = 2 [default = 0];
  optional uint32 shift_y = 3 [default = 0];
  // probability that the training image will be scaled. 0 turns
  // scale augmentation off.
  optional float scale_prob = 4 [default = 0.33];
  // minimum and maximum scaling factor. 1.0 implies no scaling.
  optional float scale_min = 5 [default = 0.7];
  optional float scale_max = 6 [default = 1.0];
  // probability that image will be flipped across the Y axis. 0 turns flip
  // augmentation off.
  optional float flip_prob = 7 [default = 0.33];
  // probability that the image will be rotated. 0 turns rotation
  // augmentation off.
  optional float rotation_prob = 8 [default = 0.33];
  // maximum angle in degrees (in both directions) image may be rotated.
  optional float max_rotate_degree = 9 [default = 1.0];
  // probability that the hue will be rotated. 0 turns hue
  // augmentation off.
  optional float hue_rotation_prob = 10 [default = 0.33];
  // maximum rotation of hue in degrees (in both directions).
  optional float hue_rotation = 11 [default = 15];
  // probability that the image will be desaturated. 0 turns
  // desaturation augmentation off.
  optional float desaturation_prob = 12 [default = 0.33];
  // maximum desaturation parameter. 1.0 may convert RGB to luminance.
  optional float desaturation_max = 13 [default = 0.5];
  // The policy fields below intentionally reuse the same field numbers as
  // their counterparts in TransformationParameter.
  // Resize policy.
  optional ResizeParameter resize_param = 208;
  // Noise policy.
  optional NoiseParameter noise_param = 209;
  // Distortion policy.
  optional DistortionParameter distort_param = 213;
  // Expand policy.
  optional ExpansionParameter expand_param = 214;
  // Constraint for emitting the annotation after transformation.
  optional EmitConstraint emit_constraint = 210;
}
// Message that stores parameters used by data transformer for resize policy.
message ResizeParameter {
  // Probability of using this resize policy, in [0, 1].
  optional float prob = 1 [default = 1];
  // How the image is fitted to the (height, width) target.
  // NOTE(review): from the names — WARP resizes both sides independently;
  // FIT_SMALL_SIZE / FIT_LARGE_SIZE_AND_PAD preserve aspect ratio, the
  // latter padding the remainder (see pad_mode) — confirm in transformer code.
  enum Resize_mode {
    WARP = 1;
    FIT_SMALL_SIZE = 2;
    FIT_LARGE_SIZE_AND_PAD = 3;
  }
  optional Resize_mode resize_mode = 2 [default = WARP];
  // Target output dimensions in pixels.
  optional uint32 height = 3 [default = 0];
  optional uint32 width = 4 [default = 0];
  // A parameter used to update bbox in FIT_SMALL_SIZE mode.
  optional uint32 height_scale = 8 [default = 0];
  optional uint32 width_scale = 9 [default = 0];
  // How padding pixels are generated: a constant fill (see pad_value),
  // a mirrored reflection of the border, or replication of the nearest
  // border pixel.
  enum Pad_mode {
    CONSTANT = 1;
    MIRRORED = 2;
    REPEAT_NEAREST = 3;
  }
  // Padding mode for FIT_LARGE_SIZE_AND_PAD mode and object centering.
  optional Pad_mode pad_mode = 5 [default = CONSTANT];
  // if specified can be repeated once (would fill all the channels)
  // or can be repeated the same number of times as channels
  // (each value would be used for the corresponding channel)
  repeated float pad_value = 6;
  // Interpolation algorithms; same meaning as in OpenCV.
  enum Interp_mode {
    LINEAR = 1;
    AREA = 2;
    NEAREST = 3;
    CUBIC = 4;
    LANCZOS4 = 5;
  }
  // interpolation for resizing; may be repeated — NOTE(review): presumably
  // one mode is picked per resize when several are listed, confirm in code.
  repeated Interp_mode interp_mode = 7;
}
// Parameters for salt-and-pepper noise: a fraction of pixels is replaced
// with fixed values.
message SaltPepperParameter {
  // Percentage (fraction in [0, 1]) of pixels to corrupt.
  optional float fraction = 1 [default = 0];
  // Replacement values for the corrupted pixels.
  // NOTE(review): per-channel semantics not visible here — confirm in the
  // noise implementation.
  repeated float value = 2;
}
// Message that stores parameters used by data transformer for transformation
// policy
message NoiseParameter {
//Probability of using this resize policy
optional float prob = 1 [default = 0];
// Histogram equalized
optional bool hist_eq = 2 [default = false];
// Color inversion
optional bool inverse = 3 [default = false];
// Grayscale
optional bool decolorize = 4 [default = false];
// Gaussian blur
optional bool gauss_blur = 5 [default = false];
// JPEG compression quality (-1 = no compression)
optional float jpeg = 6 [default = -1];
// Posterization
optional bool posterize = 7 [default = false];
// Erosion