# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
"""
CNTK core operators. Calling these operators creates nodes in the CNTK computational graph.
"""
from __future__ import division
from __future__ import print_function
import numpy as np
import numbers
from . import sequence
from .functions import CloneMethod, Function, BlockFunction, load_model, register_native_user_function, native_user_function
from cntk.internal import sanitize_input, sanitize_shape, sanitize_axis, sanitize_dynamic_axes, sanitize_axis_list, sanitize_multi_axis_reduction_list, typemap, sanitize_pooling_args, sanitize_convolution_args, sanitize_permutation
from cntk.internal.utils import get_data_type
from ..axis import Axis
from .. import cntk_py
from ..cntk_py import sentinel_value_for_auto_select_random_seed as SentinelValueForAutoSelectRandomSeed
from ..default_options import get_default_override, default_override_or
TIMES_NO_INFERRED_INPUT_RANK = cntk_py.TimesNoInferredInputRank
TIMES_REDUCE_SEQUENCE_AXIS_WITHOUT_INFERRED_INPUT_RANK = cntk_py.TimesReduceSequenceAxisWithoutInferredInputRank
CONSTANT_PAD = cntk_py.PaddingMode_CONSTANTPAD
REFLECT_PAD = cntk_py.PaddingMode_REFLECTPAD
SYMMETRIC_PAD = cntk_py.PaddingMode_SYMMETRICPAD
@typemap
def combine(*operands, **kw_name):
'''
Create a new Function instance which just combines the outputs of the specified list of
'operands' Functions such that the 'Outputs' of the new 'Function' are the union of the
'Outputs' of each of the specified 'operands' Functions. E.g., when creating a classification
model, typically the CrossEntropy loss Function and the ClassificationError Function comprise
the two roots of the computation graph which can be combined to create a single Function
with 2 outputs; viz. CrossEntropy loss and ClassificationError output.
Example:
>>> in1 = C.input_variable((4,))
>>> in2 = C.input_variable((4,))
>>> in1_data = np.asarray([[1., 2., 3., 4.]], np.float32)
>>> in2_data = np.asarray([[0., 5., -3., 2.]], np.float32)
>>> plus_operation = in1 + in2
>>> minus_operation = in1 - in2
>>> forward = C.combine([plus_operation, minus_operation]).eval({in1: in1_data, in2: in2_data})
>>> len(forward)
2
>>> list(forward.values()) # doctest: +SKIP
[array([[[ 1., -3., 6., 2.]]], dtype=float32),
array([[[ 1., 7., 0., 6.]]], dtype=float32)]
>>> x = C.input_variable((4,))
>>> _ = C.combine(x, x)
>>> _ = C.combine([x, x])
>>> _ = C.combine((x, x))
>>> _ = C.combine(C.combine(x, x), x)
Args:
operands (list): list of functions or their variables to combine
name (str, optional): the name of the Combine Function in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
name = (lambda name='': (name))(**kw_name) # Python 2.7 does not allow (*inputs, name='')
from cntk.cntk_py import combine
if len(operands) == 1 and isinstance(operands[0], (tuple, list)):
operands = operands[0]
if isinstance(operands, tuple):
operands = list(operands)
operands_unfold = []
for o in operands:
if hasattr(o, 'outputs') and len(o.outputs) > 1:
operands_unfold += o.outputs
else:
operands_unfold += [o]
return combine(operands_unfold, name)
@typemap
def as_block(composite, block_arguments_map, block_op_name, block_instance_name=''):
'''
Create a new block Function instance which just encapsulates the specified composite Function
to create a new Function that appears to be a primitive. All the arguments of the composite
being encapsulated must be Placeholder variables.
The purpose of block Functions is to enable creation of hierarchical Function graphs
where details of implementing certain building block operations can be encapsulated away
such that the actual structure of the block's implementation is not inlined into
the parent graph where the block is used, and instead the block just appears as an opaque
primitive. Users still have the ability to peek at the underlying Function graph that implements
the actual block Function.
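Example:
A minimal sketch of wrapping a composite into a block (the names here are illustrative, and the call is not verified as a doctest):
>>> p = C.placeholder(name='p')
>>> composite = C.sigmoid(p)
>>> x = C.input_variable((4,))
>>> block = C.as_block(composite, [(p, x)], 'Sigmoid')  # doctest: +SKIP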
Args:
composite: The composite Function that the block encapsulates
block_arguments_map: A list of tuples, mapping the arguments of the block's underlying composite to
the actual variables they are connected to
block_op_name: Name of the op that the block represents
block_instance_name (str, optional): the name of the block Function in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.cntk_py import as_block
return as_block(composite, block_arguments_map, block_op_name, block_instance_name)
@typemap
def as_composite(root_function, name=''):
'''
Creates a composite Function that has the specified root_function as its root.
The composite denotes a higher-level Function encapsulating the entire graph
of Functions underlying the specified root_function.
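Example:
A minimal sketch (not verified as a doctest):
>>> x = C.input_variable((4,))
>>> root = C.sigmoid(x)
>>> comp = C.as_composite(root)  # doctest: +SKIP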
Args:
root_function: the root Function; the newly created composite encapsulates the graph underlying it
name (str, optional): the name of the composite Function in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.cntk_py import as_composite
return as_composite(root_function, name)
@typemap
def alias(x, name=''):
'''
Create a new Function instance which just aliases the specified 'x' Function/Variable
such that the 'Output' of the new 'Function' is same as the 'Output' of the specified
'x' Function/Variable, and has the newly specified name.
The purpose of this operator is to create a new distinct reference to a symbolic
computation that is different from the original Function/Variable it aliases. It can
be used, for example, to substitute a specific instance of the aliased Function/Variable in the
computation graph instead of substituting all usages of the aliased Function/Variable.
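Example:
A minimal sketch (not verified as a doctest):
>>> x = C.input_variable((4,))
>>> x_alias = C.alias(x, name='x_alias')  # doctest: +SKIP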
Args:
x: The Function/Variable to alias
name (str, optional): the name of the Alias Function in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.cntk_py import alias
x = sanitize_input(x)
return alias(x, name)
@typemap
def reconcile_dynamic_axes(x, dynamic_axes_as, name=''):
'''
Create a new Function instance which reconciles the dynamic axes of the
specified tensor operands. The output of the returned Function has the sample
layout of the 'x' operand and the dynamic axes of the 'dynamic_axes_as' operand.
This operator also performs a runtime check to ensure that the dynamic axes layouts
of the two operands indeed match.
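Example:
A minimal sketch with two sequence inputs (shapes are illustrative; the runtime check requires the actual dynamic axes of the two operands to match):
>>> x = C.sequence.input_variable((4,))
>>> y = C.sequence.input_variable((4,))
>>> z = C.reconcile_dynamic_axes(x, y)  # doctest: +SKIP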
Args:
x: The Function/Variable, whose dynamic axes are to be reconciled
dynamic_axes_as: The Function/Variable to whose dynamic axes the
operand 'x''s dynamic axes are reconciled.
name (str, optional): the name of the reconcile_dynamic_axes Function in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.cntk_py import reconcile_dynamic_axes
x = sanitize_input(x)
dynamic_axes_as = sanitize_input(dynamic_axes_as)
return reconcile_dynamic_axes(x, dynamic_axes_as, name)
@typemap
def labels_to_graph(labels, name=''):
'''
Conversion node from labels to graph. Typically used as an input to the ForwardBackward node.
This node's objective is to transform the input labels into a graph representing the exact forward-backward criterion.
Example:
>>> num_classes = 2
>>> labels = C.input_variable((num_classes))
>>> graph = C.labels_to_graph(labels)
Args:
labels: input training labels
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.cntk_py import labels_to_graph
dtype = get_data_type(labels)
labels = sanitize_input(labels, dtype)
return labels_to_graph(labels, name)
@typemap
def forward_backward(graph, features, blankTokenId, delayConstraint=-1, name=''):
'''
Criterion node for training methods that rely on forward-backward Viterbi-like passes, e.g. Connectionist Temporal Classification (CTC) training.
The node takes as input the graph of labels, produced by the labels_to_graph operation, which determines the exact forward/backward procedure.
Example:
graph = C.labels_to_graph(labels)
networkOut = model(features)
fb = C.forward_backward(graph, networkOut, 132)
Args:
graph: labels graph
features: network output
blankTokenId: id of the CTC blank label
delayConstraint: label output delay constraint introduced during training that allows a shorter delay during inference. It uses the original time information to enforce that CTC tokens are only aligned within a time margin. Setting this parameter smaller results in a shorter delay between label outputs during decoding, but may hurt accuracy. delayConstraint=-1 means no constraint.
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.cntk_py import forward_backward
dtype = get_data_type(features, graph)
features = sanitize_input(features, dtype)
graph = sanitize_input(graph, dtype)
return forward_backward(graph, features, blankTokenId, delayConstraint, name)
##########################################################################
# convolution ops
##########################################################################
@typemap
def convolution(convolution_map, operand, strides=(1,), sharing=[True],
auto_padding=[True], dilation=(1,), reduction_rank=1, groups=1, max_temp_mem_size_in_samples=0, name=''):
'''
Computes the convolution of ``convolution_map`` (typically a tensor of learnable parameters) with
``operand`` (commonly an image or output of a previous convolution/pooling operation).
This operation is used in image and language processing applications. It supports arbitrary
dimensions, strides, sharing, and padding.
This function operates on input tensors with dimensions :math:`[C \\times M_1 \\times M_2 \\times \\ldots \\times M_n]`. This can be understood as a rank-n
object, where each entry consists of a :math:`C`-dimensional vector. For example, an RGB image would have dimensions
:math:`[3 \\times W \\times H]`, i.e. a :math:`[W \\times H]`-sized structure, where each entry (pixel) consists of a 3-tuple.
`convolution` convolves the input ``operand`` with a :math:`n+2` rank tensor of (typically learnable) filters called
``convolution_map`` of shape :math:`[O \\times I \\times m_1 \\times m_2 \\times \\ldots \\times m_n ]` (typically :math:`m_i \\ll M_i`).
The first dimension, :math:`O`, is the number of convolution filters (i.e. the number of
channels in the output). The second dimension, :math:`I`, must match the number of channels in the input, which can be ignored if `reduction_rank` is `0`.
The last n dimensions are the spatial extent of the filter. I.e. for each output position, a vector of
dimension :math:`O` is computed. Hence, the total number of filter parameters is :math:`O \\times I \\times m_1 \\times m_2 \\times \\ldots \\times m_n`
Example:
>>> img = np.reshape(np.arange(25.0, dtype = np.float32), (1, 5, 5))
>>> x = C.input_variable(img.shape)
>>> filter = np.reshape(np.array([2, -1, -1, 2], dtype = np.float32), (1, 2, 2))
>>> kernel = C.constant(value = filter)
>>> np.round(C.convolution(kernel, x, auto_padding = [False]).eval({x: [img]}),5)
array([[[[ 6., 8., 10., 12.],
[ 16., 18., 20., 22.],
[ 26., 28., 30., 32.],
[ 36., 38., 40., 42.]]]], dtype=float32)
Args:
convolution_map: convolution filter weights, stored as a tensor of dimensions :math:`[O \\times I \\times m_1 \\times m_2 \\times \\ldots \\times m_n]`,
where :math:`[m_1 \\times m_2 \\times \\ldots \\times m_n]` must be the kernel dimensions (spatial extent of the filter).
operand: convolution input. A tensor with dimensions :math:`[I \\times M_1 \\times M_2 \\times \\ldots \\times M_n]`.
strides (tuple, optional): stride dimensions. If strides[i] > 1 then only pixel positions that are multiples of strides[i] are computed.
For example, a stride of 2 will lead to a halving of that dimension. The first stride dimension that lines up with the number
of input channels can be set to any non-zero value.
sharing (bool): sharing flags for each input dimension
auto_padding (bool): flags for each input dimension whether it should be padded automatically (that is,
symmetrically) or not padded at all. Padding means that the convolution kernel is applied to all pixel positions, where all
pixels outside the area are assumed zero ("padded with zeroes"). Without padding, the kernels are only shifted over
positions where all inputs to the kernel still fall inside the area. In this case, the output dimension will be less than
the input dimension. The last value that lines up with the number of input channels must be false.
dilation (tuple, optional): the dilation value along each axis; the default of 1 means no dilation.
reduction_rank (`int`, default 1): must be 0 or 1; 0 means the input has no depth or channel dimension, and 1 means it does.
groups (`int`, default 1): number of groups during convolution, which controls the connections between input and output channels. The default value of 1
means that all input channels are convolved to produce all output channels. A value of N means that the input (and output) channels are
divided into N groups, with the input channels in one group (say the i-th input group) contributing to the output channels in only one group (the i-th output group).
The numbers of input and output channels must be divisible by the value of the groups argument. Also, the value of this argument must be strictly positive, i.e. groups > 0.
max_temp_mem_size_in_samples (int): maximum amount of auxiliary memory (in samples) that should be reserved to perform convolution
operations. Some convolution engines (e.g. cuDNN and GEMM-based engines) can benefit from using workspace as it may improve
performance. However, sometimes this may lead to higher memory utilization. Default is 0 which means the same as the input
samples.
name (str, optional): the name of the Function instance in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.cntk_py import convolution
operand = sanitize_input(operand)
strides, sharing, auto_padding = sanitize_convolution_args(strides, sharing, auto_padding)
dilation = sanitize_shape(dilation)
return convolution(convolution_map, operand, strides, sharing, auto_padding, dilation,
reduction_rank, groups, max_temp_mem_size_in_samples, name)
@typemap
def convolution_transpose(convolution_map, operand, strides=(1,), sharing=[True],
auto_padding=[True], output_shape=None, dilation=(1,), reduction_rank=1, max_temp_mem_size_in_samples=0, name=''):
'''
Computes the transposed convolution of ``convolution_map`` (typically a tensor of learnable parameters) with
``operand`` (commonly an image or output of a previous convolution/pooling operation).
This is also known as a ``fractionally strided convolution``, or ``deconvolution``.
This operation is used in image and language processing applications. It supports arbitrary
dimensions, strides, sharing, and padding.
This function operates on input tensors with dimensions :math:`[C \\times M_1 \\times M_2 \\times \\ldots \\times M_n]`. This can be understood as a rank-n
object, where each entry consists of a :math:`C`-dimensional vector. For example, an RGB image would have dimensions
:math:`[3 \\times W \\times H]`, i.e. a :math:`[W \\times H]`-sized structure, where each entry (pixel) consists of a 3-tuple.
`convolution_transpose` convolves the input ``operand`` with a :math:`n+2` rank tensor of (typically learnable) filters called
``convolution_map`` of shape :math:`[I \\times O \\times m_1 \\times m_2 \\times \\ldots \\times m_n ]` (typically :math:`m_i \\ll M_i`).
The first dimension, :math:`I`, must match the number of channels in the input. The second dimension, :math:`O`, is the number of convolution filters (i.e. the number of
channels in the output).
The last n dimensions are the spatial extent of the filter. I.e. for each output position, a vector of
dimension :math:`O` is computed. Hence, the total number of filter parameters is :math:`I \\times O \\times m_1 \\times m_2 \\times \\ldots \\times m_n`
Example:
>>> img = np.reshape(np.arange(9.0, dtype = np.float32), (1, 3, 3))
>>> x = C.input_variable(img.shape)
>>> filter = np.reshape(np.array([2, -1, -1, 2], dtype = np.float32), (1, 2, 2))
>>> kernel = C.constant(value = filter)
>>> np.round(C.convolution_transpose(kernel, x, auto_padding = [False]).eval({x: [img]}),5)
array([[[[ 0., 2., 3., -2.],
[ 6., 4., 6., -1.],
[ 9., 10., 12., 2.],
[ -6., 5., 6., 16.]]]], dtype=float32)
Args:
convolution_map: convolution filter weights, stored as a tensor of dimensions :math:`[I \\times O \\times m_1 \\times m_2 \\times \\ldots \\times m_n]`,
where :math:`[m_1 \\times m_2 \\times \\ldots \\times m_n]` must be the kernel dimensions (spatial extent of the filter).
operand: convolution input. A tensor with dimensions :math:`[I \\times M_1 \\times M_2 \\times \\ldots \\times M_n]`.
strides (tuple, optional): stride dimensions. If strides[i] > 1 then only pixel positions that are multiples of strides[i] are computed.
For example, a stride of 2 will lead to a halving of that dimension. The first stride dimension that lines up with the number
of input channels can be set to any non-zero value.
sharing (bool): sharing flags for each input dimension
auto_padding (bool): flags for each input dimension whether it should be padded automatically (that is,
symmetrically) or not padded at all. Padding means that the convolution kernel is applied to all pixel positions, where all
pixels outside the area are assumed zero ("padded with zeroes"). Without padding, the kernels are only shifted over
positions where all inputs to the kernel still fall inside the area. In this case, the output dimension will be less than
the input dimension. The last value that lines up with the number of input channels must be false.
output_shape: the user-expected output shape after the transposed convolution.
dilation (tuple, optional): the dilation value along each axis; the default of 1 means no dilation.
reduction_rank (`int`, default 1): must be 0 or 1; 0 means the input has no depth or channel dimension, and 1 means it does.
max_temp_mem_size_in_samples (int): maximum amount of auxiliary memory (in samples) that should be reserved to perform convolution
operations. Some convolution engines (e.g. cuDNN and GEMM-based engines) can benefit from using workspace as it may improve
performance. However, sometimes this may lead to higher memory utilization. Default is 0 which means the same as the input
samples.
name (str, optional): the name of the Function instance in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.cntk_py import convolution_transpose
operand = sanitize_input(operand)
strides, sharing, auto_padding = sanitize_convolution_args(strides, sharing, auto_padding)
if output_shape is None:
output_shape = (0,)
output_shape = sanitize_shape(output_shape)
dilation = sanitize_shape(dilation)
return convolution_transpose(convolution_map, operand, strides, sharing, auto_padding,
output_shape, dilation, reduction_rank, max_temp_mem_size_in_samples, name)
from cntk.cntk_py import PoolingType_Max, PoolingType_Average
MAX_POOLING = PoolingType_Max
'''int: constant used to specify maximum pooling'''
AVG_POOLING = PoolingType_Average
'''int: constant used to specify average pooling'''
@typemap
def roipooling(operand, rois, pooling_type, roi_output_shape, spatial_scale, name=''):
'''
The ROI (Region of Interest) pooling operation pools over sub-regions of an input volume and produces
a fixed-size output volume regardless of the ROI size. It is used, for example, for object detection.
Each input image has a fixed number of regions of interest, which are specified as bounding boxes (x, y, w, h)
that are relative to the image size [W x H]. This operation can be used as a replacement for the final
pooling layer of an image classification network (as presented in Fast R-CNN and others).
.. versionchanged:: 2.1
The signature was updated to match the Caffe implementation:
the parameters `pooling_type` and `spatial_scale` were added, and
the coordinates for the parameters `rois` are now absolute to the original image size.
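Example:
A minimal sketch (the shapes and the ROI layout are illustrative only; not verified as a doctest):
>>> conv_out = C.input_variable((3, 20, 20))
>>> rois = C.input_variable((16, 4))
>>> roi_out = C.roipooling(conv_out, rois, C.MAX_POOLING, (7, 7), 1.0)  # doctest: +SKIP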
Args:
operand: a convolutional feature map as the input volume ([W x H x C x N]).
pooling_type: only :const:`~cntk.ops.MAX_POOLING`
rois: the coordinates of the ROIs per image ([4 x roisPerImage x N]), each ROI is (x1, y1, x2, y2) absolute to original image size.
roi_output_shape: dimensions (width x height) of the ROI pooling output shape
spatial_scale: the scale of operand from the original image size.
name (str, optional): the name of the Function instance in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.cntk_py import roipooling
if pooling_type != MAX_POOLING:
raise ValueError('Unsupported pooling type; ROIPooling supports only MAX pooling.')
operand = sanitize_input(operand)
rois = sanitize_input(rois)
roi_output_shape = sanitize_shape(roi_output_shape)
return roipooling(operand, rois, pooling_type, roi_output_shape, spatial_scale, name)
@typemap
def pooling(operand, pooling_type, pooling_window_shape, strides=(1,), auto_padding=[False],
ceil_out_dim=False, include_pad=False, name=''):
'''
The pooling operations compute a new tensor by selecting the maximum or average value in the pooling input.
In the case of average pooling with padding, the average is only over the valid region.
N-dimensional pooling allows creating max or average pooling of any dimension, stride, or padding.
Example:
>>> img = np.reshape(np.arange(16, dtype = np.float32), [1, 4, 4])
>>> x = C.input_variable(img.shape)
>>> C.pooling(x, C.AVG_POOLING, (2,2), (2,2)).eval({x : [img]})
array([[[[ 2.5, 4.5],
[ 10.5, 12.5]]]], dtype=float32)
>>> C.pooling(x, C.MAX_POOLING, (2,2), (2,2)).eval({x : [img]})
array([[[[ 5., 7.],
[ 13., 15.]]]], dtype=float32)
Args:
operand: pooling input
pooling_type: one of :const:`~cntk.ops.MAX_POOLING` or :const:`~cntk.ops.AVG_POOLING`
pooling_window_shape: dimensions of the pooling window
strides (default 1): strides.
auto_padding (default [False,]): automatic padding flags for each input dimension.
ceil_out_dim (default False): ceiling while computing output size
include_pad(default False): include pad while average pooling
name (str, optional): the name of the Function instance in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.cntk_py import pooling
operand = sanitize_input(operand)
pooling_window_shape, strides, auto_padding = sanitize_pooling_args(pooling_window_shape, strides, auto_padding)
return pooling(operand, pooling_type, pooling_window_shape, strides, auto_padding,
ceil_out_dim, include_pad, name)
MAX_UNPOOLING = PoolingType_Max
'''int: constant used to specify maximum unpooling'''
@typemap
def unpooling(operand, pooling_input, unpooling_type, unpooling_window_shape, strides=(1,), auto_padding=[False],
name=''):
'''
Unpools the ``operand`` using information from ``pooling_input``. Unpooling mirrors the operations
performed by pooling and depends on the values provided to the corresponding pooling operation. The output
should have the same shape as pooling_input. Pooling the result of an unpooling operation should
give back the original input.
Example:
>>> img = np.reshape(np.arange(16, dtype = np.float32), [1, 4, 4])
>>> x = C.input_variable(img.shape)
>>> y = C.pooling(x, C.MAX_POOLING, (2,2), (2,2))
>>> C.unpooling(y, x, C.MAX_UNPOOLING, (2,2), (2,2)).eval({x : [img]})
array([[[[ 0., 0., 0., 0.],
[ 0., 5., 0., 7.],
[ 0., 0., 0., 0.],
[ 0., 13., 0., 15.]]]], dtype=float32)
Args:
operand: unpooling input
pooling_input: input to the corresponding pooling operation
unpooling_type: only :const:`~cntk.ops.MAX_UNPOOLING` is supported now
unpooling_window_shape: dimensions of the unpooling window
strides (default 1): strides.
auto_padding: automatic padding flags for each input dimension.
name (str, optional): the name of the Function instance in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.cntk_py import unpooling
operand = sanitize_input(operand)
pooling_input = sanitize_input(pooling_input)
unpooling_window_shape, strides, auto_padding = sanitize_pooling_args(unpooling_window_shape, strides, auto_padding)
return unpooling(operand, pooling_input, unpooling_type,
unpooling_window_shape, strides, auto_padding, name)
@typemap
def batch_normalization(operand, scale, bias, running_mean, running_inv_std, spatial,
normalization_time_constant=5000, blend_time_constant=0,
epsilon=0.00001, use_cudnn_engine=False, name='', running_count=None):
# TODO: running_count should be right after running_inv_std; no need for upwards compat
'''
Normalizes layer outputs for every minibatch for each output (feature) independently
and applies an affine transformation to preserve the representation of the layer.
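Example:
A minimal non-spatial sketch (the parameter and constant shapes are assumed to match the input; not verified as a doctest):
>>> x = C.input_variable((3,))
>>> scale = C.parameter((3,), init=1.0)
>>> bias = C.parameter((3,), init=0.0)
>>> mean = C.constant(0., (3,))
>>> inv_std = C.constant(0., (3,))
>>> count = C.constant(0.)
>>> bn = C.batch_normalization(x, scale, bias, mean, inv_std, spatial=False, running_count=count)  # doctest: +SKIP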
Args:
operand: input of the batch normalization operation
scale: parameter tensor that holds the learned componentwise-scaling factors
bias: parameter tensor that holds the learned bias. ``scale`` and ``bias`` must have the same
dimensions which must be equal to the input dimensions in case of ``spatial`` = False or
number of output convolution feature maps in case of ``spatial`` = True
running_mean: running mean which is used during evaluation phase and might be used during
training as well. You must pass a constant tensor with initial value 0 and the same dimensions
as ``scale`` and ``bias``
running_inv_std: running variance, represented in the same way as ``running_mean``
running_count: Denotes the total number of samples that have been used so far to compute
the ``running_mean`` and ``running_inv_std`` parameters. You must pass a scalar, e.g. a rank-0 ``constant(val)``.
spatial(bool): flag that indicates whether to compute the mean/variance for each feature in a minibatch
independently or, in the case of convolutional layers, per feature map
normalization_time_constant(float, default 5000): time constant for computing running average of
mean and variance as a low-pass filtered version of the batch statistics.
blend_time_constant(float, default 0): constant for smoothing batch estimates with the running
statistics
epsilon: conditioner constant added to the variance when computing the inverse standard deviation
use_cudnn_engine(bool, default False):
name (str, optional): the name of the Function instance in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
if running_count is None:
running_count = constant(0)
import warnings
warnings.warn("batch_normalization requires an additional "
"'running_count' parameter, which can be "
"instantiated as 'constant(0)'", Warning)
from cntk.cntk_py import batch_normalization
operand = sanitize_input(operand)
return batch_normalization(operand, scale, bias, running_mean, running_inv_std, running_count, spatial,
normalization_time_constant, blend_time_constant,
epsilon, use_cudnn_engine, name)
##########################################################################
# comparison ops
##########################################################################
@typemap
def less(left, right, name=''):
'''
Elementwise 'less' comparison of two tensors. Result is 1 if left < right else 0.
Example:
>>> C.less([41., 42., 43.], [42., 42., 42.]).eval()
array([ 1., 0., 0.], dtype=float32)
>>> C.less([-1,0,1], [0]).eval()
array([ 1., 0., 0.], dtype=float32)
Args:
left: left side tensor
right: right side tensor
name (str, optional): the name of the Function instance in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.cntk_py import less
dtype = get_data_type(left, right)
left = sanitize_input(left, dtype)
right = sanitize_input(right, dtype)
return less(left, right, name)
@typemap
def equal(left, right, name=''):
'''
Elementwise 'equal' comparison of two tensors. Result is 1 if values are equal 0 otherwise.
Example:
>>> C.equal([41., 42., 43.], [42., 42., 42.]).eval()
array([ 0., 1., 0.], dtype=float32)
>>> C.equal([-1,0,1], [1]).eval()
array([ 0., 0., 1.], dtype=float32)
Args:
left: left side tensor
right: right side tensor
name (str, optional): the name of the Function instance in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.cntk_py import equal
dtype = get_data_type(left, right)
left = sanitize_input(left, dtype)
right = sanitize_input(right, dtype)
return equal(left, right, name)
@typemap
def greater(left, right, name=''):
'''
Elementwise 'greater' comparison of two tensors. Result is 1 if left > right else 0.
Example:
>>> C.greater([41., 42., 43.], [42., 42., 42.]).eval()
array([ 0., 0., 1.], dtype=float32)
>>> C.greater([-1,0,1], [0]).eval()
array([ 0., 0., 1.], dtype=float32)
Args:
left: left side tensor
right: right side tensor
name (str, optional): the name of the Function instance in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.cntk_py import greater
dtype = get_data_type(left, right)
left = sanitize_input(left, dtype)
right = sanitize_input(right, dtype)
return greater(left, right, name)
@typemap
def greater_equal(left, right, name=''):
'''
Elementwise 'greater equal' comparison of two tensors. Result is 1 if left >= right else 0.
Example:
>>> C.greater_equal([41., 42., 43.], [42., 42., 42.]).eval()
array([ 0., 1., 1.], dtype=float32)
>>> C.greater_equal([-1,0,1], [0]).eval()
array([ 0., 1., 1.], dtype=float32)
Args:
left: left side tensor
right: right side tensor
name (str, optional): the name of the Function instance in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.cntk_py import greater_equal
dtype = get_data_type(left, right)
left = sanitize_input(left, dtype)
right = sanitize_input(right, dtype)
return greater_equal(left, right, name)
@typemap
def not_equal(left, right, name=''):
'''
Elementwise 'not equal' comparison of two tensors. Result is 1 if left != right else 0.
Example:
>>> C.not_equal([41., 42., 43.], [42., 42., 42.]).eval()
array([ 1., 0., 1.], dtype=float32)
>>> C.not_equal([-1,0,1], [0]).eval()
array([ 1., 0., 1.], dtype=float32)
Args:
left: left side tensor
right: right side tensor
name (str, optional): the name of the Function instance in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.cntk_py import not_equal
dtype = get_data_type(left, right)
left = sanitize_input(left, dtype)
right = sanitize_input(right, dtype)
return not_equal(left, right, name)
@typemap
def less_equal(left, right, name=''):
'''
Elementwise 'less equal' comparison of two tensors. Result is 1 if left <= right else 0.
Example:
>>> C.less_equal([41., 42., 43.], [42., 42., 42.]).eval()
array([ 1., 1., 0.], dtype=float32)
>>> C.less_equal([-1,0,1], [0]).eval()
array([ 1., 1., 0.], dtype=float32)
Args:
left: left side tensor
right: right side tensor
name (str, optional): the name of the Function instance in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.cntk_py import less_equal
dtype = get_data_type(left, right)
left = sanitize_input(left, dtype)
right = sanitize_input(right, dtype)
return less_equal(left, right, name)
##########################################################################
# linear ops
##########################################################################
# This is a helper to wrap associative operations like plus(left, right) such
# that they accept multiple arguments.
def associative_multi_arg(f):
'''
The output of this operation is the result of an operation (`plus`, `log_add_exp`, `element_times`, `element_max`, `element_min`)
of two or more input tensors. Broadcasting is supported.
Example:
>>> C.plus([1, 2, 3], [4, 5, 6]).eval()
array([ 5., 7., 9.], dtype=float32)
>>> C.element_times([5., 10., 15., 30.], [2.]).eval()
array([ 10., 20., 30., 60.], dtype=float32)
>>> C.plus([-5, -4, -3, -2, -1], [10], [3, 2, 3, 2, 3], [-13], [+42], 'multi_arg_example').eval()
array([ 37., 37., 39., 39., 41.], dtype=float32)
>>> C.element_times([5., 10., 15., 30.], [2.], [1., 2., 1., 2.]).eval()
array([ 10., 40., 30., 120.], dtype=float32)
>>> a = np.arange(3,dtype=np.float32)
>>> np.exp(C.log_add_exp(np.log(1+a), np.log(1+a*a)).eval())
array([ 2., 4., 8.], dtype=float32)
Args:
arg1: left side tensor
arg2: right side tensor
*more_args: additional inputs
name (str, optional): the name of the Function instance in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
from functools import wraps
@wraps(f)
def associative_binary_operation(arg1, arg2, *more_args, **name_kwarg):
name = (lambda name='': name)(**name_kwarg) # Python 2.7 does not allow (arg1, arg2, *more, name='')
# in case name is specified without keyword
if not name and more_args and isinstance(more_args[-1], str):
name = more_args[-1]
more_args = more_args[0:-1]
# implement as a tree reduction
def tree_reduce(args, name):
n = len(args)
if n > 2: return f(tree_reduce(args[:n//2], name=''), tree_reduce(args[n//2:], name=''), name=name) # only the outer-most op gets the 'name' parameter
elif n == 2: return f(args[0],args[1], name=name)
else: return args[0]
return tree_reduce((arg1, arg2) + more_args, name=name)
return associative_binary_operation
@associative_multi_arg
@typemap
def plus(left, right, name=''):
'''
The output of this operation is the sum of the two or more input tensors. It supports broadcasting.
Example:
>>> C.plus([1, 2, 3], [4, 5, 6]).eval()
array([ 5., 7., 9.], dtype=float32)
>>> C.plus([-5, -4, -3, -2, -1], [10]).eval()
array([ 5., 6., 7., 8., 9.], dtype=float32)
>>> C.plus([-5, -4, -3, -2, -1], [10], [3, 2, 3, 2, 3], [-13], [+42], 'multi_arg_example').eval()
array([ 37., 37., 39., 39., 41.], dtype=float32)
>>> C.plus([-5, -4, -3, -2, -1], [10], [3, 2, 3, 2, 3]).eval()
array([ 8., 8., 10., 10., 12.], dtype=float32)
Args:
arg1: left side tensor
arg2: right side tensor
*more_args: additional inputs
name (str, optional): the name of the Function instance in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.cntk_py import plus as cntk_py_plus
dtype = get_data_type(left, right)
left = sanitize_input(left, dtype)
right = sanitize_input(right, dtype)
return cntk_py_plus(left, right, name)
@typemap
def minus(left, right, name=''):
'''
The output of this operation is the left tensor minus the right tensor. It supports broadcasting.
Example:
>>> C.minus([1, 2, 3], [4, 5, 6]).eval()
array([-3., -3., -3.], dtype=float32)
>>> C.minus([[1,2],[3,4]], 1).eval()
array([[ 0., 1.],
[ 2., 3.]], dtype=float32)
Args:
left: left side tensor
right: right side tensor
name (str, optional): the name of the Function instance in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.cntk_py import minus
dtype = get_data_type(left, right)
left = sanitize_input(left, dtype)
right = sanitize_input(right, dtype)
return minus(left, right, name)
@typemap
def pow(base, exponent, name=''):
'''
Computes `base` raised to the power of `exponent`. It supports broadcasting.
This is well defined if `base` is non-negative or `exponent` is an integer.
Otherwise the result is NaN. The gradient with respect to the base is well
defined if the forward operation is well defined. The gradient with respect
to the exponent is well defined if the base is non-negative, and it is set
to 0 otherwise.
Example:
>>> C.pow([1, 2, -2], [3, -2, 3]).eval()
array([ 1. , 0.25, -8. ], dtype=float32)
>>> C.pow([[0.5, 2],[4, 1]], -2).eval()
array([[ 4. , 0.25 ],
[ 0.0625, 1. ]], dtype=float32)
Args:
base: base tensor
exponent: exponent tensor
name (str, optional): the name of the Function instance in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.cntk_py import pow
dtype = get_data_type(base, exponent)
base = sanitize_input(base, dtype)
exponent = sanitize_input(exponent, dtype)
return pow(base, exponent, name)
@associative_multi_arg
@typemap
def element_times(left, right, name=''):
'''
The output of this operation is the element-wise product of the two or more input
tensors. It supports broadcasting.
Example:
>>> C.element_times([1., 1., 1., 1.], [0.5, 0.25, 0.125, 0.]).eval()
array([ 0.5 , 0.25 , 0.125, 0. ], dtype=float32)
>>> C.element_times([5., 10., 15., 30.], [2.]).eval()
array([ 10., 20., 30., 60.], dtype=float32)
>>> C.element_times([5., 10., 15., 30.], [2.], [1., 2., 1., 2.]).eval()
array([ 10., 40., 30., 120.], dtype=float32)
Args:
arg1: left side tensor
arg2: right side tensor
*more_args: additional inputs
name (str, optional): the name of the Function instance in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.cntk_py import element_times as cntk_py_element_times
dtype = get_data_type(left, right)
left = sanitize_input(left, dtype)
right = sanitize_input(right, dtype)
return cntk_py_element_times(left, right, name)
# TODO: move element_max/min to C++
@associative_multi_arg
@typemap
def element_max(left, right, name=''):
'''
The output of this operation is the element-wise max of the two or more input
tensors. It supports broadcasting.
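Example:
A small sketch (the output shown is for illustration and is not verified):
>>> C.element_max([1., 5., 3.], [4., 2., 6.]).eval()  # doctest: +SKIP
array([ 4.,  5.,  6.], dtype=float32)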
Args:
arg1: left side tensor
arg2: right side tensor
*more_args: additional inputs
name (str, optional): the name of the Function instance in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
gt = greater(left, right)
# TODO: use as_block()
return element_select(gt, left, right, name)
@associative_multi_arg
@typemap
def element_min(left, right, name=''):
'''
The output of this operation is the element-wise min of the two or more input
tensors. It supports broadcasting.
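Example:
A small sketch (the output shown is for illustration and is not verified):
>>> C.element_min([1., 5., 3.], [4., 2., 6.]).eval()  # doctest: +SKIP
array([ 1.,  2.,  3.], dtype=float32)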
Args:
arg1: left side tensor
arg2: right side tensor
*more_args: additional inputs
name (str, optional): the name of the Function instance in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
lt = less(left, right)
# TODO: use as_block()
return element_select(lt, left, right, name)
@typemap
def element_divide(left, right, name=''):
'''
The output of this operation is the element-wise division of the two input
tensors. It supports broadcasting.
Example:
>>> C.element_divide([1., 1., 1., 1.], [0.5, 0.25, 0.125, 0.]).eval()
array([ 2., 4., 8., 0.], dtype=float32)
>>> C.element_divide([5., 10., 15., 30.], [2.]).eval()
array([ 2.5, 5. , 7.5, 15. ], dtype=float32)
Args:
left: left side tensor
right: right side tensor
name (str, optional): the name of the Function instance in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.cntk_py import element_divide
dtype = get_data_type(left, right)
left = sanitize_input(left, dtype)
right = sanitize_input(right, dtype)
return element_divide(left, right, name)
@associative_multi_arg
@typemap
def log_add_exp(left, right, name=''):
'''
Calculates the log of the sum of the exponentials
of the two or more input tensors. It supports broadcasting.
Example:
>>> a = np.arange(3,dtype=np.float32)
>>> np.exp(C.log_add_exp(np.log(1+a), np.log(1+a*a)).eval())
array([ 2., 4., 8.], dtype=float32)
>>> np.exp(C.log_add_exp(np.log(1+a), [0.]).eval())
array([ 2., 3., 4.], dtype=float32)
Args:
arg1: left side tensor
arg2: right side tensor
*more_args: additional inputs
name (str, optional): the name of the Function instance in the network
Returns:
:class:`~cntk.ops.functions.Function`
'''
from cntk.cntk_py import log_add_exp as cntk_py_log_add_exp
dtype = get_data_type(left, right)
left = sanitize_input(left, dtype)
right = sanitize_input(right, dtype)
return cntk_py_log_add_exp(left, right, name)
INFINITELY_REPEAT = cntk_py.MinibatchSource.infinitely_repeat
@typemap
def times(left, right, output_rank=1, infer_input_rank_to_map=TIMES_NO_INFERRED_INPUT_RANK, name=''):
'''
The output of this operation is the matrix product of the two input matrices.
It supports broadcasting. Sparse is supported for the left operand if it is a matrix.
The operator '@' has been overloaded such that in Python 3.5 and later X @ W equals times(X, W).
For better performance when a times operation on a sequence is followed by sequence.reduce_sum, use
infer_input_rank_to_map=TIMES_REDUCE_SEQUENCE_AXIS_WITHOUT_INFERRED_INPUT_RANK, i.e. replace the following::
sequence.reduce_sum(times(seq1, seq2))
with::
times(seq1, seq2, infer_input_rank_to_map=TIMES_REDUCE_SEQUENCE_AXIS_WITHOUT_INFERRED_INPUT_RANK)
Example:
>>> C.times([[1,2],[3,4]], [[5],[6]]).eval()
array([[ 17.],
[ 39.]], dtype=float32)
>>> C.times(1.*np.reshape(np.arange(8), (2,2,2)),1.*np.reshape(np.arange(8), (2,2,2)), output_rank=1).eval()
array([[ 28., 34.],
[ 76., 98.]])
>>> C.times(1.*np.reshape(np.arange(8), (2,2,2)),1.*np.reshape(np.arange(8), (2,2,2)), output_rank=2).eval()
array([[[[ 4., 5.],
[ 6., 7.]],
<BLANKLINE>
[[ 12., 17.],
[ 22., 27.]]],
<BLANKLINE>
<BLANKLINE>
[[[ 20., 29.],
[ 38., 47.]],
<BLANKLINE>
[[ 28., 41.],
[ 54., 67.]]]])
Args:
left: left side matrix or tensor
right: right side matrix or tensor
output_rank (int): in case we have tensors as arguments, output_rank represents
the number of axes to be collapsed in order to transform the tensors
into matrices, perform the operation and then reshape back (explode the axes)
infer_input_rank_to_map (int): meant for internal use only. Always use the default value.
name (str, optional): the name of the Function instance in the network