diff --git a/.azure-pipelines/Linux-CI.yml b/.azure-pipelines/Linux-CI.yml index 3c84e9d1617..599af1eeba6 100644 --- a/.azure-pipelines/Linux-CI.yml +++ b/.azure-pipelines/Linux-CI.yml @@ -44,9 +44,23 @@ jobs: python -m pip install virtualenv python -m virtualenv py$(python.version) source py$(python.version)/bin/activate + + git clone https://github.com/protocolbuffers/protobuf.git + cd protobuf + git checkout 3.11.x + git submodule update --init --recursive + mkdir build_source && cd build_source + + cmake ../cmake -DBUILD_SHARED_LIBS=OFF -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_INSTALL_SYSCONFDIR=/etc -DCMAKE_POSITION_INDEPENDENT_CODE=ON -Dprotobuf_BUILD_TESTS=OFF -DCMAKE_BUILD_TYPE=Release + export NUM_PROCESSOR=`grep -c ^processor /proc/cpuinfo` + sudo make -j${NUM_PROCESSOR} + sudo make install + ldconfig + cd ../.. + python -m pip install --upgrade pip - python -m pip install numpy protobuf - sudo apt-get install protobuf-compiler libprotoc-dev + python -m pip install numpy protobuf==3.11.3 + sudo apt-get install -qq -o=Dpkg::Use-Pty=0 -y --no-install-recommends dos2unix git submodule update --init --recursive export ONNX_BUILD_TESTS=1 diff --git a/VERSION_NUMBER b/VERSION_NUMBER index dd664ddf4ca..5577eb4ef04 100644 --- a/VERSION_NUMBER +++ b/VERSION_NUMBER @@ -1 +1 @@ -1.8.201 +1.8.202 diff --git a/docs/Changelog.md b/docs/Changelog.md index 45f82342d1f..7902c24206e 100644 --- a/docs/Changelog.md +++ b/docs/Changelog.md @@ -18633,19 +18633,21 @@ This version of the operator has been available since version 14 of the default and the running statistics in training mode (training_mode=True). There are multiple cases for the number of outputs, which we list below: - Output case #1: Y, running_mean, running_var, current_mean, current_var (training_mode=True) + Output case #1: Y, running_mean, running_var (training_mode=True) Output case #2: Y (training_mode=False) When training_mode=False, extra outputs are invalid. The outputs are updated as follows when training_mode=True: ``` - current_mean = ReduceMean(X, axis=all_except_channel_index) - current_var = ReduceVar(X, axis=all_except_channel_index) - running_mean = input_mean * momentum + current_mean * (1 - momentum) running_var = input_var * momentum + current_var * (1 - momentum) Y = (X - current_mean) / sqrt(current_var + epsilon) * scale + B + + where: + + current_mean = ReduceMean(X, axis=all_except_channel_index) + current_var = ReduceVar(X, axis=all_except_channel_index) ``` When training_mode=False: @@ -18687,7 +18689,7 @@ This version of the operator has been available since version 14 of the default
running (training) or estimated (testing) variance tensor of shape (C).
-#### Outputs (1 - 5)
+#### Outputs (1 - 3)
Y (differentiable) : T
@@ -18696,10 +18698,6 @@ This version of the operator has been available since version 14 of the default
The running mean after the BatchNormalization operator.
running_var (optional, non-differentiable) : T
The running variance after the BatchNormalization operator.
-current_mean (optional, non-differentiable) : T
-Current mean used during training to speed up gradient computation.
-current_var (optional, non-differentiable) : T
-Current variance used during training to speed up gradient computation.
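The training-mode update rules quoted in the spec above can be sanity-checked with a small NumPy sketch. This is not part of the patch; the names follow the spec, and the channel axis is taken to be axis 1 (NCHW layout):

```python
import numpy as np

def batchnorm_training_reference(x, scale, bias, input_mean, input_var,
                                 momentum=0.9, epsilon=1e-5):
    # Reduce over every axis except the channel axis (assumed to be axis 1).
    axes = tuple(i for i in range(x.ndim) if i != 1)
    current_mean = x.mean(axis=axes)
    current_var = x.var(axis=axes)

    # Running statistics are an exponential moving average of the batch statistics.
    running_mean = input_mean * momentum + current_mean * (1 - momentum)
    running_var = input_var * momentum + current_var * (1 - momentum)

    # Broadcast the per-channel statistics back to the input layout and normalize.
    shape = [1, -1] + [1] * (x.ndim - 2)
    y = (x - current_mean.reshape(shape)) / np.sqrt(current_var.reshape(shape) + epsilon)
    y = y * scale.reshape(shape) + bias.reshape(shape)
    return y, running_mean, running_var
```

The spec's ReduceVar corresponds to the population variance, which is what `numpy.var` computes by default (`ddof=0`), matching the reference implementation in `onnx/backend/test/case/node/batchnorm.py`.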
#### Type Constraints diff --git a/docs/Operators.md b/docs/Operators.md index 6bf612f51b2..447131ed397 100644 --- a/docs/Operators.md +++ b/docs/Operators.md @@ -1863,19 +1863,21 @@ expect(node, inputs=[x], outputs=[y], name='test_averagepool_3d_default') and the running statistics in training mode (training_mode=True). There are multiple cases for the number of outputs, which we list below: - Output case #1: Y, running_mean, running_var, current_mean, current_var (training_mode=True) + Output case #1: Y, running_mean, running_var (training_mode=True) Output case #2: Y (training_mode=False) When training_mode=False, extra outputs are invalid. The outputs are updated as follows when training_mode=True: ``` - current_mean = ReduceMean(X, axis=all_except_channel_index) - current_var = ReduceVar(X, axis=all_except_channel_index) - running_mean = input_mean * momentum + current_mean * (1 - momentum) running_var = input_var * momentum + current_var * (1 - momentum) Y = (X - current_mean) / sqrt(current_var + epsilon) * scale + B + + where: + + current_mean = ReduceMean(X, axis=all_except_channel_index) + current_var = ReduceVar(X, axis=all_except_channel_index) ``` When training_mode=False: @@ -1919,7 +1921,7 @@ Other versions of this operator: 1running (training) or estimated (testing) variance tensor of shape (C). -#### Outputs (1 - 5) +#### Outputs (1 - 3)
Y (differentiable) : T
@@ -1928,10 +1930,6 @@ Other versions of this operator:
The running mean after the BatchNormalization operator.
running_var (optional, non-differentiable) : T
The running variance after the BatchNormalization operator.
-current_mean (optional, non-differentiable) : T
-Current mean used during training to speed up gradient computation.
-current_var (optional, non-differentiable) : T
-Current variance used during training to speed up gradient computation.
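The Operators.md hunks mirror the Changelog, so the same reduced output contract applies here. As a quick cross-check against the shape-inference changes further down in `onnx/defs/nn/defs.cc`, here is a hedged sketch (not part of the patch, and it assumes an ONNX build that already contains this change) that builds a training-mode node with exactly three outputs and runs shape inference:

```python
from onnx import TensorProto, helper, shape_inference

# Training-mode BatchNormalization now takes exactly 3 outputs.
node = helper.make_node(
    'BatchNormalization',
    inputs=['x', 'scale', 'bias', 'input_mean', 'input_var'],
    outputs=['y', 'running_mean', 'running_var'],
    training_mode=1,
)

graph = helper.make_graph(
    [node],
    'batchnorm_train',
    inputs=[
        helper.make_tensor_value_info('x', TensorProto.FLOAT, (2, 3, 4, 5)),
        helper.make_tensor_value_info('scale', TensorProto.FLOAT, (3,)),
        helper.make_tensor_value_info('bias', TensorProto.FLOAT, (3,)),
        helper.make_tensor_value_info('input_mean', TensorProto.FLOAT, (3,)),
        helper.make_tensor_value_info('input_var', TensorProto.FLOAT, (3,)),
    ],
    outputs=[helper.make_tensor_value_info('y', TensorProto.FLOAT, (2, 3, 4, 5))],
)

model = helper.make_model(graph, opset_imports=[helper.make_opsetid('', 14)])
inferred = shape_inference.infer_shapes(model)

# running_mean and running_var are node outputs but not graph outputs, so the
# inferred (C,)-shaped value infos should show up here.
print([vi.name for vi in inferred.graph.value_info])
```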
#### Type Constraints @@ -2003,18 +2001,18 @@ var = np.array([1, 1.5]).astype(np.float32) # using np.bool(1) while generating test data with "'bool' object has no attribute 'dtype'" # working around by using np.byte(1).astype(bool) training_mode = 1 -y, saved_mean, saved_var, output_mean, output_var = _batchnorm_training_mode(x, s, bias, mean, var) +y, output_mean, output_var = _batchnorm_training_mode(x, s, bias, mean, var) node = onnx.helper.make_node( 'BatchNormalization', inputs=['x', 's', 'bias', 'mean', 'var'], - outputs=['y', 'output_mean', 'output_var', 'saved_mean', 'saved_var'], + outputs=['y', 'output_mean', 'output_var'], training_mode=training_mode ) # output size: (1, 2, 1, 3) expect(node, inputs=[x, s, bias, mean, var], - outputs=[y, output_mean, output_var, saved_mean, saved_var], + outputs=[y, output_mean, output_var], name='test_batchnorm_example_training_mode') # input size: (2, 3, 4, 5) @@ -2026,20 +2024,20 @@ var = np.random.rand(3).astype(np.float32) training_mode = 1 momentum = 0.9 epsilon = 1e-2 -y, saved_mean, saved_var, output_mean, output_var = _batchnorm_training_mode(x, s, bias, mean, var, momentum, - epsilon) +y, output_mean, output_var = _batchnorm_training_mode(x, s, bias, mean, var, momentum, + epsilon) node = onnx.helper.make_node( 'BatchNormalization', inputs=['x', 's', 'bias', 'mean', 'var'], - outputs=['y', 'output_mean', 'output_var', 'saved_mean', 'saved_var'], + outputs=['y', 'output_mean', 'output_var'], epsilon=epsilon, training_mode=training_mode ) # output size: (2, 3, 4, 5) expect(node, inputs=[x, s, bias, mean, var], - outputs=[y, output_mean, output_var, saved_mean, saved_var], + outputs=[y, output_mean, output_var], name='test_batchnorm_epsilon_training_mode') ``` diff --git a/docs/TestCoverage.md b/docs/TestCoverage.md index b33b66dcf42..8f4bb4582ec 100644 --- a/docs/TestCoverage.md +++ b/docs/TestCoverage.md @@ -1376,18 +1376,18 @@ var = np.array([1, 1.5]).astype(np.float32) # using np.bool(1) while generating test data with "'bool' object has no attribute 'dtype'" # working around by using np.byte(1).astype(bool) training_mode = 1 -y, saved_mean, saved_var, output_mean, output_var = _batchnorm_training_mode(x, s, bias, mean, var) +y, output_mean, output_var = _batchnorm_training_mode(x, s, bias, mean, var) node = onnx.helper.make_node( 'BatchNormalization', inputs=['x', 's', 'bias', 'mean', 'var'], - outputs=['y', 'output_mean', 'output_var', 'saved_mean', 'saved_var'], + outputs=['y', 'output_mean', 'output_var'], training_mode=training_mode ) # output size: (1, 2, 1, 3) expect(node, inputs=[x, s, bias, mean, var], - outputs=[y, output_mean, output_var, saved_mean, saved_var], + outputs=[y, output_mean, output_var], name='test_batchnorm_example_training_mode') # input size: (2, 3, 4, 5) @@ -1399,20 +1399,20 @@ var = np.random.rand(3).astype(np.float32) training_mode = 1 momentum = 0.9 epsilon = 1e-2 -y, saved_mean, saved_var, output_mean, output_var = _batchnorm_training_mode(x, s, bias, mean, var, momentum, - epsilon) +y, output_mean, output_var = _batchnorm_training_mode(x, s, bias, mean, var, momentum, + epsilon) node = onnx.helper.make_node( 'BatchNormalization', inputs=['x', 's', 'bias', 'mean', 'var'], - outputs=['y', 'output_mean', 'output_var', 'saved_mean', 'saved_var'], + outputs=['y', 'output_mean', 'output_var'], epsilon=epsilon, training_mode=training_mode ) # output size: (2, 3, 4, 5) expect(node, inputs=[x, s, bias, mean, var], - outputs=[y, output_mean, output_var, saved_mean, saved_var], + outputs=[y, output_mean, 
output_var], name='test_batchnorm_epsilon_training_mode') ``` diff --git a/onnx/backend/test/case/node/batchnorm.py b/onnx/backend/test/case/node/batchnorm.py index ab6367b37f1..fec39af7ca8 100644 --- a/onnx/backend/test/case/node/batchnorm.py +++ b/onnx/backend/test/case/node/batchnorm.py @@ -29,7 +29,7 @@ def _batchnorm_training_mode(x, s, bias, mean, var, momentum=0.9, epsilon=1e-5): output_mean = mean * momentum + saved_mean * (1 - momentum) output_var = var * momentum + saved_var * (1 - momentum) y = _batchnorm_test_mode(x, s, bias, saved_mean, saved_var, epsilon=epsilon) - return y.astype(np.float32), saved_mean, saved_var, output_mean, output_var + return y.astype(np.float32), output_mean, output_var class BatchNormalization(Base): @@ -84,18 +84,18 @@ def export_train(): # type: () -> None # using np.bool(1) while generating test data with "'bool' object has no attribute 'dtype'" # working around by using np.byte(1).astype(bool) training_mode = 1 - y, saved_mean, saved_var, output_mean, output_var = _batchnorm_training_mode(x, s, bias, mean, var) + y, output_mean, output_var = _batchnorm_training_mode(x, s, bias, mean, var) node = onnx.helper.make_node( 'BatchNormalization', inputs=['x', 's', 'bias', 'mean', 'var'], - outputs=['y', 'output_mean', 'output_var', 'saved_mean', 'saved_var'], + outputs=['y', 'output_mean', 'output_var'], training_mode=training_mode ) # output size: (1, 2, 1, 3) expect(node, inputs=[x, s, bias, mean, var], - outputs=[y, output_mean, output_var, saved_mean, saved_var], + outputs=[y, output_mean, output_var], name='test_batchnorm_example_training_mode') # input size: (2, 3, 4, 5) @@ -107,18 +107,18 @@ def export_train(): # type: () -> None training_mode = 1 momentum = 0.9 epsilon = 1e-2 - y, saved_mean, saved_var, output_mean, output_var = _batchnorm_training_mode(x, s, bias, mean, var, momentum, - epsilon) + y, output_mean, output_var = _batchnorm_training_mode(x, s, bias, mean, var, momentum, + epsilon) node = onnx.helper.make_node( 'BatchNormalization', inputs=['x', 's', 'bias', 'mean', 'var'], - outputs=['y', 'output_mean', 'output_var', 'saved_mean', 'saved_var'], + outputs=['y', 'output_mean', 'output_var'], epsilon=epsilon, training_mode=training_mode ) # output size: (2, 3, 4, 5) expect(node, inputs=[x, s, bias, mean, var], - outputs=[y, output_mean, output_var, saved_mean, saved_var], + outputs=[y, output_mean, output_var], name='test_batchnorm_epsilon_training_mode') diff --git a/onnx/backend/test/data/node/test_batchnorm_epsilon_training_mode/model.onnx b/onnx/backend/test/data/node/test_batchnorm_epsilon_training_mode/model.onnx index f8dd6d32ce3..53523e7657a 100644 --- a/onnx/backend/test/data/node/test_batchnorm_epsilon_training_mode/model.onnx +++ b/onnx/backend/test/data/node/test_batchnorm_epsilon_training_mode/model.onnx @@ -1,12 +1,11 @@ - backend-test:ž -‡ + backend-test:Ó +p x s bias mean vary output_mean -output_var -saved_mean saved_var"BatchNormalization* +output_var"BatchNormalization* epsilon ×#< * training_mode $test_batchnorm_epsilon_training_modeZ @@ -45,14 +44,5 @@ saved_mean saved_var"BatchNormalization* output_var - -b - -saved_mean - - -b - saved_var -  B \ No newline at end of file diff --git a/onnx/backend/test/data/node/test_batchnorm_epsilon_training_mode/test_data_set_0/output_3.pb b/onnx/backend/test/data/node/test_batchnorm_epsilon_training_mode/test_data_set_0/output_3.pb deleted file mode 100644 index b6f0d22db61..00000000000 --- 
a/onnx/backend/test/data/node/test_batchnorm_epsilon_training_mode/test_data_set_0/output_3.pb +++ /dev/null @@ -1,2 +0,0 @@ -B -saved_meanJ "`Þ= 4t>øO= \ No newline at end of file diff --git a/onnx/backend/test/data/node/test_batchnorm_epsilon_training_mode/test_data_set_0/output_4.pb b/onnx/backend/test/data/node/test_batchnorm_epsilon_training_mode/test_data_set_0/output_4.pb deleted file mode 100644 index 9d7c5f6aeca..00000000000 --- a/onnx/backend/test/data/node/test_batchnorm_epsilon_training_mode/test_data_set_0/output_4.pb +++ /dev/null @@ -1 +0,0 @@ -B saved_varJ 0+X?- –?ùž? \ No newline at end of file diff --git a/onnx/backend/test/data/node/test_batchnorm_example_training_mode/model.onnx b/onnx/backend/test/data/node/test_batchnorm_example_training_mode/model.onnx index 3978e1f4b3c..ca4d6881f01 100644 --- a/onnx/backend/test/data/node/test_batchnorm_example_training_mode/model.onnx +++ b/onnx/backend/test/data/node/test_batchnorm_example_training_mode/model.onnx @@ -1,12 +1,11 @@ - backend-test:Š -t + backend-test:À +] x s bias mean vary output_mean -output_var -saved_mean saved_var"BatchNormalization* +output_var"BatchNormalization* training_mode $test_batchnorm_example_training_modeZ x  @@ -43,14 +42,5 @@ saved_mean saved_var"BatchNormalization* output_var - -b - -saved_mean - - -b - saved_var -  B \ No newline at end of file diff --git a/onnx/backend/test/data/node/test_batchnorm_example_training_mode/test_data_set_0/output_3.pb b/onnx/backend/test/data/node/test_batchnorm_example_training_mode/test_data_set_0/output_3.pb deleted file mode 100644 index 0593c9a9e91..00000000000 Binary files a/onnx/backend/test/data/node/test_batchnorm_example_training_mode/test_data_set_0/output_3.pb and /dev/null differ diff --git a/onnx/backend/test/data/node/test_batchnorm_example_training_mode/test_data_set_0/output_4.pb b/onnx/backend/test/data/node/test_batchnorm_example_training_mode/test_data_set_0/output_4.pb deleted file mode 100644 index df97b713232..00000000000 --- a/onnx/backend/test/data/node/test_batchnorm_example_training_mode/test_data_set_0/output_4.pb +++ /dev/null @@ -1 +0,0 @@ -B saved_varJ«ª*?«ª*? \ No newline at end of file diff --git a/onnx/checker.cc b/onnx/checker.cc index 6cdb77ea5e0..347168dd369 100644 --- a/onnx/checker.cc +++ b/onnx/checker.cc @@ -619,7 +619,7 @@ void check_graph(const GraphProto& graph, const CheckerContext& ctx, const Lexic "Nodes in a graph must be topologically sorted, however input '", input, "' of node: \n", - ProtoDebugString(node), + "name: ", node.name(), " OpType: ", node.op_type(), "\n is not output of any previous nodes."); } } @@ -633,7 +633,8 @@ void check_graph(const GraphProto& graph, const CheckerContext& ctx, const Lexic } ONNX_CATCH(ValidationError & ex) { ONNX_HANDLE_EXCEPTION([&]() { - ex.AppendContext("Bad node spec: " + ProtoDebugString(node)); + ex.AppendContext( + "Bad node spec for node. 
Name: " + node.name() + " OpType: " + node.op_type()); ONNX_THROW_EX(ex); }); } @@ -732,7 +733,7 @@ void check_function(const FunctionProto& function, const CheckerContext& ctx, co "Nodes in a function must be topologically sorted, however input '", input, "' of node: \n", - ProtoDebugString(node), + "Name: ", node.name(), " OpType: ", node.op_type(), "\n is neither output of any previous nodes nor input of the function."); } } diff --git a/onnx/common/version.h b/onnx/common/version.h index e0270e57a4b..37a535fbf9f 100644 --- a/onnx/common/version.h +++ b/onnx/common/version.h @@ -8,6 +8,6 @@ namespace ONNX_NAMESPACE { // Represents the most recent release version. Updated with every release. -constexpr const char* LAST_RELEASE_VERSION = "1.8.201"; +constexpr const char* LAST_RELEASE_VERSION = "1.8.202"; } diff --git a/onnx/defs/nn/defs.cc b/onnx/defs/nn/defs.cc index 5cbf67c83a1..23f5de572d6 100644 --- a/onnx/defs/nn/defs.cc +++ b/onnx/defs/nn/defs.cc @@ -1598,19 +1598,21 @@ statistics in inference mode (training_mode=False, default), and the running statistics in training mode (training_mode=True). There are multiple cases for the number of outputs, which we list below: -Output case #1: Y, running_mean, running_var, current_mean, current_var (training_mode=True) +Output case #1: Y, running_mean, running_var (training_mode=True) Output case #2: Y (training_mode=False) When training_mode=False, extra outputs are invalid. The outputs are updated as follows when training_mode=True: ``` -current_mean = ReduceMean(X, axis=all_except_channel_index) -current_var = ReduceVar(X, axis=all_except_channel_index) - running_mean = input_mean * momentum + current_mean * (1 - momentum) running_var = input_var * momentum + current_var * (1 - momentum) Y = (X - current_mean) / sqrt(current_var + epsilon) * scale + B + +where: + +current_mean = ReduceMean(X, axis=all_except_channel_index) +current_var = ReduceVar(X, axis=all_except_channel_index) ``` When training_mode=False: @@ -1626,7 +1628,7 @@ ONNX_OPERATOR_SET_SCHEMA( BatchNormalization, 14, OpSchema() - .NumOutputs({1, 5}) + .NumOutputs({1, 3}) .SetDoc(BatchNormalization_ver14_doc + GenerateOptionalArgumentsDoc()) .Attr( "epsilon", @@ -1722,26 +1724,6 @@ ONNX_OPERATOR_SET_SCHEMA( true, 1, OpSchema::NonDifferentiable) - .Output( - 3, - "current_mean", - "Current mean used during training to speed up gradient " - "computation.", - "T", - OpSchema::Optional, - true, - 1, - OpSchema::NonDifferentiable) - .Output( - 4, - "current_var", - "Current variance used during training to speed up " - "gradient computation.", - "T", - OpSchema::Optional, - true, - 1, - OpSchema::NonDifferentiable) .TypeConstraint( "T", {"tensor(float16)", "tensor(float)", "tensor(double)"}, @@ -1760,9 +1742,9 @@ ONNX_OPERATOR_SET_SCHEMA( if (ctx.getAttribute("training_mode") && static_cast(ctx.getAttribute("training_mode")->i()) != 0) { - if (ctx.getNumOutputs() != 5) + if (ctx.getNumOutputs() != 3) fail_shape_inference( - "This number of op outputs should be 5 when Training_mode = True, but it is not."); + "This number of op outputs should be 3 when Training_mode = True, but it is not."); } else { if (ctx.getNumOutputs() != 1) fail_shape_inference( @@ -1780,16 +1762,6 @@ ONNX_OPERATOR_SET_SCHEMA( propagateElemTypeFromInputToOutput(ctx, 0, 2); updateOutputShape(ctx, 2, outputs_shape); } - - if (ctx.getNumOutputs() > 3) { - propagateElemTypeFromInputToOutput(ctx, 0, 3); - updateOutputShape(ctx, 3, outputs_shape); - } - - if (ctx.getNumOutputs() > 4) { - 
propagateElemTypeFromInputToOutput(ctx, 0, 4); - updateOutputShape(ctx, 4, outputs_shape); - } } })); diff --git a/onnx/proto_utils.h b/onnx/proto_utils.h index d235731a795..b9064b09503 100644 --- a/onnx/proto_utils.h +++ b/onnx/proto_utils.h @@ -20,7 +20,10 @@ namespace ONNX_NAMESPACE { #ifdef ONNX_USE_LITE_PROTO using ::google::protobuf::MessageLite; inline std::string ProtoDebugString(const MessageLite& proto) { - return proto.SerializeAsString(); + // Since the MessageLite interface does not support reflection, there is very + // little information that this and similar methods can provide. + // But when using lite proto this is the best we can provide. + return proto.ShortDebugString(); } #else using ::google::protobuf::Message; diff --git a/onnx/test/shape_inference_test.py b/onnx/test/shape_inference_test.py index 50fbd398db4..acaf76073ad 100644 --- a/onnx/test/shape_inference_test.py +++ b/onnx/test/shape_inference_test.py @@ -3463,13 +3463,11 @@ def test_batch_norm_train(self): # type: () -> None ('input_mean', TensorProto.FLOAT, (4,)), ('input_var', TensorProto.FLOAT, (4,))], [make_node('BatchNormalization', ['x', 'scale', 'b', 'input_mean', 'input_var'], - ['out', 'output_mean', 'output_var', 'saved_mean', 'saved_var'], training_mode=1)], + ['out', 'output_mean', 'output_var'], training_mode=1)], []) self._assert_inferred(graph, [make_tensor_value_info('out', TensorProto.FLOAT, (3, 4, 5, 6, 7)), # type: ignore make_tensor_value_info('output_mean', TensorProto.FLOAT, (4,)), # type: ignore make_tensor_value_info('output_var', TensorProto.FLOAT, (4,)), # type: ignore - make_tensor_value_info('saved_mean', TensorProto.FLOAT, (4,)), # type: ignore - make_tensor_value_info('saved_var', TensorProto.FLOAT, (4,)) # type: ignore ]) def test_batch_norm_train_dim_param(self): # type: () -> None @@ -3480,13 +3478,11 @@ def test_batch_norm_train_dim_param(self): # type: () -> None ('input_mean', TensorProto.FLOAT, ('C',)), ('input_var', TensorProto.FLOAT, ('C',))], [make_node('BatchNormalization', ['x', 'scale', 'b', 'input_mean', 'input_var'], - ['out', 'output_mean', 'output_var', 'saved_mean', 'saved_var'], training_mode=1)], + ['out', 'output_mean', 'output_var'], training_mode=1)], []) self._assert_inferred(graph, [make_tensor_value_info('out', TensorProto.FLOAT, (3, 'C', 5, 6, 7)), # type: ignore make_tensor_value_info('output_mean', TensorProto.FLOAT, ('C',)), # type: ignore make_tensor_value_info('output_var', TensorProto.FLOAT, ('C',)), # type: ignore - make_tensor_value_info('saved_mean', TensorProto.FLOAT, ('C',)), # type: ignore - make_tensor_value_info('saved_var', TensorProto.FLOAT, ('C',)) # type: ignore ]) def test_batch_norm_test(self): # type: () -> None @@ -3521,13 +3517,11 @@ def test_batch_norm_train_no_shape(self): # type: () -> None ('input_mean', TensorProto.FLOAT, ('C',)), ('input_var', TensorProto.FLOAT, ('C',))], [make_node('BatchNormalization', ['x', 'scale', 'b', 'input_mean', 'input_var'], - ['out', 'output_mean', 'output_var', 'saved_mean', 'saved_var'], training_mode=1)], + ['out', 'running_mean', 'running_var'], training_mode=1)], []) self._assert_inferred(graph, [make_tensor_value_info('out', TensorProto.FLOAT, None), # type: ignore - make_tensor_value_info('output_mean', TensorProto.FLOAT, ('C',)), # type: ignore - make_tensor_value_info('output_var', TensorProto.FLOAT, ('C',)), # type: ignore - make_tensor_value_info('saved_mean', TensorProto.FLOAT, ('C',)), # type: ignore - make_tensor_value_info('saved_var', TensorProto.FLOAT, ('C',)), # type: ignore + 
make_tensor_value_info('running_mean', TensorProto.FLOAT, ('C',)), # type: ignore + make_tensor_value_info('running_var', TensorProto.FLOAT, ('C',)), # type: ignore ])
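Finally, the `onnx/checker.cc` hunks above replace the full `ProtoDebugString(node)` dump with a compact name/op_type summary in the error context. A sketch of how that surfaces from Python is below (not part of the patch); the graph is deliberately out of topological order, and the expected wording in the comment is taken from the new format strings in this change:

```python
import onnx
from onnx import TensorProto, checker, helper

# 'relu_out' is consumed before the node that produces it, so the graph is not
# topologically sorted and the checker should reject it.
bad_order = [
    helper.make_node('Identity', ['relu_out'], ['y'], name='identity_node'),
    helper.make_node('Relu', ['x'], ['relu_out'], name='relu_node'),
]

graph = helper.make_graph(
    bad_order,
    'not_topologically_sorted',
    inputs=[helper.make_tensor_value_info('x', TensorProto.FLOAT, (1,))],
    outputs=[helper.make_tensor_value_info('y', TensorProto.FLOAT, (1,))],
)
model = helper.make_model(graph)

try:
    checker.check_model(model)
except checker.ValidationError as e:
    # With this patch the message identifies the offending node by name and op
    # type (e.g. "name: identity_node OpType: Identity") instead of embedding
    # the whole serialized NodeProto.
    print(e)
```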