microsoft · jchen351 · Nov 15, 2022 · Nov 10, 2022 · Nov 11, 2022 · Nov 11, 2022
diff --git a/onnxruntime/core/providers/cpu/ml/zipmap.cc b/onnxruntime/core/providers/cpu/ml/zipmap.cc
@@ -78,15 +78,15 @@ common::Status ZipMapOp::Compute(OpKernelContext* context) const {
     if (y_data == nullptr) return Status(common::ONNXRUNTIME, common::FAIL, "input count mismatch");
 
     //auto* y_data = Y->MutableData<std::vector<std::map<std::string, float>>>();
-    y_data->resize(batch_size);
+    y_data->resize(onnxruntime::narrow<size_t>(batch_size));
     int64_t current_weight_0 = 0;
     for (int64_t n = 0; n < batch_size; n++) {
       std::map<std::string, float> map1;
       for (int64_t j = 0; j < features_per_batch; j++) {
-        map1[classlabels_strings_[j]] = x_data[current_weight_0 + j];
+        map1[classlabels_strings_[onnxruntime::narrow<size_t>(j)]] = x_data[current_weight_0 + j];
       }
       current_weight_0 += features_per_batch;
-      (*y_data)[n] = std::move(map1);
+      (*y_data)[onnxruntime::narrow<size_t>(n)] = std::move(map1);
     }
   } else {
     if (features_per_batch != static_cast<int64_t>(classlabels_int64s_.size())) {
@@ -98,7 +98,7 @@ common::Status ZipMapOp::Compute(OpKernelContext* context) const {
     auto* y_data = context->Output<std::vector<std::map<std::int64_t, float>>>(0);
     if (y_data == nullptr) return Status(common::ONNXRUNTIME, common::FAIL, "input count mismatch");
     //auto* y_data = Y->MutableData<std::vector<std::map<int64_t, float>>>();
-    y_data->resize(batch_size);
+    y_data->resize(onnxruntime::narrow<size_t>(batch_size));
     int64_t current_weight_0 = 0;
     for (int n = 0; n < batch_size; n++) {
       std::map<int64_t, float> map2;

diff --git a/onnxruntime/core/providers/cpu/nn/batch_norm.h b/onnxruntime/core/providers/cpu/nn/batch_norm.h
@@ -76,8 +76,8 @@ class BatchNorm : public OpKernel {
     Tensor* Y = p_op_kernel_context->Output(0, x_shape);
 
     const auto& dims_vec = x_shape.GetDims();
-    const size_t N = dims_vec[0];
-    const size_t C = dims_vec[1];  // assume NCHW as per the spec
+    const size_t N = onnxruntime::narrow<size_t>(dims_vec[0]);
+    const size_t C = onnxruntime::narrow<size_t>(dims_vec[1]);  // assume NCHW as per the spec
 
     // calculate sample_size (per individual channel)
     size_t sample_size = 1;

diff --git a/onnxruntime/core/providers/cpu/nn/conv_transpose.cc b/onnxruntime/core/providers/cpu/nn/conv_transpose.cc
@@ -58,14 +58,14 @@ Status ConvTranspose<float>::PrePack(const Tensor& tensor, int input_idx, Alloca
     }
     filter_shape_ = tensor.Shape();
 
-    const size_t K = static_cast<size_t>(filter_shape_[0]) / conv_transpose_attrs_.group;
-    const size_t N = filter_shape_.SizeFromDimension(1);
+    const size_t K = static_cast<size_t>(filter_shape_[0]) / onnxruntime::narrow<size_t>(conv_transpose_attrs_.group);
+    const size_t N = onnxruntime::narrow<size_t>(filter_shape_.SizeFromDimension(1));
     auto packed_elements_per_group = N * K;
     if (packed_elements_per_group == 0 || N == 1 || K == 1) {  // No need for single row or single col case
       return Status::OK();
     }
 
-    size_t packed_filter_data_size = packed_elements_per_group * sizeof(float) * conv_transpose_attrs_.group;
+    size_t packed_filter_data_size = SafeInt<size_t>(packed_elements_per_group) * sizeof(float) * conv_transpose_attrs_.group;
     auto* packed_filter_data = alloc->Alloc(packed_filter_data_size);
 
     // Initialize memory to 0 as there could be some padding associated with pre-packed
@@ -260,9 +260,9 @@ Status ConvTranspose<float>::DoConvTranspose(OpKernelContext* context, bool dyna
       math::Gemm<float>(
           p.F ? CblasTrans : CblasNoTrans,
           CblasNoTrans,
-          kernel_dim,
-          input_image_size,
-          p.num_input_channels / conv_transpose_attrs_.group,
+          onnxruntime::narrow<ptrdiff_t>(kernel_dim),
+          onnxruntime::narrow<ptrdiff_t>(input_image_size),
+          onnxruntime::narrow<ptrdiff_t>(p.num_input_channels / conv_transpose_attrs_.group),
           1,
           filter_data + group_id * W_offset,
           Xdata + group_id * X_offset,
@@ -306,8 +306,8 @@ Status ConvTranspose<float>::DoConvTranspose(OpKernelContext* context, bool dyna
     }
 
     if (p.B != nullptr) {
-      auto Ymatrix = EigenMatrixMap<float>(Ydata, output_size, p.num_output_channels);
-      auto Bvec = ConstEigenVectorMap<float>(p.B->Data<float>(), p.num_output_channels);
+      auto Ymatrix = EigenMatrixMap<float>(Ydata, onnxruntime::narrow<size_t>(output_size), onnxruntime::narrow<size_t>(p.num_output_channels));
+      auto Bvec = ConstEigenVectorMap<float>(p.B->Data<float>(), onnxruntime::narrow<size_t>(p.num_output_channels));
       Ymatrix.rowwise() += Bvec.transpose();
     }
 

diff --git a/onnxruntime/core/providers/cpu/nn/flatten.h b/onnxruntime/core/providers/cpu/nn/flatten.h
@@ -22,6 +22,7 @@ class Flatten final : public OpKernel {
     if (X == nullptr) return Status(common::ONNXRUNTIME, common::FAIL, "input count mismatch");
 
     const TensorShape& X_shape = X->Shape();
+
     auto axis = axis_;
 
     // Valid axis range is [-rank, rank] instead of [-rank, rank-1], add additional check to only handle neg axis case.
@@ -30,8 +31,7 @@ class Flatten final : public OpKernel {
     }
 
     ORT_ENFORCE(gsl::narrow_cast<int64_t>(X_shape.NumDimensions()) >= axis, "The rank of input tensor must be >= axis");
-
-    Tensor* Y = context->Output(0, {X_shape.SizeToDimension(axis), X_shape.SizeFromDimension(axis)});
+    Tensor* Y = context->Output(0, {X_shape.SizeToDimension(onnxruntime::narrow<size_t>(axis)), X_shape.SizeFromDimension(onnxruntime::narrow<size_t>(axis))});
 
     CopyCpuTensor(X, Y);
 

diff --git a/onnxruntime/core/providers/cpu/nn/instance_norm.cc b/onnxruntime/core/providers/cpu/nn/instance_norm.cc
@@ -29,11 +29,11 @@ Status InstanceNorm<float>::Compute(OpKernelContext* p_op_kernel_context) const
   Tensor* Y = p_op_kernel_context->Output(0, x_shape);
 
   for (auto i = 0; i < N * C; ++i) {
-    ConstEigenVectorArrayMap<float> Xi(input->Data<float>() + W * i, W);
+    ConstEigenVectorArrayMap<float> Xi(input->Data<float>() + W * i, onnxruntime::narrow<size_t>(W));
     const float Xi_mean = Xi.mean();
     const float squared_norm = (Xi - Xi_mean).matrix().squaredNorm();
     const float inv_stdev = 1.0f / std::sqrt(squared_norm / W + epsilon_);
-    EigenVectorArrayMap<float> Yi(Y->MutableData<float>() + W * i, W);
+    EigenVectorArrayMap<float> Yi(Y->MutableData<float>() + W * i, onnxruntime::narrow<size_t>(W));
     const float channel_scale = inv_stdev * scale->Data<float>()[i % C];
     const float channel_shift = B->Data<float>()[i % C] - Xi_mean * channel_scale;
     Yi = Xi * channel_scale + channel_shift;

diff --git a/onnxruntime/core/providers/cpu/nn/layer_norm_impl.cc b/onnxruntime/core/providers/cpu/nn/layer_norm_impl.cc
@@ -30,8 +30,8 @@ Status ComputeImpl(OpKernelContext* p_ctx, int64_t orig_axis, float epsilon, boo
 
   const TensorShape& x_shape = X->Shape();
   const int64_t axis = HandleNegativeAxis(orig_axis, x_shape.NumDimensions());
-  auto norm_count = x_shape.SizeToDimension(axis);
-  auto norm_size = x_shape.SizeFromDimension(axis);
+  auto norm_count = x_shape.SizeToDimension(onnxruntime::narrow<size_t>(axis));
+  auto norm_size = x_shape.SizeFromDimension(onnxruntime::narrow<size_t>(axis));
 
   const auto scale_size = scale->Shape().Size();
   const auto bias_size = (bias_data) ? bias->Shape().Size() : 0;

diff --git a/onnxruntime/core/providers/cpu/nn/lp_norm.cc b/onnxruntime/core/providers/cpu/nn/lp_norm.cc
@@ -1,6 +1,7 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+#include <core/common/safeint.h>
 #include "core/providers/cpu/nn/lp_norm.h"
 #include "core/util/math_cpuonly.h"
 #include "core/providers/common.h"
@@ -32,8 +33,8 @@ void DoNormalizeP2(
     const int64_t sf) {
   for (int i = 0; i < n; ++i) {
     auto base = (i / sf) * sf * m + (i % sf);
-    ConstStridedVec<T> xVec(xData + base, 1, m, InnerStride(sf));
-    StridedVec<T> yVec(yData + base, 1, m, InnerStride(sf));
+    ConstStridedVec<T> xVec(xData + base, 1, onnxruntime::narrow<size_t>(m), InnerStride(onnxruntime::narrow<size_t>(sf)));
+    StridedVec<T> yVec(yData + base, 1, onnxruntime::narrow<size_t>(m), InnerStride(onnxruntime::narrow<size_t>(sf)));
 
     auto norm = xVec.template lpNorm<2>();
     if (norm != 0) {
@@ -54,8 +55,8 @@ void DoNormalizeP1(
     const int64_t sf) {
   for (int i = 0; i < n; ++i) {
     auto base = (i / sf) * sf * m + (i % sf);
-    ConstStridedVec<T> xVec(xData + base, 1, m, InnerStride(sf));
-    StridedVec<T> yVec(yData + base, 1, m, InnerStride(sf));
+    ConstStridedVec<T> xVec(xData + base, 1, onnxruntime::narrow<size_t>(m), InnerStride(onnxruntime::narrow<size_t>(sf)));
+    StridedVec<T> yVec(yData + base, 1, onnxruntime::narrow<size_t>(m), InnerStride(onnxruntime::narrow<size_t>(sf)));
 
     auto norm = xVec.template lpNorm<1>();
     if (norm != 0) {
@@ -74,9 +75,9 @@ Status LpNorm<T>::Compute(OpKernelContext* p_op_kernel_context) const {
   Tensor* output = p_op_kernel_context->Output(0, input_shape);
 
   const auto canonical_axis = HandleNegativeAxis(axis_, static_cast<int64_t>(input_shape.NumDimensions()));
-  const int64_t m = input_shape.GetDims()[canonical_axis];
+  const int64_t m = input_shape.GetDims()[onnxruntime::narrow<size_t>(canonical_axis)];
   const int64_t n = input_shape.Size() / m;
-  const int64_t sf = input_shape.SizeFromDimension(canonical_axis + 1);
+  const int64_t sf = input_shape.SizeFromDimension(SafeInt<size_t>(canonical_axis) + 1);
 
   if (p_ == 1) {
     DoNormalizeP1(input->Data<T>(), output->MutableData<T>(), m, n, sf);

diff --git a/onnxruntime/core/providers/cpu/nn/pool.cc b/onnxruntime/core/providers/cpu/nn/pool.cc
@@ -78,23 +78,23 @@ Status Pool<T, PoolType>::Compute(OpKernelContext* context) const {
 
   switch (kernel_shape.size()) {
     case 1: {
-      RunLoop<Pool1DTask<T, PoolType>>(tp, total_channels,
+      RunLoop<Pool1DTask<T, PoolType>>(tp, onnxruntime::narrow<size_t>(total_channels),
                                        {X_data, Y_data, x_step, y_step, pooled_height, stride_h(), height, kernel_shape,
                                         pads, pool_context_, pool_attrs_});
 
       break;
     }
 
     case 2: {
-      RunLoop<Pool2DTask<T, PoolType>>(tp, total_channels,
+      RunLoop<Pool2DTask<T, PoolType>>(tp, onnxruntime::narrow<size_t>(total_channels),
                                        {X_data, Y_data, x_step, y_step, pooled_height, pooled_width, stride_h(),
                                         stride_w(), height, width, kernel_shape, pads, pool_context_, pool_attrs_});
 
       break;
     }
     case 3: {
       RunLoop<Pool3DTask<T, PoolType>>(
-          tp, total_channels,
+          tp, onnxruntime::narrow<size_t>(total_channels),
           {X_data, Y_data, x_step, y_step, pooled_height, pooled_width, pooled_depth, stride_h(), stride_w(),
            stride_d(), height, width, depth, kernel_shape, pads, pool_context_, pool_attrs_});
 
@@ -212,7 +212,7 @@ Status MaxPoolV8::ComputeImpl(OpKernelContext* context) const {
       int64_t y_step = pooled_height;
       const int64_t dilation_h = pool_attrs_.dilations[0];
 
-      RunLoop<MaxPool1DTask<T>>(tp, total_channels,
+      RunLoop<MaxPool1DTask<T>>(tp, onnxruntime::narrow<size_t>(total_channels),
                                 {X_data, Y_data, I_data, x_step, y_step, dilation_h, pooled_height, stride_h(),
                                  height, kernel_shape, pads});
       break;
@@ -224,7 +224,7 @@ Status MaxPoolV8::ComputeImpl(OpKernelContext* context) const {
       const int64_t dilation_h = pool_attrs_.dilations[0];
       const int64_t dilation_w = pool_attrs_.dilations[1];
       RunLoop<MaxPool2DTask<T>>(
-          tp, total_channels,
+          tp, onnxruntime::narrow<size_t>(total_channels),
           {X_data, Y_data, I_data, x_step, y_step, dilation_h, dilation_w, pooled_height, pooled_width, stride_h(),
            stride_w(), height, width, kernel_shape, pads, pool_attrs_.storage_order});
       break;
@@ -235,7 +235,7 @@ Status MaxPoolV8::ComputeImpl(OpKernelContext* context) const {
       const int64_t dilation_h = pool_attrs_.dilations[0];
       const int64_t dilation_w = pool_attrs_.dilations[1];
       const int64_t dilation_d = pool_attrs_.dilations[2];
-      RunLoop<MaxPool3DTask<T>>(tp, total_channels,
+      RunLoop<MaxPool3DTask<T>>(tp, onnxruntime::narrow<size_t>(total_channels),
                                 {X_data, Y_data, I_data, x_step, y_step,
                                  dilation_h, dilation_w, dilation_d, pooled_height, pooled_width,
                                  pooled_depth, stride_h(), stride_w(), stride_d(), height,

diff --git a/onnxruntime/core/providers/cpu/nn/shrink.cc b/onnxruntime/core/providers/cpu/nn/shrink.cc
@@ -55,7 +55,7 @@ Status ShrinkImpl(const Tensor* input, Tensor* output, float bias, float lambd)
 
 template <>
 Status ShrinkImpl<MLFloat16>(const Tensor* input, Tensor* output, float bias, float lambd) {
-  const auto& span = gsl::make_span(input->Data<MLFloat16>(), input->Shape().Size());
+  const auto& span = gsl::make_span(input->Data<MLFloat16>(), onnxruntime::narrow<size_t>(input->Shape().Size()));
   auto* output_data = output->MutableData<MLFloat16>();
   std::transform(span.begin(), span.end(), output_data, [bias, lambd](const MLFloat16& val) {
     float fl = math::halfToFloat(val.val);
@@ -66,7 +66,7 @@ Status ShrinkImpl<MLFloat16>(const Tensor* input, Tensor* output, float bias, fl
 
 template <>
 Status ShrinkImpl<BFloat16>(const Tensor* input, Tensor* output, float bias, float lambd) {
-  const auto& span = gsl::make_span(input->Data<BFloat16>(), input->Shape().Size());
+  const auto& span = gsl::make_span(input->Data<BFloat16>(), onnxruntime::narrow<size_t>(input->Shape().Size()));
   auto* output_data = output->MutableData<BFloat16>();
   std::transform(span.begin(), span.end(), output_data, [bias, lambd](const BFloat16& val) {
     float fl = val.ToFloat();

diff --git a/onnxruntime/core/providers/cpu/nn/string_normalizer.cc b/onnxruntime/core/providers/cpu/nn/string_normalizer.cc
@@ -307,14 +307,14 @@ Status StringNormalizer::Compute(OpKernelContext* ctx) const {
       return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT,
                     "Single dimension value must be greater than 0");
     }
-    C = input_dims[0];
+    C = onnxruntime::narrow<size_t>(input_dims[0]);
   } else if (input_dims.size() == 2) {
     if (input_dims[0] != 1 || input_dims[1] < 1) {
       return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT,
                     "Input dimensions are either[C > 0] or [1][C > 0] allowed");
     }
     N = 1;
-    C = input_dims[1];
+    C = onnxruntime::narrow<size_t>(input_dims[1]);
   } else {
     return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT,
                   "Input dimensions are either[C > 0] or [1][C > 0] allowed");

diff --git a/onnxruntime/core/providers/cpu/nn/tfidfvectorizer.cc b/onnxruntime/core/providers/cpu/nn/tfidfvectorizer.cc
@@ -8,6 +8,7 @@
 
 #include <functional>
 #include <unordered_map>
+#include <core/common/safeint.h>
 
 namespace onnxruntime {
 
@@ -145,8 +146,8 @@ struct TfIdfVectorizer::Impl {
     assert(ngram_id != 0);
     --ngram_id;
     assert(ngram_id < ngram_indexes_.size());
-    auto output_idx = static_cast<int64_t>(row_num) * output_size_ + ngram_indexes_[ngram_id];
-    assert(static_cast<size_t>(output_idx) < frequencies.size());
+    size_t output_idx = SafeInt<size_t>(row_num) * output_size_ + ngram_indexes_[ngram_id];  // SafeInt leads so the multiply and the add are both overflow-checked
+    assert(output_idx < frequencies.size());
     ++frequencies[output_idx];
   }
 };
@@ -196,7 +197,7 @@ TfIdfVectorizer::TfIdfVectorizer(const OpKernelInfo& info) : OpKernel(info), imp
                 "Negative ngram_indexes values are not allowed");
     // Set output size to max output index + 1;
     auto greatest_hit = std::max_element(impl_->ngram_indexes_.begin(), impl_->ngram_indexes_.end());
-    impl_->output_size_ = *greatest_hit + 1;
+    impl_->output_size_ = SafeInt<size_t>(*greatest_hit) + 1;
   }
 
   status = info.GetAttrsAsSpan("weights", impl_->weights_);
@@ -221,12 +222,12 @@ TfIdfVectorizer::TfIdfVectorizer(const OpKernelInfo& info) : OpKernel(info), imp
   const auto total_items = (pool_strings.empty()) ? pool_int64s.size() : pool_strings.size();
   size_t ngram_id = 1;  // start with 1, 0 - means no n-gram
   // Load into dictionary only required gram sizes
-  const size_t min_gram_length = impl_->min_gram_length_;
-  const size_t max_gram_length = impl_->max_gram_length_;
+  const size_t min_gram_length = onnxruntime::narrow<size_t>(impl_->min_gram_length_);
+  const size_t max_gram_length = onnxruntime::narrow<size_t>(impl_->max_gram_length_);
   size_t ngram_size = 1;
   for (size_t i = 0; i < impl_->ngram_counts_.size(); ++i) {
-    size_t start_idx = impl_->ngram_counts_[i];
-    size_t end_idx = ((i + 1) < impl_->ngram_counts_.size()) ? impl_->ngram_counts_[i + 1] : total_items;
+    size_t start_idx = onnxruntime::narrow<size_t>(impl_->ngram_counts_[i]);
+    size_t end_idx = ((i + 1) < impl_->ngram_counts_.size()) ? onnxruntime::narrow<size_t>(impl_->ngram_counts_[i + 1]) : total_items;  // narrow the count itself, as for start_idx, before ?: picks a common type
     ORT_ENFORCE(end_idx >= start_idx && end_idx <= total_items,
                 "n-gram counts out of bounds for ", std::to_string(ngram_size), "-grams");
     auto items = end_idx - start_idx;
@@ -329,7 +330,7 @@ void TfIdfVectorizer::ComputeImpl(OpKernelContext* ctx, ptrdiff_t row_num, size_
 
     while (ngram_start < ngram_row_end) {
       // We went far enough so no n-grams of any size can be gathered
-      auto at_least_this = AdvanceElementPtr(ngram_start, skip_distance * (start_ngram_size - 1), elem_size);
+      auto at_least_this = AdvanceElementPtr(ngram_start, SafeInt<size_t>(skip_distance) * (start_ngram_size - 1), elem_size);
       if (at_least_this >= ngram_row_end) {
         break;
       }
@@ -384,7 +385,7 @@ void TfIdfVectorizer::ComputeImpl(OpKernelContext* ctx, ptrdiff_t row_num, size_
 Status TfIdfVectorizer::Compute(OpKernelContext* ctx) const {
   auto X = ctx->Input<Tensor>(0);
   auto& input_shape = X->Shape();
-  const size_t total_items = input_shape.Size();
+  const size_t total_items = onnxruntime::narrow<size_t>(input_shape.Size());
 
   int32_t num_rows = 0;
   size_t B = 0;
@@ -396,10 +397,10 @@ Status TfIdfVectorizer::Compute(OpKernelContext* ctx) const {
     assert(total_items == 1);
   } else if (input_dims.size() == 1) {
     num_rows = 1;
-    C = input_dims[0];
+    C = onnxruntime::narrow<size_t>(input_dims[0]);
   } else if (input_dims.size() == 2) {
-    B = input_dims[0];
-    C = input_dims[1];
+    B = onnxruntime::narrow<size_t>(input_dims[0]);
+    C = onnxruntime::narrow<size_t>(input_dims[1]);
     num_rows = static_cast<int32_t>(B);
     if (B < 1) {
       return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT,