Skip to content

Commit

Permalink
Merge pull request #9765 from microsoft/user/dwayner/DML1.8forORT1.10
Browse files Browse the repository at this point in the history
Update DirectML from 1.5.1 to 1.8.0 for ORT 1.10
  • Loading branch information
fdwr committed Nov 20, 2021
2 parents 6856619 + f28d7ec commit 7396689
Show file tree
Hide file tree
Showing 33 changed files with 2,300 additions and 1,083 deletions.
2 changes: 1 addition & 1 deletion cmake/external/dml.cmake
Expand Up @@ -20,7 +20,7 @@ if (NOT onnxruntime_USE_CUSTOM_DIRECTML)
set(NUGET_CONFIG ${PROJECT_SOURCE_DIR}/../NuGet.config)
set(PACKAGES_CONFIG ${PROJECT_SOURCE_DIR}/../packages.config)
get_filename_component(PACKAGES_DIR ${CMAKE_CURRENT_BINARY_DIR}/../packages ABSOLUTE)
set(DML_PACKAGE_DIR ${PACKAGES_DIR}/Microsoft.AI.DirectML.1.5.1)
set(DML_PACKAGE_DIR ${PACKAGES_DIR}/Microsoft.AI.DirectML.1.8.0)
set(DML_SHARED_LIB DirectML.dll)

# Restore nuget packages, which will pull down the DirectML redist package
Expand Down
Expand Up @@ -122,6 +122,10 @@ namespace Windows::AI::MachineLearning::Adapter
// Operator supports true 64-bit tensors directly, no strides needed.
// So fallback to strided 32-bit only occurs when the device lacks 64-bit support.
bool prefer64BitTensorsDirectly = false;

// The operator supports emulation for uint64/int64 even if the hardware doesn't
// support native uint64/int64 data types.
bool support64BitTensorsViaEmulation = false;
};

using InternalRegistrationInfoMap = std::unordered_map<onnxruntime::KernelDef*, std::shared_ptr<InternalRegistrationInfo>>;
Expand Down
Expand Up @@ -345,6 +345,7 @@ HRESULT STDMETHODCALLTYPE AbiCustomRegistry::RegisterOperatorKernel(
bool supportedWith64BitTensorsVia32BitStrides,
bool supportedWith64BitTensorsVia32BitStridesFromAnyEp,
bool prefer64BitTensorsDirectly,
bool support64BitTensorsViaEmulation,
_In_reads_(constantCpuInputCount) const uint32_t* requiredConstantCpuInputs,
uint32_t constantCpuInputCount) const noexcept
{
Expand Down Expand Up @@ -472,6 +473,7 @@ HRESULT STDMETHODCALLTYPE AbiCustomRegistry::RegisterOperatorKernel(
regInfo->supportedWith64BitTensorsVia32BitStrides = supportedWith64BitTensorsVia32BitStrides;
regInfo->supportedWith64BitTensorsVia32BitStridesFromAnyEp = supportedWith64BitTensorsVia32BitStridesFromAnyEp;
regInfo->prefer64BitTensorsDirectly = prefer64BitTensorsDirectly;
regInfo->support64BitTensorsViaEmulation = support64BitTensorsViaEmulation;

// Only internal operators support usage in DML graphs
if (supportsGraph)
Expand Down Expand Up @@ -546,7 +548,8 @@ HRESULT STDMETHODCALLTYPE AbiCustomRegistry::RegisterOperatorKernel(
requiredConstantCpuInputs ||
supportedWith64BitTensorsVia32BitStrides ||
supportedWith64BitTensorsVia32BitStridesFromAnyEp ||
prefer64BitTensorsDirectly)
prefer64BitTensorsDirectly ||
support64BitTensorsViaEmulation)
{
ORT_THROW_HR(E_INVALIDARG);
}
Expand Down
Expand Up @@ -44,6 +44,7 @@ class AbiCustomRegistry : public WRL::Base<IMLOperatorRegistry, IMLOperatorRegis
bool supportedWith64BitTensorsVia32BitStrides = false,
bool supportedWith64BitTensorsVia32BitStridesFromAnyEp = false,
bool prefer64BitTensorsDirectly = false,
bool support64BitTensorsViaEmulation = false,
_In_reads_(constantCpuInputCount) const uint32_t* requiredConstantCpuInputs = nullptr,
uint32_t constantCpuInputCount = 0) const noexcept override;

Expand Down
Expand Up @@ -20,7 +20,7 @@ DML_TENSOR_DATA_TYPE GetDmlDataTypeFromMlDataTypeNoThrow(MLOperatorTensorDataTyp
case MLOperatorTensorDataType::String: return DML_TENSOR_DATA_TYPE_UNKNOWN;
case MLOperatorTensorDataType::Bool: return DML_TENSOR_DATA_TYPE_UINT8;
case MLOperatorTensorDataType::Float16: return DML_TENSOR_DATA_TYPE_FLOAT16;
case MLOperatorTensorDataType::Double: return DML_TENSOR_DATA_TYPE_UNKNOWN;
case MLOperatorTensorDataType::Double: return DML_TENSOR_DATA_TYPE_FLOAT64;
case MLOperatorTensorDataType::UInt32: return DML_TENSOR_DATA_TYPE_UINT32;
case MLOperatorTensorDataType::UInt64: return DML_TENSOR_DATA_TYPE_UINT64;
case MLOperatorTensorDataType::Complex64: return DML_TENSOR_DATA_TYPE_UNKNOWN;
Expand Down Expand Up @@ -119,7 +119,7 @@ uint32_t GetSupportedDeviceDataTypeMask(IDMLDevice* dmlDevice)
uint32_t deviceTypeMask = 0u;

// Form the bitmask of all supported data types.
for (uint32_t i = 0; i <= DML_TENSOR_DATA_TYPE_INT8; ++i)
for (uint32_t i = 0; i <= DML_TENSOR_DATA_TYPE_INT64; ++i)
{
DML_FEATURE_QUERY_TENSOR_DATA_TYPE_SUPPORT dataTypeQuery = { static_cast<DML_TENSOR_DATA_TYPE>(i) };
DML_FEATURE_DATA_TENSOR_DATA_TYPE_SUPPORT dataTypeSupport = {};
Expand Down
Expand Up @@ -96,4 +96,24 @@ namespace Dml

return minimumImpliedSizeInBytes;
}

template <typename T>
void CastToClampedScalarUnion(DML_TENSOR_DATA_TYPE dataType, T value, DML_SCALAR_UNION* outputValue)
{
    // Saturate 'value' into the numeric range of 'dataType' via clamp_cast and
    // store the result in the corresponding field of the scalar union.
    DML_SCALAR_UNION& scalar = *outputValue;

    switch (dataType)
    {
    // Signed integer destinations.
    case DML_TENSOR_DATA_TYPE_INT8:   scalar.Int8   = clamp_cast<int8_t, T>(value);   break;
    case DML_TENSOR_DATA_TYPE_INT16:  scalar.Int16  = clamp_cast<int16_t, T>(value);  break;
    case DML_TENSOR_DATA_TYPE_INT32:  scalar.Int32  = clamp_cast<int32_t, T>(value);  break;
    case DML_TENSOR_DATA_TYPE_INT64:  scalar.Int64  = clamp_cast<int64_t, T>(value);  break;

    // Unsigned integer destinations.
    case DML_TENSOR_DATA_TYPE_UINT8:  scalar.UInt8  = clamp_cast<uint8_t, T>(value);  break;
    case DML_TENSOR_DATA_TYPE_UINT16: scalar.UInt16 = clamp_cast<uint16_t, T>(value); break;
    case DML_TENSOR_DATA_TYPE_UINT32: scalar.UInt32 = clamp_cast<uint32_t, T>(value); break;
    case DML_TENSOR_DATA_TYPE_UINT64: scalar.UInt64 = clamp_cast<uint64_t, T>(value); break;

    // Floating-point destinations.
    // NOTE(review): the FLOAT16 case writes the Float32 field, since
    // DML_SCALAR_UNION exposes no half-precision member — confirm that every
    // consumer of FLOAT16 scalars expects a 32-bit float here rather than
    // packed half bits.
    case DML_TENSOR_DATA_TYPE_FLOAT16: scalar.Float32 = clamp_cast<float, T>(value);  break;
    case DML_TENSOR_DATA_TYPE_FLOAT32: scalar.Float32 = clamp_cast<float, T>(value);  break;
    case DML_TENSOR_DATA_TYPE_FLOAT64: scalar.Float64 = clamp_cast<double, T>(value); break;

    default:
        assert(false); // Unrecognized / unsupported tensor data type.
    }
}
} // namespace Dml

0 comments on commit 7396689

Please sign in to comment.