-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[AMDGPU] More radical feature initialization refactoring #155222
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] More radical feature initialization refactoring #155222
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-clang @llvm/pr-subscribers-flang-driver Author: Stanislav Mekhanoshin (rampitec) Changes: Factoring in flang, just have a single fillAMDGPUFeatureMap. Patch is 26.76 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155222.diff 4 Files Affected:
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index a235cccac516b..87de9e6865e71 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -201,8 +201,7 @@ bool AMDGPUTargetInfo::initFeatureMap(
if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
return false;
- // TODO: Should move this logic into TargetParser
- auto HasError = insertWaveSizeFeature(CPU, getTriple(), Features);
+ auto HasError = fillAMDGPUFeatureMap(CPU, getTriple(), Features);
switch (HasError.first) {
default:
break;
diff --git a/flang/lib/Frontend/CompilerInstance.cpp b/flang/lib/Frontend/CompilerInstance.cpp
index cd8dddad05282..f6a9dd0ebc70c 100644
--- a/flang/lib/Frontend/CompilerInstance.cpp
+++ b/flang/lib/Frontend/CompilerInstance.cpp
@@ -253,18 +253,16 @@ getExplicitAndImplicitAMDGPUTargetFeatures(clang::DiagnosticsEngine &diags,
const TargetOptions &targetOpts,
const llvm::Triple triple) {
llvm::StringRef cpu = targetOpts.cpu;
- llvm::StringMap<bool> implicitFeaturesMap;
- // Get the set of implicit target features
- llvm::AMDGPU::fillAMDGPUFeatureMap(cpu, triple, implicitFeaturesMap);
+ llvm::StringMap<bool> FeaturesMap;
// Add target features specified by the user
for (auto &userFeature : targetOpts.featuresAsWritten) {
std::string userKeyString = userFeature.substr(1);
- implicitFeaturesMap[userKeyString] = (userFeature[0] == '+');
+ FeaturesMap[userKeyString] = (userFeature[0] == '+');
}
auto HasError =
- llvm::AMDGPU::insertWaveSizeFeature(cpu, triple, implicitFeaturesMap);
+ llvm::AMDGPU::fillAMDGPUFeatureMap(cpu, triple, FeaturesMap);
if (HasError.first) {
unsigned diagID = diags.getCustomDiagID(clang::DiagnosticsEngine::Error,
"Unsupported feature ID: %0");
@@ -273,9 +271,9 @@ getExplicitAndImplicitAMDGPUTargetFeatures(clang::DiagnosticsEngine &diags,
}
llvm::SmallVector<std::string> featuresVec;
- for (auto &implicitFeatureItem : implicitFeaturesMap) {
- featuresVec.push_back((llvm::Twine(implicitFeatureItem.second ? "+" : "-") +
- implicitFeatureItem.first().str())
+ for (auto &FeatureItem : FeaturesMap) {
+ featuresVec.push_back((llvm::Twine(FeatureItem.second ? "+" : "-") +
+ FeatureItem.first().str())
.str());
}
llvm::sort(featuresVec);
diff --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h
index 2f68d66dee90f..0739207e26b5f 100644
--- a/llvm/include/llvm/TargetParser/TargetParser.h
+++ b/llvm/include/llvm/TargetParser/TargetParser.h
@@ -184,14 +184,8 @@ LLVM_ABI void fillValidArchListR600(SmallVectorImpl<StringRef> &Values);
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU);
/// Fills Features map with default values for given target GPU
-LLVM_ABI void fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
- StringMap<bool> &Features);
-
-/// Inserts wave size feature for given GPU into features map
LLVM_ABI std::pair<FeatureError, StringRef>
-insertWaveSizeFeature(StringRef GPU, const Triple &T,
- StringMap<bool> &Features);
-
+fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, StringMap<bool> &Features);
} // namespace AMDGPU
struct BasicSubtargetFeatureKV {
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 480622d6338fc..8de28adcfb110 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -364,8 +364,320 @@ StringRef AMDGPU::getCanonicalArchName(const Triple &T, StringRef Arch) {
return T.isAMDGCN() ? getArchNameAMDGCN(ProcKind) : getArchNameR600(ProcKind);
}
-void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
- StringMap<bool> &Features) {
+static std::pair<FeatureError, StringRef>
+insertWaveSizeFeature(StringRef GPU, const Triple &T,
+ const StringMap<bool> &DefaultFeatures,
+ StringMap<bool> &Features) {
+ const bool IsNullGPU = GPU.empty();
+ const bool TargetHasWave32 = DefaultFeatures.count("wavefrontsize32");
+ const bool TargetHasWave64 = DefaultFeatures.count("wavefrontsize64");
+ const bool HaveWave32 = Features.count("wavefrontsize32");
+ const bool HaveWave64 = Features.count("wavefrontsize64");
+ if (HaveWave32 && HaveWave64)
+ return {AMDGPU::INVALID_FEATURE_COMBINATION,
+ "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive"};
+
+ if (HaveWave32 && !IsNullGPU && TargetHasWave64)
+ return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "wavefrontsize32"};
+
+ if (HaveWave64 && !IsNullGPU && TargetHasWave32)
+ return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "wavefrontsize64"};
+
+ // Don't assume any wavesize with an unknown subtarget.
+ // Default to wave32 if target supports both.
+ if (!IsNullGPU && !HaveWave32 && !HaveWave64 && !TargetHasWave32 &&
+ !TargetHasWave64)
+ Features.insert(std::make_pair("wavefrontsize32", true));
+
+ for (const auto &Entry : DefaultFeatures) {
+ if (!Features.count(Entry.getKey()))
+ Features[Entry.getKey()] = Entry.getValue();
+ }
+
+ return {NO_ERROR, StringRef()};
+}
+
+static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
+ StringMap<bool> &Features) {
+ AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU);
+ switch (Kind) {
+ case GK_GFX1250:
+ Features["ci-insts"] = true;
+ Features["dot7-insts"] = true;
+ Features["dot8-insts"] = true;
+ Features["dl-insts"] = true;
+ Features["16-bit-insts"] = true;
+ Features["dpp"] = true;
+ Features["gfx8-insts"] = true;
+ Features["gfx9-insts"] = true;
+ Features["gfx10-insts"] = true;
+ Features["gfx10-3-insts"] = true;
+ Features["gfx11-insts"] = true;
+ Features["gfx12-insts"] = true;
+ Features["gfx1250-insts"] = true;
+ Features["bitop3-insts"] = true;
+ Features["prng-inst"] = true;
+ Features["tanh-insts"] = true;
+ Features["tensor-cvt-lut-insts"] = true;
+ Features["transpose-load-f4f6-insts"] = true;
+ Features["bf16-trans-insts"] = true;
+ Features["bf16-cvt-insts"] = true;
+ Features["fp8-conversion-insts"] = true;
+ Features["fp8e5m3-insts"] = true;
+ Features["permlane16-swap"] = true;
+ Features["ashr-pk-insts"] = true;
+ Features["atomic-buffer-pk-add-bf16-inst"] = true;
+ Features["vmem-pref-insts"] = true;
+ Features["atomic-fadd-rtn-insts"] = true;
+ Features["atomic-buffer-global-pk-add-f16-insts"] = true;
+ Features["atomic-flat-pk-add-16-insts"] = true;
+ Features["atomic-global-pk-add-bf16-inst"] = true;
+ Features["atomic-ds-pk-add-16-insts"] = true;
+ Features["setprio-inc-wg-inst"] = true;
+ Features["atomic-fmin-fmax-global-f32"] = true;
+ Features["atomic-fmin-fmax-global-f64"] = true;
+ Features["wavefrontsize32"] = true;
+ break;
+ case GK_GFX1201:
+ case GK_GFX1200:
+ case GK_GFX12_GENERIC:
+ Features["ci-insts"] = true;
+ Features["dot7-insts"] = true;
+ Features["dot8-insts"] = true;
+ Features["dot9-insts"] = true;
+ Features["dot10-insts"] = true;
+ Features["dot11-insts"] = true;
+ Features["dot12-insts"] = true;
+ Features["dl-insts"] = true;
+ Features["atomic-ds-pk-add-16-insts"] = true;
+ Features["atomic-flat-pk-add-16-insts"] = true;
+ Features["atomic-buffer-global-pk-add-f16-insts"] = true;
+ Features["atomic-buffer-pk-add-bf16-inst"] = true;
+ Features["atomic-global-pk-add-bf16-inst"] = true;
+ Features["16-bit-insts"] = true;
+ Features["dpp"] = true;
+ Features["gfx8-insts"] = true;
+ Features["gfx9-insts"] = true;
+ Features["gfx10-insts"] = true;
+ Features["gfx10-3-insts"] = true;
+ Features["gfx11-insts"] = true;
+ Features["gfx12-insts"] = true;
+ Features["atomic-fadd-rtn-insts"] = true;
+ Features["image-insts"] = true;
+ Features["fp8-conversion-insts"] = true;
+ Features["atomic-fmin-fmax-global-f32"] = true;
+ break;
+ case GK_GFX1153:
+ case GK_GFX1152:
+ case GK_GFX1151:
+ case GK_GFX1150:
+ case GK_GFX1103:
+ case GK_GFX1102:
+ case GK_GFX1101:
+ case GK_GFX1100:
+ case GK_GFX11_GENERIC:
+ Features["ci-insts"] = true;
+ Features["dot5-insts"] = true;
+ Features["dot7-insts"] = true;
+ Features["dot8-insts"] = true;
+ Features["dot9-insts"] = true;
+ Features["dot10-insts"] = true;
+ Features["dot12-insts"] = true;
+ Features["dl-insts"] = true;
+ Features["16-bit-insts"] = true;
+ Features["dpp"] = true;
+ Features["gfx8-insts"] = true;
+ Features["gfx9-insts"] = true;
+ Features["gfx10-insts"] = true;
+ Features["gfx10-3-insts"] = true;
+ Features["gfx11-insts"] = true;
+ Features["atomic-fadd-rtn-insts"] = true;
+ Features["image-insts"] = true;
+ Features["gws"] = true;
+ Features["atomic-fmin-fmax-global-f32"] = true;
+ break;
+ case GK_GFX1036:
+ case GK_GFX1035:
+ case GK_GFX1034:
+ case GK_GFX1033:
+ case GK_GFX1032:
+ case GK_GFX1031:
+ case GK_GFX1030:
+ case GK_GFX10_3_GENERIC:
+ Features["ci-insts"] = true;
+ Features["dot1-insts"] = true;
+ Features["dot2-insts"] = true;
+ Features["dot5-insts"] = true;
+ Features["dot6-insts"] = true;
+ Features["dot7-insts"] = true;
+ Features["dot10-insts"] = true;
+ Features["dl-insts"] = true;
+ Features["16-bit-insts"] = true;
+ Features["dpp"] = true;
+ Features["gfx8-insts"] = true;
+ Features["gfx9-insts"] = true;
+ Features["gfx10-insts"] = true;
+ Features["gfx10-3-insts"] = true;
+ Features["image-insts"] = true;
+ Features["s-memrealtime"] = true;
+ Features["s-memtime-inst"] = true;
+ Features["gws"] = true;
+ Features["vmem-to-lds-load-insts"] = true;
+ Features["atomic-fmin-fmax-global-f32"] = true;
+ Features["atomic-fmin-fmax-global-f64"] = true;
+ break;
+ case GK_GFX1012:
+ case GK_GFX1011:
+ Features["dot1-insts"] = true;
+ Features["dot2-insts"] = true;
+ Features["dot5-insts"] = true;
+ Features["dot6-insts"] = true;
+ Features["dot7-insts"] = true;
+ Features["dot10-insts"] = true;
+ [[fallthrough]];
+ case GK_GFX1013:
+ case GK_GFX1010:
+ case GK_GFX10_1_GENERIC:
+ Features["dl-insts"] = true;
+ Features["ci-insts"] = true;
+ Features["16-bit-insts"] = true;
+ Features["dpp"] = true;
+ Features["gfx8-insts"] = true;
+ Features["gfx9-insts"] = true;
+ Features["gfx10-insts"] = true;
+ Features["image-insts"] = true;
+ Features["s-memrealtime"] = true;
+ Features["s-memtime-inst"] = true;
+ Features["gws"] = true;
+ Features["vmem-to-lds-load-insts"] = true;
+ Features["atomic-fmin-fmax-global-f32"] = true;
+ Features["atomic-fmin-fmax-global-f64"] = true;
+ break;
+ case GK_GFX950:
+ Features["bitop3-insts"] = true;
+ Features["fp6bf6-cvt-scale-insts"] = true;
+ Features["fp4-cvt-scale-insts"] = true;
+ Features["bf8-cvt-scale-insts"] = true;
+ Features["fp8-cvt-scale-insts"] = true;
+ Features["f16bf16-to-fp6bf6-cvt-scale-insts"] = true;
+ Features["f32-to-f16bf16-cvt-sr-insts"] = true;
+ Features["prng-inst"] = true;
+ Features["permlane16-swap"] = true;
+ Features["permlane32-swap"] = true;
+ Features["ashr-pk-insts"] = true;
+ Features["dot12-insts"] = true;
+ Features["dot13-insts"] = true;
+ Features["atomic-buffer-pk-add-bf16-inst"] = true;
+ Features["gfx950-insts"] = true;
+ [[fallthrough]];
+ case GK_GFX942:
+ Features["fp8-insts"] = true;
+ Features["fp8-conversion-insts"] = true;
+ if (Kind != GK_GFX950)
+ Features["xf32-insts"] = true;
+ [[fallthrough]];
+ case GK_GFX9_4_GENERIC:
+ Features["gfx940-insts"] = true;
+ Features["atomic-ds-pk-add-16-insts"] = true;
+ Features["atomic-flat-pk-add-16-insts"] = true;
+ Features["atomic-global-pk-add-bf16-inst"] = true;
+ Features["gfx90a-insts"] = true;
+ Features["atomic-buffer-global-pk-add-f16-insts"] = true;
+ Features["atomic-fadd-rtn-insts"] = true;
+ Features["dot3-insts"] = true;
+ Features["dot4-insts"] = true;
+ Features["dot5-insts"] = true;
+ Features["dot6-insts"] = true;
+ Features["mai-insts"] = true;
+ Features["dl-insts"] = true;
+ Features["dot1-insts"] = true;
+ Features["dot2-insts"] = true;
+ Features["dot7-insts"] = true;
+ Features["dot10-insts"] = true;
+ Features["gfx9-insts"] = true;
+ Features["gfx8-insts"] = true;
+ Features["16-bit-insts"] = true;
+ Features["dpp"] = true;
+ Features["s-memrealtime"] = true;
+ Features["ci-insts"] = true;
+ Features["s-memtime-inst"] = true;
+ Features["gws"] = true;
+ Features["vmem-to-lds-load-insts"] = true;
+ Features["atomic-fmin-fmax-global-f64"] = true;
+ Features["wavefrontsize64"] = true;
+ break;
+ case GK_GFX90A:
+ Features["gfx90a-insts"] = true;
+ Features["atomic-buffer-global-pk-add-f16-insts"] = true;
+ Features["atomic-fadd-rtn-insts"] = true;
+ Features["atomic-fmin-fmax-global-f64"] = true;
+ [[fallthrough]];
+ case GK_GFX908:
+ Features["dot3-insts"] = true;
+ Features["dot4-insts"] = true;
+ Features["dot5-insts"] = true;
+ Features["dot6-insts"] = true;
+ Features["mai-insts"] = true;
+ [[fallthrough]];
+ case GK_GFX906:
+ Features["dl-insts"] = true;
+ Features["dot1-insts"] = true;
+ Features["dot2-insts"] = true;
+ Features["dot7-insts"] = true;
+ Features["dot10-insts"] = true;
+ [[fallthrough]];
+ case GK_GFX90C:
+ case GK_GFX909:
+ case GK_GFX904:
+ case GK_GFX902:
+ case GK_GFX900:
+ case GK_GFX9_GENERIC:
+ Features["gfx9-insts"] = true;
+ Features["vmem-to-lds-load-insts"] = true;
+ [[fallthrough]];
+ case GK_GFX810:
+ case GK_GFX805:
+ case GK_GFX803:
+ case GK_GFX802:
+ case GK_GFX801:
+ Features["gfx8-insts"] = true;
+ Features["16-bit-insts"] = true;
+ Features["dpp"] = true;
+ Features["s-memrealtime"] = true;
+ Features["ci-insts"] = true;
+ Features["image-insts"] = true;
+ Features["s-memtime-inst"] = true;
+ Features["gws"] = true;
+ Features["wavefrontsize64"] = true;
+ break;
+ case GK_GFX705:
+ case GK_GFX704:
+ case GK_GFX703:
+ case GK_GFX702:
+ case GK_GFX701:
+ case GK_GFX700:
+ Features["ci-insts"] = true;
+ [[fallthrough]];
+ case GK_GFX602:
+ case GK_GFX601:
+ case GK_GFX600:
+ Features["image-insts"] = true;
+ Features["s-memtime-inst"] = true;
+ Features["gws"] = true;
+ Features["atomic-fmin-fmax-global-f32"] = true;
+ Features["atomic-fmin-fmax-global-f64"] = true;
+ Features["wavefrontsize64"] = true;
+ break;
+ case GK_NONE:
+ break;
+ default:
+ llvm_unreachable("Unhandled GPU!");
+ }
+}
+
+std::pair<FeatureError, StringRef>
+AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
+ StringMap<bool> &Features) {
// XXX - What does the member GPU mean if device name string passed here?
if (T.isSPIRV() && T.getOS() == Triple::OSType::AMDHSA) {
// AMDGCN SPIRV must support the union of all AMDGCN features. This list
@@ -434,280 +746,9 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["wavefrontsize32"] = true;
Features["wavefrontsize64"] = true;
} else if (T.isAMDGCN()) {
- AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU);
- switch (Kind) {
- case GK_GFX1250:
- Features["ci-insts"] = true;
- Features["dot7-insts"] = true;
- Features["dot8-insts"] = true;
- Features["dl-insts"] = true;
- Features["16-bit-insts"] = true;
- Features["dpp"] = true;
- Features["gfx8-insts"] = true;
- Features["gfx9-insts"] = true;
- Features["gfx10-insts"] = true;
- Features["gfx10-3-insts"] = true;
- Features["gfx11-insts"] = true;
- Features["gfx12-insts"] = true;
- Features["gfx1250-insts"] = true;
- Features["bitop3-insts"] = true;
- Features["prng-inst"] = true;
- Features["tanh-insts"] = true;
- Features["tensor-cvt-lut-insts"] = true;
- Features["transpose-load-f4f6-insts"] = true;
- Features["bf16-trans-insts"] = true;
- Features["bf16-cvt-insts"] = true;
- Features["fp8-conversion-insts"] = true;
- Features["fp8e5m3-insts"] = true;
- Features["permlane16-swap"] = true;
- Features["ashr-pk-insts"] = true;
- Features["atomic-buffer-pk-add-bf16-inst"] = true;
- Features["vmem-pref-insts"] = true;
- Features["atomic-fadd-rtn-insts"] = true;
- Features["atomic-buffer-global-pk-add-f16-insts"] = true;
- Features["atomic-flat-pk-add-16-insts"] = true;
- Features["atomic-global-pk-add-bf16-inst"] = true;
- Features["atomic-ds-pk-add-16-insts"] = true;
- Features["setprio-inc-wg-inst"] = true;
- Features["atomic-fmin-fmax-global-f32"] = true;
- Features["atomic-fmin-fmax-global-f64"] = true;
- Features["wavefrontsize32"] = true;
- break;
- case GK_GFX1201:
- case GK_GFX1200:
- case GK_GFX12_GENERIC:
- Features["ci-insts"] = true;
- Features["dot7-insts"] = true;
- Features["dot8-insts"] = true;
- Features["dot9-insts"] = true;
- Features["dot10-insts"] = true;
- Features["dot11-insts"] = true;
- Features["dot12-insts"] = true;
- Features["dl-insts"] = true;
- Features["atomic-ds-pk-add-16-insts"] = true;
- Features["atomic-flat-pk-add-16-insts"] = true;
- Features["atomic-buffer-global-pk-add-f16-insts"] = true;
- Features["atomic-buffer-pk-add-bf16-inst"] = true;
- Features["atomic-global-pk-add-bf16-inst"] = true;
- Features["16-bit-insts"] = true;
- Features["dpp"] = true;
- Features["gfx8-insts"] = true;
- Features["gfx9-insts"] = true;
- Features["gfx10-insts"] = true;
- Features["gfx10-3-insts"] = true;
- Features["gfx11-insts"] = true;
- Features["gfx12-insts"] = true;
- Features["atomic-fadd-rtn-insts"] = true;
- Features["image-insts"] = true;
- Features["fp8-conversion-insts"] = true;
- Features["atomic-fmin-fmax-global-f32"] = true;
- break;
- case GK_GFX1153:
- case GK_GFX1152:
- case GK_GFX1151:
- case GK_GFX1150:
- case GK_GFX1103:
- case GK_GFX1102:
- case GK_GFX1101:
- case GK_GFX1100:
- case GK_GFX11_GENERIC:
- Features["ci-insts"] = true;
- Features["dot5-insts"] = true;
- Features["dot7-insts"] = true;
- Features["dot8-insts"] = true;
- Features["dot9-insts"] = true;
- Features["dot10-insts"] = true;
- Features["dot12-insts"] = true;
- Features["dl-insts"] = true;
- Features["16-bit-insts"] = true;
- Features["dpp"] = true;
- Features["gfx8-insts"] = true;
- Features["gfx9-insts"] = true;
- Features["gfx10-insts"] = true;
- Features["gfx10-3-insts"] = true;
- Features["gfx11-insts"] = true;
- Features["atomic-fadd-rtn-insts"] = true;
- Features["image-insts"] = true;
- Features["gws"] = true;
- Features["atomic-fmin-fmax-global-f32"] = true;
- break;
- case GK_GFX1036:
- case GK_GFX1035:
- case GK_GFX1034:
- case GK_GFX1033:
- case GK_GFX1032:
- case GK_GFX1031:
- case GK_GFX1030:
- case GK_GFX10_3_GENERIC:
- Features["ci-insts"] = true;
- Features["dot1-insts"] = true;
- Features["dot2-insts"] = true;
- Features["dot5-insts"] = true;
- Features["dot6-insts"] = true;
- Features["dot7-insts"] = true;
- Features["dot10-insts"] = true;
- Features["dl-insts"] = true;
- Features["16-bit-insts"] = true;
- Features["dpp"] = true;
- Features["gfx8-insts"] = true;
- Features["gfx9-insts"] = true;
- Features["gfx10-insts"] = true;
- Features["gfx10-3-insts"] = true;
- Features["image-insts"] = true;
- Features["s-memrealtime"] = true;
- Features["s-memtime-inst"] = true;
- Features["gws"] = true;
- Features["vmem-to-lds-load-insts"] = true;
- Features["atomic-fmin-fmax-global-f32"] = true;
- Features["atomic-fmin-fmax-global-f64"] = true;
- break;
- case GK_GFX1012:
- case GK_GFX101...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
24dfb4b
to
eb6c9b5
Compare
If wavefrontsize32 or wavefrontsize64 is the only possible value, insert it into the feature list by default and use that value as an indication that the other wavefront size is not legal.
Factoring in flang, just have a single fillAMDGPUFeatureMap function doing it all as the external interface and returning an error.
c7e8319
to
7ffe0f9
Compare
d991837
to
1e98c6b
Compare
…ture' into users/rampitec/08-25-_amdgpu_more_radical_feature_initialization_refactoring
…ture' into users/rampitec/08-25-_amdgpu_more_radical_feature_initialization_refactoring
Do you want me to squash it with parent? I do not mind either way, just split so it is easier to review. |
Squashing seems to be cleaner. |
Graphite seems to be really dumb: I have the squashed commit, but there is no way to publish it:
|
I will just commit both, because these dances around our infrastructure are simply ridiculous. |
…ature_initialization_refactoring
Factoring in flang, just have a single fillAMDGPUFeatureMap
function doing it all as the external interface and returning
an error.