From d55784cd8744ecc0506d656ad81f8bf051bf5873 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Thu, 30 Oct 2025 00:21:49 -0700 Subject: [PATCH] [HLSL][Matrix] Change MatrixSubscriptExpr flattened index to row major fixes #165663 After experimenting with clang vs DXC via the offload test suite. Noticed clang was doing column major init: ``` 1,3,5 2,4,6 [row][col] --> [0][0] = 1 [0][1] = 3 [0][2] = 5 [1][0] = 2 [1][1] = 4 [1][2] = 6 ``` While DXC was doing row major indexing ``` 1,2,3, 4,5,6 [row][col] --> [0][0] = 1 [0][1] = 2 [0][2] = 3 [1][0] = 4 [1][1] = 5 [1][2] = 6 ``` Reading the docs See https://learn.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-per-component-math#matrix-ordering ``` Row-major and column-major packing order has no influence on the packing order of constructors (which always follows row-major ordering). ``` So changed up the flatten index and cofirmed it did not change for c\c++ via testing. --- clang/lib/CodeGen/CGExpr.cpp | 17 ++++++--- clang/lib/CodeGen/CGExprScalar.cpp | 12 +++++-- clang/test/CodeGen/matrix-type-indexing.c | 35 +++++++++++++++++++ .../test/CodeGenCXX/matrix-type-indexing.cpp | 35 +++++++++++++++++++ .../BasicFeatures/matrix-type-indexing.hlsl | 30 ++++++++++++++++ llvm/include/llvm/IR/MatrixBuilder.h | 17 +++++++-- 6 files changed, 137 insertions(+), 9 deletions(-) create mode 100644 clang/test/CodeGen/matrix-type-indexing.c create mode 100644 clang/test/CodeGenCXX/matrix-type-indexing.cpp create mode 100644 clang/test/CodeGenHLSL/BasicFeatures/matrix-type-indexing.hlsl diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 01f2161f27555..208eb98972b23 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -4824,11 +4824,18 @@ LValue CodeGenFunction::EmitMatrixSubscriptExpr(const MatrixSubscriptExpr *E) { llvm::Value *RowIdx = EmitMatrixIndexExpr(E->getRowIdx()); llvm::Value *ColIdx = EmitMatrixIndexExpr(E->getColumnIdx()); - llvm::Value *NumRows = Builder.getIntN( - RowIdx->getType()->getScalarSizeInBits(), - E->getBase()->getType()->castAs()->getNumRows()); - llvm::Value *FinalIdx = - Builder.CreateAdd(Builder.CreateMul(ColIdx, NumRows), RowIdx); + llvm::Value *FinalIdx; + if (getLangOpts().HLSL) { + llvm::Value *NumCols = Builder.getIntN( + RowIdx->getType()->getScalarSizeInBits(), + E->getBase()->getType()->castAs()->getNumColumns()); + FinalIdx = Builder.CreateAdd(Builder.CreateMul(RowIdx, NumCols), ColIdx); + } else { + llvm::Value *NumRows = Builder.getIntN( + RowIdx->getType()->getScalarSizeInBits(), + E->getBase()->getType()->castAs()->getNumRows()); + FinalIdx = Builder.CreateAdd(Builder.CreateMul(ColIdx, NumRows), RowIdx); + } return LValue::MakeMatrixElt( MaybeConvertMatrixAddress(Base.getAddress(), *this), FinalIdx, E->getBase()->getType(), Base.getBaseInfo(), TBAAAccessInfo()); diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 714192db1b15c..5220621fe7a70 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -2118,9 +2118,17 @@ Value *ScalarExprEmitter::VisitMatrixSubscriptExpr(MatrixSubscriptExpr *E) { Value *ColumnIdx = CGF.EmitMatrixIndexExpr(E->getColumnIdx()); const auto *MatrixTy = E->getBase()->getType()->castAs(); - unsigned NumRows = MatrixTy->getNumRows(); + llvm::MatrixBuilder MB(Builder); - Value *Idx = MB.CreateIndex(RowIdx, ColumnIdx, NumRows); + Value *Idx; + if (CGF.getLangOpts().HLSL) { + unsigned NumCols = MatrixTy->getNumColumns(); + Idx = MB.createRowMajorIndex(RowIdx, ColumnIdx, NumCols); + } else { + unsigned NumRows = MatrixTy->getNumRows(); + Idx = MB.createColumnMajorIndex(RowIdx, ColumnIdx, NumRows); + } + if (CGF.CGM.getCodeGenOpts().OptimizationLevel > 0) MB.CreateIndexAssumption(Idx, MatrixTy->getNumElementsFlattened()); diff --git a/clang/test/CodeGen/matrix-type-indexing.c b/clang/test/CodeGen/matrix-type-indexing.c new file mode 100644 index 0000000000000..19b9f02f5b0a4 --- /dev/null +++ b/clang/test/CodeGen/matrix-type-indexing.c @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s + + typedef float fx2x3_t __attribute__((matrix_type(2, 3))); + float Out[6]; + +void one_index_test(int index, fx2x3_t M) { + // CHECK-LABEL: one_index_test + // CHECK: %row = alloca i32, align 4 + // CHECK: %col = alloca i32, align 4 + // CHECK: [[row_load:%.*]] = load i32, ptr %row, align 4 + // CHECK-NEXT: [[row_load_zext:%.*]] = zext i32 [[row_load]] to i64 + // CHECK-NEXT: [[col_load:%.*]] = load i32, ptr %col, align 4 + // CHECK-NEXT: [[col_load_zext:%.*]] = zext i32 [[col_load]] to i64 + // CHECK-NEXT: [[col_offset:%.*]] = mul i64 [[col_load_zext]], 2 + // CHECK-NEXT: [[col_major_index:%.*]] = add i64 [[col_offset]], [[row_load_zext]] + // CHECK-NEXT: [[matrix_as_vec:%.*]] = load <6 x float>, ptr %M.addr, align 4 + // CHECK-NEXT: %matrixext = extractelement <6 x float> [[matrix_as_vec]], i64 [[col_major_index]] + const unsigned int COLS = 3; + unsigned int row = index / COLS; + unsigned int col = index % COLS; + Out[index] = M[row][col]; +} + +float two_index_test(int row, int col, fx2x3_t M) { + // CHECK-LABEL: two_index_test + // CHECK: [[row_load:%.*]] = load i32, ptr %row.addr, align 4 + // CHECK-NEXT: [[row_load_sext:%.*]] = sext i32 [[row_load]] to i64 + // CHECK-NEXT: [[col_load:%.*]] = load i32, ptr %col.addr, align 4 + // CHECK-NEXT: [[col_load_sext:%.*]] = sext i32 [[col_load]] to i64 + // CHECK-NEXT: [[col_offset:%.*]] = mul i64 [[col_load_sext]], 2 + // CHECK-NEXT: [[col_major_index:%.*]] = add i64 [[col_offset]], [[row_load_sext]] + // CHECK-NEXT: [[matrix_as_vec:%.*]] = load <6 x float>, ptr %M.addr, align 4 + // CHECK-NEXT: %matrixext = extractelement <6 x float> [[matrix_as_vec]], i64 [[col_major_index]] + return M[row][col]; +} diff --git a/clang/test/CodeGenCXX/matrix-type-indexing.cpp b/clang/test/CodeGenCXX/matrix-type-indexing.cpp new file mode 100644 index 0000000000000..b0ed5dc1d717e --- /dev/null +++ b/clang/test/CodeGenCXX/matrix-type-indexing.cpp @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck %s + + typedef float fx2x3_t __attribute__((matrix_type(2, 3))); + float Out[6]; + +void one_index_test(int index, fx2x3_t M) { + // CHECK-LABEL: one_index_test + // CHECK: %row = alloca i32, align 4 + // CHECK: %col = alloca i32, align 4 + // CHECK: [[row_load:%.*]] = load i32, ptr %row, align 4 + // CHECK-NEXT: [[row_load_zext:%.*]] = zext i32 [[row_load]] to i64 + // CHECK-NEXT: [[col_load:%.*]] = load i32, ptr %col, align 4 + // CHECK-NEXT: [[col_load_zext:%.*]] = zext i32 [[col_load]] to i64 + // CHECK-NEXT: [[col_offset:%.*]] = mul i64 [[col_load_zext]], 2 + // CHECK-NEXT: [[col_major_index:%.*]] = add i64 [[col_offset]], [[row_load_zext]] + // CHECK-NEXT: [[matrix_as_vec:%.*]] = load <6 x float>, ptr %M.addr, align 4 + // CHECK-NEXT: %matrixext = extractelement <6 x float> [[matrix_as_vec]], i64 [[col_major_index]] + const unsigned int COLS = 3; + unsigned int row = index / COLS; + unsigned int col = index % COLS; + Out[index] = M[row][col]; +} + +float two_index_test(int row, int col, fx2x3_t M) { + // CHECK-LABEL: two_index_test + // CHECK: [[row_load:%.*]] = load i32, ptr %row.addr, align 4 + // CHECK-NEXT: [[row_load_sext:%.*]] = sext i32 [[row_load]] to i64 + // CHECK-NEXT: [[col_load:%.*]] = load i32, ptr %col.addr, align 4 + // CHECK-NEXT: [[col_load_sext:%.*]] = sext i32 [[col_load]] to i64 + // CHECK-NEXT: [[col_offset:%.*]] = mul i64 [[col_load_sext]], 2 + // CHECK-NEXT: [[col_major_index:%.*]] = add i64 [[col_offset]], [[row_load_sext]] + // CHECK-NEXT: [[matrix_as_vec:%.*]] = load <6 x float>, ptr %M.addr, align 4 + // CHECK-NEXT: %matrixext = extractelement <6 x float> [[matrix_as_vec]], i64 [[col_major_index]] + return M[row][col]; +} diff --git a/clang/test/CodeGenHLSL/BasicFeatures/matrix-type-indexing.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/matrix-type-indexing.hlsl new file mode 100644 index 0000000000000..ff63b23311e77 --- /dev/null +++ b/clang/test/CodeGenHLSL/BasicFeatures/matrix-type-indexing.hlsl @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -o - | FileCheck %s + +// NOTE: This test is only to confirm we can do codgen with the matrix alias. + +RWBuffer Out : register(u1); + +void one_index_test(int index, half2x3 M) { + // CHECK-LABEL: one_index_test + // CHECK: %row = alloca i32, align 4 + // CHECK: %col = alloca i32, align 4 + // CHECK: [[row_load:%.*]] = load i32, ptr %row, align 4 + // CHECK-NEXT: [[col_load:%.*]] = load i32, ptr %col, align 4 + // CHECK-NEXT: [[row_offset:%.*]] = mul i32 [[row_load]], 3 + // CHECK-NEXT: [[row_major_index:%.*]] = add i32 [[row_offset]], [[col_load]] + // CHECK-NEXT: [[matrix_as_vec:%.*]] = load <6 x half>, ptr %M.addr, align 2 + // CHECK-NEXT: %matrixext = extractelement <6 x half> [[matrix_as_vec]], i32 [[row_major_index]] + const uint COLS = 3; + uint row = index / COLS; + uint col = index % COLS; + Out[index] = M[row][col]; +} + +half two_index_test(int row, int col, half2x3 M) { + // CHECK-LABEL: two_index_test + // CHECK: [[row_offset:%.*]] = mul i32 [[row_load:%.*]], 3 + // CHECK-NEXT: [[row_major_index:%.*]] = add i32 [[row_offset]], [[col_load:%.*]] + // CHECK-NEXT: [[matrix_as_vec:%.*]] = load <6 x half>, ptr %M.addr, align 2 + // CHECK-NEXT: %matrixext = extractelement <6 x half> [[matrix_as_vec]], i32 [[row_major_index]] + return M[row][col]; +} diff --git a/llvm/include/llvm/IR/MatrixBuilder.h b/llvm/include/llvm/IR/MatrixBuilder.h index 3a04ca87f2b55..f90cd92096103 100644 --- a/llvm/include/llvm/IR/MatrixBuilder.h +++ b/llvm/include/llvm/IR/MatrixBuilder.h @@ -241,8 +241,8 @@ class MatrixBuilder { /// Compute the index to access the element at (\p RowIdx, \p ColumnIdx) from /// a matrix with \p NumRows embedded in a vector. - Value *CreateIndex(Value *RowIdx, Value *ColumnIdx, unsigned NumRows, - Twine const &Name = "") { + Value *createColumnMajorIndex(Value *RowIdx, Value *ColumnIdx, + unsigned NumRows, Twine const &Name = "") { unsigned MaxWidth = std::max(RowIdx->getType()->getScalarSizeInBits(), ColumnIdx->getType()->getScalarSizeInBits()); Type *IntTy = IntegerType::get(RowIdx->getType()->getContext(), MaxWidth); @@ -251,6 +251,19 @@ class MatrixBuilder { Value *NumRowsV = B.getIntN(MaxWidth, NumRows); return B.CreateAdd(B.CreateMul(ColumnIdx, NumRowsV), RowIdx); } + + /// Compute the index to access the element at (\p RowIdx, \p ColumnIdx) from + /// a matrix with \p NumRows embedded in a vector. + Value *createRowMajorIndex(Value *RowIdx, Value *ColumnIdx, unsigned NumCols, + Twine const &Name = "") { + unsigned MaxWidth = std::max(RowIdx->getType()->getScalarSizeInBits(), + ColumnIdx->getType()->getScalarSizeInBits()); + Type *IntTy = IntegerType::get(RowIdx->getType()->getContext(), MaxWidth); + RowIdx = B.CreateZExt(RowIdx, IntTy); + ColumnIdx = B.CreateZExt(ColumnIdx, IntTy); + Value *NumColsV = B.getIntN(MaxWidth, NumCols); + return B.CreateAdd(B.CreateMul(RowIdx, NumColsV), ColumnIdx); + } }; } // end namespace llvm