Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions include/taco/lower/lowerer_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,11 @@ class LowererImpl : public util::Uncopyable {
/// tensors, instead of the full tensor.
ir::Expr searchForStartOfWindowPosition(Iterator iterator, ir::Expr start, ir::Expr end);

/// Expression that returns the end of a window to iterate over
/// in a compressed iterator. It is used when operating over windows of
/// tensors, instead of the full tensor.
ir::Expr searchForEndOfWindowPosition(Iterator iterator, ir::Expr start, ir::Expr end);

/// Statement that guards against going out of bounds of the window that
/// the input iterator was configured with.
ir::Stmt upperBoundGuardForWindowPosition(Iterator iterator, ir::Expr access);
Expand Down
29 changes: 27 additions & 2 deletions src/lower/lowerer_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1022,7 +1022,12 @@ Stmt LowererImpl::lowerForallPosition(Forall forall, Iterator iterator,
// variable from the windowed space.
if (iterator.isWindowed()) {
coordinateArray = this->projectWindowedPositionToCanonicalSpace(iterator, coordinateArray);
boundsGuard = this->upperBoundGuardForWindowPosition(iterator, coordinate);
// If this forall is being parallelized via CPU threads (OpenMP), then we can't
// emit a `break` statement, since OpenMP doesn't support breaking out of a
// parallel loop. Instead, we'll bound the top of the loop and omit the check.
if (forall.getParallelUnit() != ParallelUnit::CPUThread) {
boundsGuard = this->upperBoundGuardForWindowPosition(iterator, coordinate);
}
}
declareCoordinate = VarDecl::make(coordinate, coordinateArray);
}
Expand Down Expand Up @@ -1060,7 +1065,14 @@ Stmt LowererImpl::lowerForallPosition(Forall forall, Iterator iterator,
// If we have a window on this iterator, then search for the start of
// the window rather than starting at the beginning of the level.
if (iterator.isWindowed()) {
startBound = this->searchForStartOfWindowPosition(iterator, startBound, endBound);
auto startBoundCopy = startBound;
startBound = this->searchForStartOfWindowPosition(iterator, startBound, endBound);
// As discussed above, if this position loop is parallelized over CPU
// threads (OpenMP), then we need to have an explicit upper bound to
// the for loop, instead of breaking out of the loop in the middle.
if (forall.getParallelUnit() == ParallelUnit::CPUThread) {
endBound = this->searchForEndOfWindowPosition(iterator, startBoundCopy, endBound);
}
}
} else {
taco_iassert(iterator.isOrdered() && iterator.getParent().isOrdered());
Expand Down Expand Up @@ -2795,6 +2807,19 @@ Expr LowererImpl::searchForStartOfWindowPosition(Iterator iterator, ir::Expr sta
return Call::make("taco_binarySearchAfter", args, Datatype::UInt64);
}

Expr LowererImpl::searchForEndOfWindowPosition(Iterator iterator, ir::Expr start, ir::Expr end) {
  // Only windowed iterators have a window upper bound to search for.
  taco_iassert(iterator.isWindowed());
  // Binary-search the level's `crd` array (array index 1 of the mode pack)
  // within positions [start, end) for the first position at or past the
  // window's upper bound, i.e. the exclusive end of the window.
  auto crdArray = iterator.getMode().getModePack().getArray(1);
  vector<Expr> searchArgs;
  searchArgs.push_back(crdArray);
  searchArgs.push_back(start);
  searchArgs.push_back(end);
  searchArgs.push_back(iterator.getWindowUpperBound());
  return Call::make("taco_binarySearchAfter", searchArgs, Datatype::UInt64);
}

Stmt LowererImpl::upperBoundGuardForWindowPosition(Iterator iterator, ir::Expr access) {
taco_iassert(iterator.isWindowed());
return ir::IfThenElse::make(
Expand Down
76 changes: 38 additions & 38 deletions test/tests-windowing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@ using namespace taco;
// tensor with a mix of IndexVars and WindowedIndexVars.
TEST(windowing, mixIndexing) {
auto dim = 10;
Tensor<int> a("a", {dim, dim, dim, dim, dim}, {Dense, Dense, Dense, Dense, Dense});
Tensor<int> a("a", {dim, dim, dim, dim, dim}, Format{Dense, Dense, Dense, Dense, Dense});
IndexVar i, j, k, l, m;
auto w1 = a(i, j(1, 3), k, l(4, 5), m(6, 7));
auto w2 = a(i(1, 3), j(2, 4), k, l, m(3, 5));
}

TEST(windowing, boundsChecks) {
Tensor<int> a("a", {5}, {Dense});
Tensor<int> a("a", {5}, Format{Dense});
IndexVar i("i");
ASSERT_THROWS_EXCEPTION_WITH_ERROR([&]() { a(i(-1, 4)); }, "slice lower bound");
ASSERT_THROWS_EXCEPTION_WITH_ERROR([&]() { a(i(0, 10)); }, "slice upper bound");
Expand All @@ -29,10 +29,10 @@ TEST(windowing, boundsChecks) {
// in the same expression.
TEST(windowing, sliceMultipleWays) {
auto dim = 10;
Tensor<int> a("a", {dim}, {Dense});
Tensor<int> b("b", {dim}, {Sparse});
Tensor<int> c("c", {dim}, {Dense});
Tensor<int> expected("expected", {dim}, {Dense});
Tensor<int> a("a", {dim}, Format{Dense});
Tensor<int> b("b", {dim}, Format{Sparse});
Tensor<int> c("c", {dim}, Format{Dense});
Tensor<int> expected("expected", {dim}, Format{Dense});
for (int i = 0; i < dim; i++) {
a.insert({i}, i);
b.insert({i}, i);
Expand All @@ -51,28 +51,28 @@ TEST(windowing, sliceMultipleWays) {
// of the input tensors and formats for each of the tensors in the computation.
struct basic : public TestWithParam<std::tuple<int, ModeFormat, ModeFormat, ModeFormat>> {};
TEST_P(basic, windowing){
Tensor<int> expectedAdd("expectedAdd", {2, 2}, {Dense, Dense});
Tensor<int> expectedAdd("expectedAdd", {2, 2}, Format{Dense, Dense});
expectedAdd.insert({0, 0}, 14);
expectedAdd.insert({0, 1}, 17);
expectedAdd.insert({1, 0}, 17);
expectedAdd.insert({1, 1}, 20);
expectedAdd.pack();
Tensor<int> expectedMul("expectedMul", {2, 2}, {Dense, Dense});
Tensor<int> expectedMul("expectedMul", {2, 2}, Format{Dense, Dense});
expectedMul.insert({0, 0}, 64);
expectedMul.insert({0, 1}, 135);
expectedMul.insert({1, 0}, 135);
expectedMul.insert({1, 1}, 240);
expectedMul.pack();
Tensor<int> d("d", {2, 2}, {Dense, Dense});
Tensor<int> d("d", {2, 2}, Format{Dense, Dense});

// The test is parameterized by a dimension, and formats for the different tensors.
auto dim = std::get<0>(GetParam());
auto x = std::get<1>(GetParam());
auto y = std::get<2>(GetParam());
auto z = std::get<3>(GetParam());
Tensor<int> a("a", {dim, dim}, {Dense, x});
Tensor<int> b("b", {dim, dim}, {Dense, y});
Tensor<int> c("c", {dim, dim}, {Dense, z});
Tensor<int> a("a", {dim, dim}, Format{Dense, x});
Tensor<int> b("b", {dim, dim}, Format{Dense, y});
Tensor<int> c("c", {dim, dim}, Format{Dense, z});
for (int i = 0; i < dim; i++) {
for (int j = 0; j < dim; j++) {
a.insert({i, j}, i + j);
Expand Down Expand Up @@ -111,17 +111,17 @@ INSTANTIATE_TEST_CASE_P(
struct slicedOutput : public TestWithParam<std::tuple<ModeFormat, ModeFormat>> {};
TEST_P(slicedOutput, windowing) {
auto dim = 10;
Tensor<int> expected("expected", {10, 10}, {Dense, Dense});
Tensor<int> expected("expected", {10, 10}, Format{Dense, Dense});
expected.insert({8, 8}, 12);
expected.insert({8, 9}, 14);
expected.insert({9, 8}, 14);
expected.insert({9, 9}, 16);
expected.pack();
auto x = std::get<0>(GetParam());
auto y = std::get<1>(GetParam());
Tensor<int> a("a", {dim, dim}, {Dense, x});
Tensor<int> b("b", {dim, dim}, {Dense, y});
Tensor<int> c("c", {dim, dim}, {Dense, Dense});
Tensor<int> a("a", {dim, dim}, Format{Dense, x});
Tensor<int> b("b", {dim, dim}, Format{Dense, y});
Tensor<int> c("c", {dim, dim}, Format{Dense, Dense});
for (int i = 0; i < dim; i++) {
for (int j = 0; j < dim; j++) {
a.insert({i, j}, i + j);
Expand Down Expand Up @@ -152,15 +152,15 @@ TEST_P(matrixMultiply, windowing) {
auto dim = 10;
auto windowDim = 4;

Tensor<int> a("a", {windowDim, windowDim}, {Dense, Dense});
Tensor<int> b("b", {windowDim, windowDim}, {Dense, Dense});
Tensor<int> c("c", {windowDim, windowDim}, {Dense, Dense});
Tensor<int> expected("expected", {windowDim, windowDim}, {Dense, Dense});
Tensor<int> a("a", {windowDim, windowDim}, Format{Dense, Dense});
Tensor<int> b("b", {windowDim, windowDim}, Format{Dense, Dense});
Tensor<int> c("c", {windowDim, windowDim}, Format{Dense, Dense});
Tensor<int> expected("expected", {windowDim, windowDim}, Format{Dense, Dense});

auto x = std::get<0>(GetParam());
auto y = std::get<1>(GetParam());
Tensor<int> aw("aw", {dim, dim}, {Dense, x});
Tensor<int> bw("bw", {dim, dim}, {Dense, y});
Tensor<int> aw("aw", {dim, dim}, Format{Dense, x});
Tensor<int> bw("bw", {dim, dim}, Format{Dense, y});
for (int i = 0; i < dim; i++) {
for (int j = 0; j < dim; j++) {
aw.insert({i, j}, i + j);
Expand Down Expand Up @@ -198,17 +198,17 @@ struct workspace : public TestWithParam<std::tuple<ModeFormat, ModeFormat>> {};
TEST_P(workspace, windowing) {
auto dim = 10;
size_t windowDim = 4;
Tensor<int> d("d", {static_cast<int>(windowDim)}, {Dense});
Tensor<int> expected("expected", {static_cast<int>(windowDim)}, {Dense});
Tensor<int> d("d", {static_cast<int>(windowDim)}, Format{Dense});
Tensor<int> expected("expected", {static_cast<int>(windowDim)}, Format{Dense});
expected.insert({0}, 8); expected.insert({1}, 11);
expected.insert({2}, 14); expected.insert({3}, 17);
expected.pack();

auto x = std::get<0>(GetParam());
auto y = std::get<1>(GetParam());
Tensor<int> a("a", {dim}, {x});
Tensor<int> b("b", {dim}, {y});
Tensor<int> c("c", {dim}, {Dense});
Tensor<int> a("a", {dim}, Format{x});
Tensor<int> b("b", {dim}, Format{y});
Tensor<int> c("c", {dim}, Format{Dense});
for (int i = 0; i < dim; i++) {
a.insert({i}, i);
b.insert({i}, i);
Expand Down Expand Up @@ -237,7 +237,7 @@ INSTANTIATE_TEST_CASE_P(
// transformations and different mode formats.
TEST(windowing, transformations) {
auto dim = 10;
Tensor<int> expected("expected", {2, 2}, {Dense, Dense});
Tensor<int> expected("expected", {2, 2}, Format{Dense, Dense});
expected.insert({0, 0}, 12);
expected.insert({0, 1}, 14);
expected.insert({1, 0}, 14);
Expand Down Expand Up @@ -300,20 +300,20 @@ TEST_P(assignment, windowing) {
IndexVar i, j;

// First assign a window of A to a window of B.
Tensor<int> B("B", {dim, dim}, {Dense, Dense});
Tensor<int> B("B", {dim, dim}, Format{Dense, Dense});
B(i(2, 4), j(3, 5)) = A(i(4, 6), j(5, 7));
B.evaluate();
Tensor<int> expected("expected", {dim, dim}, {Dense, Dense});
Tensor<int> expected("expected", {dim, dim}, Format{Dense, Dense});
expected.insert({2, 3}, 9); expected.insert({2, 4}, 10);
expected.insert({3, 3}, 10); expected.insert({3, 4}, 11);
expected.pack();
ASSERT_TRUE(equals(B, expected)) << B << std::endl << expected << std::endl;

// Assign a window of A to b.
B = Tensor<int>("B", {2, 2}, {Dense, Dense});
B = Tensor<int>("B", {2, 2}, Format{Dense, Dense});
B(i, j) = A(i(4, 6), j(5, 7));
B.evaluate();
expected = Tensor<int>("expected", {2, 2}, {Dense, Dense});
expected = Tensor<int>("expected", {2, 2}, Format{Dense, Dense});
expected.insert({0, 0}, 9); expected.insert({0, 1}, 10);
expected.insert({1, 0}, 10); expected.insert({1, 1}, 11);
expected.pack();
Expand All @@ -324,10 +324,10 @@ TEST_P(assignment, windowing) {
A.insert({0, 0}, 0); A.insert({0, 1}, 1);
A.insert({1, 0}, 1); A.insert({1, 1}, 2);
A.pack();
B = Tensor<int>("B", {dim, dim}, {Dense, Dense});
B = Tensor<int>("B", {dim, dim}, Format{Dense, Dense});
B(i(4, 6), j(5, 7)) = A(i, j);
B.evaluate();
expected = Tensor<int>("expected", {dim, dim}, {Dense, Dense});
expected = Tensor<int>("expected", {dim, dim}, Format{Dense, Dense});
expected.insert({4, 5}, 0); expected.insert({4, 6}, 1);
expected.insert({5, 5}, 1); expected.insert({5, 6}, 2);
expected.pack();
Expand All @@ -347,16 +347,16 @@ TEST_P(cuda, windowing) {
return;
}
auto dim = 10;
Tensor<int> expected("expected", {2, 2}, {Dense, Dense});
Tensor<int> expected("expected", {2, 2}, Format{Dense, Dense});
expected.insert({0, 0}, 12); expected.insert({0, 1}, 14);
expected.insert({1, 0}, 14); expected.insert({1, 1}, 16);
expected.pack();

auto x = std::get<0>(GetParam());
auto y = std::get<1>(GetParam());
Tensor<int> a("a", {dim, dim}, {Dense, x});
Tensor<int> b("b", {dim, dim}, {Dense, y});
Tensor<int> c("c", {2, 2}, {Dense, Dense});
Tensor<int> a("a", {dim, dim}, Format{Dense, x});
Tensor<int> b("b", {dim, dim}, Format{Dense, y});
Tensor<int> c("c", {2, 2}, Format{Dense, Dense});

for (int i = 0; i < dim; i++) {
for (int j = 0; j < dim; j++) {
Expand Down