From 18e97c24a1eb51e90791466f2e87b0ac1c5bf52e Mon Sep 17 00:00:00 2001
From: RaggleDodo
Date: Tue, 1 Jan 2019 21:45:14 -0800
Subject: [PATCH] fix bwd for mappedtensors with dissimilar shaper and coorder

---
 ade/cmap.hpp             |  34 +++--
 ade/src/cmap.cpp         |  42 +++++++
 bwd/src/grader.cpp       |  10 +-
 bwd/test/test_grader.cpp | 264 +++++++++++++++++++++++++++++++++++----
 4 files changed, 302 insertions(+), 48 deletions(-)

diff --git a/ade/cmap.hpp b/ade/cmap.hpp
index 6c0e750de..8d6c075b3 100644
--- a/ade/cmap.hpp
+++ b/ade/cmap.hpp
@@ -19,7 +19,7 @@ struct MappedTensor final
             logs::fatal("cannot map a null tensor");
         }
         map_io_ = tensor_->shape().n_elems() > shape().n_elems();
-        if (shaper == ade::identity || map_io_)
+        if (shaper == identity || map_io_)
         {
             coorder_ = shaper;
         }
@@ -42,25 +42,7 @@ struct MappedTensor final
     }
 
     /// Return shape of tensor filtered through coordinate mapper
-    Shape shape (void) const
-    {
-        const Shape& shape = tensor_->shape();
-        CoordT out;
-        CoordT in;
-        std::copy(shape.begin(), shape.end(), in.begin());
-        shaper_->forward(out.begin(), in.begin());
-        std::vector<DimT> slist(rank_cap);
-        std::transform(out.begin(), out.end(), slist.begin(),
-            [](CDimT cd) -> DimT
-            {
-                if (cd < 0)
-                {
-                    cd = -cd - 1;
-                }
-                return std::round(cd);
-            });
-        return Shape(slist);
-    }
+    Shape shape (void) const;
 
     TensptrT get_tensor (void) const
     {
@@ -83,6 +65,18 @@ struct MappedTensor final
         return coorder_;
     }
 
+    /// Return MappedTensor connecting this instance to lhs'
+    /// shaper and coorder info
+    MappedTensor connect (MappedTensor lhs) const;
+
+    /// Return MappedTensor taking input tens and the reverse of
+    /// this instance's shaper and coorder info
+    MappedTensor reverse (TensptrT tens) const
+    {
+        return MappedTensor(tens, CoordptrT(shaper_->reverse()),
+            !map_io_, coorder_);
+    }
+
 private:
     /// Tensor reference
     TensptrT tensor_;
diff --git a/ade/src/cmap.cpp b/ade/src/cmap.cpp
index 5c6523ccd..54a9536d5 100644
--- a/ade/src/cmap.cpp
+++ b/ade/src/cmap.cpp
@@ -5,6 +5,48 @@
 namespace ade
 {
 
+static Shape calc_shape (CoordptrT shaper, const Shape& shape)
+{
+    CoordT out;
+    CoordT in;
+    std::copy(shape.begin(), shape.end(), in.begin());
+    shaper->forward(out.begin(), in.begin());
+    std::vector<DimT> slist(rank_cap);
+    std::transform(out.begin(), out.end(), slist.begin(),
+        [](CDimT cd) -> DimT
+        {
+            if (cd < 0)
+            {
+                cd = -cd - 1;
+            }
+            return std::round(cd);
+        });
+    return Shape(slist);
+}
+
+Shape MappedTensor::shape (void) const
+{
+    return calc_shape(shaper_, tensor_->shape());
+}
+
+MappedTensor MappedTensor::connect (MappedTensor lhs) const
+{
+    CoordptrT outshaper(shaper_->connect(*lhs.get_shaper()));
+    Shape inshape = tensor_->shape();
+    Shape outshape = calc_shape(outshaper, inshape);
+    bool outmap_io = inshape.n_elems() > outshape.n_elems();
+    CoordptrT rhs_coorder = outmap_io == map_io_ ? coorder_ :
+        CoordptrT(coorder_->reverse());
+    CoordptrT lhs_coorder = lhs.get_coorder();
+    if (outmap_io != lhs.map_io())
+    {
+        lhs_coorder = CoordptrT(lhs_coorder->reverse());
+    }
+    CoordptrT outcoorder(outmap_io ?
+        rhs_coorder->connect(*lhs_coorder) :
+        lhs_coorder->connect(*rhs_coorder));
+    return MappedTensor(tensor_, outshaper, outmap_io, outcoorder);
+}
+
 MappedTensor identity_map (TensptrT tensor)
 {
     return MappedTensor(tensor, ade::identity);
diff --git a/bwd/src/grader.cpp b/bwd/src/grader.cpp
index 525a72a6c..9b63b6faa 100644
--- a/bwd/src/grader.cpp
+++ b/bwd/src/grader.cpp
@@ -62,7 +62,7 @@ void Grader::visit (ade::iFunctor* func)
     {
         ade::TensT args;
         ade::MappedTensor& child = children[i];
-        ade::CoordptrT bwd_shaper(child.get_shaper()->reverse());
+        ade::MappedTensor mapped_bwd = child.reverse(bwd);
         for (size_t j = 0; j < nchildren; ++j)
         {
             ade::MappedTensor& kid = children[j];
@@ -73,12 +73,10 @@ void Grader::visit (ade::iFunctor* func)
             }
             else
             {
-                ade::CoordptrT shaper(kid.get_shaper()->connect(*bwd_shaper));
                 // reverse children[j] to child's shape/coord space
                 args.push_back(ade::TensptrT(
                     ade::Functor::get(rules_->sum_opcode(), {
-                        ade::MappedTensor(tens, shaper),
-                    })));
+                        kid.connect(mapped_bwd)})));
             }
         }
         // pass down forward-gradient pair
@@ -88,9 +86,7 @@ void Grader::visit (ade::iFunctor* func)
             ade::Functor::get(rules_->prod_opcode(), {
                 ade::identity_map(grad),
                 ade::identity_map(ade::TensptrT(
-                    ade::Functor::get(rules_->sum_opcode(), {
-                        ade::MappedTensor(bwd, bwd_shaper),
-                    })
+                    ade::Functor::get(rules_->sum_opcode(), {mapped_bwd})
                 )),
             })));
 }
diff --git a/bwd/test/test_grader.cpp b/bwd/test/test_grader.cpp
index 8b44e11c4..1f82fecfe 100644
--- a/bwd/test/test_grader.cpp
+++ b/bwd/test/test_grader.cpp
@@ -860,13 +860,13 @@ TEST(GRADER, ReduceExtend)
 TEST(GRADER, PermuteReduce)
 {
-    std::vector<ade::DimT> slist = {3, 2, 1, 4};
+    std::vector<ade::DimT> slist = {4, 2, 1, 3};
     std::vector<ade::DimT> slist1 = {2, 3, 4, 5};
     ade::TensptrT outside(new MockTensor(ade::Shape({7})));
     ade::TensptrT leaf(new MockTensor(ade::Shape(slist)));
     ade::TensptrT leaf1(new MockTensor(ade::Shape(slist1)));
-    auto left = ade::permute_map(leaf, {1, 0, 3});
+    auto left = ade::permute_map(leaf, {1, 3, 0});
     auto right = ade::reduce_map(leaf1, 3, {5});
     ade::TensptrT fwd(
         ade::Functor::get(mock_rules->sum_opcode(), {left, right}));
@@ -901,21 +901,21 @@ TEST(GRADER, PermuteReduce)
     ostr << "([2\\3\\4\\1\\1\\1\\1\\1])\n";
     zstr << "([7\\1\\1\\1\\1\\1\\1\\1])\n";
     lstr <<
         "(*[4\\2\\1\\3\\1\\1\\1\\1])\n" <<
         " `--(*[4\\2\\1\\3\\1\\1\\1\\1])\n" << // chain rule (derivative of SUM is PROD)
         " | `--([4\\2\\1\\3\\1\\1\\1\\1])\n" <<
         " | `--(+[4\\2\\1\\3\\1\\1\\1\\1])\n" <<
         " | `--([2\\3\\4\\5\\1\\1\\1\\1])\n" <<
         " `--(+[4\\2\\1\\3\\1\\1\\1\\1])\n" << // derivative of leaf wrt leaf
         " `--([2\\3\\4\\1\\1\\1\\1\\1])\n";
     rstr <<
         "(*[2\\3\\4\\5\\1\\1\\1\\1])\n" <<
         " `--(*[2\\3\\4\\5\\1\\1\\1\\1])\n" << // chain rule
         " | `--(+[2\\3\\4\\5\\1\\1\\1\\1])\n" <<
         " | | `--([4\\2\\1\\3\\1\\1\\1\\1])\n" <<
         " | `--([2\\3\\4\\5\\1\\1\\1\\1])\n" <<
         " `--(+[2\\3\\4\\5\\1\\1\\1\\1])\n" << // derivative of leaf wrt leaf
         " `--([2\\3\\4\\1\\1\\1\\1\\1])\n";
 
     TREE_EQ(ostr, g1);
     TREE_EQ(zstr, g0);
@@ -939,24 +939,23 @@ TEST(GRADER, PermuteReduce)
         ASSERT_NE(nullptr, child);
 
         auto gchildren = child->get_children();
-        EXPECT_FALSE(gchildren[0].map_io());
+        EXPECT_TRUE(gchildren[0].map_io());
         auto target_shaper = gchildren[0].get_shaper();
         auto target_mapper = gchildren[0].get_coorder();
         {
-            std::vector<ade::CDimT> expectshape{3,2,1,4,1,1,1,1};
-            std::vector<ade::CDimT> expectcoord{2,3,4,1,1,1,1,1};
+            std::vector<ade::CDimT> expectout{4,2,1,3,1,1,1,1};
             ade::CDimT out[ade::rank_cap];
             // shaper is always input to output
             ade::CDimT cin[ade::rank_cap] = {2,3,4,1,1,1,1,1};
             target_shaper->forward(out, cin);
-            ARR_EQ(expectshape, out, out + ade::rank_cap);
+            ARR_EQ(expectout, out, out + ade::rank_cap);
             // simulate out to input
-            ade::CDimT cin2[ade::rank_cap] = {3,2,1,4,1,1,1,1};
+            ade::CDimT cin2[ade::rank_cap] = {2,3,4,1,1,1,1,1};
             target_mapper->forward(out, cin2);
-            ARR_EQ(expectcoord, out, out + ade::rank_cap);
+            ARR_EQ(expectout, out, out + ade::rank_cap);
         }
         COORD_EQ(leftrev, target_shaper);
-        COORD_EQ(leftmapper, target_mapper);
+        COORD_EQ(leftrev, target_mapper);
     }
 
     {
@@ -982,7 +981,7 @@ TEST(GRADER, PermuteReduce)
         auto target_mapper = ggchildren[0].get_coorder();
         ade::CoordptrT right2left(rightmapper->connect(*leftrev));
         {
-            std::vector<ade::CDimT> expectout{3,2,1,4,1,1,1,1};
+            std::vector<ade::CDimT> expectout{4,2,1,3,1,1,1,1};
             ade::CDimT out[ade::rank_cap];
             // shaper is always input to output
             ade::CDimT cin[ade::rank_cap] = {2,3,4,5,1,1,1,1};
@@ -1058,10 +1057,10 @@ TEST(GRADER, PermuteReduce)
     ade::CoordptrT left2right(leftmapper->connect(*rightrev));
     {
         std::vector<ade::CDimT> expectshape{2,3,4,5,1,1,1,1};
-        std::vector<ade::CDimT> expectcoord{3,2,1,4,1,1,1,1};
+        std::vector<ade::CDimT> expectcoord{4,2,1,3,1,1,1,1};
         ade::CDimT out[ade::rank_cap];
         // shaper is always input to output
-        ade::CDimT cin[ade::rank_cap] = {3,2,1,4,1,1,1,1};
+        ade::CDimT cin[ade::rank_cap] = {4,2,1,3,1,1,1,1};
         target_shaper->forward(out, cin);
         ARR_EQ(expectshape, out, out + ade::rank_cap);
         // simulate out to input
@@ -1078,4 +1077,227 @@ TEST(GRADER, PermuteReduce)
 }
 
 
+TEST(GRADER, DiffShaperCoorder)
+{
+    std::vector<ade::DimT> slist = {4, 4, 3, 3};
+    std::vector<ade::DimT> slist1 = {3, 3, 4, 4};
+    ade::TensptrT outside(new MockTensor(ade::Shape({7})));
+    ade::TensptrT leaf(new MockTensor(ade::Shape(slist)));
+    ade::TensptrT leaf1(new MockTensor(ade::Shape(slist1)));
+
+    ade::CoordptrT leftshaper = ade::permute({2, 1, 3, 0});
+    ade::CoordptrT rightshaper = ade::permute({0, 3, 1, 2});
+    ade::CoordptrT leftmapper = ade::permute({1, 3, 0, 2});
+    ade::CoordptrT rightmapper = ade::permute({1, 2, 0, 3});
+    ade::MappedTensor left(leaf, leftshaper, false, leftmapper);
+    ade::MappedTensor right(leaf1, rightshaper, true, rightmapper);
+    ade::TensptrT fwd(
+        ade::Functor::get(mock_rules->sum_opcode(), {left, right}));
+
+    ade::CoordptrT leftshaperev(leftshaper->reverse());
+    ade::CoordptrT rightshaperev(rightshaper->reverse());
+    ade::CoordptrT leftcoordrev(leftmapper->reverse());
+    ade::CoordptrT rightcoordrev(rightmapper->reverse());
+
+    ade::TensptrT g1(derive(fwd, fwd.get()));
+    ade::TensptrT g0(derive(fwd, outside.get()));
+    ade::TensptrT gl(derive(fwd, leaf.get()));
+    ade::TensptrT gr(derive(fwd, leaf1.get()));
+
+    auto mock1 = dynamic_cast<MockTensor*>(g1.get());
+    auto mock0 = dynamic_cast<MockTensor*>(g0.get());
+
+    ASSERT_NE(nullptr, mock1);
+    ASSERT_NE(nullptr, mock0);
+
+    EXPECT_EQ(1, mock1->val_);
+    EXPECT_EQ(0, mock0->val_);
+
+    std::stringstream ostr;
+    std::stringstream zstr;
+    std::stringstream lstr;
+    std::stringstream rstr;
+
+    ostr << "([3\\4\\3\\4\\1\\1\\1\\1])\n";
+    zstr << "([7\\1\\1\\1\\1\\1\\1\\1])\n";
+    lstr <<
+        "(*[4\\4\\3\\3\\1\\1\\1\\1])\n" <<
+        " `--(*[4\\4\\3\\3\\1\\1\\1\\1])\n" << // chain rule (derivative of SUM is PROD)
+ " | `--([4\\4\\3\\3\\1\\1\\1\\1])\n" << + " | `--(+[4\\4\\3\\3\\1\\1\\1\\1])\n" << + " | `--([3\\3\\4\\4\\1\\1\\1\\1])\n" << + " `--(+[4\\4\\3\\3\\1\\1\\1\\1])\n" << // derivative of leaf wrt leaf + " `--([3\\4\\3\\4\\1\\1\\1\\1])\n"; + rstr << + "(*[3\\3\\4\\4\\1\\1\\1\\1])\n" << + " `--(*[3\\3\\4\\4\\1\\1\\1\\1])\n" << // chain rule + " | `--(+[3\\3\\4\\4\\1\\1\\1\\1])\n" << + " | | `--([4\\4\\3\\3\\1\\1\\1\\1])\n" << + " | `--([3\\3\\4\\4\\1\\1\\1\\1])\n" << + " `--(+[3\\3\\4\\4\\1\\1\\1\\1])\n" << // derivative of leaf wrt leaf + " `--([3\\4\\3\\4\\1\\1\\1\\1])\n"; + + TREE_EQ(ostr, g1); + TREE_EQ(zstr, g0); + TREE_EQ(lstr, gl); + TREE_EQ(rstr, gr); + + { + auto fl = dynamic_cast(gl.get()); + ASSERT_NE(nullptr, fl); + + auto children = fl->get_children(); + EXPECT_EQ(2, children.size()); + EXPECT_EQ(ade::identity, children[0].get_shaper()); + EXPECT_EQ(ade::identity, children[0].get_coorder()); + EXPECT_EQ(ade::identity, children[1].get_shaper()); + EXPECT_EQ(ade::identity, children[1].get_coorder()); + + { + auto child = dynamic_cast( + children[1].get_tensor().get()); + ASSERT_NE(nullptr, child); + + auto gchildren = child->get_children(); + EXPECT_TRUE(gchildren[0].map_io()); + auto target_shaper = gchildren[0].get_shaper(); + auto target_mapper = gchildren[0].get_coorder(); + { + std::vector expectshape{4,4,3,3,1,1,1,1}; + std::vector expectcoord{3,5,2,4,1,1,1,1}; + ade::CDimT out[ade::rank_cap]; + // shaper is always input to output + ade::CDimT cin[ade::rank_cap] = {3,4,3,4,1,1,1,1}; + target_shaper->forward(out, cin); + ARR_EQ(expectshape, out, out + ade::rank_cap); + // simulate out to input + ade::CDimT cin2[ade::rank_cap] = {2,3,4,5,1,1,1,1}; + target_mapper->forward(out, cin2); + ARR_EQ(expectcoord, out, out + ade::rank_cap); + } + COORD_EQ(leftshaperev, target_shaper); + COORD_EQ(leftmapper, target_mapper); + } + + { + auto child = dynamic_cast( + children[0].get_tensor().get()); + ASSERT_NE(nullptr, child); + + auto gchildren = child->get_children(); + EXPECT_EQ(2, gchildren.size()); + EXPECT_EQ(ade::identity, gchildren[0].get_shaper()); + EXPECT_EQ(ade::identity, gchildren[0].get_coorder()); + EXPECT_EQ(ade::identity, gchildren[1].get_shaper()); + EXPECT_EQ(ade::identity, gchildren[1].get_coorder()); + + { + auto gchild = dynamic_cast(gchildren[1].get_tensor().get()); + ASSERT_NE(nullptr, gchild); + + auto ggchildren = gchild->get_children(); + EXPECT_EQ(1, ggchildren.size()); + EXPECT_FALSE(ggchildren[0].map_io()); + auto target_shaper = ggchildren[0].get_shaper(); + auto target_mapper = ggchildren[0].get_coorder(); + { + std::vector expectshape{4,4,3,3,1,1,1,1}; + std::vector expectcoord{5,4,2,3,1,1,1,1}; + ade::CDimT out[ade::rank_cap]; + // shaper is always input to output + ade::CDimT cin[ade::rank_cap] = {3,3,4,4,1,1,1,1}; + target_shaper->forward(out, cin); + ARR_EQ(expectshape, out, out + ade::rank_cap); + // simulate out to input + ade::CDimT cin2[ade::rank_cap] = {2,3,4,5,1,1,1,1}; + target_mapper->forward(out, cin2); + ARR_EQ(expectcoord, out, out + ade::rank_cap); + } + ade::CoordptrT exshaper(rightshaper->connect(*leftshaperev)); + COORD_EQ(exshaper, target_shaper); + ade::CoordptrT excoorder(leftcoordrev->connect(*rightcoordrev)); + COORD_EQ(excoorder, target_mapper); + } + } + } + + { + auto fr = dynamic_cast(gr.get()); + ASSERT_NE(nullptr, fr); + + auto children = fr->get_children(); + EXPECT_EQ(2, children.size()); + EXPECT_EQ(ade::identity, children[0].get_shaper()); + EXPECT_EQ(ade::identity, children[0].get_coorder()); + EXPECT_EQ(ade::identity, 
+        EXPECT_EQ(ade::identity, children[1].get_coorder());
+
+        {
+            auto child = dynamic_cast<ade::Functor*>(
+                children[1].get_tensor().get());
+            ASSERT_NE(nullptr, child);
+
+            auto gchildren = child->get_children();
+            EXPECT_FALSE(gchildren[0].map_io());
+            auto target_shaper = gchildren[0].get_shaper();
+            auto target_mapper = gchildren[0].get_coorder();
+            {
+                std::vector<ade::CDimT> expectshape{3,3,4,4,1,1,1,1};
+                std::vector<ade::CDimT> expectcoord{3,4,2,5,1,1,1,1};
+                ade::CDimT out[ade::rank_cap];
+                ade::CDimT cin[ade::rank_cap] = {3,4,3,4,1,1,1,1};
+                target_shaper->forward(out, cin);
+                ARR_EQ(expectshape, out, out + ade::rank_cap);
+                // simulate input to output
+                ade::CDimT cin2[ade::rank_cap] = {2,3,4,5,1,1,1,1};
+                target_mapper->forward(out, cin2);
+                ARR_EQ(expectcoord, out, out + ade::rank_cap);
+            }
+            COORD_EQ(rightshaperev, target_shaper);
+            COORD_EQ(rightmapper, target_mapper);
+        }
+
+        {
+            auto child = dynamic_cast<ade::Functor*>(children[0].get_tensor().get());
+            ASSERT_NE(nullptr, child);
+
+            auto gchildren = child->get_children();
+            EXPECT_EQ(2, gchildren.size());
+            EXPECT_EQ(ade::identity, gchildren[0].get_shaper());
+            EXPECT_EQ(ade::identity, gchildren[0].get_coorder());
+            EXPECT_EQ(ade::identity, gchildren[1].get_shaper());
+            EXPECT_EQ(ade::identity, gchildren[1].get_coorder());
+
+            {
+                auto gchild = dynamic_cast<ade::Functor*>(gchildren[0].get_tensor().get());
+                ASSERT_NE(nullptr, gchild);
+
+                auto ggchildren = gchild->get_children();
+                EXPECT_EQ(1, ggchildren.size());
+                EXPECT_FALSE(ggchildren[0].map_io());
+                auto target_shaper = ggchildren[0].get_shaper();
+                auto target_mapper = ggchildren[0].get_coorder();
+                {
+                    std::vector<ade::CDimT> expectshape{3,3,4,4,1,1,1,1};
+                    std::vector<ade::CDimT> expectcoord{4,5,3,2,1,1,1,1};
+                    ade::CDimT out[ade::rank_cap];
+                    // shaper is always input to output
+                    ade::CDimT cin[ade::rank_cap] = {4,4,3,3,1,1,1,1};
+                    target_shaper->forward(out, cin);
+                    ARR_EQ(expectshape, out, out + ade::rank_cap);
+                    // simulate out to input
+                    ade::CDimT cin2[ade::rank_cap] = {2,3,4,5,1,1,1,1};
+                    target_mapper->forward(out, cin2);
+                    ARR_EQ(expectcoord, out, out + ade::rank_cap);
+                }
+                ade::CoordptrT exshaper(leftshaper->connect(*rightshaperev));
+                COORD_EQ(exshaper, target_shaper);
+                ade::CoordptrT excoorder(rightmapper->connect(*leftmapper));
+                COORD_EQ(excoorder, target_mapper);
+            }
+        }
+    }
+}
+
+
 #endif // DISABLE_GRADER_TEST
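
Note (editorial, not part of the patch): the sketch below illustrates how the new
MappedTensor::reverse/connect pair is meant to be used, mirroring the bwd/src/grader.cpp
hunk above. It assumes the ade headers touched by this patch ("ade/cmap.hpp") plus an
"ade/functor.hpp" header declaring ade::Functor and an ade::Opcode type (neither shown
here); the function name map_back and its parameters are hypothetical stand-ins for the
grader's locals, not library API.

    #include "ade/cmap.hpp"
    #include "ade/functor.hpp" // assumed location of ade::Functor / ade::Opcode

    // child: the argument being differentiated against
    // kid:   a sibling argument of the same functor
    // bwd:   the backward tensor flowing into the functor
    static ade::TensptrT map_back (const ade::MappedTensor& child,
        const ade::MappedTensor& kid, ade::TensptrT bwd, ade::Opcode sum)
    {
        // reverse() wraps bwd with the reverse of child's shaper (and a flipped
        // map_io_ flag), so the gradient is expressed in child's shape space
        ade::MappedTensor mapped_bwd = child.reverse(bwd);

        // connect() composes kid's shaper with mapped_bwd's shaper, then picks the
        // coorder composition order from the resulting input/output mapping
        // direction, reversing a coorder whenever its direction disagrees; this is
        // what lets dissimilar shaper and coorder info compose correctly
        return ade::TensptrT(ade::Functor::get(sum, {
            kid.connect(mapped_bwd)}));
    }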