diff --git a/.gitmodules b/.gitmodules index c865dca..0ae42d4 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,7 +7,7 @@ [submodule "ext/bulk"] path = ext/bulk url = https://github.com/jwbuurlage/Bulk.git - branch = array_queue + branch = develop [submodule "catch"] path = ext/catch url = https://github.com/philsquared/Catch diff --git a/distributed/overlaps.hpp b/distributed/overlaps.hpp index 034394b..859bf92 100644 --- a/distributed/overlaps.hpp +++ b/distributed/overlaps.hpp @@ -15,6 +15,21 @@ struct overlap { distributed::shadow region; }; +struct pod_shadow { + std::array min_pt; + std::array max_pt; +}; + +auto to_pod(distributed::shadow sh) { + return pod_shadow{math::vec_to_array<2_D, int>(sh.min_pt), + math::vec_to_array<2_D, int>(sh.max_pt)}; +} + +auto from_pod(pod_shadow sh) { + return distributed::shadow{math::array_to_vec<2_D, int>(sh.min_pt), + math::array_to_vec<2_D, int>(sh.max_pt)}; +} + void communicate_overlaps(bulk::world& world, projections<3_D, T>& local_proj_stack, distributed::restricted_geometry& local_geometry, @@ -22,7 +37,7 @@ void communicate_overlaps(bulk::world& world, auto buffer = std::vector(); auto& proj_data = local_proj_stack.mutable_data(); - auto q = bulk::queue(world); + auto q = bulk::queue(world); for (auto ov : overlaps) { auto offset = local_proj_stack.offset(ov.projection); auto local_region = local_geometry.local_shadow(ov.projection); @@ -40,12 +55,13 @@ void communicate_overlaps(bulk::world& world, } } - q(ov.target).send_many(buffer, ov.projection, ov.region); + q(ov.target).send(buffer, ov.projection, to_pod(ov.region)); } world.sync(); - for (auto && [ xs, proj, sh ] : q) { + for (auto && [ xs, proj, shpod ] : q) { + auto sh = from_pod(shpod); auto local_region = local_geometry.local_shadow(proj); auto w = distributed::shadow{sh.min_pt - local_region.min_pt, sh.max_pt - local_region.min_pt}; @@ -66,14 +82,9 @@ std::vector compute_overlaps(bulk::world& world, const distributed::restricted_geometry& geometry) { - struct pod_shadow { - std::array min_pt; - std::array max_pt; - }; - auto overlaps = std::vector(); - auto s = world.processor_id(); + auto s = world.rank(); auto p = world.active_processors(); // STEP 1: communicate shadows for all projections @@ -82,8 +93,8 @@ compute_overlaps(bulk::world& world, bulk::coarray(world, p * geometry.projection_count()); for (int i = 0; i < geometry.projection_count(); ++i) { + auto sh = geometry.local_shadow(i); for (int t = 0; t < p; ++t) { - auto sh = geometry.local_shadow(i); shadows(t)[p * i + s] = {math::vec_to_array<2_D, int>(sh.min_pt), math::vec_to_array<2_D, int>(sh.max_pt)}; } diff --git a/distributed/simple_distributed.cpp b/distributed/simple_distributed.cpp index 91fc284..9190bff 100644 --- a/distributed/simple_distributed.cpp +++ b/distributed/simple_distributed.cpp @@ -1,3 +1,10 @@ +/* + * FIXME: + * - Can we group sends together? this should greatly reduce data size + * with a constant factor. + */ + +#include #include #include "bulk/backends/mpi/mpi.hpp" @@ -40,7 +47,6 @@ auto calculate_local_volume(bulk::rectangular_partitioning<3, G>& partitioning, struct shared_pixel { int target; - int projection; int local_line; int remote_line; }; @@ -48,36 +54,126 @@ struct shared_pixel { std::pair, std::vector> compute_contributions(bulk::world& world, const distributed::restricted_geometry& geometry) { - world.sync(); - (void)geometry; + auto s = world.rank(); + auto p = world.active_processors(); + auto& gg = geometry.global_geometry(); + + // block_size is number of local projections + auto block_size = ((geometry.projection_count() - 1) / p) + 1; + auto shadows = bulk::coarray(world, p * block_size); + auto offsets = bulk::coarray(world, p * block_size); + // 1. send all `p` windows for all projections `i` to processor `i % p` + for (int i = 0; i < geometry.projection_count(); ++i) { + auto sh = geometry.local_shadow(i); + shadows(i % p)[(i / p) * p + s] = to_pod(sh); + offsets(i % p)[(i / p) * p + s] = geometry.offset(i); + } + world.sync(); + // 2. for each projection, for each pixel store the contributors + auto global_shape = gg.projection_shape(0); + auto pixel_count = math::product<2_D, int>(global_shape); + auto pixels = std::vector>(pixel_count * block_size); + for (int b = 0; b < block_size; ++b) { + for (int t = 0; t < p; ++t) { + auto w = shadows[b * p + t]; + for (int j = w.min_pt[1]; j <= w.max_pt[1]; ++j) { + for (int i = w.min_pt[0]; i <= w.max_pt[0]; ++i) { + if (j * global_shape[0] + i >= pixel_count) { + std::cout << b << " [" << w.min_pt[0] << ", " + << w.max_pt[0] << "] -> " << i << " " << j + << " / " << pixel_count << " = " + << global_shape[0] << " x " << global_shape[1] + << "\n"; + assert(false); + } + pixels[b * pixel_count + j * global_shape[0] + i].push_back( + t); + } + } + } + } + // 3. decide who will be responsible for the pixel // maybe (line_number % #contributors) - // 4. send to the owner the contributors, to the contributors the owner + auto local_idx = [](auto offset, auto lw, auto gw, auto idx) { + auto x = idx % gw[0] - lw.min_pt[0]; + auto y = idx / gw[0] - lw.min_pt[1]; + auto idx_in_proj = y * (lw.max_pt[0] - lw.min_pt[0] + 1) + x; + return offset + idx_in_proj; + }; + + auto my_contributions = bulk::queue(world); + auto my_responsibilities = bulk::queue(world); + + for (int b = 0; b < block_size; ++b) { + for (int i = 0; i < pixel_count; ++i) { + auto pixel_idx = b * pixel_count + i; + if (pixels[pixel_idx].size() == 0) { + continue; + } + auto owner = + pixels[pixel_idx][pixel_idx % pixels[pixel_idx].size()]; + // for contributors, we can compute the local index if we know the + // offset.. lets just send them... aaaaand got 'em.. + + auto owner_line = + local_idx(offsets[b * p + owner], shadows[b * p + owner], + gg.projection_shape(0), i); + + for (auto j = 0u; j < pixels[pixel_idx].size(); ++j) { + auto t = pixels[pixel_idx][j]; + if (t == owner) { + continue; + } + + // 4. send to the owner the contributors, to the contributors + // the owner + auto contributor_line = + local_idx(offsets[b * p + t], shadows[b * p + t], + gg.projection_shape(0), i); + my_contributions(t).send({owner, contributor_line, owner_line}); + my_responsibilities(owner).send( + {t, owner_line, contributor_line}); + } + } + } + + world.sync(); + // 5. store contributors and owners in a vector - return {{}, {}}; + auto contributions = std::vector(my_contributions.size()); + auto responsibilities = + std::vector(my_responsibilities.size()); + std::copy(my_contributions.begin(), my_contributions.end(), + contributions.begin()); + std::copy(my_responsibilities.begin(), my_responsibilities.end(), + responsibilities.begin()); + + return {std::move(contributions), std::move(responsibilities)}; } void communicate_contributions(bulk::world& world, projections<3_D, T>& projs, const std::vector& contributions, const std::vector& results) { - auto q = bulk::queue(world); + auto q = bulk::queue(world); auto share = [&](const auto& xs) { - for (auto[target, projection, local, remote] : xs) { - q(target).send(projection, remote, - projs[projs.offset(projection) + local]); + for (auto [target, local, remote] : xs) { + q(target).send(remote, projs[local]); } - world.sync(); }; share(contributions); - for (auto[projection, idx, value] : q) { - projs[projs.offset(projection) + idx] += value; + world.sync(); + for (auto [idx, value] : q) { + projs[idx] += value; } + share(results); - for (auto[projection, idx, value] : q) { - projs[projs.offset(projection) + idx] = value; + world.sync(); + for (auto [idx, value] : q) { + projs[idx] = value; } } @@ -99,8 +195,8 @@ void run(util::args options) { // {k, k, k}, {2, 1, 4}); env.spawn(processors, [&](auto& world) { - auto s = world.processor_id(); - auto vs = calculate_local_volume(partitioning, world.processor_id()); + auto s = world.rank(); + auto vs = calculate_local_volume(partitioning, world.rank()); // Make scene with s = 0, connect to this scene from other procs util::ext_plotter<3_D, T> plotter("tcp://localhost:5555"); @@ -150,8 +246,7 @@ void run(util::args options) { } }; - auto overlaps = compute_overlaps(world, gs); - communicate_overlaps(world, p, gs, overlaps); + auto [go_forth, and_back] = compute_contributions(world, gs); // communicate row and column sums auto invert = [](auto x) { return (T)1.0 / x; }; @@ -160,7 +255,7 @@ void run(util::args options) { }; auto r = projections<3_D, T>(gs); fp(image<3_D, T>(vs, (T)1.0), gs, vs, r); - communicate_overlaps(world, r, gs, overlaps); + communicate_contributions(world, r, go_forth, and_back); invert_all(r.mutable_data()); auto c = image<3_D, T>(vs); bp(projections<3_D, T>(gs, (T)1.0), gs, vs, c); @@ -175,7 +270,7 @@ void run(util::args options) { for (int l = 0; l < gs.lines(); ++l) { q[l] = r[l] * (p[l] - q[l]); } - communicate_overlaps(world, q, gs, overlaps); + communicate_contributions(world, q, go_forth, and_back); bp(q, gs, vs, z); for (int i = 0; i < vs.cells(); ++i) { x[i] += c[i] * z[i]; diff --git a/ext/bulk b/ext/bulk index b774363..5c5ab58 160000 --- a/ext/bulk +++ b/ext/bulk @@ -1 +1 @@ -Subproject commit b774363e4894dacb810ba1e04e7e67d405841d45 +Subproject commit 5c5ab584e513147af994896cb5f9a5ad1e9c27ba diff --git a/ext/catch b/ext/catch index 8ebe94c..d5613fb 160000 --- a/ext/catch +++ b/ext/catch @@ -1 +1 @@ -Subproject commit 8ebe94ca2ea1e6ae7765f65d2f3aec08c9a85ae2 +Subproject commit d5613fb18a6a2bc54ac5a4ce50f3e36ea2560166 diff --git a/ext/cpptoml b/ext/cpptoml index 918f7f8..789b388 160000 --- a/ext/cpptoml +++ b/ext/cpptoml @@ -1 +1 @@ -Subproject commit 918f7f835bbff2602ef9a93a1085ed9930ac6330 +Subproject commit 789b3881992c6775b902aa096482850a683aa9d5 diff --git a/ext/cppzmq b/ext/cppzmq index b0e6d4b..d9f0f01 160000 --- a/ext/cppzmq +++ b/ext/cppzmq @@ -1 +1 @@ -Subproject commit b0e6d4bacd176aec2c41b54d46b70a4682bb88d7 +Subproject commit d9f0f016c07046742738c65e1eb84722ae32d7d4 diff --git a/ext/fmt b/ext/fmt index 3028344..6655e80 160000 --- a/ext/fmt +++ b/ext/fmt @@ -1 +1 @@ -Subproject commit 30283443805745af3761be1682e6849766246e76 +Subproject commit 6655e804c49f66b6c978fcdd01ef6b03b55208fa diff --git a/ext/libzmq b/ext/libzmq index 30ab0ed..834b9e4 160000 --- a/ext/libzmq +++ b/ext/libzmq @@ -1 +1 @@ -Subproject commit 30ab0ed897ae1d8ed82da89543f8bf5856a902e7 +Subproject commit 834b9e4cd7ff05918c336df8b2a858a5efe4d7ec diff --git a/include/tomos/distributed/recursive_bisectioning.hpp b/include/tomos/distributed/recursive_bisectioning.hpp index c944359..ceae29b 100644 --- a/include/tomos/distributed/recursive_bisectioning.hpp +++ b/include/tomos/distributed/recursive_bisectioning.hpp @@ -55,12 +55,13 @@ split_t find_split(const std::vector>& lines, std::size_t idx = 0; for (auto line : lines) { auto intersections = math::intersect_bounds(line, bounds); - // FIXME: check if intersects, since line may start inside bounds if (intersections) { auto a = intersections.value().first; auto b = intersections.value().second; crossings.push_back({a, idx, math::sign(b - a)}); crossings.push_back({b, idx, math::sign(a - b)}); + } else { + std::cout << "Line that should intersect does indeed not\n"; } ++idx; } @@ -190,11 +191,8 @@ partition_bisection(const tomo::geometry::base& geometry, assert(1 << depth == processors); // containers for the current left and right lines - // TODO: needed here? std::vector> all_lines; for (auto l : geometry) { - // FIXME actually; we need intersections here, not the truncation - // because it does a 'discrete intersection' auto line = math::truncate_to_volume(l, object_volume); if (line) { all_lines.push_back(line.value()); diff --git a/include/tomos/geometry.hpp b/include/tomos/geometry.hpp index dd0a9bb..663765c 100644 --- a/include/tomos/geometry.hpp +++ b/include/tomos/geometry.hpp @@ -190,8 +190,7 @@ class base { /** Construct the geometry with a given number of lines. */ base(int projection_count, bool parallel = false) - : projection_count_(projection_count), parallel_(parallel) { - } + : projection_count_(projection_count), parallel_(parallel) {} virtual ~base() = default; @@ -217,12 +216,27 @@ class base { virtual std::array, D - 1> projection_delta(int i) const = 0; + int offset(int idx) const { return offsets_[idx]; } protected: + std::vector offsets_; + + void compute_offsets_() { + offsets_.resize(projection_count()); + offsets_[0] = 0; + std::iota(offsets_.begin() + 1, offsets_.end(), 0); + std::transform( + offsets_.begin() + 1, offsets_.end(), offsets_.begin() + 1, + [&](int i) { return math::reduce(projection_shape(i)); }); + std::partial_sum(offsets_.begin() + 1, offsets_.end(), + offsets_.begin() + 1); + } + void compute_lines_() { for (auto i = 0; i < projection_count_; ++i) { line_count_ += math::reduce(this->projection_shape(i)); } + this->compute_offsets_(); } int projection_count_ = 0; diff --git a/include/tomos/math.hpp b/include/tomos/math.hpp index 70c9024..2894976 100644 --- a/include/tomos/math.hpp +++ b/include/tomos/math.hpp @@ -322,7 +322,8 @@ intersect_bounds(line l, std::array, D> bounds) { auto for_sure_far_enough = product(bounds_sides); auto result = aabb_intersection( - l.origin, for_sure_far_enough * l.delta, bounds_sides, bounds_origin); + l.origin - for_sure_far_enough * l.delta, for_sure_far_enough * l.delta, + bounds_sides, bounds_origin); if (result) { return std::pair, vec>{result.value().first, diff --git a/include/tomos/projections.hpp b/include/tomos/projections.hpp index 2673b92..8fe3bc4 100644 --- a/include/tomos/projections.hpp +++ b/include/tomos/projections.hpp @@ -22,15 +22,11 @@ class projections { /** Construct default-initialized projections for a geometry. */ projections(const geometry::base& geometry) - : geometry_(geometry), data_(geometry.lines()) { - compute_offsets_(); - } + : geometry_(geometry), data_(geometry.lines()) {} /** Construct projections for a geometry with constant value. */ projections(const geometry::base& geometry, T value) - : geometry_(geometry), data_(geometry.lines(), value) { - compute_offsets_(); - } + : geometry_(geometry), data_(geometry.lines(), value) {} /** * Obtain a reference to the i-th measurement, corresponding to the i-th @@ -51,7 +47,7 @@ class projections { ///** Get a projection as an image */ image get_projection(int idx) const { - auto offset = offsets_[idx]; + auto offset = this->offset(idx); auto shape = geometry_.projection_shape(idx); auto img = image(volume(shape)); for (int i = 0; i < math::reduce(shape); ++i) { @@ -62,30 +58,18 @@ class projections { /** Set a projection using an image */ void set_projection(int idx, const image& img) { - auto offset = offsets_[idx]; + auto offset = this->offset(idx); auto pixels = math::reduce(geometry_.projection_shape(idx)); for (int i = 0; i < pixels; ++i) { data_[offset + i] = img[i]; } } - int offset(int idx) const { return offsets_[idx]; } + int offset(int idx) const { return geometry_.offset(idx); } - private: - void compute_offsets_() { - offsets_.resize(geometry_.projection_count()); - offsets_[0] = 0; - std::iota(offsets_.begin() + 1, offsets_.end(), 0); - std::transform(offsets_.begin() + 1, offsets_.end(), - offsets_.begin() + 1, [&](int i) { - return math::reduce( - geometry_.projection_shape(i)); - }); - std::partial_sum(offsets_.begin() + 1, offsets_.end(), - offsets_.begin() + 1); - } + auto size() const { return geometry_.lines(); } - std::vector offsets_; + private: const geometry::base& geometry_; int projection_count_; std::vector data_; diff --git a/python/build/pybind11 b/python/build/pybind11 index 6b51619..e06077b 160000 --- a/python/build/pybind11 +++ b/python/build/pybind11 @@ -1 +1 @@ -Subproject commit 6b51619a7cc5a34bbcd4474fb70954d009cebb64 +Subproject commit e06077bf4718050c4f95c3b4741c75f7f7041b83