Add Matching.decode_to_edges_array (#47)

* Start work on decoding to edges * Use separate neighbor_markers property to mark edges in decode_to_edges * decode_to_edges added, with perf and tests * add decode_to_edges to python api with test * pin ninja==1.10.2.4 Co-authored-by: Oscar Higgott <oscarhiggott@users.noreply.github.com>
oscarhiggott · Nov 5, 2022 · ca7d0f8 · ca7d0f8
1 parent 672980b
commit ca7d0f8
Show file tree

Hide file tree

Showing 13 changed files with 461 additions and 26 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -188,7 +188,7 @@ jobs:
           python-version: ${{ matrix.python-version }}
 
       - name: Add requirements
-        run: python -m pip install --upgrade cmake>=3.12 ninja pytest flake8 pytest-cov
+        run: python -m pip install --upgrade cmake>=3.12 ninja==1.10.2.4 pytest flake8 pytest-cov
 
       - name: Build and install
         run: pip install --verbose -e .
@@ -239,7 +239,7 @@ jobs:
         with:
           python-version: '3.10'
       - name: Add requirements
-        run: python -m pip install --upgrade cmake>=3.12 ninja pytest flake8 pytest-cov stim~=1.10.dev1666411378
+        run: python -m pip install --upgrade cmake>=3.12 ninja==1.10.2.4 pytest flake8 pytest-cov stim~=1.10.dev1666411378
       - name: Build and install
         run: pip install --verbose -e .
       - name: Run tests and collect coverage

diff --git a/src/pymatching/matching.py b/src/pymatching/matching.py
@@ -422,13 +422,66 @@ def decode_batch(
         else:
             return predictions
 
+    def decode_to_edges_array(self,
+                              syndrome: Union[np.ndarray, List[int]]
+                              ) -> np.ndarray:
+        """
+        Decode the syndrome `syndrome` using minimum-weight perfect matching, returning the edges in the
+        solution, given as pairs of detector node indices in a numpy array.
+
+        Parameters
+        ----------
+        syndrome : numpy.ndarray
+            A binary syndrome vector to decode. The number of elements in
+            `syndrome` should equal the number of nodes in the matching graph. If
+            `syndrome` is a 1D array, then `syndrome[i]` is the syndrome at node `i` of
+            the matching graph. If `syndrome` is 2D then `syndrome[i,j]` is the difference
+            (modulo 2) between the (noisy) measurement of stabiliser `i` in time
+            step `j+1` and time step `j` (for the case where the matching graph is
+            constructed from a check matrix with `repetitions>1`).
+
+        Returns
+        -------
+        numpy.ndarray
+            A 2D array `edges` giving the edges in the matching solution as pairs of detector nodes (or as a detector
+            node and the boundary, for a boundary edge). If there are `num_predicted_edges` edges then the shape of
+            `edges` is `edges.shape=(num_predicted_edges, 2)`, and edge `i` is between detector node `edges[i, 0]`
+            and detector node `edges[i, 1]`. For a boundary edge `i` between a detector node `k` and the boundary
+            (either a boundary node or the virtual boundary node), then `pairs[i,0]` is `k`, and `pairs[i,1]=-1`
+            denotes the boundary (the boundary is always denoted by -1 and is always in the second column).
+
+        Examples
+        --------
+        >>> import pymatching
+        >>> m = pymatching.Matching()
+        >>> m.add_boundary_edge(0)
+        >>> m.add_edge(0, 1)
+        >>> m.add_edge(1, 2)
+        >>> m.add_edge(2, 3)
+        >>> m.add_edge(3, 4)
+        >>> m.add_edge(4, 5)
+        >>> m.add_edge(5, 6)
+        >>> edges = m.decode_to_edges_array([0, 1, 0, 0, 1, 0, 1])
+        >>> print(edges)
+        [[ 0  1]
+         [ 0 -1]
+         [ 5  4]
+         [ 5  6]]
+        """
+        detection_events = self._syndrome_array_to_detection_events(syndrome)
+        return self._matching_graph.decode_to_edges_array(detection_events)
+
     def decode_to_matched_dets_array(self,
                                      syndrome: Union[np.ndarray, List[int]]
-                                     ) -> Union[np.ndarray, Tuple[np.ndarray, int]]:
+                                     ) -> np.ndarray:
         """
         Decode the syndrome `syndrome` using minimum-weight perfect matching, returning the pairs of
-        matched detection events (or detection events matched to the boundary) as a 2D numpy array. Note that
-        (unlike `Matching.decode`), this method currently only supports non-negative edge weights.
+        matched detection events (or detection events matched to the boundary) as a 2D numpy array.
+        Each pair of matched detection events returned by this method corresponds to a shortest path
+        between the detection events in the solution to the problem: if you instead want the set of
+        all edges in the solution (pairs of detector nodes), use `Matching.decode_to_edges` instead.
+        Note that, unlike `Matching.decode`, `Matching.decode_batch` and `Matching.decode_to_edges_array`,
+        this method currently only supports non-negative edge weights.
 
         Parameters
         ----------
@@ -444,11 +497,11 @@ def decode_to_matched_dets_array(self,
         Returns
         -------
         numpy.ndarray
-            An 2D array `pairs` giving the endpoints of the paths between detection events in the solution of the matching.
-            If there are `num_paths` paths then the shape of `pairs` is `num_paths.shape=(num_paths, 2)`, and path `i`
-            starts at detection event `pairs[i,0]` and ends at detection event `pairs[i,1]`. For a path `i` connecting
-            a detection event to the boundary (either a boundary node or the virtual boundary node), then `pairs[i,0]` is
-            is the index of the detection event, and `pairs[i,1]=-1` denotes the boundary.
+            An 2D array `pairs` giving the endpoints of the paths between detection events in the solution of the
+            matching. If there are `num_paths` paths then the shape of `pairs` is `pairs.shape=(num_paths, 2)`, and
+            path `i` starts at detection event `pairs[i,0]` and ends at detection event `pairs[i,1]`. For a path `i`
+            connecting a detection event to the boundary (either a boundary node or the virtual boundary node), then
+            `pairs[i,0]` is the index of the detection event, and `pairs[i,1]=-1` denotes the boundary.
 
         Examples
         --------
@@ -459,10 +512,12 @@ def decode_to_matched_dets_array(self,
         >>> m.add_edge(1, 2)
         >>> m.add_edge(2, 3)
         >>> m.add_edge(3, 4)
-        >>> matched_dets = m.decode_to_matched_dets_array([1, 0, 0, 1, 1])
+        >>> m.add_edge(4, 5)
+        >>> m.add_edge(5, 6)
+        >>> matched_dets = m.decode_to_matched_dets_array([0, 1, 0, 0, 1, 0, 1])
         >>> print(matched_dets)
-        [[ 0 -1]
-         [ 3  4]]
+        [[ 1 -1]
+         [ 4  6]]
         """
         detection_events = self._syndrome_array_to_detection_events(syndrome)
         return self._matching_graph.decode_to_matched_detection_events_array(detection_events)

diff --git a/src/pymatching/sparse_blossom/driver/mwpm_decoding.cc b/src/pymatching/sparse_blossom/driver/mwpm_decoding.cc
@@ -85,9 +85,10 @@ void process_timeline_until_completion(pm::Mwpm& mwpm, const std::vector<uint64_
         // Just add detection events if graph has no negative weights
         for (auto& detection : detection_events) {
             if (detection >= mwpm.flooder.graph.nodes.size())
-                throw std::invalid_argument("The detection event with index " + std::to_string(detection)
-                                            + " does not correspond to a node in the graph, which only has "
-                                            + std::to_string(mwpm.flooder.graph.nodes.size()) + " nodes.");
+                throw std::invalid_argument(
+                    "The detection event with index " + std::to_string(detection) +
+                    " does not correspond to a node in the graph, which only has " +
+                    std::to_string(mwpm.flooder.graph.nodes.size()) + " nodes.");
             if (detection + 1 > mwpm.flooder.graph.is_user_graph_boundary_node.size() ||
                 !mwpm.flooder.graph.is_user_graph_boundary_node[detection])
                 mwpm.create_detection_event(&mwpm.flooder.graph.nodes[detection]);
@@ -216,3 +217,72 @@ void pm::decode_detection_events_to_match_edges(pm::Mwpm& mwpm, const std::vecto
     mwpm.flooder.match_edges.clear();
     shatter_blossoms_for_all_detection_events_and_extract_match_edges(mwpm, detection_events);
 }
+
+void flip_edge(const pm::SearchGraphEdge& edge) {
+    edge.detector_node->neighbor_markers[edge.neighbor_index] ^= pm::FLIPPED;
+    auto neighbor = edge.detector_node->neighbors[edge.neighbor_index];
+    if (neighbor) {
+        auto idx_from_neighbor = neighbor->index_of_neighbor(edge.detector_node);
+        neighbor->neighbor_markers[idx_from_neighbor] ^= pm::FLIPPED;
+    }
+}
+
+void pm::decode_detection_events_to_edges(
+    pm::Mwpm& mwpm, const std::vector<uint64_t>& detection_events, std::vector<int64_t>& edges) {
+    if (mwpm.flooder.graph.nodes.size() != mwpm.search_flooder.graph.nodes.size()) {
+        throw std::invalid_argument(
+            "Mwpm object does not contain search flooder, which is required to decode to edges.");
+    }
+    process_timeline_until_completion(mwpm, detection_events);
+    mwpm.flooder.match_edges.clear();
+    shatter_blossoms_for_all_detection_events_and_extract_match_edges(mwpm, detection_events);
+    if (!mwpm.flooder.negative_weight_detection_events.empty())
+        shatter_blossoms_for_all_detection_events_and_extract_match_edges(
+            mwpm, mwpm.flooder.negative_weight_detection_events);
+    // Flip edges with negative weights and add to edges vector.
+    for (const auto& neg_node_pair : mwpm.search_flooder.graph.negative_weight_edges) {
+        auto node1_ptr = &mwpm.search_flooder.graph.nodes[neg_node_pair.first];
+        auto node2_ptr =
+            neg_node_pair.second != SIZE_MAX ? &mwpm.search_flooder.graph.nodes[neg_node_pair.second] : nullptr;
+        SearchGraphEdge neg_edge = {node1_ptr, node1_ptr->index_of_neighbor(node2_ptr)};
+        flip_edge(neg_edge);
+        int64_t node1 = neg_edge.detector_node - &mwpm.search_flooder.graph.nodes[0];
+        int64_t node2 = node2_ptr ? node2_ptr - &mwpm.search_flooder.graph.nodes[0] : -1;
+        edges.push_back(node1);
+        edges.push_back(node2);
+    }
+    // Flip edges along a shortest path between matched detection events and add to edges vector
+    for (const auto& match_edge : mwpm.flooder.match_edges) {
+        size_t node_from = match_edge.loc_from - &mwpm.flooder.graph.nodes[0];
+        size_t node_to = match_edge.loc_to ? match_edge.loc_to - &mwpm.flooder.graph.nodes[0] : SIZE_MAX;
+        mwpm.search_flooder.iter_edges_on_shortest_path_from_middle(node_from, node_to, [&](const SearchGraphEdge& e) {
+            flip_edge(e);
+            int64_t node1 = e.detector_node - &mwpm.search_flooder.graph.nodes[0];
+            auto node2_ptr = e.detector_node->neighbors[e.neighbor_index];
+            int64_t node2 = node2_ptr ? node2_ptr - &mwpm.search_flooder.graph.nodes[0] : -1;
+            edges.push_back(node1);
+            edges.push_back(node2);
+        });
+    }
+    // Remove any edges in the edges vector that are no longer flipped. Also unflip edges.
+    for (size_t i = 0; i < edges.size() / 2;) {
+        int64_t u = edges[2 * i];
+        int64_t v = edges[2 * i + 1];
+        auto& u_node = mwpm.search_flooder.graph.nodes[u];
+        size_t idx;
+        if (v == -1) {
+            idx = 0;
+        } else {
+            auto v_ptr = &mwpm.search_flooder.graph.nodes[v];
+            idx = u_node.index_of_neighbor(v_ptr);
+        }
+        if (!(u_node.neighbor_markers[idx] & pm::FLIPPED)) {
+            edges[2 * i] = edges[edges.size() - 2];
+            edges[2 * i + 1] = edges[edges.size() - 1];
+            edges.resize(edges.size() - 2);
+        } else {
+            flip_edge({&u_node, idx});
+            i++;
+        }
+    }
+}
diff --git a/src/pymatching/sparse_blossom/driver/mwpm_decoding.h b/src/pymatching/sparse_blossom/driver/mwpm_decoding.h
@@ -71,6 +71,16 @@ void decode_detection_events(
 /// matched to each other via paths.
 void decode_detection_events_to_match_edges(pm::Mwpm& mwpm, const std::vector<uint64_t>& detection_events);
 
+/// Decode detection events using a Mwpm object and vector of detection event indices.
+/// Returns the edges in the matching: these are pairs of *detectors* forming *edges* in the
+/// matching solution (rather than pairs of detection *events* matched via *paths* as returned
+/// instead by `decode_detection_events_to_match_edges`).
+void decode_detection_events_to_edges(
+    pm::Mwpm& mwpm,
+    const std::vector<uint64_t>& detection_events,
+    std::vector<int64_t>& edges
+    );
+
 }  // namespace pm
 
 #endif  // PYMATCHING2_MWPM_DECODING_H
diff --git a/src/pymatching/sparse_blossom/driver/mwpm_decoding.perf.cc b/src/pymatching/sparse_blossom/driver/mwpm_decoding.perf.cc
@@ -270,6 +270,32 @@ BENCHMARK(Decode_surface_r21_d21_p100_with_dijkstra) {
     }
 }
 
+BENCHMARK(Decode_surface_r21_d21_p100_to_edges) {
+    size_t rounds = 21;
+    auto data = generate_data(21, rounds, 0.01, 8);
+    auto &dem = data.first;
+    const auto &shots = data.second;
+
+    size_t num_buckets = pm::NUM_DISTINCT_WEIGHTS;
+    auto mwpm = pm::detector_error_model_to_mwpm(dem, num_buckets, true);
+
+    size_t num_dets = 0;
+    for (const auto &shot : shots) {
+        num_dets += shot.hits.size();
+    }
+    std::vector<int64_t> edges;
+    benchmark_go([&]() {
+        for (const auto &shot : shots) {
+            edges.clear();
+            pm::decode_detection_events_to_edges(mwpm, shot.hits, edges);
+        }
+    })
+        .goal_millis(8.2)
+        .show_rate("dets", (double)num_dets)
+        .show_rate("layers", (double)rounds * shots.size())
+        .show_rate("shots", (double)shots.size());
+}
+
 BENCHMARK(Decode_surface_r21_d21_p1000) {
     size_t rounds = 21;
     auto data = generate_data(21, rounds, 0.001, 256);
@@ -329,7 +355,7 @@ BENCHMARK(Decode_surface_r21_d21_p1000_with_dijkstra) {
             res.reset();
         }
     })
-        .goal_millis(10)
+        .goal_millis(7.7)
         .show_rate("dets", (double)num_dets)
         .show_rate("layers", (double)rounds * shots.size())
         .show_rate("shots", (double)shots.size());
@@ -338,6 +364,32 @@ BENCHMARK(Decode_surface_r21_d21_p1000_with_dijkstra) {
     }
 }
 
+BENCHMARK(Decode_surface_r21_d21_p1000_to_edges) {
+    size_t rounds = 21;
+    auto data = generate_data(21, rounds, 0.001, 256);
+    auto &dem = data.first;
+    const auto &shots = data.second;
+
+    size_t num_buckets = pm::NUM_DISTINCT_WEIGHTS;
+    auto mwpm = pm::detector_error_model_to_mwpm(dem, num_buckets, true);
+
+    size_t num_dets = 0;
+    for (const auto &shot : shots) {
+        num_dets += shot.hits.size();
+    }
+    std::vector<int64_t> edges;
+    benchmark_go([&]() {
+        for (const auto &shot : shots) {
+            edges.clear();
+            pm::decode_detection_events_to_edges(mwpm, shot.hits, edges);
+        }
+    })
+        .goal_millis(8.4)
+        .show_rate("dets", (double)num_dets)
+        .show_rate("layers", (double)rounds * shots.size())
+        .show_rate("shots", (double)shots.size());
+}
+
 BENCHMARK(Decode_surface_r21_d21_p10000) {
     size_t rounds = 21;
     auto data = generate_data(21, rounds, 0.0001, 512);
@@ -406,6 +458,32 @@ BENCHMARK(Decode_surface_r21_d21_p10000_with_dijkstra) {
     }
 }
 
+BENCHMARK(Decode_surface_r21_d21_p10000_to_edges) {
+    size_t rounds = 21;
+    auto data = generate_data(21, rounds, 0.0001, 512);
+    auto &dem = data.first;
+    const auto &shots = data.second;
+
+    size_t num_buckets = pm::NUM_DISTINCT_WEIGHTS;
+    auto mwpm = pm::detector_error_model_to_mwpm(dem, num_buckets, true);
+
+    size_t num_dets = 0;
+    for (const auto &shot : shots) {
+        num_dets += shot.hits.size();
+    }
+    std::vector<int64_t> edges;
+    benchmark_go([&]() {
+        for (const auto &shot : shots) {
+            edges.clear();
+            pm::decode_detection_events_to_edges(mwpm, shot.hits, edges);
+        }
+    })
+        .goal_millis(1.4)
+        .show_rate("dets", (double)num_dets)
+        .show_rate("layers", (double)rounds * shots.size())
+        .show_rate("shots", (double)shots.size());
+}
+
 BENCHMARK(Decode_surface_r21_d21_p100000) {
     size_t rounds = 21;
     auto data = generate_data(21, rounds, 0.00001, 512);
@@ -472,4 +550,30 @@ BENCHMARK(Decode_surface_r21_d21_p100000_with_dijkstra) {
     if (num_mistakes == shots.size()) {
         std::cerr << "data dependence";
     }
+}
+
+BENCHMARK(Decode_surface_r21_d21_p100000_to_edges) {
+    size_t rounds = 21;
+    auto data = generate_data(21, rounds, 0.00001, 512);
+    auto &dem = data.first;
+    const auto &shots = data.second;
+
+    size_t num_buckets = pm::NUM_DISTINCT_WEIGHTS;
+    auto mwpm = pm::detector_error_model_to_mwpm(dem, num_buckets, true);
+
+    size_t num_dets = 0;
+    for (const auto &shot : shots) {
+        num_dets += shot.hits.size();
+    }
+    std::vector<int64_t> edges;
+    benchmark_go([&]() {
+        for (const auto &shot : shots) {
+            edges.clear();
+            pm::decode_detection_events_to_edges(mwpm, shot.hits, edges);
+        }
+    })
+        .goal_micros(130)
+        .show_rate("dets", (double)num_dets)
+        .show_rate("layers", (double)rounds * shots.size())
+        .show_rate("shots", (double)shots.size());
 }