Permalink
Browse files

Update priority_queue.h documentation

  • Loading branch information...
1 parent f01ea8a commit 45caf24dc21e47417ceed155a7306ea6f9212fe5 @Mortal Mortal committed Oct 12, 2012
Showing with 70 additions and 36 deletions.
  1. +57 −27 tpie/priority_queue.h
  2. +13 −9 tpie/priority_queue.inl
View
@@ -49,38 +49,47 @@ namespace tpie {
priority_queue_error(const std::string& what) : std::logic_error(what)
{ }
};
-
-/////////////////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////////////////////////
+/// \class priority_queue
+/// \brief External memory priority queue implementation.
+///
+/// Originally implemented by Lars Hvam Petersen for his Master's thesis
+/// titled "External Priority Queues in Practice", June 2007.
+/// This implementation, named "PQSequence3", is the fastest among the
+/// priority queue implementations studied in the paper.
+/// Inspiration: Sanders - Fast priority queues for cached memory (1999).
///
-/// \class priority_queue
-/// \author Lars Hvam Petersen
+/// For an overview of the algorithm, refer to Sanders (1999) section 2 and
+/// figure 1, or Lars Hvam's thesis, section 4.4.
///
-/// Inspiration: Sanders - Fast priority queues for cached memory (1999)
-/// Refer to Section 2 and Figure 1 for an overview of the algorithm
+/// In the debug log, the priority queue reports two values setting_k and
+/// setting_m. The priority queue has a maximum capacity which is on the order
+/// of setting_m * setting_k**setting_k elements (where ** denotes
+/// exponentiation).
///
-/////////////////////////////////////////////////////////
+/// However, even with as little as 8 MB of memory, this maximum capacity in
+/// practice exceeds 2**48, corresponding to a petabyte-sized dataset of 32-bit
+/// integers.
+///////////////////////////////////////////////////////////////////////////////
template<typename T, typename Comparator = std::less<T>, typename OPQType = pq_overflow_heap<T, Comparator> >
class priority_queue {
typedef memory_size_type group_type;
typedef memory_size_type slot_type;
public:
- /////////////////////////////////////////////////////////
- ///
- /// Constructor
- ///
- /// \param f Factor of memory that the priority queue is
- /// allowed to use.
+ ///////////////////////////////////////////////////////////////////////////
+ /// \brief Constructor.
+ ///
+ /// \param f Factor of memory that the priority queue is allowed to use.
/// \param b Block factor
- ///
- /////////////////////////////////////////////////////////
- priority_queue(double f=1.0, float b=0.0625);
+ ///////////////////////////////////////////////////////////////////////////
+ priority_queue(double f=1.0, float b=0.0625);
#ifndef DOXYGEN
- // \param mmavail Number of bytes the priority queue is
- // allowed to use.
+ // \param mm_avail Number of bytes the priority queue is allowed to use.
// \param b Block factor
- priority_queue(memory_size_type mm_avail, float b=0.0625);
+ priority_queue(memory_size_type mm_avail, float b=0.0625);
#endif
@@ -154,17 +163,38 @@ class priority_queue {
T min;
bool min_in_buffer;
- tpie::auto_ptr<OPQType> opq; // insert heap
- tpie::array<T> buffer; // deletion buffer
- tpie::array<T> gbuffer0; // group buffer 0
- tpie::array<T> mergebuffer; // merge buffer for merging deletion buffer and group buffer 0
+ /** Overflow priority queue (for buffering inserted elements). Capacity m. */
+ tpie::auto_ptr<OPQType> opq;
+
+ /** Deletion buffer containing the m' top elements in the entire structure. */
+ tpie::array<T> buffer;
+
+ /** Group buffers contain at most m elements, all less than or equal to the
+ * elements in the corresponding group slots. Elements in group buffers are *not*
+ * repeated in actual group slots. For efficiency, we keep group buffer 0
+ * in memory. */
+ tpie::array<T> gbuffer0;
+
+ /** Merge buffer of size 2*m. */
+ tpie::array<T> mergebuffer;
+
+ /** 3*(#slots) integers. Slot i contains its elements in cyclic ascending order,
+ * starting at index slot_state[3*i]. Slot i contains slot_state[3*i+1] elements.
+ * Its data is in data file index slot_state[3*i+2]. */
tpie::array<memory_size_type> slot_state;
+
+ /** 2*(#groups) integers. Group buffer i has its elements in cyclic ascending order,
+ * starting at index group_state[2*i]. Group buffer i contains group_state[2*i+1] elements. */
tpie::array<memory_size_type> group_state;
- memory_size_type setting_k;
- memory_size_type current_r;
- memory_size_type setting_m;
- memory_size_type setting_mmark;
+ /** k, the fanout of each group and the max fanout R. */
+ memory_size_type setting_k;
+ /** Number of groups in use. */
+ memory_size_type current_r;
+ /** m, the size of a slot and the size of the group buffers. */
+ memory_size_type setting_m;
+ /** m', the size of the deletion buffer. */
+ memory_size_type setting_mmark;
memory_size_type slot_data_id;
@@ -216,19 +216,22 @@ template <typename T, typename Comparator, typename OPQType>
void priority_queue<T, Comparator, OPQType>::push(const T& x) {
if(opq->full()) {
+ // When the overflow priority queue (a.k.a. the insertion buffer) is full,
+ // insert its contents into a new slot in group 0.
+ //
+ // To maintain the heap invariant
+ // deletion buffer <= group buffer 0 <= group 0 slots
+ // we bubble lesser elements from the insertion buffer down into
+ // the deletion buffer and group buffer 0.
- // Merge insertion buffer, deletion buffer and group buffer 0
- // such that deletion buffer <= group buffer 0 <= insertion buffer.
-
- // Afterwards, move insertion buffer to a free slot in group 0.
-
- slot_type slot = free_slot(0); // if group 0 is full, we recursively empty group i
- // by merging it into a slot in group i+1
+ slot_type slot = free_slot(0); // (if group 0 is full, we recursively empty group i
+ // by merging it into a slot in group i+1)
assert(opq->sorted_size() == setting_m);
T* arr = opq->sorted_array();
- if(buffer_size > 0) { // maintain heap invariant for deletion buffer
+ // Bubble lesser elements down into deletion buffer
+ if(buffer_size > 0) {
// fetch insertion buffer
memcpy(&mergebuffer[0], &arr[0], sizeof(T)*opq->sorted_size());
@@ -246,7 +249,8 @@ void priority_queue<T, Comparator, OPQType>::push(const T& x) {
memcpy(&arr[0], mergebuffer.get()+buffer_size, sizeof(T)*opq->sorted_size());
}
- if(group_size(0)> 0) { // maintain heap invariant for gbuffer0
+ // Bubble lesser elements down into group buffer 0
+ if(group_size(0)> 0) {
// Merge insertion buffer and group buffer 0
assert(group_size(0)+opq->sorted_size() <= setting_m*2);

0 comments on commit 45caf24

Please sign in to comment.