Permalink
Browse files

Rearrange lazy_scan_heap to avoid visibility map race conditions.

We must set the visibility map bit before releasing our exclusive lock
on the heap page; otherwise, someone might clear the heap page bit
before we set the visibility map bit, leading to a situation where the
visibility map thinks the page is all-visible but it's really not.

This problem has existed since 8.4, but it wasn't critical before we
had index-only scans, since the worst case scenario was that the page
wouldn't get vacuumed until the next scan_all vacuum.

Along the way, a couple of minor, related improvements: (1) if we
pause the heap scan to do an index vac cycle, release any visibility
map page we're holding, since really long-running pins are not good
for a variety of reasons; and (2) warn if we see a page that's marked
all-visible in the visibility map but not on the page level, since
that should never happen any more (it was allowed in previous
releases, but not in 9.2).
  • Loading branch information...
1 parent 85efd5f commit 7ab9b2f3b79177e501a1ef90ed004cc68788abaf Robert Haas committed Apr 24, 2012
Showing with 46 additions and 39 deletions.
  1. +46 −39 src/backend/commands/vacuumlazy.c
@@ -490,6 +490,18 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
if ((vacrelstats->max_dead_tuples - vacrelstats->num_dead_tuples) < MaxHeapTuplesPerPage &&
vacrelstats->num_dead_tuples > 0)
{
+ /*
+ * Before beginning index vacuuming, we release any pin we may hold
+ * on the visibility map page. This isn't necessary for correctness,
+ * but we do it anyway to avoid holding the pin across a lengthy,
+ * unrelated operation.
+ */
+ if (BufferIsValid(vmbuffer))
+ {
+ ReleaseBuffer(vmbuffer);
+ vmbuffer = InvalidBuffer;
+ }
+
/* Log cleanup info before we touch indexes */
vacuum_log_cleanup_info(onerel, vacrelstats);
@@ -510,6 +522,16 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
vacrelstats->num_index_scans++;
}
+ /*
+ * Pin the visibility map page in case we need to mark the page
+ * all-visible. In most cases this will be very cheap, because we'll
+ * already have the correct page pinned anyway. However, it's possible
+ * that (a) next_not_all_visible_block is covered by a different VM page
+ * than the current block or (b) we released our pin and did a cycle of
+ * index vacuuming.
+ */
+ visibilitymap_pin(onerel, blkno, &vmbuffer);
+
buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
RBM_NORMAL, vac_strategy);
@@ -600,26 +622,15 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
empty_pages++;
freespace = PageGetHeapFreeSpace(page);
+ /* empty pages are always all-visible */
if (!PageIsAllVisible(page))
{
PageSetAllVisible(page);
MarkBufferDirty(buf);
+ visibilitymap_set(onerel, blkno, InvalidXLogRecPtr, vmbuffer);
}
- LockBuffer(buf, BUFFER_LOCK_UNLOCK);
-
- /* Update the visibility map */
- if (!all_visible_according_to_vm)
- {
- visibilitymap_pin(onerel, blkno, &vmbuffer);
- LockBuffer(buf, BUFFER_LOCK_SHARE);
- if (PageIsAllVisible(page))
- visibilitymap_set(onerel, blkno, InvalidXLogRecPtr,
- vmbuffer);
- LockBuffer(buf, BUFFER_LOCK_UNLOCK);
- }
-
- ReleaseBuffer(buf);
+ UnlockReleaseBuffer(buf);
RecordPageWithFreeSpace(onerel, blkno, freespace);
continue;
}
@@ -834,11 +845,26 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
freespace = PageGetHeapFreeSpace(page);
- /* Update the all-visible flag on the page */
- if (!PageIsAllVisible(page) && all_visible)
+ /* mark page all-visible, if appropriate */
+ if (all_visible && !all_visible_according_to_vm)
{
- PageSetAllVisible(page);
- MarkBufferDirty(buf);
+ if (!PageIsAllVisible(page))
+ {
+ PageSetAllVisible(page);
+ MarkBufferDirty(buf);
+ }
+ visibilitymap_set(onerel, blkno, InvalidXLogRecPtr, vmbuffer);
+ }
+
+ /*
+ * As of PostgreSQL 9.2, the visibility map bit should never be set if
+ * the page-level bit is clear.
+ */
+ else if (all_visible_according_to_vm && !PageIsAllVisible(page))
+ {
+ elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
+ relname, blkno);
+ visibilitymap_clear(onerel, blkno, vmbuffer);
}
/*
@@ -859,30 +885,11 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u",
relname, blkno);
PageClearAllVisible(page);
- SetBufferCommitInfoNeedsSave(buf);
-
- /*
- * Normally, we would drop the lock on the heap page before
- * updating the visibility map, but since this case shouldn't
- * happen anyway, don't worry about that.
- */
- visibilitymap_pin(onerel, blkno, &vmbuffer);
+ MarkBufferDirty(buf);
visibilitymap_clear(onerel, blkno, vmbuffer);
}
- LockBuffer(buf, BUFFER_LOCK_UNLOCK);
-
- /* Update the visibility map */
- if (!all_visible_according_to_vm && all_visible)
- {
- visibilitymap_pin(onerel, blkno, &vmbuffer);
- LockBuffer(buf, BUFFER_LOCK_SHARE);
- if (PageIsAllVisible(page))
- visibilitymap_set(onerel, blkno, InvalidXLogRecPtr, vmbuffer);
- LockBuffer(buf, BUFFER_LOCK_UNLOCK);
- }
-
- ReleaseBuffer(buf);
+ UnlockReleaseBuffer(buf);
/* Remember the location of the last page with nonremovable tuples */
if (hastup)

0 comments on commit 7ab9b2f

Please sign in to comment.