Skip to content

Commit

Permalink
Track shared buffer hits in pg_stat_io
Browse files Browse the repository at this point in the history
Among other things, this should make it easier to calculate a useful cache hit
ratio by excluding buffer reads via buffer access strategies. As buffer access
strategies reuse buffers (and thus evict the prior buffer contents), it is
normal to see reads on repeated scans of the same data.

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/CAAKRu_beMa9Hzih40%3DXPYqhDVz6tsgUGTrhZXRo%3Dunp%2Bszb%3DUA%40mail.gmail.com
  • Loading branch information
anarazel committed Mar 31, 2023
1 parent 6c3b697 commit 8aaa04b
Show file tree
Hide file tree
Showing 12 changed files with 109 additions and 47 deletions.
11 changes: 11 additions & 0 deletions doc/src/sgml/monitoring.sgml
Original file line number Diff line number Diff line change
Expand Up @@ -3855,6 +3855,17 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
</entry>
</row>

<row>
<entry role="catalog_table_entry">
<para role="column_definition">
<structfield>hits</structfield> <type>bigint</type>
</para>
<para>
The number of times a desired block was found in a shared buffer.
</para>
</entry>
</row>

<row>
<entry role="catalog_table_entry">
<para role="column_definition">
Expand Down
1 change: 1 addition & 0 deletions src/backend/catalog/system_views.sql
Original file line number Diff line number Diff line change
Expand Up @@ -1128,6 +1128,7 @@ SELECT
b.writes,
b.extends,
b.op_bytes,
b.hits,
b.evictions,
b.reuses,
b.fsyncs,
Expand Down
38 changes: 14 additions & 24 deletions src/backend/storage/buffer/bufmgr.c
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr,
ForkNumber forkNum,
BlockNumber blockNum,
BufferAccessStrategy strategy,
bool *foundPtr, IOContext *io_context);
bool *foundPtr, IOContext io_context);
static void FlushBuffer(BufferDesc *buf, SMgrRelation reln,
IOObject io_object, IOContext io_context);
static void FindAndDropRelationBuffers(RelFileLocator rlocator,
Expand Down Expand Up @@ -850,13 +850,14 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
if (isLocalBuf)
{
/*
* LocalBufferAlloc() will set the io_context to IOCONTEXT_NORMAL. We
* do not use a BufferAccessStrategy for I/O of temporary tables.
* We do not use a BufferAccessStrategy for I/O of temporary tables.
* However, in some cases, the "strategy" may not be NULL, so we can't
* rely on IOContextForStrategy() to set the right IOContext for us.
* This may happen in cases like CREATE TEMPORARY TABLE AS...
*/
bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found, &io_context);
io_context = IOCONTEXT_NORMAL;
io_object = IOOBJECT_TEMP_RELATION;
bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found);
if (found)
pgBufferUsage.local_blks_hit++;
else if (isExtend)
Expand All @@ -871,8 +872,10 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
* lookup the buffer. IO_IN_PROGRESS is set if the requested block is
* not currently in memory.
*/
io_context = IOContextForStrategy(strategy);
io_object = IOOBJECT_RELATION;
bufHdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum,
strategy, &found, &io_context);
strategy, &found, io_context);
if (found)
pgBufferUsage.shared_blks_hit++;
else if (isExtend)
Expand All @@ -892,6 +895,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
/* Just need to update stats before we exit */
*hit = true;
VacuumPageHit++;
pgstat_count_io_op(io_object, io_context, IOOP_HIT);

if (VacuumCostActive)
VacuumCostBalance += VacuumCostPageHit;
Expand Down Expand Up @@ -987,16 +991,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
*/
Assert(!(pg_atomic_read_u32(&bufHdr->state) & BM_VALID)); /* spinlock not needed */

if (isLocalBuf)
{
bufBlock = LocalBufHdrGetBlock(bufHdr);
io_object = IOOBJECT_TEMP_RELATION;
}
else
{
bufBlock = BufHdrGetBlock(bufHdr);
io_object = IOOBJECT_RELATION;
}
bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);

if (isExtend)
{
Expand Down Expand Up @@ -1139,7 +1134,7 @@ static BufferDesc *
BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
BlockNumber blockNum,
BufferAccessStrategy strategy,
bool *foundPtr, IOContext *io_context)
bool *foundPtr, IOContext io_context)
{
bool from_ring;
BufferTag newTag; /* identity of requested block */
Expand Down Expand Up @@ -1193,11 +1188,8 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
{
/*
* If we get here, previous attempts to read the buffer must
* have failed ... but we shall bravely try again. Set
* io_context since we will in fact need to count an IO
* Operation.
* have failed ... but we shall bravely try again.
*/
*io_context = IOContextForStrategy(strategy);
*foundPtr = false;
}
}
Expand All @@ -1211,8 +1203,6 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
*/
LWLockRelease(newPartitionLock);

*io_context = IOContextForStrategy(strategy);

/* Loop here in case we have to try another victim buffer */
for (;;)
{
Expand Down Expand Up @@ -1295,7 +1285,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
smgr->smgr_rlocator.locator.dbOid,
smgr->smgr_rlocator.locator.relNumber);

FlushBuffer(buf, NULL, IOOBJECT_RELATION, *io_context);
FlushBuffer(buf, NULL, IOOBJECT_RELATION, io_context);
LWLockRelease(BufferDescriptorGetContentLock(buf));

ScheduleBufferTagForWriteback(&BackendWritebackContext,
Expand Down Expand Up @@ -1494,7 +1484,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
* we may have been forced to release the buffer due to concurrent
* pinners or erroring out.
*/
pgstat_count_io_op(IOOBJECT_RELATION, *io_context,
pgstat_count_io_op(IOOBJECT_RELATION, io_context,
from_ring ? IOOP_REUSE : IOOP_EVICT);
}

Expand Down
11 changes: 2 additions & 9 deletions src/backend/storage/buffer/localbuf.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum,
*/
BufferDesc *
LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
bool *foundPtr, IOContext *io_context)
bool *foundPtr)
{
BufferTag newTag; /* identity of requested block */
LocalBufferLookupEnt *hresult;
Expand All @@ -128,14 +128,6 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
hresult = (LocalBufferLookupEnt *)
hash_search(LocalBufHash, &newTag, HASH_FIND, NULL);

/*
* IO Operations on local buffers are only done in IOCONTEXT_NORMAL. Set
* io_context here (instead of after a buffer hit would have returned) for
* convenience since we don't have to worry about the overhead of calling
* IOContextForStrategy().
*/
*io_context = IOCONTEXT_NORMAL;

if (hresult)
{
b = hresult->id;
Expand Down Expand Up @@ -239,6 +231,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
buf_state &= ~BM_DIRTY;
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);

/* Temporary table I/O does not use Buffer Access Strategies */
pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE);
pgBufferUsage.local_blks_written++;
}
Expand Down
2 changes: 1 addition & 1 deletion src/backend/utils/activity/pgstat_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ pgstat_tracks_io_op(BackendType bktype, IOObject io_object,
* Some BackendTypes will not do certain IOOps.
*/
if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) &&
(io_op == IOOP_READ || io_op == IOOP_EVICT))
(io_op == IOOP_READ || io_op == IOOP_EVICT || io_op == IOOP_HIT))
return false;

if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||
Expand Down
11 changes: 7 additions & 4 deletions src/backend/utils/adt/pgstatfuncs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1259,6 +1259,7 @@ typedef enum io_stat_col
IO_COL_WRITES,
IO_COL_EXTENDS,
IO_COL_CONVERSION,
IO_COL_HITS,
IO_COL_EVICTIONS,
IO_COL_REUSES,
IO_COL_FSYNCS,
Expand All @@ -1277,16 +1278,18 @@ pgstat_get_io_op_index(IOOp io_op)
{
case IOOP_EVICT:
return IO_COL_EVICTIONS;
case IOOP_EXTEND:
return IO_COL_EXTENDS;
case IOOP_FSYNC:
return IO_COL_FSYNCS;
case IOOP_HIT:
return IO_COL_HITS;
case IOOP_READ:
return IO_COL_READS;
case IOOP_REUSE:
return IO_COL_REUSES;
case IOOP_WRITE:
return IO_COL_WRITES;
case IOOP_EXTEND:
return IO_COL_EXTENDS;
case IOOP_FSYNC:
return IO_COL_FSYNCS;
}

elog(ERROR, "unrecognized IOOp value: %d", io_op);
Expand Down
6 changes: 3 additions & 3 deletions src/include/catalog/pg_proc.dat
Original file line number Diff line number Diff line change
Expand Up @@ -5749,9 +5749,9 @@
proname => 'pg_stat_get_io', provolatile => 'v',
prorows => '30', proretset => 't',
proparallel => 'r', prorettype => 'record', proargtypes => '',
proallargtypes => '{text,text,text,int8,int8,int8,int8,int8,int8,int8,timestamptz}',
proargmodes => '{o,o,o,o,o,o,o,o,o,o,o}',
proargnames => '{backend_type,io_object,io_context,reads,writes,extends,op_bytes,evictions,reuses,fsyncs,stats_reset}',
proallargtypes => '{text,text,text,int8,int8,int8,int8,int8,int8,int8,int8,timestamptz}',
proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o}',
proargnames => '{backend_type,io_object,io_context,reads,writes,extends,op_bytes,hits,evictions,reuses,fsyncs,stats_reset}',
prosrc => 'pg_stat_get_io' },

{ oid => '1136', descr => 'statistics: information about WAL activity',
Expand Down
1 change: 1 addition & 0 deletions src/include/pgstat.h
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ typedef enum IOOp
IOOP_EVICT,
IOOP_EXTEND,
IOOP_FSYNC,
IOOP_HIT,
IOOP_READ,
IOOP_REUSE,
IOOP_WRITE,
Expand Down
2 changes: 1 addition & 1 deletion src/include/storage/buf_internals.h
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ extern PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr,
ForkNumber forkNum,
BlockNumber blockNum);
extern BufferDesc *LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
BlockNumber blockNum, bool *foundPtr, IOContext *io_context);
BlockNumber blockNum, bool *foundPtr);
extern void MarkLocalBufferDirty(Buffer buffer);
extern void DropRelationLocalBuffers(RelFileLocator rlocator,
ForkNumber forkNum,
Expand Down
3 changes: 2 additions & 1 deletion src/test/regress/expected/rules.out
Original file line number Diff line number Diff line change
Expand Up @@ -1884,11 +1884,12 @@ pg_stat_io| SELECT backend_type,
writes,
extends,
op_bytes,
hits,
evictions,
reuses,
fsyncs,
stats_reset
FROM pg_stat_get_io() b(backend_type, io_object, io_context, reads, writes, extends, op_bytes, evictions, reuses, fsyncs, stats_reset);
FROM pg_stat_get_io() b(backend_type, io_object, io_context, reads, writes, extends, op_bytes, hits, evictions, reuses, fsyncs, stats_reset);
pg_stat_progress_analyze| SELECT s.pid,
s.datid,
d.datname,
Expand Down
46 changes: 44 additions & 2 deletions src/test/regress/expected/stats.out
Original file line number Diff line number Diff line change
Expand Up @@ -1131,6 +1131,7 @@ SELECT pg_stat_get_subscription_stats(NULL);
-- - writes of shared buffers to permanent storage
-- - extends of relations using shared buffers
-- - fsyncs done to ensure the durability of data dirtying shared buffers
-- - shared buffer hits
-- There is no test for blocks evicted from shared buffers, because we cannot
-- be sure of the state of shared buffers at the point the test is run.
-- Create a regular table and insert some data to generate IOCONTEXT_NORMAL
Expand Down Expand Up @@ -1208,6 +1209,47 @@ SELECT :io_sum_shared_after_reads > :io_sum_shared_before_reads;
t
(1 row)

SELECT sum(hits) AS io_sum_shared_before_hits
FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset
-- Select from the table again to count hits.
-- Ensure we generate hits by forcing a nested loop self-join with no
-- materialize node. The outer side's buffer will stay pinned, preventing its
-- eviction, while we loop through the inner side and generate hits.
BEGIN;
SET LOCAL enable_nestloop TO on; SET LOCAL enable_mergejoin TO off;
SET LOCAL enable_hashjoin TO off; SET LOCAL enable_material TO off;
-- ensure plan stays as we expect it to
EXPLAIN (COSTS OFF) SELECT COUNT(*) FROM test_io_shared t1 INNER JOIN test_io_shared t2 USING (a);
QUERY PLAN
-------------------------------------------
Aggregate
-> Nested Loop
Join Filter: (t1.a = t2.a)
-> Seq Scan on test_io_shared t1
-> Seq Scan on test_io_shared t2
(5 rows)

SELECT COUNT(*) FROM test_io_shared t1 INNER JOIN test_io_shared t2 USING (a);
count
-------
100
(1 row)

COMMIT;
SELECT pg_stat_force_next_flush();
pg_stat_force_next_flush
--------------------------

(1 row)

SELECT sum(hits) AS io_sum_shared_after_hits
FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset
SELECT :io_sum_shared_after_hits > :io_sum_shared_before_hits;
?column?
----------
t
(1 row)

DROP TABLE test_io_shared;
-- Test that the following IOCONTEXT_LOCAL IOOps are tracked in pg_stat_io:
-- - eviction of local buffers in order to reuse them
Expand Down Expand Up @@ -1342,15 +1384,15 @@ SELECT pg_stat_have_stats('io', 0, 0);
t
(1 row)

SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_pre_reset
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_pre_reset
FROM pg_stat_io \gset
SELECT pg_stat_reset_shared('io');
pg_stat_reset_shared
----------------------

(1 row)

SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_post_reset
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_post_reset
FROM pg_stat_io \gset
SELECT :io_stats_post_reset < :io_stats_pre_reset;
?column?
Expand Down
24 changes: 22 additions & 2 deletions src/test/regress/sql/stats.sql
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,7 @@ SELECT pg_stat_get_subscription_stats(NULL);
-- - writes of shared buffers to permanent storage
-- - extends of relations using shared buffers
-- - fsyncs done to ensure the durability of data dirtying shared buffers
-- - shared buffer hits

-- There is no test for blocks evicted from shared buffers, because we cannot
-- be sure of the state of shared buffers at the point the test is run.
Expand Down Expand Up @@ -587,6 +588,25 @@ SELECT pg_stat_force_next_flush();
SELECT sum(reads) AS io_sum_shared_after_reads
FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset
SELECT :io_sum_shared_after_reads > :io_sum_shared_before_reads;

SELECT sum(hits) AS io_sum_shared_before_hits
FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset
-- Select from the table again to count hits.
-- Ensure we generate hits by forcing a nested loop self-join with no
-- materialize node. The outer side's buffer will stay pinned, preventing its
-- eviction, while we loop through the inner side and generate hits.
BEGIN;
SET LOCAL enable_nestloop TO on; SET LOCAL enable_mergejoin TO off;
SET LOCAL enable_hashjoin TO off; SET LOCAL enable_material TO off;
-- ensure plan stays as we expect it to
EXPLAIN (COSTS OFF) SELECT COUNT(*) FROM test_io_shared t1 INNER JOIN test_io_shared t2 USING (a);
SELECT COUNT(*) FROM test_io_shared t1 INNER JOIN test_io_shared t2 USING (a);
COMMIT;
SELECT pg_stat_force_next_flush();
SELECT sum(hits) AS io_sum_shared_after_hits
FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset
SELECT :io_sum_shared_after_hits > :io_sum_shared_before_hits;

DROP TABLE test_io_shared;

-- Test that the following IOCONTEXT_LOCAL IOOps are tracked in pg_stat_io:
Expand Down Expand Up @@ -674,10 +694,10 @@ SELECT :io_sum_bulkwrite_strategy_extends_after > :io_sum_bulkwrite_strategy_ext

-- Test IO stats reset
SELECT pg_stat_have_stats('io', 0, 0);
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_pre_reset
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_pre_reset
FROM pg_stat_io \gset
SELECT pg_stat_reset_shared('io');
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_post_reset
SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_post_reset
FROM pg_stat_io \gset
SELECT :io_stats_post_reset < :io_stats_pre_reset;

Expand Down

0 comments on commit 8aaa04b

Please sign in to comment.