diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index e6c0e9ce7e0..3929f2d63ab 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -620,70 +620,28 @@ SimpleLruWritePage(SlruCtl ctl, int slotno) /* - * NEON: we do not want to include large pg_xact/multixact files in basebackup and prefer - * to download them on demand to reduce startup time. - * If SLRU segment is not found, we try to download it from page server + * NEON: we do not want to include large pg_xact/multixact files in the + * basebackup and prefer to download them on demand to reduce startup time. + * + * If SLRU segment is not found, we try to download it from the pageserver. + * + * Returns: + * true if the file was successfully downloaded + * false if the file was not found in pageserver + * ereports if some other error happened */ -static int -SimpleLruDownloadSegment(SlruCtl ctl, int pageno, char const* path) +static bool +SimpleLruDownloadSegment(SlruCtl ctl, int pageno, char const *path) { - int segno; - int fd = -1; - int n_blocks; - char* buffer; - - static SMgrRelationData dummy_smgr_rel = {0}; + int segno; /* If page is greater than latest written page, then do not try to download segment from server */ if (ctl->PagePrecedes(ctl->shared->latest_page_number, pageno)) - return -1; + return false; - if (!dummy_smgr_rel.smgr) - { - RelFileLocator rlocator = {0}; - dummy_smgr_rel.smgr = smgr(InvalidBackendId, rlocator); - } segno = pageno / SLRU_PAGES_PER_SEGMENT; - if (neon_use_communicator_worker) { - buffer = NULL; - } else { - buffer = palloc(BLCKSZ * SLRU_PAGES_PER_SEGMENT); - } - - n_blocks = smgr_read_slru_segment(&dummy_smgr_rel, path, segno, buffer); - if (n_blocks > 0) - { - fd = OpenTransientFile(path, O_RDWR | O_CREAT | PG_BINARY); - if (fd < 0) - { - slru_errcause = SLRU_OPEN_FAILED; - slru_errno = errno; - pfree(buffer); - return -1; - } - - if (!neon_use_communicator_worker) { - errno = 0; - pgstat_report_wait_start(WAIT_EVENT_SLRU_WRITE); - if (pg_pwrite(fd, buffer, n_blocks*BLCKSZ, 0) != n_blocks*BLCKSZ) - { - pgstat_report_wait_end(); - /* if write didn't set errno, assume problem is no disk space */ - if (errno == 0) - errno = ENOSPC; - slru_errcause = SLRU_WRITE_FAILED; - slru_errno = errno; - - CloseTransientFile(fd); - pfree(buffer); - return -1; - } - pgstat_report_wait_end(); - } - } - pfree(buffer); - return fd; + return smgr_read_slru_segment(path, segno); } /* @@ -702,29 +660,34 @@ SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno) int fd; bool result; off_t endpos; + bool attempted_download = false; /* update the stats counter of checked pages */ pgstat_count_slru_page_exists(ctl->shared->slru_stats_idx); SlruFileName(ctl, path, segno); + retry_after_download: fd = OpenTransientFile(path, O_RDONLY | PG_BINARY); if (fd < 0) { - /* expected: file doesn't exist */ - if (errno == ENOENT) - { - fd = SimpleLruDownloadSegment(ctl, pageno, path); - if (fd < 0) - return false; - } - else + if (errno == ENOENT && !attempted_download) { - /* report error normally */ - slru_errcause = SLRU_OPEN_FAILED; - slru_errno = errno; - SlruReportIOError(ctl, pageno, 0); + /* Try to download the file from the pageserver */ + attempted_download = true; + if (SimpleLruDownloadSegment(ctl, pageno, path)) + goto retry_after_download; + errno = ENOENT; } + + /* expected: file doesn't exist */ + if (errno == ENOENT) + return false; + + /* report error normally */ + slru_errcause = SLRU_OPEN_FAILED; + slru_errno = errno; + SlruReportIOError(ctl, pageno, 0); } if ((endpos = lseek(fd, 0, SEEK_END)) < 0) @@ -765,6 +728,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno) off_t offset = rpageno * BLCKSZ; char path[MAXPGPATH]; int fd; + bool attempted_download = false; SlruFileName(ctl, path, segno); @@ -775,33 +739,31 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno) * SlruPhysicalWritePage). Hence, if we are InRecovery, allow the case * where the file doesn't exist, and return zeroes instead. */ + retry_after_download: fd = OpenTransientFile(path, O_RDONLY | PG_BINARY); if (fd < 0) { - if (errno != ENOENT) + if (errno == ENOENT && !attempted_download) + { + /* Try to download the file from the pageserver */ + attempted_download = true; + if (SimpleLruDownloadSegment(ctl, pageno, path)) + goto retry_after_download; + errno = ENOENT; + } + + if (errno != ENOENT || !InRecovery) { slru_errcause = SLRU_OPEN_FAILED; slru_errno = errno; return false; } - fd = SimpleLruDownloadSegment(ctl, pageno, path); - if (fd < 0) - { - if (!InRecovery) - { - slru_errcause = SLRU_OPEN_FAILED; - slru_errno = errno; - return false; - } - else - { - ereport(LOG, - (errmsg("file \"%s\" doesn't exist, reading as zeroes", - path))); - MemSet(shared->page_buffer[slotno], 0, BLCKSZ); - return true; - } - } + + ereport(LOG, + (errmsg("file \"%s\" doesn't exist, reading as zeroes", + path))); + MemSet(shared->page_buffer[slotno], 0, BLCKSZ); + return true; } errno = 0; diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 7123866e971..53169043b6c 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -49,6 +49,13 @@ static const f_smgr smgr_md = { .smgr_immedsync = mdimmedsync, }; +/* + * SLRU download isn't really part of the smgr API, as SLRUs are not + * relations. But we define this here anyway, to keep it close to the smgr + * hooks in Neon. + */ +read_slru_segment_hook_type read_slru_segment_hook; + /* * Each backend has a hashtable that stores all extant SMgrRelation objects. * In addition, "unowned" SMgrRelation objects are chained together in a list. @@ -776,22 +783,24 @@ smgr_end_unlogged_build(SMgrRelation reln) } /* - * NEON: we do not want to include large pg_xact/multixact files in basebackup and prefer - * to download them on demand to reduce startup time. - * If SLRU segment is not found, we try to download it from page server + * NEON: Attempt to download an SLRU file from remote storage. + * + * To reduce startup time, we don't want to include large pg_xact/multixact + * files in the basebackup. Instead, we have this hook to download them on + * demand. If an SLRU segment is not found, the code in slru.c calls this to + * check if it can be downloaded from the pageserver. * - * This function returns number of blocks in segment. Usually it should be SLRU_PAGES_PER_SEGMENT but in case - * of partial segment, it can be smaller. Zero value means that segment doesn't exist. - * From Postgres point of view empty segment is the same as absent segment. + * If the segment is found in remote storage, the hook writes it to the local + * file and returns 'true'. If the file is not found, returns 'false'. */ -int -smgr_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buffer) +bool +smgr_read_slru_segment(const char *path, int segno) { - return (*reln->smgr).smgr_read_slru_segment ? (*reln->smgr).smgr_read_slru_segment(reln, path, segno, buffer) : 0; + if (read_slru_segment_hook) + return read_slru_segment_hook(path, segno); + return false; } - - /* * AtEOXact_SMgr * diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c index 95f3cb19f38..011ec18015a 100644 --- a/src/backend/utils/init/globals.c +++ b/src/backend/utils/init/globals.c @@ -113,7 +113,6 @@ bool IsPostmasterEnvironment = false; bool IsUnderPostmaster = false; bool IsBinaryUpgrade = false; bool IsBackgroundWorker = false; -bool neon_use_communicator_worker = false; bool ExitOnAnyError = false; diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 5d7d58b2d07..f5b954a2858 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -170,12 +170,6 @@ extern PGDLLIMPORT bool IsPostmasterEnvironment; extern PGDLLIMPORT bool IsUnderPostmaster; extern PGDLLIMPORT bool IsBackgroundWorker; extern PGDLLIMPORT bool IsBinaryUpgrade; -/* Whether the communicator worker is used or not. Defined here so that - * it is also accessible from the main postgres code easily without - * having to look up the value using strings and chain of other - * functions. - */ -extern PGDLLIMPORT bool neon_use_communicator_worker; extern PGDLLIMPORT bool ExitOnAnyError; diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index 704ba0c0cae..5b4af0e4ac5 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -130,8 +130,6 @@ typedef struct f_smgr void (*smgr_start_unlogged_build) (SMgrRelation reln); void (*smgr_finish_unlogged_build_phase_1) (SMgrRelation reln); void (*smgr_end_unlogged_build) (SMgrRelation reln); - - int (*smgr_read_slru_segment) (SMgrRelation reln, const char *path, int segno, void* buffer); } f_smgr; typedef void (*smgr_init_hook_type) (void); @@ -141,6 +139,10 @@ extern PGDLLIMPORT smgr_shutdown_hook_type smgr_shutdown_hook; extern void smgr_init_standard(void); extern void smgr_shutdown_standard(void); +/* NEON: Hook for reading an SLRU segment from e.g. remote storage */ +typedef bool (*read_slru_segment_hook_type) (const char *path, int segno); +extern read_slru_segment_hook_type read_slru_segment_hook; + // Alternative implementation of calculate_database_size() typedef int64 (*dbsize_hook_type) (Oid dbOid); extern PGDLLIMPORT dbsize_hook_type dbsize_hook; @@ -192,6 +194,6 @@ extern void smgr_start_unlogged_build(SMgrRelation reln); extern void smgr_finish_unlogged_build_phase_1(SMgrRelation reln); extern void smgr_end_unlogged_build(SMgrRelation reln); -extern int smgr_read_slru_segment(SMgrRelation reln, const char *path, int segno, void* buffer); +extern bool smgr_read_slru_segment(const char *path, int segno); #endif /* SMGR_H */