Skip to content

Refactor SLRU download interface between Postgres and the neon extension #709

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 49 additions & 87 deletions src/backend/access/transam/slru.c
Original file line number Diff line number Diff line change
Expand Up @@ -620,70 +620,28 @@ SimpleLruWritePage(SlruCtl ctl, int slotno)


/*
* NEON: we do not want to include large pg_xact/multixact files in basebackup and prefer
* to download them on demand to reduce startup time.
* If SLRU segment is not found, we try to download it from page server
* NEON: we do not want to include large pg_xact/multixact files in the
* basebackup and prefer to download them on demand to reduce startup time.
*
* If SLRU segment is not found, we try to download it from the pageserver.
*
* Returns:
* true if the file was successfully downloaded
* false if the file was not found in pageserver
* ereports if some other error happened
*/
static int
SimpleLruDownloadSegment(SlruCtl ctl, int pageno, char const* path)
static bool
SimpleLruDownloadSegment(SlruCtl ctl, int pageno, char const *path)
{
int segno;
int fd = -1;
int n_blocks;
char* buffer;

static SMgrRelationData dummy_smgr_rel = {0};
int segno;

/* If page is greater than latest written page, then do not try to download segment from server */
if (ctl->PagePrecedes(ctl->shared->latest_page_number, pageno))
return -1;
return false;

if (!dummy_smgr_rel.smgr)
{
RelFileLocator rlocator = {0};
dummy_smgr_rel.smgr = smgr(InvalidBackendId, rlocator);
}
segno = pageno / SLRU_PAGES_PER_SEGMENT;

if (neon_use_communicator_worker) {
buffer = NULL;
} else {
buffer = palloc(BLCKSZ * SLRU_PAGES_PER_SEGMENT);
}

n_blocks = smgr_read_slru_segment(&dummy_smgr_rel, path, segno, buffer);
if (n_blocks > 0)
{
fd = OpenTransientFile(path, O_RDWR | O_CREAT | PG_BINARY);
if (fd < 0)
{
slru_errcause = SLRU_OPEN_FAILED;
slru_errno = errno;
pfree(buffer);
return -1;
}

if (!neon_use_communicator_worker) {
errno = 0;
pgstat_report_wait_start(WAIT_EVENT_SLRU_WRITE);
if (pg_pwrite(fd, buffer, n_blocks*BLCKSZ, 0) != n_blocks*BLCKSZ)
{
pgstat_report_wait_end();
/* if write didn't set errno, assume problem is no disk space */
if (errno == 0)
errno = ENOSPC;
slru_errcause = SLRU_WRITE_FAILED;
slru_errno = errno;

CloseTransientFile(fd);
pfree(buffer);
return -1;
}
pgstat_report_wait_end();
}
}
pfree(buffer);
return fd;
return smgr_read_slru_segment(path, segno);
}

/*
Expand All @@ -702,29 +660,34 @@ SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno)
int fd;
bool result;
off_t endpos;
bool attempted_download = false;

/* update the stats counter of checked pages */
pgstat_count_slru_page_exists(ctl->shared->slru_stats_idx);

SlruFileName(ctl, path, segno);

retry_after_download:
fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
if (fd < 0)
{
/* expected: file doesn't exist */
if (errno == ENOENT)
{
fd = SimpleLruDownloadSegment(ctl, pageno, path);
if (fd < 0)
return false;
}
else
if (errno == ENOENT && !attempted_download)
{
/* report error normally */
slru_errcause = SLRU_OPEN_FAILED;
slru_errno = errno;
SlruReportIOError(ctl, pageno, 0);
/* Try to download the file from the pageserver */
attempted_download = true;
if (SimpleLruDownloadSegment(ctl, pageno, path))
goto retry_after_download;
errno = ENOENT;
}

/* expected: file doesn't exist */
if (errno == ENOENT)
return false;

/* report error normally */
slru_errcause = SLRU_OPEN_FAILED;
slru_errno = errno;
SlruReportIOError(ctl, pageno, 0);
}

if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
Expand Down Expand Up @@ -765,6 +728,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
off_t offset = rpageno * BLCKSZ;
char path[MAXPGPATH];
int fd;
bool attempted_download = false;

SlruFileName(ctl, path, segno);

Expand All @@ -775,33 +739,31 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
* SlruPhysicalWritePage). Hence, if we are InRecovery, allow the case
* where the file doesn't exist, and return zeroes instead.
*/
retry_after_download:
fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
if (fd < 0)
{
if (errno != ENOENT)
if (errno == ENOENT && !attempted_download)
{
/* Try to download the file from the pageserver */
attempted_download = true;
if (SimpleLruDownloadSegment(ctl, pageno, path))
goto retry_after_download;
errno = ENOENT;
}

if (errno != ENOENT || !InRecovery)
{
slru_errcause = SLRU_OPEN_FAILED;
slru_errno = errno;
return false;
}
fd = SimpleLruDownloadSegment(ctl, pageno, path);
if (fd < 0)
{
if (!InRecovery)
{
slru_errcause = SLRU_OPEN_FAILED;
slru_errno = errno;
return false;
}
else
{
ereport(LOG,
(errmsg("file \"%s\" doesn't exist, reading as zeroes",
path)));
MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
return true;
}
}

ereport(LOG,
(errmsg("file \"%s\" doesn't exist, reading as zeroes",
path)));
MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
return true;
}

errno = 0;
Expand Down
31 changes: 20 additions & 11 deletions src/backend/storage/smgr/smgr.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,13 @@ static const f_smgr smgr_md = {
.smgr_immedsync = mdimmedsync,
};

/*
* SLRU download isn't really part of the smgr API, as SLRUs are not
* relations. But we define this here anyway, to keep it close to the smgr
* hooks in Neon.
*/
read_slru_segment_hook_type read_slru_segment_hook;

/*
* Each backend has a hashtable that stores all extant SMgrRelation objects.
* In addition, "unowned" SMgrRelation objects are chained together in a list.
Expand Down Expand Up @@ -776,22 +783,24 @@ smgr_end_unlogged_build(SMgrRelation reln)
}

/*
* NEON: we do not want to include large pg_xact/multixact files in basebackup and prefer
* to download them on demand to reduce startup time.
* If SLRU segment is not found, we try to download it from page server
* NEON: Attempt to download an SLRU file from remote storage.
*
* To reduce startup time, we don't want to include large pg_xact/multixact
* files in the basebackup. Instead, we have this hook to download them on
* demand. If an SLRU segment is not found, the code in slru.c calls this to
* check if it can be downloaded from the pageserver.
*
* This function returns number of blocks in segment. Usually it should be SLRU_PAGES_PER_SEGMENT but in case
* of partial segment, it can be smaller. Zero value means that segment doesn't exist.
* From Postgres point of view empty segment is the same as absent segment.
* If the segment is found in remote storage, the hook writes it to the local
* file and returns 'true'. If the file is not found, returns 'false'.
*/
int
smgr_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buffer)
bool
smgr_read_slru_segment(const char *path, int segno)
{
return (*reln->smgr).smgr_read_slru_segment ? (*reln->smgr).smgr_read_slru_segment(reln, path, segno, buffer) : 0;
if (read_slru_segment_hook)
return read_slru_segment_hook(path, segno);
return false;
}



/*
* AtEOXact_SMgr
*
Expand Down
1 change: 0 additions & 1 deletion src/backend/utils/init/globals.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ bool IsPostmasterEnvironment = false;
bool IsUnderPostmaster = false;
bool IsBinaryUpgrade = false;
bool IsBackgroundWorker = false;
bool neon_use_communicator_worker = false;

bool ExitOnAnyError = false;

Expand Down
6 changes: 0 additions & 6 deletions src/include/miscadmin.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,12 +170,6 @@ extern PGDLLIMPORT bool IsPostmasterEnvironment;
extern PGDLLIMPORT bool IsUnderPostmaster;
extern PGDLLIMPORT bool IsBackgroundWorker;
extern PGDLLIMPORT bool IsBinaryUpgrade;
/* Whether the communicator worker is used or not. Defined here so that
* it is also accessible from the main postgres code easily without
* having to look up the value using strings and chain of other
* functions.
*/
extern PGDLLIMPORT bool neon_use_communicator_worker;

extern PGDLLIMPORT bool ExitOnAnyError;

Expand Down
8 changes: 5 additions & 3 deletions src/include/storage/smgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,6 @@ typedef struct f_smgr
void (*smgr_start_unlogged_build) (SMgrRelation reln);
void (*smgr_finish_unlogged_build_phase_1) (SMgrRelation reln);
void (*smgr_end_unlogged_build) (SMgrRelation reln);

int (*smgr_read_slru_segment) (SMgrRelation reln, const char *path, int segno, void* buffer);
} f_smgr;

typedef void (*smgr_init_hook_type) (void);
Expand All @@ -141,6 +139,10 @@ extern PGDLLIMPORT smgr_shutdown_hook_type smgr_shutdown_hook;
extern void smgr_init_standard(void);
extern void smgr_shutdown_standard(void);

/* NEON: Hook for reading an SLRU segment from e.g. remote storage */
typedef bool (*read_slru_segment_hook_type) (const char *path, int segno);
extern read_slru_segment_hook_type read_slru_segment_hook;

// Alternative implementation of calculate_database_size()
typedef int64 (*dbsize_hook_type) (Oid dbOid);
extern PGDLLIMPORT dbsize_hook_type dbsize_hook;
Expand Down Expand Up @@ -192,6 +194,6 @@ extern void smgr_start_unlogged_build(SMgrRelation reln);
extern void smgr_finish_unlogged_build_phase_1(SMgrRelation reln);
extern void smgr_end_unlogged_build(SMgrRelation reln);

extern int smgr_read_slru_segment(SMgrRelation reln, const char *path, int segno, void* buffer);
extern bool smgr_read_slru_segment(const char *path, int segno);

#endif /* SMGR_H */
Loading