From 45ae77379dffd003713126d5147f966798f623d0 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Mon, 15 May 2023 16:05:02 +0200 Subject: [PATCH 1/3] refactor: attach: use create_tenant_files + schedule_local_tenant_processing With this patch, the attach handler now follows the same pattern as tenant create with regards to instantiation of the new tenant: 1. Prepare on-disk state using `create_tenant_files`. 2. Use the same code path as pageserver startup to load it into memory and start background loops (`schedule_local_tenant_processing`). It's a bit sad we can't use the `PageServerConfig::tenant_attaching_mark_file_path` method inside `create_tenant_files` because it operates in a temporary directory. However, it's a small price to pay for the gained simplicity. During implementation, I noticed that we don't handle failures post `create_tenant_files` well. I left TODO comments in the code linking to the issue that I created for this [^1]. Also, I'll dedupe the spawn_load and spawn_attach code in a future commit. [^1]: https://github.com/neondatabase/neon/issues/4233 --- pageserver/src/http/routes.rs | 14 ++++-- pageserver/src/tenant.rs | 80 +++++++++++++++-------------------- pageserver/src/tenant/mgr.rs | 38 ++++++++++++----- 3 files changed, 73 insertions(+), 59 deletions(-) diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 3c87e6f79dc2..2a7f6bf97a92 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -394,9 +394,17 @@ async fn tenant_attach_handler(request: Request) -> Result, let state = get_state(&request); if let Some(remote_storage) = &state.remote_storage { - mgr::attach_tenant(state.conf, tenant_id, remote_storage.clone(), &ctx) - .instrument(info_span!("tenant_attach", tenant = %tenant_id)) - .await?; + mgr::attach_tenant( + state.conf, + tenant_id, + // XXX: Attach should provide the config, especially during tenant migration. + // See https://github.com/neondatabase/neon/issues/1555 + TenantConfOpt::default(), + remote_storage.clone(), + &ctx, + ) + .instrument(info_span!("tenant_attach", tenant = %tenant_id)) + .await?; } else { return Err(ApiError::BadRequest(anyhow!( "attach_tenant is not possible because pageserver was configured without remote storage" diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index c9d5a54f0b2f..499c301ac902 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -600,12 +600,9 @@ impl Tenant { remote_storage: GenericRemoteStorage, ctx: &RequestContext, ) -> anyhow::Result> { - // XXX: Attach should provide the config, especially during tenant migration. - // See https://github.com/neondatabase/neon/issues/1555 - let tenant_conf = TenantConfOpt::default(); - - Self::attach_idempotent_create_marker_file(conf, tenant_id) - .context("create attach marker file")?; + // TODO dedup with spawn_load + let tenant_conf = + Self::load_tenant_config(conf, tenant_id).context("load tenant config")?; let wal_redo_manager = Arc::new(PostgresRedoManager::new(conf, tenant_id)); let tenant = Arc::new(Tenant::new( @@ -642,45 +639,6 @@ impl Tenant { Ok(tenant) } - fn attach_idempotent_create_marker_file( - conf: &'static PageServerConf, - tenant_id: TenantId, - ) -> anyhow::Result<()> { - // Create directory with marker file to indicate attaching state. - // The load_local_tenants() function in tenant::mgr relies on the marker file - // to determine whether a tenant has finished attaching. - let tenant_dir = conf.tenant_path(&tenant_id); - let marker_file = conf.tenant_attaching_mark_file_path(&tenant_id); - debug_assert_eq!(marker_file.parent().unwrap(), tenant_dir); - // TODO: should use tokio::fs here, but - // 1. caller is not async, for good reason (it holds tenants map lock) - // 2. we'd need to think about cancel safety. Turns out dropping a tokio::fs future - // doesn't wait for the activity in the fs thread pool. - crashsafe::create_dir_all(&tenant_dir).context("create tenant directory")?; - match fs::OpenOptions::new() - .write(true) - .create_new(true) - .open(&marker_file) - { - Ok(_) => {} - Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => { - // Either this is a retry of attach or there is a concurrent task also doing attach for this tenant. - // We cannot distinguish this here. - // The caller is responsible for ensuring there's no concurrent attach for a tenant. - {} // fsync again, we don't know if that already happened - } - err => { - err.context("create tenant attaching marker file")?; - unreachable!("we covered the Ok() case above"); - } - } - crashsafe::fsync_file_and_parent(&marker_file) - .context("fsync tenant attaching marker file and parent")?; - debug_assert!(tenant_dir.is_dir()); - debug_assert!(marker_file.is_file()); - Ok(()) - } - /// /// Background task that downloads all data for a tenant and brings it to Active state. /// @@ -2089,6 +2047,7 @@ impl Tenant { // enough to just fsync it always. crashsafe::fsync(target_config_parent)?; + // XXX we're not fsyncing the parent dir, need to do that in case `creating_tenant` Ok(()) }; @@ -2725,10 +2684,16 @@ fn remove_timeline_and_uninit_mark(timeline_dir: &Path, uninit_mark: &Path) -> a Ok(()) } +pub(crate) enum CreateTenantFilesMode { + Create, + Attach, +} + pub(crate) fn create_tenant_files( conf: &'static PageServerConf, tenant_conf: TenantConfOpt, tenant_id: TenantId, + mode: CreateTenantFilesMode, ) -> anyhow::Result { let target_tenant_directory = conf.tenant_path(&tenant_id); anyhow::ensure!( @@ -2755,6 +2720,7 @@ pub(crate) fn create_tenant_files( conf, tenant_conf, tenant_id, + mode, &temporary_tenant_dir, &target_tenant_directory, ); @@ -2779,9 +2745,28 @@ fn try_create_target_tenant_dir( conf: &'static PageServerConf, tenant_conf: TenantConfOpt, tenant_id: TenantId, + mode: CreateTenantFilesMode, temporary_tenant_dir: &Path, target_tenant_directory: &Path, ) -> Result<(), anyhow::Error> { + match mode { + CreateTenantFilesMode::Create => {} // needs no attach marker, writing tenant conf + atomic rename of dir is good enough + CreateTenantFilesMode::Attach => { + let attach_marker_path = temporary_tenant_dir.join(TENANT_ATTACHING_MARKER_FILENAME); + let file = std::fs::OpenOptions::new() + .create_new(true) + .write(true) + .open(&attach_marker_path) + .with_context(|| { + format!("could not create attach marker file {attach_marker_path:?}") + })?; + file.sync_all().with_context(|| { + format!("could not sync attach marker file: {attach_marker_path:?}") + })?; + // fsync of the directory in which the file resides comes later in this function + } + } + let temporary_tenant_timelines_dir = rebase_directory( &conf.timelines_path(&tenant_id), target_tenant_directory, @@ -2808,6 +2793,11 @@ fn try_create_target_tenant_dir( anyhow::bail!("failpoint tenant-creation-before-tmp-rename"); }); + // Make sure the current tenant directory entries are durable before renaming. + // Without this, a crash may reorder any of the directory entry creations above. + crashsafe::fsync(temporary_tenant_dir) + .with_context(|| format!("sync temporary tenant directory {temporary_tenant_dir:?}"))?; + fs::rename(temporary_tenant_dir, target_tenant_directory).with_context(|| { format!( "move tenant {} temporary directory {} into the permanent one {}", diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index 8191880bb562..ebacfeafd4d4 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -19,7 +19,7 @@ use crate::config::PageServerConf; use crate::context::{DownloadBehavior, RequestContext}; use crate::task_mgr::{self, TaskKind}; use crate::tenant::config::TenantConfOpt; -use crate::tenant::{Tenant, TenantState}; +use crate::tenant::{create_tenant_files, CreateTenantFilesMode, Tenant, TenantState}; use crate::IGNORED_TENANT_FILE_NAME; use utils::fs_ext::PathExt; @@ -282,9 +282,15 @@ pub async fn create_tenant( // We're holding the tenants lock in write mode while doing local IO. // If this section ever becomes contentious, introduce a new `TenantState::Creating` // and do the work in that state. - let tenant_directory = super::create_tenant_files(conf, tenant_conf, tenant_id)?; + let tenant_directory = super::create_tenant_files(conf, tenant_conf, tenant_id, CreateTenantFilesMode::Create)?; + // TODO: tenant directory remains on disk if we bail out from here on. + // See https://github.com/neondatabase/neon/issues/4233 + let created_tenant = schedule_local_tenant_processing(conf, &tenant_directory, remote_storage, ctx)?; + // TODO: tenant object & its background loops remain, untracked in tenant map, if we fail here. + // See https://github.com/neondatabase/neon/issues/4233 + let crated_tenant_id = created_tenant.tenant_id(); anyhow::ensure!( tenant_id == crated_tenant_id, @@ -466,19 +472,29 @@ pub async fn list_tenants() -> Result, TenantMapLis pub async fn attach_tenant( conf: &'static PageServerConf, tenant_id: TenantId, + tenant_conf: TenantConfOpt, remote_storage: GenericRemoteStorage, ctx: &RequestContext, ) -> Result<(), TenantMapInsertError> { tenant_map_insert(tenant_id, |vacant_entry| { - let tenant_path = conf.tenant_path(&tenant_id); - anyhow::ensure!( - !tenant_path.exists(), - "Cannot attach tenant {tenant_id}, local tenant directory already exists" - ); - - let tenant = - Tenant::spawn_attach(conf, tenant_id, remote_storage, ctx).context("spawn_attach")?; - vacant_entry.insert(tenant); + let tenant_dir = create_tenant_files(conf, tenant_conf, tenant_id, CreateTenantFilesMode::Attach)?; + // TODO: tenant directory remains on disk if we bail out from here on. + // See https://github.com/neondatabase/neon/issues/4233 + + // Without the attach marker, schedule_local_tenant_processing will treat the attached tenant as fully attached + let marker_file_exists = conf.tenant_attaching_mark_file_path(&tenant_id).try_exists().context("check for attach marker file existence")?; + assert!(marker_file_exists, "create_tenant_files should have created the attach marker file"); + + let attached_tenant = schedule_local_tenant_processing(conf, &tenant_dir, Some(remote_storage), ctx)?; + // TODO: tenant object & its background loops remain, untracked in tenant map, if we fail here. + // See https://github.com/neondatabase/neon/issues/4233 + + let attached_tenant_id = attached_tenant.tenant_id(); + anyhow::ensure!( + tenant_id == attached_tenant_id, + "loaded created tenant has unexpected tenant id (expect {tenant_id} != actual {attached_tenant_id})", + ); + vacant_entry.insert(Arc::clone(&attached_tenant)); Ok(()) }) .await From 2e636917879a9799aa83d91b7e78c7c91b562e78 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Tue, 16 May 2023 13:33:38 +0200 Subject: [PATCH 2/3] fix test failures caused by changed error message And make error message wording fit both create & attach use case. --- pageserver/src/tenant.rs | 6 ++++-- test_runner/regress/test_remote_storage.py | 4 +--- test_runner/regress/test_tenant_detach.py | 12 ++++-------- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 499c301ac902..e9801f9dad2b 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -2697,8 +2697,10 @@ pub(crate) fn create_tenant_files( ) -> anyhow::Result { let target_tenant_directory = conf.tenant_path(&tenant_id); anyhow::ensure!( - !target_tenant_directory.exists(), - "cannot create new tenant repo: '{tenant_id}' directory already exists", + !target_tenant_directory + .try_exists() + .context("check existence of tenant directory")?, + "tenant directory already exists", ); let temporary_tenant_dir = diff --git a/test_runner/regress/test_remote_storage.py b/test_runner/regress/test_remote_storage.py index 6de5f7db0479..aacf8127ea1a 100644 --- a/test_runner/regress/test_remote_storage.py +++ b/test_runner/regress/test_remote_storage.py @@ -81,9 +81,7 @@ def test_remote_storage_backup_and_restore( env.pageserver.allowed_errors.append(".*failed to load remote timeline.*") # we have a bunch of pytest.raises for these below env.pageserver.allowed_errors.append(".*tenant .*? already exists, state:.*") - env.pageserver.allowed_errors.append( - ".*Cannot attach tenant .*?, local tenant directory already exists.*" - ) + env.pageserver.allowed_errors.append(".*tenant directory already exists.*") env.pageserver.allowed_errors.append(".*simulated failure of remote operation.*") pageserver_http = env.pageserver.http_client() diff --git a/test_runner/regress/test_tenant_detach.py b/test_runner/regress/test_tenant_detach.py index 847ae4b2b84f..82664cff94f7 100644 --- a/test_runner/regress/test_tenant_detach.py +++ b/test_runner/regress/test_tenant_detach.py @@ -685,12 +685,10 @@ def test_load_attach_negatives( pageserver_http.tenant_ignore(tenant_id) - env.pageserver.allowed_errors.append( - ".*Cannot attach tenant .*?, local tenant directory already exists.*" - ) + env.pageserver.allowed_errors.append(".*tenant directory already exists.*") with pytest.raises( expected_exception=PageserverApiException, - match=f"Cannot attach tenant {tenant_id}, local tenant directory already exists", + match="tenant directory already exists", ): pageserver_http.tenant_attach(tenant_id) @@ -734,12 +732,10 @@ def test_ignore_while_attaching( pageserver_http.tenant_ignore(tenant_id) # Cannot attach it due to some local files existing - env.pageserver.allowed_errors.append( - ".*Cannot attach tenant .*?, local tenant directory already exists.*" - ) + env.pageserver.allowed_errors.append(".*tenant directory already exists.*") with pytest.raises( expected_exception=PageserverApiException, - match=f"Cannot attach tenant {tenant_id}, local tenant directory already exists", + match="tenant directory already exists", ): pageserver_http.tenant_attach(tenant_id) From 5e0c147a7eb764356dfb65441246389f84ba92e7 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Tue, 16 May 2023 17:11:31 +0200 Subject: [PATCH 3/3] anyhow::ensure instead of assert + rustfmt --- pageserver/src/tenant/mgr.rs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index ebacfeafd4d4..1542d34a6632 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -482,18 +482,21 @@ pub async fn attach_tenant( // See https://github.com/neondatabase/neon/issues/4233 // Without the attach marker, schedule_local_tenant_processing will treat the attached tenant as fully attached - let marker_file_exists = conf.tenant_attaching_mark_file_path(&tenant_id).try_exists().context("check for attach marker file existence")?; - assert!(marker_file_exists, "create_tenant_files should have created the attach marker file"); + let marker_file_exists = conf + .tenant_attaching_mark_file_path(&tenant_id) + .try_exists() + .context("check for attach marker file existence")?; + anyhow::ensure!(marker_file_exists, "create_tenant_files should have created the attach marker file"); let attached_tenant = schedule_local_tenant_processing(conf, &tenant_dir, Some(remote_storage), ctx)?; // TODO: tenant object & its background loops remain, untracked in tenant map, if we fail here. // See https://github.com/neondatabase/neon/issues/4233 let attached_tenant_id = attached_tenant.tenant_id(); - anyhow::ensure!( - tenant_id == attached_tenant_id, - "loaded created tenant has unexpected tenant id (expect {tenant_id} != actual {attached_tenant_id})", - ); + anyhow::ensure!( + tenant_id == attached_tenant_id, + "loaded created tenant has unexpected tenant id (expect {tenant_id} != actual {attached_tenant_id})", + ); vacant_entry.insert(Arc::clone(&attached_tenant)); Ok(()) })