diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 26cd02e5edcb..6457d55dff5f 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -398,9 +398,17 @@ async fn tenant_attach_handler(request: Request) -> Result, let state = get_state(&request); if let Some(remote_storage) = &state.remote_storage { - mgr::attach_tenant(state.conf, tenant_id, remote_storage.clone(), &ctx) - .instrument(info_span!("tenant_attach", tenant = %tenant_id)) - .await?; + mgr::attach_tenant( + state.conf, + tenant_id, + // XXX: Attach should provide the config, especially during tenant migration. + // See https://github.com/neondatabase/neon/issues/1555 + TenantConfOpt::default(), + remote_storage.clone(), + &ctx, + ) + .instrument(info_span!("tenant_attach", tenant = %tenant_id)) + .await?; } else { return Err(ApiError::BadRequest(anyhow!( "attach_tenant is not possible because pageserver was configured without remote storage" diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 9ab02624074b..cccad3e4d5ea 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -602,12 +602,9 @@ impl Tenant { remote_storage: GenericRemoteStorage, ctx: &RequestContext, ) -> anyhow::Result> { - // XXX: Attach should provide the config, especially during tenant migration. - // See https://github.com/neondatabase/neon/issues/1555 - let tenant_conf = TenantConfOpt::default(); - - Self::attach_idempotent_create_marker_file(conf, tenant_id) - .context("create attach marker file")?; + // TODO dedup with spawn_load + let tenant_conf = + Self::load_tenant_config(conf, tenant_id).context("load tenant config")?; let wal_redo_manager = Arc::new(PostgresRedoManager::new(conf, tenant_id)); let tenant = Arc::new(Tenant::new( @@ -644,45 +641,6 @@ impl Tenant { Ok(tenant) } - fn attach_idempotent_create_marker_file( - conf: &'static PageServerConf, - tenant_id: TenantId, - ) -> anyhow::Result<()> { - // Create directory with marker file to indicate attaching state. - // The load_local_tenants() function in tenant::mgr relies on the marker file - // to determine whether a tenant has finished attaching. - let tenant_dir = conf.tenant_path(&tenant_id); - let marker_file = conf.tenant_attaching_mark_file_path(&tenant_id); - debug_assert_eq!(marker_file.parent().unwrap(), tenant_dir); - // TODO: should use tokio::fs here, but - // 1. caller is not async, for good reason (it holds tenants map lock) - // 2. we'd need to think about cancel safety. Turns out dropping a tokio::fs future - // doesn't wait for the activity in the fs thread pool. - crashsafe::create_dir_all(&tenant_dir).context("create tenant directory")?; - match fs::OpenOptions::new() - .write(true) - .create_new(true) - .open(&marker_file) - { - Ok(_) => {} - Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => { - // Either this is a retry of attach or there is a concurrent task also doing attach for this tenant. - // We cannot distinguish this here. - // The caller is responsible for ensuring there's no concurrent attach for a tenant. - {} // fsync again, we don't know if that already happened - } - err => { - err.context("create tenant attaching marker file")?; - unreachable!("we covered the Ok() case above"); - } - } - crashsafe::fsync_file_and_parent(&marker_file) - .context("fsync tenant attaching marker file and parent")?; - debug_assert!(tenant_dir.is_dir()); - debug_assert!(marker_file.is_file()); - Ok(()) - } - /// /// Background task that downloads all data for a tenant and brings it to Active state. /// @@ -2118,6 +2076,7 @@ impl Tenant { // enough to just fsync it always. crashsafe::fsync(target_config_parent)?; + // XXX we're not fsyncing the parent dir, need to do that in case `creating_tenant` Ok(()) }; @@ -2761,15 +2720,23 @@ fn remove_timeline_and_uninit_mark(timeline_dir: &Path, uninit_mark: &Path) -> a Ok(()) } +pub(crate) enum CreateTenantFilesMode { + Create, + Attach, +} + pub(crate) fn create_tenant_files( conf: &'static PageServerConf, tenant_conf: TenantConfOpt, tenant_id: TenantId, + mode: CreateTenantFilesMode, ) -> anyhow::Result { let target_tenant_directory = conf.tenant_path(&tenant_id); anyhow::ensure!( - !target_tenant_directory.exists(), - "cannot create new tenant repo: '{tenant_id}' directory already exists", + !target_tenant_directory + .try_exists() + .context("check existence of tenant directory")?, + "tenant directory already exists", ); let temporary_tenant_dir = @@ -2791,6 +2758,7 @@ pub(crate) fn create_tenant_files( conf, tenant_conf, tenant_id, + mode, &temporary_tenant_dir, &target_tenant_directory, ); @@ -2815,9 +2783,28 @@ fn try_create_target_tenant_dir( conf: &'static PageServerConf, tenant_conf: TenantConfOpt, tenant_id: TenantId, + mode: CreateTenantFilesMode, temporary_tenant_dir: &Path, target_tenant_directory: &Path, ) -> Result<(), anyhow::Error> { + match mode { + CreateTenantFilesMode::Create => {} // needs no attach marker, writing tenant conf + atomic rename of dir is good enough + CreateTenantFilesMode::Attach => { + let attach_marker_path = temporary_tenant_dir.join(TENANT_ATTACHING_MARKER_FILENAME); + let file = std::fs::OpenOptions::new() + .create_new(true) + .write(true) + .open(&attach_marker_path) + .with_context(|| { + format!("could not create attach marker file {attach_marker_path:?}") + })?; + file.sync_all().with_context(|| { + format!("could not sync attach marker file: {attach_marker_path:?}") + })?; + // fsync of the directory in which the file resides comes later in this function + } + } + let temporary_tenant_timelines_dir = rebase_directory( &conf.timelines_path(&tenant_id), target_tenant_directory, @@ -2844,6 +2831,11 @@ fn try_create_target_tenant_dir( anyhow::bail!("failpoint tenant-creation-before-tmp-rename"); }); + // Make sure the current tenant directory entries are durable before renaming. + // Without this, a crash may reorder any of the directory entry creations above. + crashsafe::fsync(temporary_tenant_dir) + .with_context(|| format!("sync temporary tenant directory {temporary_tenant_dir:?}"))?; + fs::rename(temporary_tenant_dir, target_tenant_directory).with_context(|| { format!( "move tenant {} temporary directory {} into the permanent one {}", diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index 8191880bb562..1542d34a6632 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -19,7 +19,7 @@ use crate::config::PageServerConf; use crate::context::{DownloadBehavior, RequestContext}; use crate::task_mgr::{self, TaskKind}; use crate::tenant::config::TenantConfOpt; -use crate::tenant::{Tenant, TenantState}; +use crate::tenant::{create_tenant_files, CreateTenantFilesMode, Tenant, TenantState}; use crate::IGNORED_TENANT_FILE_NAME; use utils::fs_ext::PathExt; @@ -282,9 +282,15 @@ pub async fn create_tenant( // We're holding the tenants lock in write mode while doing local IO. // If this section ever becomes contentious, introduce a new `TenantState::Creating` // and do the work in that state. - let tenant_directory = super::create_tenant_files(conf, tenant_conf, tenant_id)?; + let tenant_directory = super::create_tenant_files(conf, tenant_conf, tenant_id, CreateTenantFilesMode::Create)?; + // TODO: tenant directory remains on disk if we bail out from here on. + // See https://github.com/neondatabase/neon/issues/4233 + let created_tenant = schedule_local_tenant_processing(conf, &tenant_directory, remote_storage, ctx)?; + // TODO: tenant object & its background loops remain, untracked in tenant map, if we fail here. + // See https://github.com/neondatabase/neon/issues/4233 + let crated_tenant_id = created_tenant.tenant_id(); anyhow::ensure!( tenant_id == crated_tenant_id, @@ -466,19 +472,32 @@ pub async fn list_tenants() -> Result, TenantMapLis pub async fn attach_tenant( conf: &'static PageServerConf, tenant_id: TenantId, + tenant_conf: TenantConfOpt, remote_storage: GenericRemoteStorage, ctx: &RequestContext, ) -> Result<(), TenantMapInsertError> { tenant_map_insert(tenant_id, |vacant_entry| { - let tenant_path = conf.tenant_path(&tenant_id); + let tenant_dir = create_tenant_files(conf, tenant_conf, tenant_id, CreateTenantFilesMode::Attach)?; + // TODO: tenant directory remains on disk if we bail out from here on. + // See https://github.com/neondatabase/neon/issues/4233 + + // Without the attach marker, schedule_local_tenant_processing will treat the attached tenant as fully attached + let marker_file_exists = conf + .tenant_attaching_mark_file_path(&tenant_id) + .try_exists() + .context("check for attach marker file existence")?; + anyhow::ensure!(marker_file_exists, "create_tenant_files should have created the attach marker file"); + + let attached_tenant = schedule_local_tenant_processing(conf, &tenant_dir, Some(remote_storage), ctx)?; + // TODO: tenant object & its background loops remain, untracked in tenant map, if we fail here. + // See https://github.com/neondatabase/neon/issues/4233 + + let attached_tenant_id = attached_tenant.tenant_id(); anyhow::ensure!( - !tenant_path.exists(), - "Cannot attach tenant {tenant_id}, local tenant directory already exists" + tenant_id == attached_tenant_id, + "loaded created tenant has unexpected tenant id (expect {tenant_id} != actual {attached_tenant_id})", ); - - let tenant = - Tenant::spawn_attach(conf, tenant_id, remote_storage, ctx).context("spawn_attach")?; - vacant_entry.insert(tenant); + vacant_entry.insert(Arc::clone(&attached_tenant)); Ok(()) }) .await diff --git a/test_runner/regress/test_remote_storage.py b/test_runner/regress/test_remote_storage.py index cce9cdc17529..02f1aac99ca6 100644 --- a/test_runner/regress/test_remote_storage.py +++ b/test_runner/regress/test_remote_storage.py @@ -83,9 +83,7 @@ def test_remote_storage_backup_and_restore( env.pageserver.allowed_errors.append(".*failed to load remote timeline.*") # we have a bunch of pytest.raises for these below env.pageserver.allowed_errors.append(".*tenant .*? already exists, state:.*") - env.pageserver.allowed_errors.append( - ".*Cannot attach tenant .*?, local tenant directory already exists.*" - ) + env.pageserver.allowed_errors.append(".*tenant directory already exists.*") env.pageserver.allowed_errors.append(".*simulated failure of remote operation.*") pageserver_http = env.pageserver.http_client() diff --git a/test_runner/regress/test_tenant_detach.py b/test_runner/regress/test_tenant_detach.py index 847ae4b2b84f..82664cff94f7 100644 --- a/test_runner/regress/test_tenant_detach.py +++ b/test_runner/regress/test_tenant_detach.py @@ -685,12 +685,10 @@ def test_load_attach_negatives( pageserver_http.tenant_ignore(tenant_id) - env.pageserver.allowed_errors.append( - ".*Cannot attach tenant .*?, local tenant directory already exists.*" - ) + env.pageserver.allowed_errors.append(".*tenant directory already exists.*") with pytest.raises( expected_exception=PageserverApiException, - match=f"Cannot attach tenant {tenant_id}, local tenant directory already exists", + match="tenant directory already exists", ): pageserver_http.tenant_attach(tenant_id) @@ -734,12 +732,10 @@ def test_ignore_while_attaching( pageserver_http.tenant_ignore(tenant_id) # Cannot attach it due to some local files existing - env.pageserver.allowed_errors.append( - ".*Cannot attach tenant .*?, local tenant directory already exists.*" - ) + env.pageserver.allowed_errors.append(".*tenant directory already exists.*") with pytest.raises( expected_exception=PageserverApiException, - match=f"Cannot attach tenant {tenant_id}, local tenant directory already exists", + match="tenant directory already exists", ): pageserver_http.tenant_attach(tenant_id)