From 5aca29f9899a238147f69c8bb564808e19b40628 Mon Sep 17 00:00:00 2001 From: Zsolt Parragi Date: Thu, 5 Sep 2019 14:13:10 +0200 Subject: [PATCH] PS-5932: Implementing --tokudb-force-recovery=6 to skip reading the logs This could be useful when the server crashed with a corrupted rollback file, and is unable to start up. Specifying --tokudb-force-recovery=6 --super-read-only should start it up in a read-only, but usable state. Some data may be lost and unrecoverable. Starting the server without the read-only option is *NOT* supported. --- ft/ft-ops.cc | 16 ++++++++-------- ft/ft-ops.h | 4 ++-- ft/ft.h | 10 +++++----- src/ydb.cc | 47 +++++++++++++++++++++++++++++++++++++++++------ src/ydb_db.cc | 8 ++++++-- src/ydb_db.h | 1 + 6 files changed, 63 insertions(+), 23 deletions(-) diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index ad9f5c2e4..20a2e6e89 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -2802,9 +2802,9 @@ static int ft_create_file(FT_HANDLE UU(ft_handle), const char *fname, int *fdp) } // open a file for use by the ft. if the file does not exist, error -static int ft_open_file(const char *fname, int *fdp) { +static int ft_open_file(const char *fname, int *fdp, bool rw) { int fd; - fd = ft_open_maybe_direct(fname, O_RDWR | O_BINARY, file_mode); + fd = ft_open_maybe_direct(fname, (rw ? O_RDWR : O_RDONLY) | O_BINARY, file_mode); if (fd==-1) { return get_error_errno(); } @@ -2955,7 +2955,7 @@ toku_ft_handle_inherit_options(FT_HANDLE t, FT ft) { // The checkpointed version (checkpoint_lsn) of the dictionary must be no later than max_acceptable_lsn . // Requires: The multi-operation client lock must be held to prevent a checkpoint from occuring.
static int -ft_handle_open(FT_HANDLE ft_h, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn, FILENUM use_filenum, DICTIONARY_ID use_dictionary_id, LSN max_acceptable_lsn) { +ft_handle_open(FT_HANDLE ft_h, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn, FILENUM use_filenum, DICTIONARY_ID use_dictionary_id, LSN max_acceptable_lsn, bool open_rw = true) { int r; bool txn_created = false; char *fname_in_cwd = NULL; @@ -2977,7 +2977,7 @@ ft_handle_open(FT_HANDLE ft_h, const char *fname_in_env, int is_create, int only fname_in_cwd = toku_cachetable_get_fname_in_cwd(cachetable, fname_in_env); { int fd = -1; - r = ft_open_file(fname_in_cwd, &fd); + r = ft_open_file(fname_in_cwd, &fd, open_rw); if (reserved_filenum.fileid == FILENUM_NONE.fileid) { reserved_filenum = toku_cachetable_reserve_filenum(cachetable); } @@ -3123,15 +3123,15 @@ toku_ft_handle_open_recovery(FT_HANDLE t, const char *fname_in_env, int is_creat // Open an ft in normal use. The FILENUM and dict_id are assigned by the ft_handle_open() function. // Requires: The multi-operation client lock must be held to prevent a checkpoint from occuring. int -toku_ft_handle_open(FT_HANDLE t, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn) { +toku_ft_handle_open(FT_HANDLE t, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn, bool open_rw) { int r; - r = ft_handle_open(t, fname_in_env, is_create, only_create, cachetable, txn, FILENUM_NONE, DICTIONARY_ID_NONE, MAX_LSN); + r = ft_handle_open(t, fname_in_env, is_create, only_create, cachetable, txn, FILENUM_NONE, DICTIONARY_ID_NONE, MAX_LSN, open_rw); return r; } // clone an ft handle. 
the cloned handle has a new dict_id but refers to the same fractal tree int -toku_ft_handle_clone(FT_HANDLE *cloned_ft_handle, FT_HANDLE ft_handle, TOKUTXN txn) { +toku_ft_handle_clone(FT_HANDLE *cloned_ft_handle, FT_HANDLE ft_handle, TOKUTXN txn, bool open_rw) { FT_HANDLE result_ft_handle; toku_ft_handle_create(&result_ft_handle); @@ -3146,7 +3146,7 @@ toku_ft_handle_clone(FT_HANDLE *cloned_ft_handle, FT_HANDLE ft_handle, TOKUTXN t CACHEFILE cf = ft_handle->ft->cf; CACHETABLE ct = toku_cachefile_get_cachetable(cf); const char *fname_in_env = toku_cachefile_fname_in_env(cf); - int r = toku_ft_handle_open(result_ft_handle, fname_in_env, false, false, ct, txn); + int r = toku_ft_handle_open(result_ft_handle, fname_in_env, false, false, ct, txn, open_rw); if (r != 0) { toku_ft_handle_close(result_ft_handle); result_ft_handle = NULL; diff --git a/ft/ft-ops.h b/ft/ft-ops.h index df8ffe287..7b6d0634c 100644 --- a/ft/ft-ops.h +++ b/ft/ft-ops.h @@ -125,12 +125,12 @@ typedef int (*ft_update_func)(DB *db, const DBT *key, const DBT *old_val, const void toku_ft_set_update(FT_HANDLE ft_h, ft_update_func update_fun); int toku_ft_handle_open(FT_HANDLE, const char *fname_in_env, - int is_create, int only_create, CACHETABLE ct, TOKUTXN txn) __attribute__ ((warn_unused_result)); + int is_create, int only_create, CACHETABLE ct, TOKUTXN txn, bool open_rw=true) __attribute__ ((warn_unused_result)); int toku_ft_handle_open_recovery(FT_HANDLE, const char *fname_in_env, int is_create, int only_create, CACHETABLE ct, TOKUTXN txn, FILENUM use_filenum, LSN max_acceptable_lsn) __attribute__ ((warn_unused_result)); // clone an ft handle. the cloned handle has a new dict_id but refers to the same fractal tree -int toku_ft_handle_clone(FT_HANDLE *cloned_ft_handle, FT_HANDLE ft_handle, TOKUTXN txn); +int toku_ft_handle_clone(FT_HANDLE *cloned_ft_handle, FT_HANDLE ft_handle, TOKUTXN txn, bool open_rw=true); // close an ft handle during normal operation. 
the underlying ft may or may not close, // depending if there are still references. an lsn for this close will come from the logger. diff --git a/ft/ft.h b/ft/ft.h index 7a3c4fa78..08743e258 100644 --- a/ft/ft.h +++ b/ft/ft.h @@ -181,11 +181,11 @@ void tokuft_update_product_name_strings(void); extern char toku_product_name[TOKU_MAX_PRODUCT_NAME_LENGTH]; struct toku_product_name_strings_struct { - char db_version[sizeof(toku_product_name) + sizeof("1.2.3 build ") + 256]; - char environmentdictionary[sizeof(toku_product_name) + sizeof(".environment")]; - char fileopsdirectory[sizeof(toku_product_name) + sizeof(".directory")]; - char single_process_lock[sizeof(toku_product_name) + sizeof("___lock_dont_delete_me")]; - char rollback_cachefile[sizeof(toku_product_name) + sizeof(".rollback")]; + char db_version[sizeof(toku_product_name) + sizeof("1.2.3 build ") + 256 + 1]; + char environmentdictionary[sizeof(toku_product_name) + sizeof(".environment") + 1]; + char fileopsdirectory[sizeof(toku_product_name) + sizeof(".directory") + 1]; + char single_process_lock[sizeof(toku_product_name) + sizeof("___lock_dont_delete_me") + 1]; + char rollback_cachefile[sizeof(toku_product_name) + sizeof(".rollback") + 1]; }; extern struct toku_product_name_strings_struct toku_product_name_strings; diff --git a/src/ydb.cc b/src/ydb.cc index 66e7502f6..f489d63c6 100644 --- a/src/ydb.cc +++ b/src/ydb.cc @@ -87,6 +87,10 @@ const char *toku_copyright_string = "Copyright (c) 2006, 2015, Percona and/or it int toku_close_trace_file (void) { return 0; } #endif +extern "C" { + uint force_recovery = 0; +} + // Set when env is panicked, never cleared. static int env_is_panicked = 0; @@ -223,6 +227,9 @@ env_fs_redzone(DB_ENV *env, uint64_t total) { // Check the available space in the file systems used by tokuft and erect barriers when available space gets low. 
static int env_fs_poller(void *arg) { + if(force_recovery == 6) { + return 0; + } DB_ENV *env = (DB_ENV *) arg; int r; @@ -307,6 +314,9 @@ env_fs_init(DB_ENV *env) { // Initialize the minicron that polls file system space static int env_fs_init_minicron(DB_ENV *env) { + if(force_recovery == 6) { + return 0; + } int r = toku_minicron_setup(&env->i->fs_poller, env->i->fs_poll_time*1000, env_fs_poller, env); if (r == 0) env->i->fs_poller_is_init = true; @@ -709,7 +719,7 @@ static int validate_env(DB_ENV *env, } // Test for fileops directory - if (r == 0) { + if (r == 0 && force_recovery != 6) { path = toku_construct_full_name( 2, env->i->dir, toku_product_name_strings.fileopsdirectory); assert(path); @@ -752,7 +762,7 @@ static int validate_env(DB_ENV *env, } // Test for recovery log - if ((r == 0) && (env->i->open_flags & DB_INIT_LOG)) { + if ((r == 0) && (env->i->open_flags & DB_INIT_LOG) && force_recovery != 6) { // if using transactions, test for existence of log r = ydb_recover_log_exists(env); // return 0 or ENOENT if (expect_newenv && (r != ENOENT)) @@ -813,6 +823,27 @@ unlock_single_process(DB_ENV *env) { // (The set of necessary files is defined in the function validate_env() above.) 
static int env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) { + + if(force_recovery == 6) { + { + const int len = strlen(toku_product_name_strings.rollback_cachefile); + toku_product_name_strings.rollback_cachefile[len] = '2'; + toku_product_name_strings.rollback_cachefile[len+1] = 0; + } + + { + const int len = strlen(toku_product_name_strings.single_process_lock); + toku_product_name_strings.single_process_lock[len] = '2'; + toku_product_name_strings.single_process_lock[len+1] = 0; + } + + { + const int len = strlen(toku_product_name_strings.environmentdictionary); + toku_product_name_strings.environmentdictionary[len] = '2'; + toku_product_name_strings.environmentdictionary[len+1] = 0; + } + } + HANDLE_PANICKED_ENV(env); int r; bool newenv; // true iff creating a new environment @@ -903,7 +934,7 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) { bool need_rollback_cachefile; need_rollback_cachefile = false; - if (flags & (DB_INIT_TXN | DB_INIT_LOG)) { + if (flags & (DB_INIT_TXN | DB_INIT_LOG) && force_recovery != 6) { need_rollback_cachefile = true; } @@ -916,7 +947,7 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) { r = ydb_maybe_upgrade_env(env, &last_lsn_of_clean_shutdown_read_from_log, &upgrade_in_progress); if (r!=0) goto cleanup; - if (upgrade_in_progress) { + if (upgrade_in_progress || force_recovery == 6) { // Delete old rollback file. There was a clean shutdown, so it has nothing useful, // and there is no value in upgrading it. It is simpler to just create a new one. 
char* rollback_filename = toku_construct_full_name(2, env->i->dir, toku_product_name_strings.rollback_cachefile); @@ -934,9 +965,13 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) { unused_flags &= ~DB_INIT_TXN & ~DB_INIT_LOG; + if(force_recovery == 6) { + flags |= DB_INIT_LOG | DB_INIT_TXN; + } + // do recovery only if there exists a log and recovery is requested // otherwise, a log is created when the logger is opened later - if (!newenv) { + if (!newenv && force_recovery == 0) { if (flags & DB_INIT_LOG) { // the log does exist if (flags & DB_RECOVER) { @@ -1005,7 +1040,7 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) { assert (using_txns); toku_logger_set_cachetable(env->i->logger, env->i->cachetable); if (!toku_logger_rollback_is_open(env->i->logger)) { - bool create_new_rollback_file = newenv | upgrade_in_progress; + bool create_new_rollback_file = newenv | upgrade_in_progress | (force_recovery == 6); r = toku_logger_open_rollback(env->i->logger, env->i->cachetable, create_new_rollback_file); if (r != 0) { r = toku_ydb_do_error(env, r, "Cant open rollback\n"); diff --git a/src/ydb_db.cc b/src/ydb_db.cc index 40c4a7f65..ac44b8e7f 100644 --- a/src/ydb_db.cc +++ b/src/ydb_db.cc @@ -323,6 +323,7 @@ toku_db_open(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTYP // DB_THREAD is implicitly supported and DB_BLACKHOLE is supported at the ft-layer unused_flags &= ~DB_THREAD; unused_flags &= ~DB_BLACKHOLE; + unused_flags &= ~DB_RDONLY; // check for unknown or conflicting flags if (unused_flags) return EINVAL; // unknown flags @@ -404,7 +405,7 @@ int toku_db_lt_on_create_callback(toku::locktree *lt, void *extra) { FT_HANDLE ft_handle = info->ft_handle; FT_HANDLE cloned_ft_handle; - r = toku_ft_handle_clone(&cloned_ft_handle, ft_handle, ttxn); + r = toku_ft_handle_clone(&cloned_ft_handle, ft_handle, ttxn, info->open_rw); if (r == 0) { assert(lt->get_userdata() == NULL); lt->set_userdata(cloned_ft_handle); @@ 
-465,6 +466,7 @@ int toku_db_open_iname(DB * db, DB_TXN * txn, const char *iname_in_env, uint32_t flags&=~DB_READ_COMMITTED; flags&=~DB_SERIALIZABLE; flags&=~DB_IS_HOT_INDEX; + flags&=~DB_RDONLY; // unknown or conflicting flags are bad int unknown_flags = flags & ~DB_THREAD; unknown_flags &= ~DB_BLACKHOLE; @@ -479,11 +481,12 @@ int toku_db_open_iname(DB * db, DB_TXN * txn, const char *iname_in_env, uint32_t db->i->open_flags = flags; db->i->open_mode = mode; + bool open_rw = mode & (S_IWUSR | S_IWOTH | S_IWGRP); FT_HANDLE ft_handle = db->i->ft_handle; int r = toku_ft_handle_open(ft_handle, iname_in_env, is_db_create, is_db_excl, db->dbenv->i->cachetable, - txn ? db_txn_struct_i(txn)->tokutxn : nullptr); + txn ? db_txn_struct_i(txn)->tokutxn : nullptr, open_rw); if (r != 0) { goto out; } @@ -505,6 +508,7 @@ int toku_db_open_iname(DB * db, DB_TXN * txn, const char *iname_in_env, uint32_t struct lt_on_create_callback_extra on_create_extra = { .txn = txn, .ft_handle = db->i->ft_handle, + open_rw }; db->i->lt = db->dbenv->i->ltm.get_lt(db->i->dict_id, toku_ft_get_comparator(db->i->ft_handle), diff --git a/src/ydb_db.h b/src/ydb_db.h index ab8fcd2a4..c260e9d0f 100644 --- a/src/ydb_db.h +++ b/src/ydb_db.h @@ -67,6 +67,7 @@ void ydb_db_layer_get_status(YDB_DB_LAYER_STATUS statp); struct lt_on_create_callback_extra { DB_TXN *txn; FT_HANDLE ft_handle; + bool open_rw; }; int toku_db_lt_on_create_callback(toku::locktree *lt, void *extra); void toku_db_lt_on_destroy_callback(toku::locktree *lt);