From 8415f00eeedd96934d3578572d3802900e61a556 Mon Sep 17 00:00:00 2001 From: costan Date: Mon, 10 Jul 2017 13:32:58 -0700 Subject: [PATCH] leveldb: Report missing CURRENT manifest file as database corruption. BTRFS reorders rename and write operations, so it is possible that a filesystem crash and recovery results in a situation where the file pointed to by CURRENT does not exist. DB::Open currently reports an I/O error in this case. Reporting database corruption is a better hint to the caller, which can attempt to recover the database or erase it and start over. This issue is not merely theoretical. It was reported as having showed up in the wild at https://github.com/google/leveldb/issues/195 and at https://crbug.com/738961. Also, asides from the BTRFS case described above, incorrect data in CURRENT seems like a possible corruption case that should be handled gracefully. The Env API changes here can be considered backwards compatible, because an implementation that returns Status::IOError instead of Status::NotFound will still get the same functionality as before. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=161432630 --- db/recovery_test.cc | 21 ++++++++++++++-- db/version_set.cc | 4 +++ include/leveldb/env.h | 6 +++-- util/env_posix.cc | 58 +++++++++++++++++++++++-------------------- util/env_test.cc | 18 ++++++++++++++ 5 files changed, 76 insertions(+), 31 deletions(-) diff --git a/db/recovery_test.cc b/db/recovery_test.cc index 9596f4288a..e5a7c804a0 100644 --- a/db/recovery_test.cc +++ b/db/recovery_test.cc @@ -47,7 +47,7 @@ class RecoveryTest { db_ = NULL; } - void Open(Options* options = NULL) { + Status OpenWithStatus(Options* options = NULL) { Close(); Options opts; if (options != NULL) { @@ -59,7 +59,11 @@ class RecoveryTest { if (opts.env == NULL) { opts.env = env_; } - ASSERT_OK(DB::Open(opts, dbname_, &db_)); + return DB::Open(opts, dbname_, &db_); + } + + void Open(Options* options = NULL) { + ASSERT_OK(OpenWithStatus(options)); ASSERT_EQ(1, NumLogs()); } @@ -100,6 +104,10 @@ class RecoveryTest { return logs.size(); } + void DeleteManifestFile() { + ASSERT_OK(env_->DeleteFile(ManifestFileName())); + } + uint64_t FirstLogFile() { return GetFiles(kLogFile)[0]; } @@ -317,6 +325,15 @@ TEST(RecoveryTest, MultipleLogFiles) { ASSERT_EQ("there", Get("hi")); } +TEST(RecoveryTest, ManifestMissing) { + ASSERT_OK(Put("foo", "bar")); + Close(); + DeleteManifestFile(); + + Status status = OpenWithStatus(); + ASSERT_TRUE(status.IsCorruption()); +} + } // namespace leveldb int main(int argc, char** argv) { diff --git a/db/version_set.cc b/db/version_set.cc index b1256f90e1..020c37ea2c 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -925,6 +925,10 @@ Status VersionSet::Recover(bool *save_manifest) { SequentialFile* file; s = env_->NewSequentialFile(dscname, &file); if (!s.ok()) { + if (s.IsNotFound()) { + return Status::Corruption( + "CURRENT points to a non-existent file", s.ToString()); + } return s; } diff --git a/include/leveldb/env.h b/include/leveldb/env.h index 99b6c21414..cb8b6a465b 100644 --- a/include/leveldb/env.h +++ b/include/leveldb/env.h @@ -43,7 +43,8 @@ class Env { // Create a brand new sequentially-readable file with the specified name. // On success, stores a pointer to the new file in *result and returns OK. // On failure stores NULL in *result and returns non-OK. If the file does - // not exist, returns a non-OK status. + // not exist, returns a non-OK status. Implementations should return a + // NotFound status when the file does not exist. // // The returned file will only be accessed by one thread at a time. virtual Status NewSequentialFile(const std::string& fname, @@ -53,7 +54,8 @@ class Env { // specified name. On success, stores a pointer to the new file in // *result and returns OK. On failure stores NULL in *result and // returns non-OK. If the file does not exist, returns a non-OK - // status. + // status. Implementations should return a NotFound status when the file does + // not exist. // // The returned file may be concurrently accessed by multiple threads. virtual Status NewRandomAccessFile(const std::string& fname, diff --git a/util/env_posix.cc b/util/env_posix.cc index 84aabb20a7..b21f515a37 100644 --- a/util/env_posix.cc +++ b/util/env_posix.cc @@ -34,8 +34,12 @@ namespace { static int open_read_only_file_limit = -1; static int mmap_limit = -1; -static Status IOError(const std::string& context, int err_number) { - return Status::IOError(context, strerror(err_number)); +static Status PosixError(const std::string& context, int err_number) { + if (err_number == ENOENT) { + return Status::NotFound(context, strerror(err_number)); + } else { + return Status::IOError(context, strerror(err_number)); + } } // Helper class to limit resource usage to avoid exhaustion. @@ -108,7 +112,7 @@ class PosixSequentialFile: public SequentialFile { // We leave status as ok if we hit the end of the file } else { // A partial read with an error: return a non-ok status - s = IOError(filename_, errno); + s = PosixError(filename_, errno); } } return s; @@ -116,7 +120,7 @@ class PosixSequentialFile: public SequentialFile { virtual Status Skip(uint64_t n) { if (fseek(file_, n, SEEK_CUR)) { - return IOError(filename_, errno); + return PosixError(filename_, errno); } return Status::OK(); } @@ -154,7 +158,7 @@ class PosixRandomAccessFile: public RandomAccessFile { if (temporary_fd_) { fd = open(filename_.c_str(), O_RDONLY); if (fd < 0) { - return IOError(filename_, errno); + return PosixError(filename_, errno); } } @@ -163,7 +167,7 @@ class PosixRandomAccessFile: public RandomAccessFile { *result = Slice(scratch, (r < 0) ? 0 : r); if (r < 0) { // An error: return a non-ok status - s = IOError(filename_, errno); + s = PosixError(filename_, errno); } if (temporary_fd_) { // Close the temporary file descriptor opened earlier. @@ -199,7 +203,7 @@ class PosixMmapReadableFile: public RandomAccessFile { Status s; if (offset + n > length_) { *result = Slice(); - s = IOError(filename_, EINVAL); + s = PosixError(filename_, EINVAL); } else { *result = Slice(reinterpret_cast(mmapped_region_) + offset, n); } @@ -226,7 +230,7 @@ class PosixWritableFile : public WritableFile { virtual Status Append(const Slice& data) { size_t r = fwrite_unlocked(data.data(), 1, data.size(), file_); if (r != data.size()) { - return IOError(filename_, errno); + return PosixError(filename_, errno); } return Status::OK(); } @@ -234,7 +238,7 @@ class PosixWritableFile : public WritableFile { virtual Status Close() { Status result; if (fclose(file_) != 0) { - result = IOError(filename_, errno); + result = PosixError(filename_, errno); } file_ = NULL; return result; @@ -242,7 +246,7 @@ class PosixWritableFile : public WritableFile { virtual Status Flush() { if (fflush_unlocked(file_) != 0) { - return IOError(filename_, errno); + return PosixError(filename_, errno); } return Status::OK(); } @@ -263,10 +267,10 @@ class PosixWritableFile : public WritableFile { if (basename.starts_with("MANIFEST")) { int fd = open(dir.c_str(), O_RDONLY); if (fd < 0) { - s = IOError(dir, errno); + s = PosixError(dir, errno); } else { if (fsync(fd) < 0) { - s = IOError(dir, errno); + s = PosixError(dir, errno); } close(fd); } @@ -337,7 +341,7 @@ class PosixEnv : public Env { FILE* f = fopen(fname.c_str(), "r"); if (f == NULL) { *result = NULL; - return IOError(fname, errno); + return PosixError(fname, errno); } else { *result = new PosixSequentialFile(fname, f); return Status::OK(); @@ -350,7 +354,7 @@ class PosixEnv : public Env { Status s; int fd = open(fname.c_str(), O_RDONLY); if (fd < 0) { - s = IOError(fname, errno); + s = PosixError(fname, errno); } else if (mmap_limit_.Acquire()) { uint64_t size; s = GetFileSize(fname, &size); @@ -359,7 +363,7 @@ class PosixEnv : public Env { if (base != MAP_FAILED) { *result = new PosixMmapReadableFile(fname, base, size, &mmap_limit_); } else { - s = IOError(fname, errno); + s = PosixError(fname, errno); } } close(fd); @@ -378,7 +382,7 @@ class PosixEnv : public Env { FILE* f = fopen(fname.c_str(), "w"); if (f == NULL) { *result = NULL; - s = IOError(fname, errno); + s = PosixError(fname, errno); } else { *result = new PosixWritableFile(fname, f); } @@ -391,7 +395,7 @@ class PosixEnv : public Env { FILE* f = fopen(fname.c_str(), "a"); if (f == NULL) { *result = NULL; - s = IOError(fname, errno); + s = PosixError(fname, errno); } else { *result = new PosixWritableFile(fname, f); } @@ -407,7 +411,7 @@ class PosixEnv : public Env { result->clear(); DIR* d = opendir(dir.c_str()); if (d == NULL) { - return IOError(dir, errno); + return PosixError(dir, errno); } struct dirent* entry; while ((entry = readdir(d)) != NULL) { @@ -420,7 +424,7 @@ class PosixEnv : public Env { virtual Status DeleteFile(const std::string& fname) { Status result; if (unlink(fname.c_str()) != 0) { - result = IOError(fname, errno); + result = PosixError(fname, errno); } return result; } @@ -428,7 +432,7 @@ class PosixEnv : public Env { virtual Status CreateDir(const std::string& name) { Status result; if (mkdir(name.c_str(), 0755) != 0) { - result = IOError(name, errno); + result = PosixError(name, errno); } return result; } @@ -436,7 +440,7 @@ class PosixEnv : public Env { virtual Status DeleteDir(const std::string& name) { Status result; if (rmdir(name.c_str()) != 0) { - result = IOError(name, errno); + result = PosixError(name, errno); } return result; } @@ -446,7 +450,7 @@ class PosixEnv : public Env { struct stat sbuf; if (stat(fname.c_str(), &sbuf) != 0) { *size = 0; - s = IOError(fname, errno); + s = PosixError(fname, errno); } else { *size = sbuf.st_size; } @@ -456,7 +460,7 @@ class PosixEnv : public Env { virtual Status RenameFile(const std::string& src, const std::string& target) { Status result; if (rename(src.c_str(), target.c_str()) != 0) { - result = IOError(src, errno); + result = PosixError(src, errno); } return result; } @@ -466,12 +470,12 @@ class PosixEnv : public Env { Status result; int fd = open(fname.c_str(), O_RDWR | O_CREAT, 0644); if (fd < 0) { - result = IOError(fname, errno); + result = PosixError(fname, errno); } else if (!locks_.Insert(fname)) { close(fd); result = Status::IOError("lock " + fname, "already held by process"); } else if (LockOrUnlock(fd, true) == -1) { - result = IOError("lock " + fname, errno); + result = PosixError("lock " + fname, errno); close(fd); locks_.Remove(fname); } else { @@ -487,7 +491,7 @@ class PosixEnv : public Env { PosixFileLock* my_lock = reinterpret_cast(lock); Status result; if (LockOrUnlock(my_lock->fd_, false) == -1) { - result = IOError("unlock", errno); + result = PosixError("unlock", errno); } locks_.Remove(my_lock->name_); close(my_lock->fd_); @@ -524,7 +528,7 @@ class PosixEnv : public Env { FILE* f = fopen(fname.c_str(), "w"); if (f == NULL) { *result = NULL; - return IOError(fname, errno); + return PosixError(fname, errno); } else { *result = new PosixLogger(f, &PosixEnv::gettid); return Status::OK(); diff --git a/util/env_test.cc b/util/env_test.cc index 839ae56a1a..4d8ecc4a7f 100644 --- a/util/env_test.cc +++ b/util/env_test.cc @@ -99,6 +99,24 @@ TEST(EnvTest, StartThread) { ASSERT_EQ(state.val, 3); } +TEST(EnvTest, TestOpenNonExistentFile) { + // Write some test data to a single file that will be opened |n| times. + std::string test_dir; + ASSERT_OK(env_->GetTestDirectory(&test_dir)); + + std::string non_existent_file = test_dir + "/non_existent_file"; + ASSERT_TRUE(!env_->FileExists(non_existent_file)); + + RandomAccessFile* random_access_file; + Status status = env_->NewRandomAccessFile( + non_existent_file, &random_access_file); + ASSERT_TRUE(status.IsNotFound()); + + SequentialFile* sequential_file; + status = env_->NewSequentialFile(non_existent_file, &sequential_file); + ASSERT_TRUE(status.IsNotFound()); +} + } // namespace leveldb int main(int argc, char** argv) {