Skip to content

Commit

Permalink
leveldb: Report missing CURRENT manifest file as database corruption.
Browse files Browse the repository at this point in the history
BTRFS reorders rename and write operations, so it is possible that a filesystem crash and recovery results in a situation where the file pointed to by CURRENT does not exist. DB::Open currently reports an I/O error in this case. Reporting database corruption is a better hint to the caller, which can attempt to recover the database or erase it and start over.

This issue is not merely theoretical. It was reported as having showed up in the wild at google#195 and at https://crbug.com/738961. Also, asides from the BTRFS case described above, incorrect data in CURRENT seems like a possible corruption case that should be handled gracefully.

The Env API changes here can be considered backwards compatible, because an implementation that returns Status::IOError instead of Status::NotFound will still get the same functionality as before.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=161432630
  • Loading branch information
pwnall committed Jul 10, 2017
1 parent 69e2bd2 commit 8415f00
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 31 deletions.
21 changes: 19 additions & 2 deletions db/recovery_test.cc
Expand Up @@ -47,7 +47,7 @@ class RecoveryTest {
db_ = NULL;
}

void Open(Options* options = NULL) {
Status OpenWithStatus(Options* options = NULL) {
Close();
Options opts;
if (options != NULL) {
Expand All @@ -59,7 +59,11 @@ class RecoveryTest {
if (opts.env == NULL) {
opts.env = env_;
}
ASSERT_OK(DB::Open(opts, dbname_, &db_));
return DB::Open(opts, dbname_, &db_);
}

void Open(Options* options = NULL) {
ASSERT_OK(OpenWithStatus(options));
ASSERT_EQ(1, NumLogs());
}

Expand Down Expand Up @@ -100,6 +104,10 @@ class RecoveryTest {
return logs.size();
}

void DeleteManifestFile() {
ASSERT_OK(env_->DeleteFile(ManifestFileName()));
}

uint64_t FirstLogFile() {
return GetFiles(kLogFile)[0];
}
Expand Down Expand Up @@ -317,6 +325,15 @@ TEST(RecoveryTest, MultipleLogFiles) {
ASSERT_EQ("there", Get("hi"));
}

TEST(RecoveryTest, ManifestMissing) {
ASSERT_OK(Put("foo", "bar"));
Close();
DeleteManifestFile();

Status status = OpenWithStatus();
ASSERT_TRUE(status.IsCorruption());
}

} // namespace leveldb

int main(int argc, char** argv) {
Expand Down
4 changes: 4 additions & 0 deletions db/version_set.cc
Expand Up @@ -925,6 +925,10 @@ Status VersionSet::Recover(bool *save_manifest) {
SequentialFile* file;
s = env_->NewSequentialFile(dscname, &file);
if (!s.ok()) {
if (s.IsNotFound()) {
return Status::Corruption(
"CURRENT points to a non-existent file", s.ToString());
}
return s;
}

Expand Down
6 changes: 4 additions & 2 deletions include/leveldb/env.h
Expand Up @@ -43,7 +43,8 @@ class Env {
// Create a brand new sequentially-readable file with the specified name.
// On success, stores a pointer to the new file in *result and returns OK.
// On failure stores NULL in *result and returns non-OK. If the file does
// not exist, returns a non-OK status.
// not exist, returns a non-OK status. Implementations should return a
// NotFound status when the file does not exist.
//
// The returned file will only be accessed by one thread at a time.
virtual Status NewSequentialFile(const std::string& fname,
Expand All @@ -53,7 +54,8 @@ class Env {
// specified name. On success, stores a pointer to the new file in
// *result and returns OK. On failure stores NULL in *result and
// returns non-OK. If the file does not exist, returns a non-OK
// status.
// status. Implementations should return a NotFound status when the file does
// not exist.
//
// The returned file may be concurrently accessed by multiple threads.
virtual Status NewRandomAccessFile(const std::string& fname,
Expand Down
58 changes: 31 additions & 27 deletions util/env_posix.cc
Expand Up @@ -34,8 +34,12 @@ namespace {
static int open_read_only_file_limit = -1;
static int mmap_limit = -1;

static Status IOError(const std::string& context, int err_number) {
return Status::IOError(context, strerror(err_number));
static Status PosixError(const std::string& context, int err_number) {
if (err_number == ENOENT) {
return Status::NotFound(context, strerror(err_number));
} else {
return Status::IOError(context, strerror(err_number));
}
}

// Helper class to limit resource usage to avoid exhaustion.
Expand Down Expand Up @@ -108,15 +112,15 @@ class PosixSequentialFile: public SequentialFile {
// We leave status as ok if we hit the end of the file
} else {
// A partial read with an error: return a non-ok status
s = IOError(filename_, errno);
s = PosixError(filename_, errno);
}
}
return s;
}

virtual Status Skip(uint64_t n) {
if (fseek(file_, n, SEEK_CUR)) {
return IOError(filename_, errno);
return PosixError(filename_, errno);
}
return Status::OK();
}
Expand Down Expand Up @@ -154,7 +158,7 @@ class PosixRandomAccessFile: public RandomAccessFile {
if (temporary_fd_) {
fd = open(filename_.c_str(), O_RDONLY);
if (fd < 0) {
return IOError(filename_, errno);
return PosixError(filename_, errno);
}
}

Expand All @@ -163,7 +167,7 @@ class PosixRandomAccessFile: public RandomAccessFile {
*result = Slice(scratch, (r < 0) ? 0 : r);
if (r < 0) {
// An error: return a non-ok status
s = IOError(filename_, errno);
s = PosixError(filename_, errno);
}
if (temporary_fd_) {
// Close the temporary file descriptor opened earlier.
Expand Down Expand Up @@ -199,7 +203,7 @@ class PosixMmapReadableFile: public RandomAccessFile {
Status s;
if (offset + n > length_) {
*result = Slice();
s = IOError(filename_, EINVAL);
s = PosixError(filename_, EINVAL);
} else {
*result = Slice(reinterpret_cast<char*>(mmapped_region_) + offset, n);
}
Expand All @@ -226,23 +230,23 @@ class PosixWritableFile : public WritableFile {
virtual Status Append(const Slice& data) {
size_t r = fwrite_unlocked(data.data(), 1, data.size(), file_);
if (r != data.size()) {
return IOError(filename_, errno);
return PosixError(filename_, errno);
}
return Status::OK();
}

virtual Status Close() {
Status result;
if (fclose(file_) != 0) {
result = IOError(filename_, errno);
result = PosixError(filename_, errno);
}
file_ = NULL;
return result;
}

virtual Status Flush() {
if (fflush_unlocked(file_) != 0) {
return IOError(filename_, errno);
return PosixError(filename_, errno);
}
return Status::OK();
}
Expand All @@ -263,10 +267,10 @@ class PosixWritableFile : public WritableFile {
if (basename.starts_with("MANIFEST")) {
int fd = open(dir.c_str(), O_RDONLY);
if (fd < 0) {
s = IOError(dir, errno);
s = PosixError(dir, errno);
} else {
if (fsync(fd) < 0) {
s = IOError(dir, errno);
s = PosixError(dir, errno);
}
close(fd);
}
Expand Down Expand Up @@ -337,7 +341,7 @@ class PosixEnv : public Env {
FILE* f = fopen(fname.c_str(), "r");
if (f == NULL) {
*result = NULL;
return IOError(fname, errno);
return PosixError(fname, errno);
} else {
*result = new PosixSequentialFile(fname, f);
return Status::OK();
Expand All @@ -350,7 +354,7 @@ class PosixEnv : public Env {
Status s;
int fd = open(fname.c_str(), O_RDONLY);
if (fd < 0) {
s = IOError(fname, errno);
s = PosixError(fname, errno);
} else if (mmap_limit_.Acquire()) {
uint64_t size;
s = GetFileSize(fname, &size);
Expand All @@ -359,7 +363,7 @@ class PosixEnv : public Env {
if (base != MAP_FAILED) {
*result = new PosixMmapReadableFile(fname, base, size, &mmap_limit_);
} else {
s = IOError(fname, errno);
s = PosixError(fname, errno);
}
}
close(fd);
Expand All @@ -378,7 +382,7 @@ class PosixEnv : public Env {
FILE* f = fopen(fname.c_str(), "w");
if (f == NULL) {
*result = NULL;
s = IOError(fname, errno);
s = PosixError(fname, errno);
} else {
*result = new PosixWritableFile(fname, f);
}
Expand All @@ -391,7 +395,7 @@ class PosixEnv : public Env {
FILE* f = fopen(fname.c_str(), "a");
if (f == NULL) {
*result = NULL;
s = IOError(fname, errno);
s = PosixError(fname, errno);
} else {
*result = new PosixWritableFile(fname, f);
}
Expand All @@ -407,7 +411,7 @@ class PosixEnv : public Env {
result->clear();
DIR* d = opendir(dir.c_str());
if (d == NULL) {
return IOError(dir, errno);
return PosixError(dir, errno);
}
struct dirent* entry;
while ((entry = readdir(d)) != NULL) {
Expand All @@ -420,23 +424,23 @@ class PosixEnv : public Env {
virtual Status DeleteFile(const std::string& fname) {
Status result;
if (unlink(fname.c_str()) != 0) {
result = IOError(fname, errno);
result = PosixError(fname, errno);
}
return result;
}

virtual Status CreateDir(const std::string& name) {
Status result;
if (mkdir(name.c_str(), 0755) != 0) {
result = IOError(name, errno);
result = PosixError(name, errno);
}
return result;
}

virtual Status DeleteDir(const std::string& name) {
Status result;
if (rmdir(name.c_str()) != 0) {
result = IOError(name, errno);
result = PosixError(name, errno);
}
return result;
}
Expand All @@ -446,7 +450,7 @@ class PosixEnv : public Env {
struct stat sbuf;
if (stat(fname.c_str(), &sbuf) != 0) {
*size = 0;
s = IOError(fname, errno);
s = PosixError(fname, errno);
} else {
*size = sbuf.st_size;
}
Expand All @@ -456,7 +460,7 @@ class PosixEnv : public Env {
virtual Status RenameFile(const std::string& src, const std::string& target) {
Status result;
if (rename(src.c_str(), target.c_str()) != 0) {
result = IOError(src, errno);
result = PosixError(src, errno);
}
return result;
}
Expand All @@ -466,12 +470,12 @@ class PosixEnv : public Env {
Status result;
int fd = open(fname.c_str(), O_RDWR | O_CREAT, 0644);
if (fd < 0) {
result = IOError(fname, errno);
result = PosixError(fname, errno);
} else if (!locks_.Insert(fname)) {
close(fd);
result = Status::IOError("lock " + fname, "already held by process");
} else if (LockOrUnlock(fd, true) == -1) {
result = IOError("lock " + fname, errno);
result = PosixError("lock " + fname, errno);
close(fd);
locks_.Remove(fname);
} else {
Expand All @@ -487,7 +491,7 @@ class PosixEnv : public Env {
PosixFileLock* my_lock = reinterpret_cast<PosixFileLock*>(lock);
Status result;
if (LockOrUnlock(my_lock->fd_, false) == -1) {
result = IOError("unlock", errno);
result = PosixError("unlock", errno);
}
locks_.Remove(my_lock->name_);
close(my_lock->fd_);
Expand Down Expand Up @@ -524,7 +528,7 @@ class PosixEnv : public Env {
FILE* f = fopen(fname.c_str(), "w");
if (f == NULL) {
*result = NULL;
return IOError(fname, errno);
return PosixError(fname, errno);
} else {
*result = new PosixLogger(f, &PosixEnv::gettid);
return Status::OK();
Expand Down
18 changes: 18 additions & 0 deletions util/env_test.cc
Expand Up @@ -99,6 +99,24 @@ TEST(EnvTest, StartThread) {
ASSERT_EQ(state.val, 3);
}

TEST(EnvTest, TestOpenNonExistentFile) {
// Write some test data to a single file that will be opened |n| times.
std::string test_dir;
ASSERT_OK(env_->GetTestDirectory(&test_dir));

std::string non_existent_file = test_dir + "/non_existent_file";
ASSERT_TRUE(!env_->FileExists(non_existent_file));

RandomAccessFile* random_access_file;
Status status = env_->NewRandomAccessFile(
non_existent_file, &random_access_file);
ASSERT_TRUE(status.IsNotFound());

SequentialFile* sequential_file;
status = env_->NewSequentialFile(non_existent_file, &sequential_file);
ASSERT_TRUE(status.IsNotFound());
}

} // namespace leveldb

int main(int argc, char** argv) {
Expand Down

0 comments on commit 8415f00

Please sign in to comment.