Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial implementation of the metadata log #61

Merged
merged 20 commits into from Apr 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
21 changes: 21 additions & 0 deletions CMakeLists.txt
Expand Up @@ -658,21 +658,42 @@ if (Seastar_EXPERIMENTAL_FS)
PRIVATE
# SeastarFS source files
include/seastar/fs/block_device.hh
include/seastar/fs/exceptions.hh
include/seastar/fs/file.hh
include/seastar/fs/overloaded.hh
include/seastar/fs/stat.hh
include/seastar/fs/temporary_file.hh
src/fs/bitwise.hh
src/fs/bootstrap_record.cc
src/fs/bootstrap_record.hh
src/fs/cluster.hh
src/fs/cluster_allocator.cc
src/fs/cluster_allocator.hh
src/fs/cluster_writer.hh
src/fs/crc.hh
src/fs/device_reader.cc
src/fs/device_reader.hh
src/fs/file.cc
src/fs/inode.hh
src/fs/inode_info.hh
src/fs/metadata_disk_entries.hh
src/fs/metadata_log.cc
src/fs/metadata_log.hh
src/fs/metadata_log_bootstrap.cc
src/fs/metadata_log_bootstrap.hh
src/fs/metadata_log_operations/create_and_open_unlinked_file.hh
src/fs/metadata_log_operations/create_file.hh
src/fs/metadata_log_operations/link_file.hh
src/fs/metadata_log_operations/read.hh
src/fs/metadata_log_operations/truncate.hh
src/fs/metadata_log_operations/unlink_or_remove_file.hh
src/fs/metadata_log_operations/write.hh
src/fs/metadata_to_disk_buffer.hh
src/fs/path.hh
src/fs/range.hh
src/fs/to_disk_buffer.hh
src/fs/units.hh
src/fs/unix_metadata.hh
src/fs/value_shared_lock.hh
)
endif()
Expand Down
88 changes: 88 additions & 0 deletions include/seastar/fs/exceptions.hh
@@ -0,0 +1,88 @@
/*
* This file is open source software, licensed to you under the terms
* of the Apache License, Version 2.0 (the "License"). See the NOTICE file
* distributed with this work for additional information regarding copyright
* ownership. You may not use this file except in compliance with the License.
*
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (C) 2020 ScyllaDB
*/

#pragma once

#include <exception>

namespace seastar::fs {

/// Abstract root of the exception hierarchy declared in this header.
/// what() is kept pure virtual, so only concrete subclasses can be thrown.
struct fs_exception : public std::exception {
    const char* what() const noexcept override = 0;
};

/// Error reporting "Cluster size is too small to perform operation".
/// Derives from fs_exception (like every other exception in this header) so that a
/// `catch (const fs_exception&)` handler sees it too; it is still a std::exception.
struct cluster_size_too_small_to_perform_operation_exception : public fs_exception {
    const char* what() const noexcept override { return "Cluster size is too small to perform operation"; }
};

/// Error reporting "Invalid inode".
struct invalid_inode_exception : public fs_exception {
    const char* what() const noexcept override { return "Invalid inode"; }
};

/// Error reporting "Invalid argument".
struct invalid_argument_exception : public fs_exception {
    const char* what() const noexcept override { return "Invalid argument"; }
};

/// Error reporting "Operation became invalid".
struct operation_became_invalid_exception : public fs_exception {
    const char* what() const noexcept override { return "Operation became invalid"; }
};

/// Error reporting "No more space on device".
struct no_more_space_exception : public fs_exception {
    const char* what() const noexcept override { return "No more space on device"; }
};

/// Error reporting "File already exists".
struct file_already_exists_exception : public fs_exception {
    const char* what() const noexcept override { return "File already exists"; }
};

/// Error reporting "Filename too long".
struct filename_too_long_exception : public fs_exception {
    const char* what() const noexcept override { return "Filename too long"; }
};

/// Error reporting "Is a directory".
struct is_directory_exception : public fs_exception {
    const char* what() const noexcept override { return "Is a directory"; }
};

/// Error reporting "Directory is not empty".
struct directory_not_empty_exception : public fs_exception {
    const char* what() const noexcept override { return "Directory is not empty"; }
};

/// Abstract base grouping the path-lookup errors below; what() stays pure virtual,
/// so callers can catch the whole group with a single handler.
struct path_lookup_exception : public fs_exception {
    const char* what() const noexcept override = 0;
};

/// Path-lookup error reporting "Path is not absolute".
struct path_is_not_absolute_exception : public path_lookup_exception {
    const char* what() const noexcept override { return "Path is not absolute"; }
};

/// Path-lookup error reporting "Path is invalid".
struct invalid_path_exception : public path_lookup_exception {
    const char* what() const noexcept override { return "Path is invalid"; }
};

/// Path-lookup error reporting "No such file or directory".
struct no_such_file_or_directory_exception : public path_lookup_exception {
    const char* what() const noexcept override { return "No such file or directory"; }
};

/// Path-lookup error reporting "A component used as a directory is not a directory".
struct path_component_not_directory_exception : public path_lookup_exception {
    const char* what() const noexcept override { return "A component used as a directory is not a directory"; }
};

} // namespace seastar::fs
41 changes: 41 additions & 0 deletions include/seastar/fs/stat.hh
@@ -0,0 +1,41 @@
/*
* This file is open source software, licensed to you under the terms
* of the Apache License, Version 2.0 (the "License"). See the NOTICE file
* distributed with this work for additional information regarding copyright
* ownership. You may not use this file except in compliance with the License.
*
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (C) 2019 ScyllaDB
*/

#pragma once

#include "seastar/core/file-types.hh"

#include <chrono>
#include <sys/types.h>

namespace seastar::fs {

// Plain aggregate bundling the attributes of a single file system entry:
// its type, permissions, user/group ids and three timestamps.
// NOTE(review): directory_entry_type and file_permissions are presumably the ones declared in
// seastar/core/file-types.hh (the only project header included here) — confirm.
struct stat_data {
directory_entry_type type; // Kind of the entry
file_permissions perms; // Permission bits of the entry
uid_t uid; // User id (presumably the owner's — confirm against the code filling this struct)
gid_t gid; // Group id (presumably the owning group's — confirm against the code filling this struct)
std::chrono::system_clock::time_point time_born; // Time of creation
std::chrono::system_clock::time_point time_modified; // Time of last content modification
std::chrono::system_clock::time_point time_changed; // Time of last status change (either content or attributes)
};

} // namespace seastar::fs
85 changes: 85 additions & 0 deletions src/fs/cluster_writer.hh
@@ -0,0 +1,85 @@
/*
* This file is open source software, licensed to you under the terms
* of the Apache License, Version 2.0 (the "License"). See the NOTICE file
* distributed with this work for additional information regarding copyright
* ownership. You may not use this file except in compliance with the License.
*
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (C) 2020 ScyllaDB
*/

#pragma once

#include "fs/bitwise.hh"
#include "fs/units.hh"
#include "seastar/core/shared_ptr.hh"
#include "seastar/fs/block_device.hh"

#include <cstdlib>
#include <cassert>

namespace seastar::fs {

// Represents a writer that appends aligned buffers to a block_device, one after another, inside a single
// cluster. Method init() should be called just after the constructor in order to finish construction:
// a default-constructed object has all fields zeroed (including the alignment).
//
// Every operation is virtual so that the class can be subclassed and substituted; for that reason it also
// has a virtual destructor.
class cluster_writer {
protected:
    size_t _max_size = 0;                   // Upper bound on the total number of bytes written (set by init())
    unit_size_t _alignment = 0;             // Required alignment of buffers and lengths (set by init())
    disk_offset_t _cluster_beg_offset = 0;  // Disk offset of the beginning of the cluster (set by init())
    size_t _next_write_offset = 0;          // Offset, relative to _cluster_beg_offset, of the next write
public:
    cluster_writer() = default;

    // Polymorphic base: make destruction through a pointer/reference to cluster_writer well-defined.
    virtual ~cluster_writer() = default;

    // Declaring the destructor would otherwise suppress the implicit move operations and deprecate the
    // implicit copies, so all of them are defaulted explicitly to keep the original value semantics.
    cluster_writer(const cluster_writer&) = default;
    cluster_writer(cluster_writer&&) = default;
    cluster_writer& operator=(const cluster_writer&) = default;
    cluster_writer& operator=(cluster_writer&&) = default;

    // Returns a new, default-constructed cluster_writer (not a copy of *this); init() still has to be
    // called on the result. Presumably meant to be overridden by subclasses to return their own type.
    virtual shared_ptr<cluster_writer> virtual_constructor() const {
        return make_shared<cluster_writer>();
    }

    // Total number of bytes appended cannot exceed @p aligned_max_size.
    // @p alignment must be a power of 2, and both @p aligned_max_size and @p cluster_beg_offset must be
    // multiples of it (checked with assert()).
    // @p cluster_beg_offset is the disk offset of the beginning of the cluster.
    // Resets the write position, so the next write() lands at @p cluster_beg_offset.
    virtual void init(size_t aligned_max_size, unit_size_t alignment, disk_offset_t cluster_beg_offset) {
        assert(is_power_of_2(alignment));
        assert(mod_by_power_of_2(aligned_max_size, alignment) == 0);
        assert(mod_by_power_of_2(cluster_beg_offset, alignment) == 0);

        _max_size = aligned_max_size;
        _alignment = alignment;
        _cluster_beg_offset = cluster_beg_offset;
        _next_write_offset = 0;
    }

    // Writes @p aligned_buffer to @p device just after previous write (or at @p cluster_beg_offset passed to init()
    // if it is the first write).
    // Preconditions (checked with assert()): the address of @p aligned_buffer and @p aligned_len are multiples
    // of the alignment passed to init(), and @p aligned_len does not exceed bytes_left().
    // Returns the future produced by block_device::write() for this request.
    virtual future<size_t> write(const void* aligned_buffer, size_t aligned_len, block_device device) {
        assert(reinterpret_cast<uintptr_t>(aligned_buffer) % _alignment == 0);
        assert(aligned_len % _alignment == 0);
        assert(aligned_len <= bytes_left());

        // Advance the position before issuing the write, so that the writer is usable (i.e. the next write()
        // targets the following region) as soon as this function returns, without waiting for the future.
        size_t curr_write_offset = _next_write_offset;
        _next_write_offset += aligned_len;

        return device.write(_cluster_beg_offset + curr_write_offset, aligned_buffer, aligned_len);
    }

    // Number of bytes that can still be passed to write() before the limit given to init() is reached.
    virtual size_t bytes_left() const noexcept { return _max_size - _next_write_offset; }

    // Returns disk offset of the place where the first byte of next appended bytes would be after flush
    // TODO: maybe better name for that function? Or any other method to extract that data?
    virtual disk_offset_t current_disk_offset() const noexcept {
        return _cluster_beg_offset + _next_write_offset;
    }
};

} // namespace seastar::fs