| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,180 @@ | ||
| - docs, more tests | ||
|
|
||
| - extended attributes: | ||
| - number of blocks | ||
| - number of chunks | ||
| - number of times opened? | ||
|
|
||
| - per-file "hotness" (how often was a file opened); dump to file upon umount | ||
|
|
||
| - nanofsextract? ---> --unpack | ||
| - readahead? | ||
| - typedef -> using | ||
|
|
||
| - start filename compression early | ||
|
|
||
| - remove multiple blockhash window sizes, one is enough apparently? | ||
|
|
||
| - window-increment-shift seems silly to configure? | ||
|
|
||
| - identify blocks that contain mostly binary data and adjust compressor? | ||
|
|
||
| - get rid of glog dependency | ||
|
|
||
| - get rid of passing by shared/unique ptr where possible | ||
|
|
||
| - weak_ptr, e.g. in inode implementation? | ||
|
|
||
| - --repack and --unpack option | ||
|
|
||
| - metadata stripping (i.e. re-write metadata without owner/time info) | ||
|
|
||
|
|
||
| /* | ||
|
|
||
| scanner: | ||
| bhw= - 388.3s 13.07 GiB | ||
| bhw= 8 812.9s 7.559 GiB | ||
| bhw= 9 693.1s 7.565 GiB | ||
| bhw=10 651.8s 7.617 GiB | ||
| bhw=11 618.7s 7.313 GiB | ||
| bhw=12 603.6s 7.625 GiB | ||
| bhw=13 591.2s 7.858 GiB | ||
| bhw=14 574.1s 8.306 GiB | ||
| bhw=15 553.8s 8.869 GiB | ||
| bhw=16 541.9s 9.529 GiB | ||
|
|
||
|
|
||
| lz4: | ||
| <---- 1m29.535s / 9m31.212s | ||
|
|
||
| lz4hc: | ||
| 1 - 20.94s - 2546 MiB | ||
| 2 - 21.67s - 2441 MiB | ||
| 3 - 24.19s - 2377 MiB | ||
| 4 - 27.29s - 2337 MiB | ||
| 5 - 31.49s - 2311 MiB | ||
| 6 - 36.39s - 2294 MiB | ||
| 7 - 42.04s - 2284 MiB | ||
| 8 - 48.67s - 2277 MiB | ||
| 9 - 56.94s - 2273 MiB <---- 1m27.979s / 9m20.637s | ||
| 10 - 68.03s - 2271 MiB | ||
| 11 - 79.54s - 2269 MiB | ||
| 12 - 94.84s - 2268 MiB | ||
|
|
||
| zstd: | ||
| 1 - 11.42s - 1667 MiB | ||
| 2 - 12.95s - 1591 MiB <---- 2m8.351s / 15m25.752s | ||
| 3 - 22.03s - 1454 MiB | ||
| 4 - 25.64s - 1398 MiB | ||
| 5 - 32.34s - 1383 MiB | ||
| 6 - 41.45s - 1118 MiB <---- 2m4.258s / 14m28.627s | ||
| 7 - 46.26s - 1104 MiB | ||
| 8 - 53.34s - 1077 MiB | ||
| 9 - 59.99s - 1066 MiB | ||
| 10 - 63.3s - 1066 MiB | ||
| 11 - 66.97s - 956 MiB <---- 2m3.496s / 14m17.862s | ||
| 12 - 79.89s - 953 MiB | ||
| 13 - 89.8s - 943 MiB | ||
| 14 - 118.1s - 941 MiB | ||
| 15 - 230s - 951 MiB | ||
| 16 - 247.4s - 863 MiB <---- 2m11.202s / 14m57.245s | ||
| 17 - 294.5s - 854 MiB | ||
| 18 - 634s - 806 MiB | ||
| 19 - 762.5s - 780 MiB | ||
| 20 - 776.8s - 718 MiB <---- 2m16.448s / 15m43.923s | ||
| 21 - 990.4s - 716 MiB | ||
| 22 - 984.3s - 715 MiB <---- 2m18.133s / 15m55.263s | ||
|
|
||
| lzma: | ||
| level=6:dict_size=21 921.9s - 838.8 MiB <---- 5m11.219s / 37m36.002s | ||
|
|
||
| */ | ||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
| Perl: | ||
| 542 versions of perl | ||
| found/scanned: 152809/152809 dirs, 0/0 links, 1325098/1325098 files | ||
| original size: 32.03 GiB, saved: 19.01 GiB by deduplication (1133032 duplicate files), 5.835 GiB by segmenting | ||
| filesystem size: 7.183 GiB in 460 blocks (499389 chunks, 192066/192066 inodes), 460 blocks/662.3 MiB written | ||
|
|
||
| bench | ||
| build real user | ||
| ----------------------------------------------------------------------------------------------------- | ||
| -rw-r--r-- 1 mhx users 14G Jul 27 23:11 perl-install-0.dwarfs 8:05 0:38 0:45 | ||
| -rw-r--r-- 1 mhx users 4.8G Jul 27 23:18 perl-install-1.dwarfs 6:34 0:14 1:24 | ||
| -rw-r--r-- 1 mhx users 3.8G Jul 27 23:26 perl-install-2.dwarfs 7:31 0:17 1:11 | ||
| -rw-r--r-- 1 mhx users 3.2G Jul 27 23:36 perl-install-3.dwarfs 10:11 0:11 0:59 | ||
| -rw-r--r-- 1 mhx users 1.8G Jul 27 23:47 perl-install-4.dwarfs 11:05 0:14 1:24 | ||
| -rw-r--r-- 1 mhx users 1.2G Jul 27 23:59 perl-install-5.dwarfs 11:53 0:13 1:15 | ||
| -rw-r--r-- 1 mhx users 901M Jul 28 00:16 perl-install-6.dwarfs 17:42 0:14 1:25 | ||
| -rw-r--r-- 1 mhx users 704M Jul 28 00:37 perl-install-7.dwarfs 20:52 0:20 2:14 | ||
| -rw-r--r-- 1 mhx users 663M Jul 28 04:04 perl-install-8.dwarfs 24:13 0:50 6:02 | ||
| -rw-r--r-- 1 mhx users 615M Jul 28 02:50 perl-install-9.dwarfs 34:40 0:51 5:50 | ||
|
|
||
| -rw-r--r-- 1 mhx users 3.6G Jul 28 09:13 perl-install-defaults.squashfs 17:20 | ||
| -rw-r--r-- 1 mhx users 2.4G Jul 28 10:42 perl-install-opt.squashfs 71:49 | ||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
| soak: | ||
|
|
||
| -7 (cache=1g) | ||
|
|
||
| Passed with 542 of 542 combinations. | ||
|
|
||
| real 75m21.191s | ||
| user 68m3.903s | ||
| sys 6m21.020s | ||
|
|
||
| -9 (cache=1g) | ||
|
|
||
| Passed with 542 of 542 combinations. | ||
|
|
||
| real 118m48.371s | ||
| user 107m35.685s | ||
| sys 7m16.438s | ||
|
|
||
| squashfs-opt | ||
|
|
||
| real 81m36.957s | ||
| user 62m37.369s | ||
| sys 20m52.367s | ||
|
|
||
|
|
||
| -1 (cache=2g) | ||
| mhx@gimli ~ $ time find tmp/mount/ -type f | xargs -n 1 -P 32 -d $'\n' -I {} dd of=/dev/null if={} bs=64K status=none | ||
|
|
||
| real 2m19.927s | ||
| user 0m16.813s | ||
| sys 2m4.293s | ||
|
|
||
| -7 (cache=2g) | ||
| mhx@gimli ~ $ time find tmp/mount/ -type f | xargs -n 1 -P 32 -d $'\n' -I {} dd of=/dev/null if={} bs=64K status=none | ||
|
|
||
| real 2m24.346s | ||
| user 0m17.007s | ||
| sys 1m59.823s | ||
|
|
||
| squash-default | ||
| mhx@gimli ~ $ time find tmp/mount/ -type f | xargs -n 1 -P 32 -d $'\n' -I {} dd of=/dev/null if={} bs=64K status=none | ||
|
|
||
| real 8m41.594s | ||
| user 1m25.346s | ||
| sys 19m12.036s | ||
|
|
||
| squash-opt | ||
| mhx@gimli ~ $ time find tmp/mount/ -type f | xargs -n 1 -P 32 -d $'\n' -I {} dd of=/dev/null if={} bs=64K status=none | ||
|
|
||
| real 141m41.092s | ||
| user 1m12.650s | ||
| sys 59m18.194s | ||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,69 @@ | ||
-- Filter hook: returns true for every entry `f` it is given.
-- The disabled snippet below is an example of rejecting entries by
-- name and type instead.
function filter(f)
  -- if f.name == 'Jamroot' or (f.name == 'test' and f.type == 'dir') then
  --   return false
  -- end
  local keep = true
  return keep
end
|
|
||
-- Descend from table `C` along the keys args[1] .. args[num], creating an
-- empty table for every key that does not exist yet, and return the table
-- reached last. With num < 1 the table `C` itself is returned.
function autovivify(C, args, num)
  local node = C
  local depth = 1
  while depth <= num do
    local key = args[depth]
    local child = node[key]
    if child == nil then
      child = {}
      node[key] = child
    end
    node = child
    depth = depth + 1
  end
  return node
end
|
|
||
-- incr(C, k1, ..., kn, field, amount)
-- Adds `amount` to C[k1]...[kn][field]; intermediate tables are created
-- through autovivify and a missing field counts as 0.
function incr(C, ...)
  local argc = select("#", ...)
  local argv = { ... }
  local slot = autovivify(C, argv, argc - 2)
  local field, amount = argv[argc - 1], argv[argc]
  slot[field] = (slot[field] or 0) + amount
end
|
|
||
-- push(C, k1, ..., kn, value)
-- Appends `value` to the list stored at C[k1]...[kn]; the nested tables
-- (including the list itself) are created through autovivify if missing.
function push(C, ...)
  local argc = select("#", ...)
  local argv = { ... }
  local list = autovivify(C, argv, argc - 1)
  table.insert(list, argv[argc])
end
|
|
||
-- Build a comparator for table.sort over keys of `tbl`.
-- Each tbl[key] must carry "size" and "num"; keys with the larger
-- size/num quotient are ordered first.
function sortbysize(tbl)
  local function average(key)
    local entry = tbl[key]
    return entry["size"] / entry["num"]
  end

  return function (a, b)
    return average(b) < average(a)
  end
end
|
|
||
-- Return the entries of `filelist` as a new list, grouped by extension and
-- then by base name.
--
-- A name is split into base and extension at the last ".<alnum>+" suffix;
-- if there is no such suffix, or the suffix contains no lowercase letter,
-- the whole name is the base and the extension is "".
-- Extensions, and base names within an extension, are ordered by
-- descending average file size (see sortbysize); files sharing the same
-- extension and base name are ordered by descending size.
function order(filelist)
  local stats = {}

  for _, f in pairs(filelist) do
    local base, ext = string.match(f.name, "(.*)(%.%w+)$")
    local has_ext = ext ~= nil and string.find(ext, "[a-z]") ~= nil
    if not has_ext then
      base, ext = f.name, ""
    end
    -- per-extension totals
    incr(stats, ext, "size", f.size)
    incr(stats, ext, "num", 1)
    -- per-(extension, base name) totals and file list
    incr(stats, ext, "name", base, "size", f.size)
    incr(stats, ext, "name", base, "num", 1)
    push(stats, ext, "name", base, "files", f)
  end

  -- keys of `tbl`, sorted with the sortbysize comparator
  local function sorted_keys(tbl)
    local keys = {}
    for k in pairs(tbl) do
      keys[#keys + 1] = k
    end
    table.sort(keys, sortbysize(tbl))
    return keys
  end

  local function larger_first(a, b)
    return b.size < a.size
  end

  local ordered = {}
  for _, ext in ipairs(sorted_keys(stats)) do
    local by_base = stats[ext]["name"]
    for _, base in ipairs(sorted_keys(by_base)) do
      local files = by_base[base]["files"]
      table.sort(files, larger_first)
      for _, file in ipairs(files) do
        ordered[#ordered + 1] = file
      end
    end
  end
  return ordered
end
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,86 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <future> | ||
| #include <limits> | ||
| #include <memory> | ||
| #include <mutex> | ||
|
|
||
| #include "fstypes.h" | ||
| #include "logger.h" | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
| struct block_cache_options; | ||
|
|
||
| class cached_block; | ||
|
|
||
| class block_range { | ||
| public: | ||
| block_range(std::shared_ptr<cached_block const> block, size_t offset, | ||
| size_t size); | ||
|
|
||
| const uint8_t* data() const { return begin_; } | ||
| const uint8_t* begin() const { return begin_; } | ||
| const uint8_t* end() const { return end_; } | ||
| size_t size() const { return end_ - begin_; } | ||
|
|
||
| private: | ||
| const uint8_t* const begin_; | ||
| const uint8_t* const end_; | ||
| std::shared_ptr<cached_block const> block_; | ||
| }; | ||
|
|
||
| class block_cache { | ||
| public: | ||
| block_cache(logger& lgr, const block_cache_options& options); | ||
|
|
||
| size_t block_count() const { return impl_->block_count(); } | ||
|
|
||
| void insert(compression_type comp, const uint8_t* data, size_t size) { | ||
| impl_->insert(comp, data, size); | ||
| } | ||
|
|
||
| void set_block_size(size_t size) { impl_->set_block_size(size); } | ||
|
|
||
| std::future<block_range> | ||
| get(size_t block_no, size_t offset, size_t size) const { | ||
| return impl_->get(block_no, offset, size); | ||
| } | ||
|
|
||
| class impl { | ||
| public: | ||
| virtual ~impl() = default; | ||
|
|
||
| virtual size_t block_count() const = 0; | ||
| virtual void | ||
| insert(compression_type comp, const uint8_t* data, size_t size) = 0; | ||
| virtual void set_block_size(size_t size) = 0; | ||
| virtual std::future<block_range> | ||
| get(size_t block_no, size_t offset, size_t length) const = 0; | ||
| }; | ||
|
|
||
| private: | ||
| std::unique_ptr<impl> impl_; | ||
| }; | ||
| } // namespace dwarfs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,119 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <memory> | ||
| #include <vector> | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
/**
 * Compression algorithm identifier, stored in a single byte.
 *
 * The numeric values are spelled out explicitly.
 * NOTE(review): presumably they are persisted in the file system image and
 * must not be renumbered -- confirm.
 */
enum class compression_type : uint8_t {
  NONE = 0,
  LZMA = 1,
  ZSTD = 2,
  LZ4 = 3,
  LZ4HC = 4,
};
|
|
||
| class block_compressor { | ||
| public: | ||
| block_compressor(const std::string& spec, size_t block_size = 0); | ||
|
|
||
| block_compressor(const block_compressor& bc) | ||
| : impl_(bc.impl_->clone()) {} | ||
|
|
||
| block_compressor(block_compressor&& bc) = default; | ||
| block_compressor& operator=(block_compressor&& rhs) = default; | ||
|
|
||
| std::vector<uint8_t> compress(const std::vector<uint8_t>& data) const { | ||
| return impl_->compress(data); | ||
| } | ||
|
|
||
| std::vector<uint8_t> compress(std::vector<uint8_t>&& data) const { | ||
| return impl_->compress(std::move(data)); | ||
| } | ||
|
|
||
| void append(const uint8_t* data, size_t size, bool last) { | ||
| impl_->append(data, size, last); | ||
| } | ||
|
|
||
| std::vector<uint8_t> move_data() { return impl_->move_data(); } | ||
|
|
||
| compression_type type() const { return impl_->type(); } | ||
|
|
||
| class impl { | ||
| public: | ||
| virtual ~impl() = default; | ||
|
|
||
| virtual std::unique_ptr<impl> clone() const = 0; | ||
|
|
||
| // TODO: obsolete | ||
| virtual std::vector<uint8_t> | ||
| compress(const std::vector<uint8_t>& data) const = 0; | ||
| virtual std::vector<uint8_t> | ||
| compress(std::vector<uint8_t>&& data) const = 0; | ||
|
|
||
| virtual void append(const uint8_t* data, size_t size, bool last) = 0; | ||
| virtual std::vector<uint8_t> move_data() = 0; | ||
|
|
||
| virtual compression_type type() const = 0; | ||
| }; | ||
|
|
||
| private: | ||
| std::unique_ptr<impl> impl_; | ||
| }; | ||
|
|
||
| class block_decompressor { | ||
| public: | ||
| block_decompressor(compression_type type, const uint8_t* data, size_t size, | ||
| std::vector<uint8_t>& target); | ||
|
|
||
| bool decompress_frame(size_t frame_size = BUFSIZ) { | ||
| return impl_->decompress_frame(frame_size); | ||
| } | ||
|
|
||
| size_t uncompressed_size() const { return impl_->uncompressed_size(); } | ||
|
|
||
| compression_type type() const { return impl_->type(); } | ||
|
|
||
| static std::vector<uint8_t> | ||
| decompress(compression_type type, const uint8_t* data, size_t size) { | ||
| std::vector<uint8_t> target; | ||
| block_decompressor bd(type, data, size, target); | ||
| bd.decompress_frame(bd.uncompressed_size()); | ||
| return target; | ||
| } | ||
|
|
||
| class impl { | ||
| public: | ||
| virtual ~impl() = default; | ||
|
|
||
| virtual bool decompress_frame(size_t frame_size) = 0; | ||
| virtual size_t uncompressed_size() const = 0; | ||
|
|
||
| virtual compression_type type() const = 0; | ||
| }; | ||
|
|
||
| private: | ||
| std::unique_ptr<impl> impl_; | ||
| }; | ||
| } // namespace dwarfs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,71 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <memory> | ||
| #include <utility> | ||
| #include <vector> | ||
|
|
||
| #include "logger.h" | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
| class filesystem_writer; | ||
| class inode; | ||
| class os_access; | ||
| class progress; | ||
|
|
||
| class block_manager { | ||
| public: | ||
| struct config { | ||
| config(); | ||
|
|
||
| std::vector<size_t> blockhash_window_size; | ||
| unsigned window_increment_shift; | ||
| size_t memory_limit; | ||
| unsigned block_size_bits; | ||
| }; | ||
|
|
||
| block_manager(logger& lgr, progress& prog, const config& cfg, | ||
| std::shared_ptr<os_access> os, filesystem_writer& fsw); | ||
|
|
||
| void add_inode(std::shared_ptr<inode> ino) { impl_->add_inode(ino); } | ||
|
|
||
| void finish_blocks() { impl_->finish_blocks(); } | ||
|
|
||
| size_t total_size() const { return impl_->total_size(); } | ||
|
|
||
| size_t total_blocks() const { return impl_->total_blocks(); } | ||
|
|
||
| class impl { | ||
| public: | ||
| virtual ~impl() = default; | ||
|
|
||
| virtual void add_inode(std::shared_ptr<inode> ino) = 0; | ||
| virtual void finish_blocks() = 0; | ||
| virtual size_t total_size() const = 0; | ||
| virtual size_t total_blocks() const = 0; | ||
| }; | ||
|
|
||
| private: | ||
| std::unique_ptr<impl> impl_; | ||
| }; | ||
| } // namespace dwarfs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
// Lower and upper bound for the block size, expressed as a number of bits.
// NOTE(review): presumably the block size is 2^bits bytes -- confirm.
constexpr unsigned MIN_BLOCK_BITS_SIZE = 12;
constexpr unsigned MAX_BLOCK_BITS_SIZE = 28;
| } // namespace dwarfs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,53 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <array> | ||
| #include <ostream> | ||
|
|
||
| #include "logger.h" | ||
| #include "progress.h" | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
| class console_writer : public logger { | ||
| public: | ||
| console_writer(std::ostream& os, bool is_terminal, size_t width, | ||
| level_type threshold); | ||
|
|
||
| void write(level_type level, const std::string& output) override; | ||
|
|
||
| void update(const progress& p, bool last); | ||
|
|
||
| private: | ||
| void rewind(); | ||
|
|
||
| std::ostream& os_; | ||
| std::mutex mx_; | ||
| std::atomic<level_type> threshold_; | ||
| std::string statebuf_; | ||
| double frac_; | ||
| std::atomic<size_t> counter_{0}; | ||
| const bool show_progress_; | ||
| const size_t width_; | ||
| }; | ||
| } // namespace dwarfs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,77 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <array> | ||
| #include <random> | ||
| #include <stdexcept> | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
/**
 * Lookup table that maps every possible byte value to a value of type T.
 *
 * The table is filled from a default-constructed (i.e. default-seeded)
 * std::default_random_engine, so every instance built by the same program
 * contains the same values.
 */
template <typename T>
class byte_hash {
 public:
  byte_hash() {
    std::default_random_engine rng;
    std::uniform_int_distribution<T> dist(0, static_cast<T>(-1));

    // Fill the slots front to back so the engine is consumed in index order.
    for (auto& slot : hash_) {
      slot = dist(rng);
    }
  }

  /// Table value for byte `c`.
  T operator()(uint8_t c) const { return hash_[c]; }

 private:
  std::array<T, std::numeric_limits<uint8_t>::max() + 1> hash_;
};
|
|
||
| template <typename T> | ||
| class cyclic_hash { | ||
| public: | ||
| cyclic_hash(size_t window_size, const byte_hash<T>& ch) | ||
| : hash_(0) | ||
| , byte_hash_(ch) { | ||
| if (window_size % hash_bits) { | ||
| throw std::runtime_error("unsupported window size"); | ||
| } | ||
| } | ||
|
|
||
| void reset() { hash_ = 0; } | ||
|
|
||
| void update(uint8_t outbyte, uint8_t inbyte) { | ||
| hash_ = rol(hash_) ^ byte_hash_(outbyte) ^ byte_hash_(inbyte); | ||
| } | ||
|
|
||
| void update(uint8_t inbyte) { hash_ = rol(hash_) ^ byte_hash_(inbyte); } | ||
|
|
||
| T operator()() const { return hash_; } | ||
|
|
||
| private: | ||
| static const size_t hash_bits = 8 * sizeof(T); | ||
|
|
||
| inline T rol(T x) const { return (x << 1) | (x >> (hash_bits - 1)); } | ||
|
|
||
| T hash_; | ||
| const byte_hash<T>& byte_hash_; | ||
| }; | ||
| } // namespace dwarfs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,183 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <array> | ||
| #include <functional> | ||
| #include <memory> | ||
| #include <vector> | ||
|
|
||
| #include <sys/stat.h> | ||
|
|
||
| #include <folly/Range.h> | ||
|
|
||
| #include "file_interface.h" | ||
| #include "fstypes.h" | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
| class file; | ||
| class link; | ||
| class dir; | ||
| class inode; | ||
| class os_access; | ||
| class progress; | ||
|
|
||
| class entry_visitor { | ||
| public: | ||
| virtual ~entry_visitor() = default; | ||
| virtual void visit(file* p) = 0; | ||
| virtual void visit(link* p) = 0; | ||
| virtual void visit(dir* p) = 0; | ||
| }; | ||
|
|
||
| class entry : public file_interface { | ||
| public: | ||
| enum type_t { E_FILE, E_DIR, E_LINK }; | ||
|
|
||
| entry(const std::string& name, std::shared_ptr<entry> parent, | ||
| const struct ::stat& st); | ||
|
|
||
| void scan(os_access& os, progress& prog); | ||
| bool has_parent() const; | ||
| std::shared_ptr<entry> parent() const; | ||
| void set_name(const std::string& name); | ||
| void set_name_offset(size_t offset); | ||
| std::string path() const override; | ||
| const std::string& name() const override { return name_; } | ||
| size_t size() const override { return stat_.st_size; } | ||
| virtual type_t type() const = 0; | ||
| std::string type_string() const override; | ||
| virtual size_t total_size() const; | ||
| virtual void walk(std::function<void(entry*)> const& f); | ||
| virtual void walk(std::function<void(const entry*)> const& f) const; | ||
| void pack(dir_entry& de) const; | ||
| void pack(dir_entry_ug& de) const; | ||
| void pack(dir_entry_ug_time& de) const; | ||
| virtual void accept(entry_visitor& v, bool preorder = false) = 0; | ||
| virtual uint32_t inode_num() const = 0; | ||
|
|
||
| protected: | ||
| virtual void pack_specific(dir_entry& de) const = 0; | ||
| virtual void scan(os_access& os, const std::string& p, progress& prog) = 0; | ||
|
|
||
| private: | ||
| std::string name_; | ||
| std::weak_ptr<entry> parent_; | ||
| struct ::stat stat_; | ||
| uint32_t name_offset_; | ||
| }; | ||
|
|
||
| class file : public entry { | ||
| public: | ||
| file(const std::string& name, std::shared_ptr<entry> parent, | ||
| const struct ::stat& st, bool with_similarity) | ||
| : entry(name, parent, st) | ||
| , with_similarity_(with_similarity) {} | ||
|
|
||
| type_t type() const override; | ||
| folly::StringPiece hash() const; | ||
| void set_inode(std::shared_ptr<inode> ino); | ||
| std::shared_ptr<inode> get_inode() const; | ||
| void accept(entry_visitor& v, bool preorder) override; | ||
| uint32_t inode_num() const override; | ||
| uint32_t similarity_hash() const { return similarity_hash_; } | ||
|
|
||
| protected: | ||
| void pack_specific(dir_entry& de) const override; | ||
| void scan(os_access& os, const std::string& p, progress& prog) override; | ||
|
|
||
| private: | ||
| uint32_t similarity_hash_{0}; | ||
| const bool with_similarity_; | ||
| std::array<char, 20> hash_{0}; | ||
| std::shared_ptr<inode> inode_; | ||
| }; | ||
|
|
||
| class dir : public entry { | ||
| public: | ||
| using entry::entry; | ||
|
|
||
| type_t type() const override; | ||
| void add(std::shared_ptr<entry> e); | ||
| size_t total_size() const override; | ||
| void walk(std::function<void(entry*)> const& f) override; | ||
| void walk(std::function<void(const entry*)> const& f) const override; | ||
| void accept(entry_visitor& v, bool preorder) override; | ||
| void sort(); | ||
| void set_offset(size_t offset); | ||
| void set_inode(uint32_t inode); | ||
| virtual size_t packed_size() const = 0; | ||
| virtual void | ||
| pack(uint8_t* buf, | ||
| std::function<void(const entry* e, size_t offset)> const& offset_cb) | ||
| const = 0; | ||
| virtual size_t packed_entry_size() const = 0; | ||
| virtual void pack_entry(uint8_t* buf) const = 0; | ||
| uint32_t inode_num() const override { return inode_; } | ||
|
|
||
| protected: | ||
| void pack_specific(dir_entry& de) const override; | ||
| void scan(os_access& os, const std::string& p, progress& prog) override; | ||
|
|
||
| using entry_ptr = std::shared_ptr<entry>; | ||
|
|
||
| std::vector<std::shared_ptr<entry>> entries_; | ||
| uint32_t offset_ = 0; | ||
| uint32_t inode_ = 0; | ||
| }; | ||
|
|
||
| class link : public entry { | ||
| public: | ||
| using entry::entry; | ||
|
|
||
| type_t type() const override; | ||
| const std::string& linkname() const; | ||
| void set_offset(size_t offset); | ||
| void set_inode(uint32_t inode); | ||
| void accept(entry_visitor& v, bool preorder) override; | ||
| uint32_t inode_num() const override { return inode_; } | ||
|
|
||
| protected: | ||
| void pack_specific(dir_entry& de) const override; | ||
| void scan(os_access& os, const std::string& p, progress& prog) override; | ||
|
|
||
| private: | ||
| std::string link_; | ||
| uint32_t offset_ = 0; | ||
| uint32_t inode_ = 0; | ||
| }; | ||
|
|
||
| class entry_factory { | ||
| public: | ||
| static std::shared_ptr<entry_factory> | ||
| create(bool no_owner = false, bool no_time = false, | ||
| bool with_similarity = false); | ||
|
|
||
| virtual ~entry_factory() = default; | ||
|
|
||
| virtual std::shared_ptr<entry> | ||
| create(os_access& os, const std::string& name, | ||
| std::shared_ptr<entry> parent = std::shared_ptr<entry>()) = 0; | ||
| virtual dir_entry_type de_type() const = 0; | ||
| }; | ||
| } // namespace dwarfs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,37 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <string> | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
/**
 * Minimal read-only interface of a file system object: its path, its
 * name, a textual type and its size.
 */
class file_interface {
 public:
  virtual ~file_interface() = default;

  /// Path of the object, returned by value.
  virtual std::string path() const = 0;
  /// Name of the object, returned by reference.
  virtual std::string const& name() const = 0;
  /// Textual description of the object's type.
  virtual std::string type_string() const = 0;
  /// Size of the object.
  virtual size_t size() const = 0;
};
| } // namespace dwarfs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,37 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <functional> | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
// file_vector only ever handles file_interface through pointers, so a
// forward declaration is sufficient and makes this header usable without
// requiring another header to be included first.
class file_interface;

/**
 * Abstract, indexable and sortable sequence of file_interface pointers.
 */
class file_vector {
 public:
  virtual ~file_vector() = default;

  /// Element at index `i`.
  virtual const file_interface* operator[](size_t i) const = 0;

  /// Number of elements.
  virtual size_t size() const = 0;

  /// Sort the elements using `less` as the ordering predicate.
  virtual void
  sort(std::function<bool(const file_interface* a,
                          const file_interface* b)> const& less) = 0;
};
| } // namespace dwarfs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,171 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <exception> | ||
| #include <functional> | ||
| #include <memory> | ||
| #include <ostream> | ||
| #include <string> | ||
|
|
||
| #include <sys/stat.h> | ||
| #include <sys/statvfs.h> | ||
| #include <sys/types.h> | ||
|
|
||
| #include "logger.h" | ||
| #include "mmif.h" | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
| struct iovec_read_buf; | ||
|
|
||
/**
 * Exception type carrying a message together with an integer error code.
 *
 * The class owns no raw resources, so copy and move operations are left to
 * the compiler (Rule of Zero). The previous hand-written copy constructor
 * and copy assignment did exactly what the defaults do, but suppressed the
 * implicit move operations.
 */
class error : public std::exception {
 public:
  /**
   * \param str     message returned by what()
   * \param err_no  error code returned by get_errno()
   *
   * The constructor is declared noexcept (unchanged from the original
   * interface) even though it copies `str`.
   */
  error(const std::string& str, int err_no) noexcept
      : what_(str)
      , errno_(err_no) {}

  /// Message passed at construction; valid as long as this object lives.
  const char* what() const noexcept override { return what_.c_str(); }

  /// Error code passed at construction.
  int get_errno() const noexcept { return errno_; }

 private:
  std::string what_;
  int errno_;
};
|
|
||
| struct block_cache_options; | ||
| struct dir_entry; | ||
| struct directory; | ||
|
|
||
| class filesystem_writer; | ||
| class progress; | ||
|
|
||
| class filesystem { | ||
| public: | ||
| filesystem(logger& lgr, std::shared_ptr<mmif> mm, | ||
| const block_cache_options& bc_options, | ||
| const struct ::stat* stat_defaults = nullptr, | ||
| int inode_offset = 0); | ||
|
|
||
| static void rewrite(logger& lgr, progress& prog, std::shared_ptr<mmif> mm, | ||
| filesystem_writer& writer); | ||
|
|
||
| static void identify(logger& lgr, std::shared_ptr<mmif> mm, std::ostream& os); | ||
|
|
||
| void dump(std::ostream& os) const { impl_->dump(os); } | ||
|
|
||
| void walk(std::function<void(const dir_entry*)> const& func) { | ||
| impl_->walk(func); | ||
| } | ||
|
|
||
| const dir_entry* find(const char* path) const { return impl_->find(path); } | ||
|
|
||
| const dir_entry* find(int inode) const { return impl_->find(inode); } | ||
|
|
||
| const dir_entry* find(int inode, const char* name) const { | ||
| return impl_->find(inode, name); | ||
| } | ||
|
|
||
| int getattr(const dir_entry* de, struct ::stat* stbuf) const { | ||
| return impl_->getattr(de, stbuf); | ||
| } | ||
|
|
||
| int access(const dir_entry* de, int mode, uid_t uid, gid_t gid) const { | ||
| return impl_->access(de, mode, uid, gid); | ||
| } | ||
|
|
||
| const directory* opendir(const dir_entry* de) const { | ||
| return impl_->opendir(de); | ||
| } | ||
|
|
||
| const dir_entry* | ||
| readdir(const directory* d, size_t offset, std::string* name) const { | ||
| return impl_->readdir(d, offset, name); | ||
| } | ||
|
|
||
| size_t dirsize(const directory* d) const { return impl_->dirsize(d); } | ||
|
|
||
| int readlink(const dir_entry* de, char* buf, size_t size) const { | ||
| return impl_->readlink(de, buf, size); | ||
| } | ||
|
|
||
| int readlink(const dir_entry* de, std::string* buf) const { | ||
| return impl_->readlink(de, buf); | ||
| } | ||
|
|
||
| int statvfs(struct ::statvfs* stbuf) const { return impl_->statvfs(stbuf); } | ||
|
|
||
| int open(const dir_entry* de) const { return impl_->open(de); } | ||
|
|
||
| ssize_t read(uint32_t inode, char* buf, size_t size, off_t offset) const { | ||
| return impl_->read(inode, buf, size, offset); | ||
| } | ||
|
|
||
| ssize_t | ||
| readv(uint32_t inode, iovec_read_buf& buf, size_t size, off_t offset) const { | ||
| return impl_->readv(inode, buf, size, offset); | ||
| } | ||
|
|
||
| class impl { | ||
| public: | ||
| virtual ~impl() = default; | ||
|
|
||
| virtual void dump(std::ostream& os) const = 0; | ||
| virtual void | ||
| walk(std::function<void(const dir_entry*)> const& func) const = 0; | ||
| virtual const dir_entry* find(const char* path) const = 0; | ||
| virtual const dir_entry* find(int inode) const = 0; | ||
| virtual const dir_entry* find(int inode, const char* name) const = 0; | ||
| virtual int getattr(const dir_entry* de, struct ::stat* stbuf) const = 0; | ||
| virtual int | ||
| access(const dir_entry* de, int mode, uid_t uid, gid_t gid) const = 0; | ||
| virtual const directory* opendir(const dir_entry* de) const = 0; | ||
| virtual const dir_entry* | ||
| readdir(const directory* d, size_t offset, std::string* name) const = 0; | ||
| virtual size_t dirsize(const directory* d) const = 0; | ||
| virtual int readlink(const dir_entry* de, char* buf, size_t size) const = 0; | ||
| virtual int readlink(const dir_entry* de, std::string* buf) const = 0; | ||
| virtual int statvfs(struct ::statvfs* stbuf) const = 0; | ||
| virtual int open(const dir_entry* de) const = 0; | ||
| virtual ssize_t | ||
| read(uint32_t inode, char* buf, size_t size, off_t offset) const = 0; | ||
| virtual ssize_t readv(uint32_t inode, iovec_read_buf& buf, size_t size, | ||
| off_t offset) const = 0; | ||
| }; | ||
|
|
||
| private: | ||
| std::unique_ptr<impl> impl_; | ||
| }; | ||
| } // namespace dwarfs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,87 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <ostream> | ||
| #include <vector> | ||
|
|
||
| #include "fstypes.h" | ||
| #include "worker_group.h" | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
| class block_compressor; | ||
| class logger; | ||
| class progress; | ||
|
|
||
/**
 * Handle owning a polymorphic section implementation.
 *
 * The implementation interface is still a placeholder (see the TODO).
 */
class section {
 public:
  /// Base class for section implementations; currently only provides a
  /// virtual destructor.
  class impl {
   public:
    virtual ~impl() = default;

    // TODO
  };

  /// Constructs a section from an implementation passed as an rvalue.
  section(std::unique_ptr<impl>&& i);

 private:
  std::unique_ptr<impl> impl_;
};
|
|
||
| class filesystem_writer { | ||
| public: | ||
| filesystem_writer(std::ostream& os, logger& lgr, worker_group& wg, | ||
| progress& prog, const block_compressor& bc, | ||
| size_t max_queue_size); | ||
|
|
||
| // section create_block(); | ||
| // section create_metadata(); | ||
|
|
||
| // void add_section(section&& section); | ||
|
|
||
| void write_block(std::vector<uint8_t>&& data) { | ||
| impl_->write_block(std::move(data)); | ||
| } | ||
|
|
||
| void write_metadata(std::vector<uint8_t>&& data) { | ||
| impl_->write_metadata(std::move(data)); | ||
| } | ||
|
|
||
| void flush() { impl_->flush(); } | ||
|
|
||
| size_t size() const { return impl_->size(); } | ||
|
|
||
| class impl { | ||
| public: | ||
| virtual ~impl() = default; | ||
|
|
||
| virtual void write_block(std::vector<uint8_t>&& data) = 0; | ||
| virtual void write_metadata(std::vector<uint8_t>&& data) = 0; | ||
| virtual void flush() = 0; | ||
| virtual size_t size() const = 0; | ||
| }; | ||
|
|
||
| private: | ||
| std::unique_ptr<impl> impl_; | ||
| }; | ||
| } // namespace dwarfs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,234 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <cstdint> | ||
| #include <iostream> | ||
| #include <stdexcept> | ||
|
|
||
| #include "block_compressor.h" // TODO: or the other way round? | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
| /************************* | ||
| --------------------- | ||
| file_header | ||
| --------------------- | ||
| section_header [BLOCK] | ||
| block 0 | ||
| --------------------- | ||
| section_header [BLOCK] | ||
| block n | ||
| --------------------- | ||
| section_header [METADATA] | ||
| metadata | ||
| --------------------- | ||
| TODO: better description ;-) | ||
| metadata: | ||
| links_table -> vector<uint8_t> // links first, potential re-use for names | ||
| table :-) | ||
| names_table -> vector<uint8_t> | ||
| inode_table -> vector<chunk> // sizeof(chunk) aligned (64-bit) | ||
| directories... | ||
| inode_index: inode -> dir_entry offset | ||
| chunk_index: (inode - file_inode_offset) -> chunk offset | ||
| *************************/ | ||
|
|
||
// Major and minor version numbers.
// NOTE(review): presumably the values written to file_header::major and
// file_header::minor -- confirm against the writer implementation.
constexpr uint8_t MAJOR_VERSION{0};
constexpr uint8_t MINOR_VERSION{0};
|
|
||
// Kind of data stored in a section. The numeric values are spelled out
// explicitly for every enumerator.
enum class section_type : uint16_t {
  // Block data, optionally compressed.
  BLOCK = 0,

  // Metadata, optionally compressed. This is itself just another list of
  // sections.
  METADATA = 1,

  // Raw data that the other sections index by offset. It holds all names,
  // link targets and chunk lists:
  //  - names are referenced by offset/length;
  //  - link targets are referenced by offset and start with a uint16_t
  //    holding the length of the string that follows;
  //  - names are free to share data with link targets;
  //  - chunk lists are plain vectors of chunks, aligned to the size of a
  //    chunk for efficient access.
  META_TABLEDATA = 2,

  // Vector of offsets to all inodes (i.e. dir_entry* structs). The vector
  // may be offset by inode_index_offset if inodes do not start at zero.
  META_INODE_INDEX = 3,

  // Vector of offsets to the start of the chunk list of each file inode.
  // Because all link and directory inodes precede all file inodes, the
  // vector is offset by chunk_index_offset. It has one extra element that
  // holds the offset to the end of the chunk lists.
  META_CHUNK_INDEX = 4,

  // All directory structures in top-down order, referenced from within the
  // inode index. The root directory's dir_entry* struct is stored here too.
  META_DIRECTORIES = 5,

  // Configuration of this filesystem: the type of dir_entry* structure in
  // use, the block size needed for working with the chunk lists, the inode
  // offsets in use and the total inode count (for out-of-bounds checks).
  META_CONFIG = 6,
};
|
|
||
// Selects which directory entry structure a filesystem uses.
enum class dir_entry_type : uint8_t {
  // filesystem uses dir_entry
  DIR_ENTRY = 0,
  // filesystem uses dir_entry_ug
  DIR_ENTRY_UG = 1,
  // filesystem uses dir_entry_ug_time
  DIR_ENTRY_UG_TIME = 2,
};
|
|
||
// Header at the very start of a filesystem image: a six byte magic
// followed by one byte each for the major and minor version.
struct file_header {
  // "DWARFS"
  char magic[6];
  // major version
  uint8_t major;
  // minor version
  uint8_t minor;
};
|
|
||
| struct section_header { | ||
| section_type type; | ||
| compression_type compression; | ||
| uint8_t unused; | ||
| uint32_t length; | ||
|
|
||
| std::string to_string() const; | ||
| void dump(std::ostream& os) const; | ||
| }; | ||
|
|
||
// Basic directory entry: 128 bits (16 bytes) per entry.
struct dir_entry {
  uint32_t name_offset;
  uint16_t name_size;
  uint16_t mode;
  // dirs start at 1, then links, then files
  uint32_t inode;
  union {
    // for files only
    uint32_t file_size;
    // for dirs: offset to directory;
    // for links: offset to content in link table
    uint32_t offset;
  } u;
};
|
|
||
| struct dir_entry_ug { // 160 bits (20 bytes) / entry | ||
| dir_entry de; | ||
| uint16_t owner; | ||
| uint16_t group; | ||
| }; | ||
|
|
||
| struct dir_entry_ug_time { // 256 bits (32 bytes) / entry | ||
| dir_entry_ug ug; | ||
| uint32_t atime; // yeah, I know... in a few years we can switch to 64 bits | ||
| uint32_t mtime; | ||
| uint32_t ctime; | ||
| }; | ||
|
|
||
| struct directory { | ||
| uint32_t count; | ||
| uint32_t self; | ||
| uint32_t parent; | ||
| union { | ||
| dir_entry entries[1]; | ||
| dir_entry_ug entries_ug[1]; | ||
| dir_entry_ug_time entries_ug_time[1]; | ||
| } u; | ||
| }; | ||
|
|
||
| struct meta_config { | ||
| uint8_t block_size_bits; | ||
| dir_entry_type de_type; | ||
| uint16_t unused; | ||
| uint32_t inode_count; | ||
| uint64_t orig_fs_size; | ||
| uint32_t chunk_index_offset; | ||
| uint32_t inode_index_offset; | ||
| }; | ||
|
|
||
// A chunk is a block number, an offset and a size packed into 64 bits.
using chunk_type = uint64_t;

/**
 * Packs and unpacks the three fields of a chunk_type.
 *
 * Bit layout, with B = BlockSizeBits:
 *   bits [0, B)     size - 1
 *   bits [B, 2B)    offset
 *   bits [2B, 64)   block number
 *
 * The size is stored minus one, so sizes from 1 up to and including
 * 2^B (max_size) can be represented.
 */
template <unsigned BlockSizeBits>
struct chunk_access {
  static_assert(BlockSizeBits < 32, "invalid value for BlockSizeBits");

  // constexpr (rather than plain const) so the constants can also be bound
  // to references without needing an out-of-class definition in C++17.
  static constexpr unsigned block_bits = 64 - 2 * BlockSizeBits;
  static constexpr unsigned block_shift = 64 - block_bits;
  static constexpr chunk_type block_mask =
      (static_cast<chunk_type>(1) << block_bits) - 1;
  static constexpr unsigned offset_shift = BlockSizeBits;
  static constexpr chunk_type offset_mask =
      (static_cast<chunk_type>(1) << BlockSizeBits) - 1;
  static constexpr unsigned size_shift = 0;
  static constexpr chunk_type size_mask =
      (static_cast<chunk_type>(1) << BlockSizeBits) - 1;
  static constexpr chunk_type max_size = size_mask + 1;

  /**
   * Stores `block`, `offset` and `size` in `chunk`.
   *
   * A diagnostic line is written to std::cerr before throwing.
   *
   * \throws std::runtime_error if `block` exceeds block_mask, `offset`
   *         exceeds offset_mask, or `size` is zero or exceeds max_size.
   *         `chunk` is left untouched in that case.
   */
  static void set(chunk_type& chunk, size_t block, size_t offset, size_t size) {
    if (block > block_mask) {
      std::cerr << "block out of range: " << block << " > " << block_mask
                << " [" << block_bits << "]\n";
      throw std::runtime_error("block out of range");
    }

    if (offset > offset_mask) {
      // The bracketed value is the width of the offset field.
      std::cerr << "offset out of range: " << offset << " > " << offset_mask
                << " [" << BlockSizeBits << "]\n";
      throw std::runtime_error("offset out of range");
    }

    if (size == 0 || size > max_size) {
      // Covers both failure modes (zero and too large), so report the
      // permitted interval rather than a single bound.
      std::cerr << "size out of range: " << size << " not in [1, " << max_size
                << "] [" << BlockSizeBits << "]\n";
      throw std::runtime_error("size out of range");
    }

    chunk = (static_cast<chunk_type>(block) << block_shift) |
            (static_cast<chunk_type>(offset) << offset_shift) |
            (static_cast<chunk_type>(size - 1) << size_shift);
  }

  /// Block number stored in `chunk`.
  static size_t block(chunk_type chunk) {
    return (chunk >> block_shift) & block_mask;
  }

  /// Offset stored in `chunk`.
  static size_t offset(chunk_type chunk) {
    return (chunk >> offset_shift) & offset_mask;
  }

  /// Size stored in `chunk` (the stored value plus one).
  static size_t size(chunk_type chunk) {
    return ((chunk >> size_shift) & size_mask) + 1;
  }
};
|
|
||
| std::string get_compression_name(compression_type type); | ||
| } // namespace dwarfs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,42 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #include <ostream> | ||
| #include <vector> | ||
|
|
||
| #include "fstypes.h" | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
| class file; | ||
| class file_interface; | ||
|
|
||
| class inode : public file_interface { | ||
| public: | ||
| virtual void set_file(const file* f) = 0; | ||
| virtual void set_num(uint32_t num) = 0; | ||
| virtual uint32_t num() const = 0; | ||
| virtual uint32_t similarity_hash() const = 0; | ||
| virtual const file_interface* any() const = 0; // TODO | ||
| virtual void add_chunk(size_t block, size_t offset, size_t size) = 0; | ||
| virtual const std::vector<chunk_type>& chunks() const = 0; | ||
| }; | ||
| } // namespace dwarfs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,83 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <unordered_map> | ||
| #include <vector> | ||
|
|
||
| #include "cyclic_hash.h" | ||
| #include "logger.h" | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
| template <typename LoggerPolicy, typename HashType> | ||
| class inode_hasher { | ||
| public: | ||
| using result_type = | ||
| typename std::unordered_map<size_t, std::vector<HashType>>; | ||
|
|
||
| inode_hasher(logger& lgr, byte_hash<HashType>& byte_hasher, | ||
| const std::vector<size_t>& blockhash_window_size) | ||
| : byte_hasher_(byte_hasher) | ||
| , window_(blockhash_window_size) | ||
| , log_(lgr) {} | ||
|
|
||
| void operator()(result_type& m, const uint8_t* data, size_t size) const { | ||
| auto tt = log_.timed_trace(); | ||
|
|
||
| for (size_t wsize : window_) { | ||
| if (size >= wsize) { | ||
| hashit(m[wsize], wsize, data, size); | ||
| } | ||
| } | ||
|
|
||
| tt << "hashed " << size << " bytes"; | ||
| } | ||
|
|
||
| private: | ||
| void hashit(std::vector<HashType>& vec, size_t window, const uint8_t* data, | ||
| size_t size) const { | ||
| cyclic_hash<HashType> hasher(window, byte_hasher_); | ||
|
|
||
| vec.clear(); | ||
| vec.reserve(size - window); | ||
|
|
||
| size_t i = 0; | ||
|
|
||
| while (i < window) { | ||
| hasher.update(data[i++]); | ||
| } | ||
|
|
||
| vec.push_back(hasher()); | ||
|
|
||
| while (i < size) { | ||
| hasher.update(data[i - window], data[i]); | ||
| vec.push_back(hasher()); | ||
| ++i; | ||
| } | ||
| } | ||
|
|
||
| byte_hash<HashType>& byte_hasher_; | ||
| const std::vector<size_t> window_; | ||
| log_proxy<LoggerPolicy> log_; | ||
| }; | ||
| } // namespace dwarfs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,56 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <functional> | ||
| #include <memory> | ||
| #include <ostream> | ||
|
|
||
| #include "file_interface.h" | ||
| #include "inode.h" | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
| class script; | ||
|
|
||
| class inode_manager { | ||
| public: | ||
| static std::shared_ptr<inode_manager> create(unsigned block_size_bits); | ||
|
|
||
| virtual ~inode_manager() = default; | ||
| virtual std::shared_ptr<inode> create() = 0; | ||
| virtual size_t count() const = 0; | ||
| virtual size_t block_size() const = 0; | ||
| virtual unsigned block_size_bits() const = 0; | ||
| virtual size_t chunk_size() const = 0; | ||
| virtual void order_inodes() = 0; | ||
| virtual void order_inodes(std::shared_ptr<script> scr) = 0; | ||
| virtual void order_inodes_by_similarity() = 0; | ||
| virtual void number_inodes(size_t first_no) = 0; | ||
| virtual void for_each_inode( | ||
| std::function<void(std::shared_ptr<inode> const&)> const& fn) const = 0; | ||
|
|
||
| private: | ||
| template <unsigned BlockSizeBits> | ||
| static std::shared_ptr<inode_manager> create_(unsigned block_size_bits); | ||
| }; | ||
| } // namespace dwarfs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,83 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <memory> | ||
|
|
||
| #include <sys/uio.h> | ||
|
|
||
| #include <folly/small_vector.h> | ||
|
|
||
| #include "block_cache.h" | ||
| #include "fstypes.h" | ||
| #include "logger.h" | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
| struct iovec_read_buf { | ||
| // This covers more than 95% of reads | ||
| static constexpr size_t inline_storage = 16; | ||
|
|
||
| folly::small_vector<struct ::iovec, inline_storage> buf; | ||
| folly::small_vector<block_range, inline_storage> ranges; | ||
| }; | ||
|
|
||
| class inode_reader { | ||
| public: | ||
| inode_reader() = default; | ||
|
|
||
| inode_reader(logger& lgr, block_cache&& bc, unsigned block_size_bits); | ||
|
|
||
| inode_reader& operator=(inode_reader&&) = default; | ||
|
|
||
| ssize_t read(char* buf, size_t size, off_t offset, const chunk_type* chunk, | ||
| size_t chunk_count) const { | ||
| return impl_->read(buf, size, offset, chunk, chunk_count); | ||
| } | ||
|
|
||
| ssize_t readv(iovec_read_buf& buf, size_t size, off_t offset, | ||
| const chunk_type* chunk, size_t chunk_count) const { | ||
| return impl_->readv(buf, size, offset, chunk, chunk_count); | ||
| } | ||
|
|
||
| void dump(std::ostream& os, const std::string& indent, | ||
| const chunk_type* chunk, size_t chunk_count) const { | ||
| impl_->dump(os, indent, chunk, chunk_count); | ||
| } | ||
|
|
||
| class impl { | ||
| public: | ||
| virtual ~impl() = default; | ||
|
|
||
| virtual ssize_t read(char* buf, size_t size, off_t offset, | ||
| const chunk_type* chunk, size_t chunk_count) const = 0; | ||
| virtual ssize_t | ||
| readv(iovec_read_buf& buf, size_t size, off_t offset, | ||
| const chunk_type* chunk, size_t chunk_count) const = 0; | ||
| virtual void dump(std::ostream& os, const std::string& indent, | ||
| const chunk_type* chunk, size_t chunk_count) const = 0; | ||
| }; | ||
|
|
||
| private: | ||
| std::unique_ptr<impl> impl_; | ||
| }; | ||
| } // namespace dwarfs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,317 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <atomic> | ||
| #include <chrono> | ||
| #include <iostream> | ||
| #include <memory> | ||
| #include <mutex> | ||
| #include <sstream> | ||
| #include <string> | ||
| #include <tuple> | ||
| #include <type_traits> | ||
|
|
||
| #include "util.h" | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
/**
 * Abstract log sink.
 *
 * Implementations override write(). The logger also carries the name of a
 * logging policy, which starts out empty and can be set either from a
 * policy type or directly from a string.
 */
class logger {
 public:
  /// Log levels; ERROR has the lowest numeric value, TRACE the highest.
  enum level_type : unsigned { ERROR, WARN, INFO, DEBUG, TRACE };

  virtual ~logger() = default;

  /// Emits `output` at the given `level`.
  virtual void write(level_type level, const std::string& output) = 0;

  /// Name of the currently selected policy.
  const std::string& policy_name() const {
    return policy_name_;
  }

  /// Sets the policy name to `Policy::name()`.
  template <class Policy>
  void set_policy() { // TODO: construction time arg?
    policy_name_ = Policy::name();
  }

  /// Sets the policy name to `name`.
  void set_policy_name(const std::string& name) { // TODO: construction time arg?
    policy_name_ = name;
  }

  /// Converts a textual level into a level_type (defined out of line).
  static level_type parse_level(const std::string& level);

 private:
  std::string policy_name_; // TODO: const?
};
|
|
||
| class stream_logger : public logger { | ||
| public: | ||
| stream_logger(std::ostream& os = std::cerr, level_type threshold = WARN); | ||
|
|
||
| void write(level_type level, const std::string& output) override; | ||
|
|
||
| void set_threshold(level_type threshold); | ||
|
|
||
| private: | ||
| std::ostream& os_; | ||
| std::mutex mx_; | ||
| std::atomic<level_type> threshold_; | ||
| }; | ||
|
|
||
| class level_logger { | ||
| public: | ||
| level_logger(logger& lgr, logger::level_type level) | ||
| : data_(std::make_unique<data>(lgr, level)) {} | ||
|
|
||
| level_logger(level_logger&& ll) | ||
| : data_(std::move(ll.data_)) {} | ||
|
|
||
| ~level_logger() { data_->lgr.write(data_->level, data_->oss.str()); } | ||
|
|
||
| template <typename T> | ||
| level_logger& operator<<(const T& val) { | ||
| data_->oss << val; | ||
| return *this; | ||
| } | ||
|
|
||
| private: | ||
| struct data { | ||
| data(logger& lgr, logger::level_type level) | ||
| : lgr(lgr) | ||
| , level(level) {} | ||
|
|
||
| logger& lgr; | ||
| std::ostringstream oss; | ||
| const logger::level_type level; | ||
| }; | ||
|
|
||
| std::unique_ptr<data> data_; | ||
| }; | ||
|
|
||
| class timed_level_logger { | ||
| public: | ||
| timed_level_logger(logger& lgr, logger::level_type level) | ||
| : data_(std::make_unique<data>(lgr, level)) {} | ||
|
|
||
| timed_level_logger(timed_level_logger&& ll) | ||
| : data_(std::move(ll.data_)) {} | ||
|
|
||
| ~timed_level_logger() { | ||
| std::chrono::duration<double> sec = | ||
| std::chrono::high_resolution_clock::now() - data_->start_time; | ||
| data_->oss << " [" << time_with_unit(sec.count()) << "]"; | ||
| data_->lgr.write(data_->level, data_->oss.str()); | ||
| } | ||
|
|
||
| template <typename T> | ||
| timed_level_logger& operator<<(const T& val) { | ||
| data_->oss << val; | ||
| return *this; | ||
| } | ||
|
|
||
| private: | ||
| struct data { | ||
| data(logger& lgr, logger::level_type level) | ||
| : lgr(lgr) | ||
| , level(level) | ||
| , start_time(std::chrono::high_resolution_clock::now()) {} | ||
|
|
||
| logger& lgr; | ||
| std::ostringstream oss; | ||
| const logger::level_type level; | ||
| std::chrono::time_point<std::chrono::high_resolution_clock> start_time; | ||
| }; | ||
|
|
||
| std::unique_ptr<data> data_; | ||
| }; | ||
|
|
||
| class no_logger { | ||
| public: | ||
| no_logger(logger&, logger::level_type) {} | ||
|
|
||
| template <typename T> | ||
| no_logger& operator<<(const T&) { | ||
| return *this; | ||
| } | ||
| }; | ||
|
|
||
| namespace detail { | ||
|
|
||
| template <bool LoggingEnabled> | ||
| using logger_type = | ||
| typename std::conditional<LoggingEnabled, level_logger, no_logger>::type; | ||
|
|
||
| template <bool LoggingEnabled> | ||
| using timed_logger_type = | ||
| typename std::conditional<LoggingEnabled, timed_level_logger, | ||
| no_logger>::type; | ||
| } // namespace detail | ||
|
|
||
| template <unsigned MinLogLevel> | ||
| class MinimumLogLevelPolicy { | ||
| public: | ||
| template <unsigned Level> | ||
| using logger = detail::logger_type<Level <= MinLogLevel>; | ||
|
|
||
| template <unsigned Level> | ||
| using timed_logger = detail::timed_logger_type<Level <= MinLogLevel>; | ||
| }; | ||
|
|
||
| template <typename LogPolicy> | ||
| class log_proxy { | ||
| public: | ||
| log_proxy(logger& lgr) | ||
| : lgr_(lgr) {} | ||
|
|
||
| auto error() const { | ||
| return | ||
| typename LogPolicy::template logger<logger::ERROR>(lgr_, logger::ERROR); | ||
| } | ||
|
|
||
| auto warn() const { | ||
| return | ||
| typename LogPolicy::template logger<logger::WARN>(lgr_, logger::WARN); | ||
| } | ||
|
|
||
| auto info() const { | ||
| return | ||
| typename LogPolicy::template logger<logger::INFO>(lgr_, logger::INFO); | ||
| } | ||
|
|
||
| auto debug() const { | ||
| return | ||
| typename LogPolicy::template logger<logger::DEBUG>(lgr_, logger::DEBUG); | ||
| } | ||
|
|
||
| auto trace() const { | ||
| return | ||
| typename LogPolicy::template logger<logger::TRACE>(lgr_, logger::TRACE); | ||
| } | ||
|
|
||
| auto timed_error() const { | ||
| return typename LogPolicy::template timed_logger<logger::ERROR>( | ||
| lgr_, logger::ERROR); | ||
| } | ||
|
|
||
| auto timed_warn() const { | ||
| return typename LogPolicy::template timed_logger<logger::WARN>( | ||
| lgr_, logger::WARN); | ||
| } | ||
|
|
||
| auto timed_info() const { | ||
| return typename LogPolicy::template timed_logger<logger::INFO>( | ||
| lgr_, logger::INFO); | ||
| } | ||
|
|
||
| auto timed_debug() const { | ||
| return typename LogPolicy::template timed_logger<logger::DEBUG>( | ||
| lgr_, logger::DEBUG); | ||
| } | ||
|
|
||
| auto timed_trace() const { | ||
| return typename LogPolicy::template timed_logger<logger::TRACE>( | ||
| lgr_, logger::TRACE); | ||
| } | ||
|
|
||
| private: | ||
| logger& lgr_; | ||
| }; | ||
|
|
||
| class prod_logger_policy : public MinimumLogLevelPolicy<logger::INFO> { | ||
| public: | ||
| static std::string name() { return "prod"; } | ||
| }; | ||
|
|
||
| class debug_logger_policy : public MinimumLogLevelPolicy<logger::TRACE> { | ||
| public: | ||
| static std::string name() { return "debug"; } | ||
| }; | ||
|
|
||
| using logger_policies = std::tuple<debug_logger_policy, prod_logger_policy>; | ||
|
|
||
// Creation policy that hands out objects as std::unique_ptr<T>.
template <class T>
struct unique_ptr_policy {
  using return_type = std::unique_ptr<T>;

  // Constructs a `U` from `args` and returns it as `return_type`.
  template <class U, class... Args>
  static return_type create(Args&&... args) {
    auto object = std::make_unique<U>(std::forward<Args>(args)...);
    return object;
  }
};
|
|
||
// Creation policy that hands out objects as std::shared_ptr<T>.
template <class T>
struct shared_ptr_policy {
  using return_type = std::shared_ptr<T>;

  // Constructs a `U` from `args` and returns it as `return_type`.
  template <class U, class... Args>
  static return_type create(Args&&... args) {
    auto object = std::make_shared<U>(std::forward<Args>(args)...);
    return object;
  }
};
|
|
||
| template <template <class> class T, class CreatePolicy, class LoggerPolicyList, | ||
| size_t N> | ||
| struct logging_class_factory { | ||
| template <class... Args> | ||
| static typename CreatePolicy::return_type | ||
| create(logger& lgr, Args&&... args) { | ||
| if (std::tuple_element<N - 1, LoggerPolicyList>::type::name() == | ||
| lgr.policy_name()) { | ||
| using obj_type = | ||
| T<typename std::tuple_element<N - 1, LoggerPolicyList>::type>; | ||
| return CreatePolicy::template create<obj_type>( | ||
| lgr, std::forward<Args>(args)...); | ||
| } | ||
|
|
||
| return logging_class_factory<T, CreatePolicy, LoggerPolicyList, | ||
| N - 1>::create(lgr, | ||
| std::forward<Args>(args)...); | ||
| } | ||
| }; | ||
|
|
||
| template <template <class> class T, class CreatePolicy, class LoggerPolicyList> | ||
| struct logging_class_factory<T, CreatePolicy, LoggerPolicyList, 0> { | ||
| template <class... Args> | ||
| static typename CreatePolicy::return_type create(logger& lgr, Args&&...) { | ||
| throw std::runtime_error("no such logger policy: " + lgr.policy_name()); | ||
| } | ||
| }; | ||
|
|
||
| template <class Base, template <class> class T, class LoggerPolicyList, | ||
| class... Args> | ||
| std::unique_ptr<Base> make_unique_logging_object(logger& lgr, Args&&... args) { | ||
| return logging_class_factory< | ||
| T, unique_ptr_policy<Base>, LoggerPolicyList, | ||
| std::tuple_size<LoggerPolicyList>::value>::create(lgr, | ||
| std::forward<Args>( | ||
| args)...); | ||
| } | ||
|
|
||
| template <class Base, template <class> class T, class LoggerPolicyList, | ||
| class... Args> | ||
| std::shared_ptr<Base> make_shared_logging_object(logger& lgr, Args&&... args) { | ||
| return logging_class_factory< | ||
| T, shared_ptr_policy<Base>, LoggerPolicyList, | ||
| std::tuple_size<LoggerPolicyList>::value>::create(lgr, | ||
| std::forward<Args>( | ||
| args)...); | ||
| } | ||
| } // namespace dwarfs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,44 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <memory> | ||
|
|
||
| #include "script.h" | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
| class logger; | ||
|
|
||
| class lua_script : public script { | ||
| public: | ||
| lua_script(logger& lgr, const std::string& file); | ||
| ~lua_script(); | ||
|
|
||
| bool filter(file_interface const& fi) const override; | ||
| void order(file_vector& fvi) const override; | ||
|
|
||
| private: | ||
| class impl; | ||
| std::unique_ptr<impl> impl_; | ||
| }; | ||
| } // namespace dwarfs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,143 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <cstdint> | ||
| #include <functional> | ||
| #include <memory> | ||
| #include <vector> | ||
|
|
||
| #include <sys/stat.h> | ||
| #include <sys/statvfs.h> | ||
| #include <sys/types.h> | ||
|
|
||
| #include "fstypes.h" | ||
| #include "logger.h" | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
| class metadata { | ||
| public: | ||
| metadata() = default; | ||
|
|
||
| metadata(logger& lgr, std::vector<uint8_t>&& data, | ||
| const struct ::stat* defaults, int inode_offset = 0); | ||
|
|
||
| metadata& operator=(metadata&&) = default; | ||
|
|
||
| static void get_stat_defaults(struct ::stat* defaults); | ||
|
|
||
| size_t size() const { return impl_->size(); } | ||
|
|
||
| bool empty() const { return !impl_ || impl_->empty(); } | ||
|
|
||
| size_t block_size() const { return impl_->block_size(); } | ||
|
|
||
| unsigned block_size_bits() const { return impl_->block_size_bits(); } | ||
|
|
||
| void | ||
| dump(std::ostream& os, | ||
| std::function<void(const std::string&, uint32_t)> const& icb) const { | ||
| impl_->dump(os, icb); | ||
| } | ||
|
|
||
| void walk(std::function<void(const dir_entry*)> const& func) const { | ||
| impl_->walk(func); | ||
| } | ||
|
|
||
| const dir_entry* find(const char* path) const { return impl_->find(path); } | ||
|
|
||
| const dir_entry* find(int inode) const { return impl_->find(inode); } | ||
|
|
||
| const dir_entry* find(int inode, const char* name) const { | ||
| return impl_->find(inode, name); | ||
| } | ||
|
|
||
| int getattr(const dir_entry* de, struct ::stat* stbuf) const { | ||
| return impl_->getattr(de, stbuf); | ||
| } | ||
|
|
||
| int access(const dir_entry* de, int mode, uid_t uid, gid_t gid) const { | ||
| return impl_->access(de, mode, uid, gid); | ||
| } | ||
|
|
||
| const directory* opendir(const dir_entry* de) const { | ||
| return impl_->opendir(de); | ||
| } | ||
|
|
||
| const dir_entry* | ||
| readdir(const directory* d, size_t offset, std::string* name) const { | ||
| return impl_->readdir(d, offset, name); | ||
| } | ||
|
|
||
| size_t dirsize(const directory* d) const { return impl_->dirsize(d); } | ||
|
|
||
| int readlink(const dir_entry* de, char* buf, size_t size) const { | ||
| return impl_->readlink(de, buf, size); | ||
| } | ||
|
|
||
| int readlink(const dir_entry* de, std::string* buf) const { | ||
| return impl_->readlink(de, buf); | ||
| } | ||
|
|
||
| int statvfs(struct ::statvfs* stbuf) const { return impl_->statvfs(stbuf); } | ||
|
|
||
| int open(const dir_entry* de) const { return impl_->open(de); } | ||
|
|
||
| const chunk_type* get_chunks(int inode, size_t& num) const { | ||
| return impl_->get_chunks(inode, num); | ||
| } | ||
|
|
||
| class impl { | ||
| public: | ||
| virtual ~impl() = default; | ||
|
|
||
| virtual size_t size() const = 0; | ||
| virtual bool empty() const = 0; | ||
| virtual size_t block_size() const = 0; | ||
| virtual unsigned block_size_bits() const = 0; | ||
| virtual void dump( | ||
| std::ostream& os, | ||
| std::function<void(const std::string&, uint32_t)> const& icb) const = 0; | ||
| virtual void | ||
| walk(std::function<void(const dir_entry*)> const& func) const = 0; | ||
| virtual const dir_entry* find(const char* path) const = 0; | ||
| virtual const dir_entry* find(int inode) const = 0; | ||
| virtual const dir_entry* find(int inode, const char* name) const = 0; | ||
| virtual int getattr(const dir_entry* de, struct ::stat* stbuf) const = 0; | ||
| virtual int | ||
| access(const dir_entry* de, int mode, uid_t uid, gid_t gid) const = 0; | ||
| virtual const directory* opendir(const dir_entry* de) const = 0; | ||
| virtual const dir_entry* | ||
| readdir(const directory* d, size_t offset, std::string* name) const = 0; | ||
| virtual size_t dirsize(const directory* d) const = 0; | ||
| virtual int readlink(const dir_entry* de, char* buf, size_t size) const = 0; | ||
| virtual int readlink(const dir_entry* de, std::string* buf) const = 0; | ||
| virtual int statvfs(struct ::statvfs* stbuf) const = 0; | ||
| virtual int open(const dir_entry* de) const = 0; | ||
| virtual const chunk_type* get_chunks(int inode, size_t& num) const = 0; | ||
| }; | ||
|
|
||
| private: | ||
| std::unique_ptr<impl> impl_; | ||
| }; | ||
| } // namespace dwarfs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,97 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <vector> | ||
|
|
||
| #include <folly/Range.h> | ||
|
|
||
| #include "fstypes.h" | ||
| #include "logger.h" | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
| class metadata_writer { | ||
| public: | ||
| using const_iterator = std::vector<uint8_t>::const_iterator; | ||
|
|
||
| metadata_writer(logger& lgr, std::vector<uint8_t>& mem, | ||
| size_t section_align = 8); | ||
| void align(size_t align); | ||
| void finish_section(); | ||
| void start_section(section_type type); | ||
| uint8_t* buffer(size_t size); | ||
| void write(const void* data, size_t size); | ||
|
|
||
| const_iterator begin() const { return mem_.begin(); } | ||
|
|
||
| const_iterator section_begin() const { | ||
| return mem_.begin() + section_data_offset(); | ||
| } | ||
|
|
||
| const uint8_t* section_data() const { | ||
| return mem_.data() + section_data_offset(); | ||
| } | ||
|
|
||
| size_t section_data_size() const { | ||
| return mem_.size() - section_data_offset(); | ||
| } | ||
|
|
||
| size_t section_data_offset() const { | ||
| return section_header_offset_ + sizeof(section_header); | ||
| } | ||
|
|
||
| const_iterator end() const { return mem_.end(); } | ||
|
|
||
| size_t offset() const { return mem_.size(); } | ||
|
|
||
| template <typename T> | ||
| void write(const T& obj) { | ||
| write(&obj, sizeof(T)); | ||
| } | ||
|
|
||
| template <typename T> | ||
| void write(const std::vector<T>& vec) { | ||
| if (!vec.empty()) { | ||
| write(vec.data(), sizeof(T) * vec.size()); | ||
| } | ||
| } | ||
|
|
||
| void write(const std::string& str) { | ||
| if (!str.empty()) { | ||
| write(str.data(), str.size()); | ||
| } | ||
| } | ||
|
|
||
| void write(folly::StringPiece str) { | ||
| if (!str.empty()) { | ||
| write(str.data(), str.size()); | ||
| } | ||
| } | ||
|
|
||
| private: | ||
| std::vector<uint8_t>& mem_; | ||
| size_t section_header_offset_; | ||
| const size_t section_align_; | ||
| log_proxy<debug_logger_policy> log_; | ||
| }; | ||
| } // namespace dwarfs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <string> | ||
|
|
||
| #include "mmif.h" | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
| class mmap : public mmif { | ||
| public: | ||
| mmap(const std::string& path); | ||
| mmap(const std::string& path, size_t size); | ||
|
|
||
| virtual ~mmap() noexcept; | ||
|
|
||
| private: | ||
| int fd_; | ||
| }; | ||
| } // namespace dwarfs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,54 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <string> | ||
|
|
||
| #include <boost/noncopyable.hpp> | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
| class mmif : public boost::noncopyable { | ||
| public: | ||
| virtual ~mmif() = default; | ||
|
|
||
| const void* get() const { return addr_; } | ||
|
|
||
| template <typename T> | ||
| const T* as(size_t offset = 0) const { | ||
| return reinterpret_cast<const T*>( | ||
| reinterpret_cast<const char*>(const_cast<const void*>(addr_)) + offset); | ||
| } | ||
|
|
||
| size_t size() const { return size_; } | ||
|
|
||
| protected: | ||
| void assign(const void* addr, size_t size) { | ||
| addr_ = addr; | ||
| size_ = size; | ||
| } | ||
|
|
||
| private: | ||
| const void* addr_; | ||
| size_t size_; | ||
| }; | ||
| } // namespace dwarfs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,37 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
// Settings for the block cache; the defaults are 0 bytes, 0 workers and a
// decompress ratio of 1.0.
struct block_cache_options {
  size_t max_bytes = 0;
  size_t num_workers = 0;
  double decompress_ratio = 1.0;
};
|
|
||
// Strategy used to order files.
enum class file_order_mode { NONE, PATH, SCRIPT, SIMILARITY };

// Settings for the scanner.
struct scanner_options {
  // Defaults to NONE (the first enumerator) so that a default-initialised
  // object never carries an indeterminate mode.
  file_order_mode file_order{file_order_mode::NONE};
};
| } // namespace dwarfs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,52 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <memory> | ||
| #include <string> | ||
|
|
||
| #include <sys/stat.h> | ||
|
|
||
| #include "mmif.h" | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
// Abstract source of directory entry names.
// NOTE(review): presumably read() stores the next entry name in `name` and
// returns false once the directory is exhausted; confirm against the
// implementations.
class dir_reader {
 public:
  virtual ~dir_reader() = default;

  virtual bool read(std::string& name) const = 0;
};
|
|
||
| class os_access { | ||
| public: | ||
| virtual ~os_access() = default; | ||
|
|
||
| virtual std::shared_ptr<dir_reader> | ||
| opendir(const std::string& path) const = 0; | ||
| virtual void lstat(const std::string& path, struct ::stat* st) const = 0; | ||
| virtual std::string readlink(const std::string& path, size_t size) const = 0; | ||
| virtual std::shared_ptr<mmif> | ||
| map_file(const std::string& path, size_t size) const = 0; | ||
| virtual int access(const std::string& path, int mode) const = 0; | ||
| }; | ||
| } // namespace dwarfs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <memory> | ||
| #include <string> | ||
|
|
||
| #include "os_access.h" | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
| class os_access_posix : public os_access { | ||
| public: | ||
| std::shared_ptr<dir_reader> opendir(const std::string& path) const override; | ||
| void lstat(const std::string& path, struct ::stat* st) const override; | ||
| std::string readlink(const std::string& path, size_t size) const override; | ||
| std::shared_ptr<mmif> | ||
| map_file(const std::string& path, size_t size) const override; | ||
| int access(const std::string& path, int mode) const override; | ||
| }; | ||
| } // namespace dwarfs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,64 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <atomic> | ||
| #include <condition_variable> | ||
| #include <cstdint> | ||
| #include <mutex> | ||
| #include <ostream> | ||
| #include <thread> | ||
|
|
||
| #include <folly/Function.h> | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
| class progress { | ||
| public: | ||
| progress(folly::Function<void(const progress&, bool)>&& func); | ||
| ~progress() noexcept; | ||
|
|
||
| std::atomic<size_t> files_found{0}; | ||
| std::atomic<size_t> files_scanned{0}; | ||
| std::atomic<size_t> dirs_found{0}; | ||
| std::atomic<size_t> dirs_scanned{0}; | ||
| std::atomic<size_t> links_found{0}; | ||
| std::atomic<size_t> links_scanned{0}; | ||
| std::atomic<size_t> duplicate_files{0}; | ||
| std::atomic<size_t> block_count{0}; | ||
| std::atomic<size_t> chunk_count{0}; | ||
| std::atomic<size_t> inodes_written{0}; | ||
| std::atomic<size_t> blocks_written{0}; | ||
| std::atomic<size_t> errors{0}; | ||
| std::atomic<uint64_t> original_size{0}; | ||
| std::atomic<uint64_t> saved_by_deduplication{0}; | ||
| std::atomic<uint64_t> saved_by_segmentation{0}; | ||
| std::atomic<uint64_t> filesystem_size{0}; | ||
| std::atomic<uint64_t> compressed_size{0}; | ||
|
|
||
| private: | ||
| std::atomic<bool> running_; | ||
| std::mutex mx_; | ||
| std::condition_variable cond_; | ||
| std::thread thread_; | ||
| }; | ||
| } // namespace dwarfs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,60 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <memory> | ||
| #include <string> | ||
|
|
||
| #include "block_manager.h" | ||
| #include "worker_group.h" | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
| class entry_factory; | ||
| class logger; | ||
| class os_access; | ||
| class progress; | ||
| struct scanner_options; | ||
| class script; | ||
|
|
||
| class scanner { | ||
| public: | ||
| scanner(logger& lgr, worker_group& wg, const block_manager::config& cfg, | ||
| std::shared_ptr<entry_factory> ef, std::shared_ptr<os_access> os, | ||
| std::shared_ptr<script> scr, const scanner_options& options); | ||
|
|
||
| void scan(filesystem_writer& fsw, const std::string& path, progress& prog) { | ||
| impl_->scan(fsw, path, prog); | ||
| } | ||
|
|
||
| class impl { | ||
| public: | ||
| virtual ~impl() = default; | ||
|
|
||
| virtual void | ||
| scan(filesystem_writer& fsw, const std::string& path, progress& prog) = 0; | ||
| }; | ||
|
|
||
| private: | ||
| std::unique_ptr<impl> impl_; | ||
| }; | ||
| } // namespace dwarfs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,74 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <algorithm> | ||
| #include <memory> | ||
| #include <vector> | ||
|
|
||
| #include "file_interface.h" | ||
| #include "file_vector.h" | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
| namespace detail { | ||
|
|
||
| template <class T> | ||
| class file_vector_ : public file_vector { | ||
| public: | ||
| file_vector_(std::vector<std::shared_ptr<T>>& vec) | ||
| : vec_(vec) {} | ||
|
|
||
| const file_interface* operator[](size_t i) const override { | ||
| return vec_[i].get(); | ||
| } | ||
|
|
||
| size_t size() const override { return vec_.size(); } | ||
|
|
||
| void | ||
| sort(std::function<bool(const file_interface*, const file_interface*)> const& | ||
| less) override { | ||
| std::sort(vec_.begin(), vec_.end(), | ||
| [&](const std::shared_ptr<T>& a, const std::shared_ptr<T>& b) { | ||
| return less(a.get(), b.get()); | ||
| }); | ||
| } | ||
|
|
||
| private: | ||
| std::vector<std::shared_ptr<T>>& vec_; | ||
| }; | ||
| } // namespace detail | ||
|
|
||
| class script { | ||
| public: | ||
| virtual ~script() = default; | ||
|
|
||
| virtual bool filter(file_interface const& fi) const = 0; | ||
| virtual void order(file_vector& fvi) const = 0; | ||
|
|
||
| template <typename T> | ||
| void order(std::vector<std::shared_ptr<T>>& vec) const { | ||
| detail::file_vector_<T> fv(vec); | ||
| order(fv); | ||
| } | ||
| }; | ||
| } // namespace dwarfs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,29 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <cstdint> | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
// Returns a 32-bit similarity hash for the `size` bytes starting at `data`.
uint32_t get_similarity_hash(const uint8_t* data, size_t size);
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| /* vim:set ts=2 sw=2 sts=2 et: */ | ||
| /** | ||
| * \author Marcus Holland-Moritz (github@mhxnet.de) | ||
| * \copyright Copyright (c) Marcus Holland-Moritz | ||
| * | ||
| * This file is part of dwarfs. | ||
| * | ||
| * dwarfs is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * dwarfs is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU General Public License | ||
| * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <array> | ||
| #include <climits> | ||
| #include <string> | ||
|
|
||
| namespace dwarfs { | ||
|
|
||
// Formats a duration given in seconds as text with a unit.
std::string time_with_unit(double sec);

// Formats a size as text with a unit.
std::string size_with_unit(size_t size);

// Parses a size given as text.
// NOTE(review): presumably accepts a unit suffix, mirroring size_with_unit();
// confirm in the implementation file.
size_t parse_size_with_unit(const std::string& str);

// Returns the path of the running program.
std::string get_program_path();
| } // namespace dwarfs |