Skip to content

Commit

Permalink
chunkserver: Add punching holes in chunk files
Browse files Browse the repository at this point in the history
This commit modifies chunkserver so it detects
zeros in chunk data and frees corresponding
file system blocks.

Configuration entry HDD_PUNCH_HOLES can be used
to enable this feature in chunkserver.

Closes #370

Change-Id: I06b94290e059a3583e5314f711017dd81437b191
  • Loading branch information
DarkHaze authored and Fretek committed Mar 11, 2016
1 parent e1081b5 commit 96b6491
Show file tree
Hide file tree
Showing 6 changed files with 122 additions and 1 deletion.
7 changes: 6 additions & 1 deletion EnvTests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ include(CheckIncludes)
include(CheckLibraryExists)
include(CheckMembers)
include(CheckStructHasMember)
include(CheckSymbolExists)
include(CheckTypeSize)
include(TestBigEndian)

Expand Down Expand Up @@ -63,7 +64,7 @@ endif()
check_functions("${REQUIRED_FUNCTIONS}" TRUE)

set(OPTIONAL_FUNCTIONS strerror perror pread pwrite readv writev getrusage
setitimer posix_fadvise)
setitimer posix_fadvise fallocate)
check_functions("${OPTIONAL_FUNCTIONS}" false)

CHECK_LIBRARY_EXISTS(rt clock_gettime "time.h" LIZARDFS_HAVE_CLOCK_GETTIME)
Expand All @@ -89,3 +90,7 @@ check_cxx_source_compiles("${_CHECK_CXX_MULTIVERSION_CODE}" LIZARDFS_HAVE_MULTIV
if(APPLE)
set(SOCKET_CONVERT_POLL_TO_SELECT 1)
endif()

# FALLOC_FL_PUNCH_HOLE is only exposed by <fcntl.h> when _GNU_SOURCE is defined.
# The define is added inside a pushed check state so that any CMAKE_REQUIRED_*
# values configured earlier are restored afterwards instead of being cleared.
include(CMakePushCheckState)
cmake_push_check_state()
list(APPEND CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE)
check_symbol_exists(FALLOC_FL_PUNCH_HOLE "fcntl.h" LIZARDFS_HAVE_FALLOC_FL_PUNCH_HOLE)
cmake_pop_check_state()
2 changes: 2 additions & 0 deletions config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@
#cmakedefine LIZARDFS_HAVE_POSIX_FADVISE
#cmakedefine LIZARDFS_HAVE_CLOCK_GETTIME
#cmakedefine LIZARDFS_HAVE_PAM
#cmakedefine LIZARDFS_HAVE_FALLOCATE
#cmakedefine LIZARDFS_HAVE_FALLOC_FL_PUNCH_HOLE

/* [CMake] Other */
#cmakedefine HAVE_CRCUTIL
Expand Down
5 changes: 5 additions & 0 deletions doc/mfschunkserver.cfg.5.txt
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ chunk test period in seconds (default is 10)
whether to remove each chunk from page cache when closing it to reduce cache pressure
generated by chunkserver (default is 0, i.e. no)

*HDD_PUNCH_HOLES*::
if enabled then chunkserver detects zero-filled 4 KiB blocks in written chunk data and frees the
corresponding file system blocks (decreasing file system usage). This option works only on Linux
with file systems supporting punching holes (XFS, ext4, Btrfs, tmpfs); default is 0, i.e. no

*REPLICATION_BANDWIDTH_LIMIT_KBPS*::
limit how many kilobytes can be replicated from other chunkservers to this chunkserver in every
second (by default undefined, i.e. no limits)
Expand Down
55 changes: 55 additions & 0 deletions src/chunkserver/hddspacemgr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@
#include "common/platform.h"
#include "chunkserver/hddspacemgr.h"

// fallocate() and FALLOC_FL_PUNCH_HOLE are GNU extensions of <fcntl.h>; request them
// only when the build system detected both.
// NOTE(review): feature-test macros take effect only when defined before the first
// system header is included - confirm the two project headers above do not pull in any.
#if defined(LIZARDFS_HAVE_FALLOCATE) && defined(LIZARDFS_HAVE_FALLOC_FL_PUNCH_HOLE) && !defined(_GNU_SOURCE)
#define _GNU_SOURCE
#endif

#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
Expand Down Expand Up @@ -118,6 +122,8 @@ static std::atomic<bool> MooseFSChunkFormat;

static std::atomic<bool> PerformFsync;

/// Value of the HDD_PUNCH_HOLES option: when true, zero-filled blocks of written chunk data
/// are deallocated. Atomic like the neighbouring flags, because the value is rewritten in
/// hdd_reload() and read in hdd_int_punch_holes() (presumably on other threads - the plain
/// bool would then be a data race).
static std::atomic<bool> gPunchHolesInFiles;

/* folders data */
static folder *folderhead = NULL;

Expand Down Expand Up @@ -1839,6 +1845,49 @@ int hdd_int_read_block_and_crc(Chunk* c, uint8_t* blockBuffer, uint8_t* crcBuffe
}
}

#if defined(LIZARDFS_HAVE_FALLOCATE) && defined(LIZARDFS_HAVE_FALLOC_FL_PUNCH_HOLE)
/// Returns true when all `size` bytes starting at `data` are zero.
/// Operates on bytes only, so `data` does not need any particular alignment.
static bool hdd_int_is_zero_filled(const uint8_t *data, uint32_t size) {
	// First byte is zero and every byte equals its successor => the whole range is zero.
	return size == 0 || (data[0] == 0 && memcmp(data, data + 1, size - 1) == 0);
}

/// Deallocates `length` bytes starting at file offset `start` of `fd` without changing
/// the file size. Best-effort: the call is restarted when interrupted by a signal,
/// any other failure is ignored.
static void hdd_int_punch_hole_range(int fd, uint32_t start, uint32_t length) {
	int ret;
	do {
		ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, start, length);
	} while (ret != 0 && errno == EINTR);
	// Remaining errors (e.g. a file system without hole support) are deliberately not
	// reported: punching holes only saves space, so a failure just leaves the blocks allocated.
}
#endif

/**
 * Frees file system blocks that back zero-filled parts of a chunk file.
 *
 * Scans `buffer` in 4096-byte steps that are aligned with respect to the file offset
 * and punches a hole (fallocate with FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE) over
 * every maximal run of all-zero blocks. Does nothing when gPunchHolesInFiles is false
 * or when the platform lacks fallocate/FALLOC_FL_PUNCH_HOLE.
 *
 * \param c      chunk whose descriptor (c->fd) is passed to fallocate
 * \param buffer data corresponding to file range [offset, offset + size)
 * \param offset position in the chunk file that buffer[0] corresponds to
 * \param size   number of bytes available in buffer
 */
void hdd_int_punch_holes(Chunk *c, const uint8_t *buffer, uint32_t offset, uint32_t size) {
#if defined(LIZARDFS_HAVE_FALLOCATE) && defined(LIZARDFS_HAVE_FALLOC_FL_PUNCH_HOLE)
	if (!gPunchHolesInFiles) {
		return;
	}

	constexpr uint32_t block_size = 4096;

	// Skip the leading bytes up to the first block boundary of the file; only whole,
	// aligned blocks are examined.
	const uint32_t misalignment = offset % block_size;
	uint32_t p = misalignment == 0 ? 0 : block_size - misalignment;

	// Current run of consecutive zero blocks (file offset and length); empty when hole_size == 0.
	uint32_t hole_start = 0;
	uint32_t hole_size = 0;

	for (; p + block_size <= size; p += block_size) {
		if (hdd_int_is_zero_filled(buffer + p, block_size)) {
			if (hole_size == 0) {
				hole_start = offset + p;
			}
			hole_size += block_size;
		} else if (hole_size > 0) {
			// A non-zero block ends the run: release it with a single call.
			hdd_int_punch_hole_range(c->fd, hole_start, hole_size);
			hole_size = 0;
		}
	}

	// Release a run that extends to the last examined block.
	if (hole_size > 0) {
		hdd_int_punch_hole_range(c->fd, hole_start, hole_size);
	}
#else
	(void)c;
	(void)buffer;
	(void)offset;
	(void)size;
#endif
}

/**
* Returns number of written bytes on success, -1 on failure.
*/
Expand All @@ -1861,6 +1910,7 @@ bool hdd_int_write_partial_block_and_crc(
hdd_report_damaged_chunk(c->chunkid, c->type());
return -1;
}
hdd_int_punch_holes(c, buffer, c->getBlockOffset(blockNum) + offset, size);
memcpy(mc->getCrcBuffer(blockNum), crcBuff, crcSize);
return size;
} else {
Expand All @@ -1881,6 +1931,7 @@ bool hdd_int_write_partial_block_and_crc(
hdd_report_damaged_chunk(c->chunkid, c->type());
return -1;
}
hdd_int_punch_holes(c, buffer, c->getBlockOffset(blockNum) + offset + crcSize, size);
return crcSize + size;
}
}
Expand Down Expand Up @@ -3998,6 +4049,8 @@ void hdd_reload(void) {
HDDTestFreq = cfg_getuint32("HDD_TEST_FREQ",10);
zassert(pthread_mutex_unlock(&testlock));

gPunchHolesInFiles = cfg_getuint32("HDD_PUNCH_HOLES", 0);

hdd_int_set_chunk_format();
char *LeaveFreeStr = cfg_getstr("HDD_LEAVE_SPACE_DEFAULT", gLeaveSpaceDefaultDefaultStrValue);
if (hdd_size_parse(LeaveFreeStr,&gLeaveFree)<0) {
Expand Down Expand Up @@ -4090,6 +4143,8 @@ int hdd_init(void) {
gAdviseNoCache = cfg_getuint32("HDD_ADVISE_NO_CACHE", 0);
HDDTestFreq = cfg_getuint32("HDD_TEST_FREQ",10);

gPunchHolesInFiles = cfg_getuint32("HDD_PUNCH_HOLES", 0);

MooseFSChunkFormat = true;
hdd_int_set_chunk_format();
main_reloadregister(hdd_reload);
Expand Down
7 changes: 7 additions & 0 deletions src/data/mfschunkserver.cfg.in
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,13 @@
## (Default: 0)
# HDD_ADVISE_NO_CACHE = 0

## If enabled then chunkserver detects zero values in chunk data and frees
## corresponding file blocks (decreasing file system usage).
## This option works only on Linux
## with file systems supporting punching holes (XFS, ext4, Btrfs, tmpfs).
## (Default: 0)
# HDD_PUNCH_HOLES = 1

## Limit how many kilobytes can be replicated from other chunkservers to
## this chunkserver in every second (by default undefined, i.e. no limits)
# REPLICATION_BANDWIDTH_LIMIT_KBPS = 8192
Expand Down
47 changes: 47 additions & 0 deletions tests/test_suites/ShortSystemTests/test_punching_holes.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Start a local installation with one chunkserver that has hole punching enabled.
CHUNKSERVERS=1 \
	CHUNKSERVER_EXTRA_CONFIG="HDD_PUNCH_HOLES = 1" \
	setup_local_empty_lizardfs info

# Scratch file created on the mount, and the value read from the chunkserver's hdd
# entry (used below as the root directory for locating the chunk file).
# Expansions are quoted so paths containing whitespace are not split.
file=$(mktemp -p "${info[mount0]}")
hdd=$(cat "${info[chunkserver0_hdd]}")

# Succeeds when gcc can compile and link a program that calls fallocate() with
# FALLOC_FL_PUNCH_HOLE, i.e. when the toolchain exposes the hole-punching API.
# The program is only built, never run. The output path is quoted so a TEMP_DIR
# containing whitespace is passed to gcc as a single argument.
test_fallocate() {
	gcc -o "$TEMP_DIR/punch_test.o" -xc - <<'EOF'
#define _GNU_SOURCE
#include <fcntl.h>
int main() {
 int fd = 1;
 fallocate(fd, FALLOC_FL_PUNCH_HOLE, 0, 1024);
 return 0;
}
EOF
}

# Finish the test right away when the hole-punching API cannot be compiled here.
if ! test_fallocate; then
	test_end
fi

# Write 16 KiB of random data and record how many blocks the chunk file occupies.
dd if=/dev/urandom of="$file" count=16 bs=1024 conv=fsync

sleep 1
chunk_file=$(find "$hdd" -name 'chunk_*.???')
full_size=$(stat -c "%b" "$chunk_file")

# Overwrite part of the file with zeros; the chunk file should then occupy fewer blocks.
# NOTE(review): without conv=notrunc GNU dd truncates the output file at the seek offset
# before writing, so part of the size drop may come from truncation rather than from
# hole punching - confirm whether conv=fsync,notrunc was intended.
dd if=/dev/zero of="$file" count=6 bs=1024 seek=3 conv=fsync

sleep 1
chunk_file=$(find "$hdd" -name 'chunk_*.???')
sparse_size=$(stat -c "%b" "$chunk_file")

if (( $sparse_size >= $full_size )); then
	test_add_failure "File is not sparse!"
fi

# Test if a file with a punched hole is read correctly: apply the same writes to a local
# reference file and to a file on the mount, then compare their contents.
dd if=/dev/urandom of="$TEMP_DIR/test_punch_hole.bin" count=16 bs=1024 conv=fsync
cp "$TEMP_DIR/test_punch_hole.bin" "${info[mount0]}/test_punch_hole.bin"

dd if=/dev/zero of="$TEMP_DIR/test_punch_hole.bin" count=10 bs=1024 seek=3 conv=fsync
dd if=/dev/zero of="${info[mount0]}/test_punch_hole.bin" count=10 bs=1024 seek=3 conv=fsync

cmp "$TEMP_DIR/test_punch_hole.bin" "${info[mount0]}/test_punch_hole.bin"

0 comments on commit 96b6491

Please sign in to comment.