diff --git a/WHATSNEW.txt b/WHATSNEW.txt index cc03f2016c89..475ebcbdc840 100644 --- a/WHATSNEW.txt +++ b/WHATSNEW.txt @@ -65,6 +65,15 @@ commands have been removed from rpcclient. "net rpc vampire keytab" from Active Directory domains continues to be supported. +vfs_aio_linux module removed +============================ + +The current Linux kernel aio does not match what Samba would +do. Shipping code that uses it leads people to false +assumptions. Samba implements async I/O based on threads by default, +there is no special module required to see benefits of read and write +request being sent do the disk in parallel. + KNOWN ISSUES ============ diff --git a/docs-xml/manpages/vfs_aio_linux.8.xml b/docs-xml/manpages/vfs_aio_linux.8.xml deleted file mode 100644 index 3a009501820f..000000000000 --- a/docs-xml/manpages/vfs_aio_linux.8.xml +++ /dev/null @@ -1,115 +0,0 @@ - - - - - - vfs_aio_linux - 8 - Samba - System Administration tools - 4.7 - - - - - vfs_aio_linux - implement async I/O in Samba vfs using Linux kernel aio calls - - - - - vfs objects = aio_linux - - - - - DESCRIPTION - - This VFS module is part of the - samba - 7 suite. - - The aio_linux VFS module enables asynchronous - I/O for Samba on Linux kernels that have the kernel AIO calls available - without using the Posix AIO interface. Posix AIO can suffer from severe - limitations. For example, on some Linux versions the - real-time signals that it uses are broken under heavy load. - Other systems only allow AIO when special kernel modules are - loaded or only allow a certain system-wide amount of async - requests being scheduled. Systems based on glibc (most Linux - systems) only allow a single outstanding request per file - descriptor which essentially makes Posix AIO useless on systems - using the glibc implementation. - - To work around all these limitations, the aio_linux module - was written. It uses the Linux kernel AIO interface instead of the - internal Posix AIO interface to allow read and write calls - to be processed asynchronously. A queue size of 128 events - is used by default. To change this limit set the "aio num events" - parameter below. - - - - Note that the smb.conf parameters aio read size - and aio write size must also be set appropriately - for this module to be active. - - - This module MUST be listed last in any module stack as - the Samba VFS pread/pwrite interface is not thread-safe. This - module makes direct pread and pwrite system calls and does - NOT call the Samba VFS pread and pwrite interfaces. - - - - - - EXAMPLES - - Straight forward use: - - - - /data/ice - 1024 - 1024 - aio_linux - - - - - - OPTIONS - - - - - aio_linux:aio num events = INTEGER - - Set the maximum size of the event queue - that is used to limit outstanding IO requests. - - By default this is set to 128. - - - - - - - VERSION - - This man page is correct for version 4.0 of the Samba suite. - - - - - AUTHOR - - The original Samba software and related utilities - were created by Andrew Tridgell. Samba is now developed - by the Samba Team as an Open Source project similar - to the way the Linux kernel is developed. - - - - diff --git a/docs-xml/wscript_build b/docs-xml/wscript_build index afba0b9e8416..0bd08d11cdb9 100644 --- a/docs-xml/wscript_build +++ b/docs-xml/wscript_build @@ -52,7 +52,6 @@ manpages=''' manpages/vfs_acl_tdb.8 manpages/vfs_acl_xattr.8 manpages/vfs_aio_fork.8 - manpages/vfs_aio_linux.8 manpages/vfs_aio_pthread.8 manpages/vfs_audit.8 manpages/vfs_btrfs.8 diff --git a/source3/modules/vfs_aio_linux.c b/source3/modules/vfs_aio_linux.c deleted file mode 100644 index e89c2905bad5..000000000000 --- a/source3/modules/vfs_aio_linux.c +++ /dev/null @@ -1,346 +0,0 @@ -/* - * Simulate Posix AIO using Linux kernel AIO. - * - * Copyright (C) Jeremy Allison 2012 - * Copyright (C) Volker Lendecke 2012 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include "includes.h" -#include "system/filesys.h" -#include "smbd/smbd.h" -#include "smbd/globals.h" -#include "lib/util/tevent_unix.h" -#include "lib/util/sys_rw.h" -#include -#include -#include "smbprofile.h" - -static int event_fd = -1; -static io_context_t io_ctx; -static struct tevent_fd *aio_read_event; -static bool used; -static unsigned num_busy; - -static void aio_linux_done(struct tevent_context *event_ctx, - struct tevent_fd *event, - uint16_t flags, void *private_data); - -/************************************************************************ - Housekeeping. Cleanup if no activity for 30 seconds. -***********************************************************************/ - -static void aio_linux_housekeeping(struct tevent_context *event_ctx, - struct tevent_timer *te, - struct timeval now, - void *private_data) -{ - /* Remove this timed event handler. */ - TALLOC_FREE(te); - - if ((num_busy != 0) || used) { - used = false; - - /* Still busy. Look again in 30 seconds. */ - (void)tevent_add_timer(event_ctx, - NULL, - timeval_current_ofs(30, 0), - aio_linux_housekeeping, - NULL); - return; - } - - /* No activity for 30 seconds. Close out kernel resources. */ - io_queue_release(io_ctx); - memset(&io_ctx, '\0', sizeof(io_ctx)); - - if (event_fd != -1) { - close(event_fd); - event_fd = -1; - } - - TALLOC_FREE(aio_read_event); -} - -/************************************************************************ - Ensure event fd and aio context are initialized. -***********************************************************************/ - -static bool init_aio_linux(struct vfs_handle_struct *handle) -{ - struct tevent_timer *te = NULL; - - if (event_fd != -1) { - /* Already initialized. */ - return true; - } - - /* Schedule a shutdown event for 30 seconds from now. */ - te = tevent_add_timer(handle->conn->sconn->ev_ctx, - NULL, - timeval_current_ofs(30, 0), - aio_linux_housekeeping, - NULL); - - if (te == NULL) { - goto fail; - } - - event_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); - if (event_fd == -1) { - goto fail; - } - - aio_read_event = tevent_add_fd(server_event_context(), - NULL, - event_fd, - TEVENT_FD_READ, - aio_linux_done, - NULL); - if (aio_read_event == NULL) { - goto fail; - } - - if (io_queue_init(lp_aio_max_threads(), &io_ctx)) { - goto fail; - } - - DEBUG(10,("init_aio_linux: initialized with up to %d events\n", - (int)lp_aio_max_threads())); - - return true; - - fail: - - DEBUG(10,("init_aio_linux: initialization failed\n")); - - TALLOC_FREE(te); - TALLOC_FREE(aio_read_event); - if (event_fd != -1) { - close(event_fd); - event_fd = -1; - } - memset(&io_ctx, '\0', sizeof(io_ctx)); - return false; -} - -struct aio_linux_state { - struct iocb event_iocb; - ssize_t ret; - struct vfs_aio_state vfs_aio_state; - struct timespec start; -}; - -static struct tevent_req *aio_linux_pread_send( - struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx, - struct tevent_context *ev, struct files_struct *fsp, - void *data, size_t n, off_t offset) -{ - struct tevent_req *req; - struct aio_linux_state *state; - struct iocb *piocb; - int ret; - - req = tevent_req_create(mem_ctx, &state, struct aio_linux_state); - if (req == NULL) { - return NULL; - } - if (!init_aio_linux(handle)) { - tevent_req_error(req, EIO); - return tevent_req_post(req, ev); - } - - io_prep_pread(&state->event_iocb, fsp->fh->fd, data, n, offset); - io_set_eventfd(&state->event_iocb, event_fd); - state->event_iocb.data = req; - - piocb = &state->event_iocb; - - PROFILE_TIMESTAMP(&state->start); - ret = io_submit(io_ctx, 1, &piocb); - if (ret < 0) { - tevent_req_error(req, -ret); - return tevent_req_post(req, ev); - } - num_busy += 1; - used = true; - return req; -} - -static struct tevent_req *aio_linux_pwrite_send( - struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx, - struct tevent_context *ev, struct files_struct *fsp, - const void *data, size_t n, off_t offset) -{ - struct tevent_req *req; - struct aio_linux_state *state; - struct iocb *piocb; - int ret; - - req = tevent_req_create(mem_ctx, &state, struct aio_linux_state); - if (req == NULL) { - return NULL; - } - if (!init_aio_linux(handle)) { - tevent_req_error(req, EIO); - return tevent_req_post(req, ev); - } - - io_prep_pwrite(&state->event_iocb, fsp->fh->fd, discard_const(data), - n, offset); - io_set_eventfd(&state->event_iocb, event_fd); - state->event_iocb.data = req; - - piocb = &state->event_iocb; - - PROFILE_TIMESTAMP(&state->start); - ret = io_submit(io_ctx, 1, &piocb); - if (ret < 0) { - tevent_req_error(req, -ret); - return tevent_req_post(req, ev); - } - num_busy += 1; - used = true; - return req; -} - -static struct tevent_req *aio_linux_fsync_send( - struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx, - struct tevent_context *ev, struct files_struct *fsp) -{ - struct tevent_req *req; - struct aio_linux_state *state; - struct iocb *piocb; - int ret; - - req = tevent_req_create(mem_ctx, &state, struct aio_linux_state); - if (req == NULL) { - return NULL; - } - if (!init_aio_linux(handle)) { - tevent_req_error(req, EIO); - return tevent_req_post(req, ev); - } - - io_prep_fsync(&state->event_iocb, fsp->fh->fd); - io_set_eventfd(&state->event_iocb, event_fd); - state->event_iocb.data = req; - - piocb = &state->event_iocb; - - PROFILE_TIMESTAMP(&state->start); - ret = io_submit(io_ctx, 1, &piocb); - if (ret < 0) { - tevent_req_error(req, -ret); - return tevent_req_post(req, ev); - } - num_busy += 1; - used = true; - return req; -} - -static void aio_linux_done(struct tevent_context *event_ctx, - struct tevent_fd *event, - uint16_t flags, void *private_data) -{ - uint64_t num_events = 0; - struct timespec end; - - DEBUG(10, ("aio_linux_done called with flags=%d\n", - (int)flags)); - - PROFILE_TIMESTAMP(&end); - - /* Read the number of events available. */ - if (sys_read(event_fd, &num_events, sizeof(num_events)) != - sizeof(num_events)) { - smb_panic("aio_linux_handle_completion: invalid read"); - } - - while (num_events > 0) { - struct timespec ts = { 0, }; - struct io_event finished; - struct tevent_req *req; - struct aio_linux_state *state; - int ret; - - ret = io_getevents(io_ctx, 1, 1, &finished, &ts); - if (ret < 0) { - DEBUG(1, ("aio_linux_done: io_getevents returned %s\n", - strerror(-ret))); - return; - } - if (ret == 0) { - DEBUG(10, ("aio_linux_done: io_getvents returned " - "0\n")); - continue; - } - - num_busy -= 1; - - req = talloc_get_type_abort(finished.data, - struct tevent_req); - state = tevent_req_data(req, struct aio_linux_state); - - if (finished.res < 0) { - state->ret = -1; - state->vfs_aio_state.error = -finished.res; - } else { - state->ret = finished.res; - } - state->vfs_aio_state.duration = nsec_time_diff(&end, &state->start); - tevent_req_done(req); - num_events -= 1; - } -} - -static ssize_t aio_linux_recv(struct tevent_req *req, - struct vfs_aio_state *vfs_aio_state) -{ - struct aio_linux_state *state = tevent_req_data( - req, struct aio_linux_state); - - if (tevent_req_is_unix_error(req, &vfs_aio_state->error)) { - return -1; - } - *vfs_aio_state = state->vfs_aio_state; - return state->ret; -} - -static int aio_linux_int_recv(struct tevent_req *req, - struct vfs_aio_state *vfs_aio_state) -{ - /* - * Use implicit conversion ssize_t->int - */ - return aio_linux_recv(req, vfs_aio_state); -} - -static struct vfs_fn_pointers vfs_aio_linux_fns = { - .pread_send_fn = aio_linux_pread_send, - .pread_recv_fn = aio_linux_recv, - .pwrite_send_fn = aio_linux_pwrite_send, - .pwrite_recv_fn = aio_linux_recv, - .fsync_send_fn = aio_linux_fsync_send, - .fsync_recv_fn = aio_linux_int_recv, -}; - -static_decl_vfs; -NTSTATUS vfs_aio_linux_init(TALLOC_CTX *ctx) -{ - return smb_register_vfs(SMB_VFS_INTERFACE_VERSION, - "aio_linux", &vfs_aio_linux_fns); -} diff --git a/source3/modules/wscript_build b/source3/modules/wscript_build index ab9be384189a..4e4fdb98a1d9 100644 --- a/source3/modules/wscript_build +++ b/source3/modules/wscript_build @@ -357,14 +357,6 @@ bld.SAMBA3_MODULE('vfs_aio_pthread', internal_module=bld.SAMBA3_IS_STATIC_MODULE('vfs_aio_pthread'), enabled=bld.SAMBA3_IS_ENABLED_MODULE('vfs_aio_pthread')) -bld.SAMBA3_MODULE('vfs_aio_linux', - subsystem='vfs', - source='vfs_aio_linux.c', - deps='samba-util aio', - init_function='', - internal_module=bld.SAMBA3_IS_STATIC_MODULE('vfs_aio_linux'), - enabled=bld.SAMBA3_IS_ENABLED_MODULE('vfs_aio_linux')) - bld.SAMBA3_MODULE('vfs_preopen', subsystem='vfs', source='vfs_preopen.c', diff --git a/source3/wscript b/source3/wscript index e311f5f6efe7..d45a7d9ec606 100644 --- a/source3/wscript +++ b/source3/wscript @@ -551,29 +551,6 @@ return acl_get_perm_np(permset_d, perm); msg='Checking for openat', headers='fcntl.h') - if host_os.rfind('linux') > -1: - conf.CHECK_FUNCS_IN('io_submit', 'aio') - conf.CHECK_CODE(''' -struct io_event ioev; -struct iocb *ioc; -io_context_t ctx; -struct timespec ts; -int fd; -char *buf; -fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); -io_queue_init(128,&ctx); -io_prep_pwrite(ioc, 1, buf, 1, 0); -io_prep_pread(ioc, 1, buf, 1, 0); -io_set_eventfd(ioc, fd); -io_set_callback(ioc, (io_callback_t)(0)); -io_submit(ctx, 1, &ioc); -io_getevents(ctx, 1, 1, &ioev, &ts); -''', - 'HAVE_LINUX_KERNEL_AIO', - msg='Checking for linux kernel asynchronous io support', - headers='unistd.h stdlib.h sys/types.h fcntl.h sys/eventfd.h libaio.h', - lib='aio') - conf.CHECK_CODE(''' struct msghdr msg; union { @@ -1717,9 +1694,6 @@ main() { if Options.options.with_pthreadpool: default_shared_modules.extend(TO_LIST('vfs_aio_pthread')) - if conf.CONFIG_SET('HAVE_LINUX_KERNEL_AIO'): - default_shared_modules.extend(TO_LIST('vfs_aio_linux')) - if conf.CONFIG_SET('HAVE_LDAP'): default_static_modules.extend(TO_LIST('pdb_ldapsam idmap_ldap'))