Skip to content

Commit

Permalink
shared/rm-rf: loop over nested directories instead of instead of recu…
Browse files Browse the repository at this point in the history
…rsing

To remove directory structures, we need to remove the innermost items first,
and then recursively remove higher-level directories. We would recursively
descend into directories and invoke rm_rf_children and rm_rm_children_inner.
This is problematic when too many directories are nested.

Instead, let's create a "TODO" queue. In the the queue, for each level we
hold the DIR* object we were working on, and the name of the directory. This
allows us to leave a partially-processed directory, and restart the removal
loop one level down. When done with the inner directory, we use the name to
unlinkat() it from the parent, and proceed with the removal of other items.

Because the nesting is increased by one level, it is best to view this patch
with -b/--ignore-space-change.

This fixes CVE-2021-3997, https://bugzilla.redhat.com/show_bug.cgi?id=2024639.
The issue was reported and patches reviewed by Qualys Team.
Mauro Matteo Cascella and Riccardo Schirone from Red Hat handled the disclosure.
  • Loading branch information
keszybz committed Dec 21, 2021
1 parent 84ced33 commit 5b1cf7a
Showing 1 changed file with 112 additions and 47 deletions.
159 changes: 112 additions & 47 deletions src/shared/rm-rf.c
Expand Up @@ -52,7 +52,6 @@ static int patch_dirfd_mode(
}

int unlinkat_harder(int dfd, const char *filename, int unlink_flags, RemoveFlags remove_flags) {

mode_t old_mode;
int r;

Expand Down Expand Up @@ -116,12 +115,13 @@ int fstatat_harder(int dfd,
return 0;
}

static int rm_rf_children_inner(
static int rm_rf_inner_child(
int fd,
const char *fname,
int is_dir,
RemoveFlags flags,
const struct stat *root_dev) {
const struct stat *root_dev,
bool allow_recursion) {

struct stat st;
int r, q = 0;
Expand All @@ -141,9 +141,7 @@ static int rm_rf_children_inner(
}

if (is_dir) {
_cleanup_close_ int subdir_fd = -1;

/* if root_dev is set, remove subdirectories only if device is same */
/* If root_dev is set, remove subdirectories only if device is same */
if (root_dev && st.st_dev != root_dev->st_dev)
return 0;

Expand All @@ -155,7 +153,6 @@ static int rm_rf_children_inner(
return 0;

if ((flags & REMOVE_SUBVOLUME) && btrfs_might_be_subvol(&st)) {

/* This could be a subvolume, try to remove it */

r = btrfs_subvol_remove_fd(fd, fname, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA);
Expand All @@ -169,13 +166,16 @@ static int rm_rf_children_inner(
return 1;
}

subdir_fd = openat(fd, fname, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME);
if (!allow_recursion)
return -EISDIR;

int subdir_fd = openat(fd, fname, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME);
if (subdir_fd < 0)
return -errno;

/* We pass REMOVE_PHYSICAL here, to avoid doing the fstatfs() to check the file system type
* again for each directory */
q = rm_rf_children(TAKE_FD(subdir_fd), flags | REMOVE_PHYSICAL, root_dev);
q = rm_rf_children(subdir_fd, flags | REMOVE_PHYSICAL, root_dev);

} else if (flags & REMOVE_ONLY_DIRECTORIES)
return 0;
Expand All @@ -188,62 +188,127 @@ static int rm_rf_children_inner(
return 1;
}

typedef struct TodoEntry {
DIR *dir; /* A directory that we were operating on. */
char *dirname; /* The filename of that directory itself. */
} TodoEntry;

static void free_todo_entries(TodoEntry **todos) {
for (TodoEntry *x = *todos; x && x->dir; x++) {
closedir(x->dir);
free(x->dirname);
}

freep(todos);
}

int rm_rf_children(
int fd,
RemoveFlags flags,
const struct stat *root_dev) {

_cleanup_closedir_ DIR *d = NULL;
_cleanup_(free_todo_entries) TodoEntry *todos = NULL;
size_t n_todo = 0;
_cleanup_free_ char *dirname = NULL; /* Set when we are recursing and want to delete ourselves */
int ret = 0, r;

assert(fd >= 0);
/* Return the first error we run into, but nevertheless try to go on.
* The passed fd is closed in all cases, including on failure. */

for (;;) { /* This loop corresponds to the directory nesting level. */
_cleanup_closedir_ DIR *d = NULL;

if (n_todo > 0) {
/* We know that we are in recursion here, because n_todo is set.
* We need to remove the inner directory we were operating on. */
assert(dirname);
r = unlinkat_harder(dirfd(todos[n_todo-1].dir), dirname, AT_REMOVEDIR, flags);
if (r < 0 && r != -ENOENT && ret == 0)
ret = r;
dirname = mfree(dirname);

/* And now let's back out one level up */
n_todo --;
d = TAKE_PTR(todos[n_todo].dir);
dirname = TAKE_PTR(todos[n_todo].dirname);

assert(d);
fd = dirfd(d); /* Retrieve the file descriptor from the DIR object */
assert(fd >= 0);
} else {
next_fd:
assert(fd >= 0);
d = fdopendir(fd);
if (!d) {
safe_close(fd);
return -errno;
}
fd = dirfd(d); /* We donated the fd to fdopendir(). Let's make sure we sure we have
* the right descriptor even if it were to internally invalidate the
* one we passed. */

if (!(flags & REMOVE_PHYSICAL)) {
struct statfs sfs;

if (fstatfs(fd, &sfs) < 0)
return -errno;

if (is_physical_fs(&sfs)) {
/* We refuse to clean physical file systems with this call, unless
* explicitly requested. This is extra paranoia just to be sure we
* never ever remove non-state data. */

_cleanup_free_ char *path = NULL;

(void) fd_get_path(fd, &path);
return log_error_errno(SYNTHETIC_ERRNO(EPERM),
"Attempted to remove disk file system under \"%s\", and we can't allow that.",
strna(path));
}
}
}

/* This returns the first error we run into, but nevertheless tries to go on. This closes the passed
* fd, in all cases, including on failure. */
FOREACH_DIRENT_ALL(de, d, return -errno) {
int is_dir;

d = fdopendir(fd);
if (!d) {
safe_close(fd);
return -errno;
}
if (dot_or_dot_dot(de->d_name))
continue;

if (!(flags & REMOVE_PHYSICAL)) {
struct statfs sfs;
is_dir = de->d_type == DT_UNKNOWN ? -1 : de->d_type == DT_DIR;

if (fstatfs(dirfd(d), &sfs) < 0)
return -errno;
r = rm_rf_inner_child(fd, de->d_name, is_dir, flags, root_dev, false);
if (r == -EISDIR) {
/* Push the current working state onto the todo list */

if (is_physical_fs(&sfs)) {
/* We refuse to clean physical file systems with this call, unless explicitly
* requested. This is extra paranoia just to be sure we never ever remove non-state
* data. */
if (!GREEDY_REALLOC0(todos, n_todo + 2))
return log_oom();

_cleanup_free_ char *path = NULL;
_cleanup_free_ char *newdirname = strdup(de->d_name);
if (!newdirname)
return log_oom();

(void) fd_get_path(fd, &path);
return log_error_errno(SYNTHETIC_ERRNO(EPERM),
"Attempted to remove disk file system under \"%s\", and we can't allow that.",
strna(path));
}
}
int newfd = openat(fd, de->d_name,
O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME);
if (newfd >= 0) {
todos[n_todo++] = (TodoEntry) { TAKE_PTR(d), TAKE_PTR(dirname) };
fd = newfd;
dirname = TAKE_PTR(newdirname);

FOREACH_DIRENT_ALL(de, d, return -errno) {
int is_dir;
goto next_fd;

if (dot_or_dot_dot(de->d_name))
continue;
} else if (errno != -ENOENT && ret == 0)
ret = -errno;

is_dir =
de->d_type == DT_UNKNOWN ? -1 :
de->d_type == DT_DIR;
} else if (r < 0 && r != -ENOENT && ret == 0)
ret = r;
}

r = rm_rf_children_inner(dirfd(d), de->d_name, is_dir, flags, root_dev);
if (r < 0 && r != -ENOENT && ret == 0)
ret = r;
}
if (FLAGS_SET(flags, REMOVE_SYNCFS) && syncfs(fd) < 0 && ret >= 0)
ret = -errno;

if (FLAGS_SET(flags, REMOVE_SYNCFS) && syncfs(dirfd(d)) < 0 && ret >= 0)
ret = -errno;
if (n_todo == 0)
break;
}

return ret;
}
Expand Down Expand Up @@ -335,5 +400,5 @@ int rm_rf_child(int fd, const char *name, RemoveFlags flags) {
if (FLAGS_SET(flags, REMOVE_ONLY_DIRECTORIES|REMOVE_SUBVOLUME))
return -EINVAL;

return rm_rf_children_inner(fd, name, -1, flags, NULL);
return rm_rf_inner_child(fd, name, -1, flags, NULL, true);
}

0 comments on commit 5b1cf7a

Please sign in to comment.