From 8131ca7b2699b8fa7dfb43f5bd0ac161f0841f94 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Mon, 21 Aug 2023 12:36:34 -0700 Subject: [PATCH 1/5] build: update gnulib submodule to latest --- gnulib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnulib b/gnulib index 46f9c21a..10ffa047 160000 --- a/gnulib +++ b/gnulib @@ -1 +1 @@ -Subproject commit 46f9c21a08245fe224fd975de8632b04a0256387 +Subproject commit 10ffa04786b32895df0d245ff5a7cf362d243b5e From bfee1d44a3d09a25586fe364c0cde74ebf0fa638 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Mon, 21 Aug 2023 12:52:14 -0700 Subject: [PATCH 2/5] Pacify gcc -Wanalyzer-fd-use-without-check * src/system.c (sys_exec_setmtime_script): Treat fds with more care. --- src/system.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/system.c b/src/system.c index b7e39f7c..e8370c55 100644 --- a/src/system.c +++ b/src/system.c @@ -963,16 +963,16 @@ sys_exec_setmtime_script (const char *script_name, FATAL_ERROR ((0, errno, _("chdir failed"))); } - close (0); - close (1); + close (p[0]); + if (dup2 (p[1], STDOUT_FILENO) < 0) + FATAL_ERROR ((0, errno, _("dup2 failed"))); + if (p[1] != STDOUT_FILENO) + close (p[1]); - if (open (dev_null, O_RDONLY) == -1) + close (STDIN_FILENO); + if (open (dev_null, O_RDONLY) != STDIN_FILENO) open_error (dev_null); - if (dup2 (p[1], 1) == -1) - FATAL_ERROR ((0, errno, _("dup2 failed"))); - close (p[0]); - priv_set_restore_linkdir (); xexec (command); } From a5afb367655ac6dcbc79ba7802a2479044184b38 Mon Sep 17 00:00:00 2001 From: Benjamin Woodruff Date: Mon, 21 Aug 2023 13:06:45 -0700 Subject: [PATCH 3/5] Fix O(n^2) time bug in --delay-directory-restore delayed_set_stat avoids inserting duplicate entries into delayed_set_stat_head. It was doing this by scanning the entire list. Normally this list is small, but if --delay-directory-restore is used (including automatically for incremental archives), this list grows with the total number of directories in the archive. The entire scan takes O(n) time. Extracting an archive with n directories could therefore take O(n^2) time. The included test uses AT_SKIP_LARGE_FILES, allowing it to optionally be skipped. It may execute slowly on certain filesystems or disks, as it creates thousands of directories. There are still potentially problematic O(n) scans in find_direct_ancestor and remove_delayed_set_stat, which this patch does not attempt to fix. * NEWS: Update. * src/extract.c (delayed_set_stat_table): Create a table for O(1) lookups of entries in the delayed_set_stat_head list. The list remains, as tracking insertion order is important. (dl_hash, dl_compare): New hash table helper functions. (delay_set_stat): Create the hash table, replace the O(n) list scan with a hash_lookup, insert new entries into the hash table. (remove_delayed_set_stat): Also remove entry from hash table. (apply_nonancestor_delayed_set_stat): Also remove entry from hash table. (extract_finish): Free the (empty) hash table. * tests/extrac26.at: New file. * tests/Makefile.am (TESTSUITE_AT): Include extrac26.at. * tests/testsuite.at: Include extrac26.at. --- NEWS | 5 ++++- src/extract.c | 39 +++++++++++++++++++++++++++++++++++---- tests/Makefile.am | 1 + tests/extrac26.at | 43 +++++++++++++++++++++++++++++++++++++++++++ tests/testsuite.at | 1 + 5 files changed, 84 insertions(+), 5 deletions(-) create mode 100644 tests/extrac26.at diff --git a/NEWS b/NEWS index 22d66500..4a8754cf 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,4 @@ -GNU tar NEWS - User visible changes. 2023-08-02 +GNU tar NEWS - User visible changes. 2023-08-21 Please send GNU tar bug reports to version TBD @@ -26,6 +26,9 @@ used, command output will be parsed using strptime(3). * Bug fixes +** Fixed O(n^2) time complexity bug for large numbers of directories when + extracting with --delay-directory-restore or reading incremental archives. + ** tar no longer uses alloca, fixing an unlikely stack overflow. diff --git a/src/extract.c b/src/extract.c index 314d8bc0..a0263bb5 100644 --- a/src/extract.c +++ b/src/extract.c @@ -130,6 +130,9 @@ struct delayed_set_stat static struct delayed_set_stat *delayed_set_stat_head; +/* Table of delayed stat updates hashed by path; null if none. */ +static Hash_table *delayed_set_stat_table; + /* A link whose creation we have delayed. */ struct delayed_link { @@ -214,6 +217,20 @@ dl_compare (void const *a, void const *b) return (da->dev == db->dev) & (da->ino == db->ino); } +static size_t +ds_hash (void const *entry, size_t table_size) +{ + struct delayed_set_stat const *ds = entry; + return hash_string (ds->file_name, table_size); +} + +static bool +ds_compare (void const *a, void const *b) +{ + struct delayed_set_stat const *dsa = a, *dsb = b; + return strcmp (dsa->file_name, dsb->file_name) == 0; +} + /* Set up to extract files. */ void extr_init (void) @@ -513,11 +530,14 @@ delay_set_stat (char const *file_name, struct tar_stat_info const *st, size_t file_name_len = strlen (file_name); struct delayed_set_stat *data; - for (data = delayed_set_stat_head; data; data = data->next) - if (strcmp (data->file_name, file_name) == 0) - break; + if (! (delayed_set_stat_table + || (delayed_set_stat_table = hash_initialize (0, 0, ds_hash, + ds_compare, NULL)))) + xalloc_die (); + + const struct delayed_set_stat key = { .file_name = (char*) file_name }; - if (data) + if ((data = hash_lookup (delayed_set_stat_table, &key)) != NULL) { if (data->interdir) { @@ -541,6 +561,8 @@ delay_set_stat (char const *file_name, struct tar_stat_info const *st, delayed_set_stat_head = data; data->file_name_len = file_name_len; data->file_name = xstrdup (file_name); + if (! hash_insert (delayed_set_stat_table, data)) + xalloc_die (); data->after_links = false; if (st) { @@ -652,6 +674,7 @@ remove_delayed_set_stat (const char *fname) if (chdir_current == data->change_dir && strcmp (data->file_name, fname) == 0) { + hash_remove (delayed_set_stat_table, data); free_delayed_set_stat (data); if (prev) prev->next = next; @@ -1000,6 +1023,7 @@ apply_nonancestor_delayed_set_stat (char const *file_name, bool after_links) } delayed_set_stat_head = data->next; + hash_remove (delayed_set_stat_table, data); free_delayed_set_stat (data); } } @@ -1962,6 +1986,13 @@ extract_finish (void) /* Finally, fix the status of directories that are ancestors of delayed links. */ apply_nonancestor_delayed_set_stat ("", 1); + + /* This table should be empty after apply_nonancestor_delayed_set_stat */ + if (delayed_set_stat_table != NULL) + { + hash_free (delayed_set_stat_table); + delayed_set_stat_table = NULL; + } } bool diff --git a/tests/Makefile.am b/tests/Makefile.am index b696f016..57c9696f 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -133,6 +133,7 @@ TESTSUITE_AT = \ extrac23.at\ extrac24.at\ extrac25.at\ + extrac26.at\ filerem01.at\ filerem02.at\ grow.at\ diff --git a/tests/extrac26.at b/tests/extrac26.at new file mode 100644 index 00000000..48fe468b --- /dev/null +++ b/tests/extrac26.at @@ -0,0 +1,43 @@ +# Test suite for GNU tar. -*- autotest -*- +# Copyright 2022-2023 Free Software Foundation, Inc. +# +# This file is part of GNU tar. +# +# GNU tar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GNU tar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +AT_SETUP([extract a large directory tree with --delay-directory-restore]) +AT_KEYWORDS([extract extrac26]) + +AT_TAR_CHECK([ +AT_SKIP_LARGE_FILES +AT_TIMEOUT_PREREQ + +echo Creating dirtree +awk 'BEGIN { for (j = 0; j < 300; j++) for (k = 0; k < 300; k++) print "dirtree/" j "/" k }' | \ + xargs mkdir -p + +echo Creating archive +tar -cf archive.tar dirtree + +echo Extracting archive +mkdir output +timeout 15 tar -xf archive.tar --delay-directory-restore -C output +], +[0], +[Creating dirtree +Creating archive +Extracting archive +], +[],[],[],[gnu]) + +AT_CLEANUP diff --git a/tests/testsuite.at b/tests/testsuite.at index 3a98f865..7cfa636f 100644 --- a/tests/testsuite.at +++ b/tests/testsuite.at @@ -349,6 +349,7 @@ m4_include([extrac22.at]) m4_include([extrac23.at]) m4_include([extrac24.at]) m4_include([extrac25.at]) +m4_include([extrac26.at]) m4_include([backup01.at]) From 12b58a69aa97acf12403065fb7b4a2b6e22cd7e9 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Mon, 21 Aug 2023 13:40:37 -0700 Subject: [PATCH 4/5] Simplify recently-added hash code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * src/extract.c (delay_set_stat): Simplify hash lookup; no need to initialize members other than file_name. Avoid assignment in ‘if’ when it’s easy. (extract_finish): Do not bother to free when we are about to exit. --- src/extract.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/extract.c b/src/extract.c index a0263bb5..0261134f 100644 --- a/src/extract.c +++ b/src/extract.c @@ -535,9 +535,11 @@ delay_set_stat (char const *file_name, struct tar_stat_info const *st, ds_compare, NULL)))) xalloc_die (); - const struct delayed_set_stat key = { .file_name = (char*) file_name }; + struct delayed_set_stat key; + key.file_name = (char *) file_name; - if ((data = hash_lookup (delayed_set_stat_table, &key)) != NULL) + data = hash_lookup (delayed_set_stat_table, &key); + if (data) { if (data->interdir) { @@ -1847,7 +1849,7 @@ extract_archive (void) if (!delay_directory_restore_option) { int dir = chdir_current; - apply_nonancestor_delayed_set_stat (current_stat_info.file_name, 0); + apply_nonancestor_delayed_set_stat (current_stat_info.file_name, false); chdir_do (dir); } @@ -1961,7 +1963,7 @@ apply_delayed_links (void) for (struct delayed_link *ds = delayed_link_head; ds; ds = ds->next) apply_delayed_link (ds); - if (false) + if (false && delayed_link_table) { /* There is little point to freeing, as we are about to exit, and freeing is more likely to cause than cure trouble. @@ -1977,7 +1979,7 @@ void extract_finish (void) { /* First, fix the status of ordinary directories that need fixing. */ - apply_nonancestor_delayed_set_stat ("", 0); + apply_nonancestor_delayed_set_stat ("", false); /* Then, apply delayed links, so that they don't affect delayed directory status-setting for ordinary directories. */ @@ -1985,11 +1987,13 @@ extract_finish (void) /* Finally, fix the status of directories that are ancestors of delayed links. */ - apply_nonancestor_delayed_set_stat ("", 1); + apply_nonancestor_delayed_set_stat ("", true); - /* This table should be empty after apply_nonancestor_delayed_set_stat */ - if (delayed_set_stat_table != NULL) + /* This table should be empty after apply_nonancestor_delayed_set_stat. */ + if (false && delayed_set_stat_table) { + /* There is little point to freeing, as we are about to exit, + and freeing is more likely to cause than cure trouble. */ hash_free (delayed_set_stat_table); delayed_set_stat_table = NULL; } From a9a8990fb3b8ff4765151bccea9667e4b6a4fc0c Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Mon, 21 Aug 2023 13:41:42 -0700 Subject: [PATCH 5/5] Bump extrac26 timeout * tests/extrac26.at: Increase timeout from 15 to 60 s. On my old machine it took 15 s. --- tests/extrac26.at | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/extrac26.at b/tests/extrac26.at index 48fe468b..35005b1a 100644 --- a/tests/extrac26.at +++ b/tests/extrac26.at @@ -31,7 +31,7 @@ tar -cf archive.tar dirtree echo Extracting archive mkdir output -timeout 15 tar -xf archive.tar --delay-directory-restore -C output +timeout 60 tar -xf archive.tar --delay-directory-restore -C output ], [0], [Creating dirtree