Skip to content

Commit

Permalink
MDL-70631 files: Fix performance of zip_packer::extract_to_pathname()
Browse files Browse the repository at this point in the history
The original implementation was based on ZipArchive::getStream() which
turns out to be very slow and if the archive contains many files, the
unzipping performance is very slow.

The patch changes the implementation to use ZipArchive::extractTo()
unless the extracted entry path contains a folder name ending with dot
(such as some/path./to/file.txt). There is a known upstream bug in the
PHP ZIP extension #77214 (also #74619 and #69477) so that we fall back
to keep using the stream in those cases.
  • Loading branch information
mudrd8mz committed Feb 3, 2021
1 parent cd2e91a commit 31f71d9
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 15 deletions.
49 changes: 49 additions & 0 deletions lib/filestorage/tests/zip_packer_test.php
Expand Up @@ -254,6 +254,55 @@ public function test_extract_to_pathname() {
}
}

/**
* Test functionality of {@see zip_packer} for entries with folders ending with dots.
*
* @link https://bugs.php.net/bug.php?id=77214
*/
public function test_zip_entry_path_having_folder_ending_with_dot() {
$this->resetAfterTest(false);

$packer = get_file_packer('application/zip');
$tmp = make_request_directory();
$now = time();

// Create a test archive containing a folder ending with dot.
$zippath = $tmp . '/test_archive.zip';
$zipcontents = [
'HOW.TO' => ['Just run tests.'],
'README.' => ['This is a test ZIP file'],
'./Current time' => [$now],
'Data/sub1./sub2/1221' => ['1221'],
'Data/sub1./sub2./Příliš žluťoučký kůň úpěl Ďábelské Ódy.txt' => [''],
];

// Check that the archive can be created.
$result = $packer->archive_to_pathname($zipcontents, $zippath, false);
$this->assertTrue($result);

// Check list of files.
$listfiles = $packer->list_files($zippath);
$this->assertEquals(count($zipcontents), count($listfiles));

foreach ($listfiles as $fileinfo) {
$this->assertSame($fileinfo->pathname, $fileinfo->original_pathname);
$this->assertArrayHasKey($fileinfo->pathname, $zipcontents);
}

// Check actual extracting.
$targetpath = $tmp . '/target';
check_dir_exists($targetpath);
$result = $packer->extract_to_pathname($zippath, $targetpath, null, null, true);

$this->assertTrue($result);

foreach ($zipcontents as $filename => $filecontents) {
$filecontents = reset($filecontents);
$this->assertTrue(is_readable($targetpath . '/' . $filename));
$this->assertEquals($filecontents, file_get_contents($targetpath . '/' . $filename));
}
}

/**
* @depends test_archive_to_storage
*/
Expand Down
22 changes: 22 additions & 0 deletions lib/filestorage/zip_archive.php
Expand Up @@ -254,6 +254,28 @@ public function get_stream($index) {
return $this->za->getStream($name);
}

/**
* Extract the archive contents to the given location.
*
* @param string $destination Path to the location where to extract the files.
* @param int $index Index of the archive entry.
* @return bool true on success or false on failure
*/
public function extract_to($destination, $index) {

if (!isset($this->za)) {
return false;
}

$name = $this->za->getNameIndex($index);

if ($name === false) {
return false;
}

return $this->za->extractTo($destination, $name);
}

/**
* Returns file information.
*
Expand Down
55 changes: 40 additions & 15 deletions lib/filestorage/zip_packer.php
Expand Up @@ -341,33 +341,58 @@ public function extract_to_pathname($archivefile, $pathname,
}

$newfile = "$newdir/$filename";
if (!$fp = fopen($newfile, 'wb')) {
$processed[$name] = 'Can not write target file'; // TODO: localise
$success = false;
continue;

if (strpos($newfile, './') > 1) {
// The path to the entry contains a directory ending with dot. We cannot use extract_to() due to
// upstream PHP bugs #69477, #74619 and #77214. Extract the file from its stream which is slower but
// should work even in this case.
if (!$fp = fopen($newfile, 'wb')) {
$processed[$name] = 'Can not write target file'; // TODO: localise.
$success = false;
continue;
}

if (!$fz = $ziparch->get_stream($info->index)) {
$processed[$name] = 'Can not read file from zip archive'; // TODO: localise.
$success = false;
fclose($fp);
continue;
}

while (!feof($fz)) {
$content = fread($fz, 262143);
fwrite($fp, $content);
}

fclose($fz);
fclose($fp);

} else {
if (!$fz = $ziparch->extract_to($pathname, $info->index)) {
$processed[$name] = 'Can not read file from zip archive'; // TODO: localise.
$success = false;
continue;
}
}
if (!$fz = $ziparch->get_stream($info->index)) {
$processed[$name] = 'Can not read file from zip archive'; // TODO: localise

// Check that the file was correctly created in the destination.
if (!file_exists($newfile)) {
$processed[$name] = 'Unknown error during zip extraction (file not created).'; // TODO: localise.
$success = false;
fclose($fp);
continue;
}

while (!feof($fz)) {
$content = fread($fz, 262143);
fwrite($fp, $content);
}
fclose($fz);
fclose($fp);
// Check that the size of extracted file matches the expectation.
if (filesize($newfile) !== $size) {
$processed[$name] = 'Unknown error during zip extraction'; // TODO: localise
$processed[$name] = 'Unknown error during zip extraction (file size mismatch).'; // TODO: localise.
$success = false;
// something went wrong :-(
@unlink($newfile);
continue;
}

$processed[$name] = true;
}

$ziparch->close();

if ($returnbool) {
Expand Down

0 comments on commit 31f71d9

Please sign in to comment.