Commit
Improved performance for solid archive extraction (particularly LZMA1) by caching decompressed buffers
onitake committed Dec 27, 2015
1 parent 67d708b commit 419260e
Showing 1 changed file with 84 additions and 56 deletions.
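
The idea behind the change: in a solid archive, many files share one compressed chunk, and before this commit every ReadFile call created a fresh reader and had to decode the chunk from its beginning up to the requested file's offset. The new ChunkState field caches the last reader together with its position in the decompressed stream, so a later file from the same chunk only needs a forward seek. A minimal sketch of the reuse test; can_reuse_reader is a hypothetical helper name, while the commit itself performs this check inline on $self->{ChunkState}:

    # Sketch only: the conditions under which a cached chunk reader can be reused.
    # $state mirrors the ChunkState hash from the diff below; $location is a file
    # location record with the same keys.
    sub can_reuse_reader {
        my ($state, $location) = @_;
        return
            defined($state) &&
            $state->{FirstSlice} == $location->{FirstSlice} &&
            $state->{LastSlice} == $location->{LastSlice} &&
            $state->{StartOffset} == $location->{StartOffset} &&
            $state->{ChunkCompressedSize} == $location->{ChunkCompressedSize} &&
            # Decompression streams only move forward, so the cached reader helps
            # only when the requested data lies at or after its current position.
            $state->{ChunkSuboffset} <= $location->{ChunkSuboffset};
    }
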
Setup/Inno/Interpret4000.pm: 140 changes (84 additions, 56 deletions)
@@ -92,72 +92,100 @@ sub DiskInfo {
 sub ReadFile {
     my ($self, $input, $header, $location, $offset1, $password, @slices) = @_;
 
-    # Note: once we support decryption, make sure the password is interpreted as UTF-16LE (why?)
-    if ($location->{Flags}->{ChunkEncrypted} || $location->{Flags}->{foChunkEncrypted}) {
-        !defined($password) && croak("File is encrypted, but no password was given");
-        croak("Encryption is not supported yet");
-    }
-
-    my $buffer;
-
-    if (@slices > 1) {
-        my $i = 0;
-        my $size = $location->{ChunkCompressedSize} + 4;
-        my $offset = $offset1 + $location->{StartOffset} - $slices[$i]->{SliceOffset};
-        my $available = $slices[$i]->{Size} - $offset;
-        my $slicesize = $available < $size ? $available : $size;
-        $slices[$i]->{Input}->seek($offset, Fcntl::SEEK_SET);
-        $slices[$i]->{Input}->read($buffer, $slicesize);
-        my $slicedata = $buffer;
-        $size -= $slicesize;
-        $i++;
-        while ($i < @slices && $size > 0) {
-            $offset = $slices[$i]->{DataOffset};
-            $available = $slices[$i]->{Size};
-            $slicesize = $available < $size ? $available : $size;
-            $slices[$i]->{Input}->seek($offset, Fcntl::SEEK_SET);
-            $slices[$i]->{Input}->read($buffer, $slicesize);
-            $slicedata .= $buffer;
-            $size -= $slicesize;
-            $i++;
-        }
-        # Replace input handle with virtual handle over concatenated data
-        # This requires Perl 5.6 or later, use IO::String or IO::Scalar for earlier versions
-        $input = IO::File->new(\$slicedata, 'r') || croak("Can't create file handle for preprocessed data: $!");
-    } else {
-        $input->seek($offset1 + $location->{StartOffset}, Fcntl::SEEK_SET);
-    }
-
-    $input->read($buffer, 4) || croak("Can't read compressed block magic: $!");
-    ($buffer eq $ZLIBID) || croak("Compressed block ID invalid");
-
-    my $reader;
-    if ($location->{Flags}->{ChunkCompressed} || $location->{Flags}->{foChunkCompressed}) {
-        given ($self->Compression1($header)) {
-            when (/Zip$/i) {
-                $reader = IO::Uncompress::AnyInflate->new($input, Transparent => 0) || croak("Can't create zlib reader: $!");
-            }
-            when (/Bzip$/i) {
-                $reader = IO::Uncompress::Bunzip2->new($input, Transparent => 0) || croak("Can't create bzip2 reader: $!");
-            }
-            when (/Lzma$/i) {
-                $reader = Setup::Inno::LzmaReader->new($input, $location->{ChunkCompressedSize}) || croak("Can't create lzma reader: $!");
-            }
-            when (/Lzma2$/i) {
-                $reader = Setup::Inno::Lzma2Reader->new($input, $location->{ChunkCompressedSize}) || croak("Can't create lzma2 reader: $!");
-            }
-            default {
-                # Plain reader for stored mode
-                $reader = $input;
-            }
-        }
-    } else {
-        $reader = $input;
-    }
-
-    $reader->seek($location->{ChunkSuboffset}, Fcntl::SEEK_CUR);
-    ($reader->read($buffer, $location->{OriginalSize}) >= $location->{OriginalSize}) || croak("Can't uncompress file: $!");
+    # Check if we have a cached chunk and verify it is the same chunk
+    # and that we can reach our data with only forward seeking
+    if (
+        defined($self->{ChunkState}) &&
+        $self->{ChunkState}->{FirstSlice} == $location->{FirstSlice} &&
+        $self->{ChunkState}->{LastSlice} == $location->{LastSlice} &&
+        $self->{ChunkState}->{StartOffset} == $location->{StartOffset} &&
+        $self->{ChunkState}->{ChunkCompressedSize} == $location->{ChunkCompressedSize} &&
+        $self->{ChunkState}->{ChunkSuboffset} <= $location->{ChunkSuboffset}
+    ) {
+        # Yes, use the cached reader
+    } else {
+        # No, create a new reader
+
+        # Note: once we support decryption, make sure the password is interpreted as UTF-16LE (why?)
+        if ($location->{Flags}->{ChunkEncrypted} || $location->{Flags}->{foChunkEncrypted}) {
+            !defined($password) && croak("File is encrypted, but no password was given");
+            croak("Encryption is not supported yet");
+        }
+
+        my $buffer;
+
+        if (@slices > 1) {
+            my $i = 0;
+            my $size = $location->{ChunkCompressedSize} + 4;
+            my $offset = $offset1 + $location->{StartOffset} - $slices[$i]->{SliceOffset};
+            my $available = $slices[$i]->{Size} - $offset;
+            my $slicesize = $available < $size ? $available : $size;
+            $slices[$i]->{Input}->seek($offset, Fcntl::SEEK_SET);
+            $slices[$i]->{Input}->read($buffer, $slicesize);
+            my $slicedata = $buffer;
+            $size -= $slicesize;
+            $i++;
+            while ($i < @slices && $size > 0) {
+                $offset = $slices[$i]->{DataOffset};
+                $available = $slices[$i]->{Size};
+                $slicesize = $available < $size ? $available : $size;
+                $slices[$i]->{Input}->seek($offset, Fcntl::SEEK_SET);
+                $slices[$i]->{Input}->read($buffer, $slicesize);
+                $slicedata .= $buffer;
+                $size -= $slicesize;
+                $i++;
+            }
+            # Replace input handle with virtual handle over concatenated data
+            # This requires Perl 5.6 or later, use IO::String or IO::Scalar for earlier versions
+            $input = IO::File->new(\$slicedata, 'r') || croak("Can't create file handle for preprocessed data: $!");
+        } else {
+            $input->seek($offset1 + $location->{StartOffset}, Fcntl::SEEK_SET);
+        }
+
+        $input->read($buffer, 4) || croak("Can't read compressed block magic: $!");
+        ($buffer eq $ZLIBID) || croak("Compressed block ID invalid");
+
+        my $reader;
+        if ($location->{Flags}->{ChunkCompressed} || $location->{Flags}->{foChunkCompressed}) {
+            given ($self->Compression1($header)) {
+                when (/Zip$/i) {
+                    $reader = IO::Uncompress::AnyInflate->new($input, Transparent => 0) || croak("Can't create zlib reader: $!");
+                }
+                when (/Bzip$/i) {
+                    $reader = IO::Uncompress::Bunzip2->new($input, Transparent => 0) || croak("Can't create bzip2 reader: $!");
+                }
+                when (/Lzma$/i) {
+                    $reader = Setup::Inno::LzmaReader->new($input, $location->{ChunkCompressedSize}) || croak("Can't create lzma reader: $!");
+                }
+                when (/Lzma2$/i) {
+                    $reader = Setup::Inno::Lzma2Reader->new($input, $location->{ChunkCompressedSize}) || croak("Can't create lzma2 reader: $!");
+                }
+                default {
+                    # Plain reader for stored mode
+                    $reader = $input;
+                }
+            }
+        } else {
+            $reader = $input;
+        }
+
+        # Update the reader state
+        $self->{ChunkState} = {
+            FirstSlice => $location->{FirstSlice},
+            LastSlice => $location->{LastSlice},
+            StartOffset => $location->{StartOffset},
+            ChunkCompressedSize => $location->{ChunkCompressedSize},
+            ChunkSuboffset => 0,
+            Reader => $reader,
+        };
+    }
+
+    $self->{ChunkState}->{Reader}->seek($location->{ChunkSuboffset} - $self->{ChunkState}->{ChunkSuboffset}, Fcntl::SEEK_CUR);
+    ($self->{ChunkState}->{Reader}->read(my $buffer, $location->{OriginalSize}) >= $location->{OriginalSize}) || croak("Can't uncompress file: $!");
+
+    # Update the offset
+    $self->{ChunkState}->{ChunkSuboffset} = $location->{ChunkSuboffset} + $location->{OriginalSize};
 
     if ($location->{Flags}->{CallInstructionOptimized} || $location->{Flags}->{foCallInstructionOptimized}) {
         # We could just transform the whole data, but this will expose a flaw in the original algorithm:
         # It doesn't detect jump instructions across block boundaries.
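The cache only pays off when files that share a chunk are requested with non-decreasing ChunkSuboffset, since the cached reader can only seek forward. A hypothetical caller sketch; the @files list, its Location key and $interp are assumptions for illustration, not part of this module:

    # Order extraction so entries from the same chunk are read front to back;
    # ReadFile can then keep reusing its cached chunk reader instead of
    # restarting decompression for every file.
    my @ordered = sort {
        $a->{Location}->{FirstSlice}     <=> $b->{Location}->{FirstSlice} ||
        $a->{Location}->{StartOffset}    <=> $b->{Location}->{StartOffset} ||
        $a->{Location}->{ChunkSuboffset} <=> $b->{Location}->{ChunkSuboffset}
    } @files;
    for my $file (@ordered) {
        # Password is undef because encrypted chunks are not supported yet
        my $data = $interp->ReadFile($input, $header, $file->{Location}, $offset1, undef, @slices);
        # ... write $data to disk ...
    }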
