Skip to content

Commit

Permalink
Remove CompStatus::OTHER and use exception for error management.
Browse files Browse the repository at this point in the history
`CompStatus::OTHER` is not really used and is mainly used as a error
code (if used).

Let's remove it and directly throw a exception instead of returning it.

This commit make the ZSTD/LZMA error correctly handled
(at least not silently ignored) during (de)compression.

See kiwix/libkiwix#515
  • Loading branch information
mgautierfr committed Jun 1, 2021
1 parent a5c4bc3 commit a7201ce
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 56 deletions.
24 changes: 15 additions & 9 deletions src/compression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,19 @@ CompStatus LZMA_INFO::stream_run_decode(stream_t* stream, CompStep step) {
CompStatus LZMA_INFO::stream_run(stream_t* stream, CompStep step)
{
auto errcode = lzma_code(stream, step==CompStep::STEP?LZMA_RUN:LZMA_FINISH);
if (errcode == LZMA_BUF_ERROR)
return CompStatus::BUF_ERROR;
if (errcode == LZMA_STREAM_END)
return CompStatus::STREAM_END;
if (errcode == LZMA_OK)
return CompStatus::OK;
return CompStatus::OTHER;
switch(errcode) {
case LZMA_BUF_ERROR:
return CompStatus::BUF_ERROR;
case LZMA_STREAM_END:
return CompStatus::STREAM_END;
case LZMA_OK:
return CompStatus::OK;
default: {
std::ostringstream ss;
ss << "Unexpected lzma status : " << errcode;
throw std::runtime_error(ss.str());
}
}
}

void LZMA_INFO::stream_end_decode(stream_t* stream)
Expand Down Expand Up @@ -115,7 +121,7 @@ CompStatus ZSTD_INFO::stream_run_encode(stream_t* stream, CompStep step) {
stream->total_out += outBuf.pos;

if (::ZSTD_isError(ret)) {
return CompStatus::OTHER;
throw std::runtime_error(::ZSTD_getErrorName(ret));
}

if ( step == CompStep::STEP ) {
Expand Down Expand Up @@ -149,7 +155,7 @@ CompStatus ZSTD_INFO::stream_run_decode(stream_t* stream, CompStep /*step*/) {
stream->total_out += outBuf.pos;

if (::ZSTD_isError(ret))
return CompStatus::OTHER;
throw std::runtime_error(::ZSTD_getErrorName(ret));

if (ret == 0)
return CompStatus::STREAM_END;
Expand Down
103 changes: 56 additions & 47 deletions src/compression.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ enum class CompStatus {
OK,
STREAM_END,
BUF_ERROR,
OTHER
};

enum class RunnerStatus {
Expand Down Expand Up @@ -98,43 +97,49 @@ class Uncompressor
RunnerStatus feed(char* data, size_t size, CompStep step = CompStep::STEP) {
stream.next_in = (unsigned char*)data;
stream.avail_in = size;
auto errcode = CompStatus::OTHER;
while (true) {
errcode = INFO::stream_run_decode(&stream, step);
auto errcode = INFO::stream_run_decode(&stream, step);
DEB((int)errcode)
if (errcode == CompStatus::BUF_ERROR) {
if (stream.avail_in == 0 && stream.avail_out != 0) {
// End of input stream.
// compressor hasn't recognize the end of the input stream but there is
// no more input.
return RunnerStatus::NEED_MORE;
} else {
//Not enought output size
DEB("need memory " << data_size << " " << stream.avail_out << " " << stream.total_out)
data_size *= 2;
std::unique_ptr<char[]> new_ret_data(new char[data_size]);
memcpy(new_ret_data.get(), ret_data.get(), stream.total_out);
stream.next_out = (unsigned char*)(new_ret_data.get() + stream.total_out);
stream.avail_out = data_size - stream.total_out;
DEB(data_size << " " << stream.avail_out << " " << stream.avail_in)
ret_data = std::move(new_ret_data);
continue;
}
}
if (errcode == CompStatus::STREAM_END)
break;
// On first call where lzma cannot progress (no output size).
// Lzma return OK. If we return NEED_MORE, then we will try to compress
// with new input data, but we should not as current one is not processed.
// We must do a second step to have te BUF_ERROR and handle thing correctly.
if (errcode == CompStatus::OK) {
if (stream.avail_in == 0)
switch (errcode) {
case CompStatus::BUF_ERROR:
if (stream.avail_in == 0 && stream.avail_out != 0) {
// End of input stream.
// compressor hasn't recognize the end of the input stream but there is
// no more input.
return RunnerStatus::NEED_MORE;
} else {
// Not enought output size.
// Allocate more memory and continue the loop.
DEB("need memory " << data_size << " " << stream.avail_out << " " << stream.total_out)
data_size *= 2;
std::unique_ptr<char[]> new_ret_data(new char[data_size]);
memcpy(new_ret_data.get(), ret_data.get(), stream.total_out);
stream.next_out = (unsigned char*)(new_ret_data.get() + stream.total_out);
stream.avail_out = data_size - stream.total_out;
DEB(data_size << " " << stream.avail_out << " " << stream.avail_in)
ret_data = std::move(new_ret_data);
}
break;
case CompStatus::OK:
// On first call where lzma cannot progress (no output size).
// Lzma return OK. If we return NEED_MORE, then we will try to compress
// with new input data, but we should not as current one is not processed.
// We must do a second step to have te BUF_ERROR and handle thing correctly.
// If we have no more input, then we must ask for more.
if (stream.avail_in == 0) {
return RunnerStatus::NEED_MORE;
}
break;
continue;
case CompStatus::STREAM_END:
// End of compressed stream. Everything is ok.
return RunnerStatus::OK;
default:
// unreachable
return RunnerStatus::ERROR;
}
return RunnerStatus::ERROR;
};
return errcode==CompStatus::STREAM_END?RunnerStatus::OK:RunnerStatus::NEED_MORE;
// unreachable
return RunnerStatus::NEED_MORE;
}

std::unique_ptr<char[]> get_data(zim::zsize_t* size) {
Expand Down Expand Up @@ -215,16 +220,20 @@ class Compressor
RunnerStatus feed(const char* data, size_t size, CompStep step=CompStep::STEP) {
stream.next_in = (unsigned char*)data;
stream.avail_in = size;
auto errcode = CompStatus::OTHER;
while (true) {
errcode = INFO::stream_run_encode(&stream, step);
if (stream.avail_out == 0) {
if (errcode == CompStatus::OK) {
// lzma return a OK return status the first time it runs out of output memory.
// The BUF_ERROR is returned only the second time we call a lzma_code.
continue;
}
if (errcode == CompStatus::BUF_ERROR) {
auto errcode = INFO::stream_run_encode(&stream, step);
switch (errcode) {
case CompStatus::OK:
if (stream.avail_out == 0) {
// lzma return a OK return status the first time it runs out of output memory.
// The BUF_ERROR is returned only the second time we call a lzma_code.
continue;
} else {
return RunnerStatus::NEED_MORE;
}
case CompStatus::STREAM_END:
return RunnerStatus::NEED_MORE;
case CompStatus::BUF_ERROR: {
//Not enought output size
ret_size *= 2;
std::unique_ptr<char[]> new_ret_data(new char[ret_size]);
Expand All @@ -234,13 +243,13 @@ class Compressor
ret_data = std::move(new_ret_data);
continue;
}
}
if (errcode == CompStatus::STREAM_END || errcode == CompStatus::OK) {
// Everything ok, quit the loop
break;
}
return RunnerStatus::ERROR;
default:
// unreachable
return RunnerStatus::ERROR;
};
};
// urreachable
return RunnerStatus::NEED_MORE;
}

Expand Down
4 changes: 4 additions & 0 deletions src/decoderstreamreader.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ class DecoderStreamReader : public IStreamReader
m_decoderState.avail_out = nbytes.v;
while ( m_decoderState.avail_out != 0 )
{
// We don't car of the return code of decodeMoreBytes.
// We feed (or stop feeding) the decoder based on what
// we need to decode and the `avail_in`.
// If there is a error somehow, a exception will be thrown.
decodeMoreBytes();
}
}
Expand Down

0 comments on commit a7201ce

Please sign in to comment.