Skip to content

Commit

Permalink
[FIX] compression streams writing wrong format
Browse files Browse the repository at this point in the history
  • Loading branch information
eseiler committed Mar 21, 2021
1 parent de4db1f commit 0a1bd47
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 11 deletions.
2 changes: 1 addition & 1 deletion include/seqan3/io/detail/misc_input.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
#include <string>
#include <tuple>


#ifdef SEQAN3_HAS_BZIP2
#include <seqan3/contrib/stream/bz2_istream.hpp>
#endif
Expand All @@ -31,6 +30,7 @@
#include <seqan3/contrib/stream/gz_istream.hpp>
#endif
#include <seqan3/io/detail/magic_header.hpp>
#include <seqan3/io/exception.hpp>
#include <seqan3/utility/detail/exposition_only_concept.hpp>

namespace seqan3::detail
Expand Down
21 changes: 15 additions & 6 deletions include/seqan3/io/detail/misc_output.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,20 @@

#pragma once

#include <seqan3/std/filesystem>
#include <iostream>
#include <string>
#include <tuple>

#include <seqan3/utility/detail/exposition_only_concept.hpp>
#ifdef SEQAN3_HAS_BZIP2
#include <seqan3/contrib/stream/bz2_ostream.hpp>
#endif
#ifdef SEQAN3_HAS_ZLIB
#include <seqan3/contrib/stream/bgzf_ostream.hpp>
#include <seqan3/contrib/stream/gz_ostream.hpp>
#endif
#include <seqan3/std/filesystem>
#include <seqan3/io/exception.hpp>
#include <seqan3/utility/detail/exposition_only_concept.hpp>

namespace seqan3::detail
{
Expand All @@ -46,16 +47,24 @@ inline auto make_secondary_ostream(std::basic_ostream<char_t> & primary_stream,

std::string extension = filename.extension().string();

if ((extension == ".gz") || (extension == ".bgzf") || (extension == ".bam"))
if (extension == ".gz")
{
#ifdef SEQAN3_HAS_ZLIB
filename.replace_extension("");
return {new contrib::basic_gz_ostream<char_t>{primary_stream}, stream_deleter_default};
#else
throw file_open_error{"Trying to write a gzipped file, but no ZLIB available."};
#endif
}
else if ((extension == ".bgzf") || (extension == ".bam"))
{
#ifdef SEQAN3_HAS_ZLIB
if (extension != ".bam") // remove extension except for bam
filename.replace_extension("");

return {new contrib::basic_bgzf_ostream<char_t>{primary_stream},
stream_deleter_default};
return {new contrib::basic_bgzf_ostream<char_t>{primary_stream}, stream_deleter_default};
#else
throw file_open_error{"Trying to write a gzipped file, but no ZLIB available."};
throw file_open_error{"Trying to write a bgzf'ed file, but no ZLIB available."};
#endif
}
else if (extension == ".bz2")
Expand Down
55 changes: 54 additions & 1 deletion test/unit/io/detail/misc_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,16 @@

#include <gtest/gtest.h>

#include <fstream>
#include <seqan3/std/ranges>
#include <string>
#include <vector>

#include <seqan3/io/detail/magic_header.hpp>
#include <seqan3/io/detail/misc.hpp>
#include <seqan3/std/ranges>
#include <seqan3/io/detail/misc_input.hpp>
#include <seqan3/io/detail/misc_output.hpp>
#include <seqan3/test/expect_same_type.hpp>
#include <seqan3/test/tmp_filename.hpp>

struct dummy_file
Expand Down Expand Up @@ -70,3 +74,52 @@ TEST(misc, valid_compression_extensions)
EXPECT_TRUE(std::find(valid_compression.begin(), valid_compression.end(), "zst") != valid_compression.end());
#endif
}

/*!\brief Typed test fixture that owns a temporary file whose extension is taken from a compression tag type.
 * \tparam compression_t Compression tag type; the fixture reads its `file_extensions` and `magic_header` members.
 */
template <typename compression_t>
class misc : public ::testing::Test
{
public:
    //!\brief Size of the bgzf magic header; tests use it as the number of bytes to write and to read back.
    static constexpr size_t biggest_magic_header_size = seqan3::detail::bgzf_compression::magic_header.size();

    //!\brief Magic header of the compression format under test.
    static constexpr auto expected_magic_header = compression_t::magic_header;

    //!\brief A dot followed by the first file extension listed for the compression format under test.
    static inline std::string const compression_extension = "." + compression_t::file_extensions[0];

    //!\brief Returns the path of the temporary file owned by this fixture.
    auto file_path() const -> std::filesystem::path
    {
        return output_file.get_path();
    }

private:
    // Declared before `output_file`, which is initialised from it.
    std::string const output_name = "io_misc_test.txt" + compression_extension;
    seqan3::test::tmp_filename const output_file{output_name.c_str()};
};

// Compression tag types the typed `misc` suite is instantiated with; each type yields its own set of tests.
using compression_types = ::testing::Types<seqan3::detail::gz_compression,
seqan3::detail::bgzf_compression,
seqan3::detail::bz2_compression>;

// Registers the `misc` fixture as a typed test suite over `compression_types`.
// NOTE(review): the trailing empty macro argument presumably avoids a variadic-macro warning — confirm.
TYPED_TEST_SUITE(misc, compression_types, );

// Regression test for https://github.com/seqan/seqan3/issues/2455
// Writes a short payload through the secondary output stream that `make_secondary_ostream` creates for the
// fixture's file path, then reads the first bytes of the file back and checks them against the magic header
// of the compression format under test.
TYPED_TEST(misc, issue2455)
{
    // Static fixture members are named via `TestFixture::` instead of `this->`: `this` is not portably usable
    // inside a constant expression such as the template argument of `std::array` below.
    constexpr size_t header_size = TestFixture::biggest_magic_header_size;

    {
        auto file_path = this->file_path();
        std::ofstream filestream{file_path};
        ASSERT_TRUE(filestream.is_open()); // fail early instead of silently writing nothing
        auto stream_ptr = seqan3::detail::make_secondary_ostream(filestream, file_path);
        *stream_ptr << std::string(header_size, 'a') << '\n';
    } // Both streams go out of scope here, before the file is read back (presumably this flushes the output).

    std::ifstream filestream{this->file_path()};
    // Reading through an istreambuf_iterator of a stream that failed to open would dereference an
    // end-of-stream iterator, so stop here if the file is not readable.
    ASSERT_TRUE(filestream.is_open());
    std::array<char, header_size> magic_header{};
    std::copy_n(std::istreambuf_iterator{filestream}, header_size, magic_header.begin());

    if constexpr (std::same_as<TypeParam, seqan3::detail::bgzf_compression>)
    {
        // bgzf output is checked with the format's own header validation.
        EXPECT_TRUE(TypeParam::validate_header(std::span{magic_header}));
    }
    else
    {
        // Every other format must start with its own magic header and must not be recognised as bgzf.
        EXPECT_TRUE(seqan3::detail::starts_with(magic_header, TestFixture::expected_magic_header));
        EXPECT_FALSE(seqan3::detail::bgzf_compression::validate_header(std::span{magic_header}));
    }
}
2 changes: 1 addition & 1 deletion test/unit/io/sam_file/sam_file_output_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -650,7 +650,7 @@ TEST(compression, by_filename_gz)

std::string buffer = compression_by_filename_impl(filename);
buffer[9] = '\x00'; // zero out OS byte.
EXPECT_EQ(buffer, expected_bgzf);
EXPECT_EQ(buffer, expected_gz);
}

TEST(compression, by_stream_gz)
Expand Down
2 changes: 1 addition & 1 deletion test/unit/io/sequence_file/sequence_file_output_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -528,7 +528,7 @@ TEST(compression, by_filename_gz)

std::string buffer = compression_by_filename_impl(filename);
buffer[9] = '\x00'; // zero out OS byte
EXPECT_EQ(buffer, expected_bgzf);
EXPECT_EQ(buffer, expected_gz);
}

TEST(compression, by_stream_gz)
Expand Down
2 changes: 1 addition & 1 deletion test/unit/io/structure_file/structure_file_output_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,7 @@ TEST_F(structure_file_output_compression, by_filename_gz)
seqan3::test::tmp_filename filename{"structure_file_output_test.dbn.gz"};
std::string buffer = compression_by_filename_impl(filename);
buffer[9] = '\x00'; // zero out OS byte
EXPECT_EQ(buffer, expected_bgzf);
EXPECT_EQ(buffer, expected_gz);
}

TEST_F(structure_file_output_compression, by_stream_gz)
Expand Down

0 comments on commit 0a1bd47

Please sign in to comment.