Skip to content

Commit

Permalink
[MISC] major break: new hibf
Browse files Browse the repository at this point in the history
  • Loading branch information
eseiler committed Nov 1, 2023
1 parent f8de565 commit 3ca0fab
Show file tree
Hide file tree
Showing 38 changed files with 256 additions and 357 deletions.
1 change: 1 addition & 0 deletions include/raptor/build/store_index.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#pragma once

#include <filesystem>
#include <fstream>

#include <raptor/index.hpp>
#include <raptor/strong_types.hpp>
Expand Down
78 changes: 3 additions & 75 deletions include/raptor/index.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

#pragma once

#include <cereal/types/string.hpp>

#include <sharg/exceptions.hpp>

#include <raptor/argument_parsing/build_arguments.hpp>
Expand Down Expand Up @@ -51,14 +53,13 @@ class raptor_index
uint64_t window_size_{};
seqan3::shape shape_{};
uint8_t parts_{};
bool compressed_{false};
std::vector<std::vector<std::string>> bin_path_{};
double fpr_{};
bool is_hibf_{index_structure::is_hibf<data_t>};
data_t ibf_{};

public:
static constexpr uint32_t version{2u};
static constexpr uint32_t version{3u};

raptor_index() = default;
raptor_index(raptor_index const &) = default;
Expand Down Expand Up @@ -107,11 +108,6 @@ class raptor_index
return parts_;
}

//!\brief Returns the value of the `compressed_` member.
bool compressed() const { return compressed_; }

std::vector<std::vector<std::string>> const & bin_path() const
{
return bin_path_;
Expand Down Expand Up @@ -157,9 +153,6 @@ class raptor_index
archive(window_size_);
archive(shape_);
archive(parts_);
archive(compressed_);
if (compressed_)
throw sharg::parser_error{"Index cannot be compressed."};
archive(bin_path_);
archive(fpr_);
archive(is_hibf_);
Expand Down Expand Up @@ -196,7 +189,6 @@ class raptor_index
archive(window_size_);
archive(shape_);
archive(parts_);
archive(compressed_);
archive(bin_path_);
archive(fpr_);
archive(is_hibf_);
Expand All @@ -213,70 +205,6 @@ class raptor_index
throw sharg::parser_error{"Unsupported index version. Check raptor upgrade."}; // GCOVR_EXCL_LINE
}
}

/*!\brief Reads the parameters of a version 1 index (for use with raptor upgrade).
 * \tparam archive_t Type of the archive; must model seqan3::cereal_input_archive.
 * \param[in,out] archive The archive to read from.
 * \throws sharg::parser_error if the stored version is not 1, or if reading any parameter throws.
 * \details Reads, in this order: version, window size, shape, parts, compressed flag, bin paths.
 */
template <seqan3::cereal_input_archive archive_t>
void load_old_parameters(archive_t & archive)
{
    uint32_t stored_version{};
    archive(stored_version);

    // Only the version 1 layout is handled here; anything else is rejected before reading further.
    if (stored_version != 1u)
        throw sharg::parser_error{"Unsupported index version. Use Raptor 2.0's upgrade first."}; // LCOV_EXCL_LINE

    try
    {
        archive(window_size_);
        archive(shape_);
        archive(parts_);
        archive(compressed_);
        archive(bin_path_);
    }
    // GCOVR_EXCL_START
    catch (std::exception const & error)
    {
        // Re-throw any read failure as a parser error that carries the original message.
        throw sharg::parser_error{"Cannot read index: " + std::string{error.what()}};
    }
    // GCOVR_EXCL_STOP
}
//!\endcond

private:
friend class index_upgrader;

//!\cond DEV
/*!\brief Reads a complete version 1 index, including `ibf_` (for use with raptor upgrade).
 * \tparam archive_t Type of the archive; must model seqan3::cereal_archive.
 * \param[in,out] archive The archive to read from.
 * \throws sharg::parser_error if the stored version is not 1, or if reading any member throws.
 * \details Reads, in this order: version, window size, shape, parts, compressed flag, bin paths, ibf.
 */
template <seqan3::cereal_archive archive_t>
void load_old_index(archive_t & archive)
{
    uint32_t stored_version{};
    archive(stored_version);

    // Only the version 1 layout is handled here; anything else is rejected before reading further.
    if (stored_version != 1u)
        throw sharg::parser_error{"Unsupported index version. Use Raptor 2.0's upgrade first."}; // LCOV_EXCL_LINE

    try
    {
        archive(window_size_);
        archive(shape_);
        archive(parts_);
        archive(compressed_);
        archive(bin_path_);
        archive(ibf_);
    }
    // GCOVR_EXCL_START
    catch (std::exception const & error)
    {
        // Re-throw any read failure as a parser error that carries the original message.
        throw sharg::parser_error{"Cannot read index: " + std::string{error.what()}};
    }
    // GCOVR_EXCL_STOP
}
//!\endcond
};

Expand Down
86 changes: 43 additions & 43 deletions include/raptor/upgrade/index_upgrader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,52 +18,52 @@
namespace raptor
{

class index_upgrader
{
public:
std::string index_file{};
std::string output_file{};
double fpr{};
size_t max_count{};
// class index_upgrader
// {
// public:
// std::string index_file{};
// std::string output_file{};
// double fpr{};
// size_t max_count{};

// Default construction, copy, move, both assignments and destruction are all explicitly defaulted.
index_upgrader() = default;
index_upgrader(index_upgrader const &) = default;
index_upgrader(index_upgrader &&) = default; // GCOVR_EXCL_LINE
index_upgrader & operator=(index_upgrader const &) = default;
index_upgrader & operator=(index_upgrader &&) = default;
~index_upgrader() = default;
// index_upgrader() = default;
// index_upgrader(index_upgrader const &) = default;
// index_upgrader(index_upgrader &&) = default; // GCOVR_EXCL_LINE
// index_upgrader & operator=(index_upgrader const &) = default;
// index_upgrader & operator=(index_upgrader &&) = default;
// ~index_upgrader() = default;

/*!\brief Constructs an upgrader from the given arguments.
 * \param[in] arguments Source of the `index_file`, `output_file` and `fpr` members.
 * \param[in] max_count Stored in the `max_count` member.
 */
explicit index_upgrader(upgrade_arguments const & arguments, size_t const max_count) :
index_file{arguments.index_file},
output_file{arguments.output_file},
fpr{arguments.fpr},
max_count{max_count}
{}
// explicit index_upgrader(upgrade_arguments const & arguments, size_t const max_count) :
// index_file{arguments.index_file},
// output_file{arguments.output_file},
// fpr{arguments.fpr},
// max_count{max_count}
// {}

void upgrade()
{
raptor_index<index_structure::ibf> index{};
{
std::ifstream is{index_file, std::ios::binary};
cereal::BinaryInputArchive iarchive{is};
index.load_old_index(iarchive);
}
if (std::isnan(fpr))
fpr = compute_fpr(index.ibf().hash_function_count(), max_count, index.ibf().bin_size());
index.fpr_ = fpr;
std::cout << "FPR for " << index_file << ": " << fpr << '\n';
index.is_hibf_ = false;
std::ofstream os{output_file, std::ios::binary};
cereal::BinaryOutputArchive oarchive{os};
oarchive(index);
}
// void upgrade()
// {
// raptor_index<index_structure::ibf> index{};
// {
// std::ifstream is{index_file, std::ios::binary};
// cereal::BinaryInputArchive iarchive{is};
// index.load_old_index(iarchive);
// }
// if (std::isnan(fpr))
// fpr = compute_fpr(index.ibf().hash_function_count(), max_count, index.ibf().bin_size());
// index.fpr_ = fpr;
// std::cout << "FPR for " << index_file << ": " << fpr << '\n';
// index.is_hibf_ = false;
// std::ofstream os{output_file, std::ios::binary};
// cereal::BinaryOutputArchive oarchive{os};
// oarchive(index);
// }

/*!\brief Computes `(1 - exp(-hash_fun * count / bin_size))^hash_fun`.
 * \param[in] hash_fun Number of hash functions.
 * \param[in] count    Number of elements.
 * \param[in] bin_size Size of a bin.
 * \returns The computed rate as a double.
 * \details The operands are converted to double *before* they are multiplied. Multiplying in `size_t`
 *          first would wrap around for large products and yield a wrong rate (e.g. 0 instead of ~1).
 */
static double compute_fpr(size_t const hash_fun, size_t const count, size_t const bin_size)
{
    double const hash_fun_d = static_cast<double>(hash_fun);
    double const exp_arg = (hash_fun_d * static_cast<double>(count)) / static_cast<double>(bin_size);
    double const log_arg = 1.0 - std::exp(-exp_arg);
    return std::exp(hash_fun_d * std::log(log_arg));
}
};
// static double compute_fpr(size_t const hash_fun, size_t const count, size_t const bin_size)
// {
// double const exp_arg = (hash_fun * count) / static_cast<double>(bin_size);
// double const log_arg = 1.0 - std::exp(-exp_arg);
// return std::exp(hash_fun * std::log(log_arg));
// }
// };

} // namespace raptor
4 changes: 2 additions & 2 deletions include/raptor/version.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
#include <cstdint>

//!\brief The major version as MACRO.
#define RAPTOR_VERSION_MAJOR 3
#define RAPTOR_VERSION_MAJOR 4
//!\brief The minor version as MACRO.
#define RAPTOR_VERSION_MINOR 1
#define RAPTOR_VERSION_MINOR 0
//!\brief The patch version as MACRO.
#define RAPTOR_VERSION_PATCH 0
//!\brief The release candidate number. 0 means stable release, >= 1 means release candidate.
Expand Down
Loading

0 comments on commit 3ca0fab

Please sign in to comment.