Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tools: add rime_table_decompiler #706

Merged
merged 4 commits into from Sep 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
31 changes: 0 additions & 31 deletions src/rime/dict/table.cc
Expand Up @@ -21,37 +21,6 @@ const int kTableFormatLowestCompatible = 4.0;
// Literal marker "Rime::Table/".
// NOTE(review): presumably this precedes the version number in a table file's
// format string -- confirm against the code that parses it.
const char kTableFormatPrefix[] = "Rime::Table/";
// Number of characters in kTableFormatPrefix; sizeof counts the terminating
// NUL, hence the "- 1".
const size_t kTableFormatPrefixLen = sizeof(kTableFormatPrefix) - 1;

// Cursor over a table index that is organised in levels. It is constructed
// from the top-level index, tracks the current depth in level_, and is moved
// with Advance() / Backdate() / Reset().
// NOTE(review): the member functions are only declared here; the descriptions
// below follow the declarations and existing comments -- confirm details
// against the definitions.
class TableQuery {
public:
// Stores `index` as the level-1 index and calls Reset().
TableQuery(table::Index* index) : lv1_index_(index) { Reset(); }

// Returns an accessor for `syllable_id` without modifying the query (const).
// NOTE(review): presumably the accessor covers the entries reachable from the
// current level via `syllable_id` -- confirm against the definition.
TableAccessor Access(SyllableId syllable_id, double credibility = 0.0) const;

// down to next level
// Returns a bool; presumably false when the step cannot be taken -- confirm.
bool Advance(SyllableId syllable_id, double credibility = 0.0);

// up one level
bool Backdate();

// back to root
void Reset();

// Current depth of the query.
size_t level() const { return level_; }

protected:
// Current depth; 0 by default.
size_t level_ = 0;
// NOTE(review): presumably the syllable ids consumed so far -- confirm.
Code index_code_;
// NOTE(review): presumably one credibility value per level entered -- confirm.
vector<double> credibility_;

private:
bool Walk(SyllableId syllable_id);

// One index pointer per level; only lv1_index_ is set by the constructor,
// the others default to nullptr.
table::HeadIndex* lv1_index_ = nullptr;
table::TrunkIndex* lv2_index_ = nullptr;
table::TrunkIndex* lv3_index_ = nullptr;
table::TailIndex* lv4_index_ = nullptr;
};

TableAccessor::TableAccessor(const Code& index_code,
const List<table::Entry>* list,
double credibility)
Expand Down
33 changes: 32 additions & 1 deletion src/rime/dict/table.h
Expand Up @@ -128,7 +128,37 @@ class TableAccessor {
using TableQueryResult = map<int, vector<TableAccessor>>;

struct SyllableGraph;
class TableQuery;

// Cursor over a table index that is organised in levels. It is constructed
// from the top-level index, tracks the current depth in level_, and is moved
// with Advance() / Backdate() / Reset().
// NOTE(review): the member functions are only declared here; the descriptions
// below follow the declarations and existing comments -- confirm details
// against the definitions.
class TableQuery {
public:
// Stores `index` as the level-1 index and calls Reset().
TableQuery(table::Index* index) : lv1_index_(index) { Reset(); }

// Returns an accessor for `syllable_id` without modifying the query (const).
// NOTE(review): presumably the accessor covers the entries reachable from the
// current level via `syllable_id` -- confirm against the definition.
TableAccessor Access(SyllableId syllable_id, double credibility = 0.0) const;

// down to next level
// Returns a bool; presumably false when the step cannot be taken -- confirm.
bool Advance(SyllableId syllable_id, double credibility = 0.0);

// up one level
bool Backdate();

// back to root
void Reset();

// Current depth of the query.
size_t level() const { return level_; }

protected:
// Current depth; 0 by default.
size_t level_ = 0;
// NOTE(review): presumably the syllable ids consumed so far -- confirm.
Code index_code_;
// NOTE(review): presumably one credibility value per level entered -- confirm.
vector<double> credibility_;

private:
bool Walk(SyllableId syllable_id);

// One index pointer per level; only lv1_index_ is set by the constructor,
// the others default to nullptr.
table::HeadIndex* lv1_index_ = nullptr;
table::TrunkIndex* lv2_index_ = nullptr;
table::TrunkIndex* lv3_index_ = nullptr;
table::TailIndex* lv4_index_ = nullptr;
};

class Table : public MappedFile {
public:
Expand All @@ -152,6 +182,7 @@ class Table : public MappedFile {
RIME_API string GetEntryText(const table::Entry& entry);

uint32_t dict_file_checksum() const;
table::Metadata* metadata() const { return metadata_; }

private:
table::Index* BuildIndex(const Vocabulary& vocabulary, size_t num_syllables);
Expand Down
13 changes: 13 additions & 0 deletions tools/CMakeLists.txt
Expand Up @@ -37,8 +37,21 @@ target_link_libraries(rime_deployer
${rime_dict_library}
${rime_levers_library})

# Sources for the rime_table_decompiler tool: its own entry point plus four
# dictionary sources compiled directly from src/rime/dict.
# NOTE(review): these sources are built into the tool even though it also links
# ${rime_library}; presumably the symbols it needs are not exported from the
# library -- confirm before removing the duplication.
set(rime_table_decompiler_src
"rime_table_decompiler.cc"
${CMAKE_SOURCE_DIR}/src/rime/dict/table.cc
${CMAKE_SOURCE_DIR}/src/rime/dict/mapped_file.cc
${CMAKE_SOURCE_DIR}/src/rime/dict/string_table.cc
${CMAKE_SOURCE_DIR}/src/rime/dict/vocabulary.cc
)
add_executable(rime_table_decompiler ${rime_table_decompiler_src})
target_link_libraries(rime_table_decompiler
${rime_library}
${rime_dict_library})

# Install the command-line tools into ${BIN_INSTALL_DIR}.
install(TARGETS rime_deployer DESTINATION ${BIN_INSTALL_DIR})
install(TARGETS rime_dict_manager DESTINATION ${BIN_INSTALL_DIR})
install(TARGETS rime_table_decompiler DESTINATION ${BIN_INSTALL_DIR})

install(TARGETS rime_patch DESTINATION ${BIN_INSTALL_DIR})

Expand Down
124 changes: 124 additions & 0 deletions tools/rime_table_decompiler.cc
@@ -0,0 +1,124 @@
// rime_table_decompiler.cc
// nopdan <me@nopdan.com>
//
#include <cmath>
#include <fstream>
#include <iomanip>
#include <ios>
#include <iostream>
#include <string>
#include <rime/dict/table.h>

// usage:
// rime_table_decompiler <rime-table-file> [save-path]
// example:
// rime_table_decompiler pinyin.table.bin pinyin.dict.yaml

// Writes the syllables of `code` to `fout`, separated by single spaces.
//
// table: used to translate each syllable id into text via GetSyllableById().
// code:  sequence of syllable ids; taken by const reference so the sequence
//        is not copied for every entry that gets written.
// fout:  destination stream. Nothing is written when `code` is empty.
void outCode(rime::Table* table, const rime::Code& code, std::ofstream& fout) {
  bool first = true;
  for (const auto& syllable_id : code) {
    // The separator goes between syllables only, never before the first one.
    if (!first) {
      fout << " ";
    }
    first = false;
    fout << table->GetSyllableById(syllable_id);
  }
}

// Writes every remaining entry of `accessor` to `fout`, one per line, as
//   <text> TAB <space-separated syllables> [TAB <exp(weight)>]
// The weight column is omitted when the stored weight is negative.
//
// table:    resolves entry text and syllable ids.
// accessor: taken by value; it is advanced with Next() until exhausted.
// fout:     destination stream.
void access(rime::Table* table,
            rime::TableAccessor accessor,
            std::ofstream& fout) {
  while (!accessor.exhausted()) {
    const auto* entry = accessor.entry();
    fout << table->GetEntryText(*entry) << "\t";
    outCode(table, accessor.code(), fout);

    const auto weight = entry->weight;
    if (weight >= 0) {
      // NOTE(review): presumably weights are stored as logarithms, hence the
      // exp() -- confirm. <cmath> only guarantees std::exp (not ::exp); the
      // cast to double keeps the result independent of overload selection.
      fout << "\t" << std::exp(static_cast<double>(weight));
    }
    // '\n' instead of std::endl: avoids forcing a flush for every entry; the
    // stream is flushed when the caller closes it.
    fout << '\n';
    accessor.Next();
  }
}

// Depth-first walk over the table index starting at the query's current
// position. For each syllable id in [0, num_syllables) the entries returned
// by Access() are written first; if the query can Advance() on that id, the
// walk either recurses (level below 3) or writes the result of Access(0)
// (level 3 and above), and then steps back with Backdate().
void recursion(rime::Table* table,
               rime::TableQuery* query,
               std::ofstream& fout) {
  const auto syllable_count = table->metadata()->num_syllables;
  for (int syllable_id = 0; syllable_id < syllable_count; ++syllable_id) {
    access(table, query->Access(syllable_id), fout);

    if (!query->Advance(syllable_id)) {
      continue;  // nothing below this syllable
    }

    if (query->level() >= 3) {
      // Deepest level handled here: no further descent, dump Access(0).
      access(table, query->Access(0), fout);
    } else {
      recursion(table, query, fout);
    }
    query->Backdate();
  }
}

// Prints the table's syllable and entry counts to stdout, then writes all
// entries to `fout` by walking the index from its root.
void traversal(rime::Table* table, std::ofstream& fout) {
  auto* meta = table->metadata();
  std::cout << "num_syllables: " << meta->num_syllables << std::endl;
  std::cout << "num_entries: " << meta->num_entries << std::endl;

  // Fixed notation with zero decimals for every number written to the file.
  fout << std::fixed << std::setprecision(0);

  rime::TableQuery root_query(meta->index.get());
  recursion(table, &root_query, fout);
}

// Entry point: loads a compiled Rime table and writes its entries to a
// dictionary YAML file.
//
// argv[1]: path of the table file.
// argv[2]: optional output path. When absent, the output path is argv[1] with
//          everything from the last ".table.bin" onwards removed, plus ".yaml".
//
// Returns 0 on success and after printing the usage text for a wrong argument
// count; returns 1 when the table cannot be loaded, the output file cannot be
// opened, or writing the output fails.
int main(int argc, char* argv[]) {
  if (argc < 2 || argc > 3) {
    std::cout << "Usage: rime_table_decompiler <rime-table-file> [save-path]"
              << std::endl;
    std::cout << "Example: rime_table_decompiler pinyin.table.bin pinyin.dict.yaml"
              << std::endl;
    return 0;
  }

  std::string fileName(argv[1]);
  rime::Table table(fileName);
  if (!table.Load()) {
    std::cerr << "Failed to load table." << std::endl;
    return 1;
  }

  // Remove the extension ".table.bin" if present.
  const size_t table_bin_idx = fileName.rfind(".table.bin");
  if (std::string::npos != table_bin_idx) {
    fileName.erase(table_bin_idx);
  }
  const std::string outputName =
      (argc == 3) ? argv[2] : fileName + ".yaml";

  std::ofstream fout;
  fout.open(outputName);
  if (!fout.is_open()) {
    std::cerr << "Failed to open file " << outputName << std::endl;
    return 1;
  }

  // schema id: the input path without directories and without ".table.bin"
  const size_t last_slash_idx = fileName.find_last_of("\\/");
  if (std::string::npos != last_slash_idx) {
    fileName.erase(0, last_slash_idx + 1);
  }
  fout << "# Rime dictionary\n\n";
  fout << "---\n"
          "name: " << fileName << "\n"
          "version: \"1.0\"\n"
          "...\n\n";
  traversal(&table, fout);

  // close() flushes the remaining buffer. Any write error (e.g. disk full) is
  // latched in the stream state, so check it before reporting success.
  fout.close();
  if (fout.fail()) {
    std::cerr << "Failed to write file " << outputName << std::endl;
    return 1;
  }

  std::cout << "Save to: " << outputName << std::endl;
  return 0;
}