forked from rime/librime
-
Notifications
You must be signed in to change notification settings - Fork 0
/
rime_table_decompiler.cc
124 lines (112 loc) · 3.26 KB
/
rime_table_decompiler.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
// rime_table_decompiler.cc
// nopdan <me@nopdan.com>
//
#include <cmath>
#include <fstream>
#include <iomanip>
#include <ios>
#include <iostream>
#include <string>
#include <rime/dict/table.h>
// usage:
// rime_table_decompiler <rime-table-file> [save-path]
// example:
// rime_table_decompiler pinyin.table.bin pinyin.dict.yaml
void outCode(rime::Table* table, const rime::Code code, std::ofstream& fout) {
if (code.empty()) {
return;
}
auto item = code.begin();
fout << table->GetSyllableById(*item);
item++;
for (; item != code.end(); ++item) {
fout << " ";
fout << table->GetSyllableById(*item);
}
return;
}
void access(rime::Table* table,
rime::TableAccessor accessor,
std::ofstream& fout) {
while (!accessor.exhausted()) {
auto word = table->GetEntryText(*accessor.entry());
fout << word << "\t";
outCode(table, accessor.code(), fout);
auto weight = accessor.entry()->weight;
if (weight >= 0) {
fout << "\t" << exp(weight);
}
fout << std::endl;
accessor.Next();
}
}
// recursively traverse table
void recursion(rime::Table* table,
rime::TableQuery* query,
std::ofstream& fout) {
for (int i = 0; i < table->metadata()->num_syllables; i++) {
auto accessor = query->Access(i);
access(table, accessor, fout);
if (query->Advance(i)) {
if (query->level() < 3) {
recursion(table, query, fout);
} else {
auto accessor = query->Access(0);
access(table, accessor, fout);
}
query->Backdate();
}
}
}
void traversal(rime::Table* table, std::ofstream& fout) {
auto metadata = table->metadata();
std::cout << "num_syllables: " << metadata->num_syllables << std::endl;
std::cout << "num_entries: " << metadata->num_entries << std::endl;
fout << std::fixed;
fout << std::setprecision(0);
rime::TableQuery query(table->metadata()->index.get());
recursion(table, &query, fout);
}
int main(int argc, char* argv[]) {
if (argc < 2 || argc > 3) {
std::cout << "Usage: rime_table_decompiler <rime-table-file> [save-path]"
<< std::endl;
std::cout << "Example: rime_table_decompiler pinyin.table.bin pinyin.dict.yaml"
<< std::endl;
return 0;
}
std::string fileName(argv[1]);
rime::Table table(fileName);
bool success = table.Load();
if (!success) {
std::cerr << "Failed to load table." << std::endl;
return 1;
}
// Remove the extension ".table.bin" if present.
const size_t table_bin_idx = fileName.rfind(".table.bin");
if (std::string::npos != table_bin_idx) {
fileName.erase(table_bin_idx);
}
const std::string outputName =
(argc == 3) ? argv[2]: fileName + ".yaml";
std::ofstream fout;
fout.open(outputName);
if (!fout.is_open()) {
std::cerr << "Failed to open file " << outputName << std::endl;
return 1;
}
// schema id
const size_t last_slash_idx = fileName.find_last_of("\\/");
if (std::string::npos != last_slash_idx) {
fileName.erase(0, last_slash_idx + 1);
}
fout << "# Rime dictionary\n\n";
fout << "---\n"
"name: " << fileName << "\n"
"version: \"1.0\"\n"
"...\n\n";
traversal(&table, fout);
std::cout << "Save to: " << outputName << std::endl;
fout.close();
return 0;
}