-
Notifications
You must be signed in to change notification settings - Fork 1
/
XML.h
149 lines (121 loc) · 4.89 KB
/
XML.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#pragma once
#include <cassert>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>
#include "Timer.h"
#include "OrderedTree.h"
#include "TopTree.h"
#include "Labels.h"
#include "3rdparty/pugixml.hpp"
using std::cout;
using std::endl;
using std::flush;
using std::string;
using std::vector;
/// Read an XML file into a tree, using RapidXml
template <typename TreeType>
struct XmlParser {
// TODO figure out if we can keep the char pointers instead of converting them
// to string, this currently uses more than half of the parsing time
static bool parse(const string &filename, TreeType &tree, Labels<string> &labels, const bool verbose = true) {
if (verbose) cout << "Reading and parsing " << filename << "… " << flush;
Timer timer;
pugi::xml_document doc;
pugi::xml_parse_result result = doc.load_file(filename.c_str(), pugi::parse_minimal);
if (!result) { // parse failed
return false;
}
if (verbose) cout << timer.getAndReset() << "ms; building tree… " << flush;
const int rootId = tree.addNode();
assert((int)labels.size() == rootId);
pugi::xml_node root(doc.root().first_child());
labels.set(rootId, root.name());
parseStructure(tree, labels, root, rootId);
if (verbose) cout << timer.get() << "ms." << endl;
return true;
}
protected:
static void parseStructure(TreeType &tree, Labels<string> &labels, pugi::xml_node node, const int id) {
const size_t numChildren = std::distance(node.children().begin(), node.children().end());
int childId = tree.addNodes(numChildren);
for (size_t i = 0; i < numChildren; ++i) {
tree.addEdge(id, childId + i);
}
// Recurse into children
for (pugi::xml_node child : node.children()) {
labels.set(childId, child.name());
parseStructure(tree, labels, child, childId);
++childId;
}
}
};
/// Generic XML tree writer.
/// The primary template is intentionally empty; the actual writers are
/// provided as specialisations for the individual tree types.
template <typename TreeType>
struct XmlWriter {
};
/// Top tree XML writer
template <typename DataType>
struct XmlWriter<TopTree<DataType>> {
    /// Write a TopTree instance to an XML file.
    /// Nodes without labels will have their merge types used as labels.
    /// The last cluster in `tree.clusters` is written as the root. Each level
    /// of nesting is indented by one space and every element ends its line.
    /// An empty tree produces an empty file.
    /// \param tree the TopTree to write
    /// \param filename output filename (path must exist)
    static void write(const TopTree<DataType> &tree, const string &filename) {
        std::ofstream out(filename.c_str());
        assert(out.is_open());

        // Without clusters there is no root; indexing with -1 would read out
        // of bounds, so leave the (empty) file as it is.
        if (tree.clusters.size() == 0) return;

        // One space per nesting level.
        auto writeIndent = [&] (const int depth) {
            for (int i = 0; i < depth; ++i) out << " ";
        };

        // Element name: the cluster's label if it has one, its merge type otherwise.
        auto writeName = [&] (const Cluster<DataType> &node) {
            if (node.label == nullptr) out << node.mergeType; else out << *node.label;
        };

        // The lambda receives itself as last argument so that it can recurse.
        auto writeNode = [&] (const int nodeId, const int depth, const auto &writeNode) -> void {
            const Cluster<DataType> &node = tree.clusters[nodeId];

            writeIndent(depth);
            out << "<";
            writeName(node);
            out << ">";

            // Negative child indices mean "no child".
            if (node.left >= 0 || node.right >= 0) {
                out << '\n';
                if (node.left >= 0) {
                    writeNode(node.left, depth + 1, writeNode);
                }
                if (node.right >= 0) {
                    writeNode(node.right, depth + 1, writeNode);
                }
                writeIndent(depth);
            }

            out << "</";
            writeName(node);
            // End the line so that the next sibling / the parent's closing tag
            // starts on its own, correctly indented line.
            out << ">" << '\n';
        };

        const int rootId = static_cast<int>(tree.clusters.size()) - 1;
        writeNode(rootId, 0, writeNode);
        out.close();
    }
};
/// XML writer for OrderedTree instances
template <typename NodeType, typename EdgeType>
struct XmlWriter<OrderedTree<NodeType, EdgeType>> {
    /// Serialise an OrderedTree as nested XML elements named after the node labels.
    /// Output starts at node 0; outgoing edges whose `valid` flag is not set are skipped.
    /// \param tree the OrderedTree instance to write
    /// \param labels the nodes' labels, indexed by node ID
    /// \param filename filename to use. Directory must exist.
    /// \param indent if true, indent by one space per level and end the line
    ///        after each element; if false, emit no whitespace at all
    template <typename DataType>
    static void write(const OrderedTree<NodeType, EdgeType> &tree, const LabelsT<DataType> &labels, const string &filename, const bool indent=true) {
        std::ofstream file(filename.c_str());
        assert(file.is_open());

        // Leading whitespace for an element at the given nesting level.
        const auto pad = [&] (const int level) {
            if (!indent) return;
            for (int k = 0; k < level; ++k) file << " ";
        };

        // Line terminator, only emitted in indented mode.
        const auto lineBreak = [&] () {
            if (indent) file << endl;
        };

        // Recursive emitter; it is handed to itself as `self` to allow recursion.
        auto emit = [&] (const int current, const int level, const auto &self) -> void {
            pad(level);
            file << "<" << labels[current] << ">";

            // Leaves are written as <x></x> on one line; inner nodes wrap
            // their children and close on a separate, indented line.
            if (!tree.nodes[current].isLeaf()) {
                lineBreak();
                FORALL_OUTGOING_EDGES(tree, current, edge) {
                    if (edge->valid)
                        self(edge->headNode, level + 1, self);
                }
                pad(level);
            }

            file << "</" << labels[current] << ">";
            lineBreak();
        };

        emit(0, 0, emit);
        file.close();
    }
};