-
-
Notifications
You must be signed in to change notification settings - Fork 27
/
Node.h
365 lines (346 loc) · 13.2 KB
/
Node.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
/*
CTML - written by Tinfoilboy
uses the MIT License (https://github.com/tinfoilboy/CFML/blob/master/LICENSE)
*/
#pragma once
#include <algorithm>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
namespace CTML {
// The kinds of node that can be rendered as HTML.
enum class NodeType {
    // a document type declaration; the element name is not used,
    // the node's content determines which document type is emitted
    DOCUMENT_TYPE,
    // an ordinary element
    ELEMENT
};
// How the generated HTML string is laid out.
enum class Readability {
    // the whole string is returned on one line
    SINGLE_LINE,
    // the string is spread over multiple lines, which suits file output
    // and human readers
    MULTILINE,
    // same as MULTILINE, except that newlines inside a node's content
    // are rendered with <br> tags
    MULTILINE_BR
};
// State of the parser that reads "name.class#id" style node names.
enum class NodeParser {
    // nothing is currently being parsed
    NONE,
    // a class name is being parsed
    CLASS,
    // the node's id is being parsed
    ID
};
class Node {
// the type of node this
NodeType m_type;
// the name of this node, e.g. div, a, span, e.t.c.
std::string m_name;
// the classes for this node
std::string m_classes;
// the ids for this node
std::string m_id;
// the content of this node
std::string m_content;
// determines whether or not to add a closing tag (ie. </name>)
// if this is false, it also doesn't add content to the tag
// as there is nowhere to place content
bool m_closeTag = true;
// the child elements of this node
std::vector<Node> m_children;
// an unordered_map of attributes, name is the attribute name and the value is the attribute value
std::unordered_map<std::string, std::string> m_attributes;
public:
// default constructor, does nothing
Node() = default;
// create a node with the name specified
Node(const std::string& name) {
this->m_type = NodeType::ELEMENT;
this->SetName(name);
}
// create a node with the name specified, also containing the following content
Node(const std::string& name, const std::string& content) {
this->m_type = NodeType::ELEMENT;
this->SetName(name);
this->m_content = content;
}
// return this node as an html string
std::string ToString(Readability readability, int indentLevel) const {
// the element string that will be returned
std::string elem = "";
// the four space indent.
std::string indent = "";
std::string indentContent = "";
// if the readabilty points is either multiline types, this would be true
bool isMultiline = (readability == Readability::MULTILINE || readability == Readability::MULTILINE_BR);
// increment the indent string by four spaces based on the indentLevel
// but only if the readabilty is MULTILINE OR MULTILINE_BR
if (isMultiline) {
for (int i = 0; i < indentLevel; i++) {
indent = std::string(INDENT_SPACES * indentLevel, ' ');
}
// set the m_content indent level to the indent level plus four more spaces
indentContent = std::string(INDENT_SPACES * (indentLevel + 1), ' ');
}
if (this->m_type == NodeType::ELEMENT) {
// construct the first part of the element string, the tag beginning
elem = ((isMultiline) ? indent : "") + "<" + m_name + "";
// add the class list if it isn't empty
if (!m_classes.empty()) {
std::string classTag = "class=\"";
elem += " " + classTag + m_classes + "\"";
}
// add the id list if it isn't empty
if (!m_id.empty()) {
std::string idTag = "id=\"";
elem += " " + idTag + m_id + "\"";
}
// make an iterator for each attribute
for (const auto& attr : m_attributes) {
elem += " " + attr.first + "=\"" + attr.second + "\"";
}
// close the beginning tag
elem += ">";
// only add the content, as well as the closing tag if it is
// specified to do so
if (m_closeTag)
{
// if multiline is specified and the content/children aren't empty, add a newline
if (isMultiline && (!m_content.empty() || !m_children.empty()))
elem += "\n";
// if we have m_content to append
if (!m_content.empty()) {
// format the elements content based on the readability, as well as the indent level for content
elem += _GetFormattedContent(readability, indentContent);
}
// get every child node from the m_children list
for (std::size_t i = 0; i < m_children.size(); ++i) {
const auto& childNode = m_children[i];
// append the child node to the elem string.
// if this is not the last child node append a newline if multiline
elem += childNode.ToString(readability, indentLevel + 1) + ((i != m_children.size() - 1 && isMultiline) ? "\n" : "");
}
// if multiline is specified and the content/children aren't empty, add a newline and indent
elem += ((isMultiline && (!m_content.empty() || !m_children.empty())) ? "\n" + indent : "") + "</" + m_name + ">";
}
}
else if (this->m_type == NodeType::DOCUMENT_TYPE) {
// just construct the docm_type from the m_content given, if readability is wanted, add a newline
elem += "<!DOCTYPE " + m_content + ">" + ((isMultiline) ? "\n" : "");
}
return elem;
}
std::string GetTreeString(int indentLevel) const {
// the tree string
std::string tree;
// indent level
std::string indent(INDENT_SPACES * indentLevel, ' ');
// turn the class list into actual classes for the elements
std::string classList = m_classes;
std::replace(classList.begin(), classList.end(), ' ', '.');
// if the class list isn't empty, prepend a period
if (!classList.empty())
classList = '.' + classList;
// add the current element to the tree
tree += indent + " |_ " + this->m_name + classList + '\n';
// for each child
for (const auto& child : m_children) {
tree += child.GetTreeString(indentLevel + 1) + '\n';
}
return tree;
}
Node& SetName(const std::string& name) {
// the index of a period
const auto periodIndex = name.find('.');
// the index of a pound sign
const auto poundIndex = name.find('#');
// if there are classes in the name
if (periodIndex != std::string::npos || poundIndex != std::string::npos) {
// if the pound index comes before the period index
bool poundBefore = (poundIndex != std::string::npos && poundIndex < periodIndex);
// get the first index for parsing
// if pound comes first, or there are no periods, use the first pound index first
// else use the first period index
const auto ind = ((poundBefore || (periodIndex == std::string::npos && poundIndex != std::string::npos)) ? poundIndex : periodIndex);
// get the element name
std::string elemName = name.substr(0, ind);
// parse the current ids and classes
_ParseClassesAndIDS(name.substr(ind));
// set the element name to the built element name
this->m_name = elemName;
}
else {
this->m_name = name;
}
return *this;
}
std::string GetAttribute(const std::string& name) const {
// the class attribute is tracked with m_classes, so we return that instead of m_attributes[name]
if (name != "class" && name != "id" && m_attributes.count(name) > 0)
return m_attributes.at(name);
else if (name == "class")
return m_classes;
else if (name == "id")
return m_id;
else
return "";
}
std::string GetSelector() const {
std::string classesPeriod = _ReplaceAllOccurrences(m_classes, " ", ".");
return m_name + classesPeriod + "#" + m_id;
}
Node& SetAttribute(std::string name, std::string value) {
// setting the "class" attribute would make there be two class attributes on the element
// so therefore, if the name of this is class, we just override "m_classes"
if (name != "class" && name != "id")
m_attributes[name] = value;
else if (name == "class")
m_classes = value;
else if (name == "id")
m_id = value;
return *this;
}
Node& SetType(NodeType type) {
this->m_type = type;
return *this;
}
Node& SetContent(const std::string& text) {
this->m_content = text;
return *this;
}
Node& ToggleClass(const std::string& className) {
size_t findIndex = m_classes.find(className);
if (findIndex == std::string::npos) {
// append the class
m_classes += ((!m_classes.empty()) ? " " : "") + className;
}
else {
// remove the class
m_classes.erase(findIndex, className.size());
}
return *this;
}
Node& AppendChild(Node child) {
m_children.push_back(child);
return *this;
}
Node& UseClosingTag(bool close) {
this->m_closeTag = close;
return *this;
}
private:
std::string _GetFormattedContent(Readability readability, const std::string& indent) const {
std::string result;
std::istringstream iss(m_content);
// if we are using either variant of multiple lines, run this.
if (readability == Readability::MULTILINE || readability == Readability::MULTILINE_BR) {
// the newline string, differs between MULTILINE and MULTILINE_BR
std::string newline = ((readability == Readability::MULTILINE_BR) ? "\n" + indent + "<br>\n" : "\n");
// the current line iterated
int curLine = 0;
// iterate through each line in this node
for (std::string line; std::getline(iss, line);)
{
result += ((curLine > 0) ? newline : "") + indent + line;
curLine++;
}
}
else {
// iterate through each line in this node
for (std::string line; std::getline(iss, line);)
{
result += line;
}
}
// replaces all instances of "<" in the content with "<", to escape rogue HTML
result = _ReplaceAllOccurrences(result, "<", "<");
// replaces all instances of ">" in the content with ">" to escape rogue HTML
result = _ReplaceAllOccurrences(result, ">", ">");
// return the result of the content
return result;
}
std::string _ReplaceAllOccurrences(std::string replacer, const std::string& replacable, const std::string& replace) const {
// the start of the current replacable string
size_t start = 0;
// try and find each occurrence of replaceable until it can't be found
while ((start = replacer.find(replacable, start)) != std::string::npos) {
// replace the actual string
replacer.replace(start, replacable.length(), replace);
// add to the start so that find can be run again
start += replace.length();
}
// return the replaced string
return replacer;
}
int _CountOccurrences(std::string finder, const std::string& findable) const {
// the occurrences of the string
int occurrences = 0;
// the start of the current replacable string
size_t start = 0;
// try and find each occurrence of replaceable until it can't be found
while ((start = finder.find(findable, start)) != std::string::npos) {
// replace the actual string
occurrences++;
// add to the start so that find can be run again
start += findable.length();
}
// return the replaced string
return occurrences;
}
void _ParseClassesAndIDS(std::string classesAndIDs) {
// what is currently being parsed
// zero for nothing
// one for class
// two for id
NodeParser currentlyParsing = NodeParser::NONE;
// the string for the class or ID
std::string attrString;
// iterate through each character in the string
for (unsigned int i = 0; i < classesAndIDs.size(); i++) {
// the current character being iterated
char curChar = classesAndIDs[i];
if (currentlyParsing == NodeParser::NONE) {
// if the current character is a period, set the current parsing to class
// else if the current character is a pound sign, set the current parsing to id
if (curChar == '.') {
currentlyParsing = NodeParser::CLASS;
}
else if (curChar == '#') {
currentlyParsing = NodeParser::ID;
}
}
else {
// if the current character is a period, set the current parsing to class
// else if the current character is a pound sign, set the current parsing to id
if (curChar == '.' || curChar == '#') {
if (currentlyParsing == NodeParser::CLASS)
m_classes += attrString + " ";
else
// if we hit an id, we just reset the id
// this is because HTML only allows for a single id on each element
m_id = attrString;
attrString.clear();
currentlyParsing = ((curChar == '.') ? NodeParser::CLASS : NodeParser::ID);
}
else {
// add the current character to the class or id string
attrString += curChar;
}
}
// if we are at the last character, and are still parsing something, add it to the respective attr
if (currentlyParsing != NodeParser::NONE && i == classesAndIDs.size() - 1) {
if (currentlyParsing == NodeParser::CLASS)
m_classes += attrString;
else
// if we hit an id, we just reset the id
// this is because HTML only allows for a single id on each element
m_id = attrString;
attrString.clear();
}
}
// if there is an extra space at the end of m_classes, remove it
if (!m_classes.empty()) {
if (isspace(m_classes.at(m_classes.size() - 1)))
m_classes = m_classes.substr(0, m_classes.size() - 1);
}
}
};
// Appends a copy of node_child to node_parent and returns node_parent, so
// that several children can be chained: parent << a << b.
// The child is taken by const reference so that temporaries such as
// Node("p") can be appended as well.
inline Node& operator<<(Node& node_parent, const Node& node_child)
{
    node_parent.AppendChild(node_child);
    return node_parent;
}
}