-
-
Notifications
You must be signed in to change notification settings - Fork 5k
/
import-enex-html-gen.js
184 lines (156 loc) · 5.62 KB
/
import-enex-html-gen.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
const stringToStream = require('string-to-stream');
// const cleanHtml = require('clean-html');
const resourceUtils = require('./resourceUtils.js');
const { cssValue } = require('./import-enex-md-gen');
const htmlUtils = require('./htmlUtils').default;
const Entities = require('html-entities').AllHtmlEntities;
const htmlentities = new Entities().encode;
// Appends the HTML element representing `resource` to `lines` and returns `lines`.
//
// - lines: array of HTML fragments; it is pushed to in place.
// - resource: object providing `id`, `mime` and optionally `title` / `filename`.
// - attributes: attribute map of the source tag. When it has no truthy `alt`, one is
//   written onto it (resource title, then filename, then an empty string).
//
// Images go through resourceUtils.imgElement, 'audio/x-m4a' through
// resourceUtils.audioElement and every other mime type through
// resourceUtils.attachmentElement.
function addResourceTag(lines, resource, attributes) {
	// Note: refactor to use Resource.markdownTag
	if (!attributes.alt) {
		attributes.alt = resource.title || resource.filename || '';
	}

	const { id, mime } = resource;
	const src = `:/${id}`;

	let element;
	if (resourceUtils.isImageMimeType(mime)) {
		element = resourceUtils.imgElement({ src, attributes });
	} else if (mime === 'audio/x-m4a') {
		// TODO: once https://github.com/laurent22/joplin/issues/1794 is resolved,
		// come back to this and make sure it works.
		element = resourceUtils.audioElement({ src, alt: attributes.alt, id });
	} else {
		// TODO: figure out what other mime types can be handled more gracefully
		element = resourceUtils.attachmentElement({ src, attributes, id });
	}

	lines.push(element);
	return lines;
}
// Returns a new plain object holding the own enumerable attributes of `node`,
// with every attribute name lower-cased. Values are copied as they are.
//
// - node: object with an optional `attributes` map.
// - returns: `{}` when `node.attributes` is falsy, otherwise the lower-cased copy.
//
// Object.keys is used rather than calling `node.attributes.hasOwnProperty(...)`,
// because the attribute map comes from parsed input: an attribute that is itself
// named "hasOwnProperty", or a map without a prototype, would make that call throw.
function attributeToLowerCase(node) {
	if (!node.attributes) return {};

	const output = {};
	for (const name of Object.keys(node.attributes)) {
		output[name.toLowerCase()] = node.attributes[name];
	}
	return output;
}
function enexXmlToHtml_(stream, resources) {
const remainingResources = resources.slice();
const removeRemainingResource = id => {
for (let i = 0; i < remainingResources.length; i++) {
const r = remainingResources[i];
if (r.id === id) {
remainingResources.splice(i, 1);
}
}
};
return new Promise((resolve) => {
const options = {};
const strict = false;
const saxStream = require('@joplin/fork-sax').createStream(strict, options);
const section = {
type: 'text',
lines: [],
parent: null,
};
saxStream.on('error', (e) => {
console.warn(e);
});
saxStream.on('text', (text) => {
section.lines.push(htmlentities(text));
});
saxStream.on('opentag', function(node) {
const tagName = node.name.toLowerCase();
const attributesStr = resourceUtils.attributesToStr(node.attributes);
const nodeAttributes = attributeToLowerCase(node);
if (tagName === 'en-media') {
const nodeAttributes = attributeToLowerCase(node);
const hash = nodeAttributes.hash;
let resource = null;
for (let i = 0; i < resources.length; i++) {
const r = resources[i];
if (r.id === hash) {
resource = r;
removeRemainingResource(r.id);
break;
}
}
if (!resource) {
// TODO: Extract this duplicate of code in ./import-enex-md-gen.js
let found = false;
for (let i = 0; i < remainingResources.length; i++) {
const r = remainingResources[i];
if (!r.id) {
resource = { ...r };
resource.id = hash;
remainingResources.splice(i, 1);
found = true;
break;
}
}
if (!found) {
// console.warn(`Hash with no associated resource: ${hash}`);
}
}
// If the resource does not appear among the note's resources, it
// means it's an attachment. It will be appended along with the
// other remaining resources at the bottom of the markdown text.
if (resource && !!resource.id) {
section.lines = addResourceTag(section.lines, resource, nodeAttributes);
}
} else if (tagName === 'en-todo') {
const checkedHtml = nodeAttributes.checked && nodeAttributes.checked.toLowerCase() === 'true' ? ' checked="checked" ' : ' ';
section.lines.push(`<input${checkedHtml}type="checkbox" onclick="return false;" />`);
} else if (tagName === 'li' && cssValue(this, nodeAttributes.style, '--en-checked')) {
const checkedHtml = cssValue(this, nodeAttributes.style, '--en-checked') === 'true' ? ' checked="checked" ' : ' ';
section.lines.push(`<${tagName}${attributesStr}> <input${checkedHtml}type="checkbox" onclick="return false;" />`);
} else if (htmlUtils.isSelfClosingTag(tagName)) {
section.lines.push(`<${tagName}${attributesStr}/>`);
} else {
section.lines.push(`<${tagName}${attributesStr}>`);
}
});
saxStream.on('closetag', (node) => {
const tagName = node ? node.toLowerCase() : node;
if (!htmlUtils.isSelfClosingTag(tagName)) section.lines.push(`</${tagName}>`);
});
saxStream.on('attribute', () => {});
saxStream.on('end', () => {
resolve({
content: section,
resources: remainingResources,
});
});
stream.pipe(saxStream);
});
}
// Converts the XML body of an ENEX note, given as a string, to an HTML string.
//
// - xmlString: the note markup; it is wrapped in a stream before parsing.
// - resources: resources of the note, forwarded to enexXmlToHtml_.
// - options: forwarded to enexXmlToHtml_ unchanged (defaults to an empty object).
// - returns: a Promise resolving to the generated HTML.
async function enexXmlToHtml(xmlString, resources, options = {}) {
	const { content } = await enexXmlToHtml_(stringToStream(xmlString), resources, options);
	const cleanedParts = await beautifyHtml(content.lines.join(''));
	return cleanedParts.join('');
}
// Pass-through stand-in for the former clean-html based formatter: the HTML is
// returned untouched, wrapped in a one-element array.
//
// clean-html is not robust enough for the HTML Evernote can generate. In the best
// case it throws, but in some cases it goes into an infinite loop, which is why it
// had to be disabled.
//
// Fixed https://github.com/laurent22/joplin/issues/3958
//
// Disabled implementation, kept for reference:
//
// return new Promise((resolve) => {
// 	try {
// 		cleanHtml.clean(html, { wrap: 0 }, (...cleanedHtml) => resolve(cleanedHtml));
// 	} catch (error) {
// 		console.warn(`Could not clean HTML - the "unclean" version will be used: ${error.message}: ${html.trim().substr(0, 512).replace(/[\n\r]/g, ' ')}...`);
// 		resolve([html]);
// 	}
// });
const beautifyHtml = (html) => [html];
// Only the string-based converter is exported; the other functions in this file are not.
module.exports = { enexXmlToHtml };