Electron: Fixes #318, Fixes #317: ENEX: Improved handling and rendering of plain text links. Improved detection and import of resources. Improved import of tables.
laurent22 · Mar 23, 2018 · a677b2e · a677b2e
1 parent c63bb19
commit a677b2e
Show file tree

Hide file tree

Showing 4 changed files with 76 additions and 70 deletions.
diff --git a/ElectronClient/app/gui/MainScreen.jsx b/ElectronClient/app/gui/MainScreen.jsx
@@ -278,7 +278,7 @@ class MainScreenComponent extends React.Component {
 			position: 'absolute',
 			top: 0,
 			left: 0,
-			backgroundColor: theme.backgroundColorTransparent,
+			backgroundColor: theme.backgroundColor,
 			width: width - 20,
 			height: height - 20,
 			padding: 10,

diff --git a/ReactNativeClient/lib/MdToHtml.js b/ReactNativeClient/lib/MdToHtml.js
@@ -4,6 +4,7 @@ const htmlentities = (new Entities()).encode;
 const Resource = require('lib/models/Resource.js');
 const ModelCache = require('lib/ModelCache');
 const { shim } = require('lib/shim.js');
+const { _ } = require('lib/locale');
 const md5 = require('md5');
 const MdToHtml_Katex = require('lib/MdToHtml_Katex');
 
@@ -54,11 +55,11 @@ class MdToHtml {
 		return output.join(' ');
 	}
 
-	getAttr_(attrs, name) {
+	getAttr_(attrs, name, defaultValue = null) {
 		for (let i = 0; i < attrs.length; i++) {
 			if (attrs[i][0] === name) return attrs[i].length > 1 ? attrs[i][1] : null;
 		}
-		return null;
+		return defaultValue;
 	}
 
 	setAttr_(attrs, name, value) {
@@ -182,11 +183,23 @@ class MdToHtml {
 		return null;
 	}
 
+	urldecode_(str) {
+		try {
+			return decodeURIComponent((str+'').replace(/\+/g, '%20'));
+		} catch (error) {
+			// decodeURIComponent can throw if the string contains non-encoded data (for example "100%")
+			// so in this case just return the non encoded string. 
+			return str;
+		}
+	}
+
+
 	renderTokens_(markdownIt, tokens, options) {
 		let output = [];
 		let previousToken = null;
 		let anchorAttrs = [];
 		let extraCssBlocks = {};
+		let anchorHrefs = [];
 
 		for (let i = 0; i < tokens.length; i++) {
 			let t = tokens[i];
@@ -202,6 +215,7 @@ class MdToHtml {
 			const codeBlockLanguage = t && t.info ? t.info : null;
 			let rendererPlugin = null;
 			let rendererPluginOptions = { tagType: 'inline' };
+			let linkHref = null;
 
 			if (isCodeBlock) rendererPlugin = this.rendererPlugin_(codeBlockLanguage);
 
@@ -233,6 +247,7 @@ class MdToHtml {
 			if (openTag) {
 				if (openTag === 'a') {
 					anchorAttrs.push(attrs);
+					anchorHrefs.push(this.getAttr_(attrs, 'href'));
 					output.push(this.renderOpenLink_(attrs, options));
 				} else {
 					const attrsHtml = this.renderAttrs_(attrs);
@@ -317,7 +332,28 @@ class MdToHtml {
 
 			if (closeTag) {
 				if (closeTag === 'a') {
-					output.push(this.renderCloseLink_(anchorAttrs.pop(), options));
+					const currentAnchorAttrs = anchorAttrs.pop();
+					const previousContent = output.length ? output[output.length - 1].trim() : '';
+					const anchorHref = this.getAttr_(currentAnchorAttrs, 'href', '').trim();
+
+					// Optimisation: If the content of the anchor is the same as the URL, we replace the content
+					// by (Link). This is to shorten the text, which is important especially when the note comes
+					// from imported HTML, which can contain many such links and make the text unreadable. An example
+					// would be a movie review that has multiple links to allow a user to rate the film from 1 to 5 stars.
+					// In the original page, it might be rendered as stars, via CSS, but in the imported note it would look like this:
+					// http://example.com/rate/1 http://example.com/rate/2 http://example.com/rate/3
+					// http://example.com/rate/4 http://example.com/rate/5
+					// which would take a lot of screen space even though it doesn't matter since the user is unlikely
+					// to rate the film from the note. This is actually a nice example, still readable, but there is way
+					// worse than this in notes that come from web-clipped content.
+					// With this change, the links will still be preserved but displayed like
+					// (Link) (Link) (Link) (Link) (Link)
+					if (this.urldecode_(previousContent) === htmlentities(this.urldecode_(anchorHref))) {
+						output.pop();
+						output.push(_('(Link)'));
+					}
+
+					output.push(this.renderCloseLink_(currentAnchorAttrs, options));
 				} else {
 					output.push('</' + closeTag + '>');
 				}

diff --git a/ReactNativeClient/lib/import-enex-md-gen.js b/ReactNativeClient/lib/import-enex-md-gen.js
@@ -476,12 +476,16 @@ function enexXmlToMdArray(stream, resources) {
 					//		</note>
 					//	</en-export>
 
+					// Note that there's also the case of resources with no ID where the ID is actually the MD5 of the content.
+					// This is handled in import-enex.js
+
 					let found = false;
 					for (let i = 0; i < remainingResources.length; i++) {
 						let r = remainingResources[i];
 						if (!r.id) {
-							r.id = hash;
-							remainingResources[i] = r;
+							resource = Object.assign({}, r);
+							resource.id = hash;
+							remainingResources.splice(i, 1);
 							found = true;
 							break;
 						}
@@ -490,13 +494,13 @@ function enexXmlToMdArray(stream, resources) {
 					if (!found) {
 						console.warn('Hash with no associated resource: ' + hash);
 					}
-				} else {
-					// If the resource does not appear among the note's resources, it
-					// means it's an attachement. It will be appended along with the
-					// other remaining resources at the bottom of the markdown text.
-					if (!!resource.id) {
-						section.lines = addResourceTag(section.lines, resource, nodeAttributes.alt);
-					}
+				}
+
+				// If the resource does not appear among the note's resources, it
+				// means it's an attachment. It will be appended along with the
+				// other remaining resources at the bottom of the markdown text.
+				if (resource && !!resource.id) {
+					section.lines = addResourceTag(section.lines, resource, nodeAttributes.alt);
 				}
 			} else if (["span", "font", 'sup', 'cite', 'abbr', 'small', 'tt', 'sub', 'colgroup', 'col', 'ins', 'caption', 'var', 'map', 'area'].indexOf(n) >= 0) {
 				// Inline tags that can be ignored in Markdown
@@ -545,22 +549,18 @@ function enexXmlToMdArray(stream, resources) {
 
 				if (section.lines.length < 1) throw new Error('Invalid anchor tag closing'); // Sanity check, but normally not possible
 
-				const pushEmptyAnchor = (url) => {
-					section.lines.push('[link](' + url + ')');
-				}
-
 				// When closing the anchor tag, check if there's is any text content. If not
 				// put the URL as is (don't wrap it in [](url)). The markdown parser, using
 				// GitHub flavour, will turn this URL into a link. This is to generate slightly
 				// cleaner markdown.
 				let previous = section.lines[section.lines.length - 1];
 				if (previous == '[') {
 					section.lines.pop();
-					pushEmptyAnchor(url);
+					section.lines.push(url);
 				} else if (!previous || previous == url) {
 					section.lines.pop();
 					section.lines.pop();
-					pushEmptyAnchor(url);
+					section.lines.push(url);
 				} else {
 					// Need to remove any new line character between the current ']' and the previous '['
 					// otherwise it won't render properly.
@@ -583,8 +583,7 @@ function enexXmlToMdArray(stream, resources) {
 							const c = section.lines.pop();
 							if (c === '[') break;
 						}						
-						//section.lines.push(url);
-						pushEmptyAnchor(url);
+						section.lines.push(url);
 					} else {
 						section.lines.push('](' + url + ')');
 					}
@@ -644,7 +643,6 @@ function drawTable(table) {
 	// https://gist.github.com/IanWang/28965e13cdafdef4e11dc91f578d160d#tables
 
 	const flatRender = tableHasSubTables(table); // Render the table has regular text
-	const minColWidth = 3;
 	let lines = [];
 	lines.push(BLOCK_OPEN);
 	let headerDone = false;
@@ -687,9 +685,16 @@ function drawTable(table) {
 
 				// A cell in a Markdown table cannot have actual new lines so replace
 				// them with <br>, which are supported by the markdown renderers.
-				const cellText = processMdArrayNewLines(td.lines).replace(/\n+/g, "<br>");
+				let cellText = processMdArrayNewLines(td.lines).replace(/\n+/g, "<br>");
+
+				// Inside tables cells, "|" needs to be escaped
+				cellText = cellText.replace(/\|/g, "\\|");
 
-				const width = Math.max(cellText.length, 3);
+				// Previously the width of the cell was as big as the content since it looks nicer, however that often doesn't work
+				// since the content can be very long, resulting in unreadable markdown. So no solution is perfect but making it a
+				// width of 3 is a bit better. Note that 3 is the minimum width of a cell - below this, it won't be rendered by
+				// markdown parsers.
+				const width = 3;
 				line.push(stringPadding(cellText, width, ' ', stringPadding.RIGHT));
 
 				if (!headerDone) {

diff --git a/ReactNativeClient/lib/import-enex.js b/ReactNativeClient/lib/import-enex.js
@@ -11,6 +11,7 @@ const { enexXmlToMd } = require('./import-enex-md-gen.js');
 const { time } = require('lib/time-utils.js');
 const Levenshtein = require('levenshtein');
 const jsSHA = require("jssha");
+const md5 = require('md5');
 
 //const Promise = require('promise');
 const fs = require('fs-extra');
@@ -30,8 +31,8 @@ function extractRecognitionObjId(recognitionXml) {
 	return r && r.length >= 2 ? r[1] : null;
 }
 
-function filePutContents(filePath, content) {
-	return fs.writeFile(filePath, content);
+async function filePutContents(filePath, content) {
+	await fs.writeFile(filePath, content);
 }
 
 function removeUndefinedProperties(note) {
@@ -255,49 +256,6 @@ function importEnex(parentFolderId, filePath, importOptions = null) {
 			stream.resume();
 			processingNotes = false;
 			return true;
-
-			// let chain = [];
-			// while (notes.length) {
-			// 	let note = notes.shift();
-			// 	const contentStream = stringToStream(note.bodyXml);
-			// 	chain.push(() => {
-			// 		return enexXmlToMd(contentStream, note.resources).then((body) => {
-			// 			delete note.bodyXml;
-
-			// 			// console.info('-----------------------------------------------------------');
-			// 			// console.info(body);
-			// 			// console.info('-----------------------------------------------------------');
-
-			// 			note.id = uuid.create();
-			// 			note.parent_id = parentFolderId;
-			// 			note.body = body;
-
-			// 			// Notes in enex files always have a created timestamp but not always an
-			// 			// updated timestamp (it the note has never been modified). For sync
-			// 			// we require an updated_time property, so set it to create_time in that case
-			// 			if (!note.updated_time) note.updated_time = note.created_time;
-
-			// 			return saveNoteToStorage(note, importOptions.fuzzyMatching);
-			// 		}).then((result) => {
-			// 			if (result.noteUpdated) {
-			// 				progressState.updated++;
-			// 			} else if (result.noteCreated) {
-			// 				progressState.created++;
-			// 			} else if (result.noteSkipped) {
-			// 				progressState.skipped++;
-			// 			}
-			// 			progressState.resourcesCreated += result.resourcesCreated;
-			// 			progressState.notesTagged += result.notesTagged;
-			// 			importOptions.onProgress(progressState);
-			// 		});
-			// 	});
-			// }
-
-			// return promiseChain(chain).then(() => {
-			// 	stream.resume();
-			// 	processingNotes = false;
-			// 	return true;
-			// });
 		}
 
 		saxStream.on('error', (error) => {
@@ -418,6 +376,7 @@ function importEnex(parentFolderId, filePath, importOptions = null) {
 				noteAttributes = null;
 			} else if (n == 'resource') {
 				let decodedData = null;
+				let resourceId = noteResource.id;
 				if (noteResource.dataEncoding == 'base64') {
 					try {
 						decodedData = Buffer.from(noteResource.data, 'base64');
@@ -429,8 +388,14 @@ function importEnex(parentFolderId, filePath, importOptions = null) {
 					decodedData = noteResource.data; // Just put the encoded data directly in the file so it can, potentially, be manually decoded later
 				}
 
+				if (!resourceId && decodedData) {
+					// If no resource ID is present, the resource ID is actually the MD5 of the data.
+					// This ID will match the "hash" attribute of the corresponding <en-media> tag.
+					resourceId = md5(decodedData);
+				}
+
 				let r = {
-					id: noteResource.id,
+					id: resourceId,
 					data: decodedData,
 					mime: noteResource.mime,
 					title: noteResource.filename ? noteResource.filename : '',