Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

plaintext linkification #39

Merged
merged 2 commits into from
Jan 3, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions data/test.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<!doctype html>
<!-- open as resource://ipfs-firefox-addon-at-lidel-dot-org/data/test.html -->
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool! It probably should be used in a dedicated test suite (test/test-linkify.js).

So far I have only done a manual test, and I got weird results:

  1. Run via jpm -b vendor/firefox-45.0a2/firefox run
  2. I enabled the option on the addon's about:addons screen:
    2016-01-01-182616_256x49_scrot
  3. Visited resource://ipfs-firefox-addon-at-lidel-dot-org/data/test.html
  4. Text links were not converted into <a> tags. The only change I was able to spot was the href of the "abs link", which changed from
    fs:/ to https://ipfs.io/

I did not dig into the underlying code: perhaps it is still work in progress?
Or did I find a bug? :-)

<html>
<body>
<!-- Plain-text IPFS references in four notations. test/test-linkify.js counts '#plain-links > a' and expects 4 links. -->
<p id='plain-links'>
ipfs:/QmTAsnXoWmLZQEpvyZscrReFzqxP3pvULfGVgpJuayrp1w<br>
ipfs:/ipfs/QmTAsnXoWmLZQEpvyZscrReFzqxP3pvULfGVgpJuayrp1w<br>
fs:/ipfs/QmTAsnXoWmLZQEpvyZscrReFzqxP3pvULfGVgpJuayrp1w<br>
/ipfs/QmTAsnXoWmLZQEpvyZscrReFzqxP3pvULfGVgpJuayrp1w<br>
</p>
<!-- Pre-existing anchors. test/test-linkify.js reads the protocol of #relative-ipfs-path and expects "fs:". -->
<a href="fs:/ipfs/QmTAsnXoWmLZQEpvyZscrReFzqxP3pvULfGVgpJuayrp1w">abs link</a>
<a href="/ipfs/QmTAsnXoWmLZQEpvyZscrReFzqxP3pvULfGVgpJuayrp1w" id="relative-ipfs-path">relative link</a>
<a href="http://gateway.ipfs.io/ipfs/QmTAsnXoWmLZQEpvyZscrReFzqxP3pvULfGVgpJuayrp1w">gateway link</a>
</body>
</html>
1 change: 1 addition & 0 deletions lib/child-main.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
const protocols = require('./protocols.js');

protocols.register();
require("./rewrite-pages.js");

require('sdk/system/unload').when(() => {
protocols.unregister();
Expand Down
6 changes: 6 additions & 0 deletions lib/gateways.js
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,9 @@ exports.publicUri = function() {
// Accessor for the module-level PUBLIC_GATEWAY_HOSTS value.
exports.publicHosts = () => PUBLIC_GATEWAY_HOSTS;

// Getter-only `linkify` property: every read returns the current prefs.linkify
// value instead of a copy captured at load time.
Object.defineProperty(exports, "linkify", {
  get() {
    return prefs.linkify;
  },
});
29 changes: 29 additions & 0 deletions lib/protocols.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ const FS_SCHEME = 'fs';
const WEB_SCHEME_PREFIX = 'web+'; // https://github.com/lidel/ipfs-firefox-addon/issues/36


// Matches strings that begin with "/ipfs/" or "/ipns/" (scheme-less IPFS paths).
const IPFS_PATH = new RegExp("^/ip(f|n)s/");

function CommonProtocolHandler() {}

function IpfsProtocolHandler() {}
Expand Down Expand Up @@ -176,3 +178,30 @@ exports.unregister = function() {
(exports[scheme]).unregister();
}
};

// used to canonize <a href="..."> and plain text links in pages
exports.rewrite = function(url) {
// canonize ipfs schemes
for (let k of Object.keys(HANDLERS)) {
let c = HANDLERS[k];
let p = c.prototype;
if (url.startsWith(p.scheme)) {
return ioservice.newURI(url, null, null).spec;
}

}

// relative path
if (IPFS_PATH.test(url)) {
return ioservice.newURI(FS_SCHEME + ":" + url, null, null).spec;
}

// TODO: reprocess gateways?

// normal URL, don't touch
return null;
};

// One export per handler constructor, each built with factory(). Other code
// in this module looks these up as exports[scheme], e.g. to call unregister().
exports.ipfs = factory(IpfsProtocolHandler);
exports.ipns = factory(IpnsProtocolHandler);
exports.fs = factory(FsProtocolHandler);
214 changes: 214 additions & 0 deletions lib/rewrite-pages.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
'use strict';

var events = require("sdk/system/events");
var protocols = require("./protocols.js");
const gw = require("./gateways.js");

var {
Ci
} = require('chrome');

// Local names of the elements whose direct text-node children may be
// linkified; processTextNode() ignores text under any other parent.
// TODO: svg text elements?
const ELEMENT_WHITELIST = `p span div body aside section blockquote li dt dd article header footer figure td th
ins del i em u s b dfn abbr small strong`.split(/\s+/);

/**
 * Converts the first plain-text IPFS reference in a text node into an <a> link.
 *
 * Current approach: prefix-match a plain IPFS URI inside the node, then extend
 * the match forward across adjacent text nodes (optionally separated by a
 * single <br>/<wbr>) until whitespace is found.
 * Potential alternative to investigate: using DOM Ranges.
 *
 * The node is replaced in its parent by a fragment of
 * [preceding text] <a>url</a> [trailing text]; sibling text nodes whose
 * leading characters were absorbed into the URL are removed.
 *
 * @param {Text} node - text node to inspect.
 * @returns {Element|undefined} the created <a> element, or undefined when the
 *   parent is not eligible or no reference was found.
 */
function processTextNode(node) {
  const XHTML_NS = "http://www.w3.org/1999/xhtml";

  const parent = node.parentNode;
  if (!parent || parent.namespaceURI !== XHTML_NS || parent.isContentEditable ||
      ELEMENT_WHITELIST.indexOf(parent.localName) === -1) {
    return;
  }
  // don't create links inside links
  const ancestor = parent.closest("a");
  if (ancestor && ancestor.namespaceURI === XHTML_NS) {
    return;
  }

  const re = /(?:\/ip(f|n)s\/|fs:|ipns:|ipfs:)\S+/g;

  let text = node.data;

  // only match once, we slice up the text node which will trigger further mutations anyway
  let match;
  let offset;
  for (;;) {
    match = re.exec(text);
    if (!match) {
      return;
    }
    offset = match.index;
    // JS regex doesn't support lookbehind, do it manually:
    // the reference must start the node or follow whitespace
    if (offset === 0 || /\s/.test(text[offset - 1])) {
      break;
    }
  }

  const doc = node.ownerDocument;
  const frag = doc.createDocumentFragment();

  let url = match[0];
  let lastMatch = re.lastIndex;

  // preceding non-link text
  if (offset > 0) {
    frag.appendChild(doc.createTextNode(text.substring(0, offset)));
  }

  const a = doc.createElementNS(XHTML_NS, "a");
  frag.appendChild(a);
  a.appendChild(doc.createTextNode(url));

  let brs = [];
  const toRemove = [];

  // slip-forward to deal with text split over multiple text nodes
  // TODO: in theory we also should do a look-behind if we happen to get a partial mutation from the parser
  let sibling = node;
  while (lastMatch === text.length) {
    sibling = sibling.nextSibling;
    if (!sibling) {
      break;
    }

    if (sibling.nodeType === 1 && (sibling.localName === "br" || sibling.localName === "wbr")) {
      brs.push(sibling);
      if (brs.length > 1) {
        // two consecutive newlines -> new paragraph and not just a forced linebreak
        break;
      }
      continue;
    }

    if (sibling.nodeType !== 3) {
      break;
    }

    // Match against the sibling's own text. `text` and `lastMatch` are only
    // updated after a successful match, so an aborted slip-forward leaves the
    // trailing-text computation below untouched.
    const siblingText = sibling.data;
    const continuation = /^\S+/.exec(siblingText);
    if (!continuation) {
      break;
    }

    // the pending line break sits inside the URL, so move it into the link
    for (const br of brs) {
      a.appendChild(br);
    }
    brs = [];
    toRemove.push(sibling);

    const urlPart = continuation[0];
    text = siblingText;
    lastMatch = urlPart.length;

    url += urlPart;
    a.appendChild(doc.createTextNode(urlPart));
  }

  for (const consumed of toRemove) {
    consumed.remove();
  }

  // rewrite() returns null for URLs it does not recognise; assigning that
  // would produce a bogus link to the literal string "null"
  const href = protocols.rewrite(url);
  if (href) {
    a.href = href;
  }

  // trailing non-link text (of the last text node that contributed to the URL)
  frag.appendChild(doc.createTextNode(text.substring(lastMatch)));
  parent.replaceChild(frag, node);

  return a;
}

/**
 * Canonizes the href of an XHTML <a> element when it points at an IPFS resource.
 * Elements in other namespaces, non-anchors and anchors without an href
 * attribute are ignored.
 *
 * @param {Element} element - element to inspect.
 */
function processElement(element) {
  const isHtmlAnchor =
    element.namespaceURI === "http://www.w3.org/1999/xhtml" && element.localName === "a";
  if (!isHtmlAnchor || !element.hasAttribute("href")) {
    return;
  }

  // .href resolves relative to base domain, .getAttribute may return relative urls e.g. /ipfs/...
  const declared = element.getAttribute("href");
  const canonical = protocols.rewrite(declared);
  if (!canonical || canonical === declared) {
    return;
  }
  element.href = canonical;
}

/**
 * MutationObserver callback: forwards newly added text nodes and elements, as
 * well as text nodes whose character data changed, to the matching processor.
 *
 * @param {MutationRecord[]} records - batch delivered by the observer.
 */
function mutationCallback(records) {
  const ELEMENT_NODE = 1;
  const TEXT_NODE = 3;

  for (const record of records) {
    switch (record.type) {
      case "childList":
        for (const added of record.addedNodes) {
          if (added.nodeType === TEXT_NODE) {
            processTextNode(added);
          }
          if (added.nodeType === ELEMENT_NODE) {
            processElement(added);
          }
        }
        break;
      case "characterData":
        processTextNode(record.target);
        break;
      default:
        // other record types are not handled
        break;
    }
  }
}

// Document URI schemes whose pages may be rewritten; anything else
// (e.g. chrome/about documents) is left alone.
const SCHEME_WHITELIST = "resource http https ftp ipfs fs ipns".split(" ");

/**
 * Handler for 'content-document-global-created' notifications: attaches a
 * MutationObserver to the new document so IPFS references are processed
 * incrementally, as content arrives from the parser.
 *
 * Does nothing when the event type differs, the linkify preference is off,
 * the subject is not a DOM window, or the document's scheme is not whitelisted.
 *
 * Alternative considered ("approach A"): wait for DOMContentLoaded, then walk
 * every text node with a TreeWalker (NodeFilter.SHOW_TEXT) and call
 * processTextNode on each. That might lead to a long processing pause on
 * large documents, so the incremental observer ("approach B") is used.
 *
 * @param {{subject: *, type: string}} event - system event notification.
 */
const documentObserver = function(event) {
  const { subject, type } = event;

  const isExpectedEvent = type == 'content-document-global-created';
  if (!isExpectedEvent || !gw.linkify || !(subject instanceof Ci.nsIDOMWindow)) {
    return;
  }

  const contentWindow = subject;
  const contentDocument = contentWindow.document;

  // don't touch chrome/about documents
  // alternative approach: check for non-system principal
  const location = contentDocument.documentURIObject;
  if (!location || SCHEME_WHITELIST.indexOf(location.scheme) === -1) {
    return;
  }

  // approach B: incremental, process text as it arrives from the parser
  const observerOptions = {
    characterData: true,
    subtree: true,
    childList: true
  };
  const observer = new contentWindow.MutationObserver(mutationCallback);
  observer.observe(contentDocument, observerOptions);
};

// Registered unconditionally at module load; documentObserver itself checks
// gw.linkify on every notification and returns early when it is off.
events.on('content-document-global-created', documentObserver);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It may be a better idea to turn the entire listener on or off based on the checkbox state.
Perhaps something along these lines:

gw.onChange(() => {
  if (gw.linkify) {
    events.on('content-document-global-created', documentObserver);
  } else {
    events.off('content-document-global-created', documentObserver);
  }
});
gw.reload();

6 changes: 6 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@
"title": "Custom IPFS API Port",
"type": "integer",
"value": 5001
},
{
"name": "linkify",
"title": "make plaintext IPFS links clickable",
"type": "bool",
"value": false
}
],
"permissions": {
Expand Down
35 changes: 35 additions & 0 deletions test/test-linkify.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
'use strict';

const tabs = require('sdk/tabs');
const parent = require('sdk/remote/parent');
const self = require('sdk/self');
const testpage = self.data.url('test.html');

require("../lib/rewrite-pages.js");
parent.remoteRequire("resource://ipfs-firefox-addon-at-lidel-dot-org/lib/rewrite-pages.js");

// End-to-end check of page rewriting: enables the linkify pref, opens
// data/test.html in a tab and inspects the resulting DOM from a content script.
exports["test link processing, plain text conversion"] = function(assert, done) {
// NOTE(review): the pref is switched on here and never restored afterwards.
require('sdk/simple-prefs').prefs.linkify = true;

tabs.open({
url: testpage,
onReady(tab) {
// the content script reports how many <a> elements are direct children of
// #plain-links and which protocol the #relative-ipfs-path anchor resolves to
let worker = tab.attach({
contentScript: `
self.port.emit("test result", {
numLinks: document.querySelectorAll('#plain-links > a').length,
relativeScheme: document.querySelector('#relative-ipfs-path').protocol
})
`
});
worker.port.on("test result", (msg) => {
// `|0` coerces a missing/non-numeric count to 0 before comparing
assert.equal(msg.numLinks|0, 4, 'number of linkified plaintext chunks');
assert.equal(msg.relativeScheme, "fs:", 'relative ipfs reference rewritten to fs: scheme');

// closing the tab signals completion through `done`
tab.close(done);
});
}
});
};

require('sdk/test').run(exports);