Skip to content

Commit

Permalink
Merge branch 'spiderable' into devel
Browse files Browse the repository at this point in the history
  • Loading branch information
n1mmy committed Aug 6, 2012
2 parents 0f18161 + f55a824 commit 2809b9b
Show file tree
Hide file tree
Showing 12 changed files with 168 additions and 3 deletions.
1 change: 1 addition & 0 deletions docs/.meteor/packages
Expand Up @@ -9,3 +9,4 @@ showdown
code-prettify
jquery-waypoints
less
spiderable
1 change: 1 addition & 0 deletions docs/client/docs.js
Expand Up @@ -194,6 +194,7 @@ var toc = [
"jquery",
"less",
"sass",
"spiderable",
"stylus",
"showdown",
"underscore"
Expand Down
1 change: 1 addition & 0 deletions docs/client/packages.html
Expand Up @@ -24,6 +24,7 @@ <h1 id="packages">Packages</h1>
{{> pkg_jquery}}
{{> pkg_less}}
{{> pkg_sass}}
{{> pkg_spiderable}}
{{> pkg_stylus}}
{{> pkg_showdown}}
{{> pkg_underscore}}
Expand Down
41 changes: 41 additions & 0 deletions docs/client/packages/spiderable.html
@@ -0,0 +1,41 @@
<template name="pkg_spiderable">
{{#better_markdown}}
## `spiderable`


The `spiderable` package is a temporary solution to allow web search
engines to index a Meteor application. It uses the <a target="_blank"
href="https://developers.google.com/webmasters/ajax-crawling/">AJAX
Crawling specification</a> published by Google to serve HTML to
compatible spiders (Google, Bing, Yandex, and more).

When a spider requests an HTML snapshot of a page, the Meteor server runs
the client half of the application inside <a target="_blank"
href="http://phantomjs.org/">phantomjs</a>, a headless browser, and
returns the full HTML generated by the client code.

{{#warning}}
This is a temporary approach to allow Meteor applications to be
searchable. Expect significant changes to this package.
{{/warning}}

In order to have links between multiple pages on a site visible to
spiders, apps must use real links (e.g. `<a href="/about">`) rather than
simply re-rendering portions of the page when an element is
clicked. Apps should render their content based on the URL of the page
and can use HTML5 push-state to alter the URL on the client without
triggering a page reload. See the <a target="_blank"
href="http://meteor.com/examples/todos">Todos example</a> for a
demonstration.


{{#warning}}
If you deploy your application with `meteor bundle`, you must install
`phantomjs` (<a target="_blank"
href="http://phantomjs.org/">http://phantomjs.org</a>) somewhere in your
`$PATH`. If you use `meteor deploy` this is already taken care of.
{{/warning}}


{{/better_markdown}}
</template>
1 change: 1 addition & 0 deletions examples/todos/.meteor/packages
Expand Up @@ -5,3 +5,4 @@

underscore
backbone
spiderable
2 changes: 2 additions & 0 deletions examples/todos/client/todos.css
Expand Up @@ -131,6 +131,8 @@ h3 {

#lists .list-name {
cursor: pointer;
color: black;
text-decoration: none;
}

#createList {
Expand Down
4 changes: 2 additions & 2 deletions examples/todos/client/todos.html
Expand Up @@ -27,9 +27,9 @@ <h3>Todo Lists</h3>
</div>
{{else}}
<div class="display">
<div class="list-name {{name_class}}">
<a class="list-name {{name_class}}" href="/{{_id}}">
{{name}}
</div>
</a>
</div>
{{/if}}
</div>
Expand Down
4 changes: 4 additions & 0 deletions examples/todos/client/todos.js
Expand Up @@ -88,6 +88,10 @@ Template.lists.events = {
'mousedown .list': function (evt) { // select list
Router.setList(this._id);
},
'click .list': function (evt) {
// prevent clicks on <a> from refreshing the page.
evt.preventDefault();
},
'dblclick .list': function (evt) { // start editing list name
Session.set('editing_listname', this._id);
Meteor.flush(); // force DOM redraw, so we can focus the edit field
Expand Down
15 changes: 14 additions & 1 deletion packages/livedata/livedata_connection.js
Expand Up @@ -575,7 +575,9 @@ _.extend(Meteor, {
// "http://subdomain.meteor.com/sockjs" (deprecated),
// "/sockjs" (deprecated)
connect: function (url, _restartOnUpdate) {
return new Meteor._LivedataConnection(url, _restartOnUpdate);
var ret = new Meteor._LivedataConnection(url, _restartOnUpdate);
Meteor._LivedataConnection._allConnections.push(ret); // hack. see below.
return ret;
},

autosubscribe: function (sub_func) {
Expand Down Expand Up @@ -604,3 +606,14 @@ _.extend(Meteor, {
}
});


// Support hook for the `spiderable` package: lets a caller ask whether
// every connection created through Meteor.connect has finished loading
// the data it subscribed to.

// Registry of connections; Meteor.connect appends each new one here.
Meteor._LivedataConnection._allConnections = [];

// Returns true when no registered connection has any enumerable key left
// in its `sub_ready_callbacks` map (also true when there are no
// connections at all); returns false otherwise.
Meteor._LivedataConnection._allSubscriptionsReady = function () {
  var connections = Meteor._LivedataConnection._allConnections;

  return _.all(connections, function (connection) {
    var nothingPending = true;
    // A single enumerable key is enough to know a subscription is still
    // waiting on its ready callback, so stop at the first one.
    for (var pendingId in connection.sub_ready_callbacks) {
      nothingPending = false;
      break;
    }
    return nothingPending;
  });
};
10 changes: 10 additions & 0 deletions packages/spiderable/package.js
@@ -0,0 +1,10 @@
// Package manifest for `spiderable`.

// Human-readable summary of what the package does.
Package.describe({
summary: "Makes the application crawlable to web spiders."
});

// Declares what the package contributes when an app uses it.
Package.on_use(function (api) {
// spiderable.html is shipped to the client, so the client needs the
// `templating` package.
api.use(['templating'], 'client');

// Client side: the HTML file. Server side: the JavaScript file.
api.add_files('spiderable.html', 'client');
api.add_files('spiderable.js', 'server');
});
1 change: 1 addition & 0 deletions packages/spiderable/spiderable.html
@@ -0,0 +1 @@
<head><meta name="fragment" content="!"></head>
90 changes: 90 additions & 0 deletions packages/spiderable/spiderable.js
@@ -0,0 +1,90 @@
(function () {
  var spawn = __meteor_bootstrap__.require('child_process').spawn;
  var querystring = __meteor_bootstrap__.require('querystring');
  var app = __meteor_bootstrap__.app;

  // how long to let phantomjs run before we kill it
  var REQUEST_TIMEOUT = 15*1000;

  // Quote `str` as a JavaScript string literal that is safe to embed in
  // generated script source. The value comes from the request (Host header
  // and path), so it must never be spliced in raw: a quote or backslash
  // would otherwise break out of the literal and run arbitrary code inside
  // phantomjs. U+2028/U+2029 are escaped separately because JSON leaves
  // them bare and older JS engines treat them as line terminators.
  var quoteForScript = function (str) {
    return JSON.stringify(str)
      .replace(/\u2028/g, '\\u2028')
      .replace(/\u2029/g, '\\u2029');
  };

  // Build the script fed to phantomjs on stdin. It opens `url`, polls every
  // 100ms until the Meteor client is connected and all subscriptions are
  // ready, then prints the page HTML (minus <script> tags and the fragment
  // meta tag) to stdout and exits.
  var buildPhantomScript = function (url) {
    return "var url = " + quoteForScript(url) + ";" +
      "var page = require('webpage').create();" +
      "page.open(url);" +

      "setInterval(function() {" +
      "  var ready = page.evaluate(function () {" +
      "    if (typeof Meteor !== 'undefined' && Meteor.status().connected) {" +
      "      Meteor.flush();" +
      "      return Meteor._LivedataConnection._allSubscriptionsReady();" +
      "    }" +
      "    return false;" +
      "  });" +

      "  if (ready) {" +
      "    var out = page.content;" +
      "    out = out.replace(/<script[^>]+>(.|\\n|\\r)*?<\\/script\\s*>/ig, '');" +
      "    out = out.replace('<meta name=\"fragment\" content=\"!\">', '');" +

      "    console.log(out);" +
      "    phantom.exit();" +
      "  }" +
      "}, 100);";
  };

  // Middleware: when a request carries an `_escaped_fragment_` query
  // parameter, render the page in phantomjs and answer with the resulting
  // HTML. Any other request, and any phantomjs failure, falls through to
  // the next handler so the normal page is served.
  app.use(function (req, res, next) {
    if (!/\?.*_escaped_fragment_=/.test(req.url)) {
      next();
      return;
    }

    // get escaped fragment out of the url.
    var idx = req.url.indexOf('?');
    var preQuery = req.url.substr(0, idx);
    var queryStr = req.url.substr(idx + 1);
    var parsed = querystring.parse(queryStr);
    delete parsed['_escaped_fragment_'];
    var newQuery = querystring.stringify(parsed);
    var newPath = preQuery + (newQuery ? "?" + newQuery : "");
    var url = "http://" + req.headers.host + newPath;

    // run phantomjs
    //
    // Use '/dev/stdin' to avoid writing to a temporary file. Can't
    // just omit the file, as PhantomJS takes that to mean 'use a
    // REPL' and exits as soon as stdin closes.
    var cp = spawn('phantomjs', ['--load-images=no', '/dev/stdin']);

    // Just kill it if it takes too long.
    var killTimer = setTimeout(function () {
      if (cp && cp.pid) {
        cp.kill();
      }
    }, REQUEST_TIMEOUT);

    // The request must be answered exactly once, whichever of 'exit' or
    // 'error' arrives first; the kill timer is cancelled at that point so
    // it can never signal a process that is already gone.
    var settled = false;
    var settle = function (respond) {
      if (settled)
        return;
      settled = true;
      clearTimeout(killTimer);
      respond();
    };

    var data = '';
    cp.stdout.setEncoding('utf8');
    cp.stdout.on('data', function (chunk) {
      data += chunk;
    });

    // NOTE(review): 'exit' can in principle fire before stdout has been
    // fully drained; kept as in the original -- confirm against the Node
    // version in use.
    cp.on('exit', function (code) {
      settle(function () {
        // Accept an <html> tag with or without attributes.
        if (0 === code && /<html[\s>]/i.test(data)) {
          res.writeHead(200, {'Content-Type': 'text/html; charset=UTF-8'});
          res.end(data);
        } else {
          // phantomjs failed. Don't send the error, instead send the
          // normal page.
          if (code === 127)
            Meteor._debug("spiderable: phantomjs not installed. Download and install from http://phantomjs.org/");
          else
            Meteor._debug("spiderable: phantomjs failed:", code, data);

          next();
        }
      });
    });

    // Some Node versions report a failed spawn (e.g. missing binary) as an
    // 'error' event instead of exit code 127; without a listener that
    // would take the whole server down.
    cp.on('error', function (err) {
      settle(function () {
        Meteor._debug("spiderable: could not run phantomjs:", err);
        next();
      });
    });

    // don't crash w/ EPIPE if phantomjs isn't installed. Deliberately
    // ignored: the failure is reported through 'exit'/'error' above.
    cp.stdin.on('error', function () {});

    cp.stdin.write(buildPhantomScript(url));
    cp.stdin.end();
  });
})();

0 comments on commit 2809b9b

Please sign in to comment.