Permalink
Browse files

restructured megazine.sjs into article, content-extraction and news-sources modules
  • Loading branch information...
1 parent 3280394 commit 4f347bf0626f82d5d7fe25bd7de9c461186ae346 @timbertson timbertson committed Sep 22, 2011
View
@@ -0,0 +1,73 @@
+var Content = require("./content-extraction");
+var logging = require("apollo:logging");
+var s = require("apollo:common").supplant;
+
+// -------------------- Article object --------------------
+
+var Article = exports.Article = function(url, user, text, pointerURL) {
+ this.url = url;
+ this.users = [user];
+ this.pointerText = text;
+ this.pointerURL = pointerURL;
+};
+
+Article.prototype.addUser = function(user) {
+ this.users.push(user);
+};
+
+Article.prototype.userList = function() { return this.users.join(", "); };
+
+Article.prototype.loadContent = function() {
+ logging.debug("Processing article: {url}", this);
+ this.heading = {};
+
+ var contents = Content.getURLContents.rateLimited(this.url);
+
+ if (!contents) {
+ logging.debug("no contents found for article:" + this);
+ this.heading.text = this.url;
+ return;
+ }
+
+ this.contents = contents;
+ this.img = Content.extractImage(this.contents, this.url);
+
+ if(this.img && this.img.imgService) {
+ this.heading.image = this.img.src;
+ this.contextImage = null;
+ } else {
+ this.contextImage = this.img;
+ this.populateTitle();
+ }
+ this.summary = this.getSummary();
+};
+
+Article.prototype.toString = function() {
+ return s("<Article from: {url}>", this);
+};
+
+Article.prototype.populateTitle = function() {
+ // set header.text to contents.title.
+ // if the title is undefined, shift this.pointerText to replace it
+ this.heading.text = this.contents.title;
+ if(!this.heading.text) {
+ // use tweet as title, but don't show anything for tweet text
+ this.heading.text = this.pointerText;
+ this.pointerText = null;
+ }
+};
+
+Article.prototype.getSummary = function() {
+ if(!(this.contents.meta && this.contents.meta.content)) return;
+ var summary = {
+ text: this.contents.meta.content,
+ style: {}
+ };
+
+ if (summary.text.length > 300) {
+ summary.style['text-align'] = "justify";
+ }
+ return summary;
+};
+
+
@@ -0,0 +1,149 @@
+var cutil = require('apollo:cutil');
+var c = require('apollo:collection');
+var logging = require('apollo:logging');
+var yql = require('apollo:yql');
+var http = require('apollo:http');
+var s = require("apollo:common").supplant;
+
+var underscore = require("../lib/underscore.js");
+var imgServiceDomains = ["twitpic.com", "yfrog.com"];
+
+// -------------------- URL / Image helper functions --------------------
+
+
+// attach a rate limited version of `fn` to `fn.rateLimited`
+function rateLimit(fn, rate) {
+ fn.rateLimited = cutil.makeRateLimitedFunction(fn, rate);
+};
+
+var getExpandedURL = exports.getExpandedURL = (function() {
+ var cache = {};
+ return function(url) {
+ if(!cache[url]) {
+ cache[url] = expandUrl.rateLimited(url);
+ }
+ return cache[url];
+ };
+})();
+
+var expandUrl = exports.expandUrl = function(url) {
+ var data = http.jsonp("http://api.longurl.org/v2/expand", {
+ query: {
+ url: url,
+ format: 'json',
+ 'user-agent': 'oni apollo: megazine'
+ }
+ });
+ return data['long-url'];
+}
+rateLimit(expandUrl, 4);
+
+var getURLContents = exports.getURLContents = function getURLContents(url) {
+ var xpath = "//title[1]|//img[@src]|//meta[@name='description']|//script[contains(.,'hqdefault')]";
+ var query = "select * from html where url=@url and xpath=@xpath";
+
+ var result = (yql.query(query, {
+ url:url,
+ xpath:xpath
+ }));
+
+ logging.debug("querying article {url} with xpath {xpath} returns:", {
+ url: url,
+ xpath: xpath},
+ result.results);
+
+ return result.results;
+};
+
+rateLimit(getURLContents, 8);
+
+var extractImage = exports.extractImage = function extractImage(page, url) {
+ var images = page.img;
+ //page is the object returned by getURLContents.
+ if (page.script) {
+ // looking for http://i.ytimg.com/vi/lOTtpRAs5FY/hqdefault.jpg
+ var m = page.script.content.match(/(http.+?hqdefault.jpg)/);
+ if (m && m.length) images = [{src: m[0], width:300}];
+ }
+
+ if (images && images.length) {
+ return getBestImage(images, url);
+ } else {
+ return null;
+ }
+};
+
+function getBestImage(images, baseURL) {
+ c.each(images, guessImageSize);
+
+ // filter out images < 140px or with no `src`; we never want to display them
+ images = c.filter(images, function(img) {
+ if(!img.src) return false;
+ return (img.width === undefined) || (img.width > 140);
+ });
+ images.sort(imageCompare);
+
+ if(images.length == 0) return null;
+ logging.debug("images (worst to best) = ", null, images);
+
+ var best_img = images[images.length-1];
+ var fullURL = http.canonicalizeURL(best_img.src, baseURL);
+ logging.debug("Canonicalizing URL " + best_img.src + " on " + baseURL + " -> " + fullURL);
+ return new Image(fullURL, isImageService(baseURL));
+};
+
+function imageCompare(a, b) {
+ var criteria = function(img) {
+ var isJpeg = img.src.match(/\.jpe?g/);
+ var width = img.width ? img.width : 0;
+ // isJpeg trumps width, jpegs are less likely page decoration
+ return [isJpeg ? 1 : 0, width];
+ }
+
+ var ca = criteria(a);
+ var cb = criteria(b);
+ if(ca[0]!=cb[0]) return ca[0] - cb[0];
+ return ca[1] - cb[1];
+};
+
+function guessImageSize(img) {
+ if(img.size) return;
+ var match;
+
+ // guess based on style attribute (accurate but uncommon)
+ var styleRe = /(?:^|[^-])width: *(\d+)px/;
+ match = (img.style && img.style.match(styleRe));
+ if(match) {
+ logging.debug("guessed image width of " + match[1] + " based on style string: " + img.style, null, match);
+ img.width = parseInt(match[1]);
+ return;
+ };
+
+ // guess based on URL params (inaccurate)
+ var sizeRe = /(?:x|w|xsize|size|width)=(\d+)/;
+ match = img.src.match(sizeRe);
+ if(match) {
+ logging.debug("guessed image width of " + match[1] + " based on url: " + img.src);
+ img.width = parseInt(match[1]);
+ }
+};
+
+function isImageService(url) {
+ var domain = http.parseURL(url).authority;
+ domain = domain.replace(/^wwww\./, ''); // strip leading www
+ return imgServiceDomains.indexOf(domain) !== -1;
+};
+
+
+var Image = exports.Image = function Image(src, imgService) {
+ this.src = src;
+ this.imgService = imgService;
+ this.style = {
+ 'background-image': s('url({src})', this),
+ };
+ if(imgService) {
+ this.style.height = 200;
+ }
+};
+Image.prototype.toString = function() { return s("<Image: {src}>", this); };
+
View
@@ -0,0 +1,46 @@
+require("apollo:jquery-binding").install();
+var logging = require("apollo:logging");
+
+var NewsSources = require('./news-sources');
+
+// replace with your own application id:
+NewsSources.Twitter.prototype.appId = "hkEsBjNpWsOVKQ2gKyr1kQ";
+
+if(logging.isEnabled(logging.VERBOSE)) {
+ // in debug mode, pop up an apollo console
+ require("apollo:debug").console({receivelog:false});
+}
+
+// The main app controller, initialized by angular.js
+var App = exports.App = function App(route) {
+ route.when('/twitter', {controller: NewsSources.Twitter, template: "templates/twitter.html"});
+ route.when('/hackernews', {controller: NewsSources.HackerNews, template: "templates/hackernews.html"});
+ spawn(this.run(route));
+};
+App.$inject=['$route'];
+
+App.prototype.run = function(route) { // loops indefinitely; started in the background by the App constructor
+ // Every time the route changes, load the appropriate news type and abort the
+ // old news loader if there is one. `route` is the object given to App().
+ var currentStrata; // result of spawn() for the news loader currently running, if any
+ while (true) {
+ waitfor() { route.onChange(resume); } // wait until onChange invokes `resume`. NOTE(review): registered again on every pass -- confirm handlers do not pile up
+ if(currentStrata) {
+ currentStrata.abort(); // stop the loader that belonged to the previous route
+ currentStrata = null;
+ }
+ hold(0); // scope seems to be initialized right *after* this code, so we need a delay
+
+ if(!(route.current && route.current.scope)) {
+ logging.debug("route changed with no current scope: ", null, route.current);
+ continue; // nothing to run for this change; wait for the next one
+ }
+
+ // init the scope (kept as `this.news`), and run it in the background:
+ this.news = route.current.scope;
+ this.news._init();
+ currentStrata = spawn(this.news.run());
+ };
+};
+
+
Oops, something went wrong. Retry.

0 comments on commit 4f347bf

Please sign in to comment.