Permalink
Browse files

First commit

  • Loading branch information...
0 parents commit ce350aaa43e365a1ff1aca68e96195845885dbcc @chriso chriso committed Nov 16, 2010
20 LICENSE
@@ -0,0 +1,20 @@
+Copyright (c) 2010 Chris O'Hara <cohara87@gmail.com>
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1 @@
+*This is a _major_ work in progress... check back soon!*
@@ -0,0 +1,3 @@
+#!/usr/bin/env node
+
//Pass every command-line argument after the first two entries of process.argv
//to node.io's cli() entry point.
var cli_args = process.argv.slice(2);
require('node.io').cli(cli_args);
@@ -0,0 +1,45 @@
+// This module can find/remove duplicates in a list
+//
+// 1. To remove duplicates from a list and output unique lines:
+// $ cat list.txt | node.io duplicates
+//
+// 2. To output lines that appear more than once:
+// $ cat list.txt | node.io duplicates find
+//
+// To output the results to a file, use either:
+// $ cat list.txt | node.io -s duplicates > unique.txt
+// $ node.io -i list.txt -o unique.txt duplicates
+
+var Job = require('../lib/node.io/job').Job;
+
//Lines encountered so far, and lines already reported as duplicates. Both live
//at module level so they persist across calls to reduce(). Prototype-less
//objects are used as string sets: membership checks stay constant-time however
//long the list gets, and lines such as "constructor" or "__proto__" cannot
//collide with inherited properties.
var seen_lines = Object.create(null), emitted_lines = Object.create(null);

/**
 * Filters a batch of lines against everything seen in earlier batches and
 * emits the result through this.emit().
 *
 * When this.options.args is 'find', a line is emitted the second time it is
 * encountered (and never again). Otherwise a line is emitted the first time it
 * is encountered, i.e. duplicates are removed.
 *
 * @param {Array} lines the batch of lines to filter
 */
function reduce(lines) {
    var find_duplicates = this.options.args === 'find', emit = [];

    lines.forEach(function(line) {
        if (find_duplicates) {

            //Output duplicate lines
            if (seen_lines[line] && !emitted_lines[line]) {
                emit.push(line);
                emitted_lines[line] = true; //Only output once
            } else {
                seen_lines[line] = true;
            }

        } else if (!seen_lines[line]) {

            //Remove duplicate lines (default)
            emit.push(line);
            seen_lines[line] = true;

        }
    });

    //The batch result is always emitted, even when it is empty
    this.emit(emit);
}
+
//Job settings and handlers, handed to the Job constructor as two separate objects
var options = {max: 20}, methods = {reduce: reduce};

exports.job = new Job(options, methods);
@@ -0,0 +1,99 @@
+// This module checks a domain's Google pagerank (rate limits obviously apply)
+//
+// 1. To find the PageRank of a domain:
+// $ echo "mastercard.com" | node.io -s google_pagerank
+// => mastercard.com,7
+
+var Job = require('../').Job;
+
+exports.job = new Job({timeout:10, retries:3}, {
+
+ run: function google(input) {
+ var self = this;
+
+ var url = input;
+ if (!~url.indexOf('http://')) url = 'http://'+url;
+
+ var ch = '6'+GoogleCH(strord('info:'+url));
+
+ this.get('http://www.google.com/search?client=navclient-auto&ch='+ch+'&features=Rank&q=info:'+encodeURIComponent(url), function(err, data) {
+ if (err) self.retry();
+
+ if (!~data.indexOf('Rank_1:1:')) {
+ self.emit(input+',');
+ } else {
+ self.emit(input+','+data.substr(9));
+ }
+ });
+ },
+
+ fail: function(input) {
+ this.emit(input+',');
+ }
+
+});
+
/**
 * Shifts the 32-bit value a right by b bits, filling the vacated high bits
 * with zeros instead of copies of the sign bit.
 *
 * b is expected to be at least 1: when the top bit of a is set the final shift
 * is by b - 1, and a shift count of -1 would be treated as 31.
 *
 * @param {Number} a value to shift (treated as a 32-bit integer)
 * @param {Number} b number of bits to shift by
 * @return {Number} the shifted value
 */
function zF(a, b) {
    var top_bit = 0x80000000;

    //Top bit clear: the ordinary shift has no sign bit to propagate
    if (!(top_bit & a)) {
        return a >> b;
    }

    a = a >> 1;          //first step of the shift; this copies the sign into bit 31
    a &= ~top_bit;       //drop that copy...
    a |= 0x40000000;     //...and keep the original top bit, which is now bit 30
    return a >> (b - 1); //the value is non-negative now, so finish with a plain shift
}
+
/**
 * Scrambles three numbers together in nine rounds. Each round subtracts the
 * other two values from one of them and XORs in a shifted copy of a neighbour
 * (right shifts go through zF, left shifts use <<).
 *
 * @param {Number} a
 * @param {Number} b
 * @param {Number} c
 * @return {Array} the mixed values as [a, b, c]
 */
function mix(a, b, c) {
    a = (a - b - c) ^ zF(c, 13);
    b = (b - c - a) ^ (a << 8);
    c = (c - a - b) ^ zF(b, 13);

    a = (a - b - c) ^ zF(c, 12);
    b = (b - c - a) ^ (a << 16);
    c = (c - a - b) ^ zF(b, 5);

    a = (a - b - c) ^ zF(c, 3);
    b = (b - c - a) ^ (a << 10);
    c = (c - a - b) ^ zF(b, 15);

    return [a, b, c];
}
/**
 * Computes a checksum over an array of character codes (as produced by
 * strord()). The input is consumed twelve entries at a time, each chunk being
 * packed into three 32-bit words and run through mix(); the leftover entries
 * and the total length are folded in before a final mix().
 *
 * @param {Array} url character codes to checksum
 * @param {Number} length number of entries to use (defaults to url.length
 *                        when called with a single argument)
 * @return {Number} the checksum as a non-negative number
 */
function GoogleCH(url, length) {
    if (arguments.length == 1) length = url.length;

    //Packs four consecutive entries, lowest byte first, into one word
    function word(at) {
        return url[at] + (url[at + 1] << 8) + (url[at + 2] << 16) + (url[at + 3] << 24);
    }

    var a = 0x9E3779B9, b = 0x9E3779B9, c = 0xE6359A60;
    var k = 0, remaining = length, mixed;

    //Full twelve-entry chunks
    for (; remaining >= 12; k += 12, remaining -= 12) {
        mixed = mix(a + word(k), b + word(k + 4), c + word(k + 8));
        a = mixed[0];
        b = mixed[1];
        c = mixed[2];
    }

    //The low byte of c is taken by the length, so leftover entries 8-10 are
    //shifted up by one byte. Every case falls through to the ones below it.
    c += length;
    switch (remaining) {
        case 11: c += url[k + 10] << 24;
        case 10: c += url[k + 9] << 16;
        case 9: c += url[k + 8] << 8;
        case 8: b += url[k + 7] << 24;
        case 7: b += url[k + 6] << 16;
        case 6: b += url[k + 5] << 8;
        case 5: b += url[k + 4];
        case 4: a += url[k + 3] << 24;
        case 3: a += url[k + 2] << 16;
        case 2: a += url[k + 1] << 8;
        case 1: a += url[k];
    }

    //Only the third mixed value is used; negative results are shifted up by 2^32
    var checksum = mix(a, b, c)[2];
    return checksum < 0 ? 0x100000000 + checksum : checksum;
}
/**
 * Converts a string into an array holding the character code of each character.
 *
 * @param {String} string the text to convert
 * @return {Array} one character code per character, in order
 */
function strord(string) {
    var result = [];
    //The counter is declared locally; without "var" it would be a global that
    //other code could clobber, and a ReferenceError in strict mode
    for (var i = 0; i < string.length; i++) {
        result[i] = string[i].charCodeAt(0);
    }
    return result;
}
@@ -0,0 +1,43 @@
+// This module checks a domain's Google rank for a given keyword (rate limits obviously apply)
+//
+// 1. To find the rank of a domain for a given keyword:
+// $ echo "mastercard.com,Credit Cards" | node.io -s google_rank
+// => mastercard.com,Credit Cards,9
+
+var Job = require('../').Job;
+
+exports.job = new Job({timeout:10, retries:3}, {
+
+ run: function google(input) {
+ var links, self = this;
+
+ var input = input.split(',');
+
+ this.getHtml('http://www.google.com/search?hl=en&num=100&q='+encodeURIComponent(input[1]), function(err, $, data) {
+ if (err) self.retry();
+
+ var rank, i = 0;
+
+ if (links = $('a.l')) {
+ links.each('href', function(href) {
+ i++;
+ if (href.indexOf('www.'+input[0]+'/') >= 0) {
+ rank = i;
+ } else if (href.indexOf('/'+input[0]+'/') >= 0) {
+ rank = i;
+ }
+ });
+ if (rank) {
+ self.emit(input[0]+','+input[1]+','+rank);
+ } else {
+ self.emit(input+',');
+ }
+ }
+ });
+ },
+
+ fail: function(input) {
+ this.emit(input+',');
+ }
+
+});
@@ -0,0 +1,27 @@
+// This module uses Google suggest to spell check a word or list of words (rate limits obviously apply)
+//
+// 1. To output the result of Google suggest:
+// $ echo "definately" | node.io -s google_spell
+// => definitely
+
+var Job = require('../').Job;
+
+exports.job = new Job({timeout:10, retries:3}, {
+
+ run: function google(input) {
+ var spell, self = this;
+
+ this.getHtml('http://www.google.com/search?hl=en&q='+encodeURIComponent(input), function(err, $) {
+ if (err) self.retry();
+
+ if (spell = $('a.spell')) {
+ self.emit(spell.first().fulltext);
+ }
+ });
+ },
+
+ fail: function(input) {
+ this.emit(input);
+ }
+
+});
@@ -0,0 +1,46 @@
+//This module pulls the front page stories and scores from reddit.com
+//There are APIs for doing this - this is just a quick demonstration of
+//parsing HTML using htmlparser and an augmented soupselect
+
+var Job = require('../').Job;
+
/**
 * Fetches the reddit front page and emits one "[score] title" string per
 * story. Stories whose score is '&bull;' are left out.
 *
 * Must be called with the job as `this`: it uses the job's getHtml(), exit(),
 * assert() and emit().
 */
function reddit() {
    var self = this;

    this.getHtml('http://www.reddit.com/', function(err, $) {
        //Handle any http / parsing errors
        if (err) {
            self.exit(err);
            return; //there is no page to parse
        }

        var titles = [], scores = [], output = [];

        //Select all titles on the page
        $('a.title').each(function(a) {
            titles.push(a.text);
        });

        //Select all scores on the page
        $('div.score.unvoted').each(function(div) {
            scores.push(div.text);
        });

        //Mismatch? page probably didn't load properly
        if (scores.length != titles.length) {
            self.exit('Title / score mismatch');
            return; //don't emit output built from misaligned lists
        }

        //Output = [score] title
        for (var i = 0, len = scores.length; i < len; i++) {
            //Ignore upcoming stories
            if (scores[i] == '&bull;') continue;

            //Check the data is ok. This goes through self because `this` is
            //not the job inside this callback
            self.assert(scores[i]).isInt();

            output.push('[' + scores[i] + '] ' + titles[i]);
        }

        self.emit(output);
    });
}
+
//Job settings and handlers, handed to the Job constructor as two separate objects
var options = {timeout: 10, once: true}, methods = {input: false, run: reddit};

exports.job = new Job(options, methods);
@@ -0,0 +1,78 @@
+// This module wraps the dns.lookup() method. There are a few different uses:
+// (In each case replace domains.txt with your list of domains)
+//
+// 1. To resolve domains and return "domain,ip":
+// $ cat domains.txt | node.io resolve
+//
+// 2. To return domains that do not resolve:
+// $ cat domains.txt | node.io resolve notfound
+//
+// 3. To return domains that do resolve:
+// $ cat domains.txt | node.io resolve found
+//
+// To output the results to a file, use either:
+// $ cat domains.txt | node.io -s resolve > result.txt
+// $ node.io -i domains.txt -o result.txt resolve
+
+var Job = require('../').Job, dns = require('dns');
+
//Settings handed to the Job constructor as its first argument
var options = {
    max: 100,
    timeout: 10,
    retries: 3
};
+
//Reports a domain that resolved, in the format selected by the job's argument:
//'notfound' skips it, 'found' emits the domain, anything else emits "domain,ip"
function reportResolved(job, domain, ip) {
    var type = job.options.args;

    if (type === 'notfound') {
        job.skip();
    } else if (type === 'found') {
        job.emit(domain);
    } else {
        job.emit(domain + ',' + ip);
    }
}

//Reports a domain that did not resolve: 'notfound' emits the domain, 'found'
//skips it, anything else emits "domain," (an empty ip column)
function reportUnresolved(job, domain) {
    var type = job.options.args;

    if (type === 'notfound') {
        job.emit(domain);
    } else if (type === 'found') {
        job.skip();
    } else {
        job.emit(domain + ',');
    }
}

var methods = {

    /**
     * Looks up the IPv4 address of a domain and reports the outcome. Lookup
     * errors other than "not found" trigger a retry.
     *
     * @param {String} domain the domain to resolve
     */
    run: function(domain) {
        var self = this;

        dns.lookup(domain, 4, function(err, ip) {
            if (!err) {
                //The domain resolved successfully
                reportResolved(self, domain, ip);
                return;
            }

            //The domain didn't resolve. errno 4 and 8 are the values the
            //original code treated as "not found"; the 'ENOTFOUND' code is
            //accepted as well for runtimes that report the failure by name
            if (err.errno === 4 || err.errno === 8 || err.code === 'ENOTFOUND') {
                reportUnresolved(self, domain);
            } else {
                self.retry();
            }
        });
    },

    /**
     * Handles a domain that either timed out or exceeded the max number of
     * retries by reporting it as unresolved, exactly once.
     *
     * NOTE(review): the other jobs in this commit declare fail(input) with the
     * input first - confirm the argument order the Job implementation uses.
     *
     * @param {*} status not used here
     * @param {String} domain the domain that failed
     */
    fail: function(status, domain) {
        reportUnresolved(this, domain);
    }

};
+
//Build the job from the settings and handlers above, then export it as "job"
var job = new Job(options, methods);
exports.job = job;
@@ -0,0 +1,10 @@
+google.com
+youtube.com
+download.com
+cnet.com
+wow.com
+google.com.au
+amazon.com
+asdfhkasdhfkashdjkashdk.com
+asdjfh98eua9sdfunm.com
+,,,,,,,,.com
Oops, something went wrong.

0 comments on commit ce350aa

Please sign in to comment.