Skip to content

Commit

Permalink
First commit
Browse files Browse the repository at this point in the history
  • Loading branch information
chriso committed Nov 16, 2010
0 parents commit ce350aa
Show file tree
Hide file tree
Showing 40 changed files with 3,552 additions and 0 deletions.
20 changes: 20 additions & 0 deletions LICENSE
@@ -0,0 +1,20 @@
Copyright (c) 2010 Chris O'Hara <cohara87@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1 change: 1 addition & 0 deletions README.md
@@ -0,0 +1 @@
*This is a _major_ work in progress... check back soon!*
3 changes: 3 additions & 0 deletions bin/node.io
@@ -0,0 +1,3 @@
#!/usr/bin/env node

// Command-line entry point: passes every argument after the first two
// process.argv entries to the cli() function exported by the node.io package.
require('node.io').cli(process.argv.slice(2));
45 changes: 45 additions & 0 deletions examples/duplicates.js
@@ -0,0 +1,45 @@
// This module can find/remove duplicates in a list
//
// 1. To remove duplicates from a list and output unique lines:
// $ cat list.txt | node.io duplicates
//
// 2. To output lines that appear more than once:
// $ cat list.txt | node.io duplicates find
//
// To output the results to a file, use either:
// $ cat list.txt | node.io -s duplicates > unique.txt
// $ node.io -i list.txt -o unique.txt duplicates

var Job = require('../lib/node.io/job').Job;

// Lookup tables shared by every reduce() call, so duplicates are detected
// across batches. Keys are the line text prefixed with '$' so that lines such
// as "constructor" or "__proto__" can never collide with built-in properties.
var seen_lines = {}, emitted_lines = {};

/**
 * Filters one batch of lines and emits the survivors as an array.
 *
 * The mode is read from this.options.args:
 *   - 'find'    : emit a line the second time it is encountered (once only)
 *   - otherwise : emit a line the first time it is encountered
 *
 * Lines are compared by their string form. Lookups are keyed rather than
 * scanned, so the cost per line stays constant as the input grows.
 *
 * @param {Array} lines batch of input lines
 */
function reduce(lines) {
    var find = this.options.args === 'find', emit = [];

    lines.forEach(function (line) {
        var key = '$' + line;

        if (!seen_lines[key]) {
            //First sighting: remember it, and output it in the default mode
            seen_lines[key] = true;
            if (!find) {
                emit.push(line);
            }
        } else if (find && !emitted_lines[key]) {
            //Repeat sighting in find mode: output the duplicate only once
            emitted_lines[key] = true;
            emit.push(line);
        }
    });

    this.emit(emit);
}

// Expose the job: options {max: 20} plus the reduce handler defined above.
// NOTE(review): `max` is presumably a concurrency/batch limit - confirm against lib/node.io/job.
exports.job = new Job({max:20},{reduce:reduce});
99 changes: 99 additions & 0 deletions examples/google_pagerank.js
@@ -0,0 +1,99 @@
// This module checks a domain's Google pagerank (rate limits obviously apply)
//
// 1. To find the pagerank of a domain:
// $ echo "mastercard.com" | node.io -s google_pagerank
// => mastercard.com,7

var Job = require('../').Job;

exports.job = new Job({timeout:10, retries:3}, {

run: function google(input) {
var self = this;

var url = input;
if (!~url.indexOf('http://')) url = 'http://'+url;

var ch = '6'+GoogleCH(strord('info:'+url));

this.get('http://www.google.com/search?client=navclient-auto&ch='+ch+'&features=Rank&q=info:'+encodeURIComponent(url), function(err, data) {
if (err) self.retry();

if (!~data.indexOf('Rank_1:1:')) {
self.emit(input+',');
} else {
self.emit(input+','+data.substr(9));
}
});
},

fail: function(input) {
this.emit(input+',');
}

});

/**
 * Right-shifts the 32-bit value `a` by `b` bits without dragging the sign bit
 * along. When the top bit of `a` is set, one signed shift is done first, the
 * sign bit is cleared and bit 30 is set, then the remaining b-1 bits are
 * shifted; otherwise a plain signed shift is used.
 *
 * @param {number} a value to shift (treated as a 32-bit integer)
 * @param {number} b number of bits to shift by
 * @returns {number} the shifted value
 */
function zF(a, b) {
    var signBit = 0x80000000;

    if ((a & signBit) === 0) {
        return a >> b;
    }

    var shiftedOnce = ((a >> 1) & ~signBit) | 0x40000000;
    return shiftedOnce >> (b - 1);
}

/**
 * Scrambles three numbers together in three rounds. Each round subtracts the
 * other two values from a, b and c in turn and XORs in a shifted copy of a
 * neighbour (zF right-shift for a and c, left shift for b).
 *
 * @param {number} a
 * @param {number} b
 * @param {number} c
 * @returns {Array} the mixed values as [a, b, c]
 */
function mix(a, b, c) {
    //Shift amounts per round: [zF shift for a, left shift for b, zF shift for c]
    var rounds = [[13, 8, 13], [12, 16, 5], [3, 10, 15]];

    for (var r = 0; r < rounds.length; r++) {
        var shifts = rounds[r];

        a -= b; a -= c; a ^= zF(c, shifts[0]);
        b -= c; b -= a; b ^= (a << shifts[1]);
        c -= a; c -= b; c ^= zF(b, shifts[2]);
    }

    return [a, b, c];
}
/**
 * Computes an unsigned 32-bit checksum over an array of character codes.
 *
 * Full 12-entry chunks are packed into three words, added to the running
 * state (a, b, c) and scrambled with mix(). The total length is then added to
 * c, the leftover entries (fewer than 12) are folded in, and the state is
 * mixed one final time.
 *
 * @param {Array} url character codes to hash (as produced by strord)
 * @param {number} [length] number of entries to hash; defaults to url.length
 *                          when called with a single argument
 * @returns {number} the third mixed word, as a non-negative number
 */
function GoogleCH(url, length) {
    if (arguments.length === 1) length = url.length;

    //Packs four consecutive entries, lowest byte first, into one number
    function word(at) {
        return url[at] + (url[at + 1] << 8) + (url[at + 2] << 16) + (url[at + 3] << 24);
    }

    var a = 0x9E3779B9, b = 0x9E3779B9, c = 0xE6359A60;
    var pos = 0, remaining = length, mixed;

    for (; remaining >= 12; pos += 12, remaining -= 12) {
        a += word(pos);
        b += word(pos + 4);
        c += word(pos + 8);
        mixed = mix(a, b, c);
        a = mixed[0];
        b = mixed[1];
        c = mixed[2];
    }

    c += length;

    //Fold in the trailing entries; every case deliberately falls through.
    //c skips its lowest byte because the length was just added to it.
    switch (remaining) {
        case 11: c += url[pos + 10] << 24;
        case 10: c += url[pos + 9] << 16;
        case 9: c += url[pos + 8] << 8;
        case 8: b += (url[pos + 7] << 24);
        case 7: b += (url[pos + 6] << 16);
        case 6: b += (url[pos + 5] << 8);
        case 5: b += (url[pos + 4]);
        case 4: a += (url[pos + 3] << 24);
        case 3: a += (url[pos + 2] << 16);
        case 2: a += (url[pos + 1] << 8);
        case 1: a += (url[pos + 0]);
    }

    mixed = mix(a, b, c);

    //Convert a negative 32-bit result to its unsigned equivalent
    return mixed[2] < 0 ? 0x100000000 + mixed[2] : mixed[2];
}
/**
 * Converts a string into an array of its UTF-16 character codes.
 *
 * @param {string} string text to convert
 * @returns {Array} one character code per character, in order
 */
function strord(string) {
    var result = [];
    //`i` is declared locally: as an implicit global it throws in strict mode
    //and overwrites any `i` used by surrounding code
    for (var i = 0; i < string.length; i++) {
        result[i] = string.charCodeAt(i);
    }
    return result;
}
43 changes: 43 additions & 0 deletions examples/google_rank.js
@@ -0,0 +1,43 @@
// This module checks a domain's Google rank for a given keyword (rate limits obviously apply)
//
// 1. To find the rank of a domain for a given keyword:
// $ echo "mastercard.com,Credit Cards" | node.io -s google_rank
// => mastercard.com,Credit Cards,9

var Job = require('../').Job;

exports.job = new Job({timeout:10, retries:3}, {

run: function google(input) {
var links, self = this;

var input = input.split(',');

this.getHtml('http://www.google.com/search?hl=en&num=100&q='+encodeURIComponent(input[1]), function(err, $, data) {
if (err) self.retry();

var rank, i = 0;

if (links = $('a.l')) {
links.each('href', function(href) {
i++;
if (href.indexOf('www.'+input[0]+'/') >= 0) {
rank = i;
} else if (href.indexOf('/'+input[0]+'/') >= 0) {
rank = i;
}
});
if (rank) {
self.emit(input[0]+','+input[1]+','+rank);
} else {
self.emit(input+',');
}
}
});
},

fail: function(input) {
this.emit(input+',');
}

});
27 changes: 27 additions & 0 deletions examples/google_spell.js
@@ -0,0 +1,27 @@
// This module uses Google suggest to spell check a word or list of words (rate limits obviously apply)
//
// 1. To output the result of Google suggest:
// $ echo "definately" | node.io -s google_spell
// => definitely

var Job = require('../').Job;

exports.job = new Job({timeout:10, retries:3}, {

run: function google(input) {
var spell, self = this;

this.getHtml('http://www.google.com/search?hl=en&q='+encodeURIComponent(input), function(err, $) {
if (err) self.retry();

if (spell = $('a.spell')) {
self.emit(spell.first().fulltext);
}
});
},

fail: function(input) {
this.emit(input);
}

});
46 changes: 46 additions & 0 deletions examples/reddit.js
@@ -0,0 +1,46 @@
//This module pulls the front page stories and scores from reddit.com
//There are APIs for doing this - this is just a quick demonstration of
//parsing HTML using htmlparser and an augmented soupselect

var Job = require('../').Job;

/**
 * Job run handler: fetches the reddit front page and emits an array of
 * "[score] title" strings, one per story that has a score.
 *
 * Must be invoked with the job as `this`; it uses the job's getHtml, assert,
 * emit and exit methods. Exits the job on a request/parsing error or when the
 * number of titles and scores differ.
 */
function reddit() {
    var self = this;

    this.getHtml('http://www.reddit.com/', function(err, $) {
        //Handle any http / parsing errors
        if (err) {
            //Stop here: `$` is not usable when the request failed
            self.exit(err);
            return;
        }

        var titles = [], scores = [], output = [];

        //Select all titles on the page
        $('a.title').each(function(a) {
            titles.push(a.text);
        });

        //Select all scores on the page
        $('div.score.unvoted').each(function(div) {
            scores.push(div.text);
        });

        //Mismatch? page probably didn't load properly
        if (scores.length != titles.length) {
            //Stop here so misaligned data is never emitted
            self.exit('Title / score mismatch');
            return;
        }

        //Output = [score] title
        for (var i = 0, len = scores.length; i < len; i++) {
            //Ignore upcoming stories
            if (scores[i] == '&bull;') continue;

            //Check the data is ok. `this` is not the job inside this
            //callback, so the captured `self` must be used
            self.assert(scores[i]).isInt();

            output.push('['+scores[i]+'] '+titles[i]);
        }

        self.emit(output);
    });
}

// Expose the job, using reddit() as its run handler.
// NOTE(review): `input: false` and `once: true` presumably mean "takes no input"
// and "run a single time" - confirm against the Job implementation.
exports.job = new Job({timeout:10, once:true}, {input:false, run:reddit});
78 changes: 78 additions & 0 deletions examples/resolve.js
@@ -0,0 +1,78 @@
// This module wraps the dns.lookup() method. There are a few different uses:
// (In each case replace domains.txt with your list of domains)
//
// 1. To resolve domains and return "domain,ip":
// $ cat domains.txt | node.io resolve
//
// 2. To return domains that do not resolve:
// $ cat domains.txt | node.io resolve notfound
//
// 3. To return domains that do resolve:
// $ cat domains.txt | node.io resolve found
//
// To output the results to a file, use either:
// $ cat domains.txt | node.io -s resolve > result.txt
// $ node.io -i domains.txt -o result.txt resolve

var Job = require('../').Job, dns = require('dns');

// Options passed to the Job constructor for this module.
// NOTE(review): `max` is presumably a concurrency limit, and `timeout` / `retries`
// presumably bound each lookup (the fail handler mentions timing out and
// exceeding the max number of retries) - confirm meaning and units against Job.
var options = {
max: 100,
timeout: 10,
retries: 3
}

// Reports a domain that did not resolve, according to job.options.args:
// 'notfound' emits the domain, 'found' skips it, anything else emits "domain,".
function reportUnresolved(job, domain) {
    var type = job.options.args;

    if (type === 'notfound') {
        job.emit(domain);
    } else if (type === 'found') {
        job.skip();
    } else {
        job.emit(domain + ',');
    }
}

// Handlers passed to the Job constructor.
var methods = {

    /**
     * Resolves one domain to an IPv4 address with dns.lookup() and reports
     * the outcome according to this.options.args ('notfound', 'found', or
     * the default "domain,ip" output). Lookup errors other than "not found"
     * trigger a retry.
     *
     * @param {string} domain the host name to resolve
     */
    run: function(domain) {
        var self = this, type = this.options.args;

        dns.lookup(domain, 4, function(err, ip) {
            if (err) {

                //The domain didn't resolve. 4 and 8 are the numeric errno
                //values the original code treated as "not found"; current
                //Node.js versions report the same condition via err.code
                if (err.errno === 4 || err.errno === 8 || err.code === 'ENOTFOUND') {
                    reportUnresolved(self, domain);
                } else {
                    self.retry();
                }

            } else {

                //The domain resolved successfully
                if (type === 'notfound') {
                    self.skip();
                } else if (type === 'found') {
                    self.emit(domain);
                } else {
                    self.emit(domain + ',' + ip);
                }

            }
        });
    },

    /**
     * Fallback handler, treated the same as a domain that did not resolve.
     * Produces exactly one emit or skip per domain.
     *
     * @param {*} status failure status supplied by the job runner (unused)
     * @param {string} domain the host name that failed
     */
    fail: function(status, domain) {

        //The domain either timed out or exceeded the max number of retries
        reportUnresolved(this, domain);

    }

}

// Expose the job built from the options and handlers defined above.
exports.job = new Job(options, methods);
10 changes: 10 additions & 0 deletions examples/resources/domains.txt
@@ -0,0 +1,10 @@
google.com
youtube.com
download.com
cnet.com
wow.com
google.com.au
amazon.com
asdfhkasdhfkashdjkashdk.com
asdjfh98eua9sdfunm.com
,,,,,,,,.com

0 comments on commit ce350aa

Please sign in to comment.