Skip to content
Permalink
Newer
Older
100755 127 lines (109 sloc) 5.36 KB
1
#!/usr/bin/phantomjs
2
3
// grab the "rendered" HTML of a JavaScript-requiring web page
4
5
// TBD:
//   - add '-' as a magic filename to read the page from STDIN
//   - add more curl-like switches? -- or just let curl do that and consume curl's output?
//   - add a switch for page.render(URLdump); // "screenshot"
9
10
var system = require('system'); // var args = require('system').args;
11
var page = require('webpage').create();
12
13
if (system.args.length === 1) {
14
console.log('Usage: curl-phantom.js <http://URL/path/file.ext>');
15
// note: can also read "pages" from the local filesystem
16
phantom.exit();
17
} else {
18
19
var URLarg=system.args[1];
20
var theStatusCode = null;
21
var theStatusPrev = null;
22
var thePrevURL = '' ;
23
var theCurrURL = '' ;
24
var timestamp = Date.now();
25
var verbose = false;
26
var debug = false;
27
var full_page = false;
28
var header_key = 'X-Forwarded-For';
29
var header_val = '3.1.20.13';
30
var requestTimeout= 5000; // Default request timeout
31
32
// Parse the command line. args[0] is this script's own filename, so start at 1.
//
//   --debug                 enable DEBUG output
//   --full_page             always print the whole document instead of the body
//   --header <key> <value>  custom request header name and value
//   --xff <value>           value for the default header name
//   --verbose               enable VERBOSE output
//   --timeout <msec>        resource timeout (non-negative integer)
//   http..., *.htm*, *.js*  the page to load
//
// URLarg defaults to args[1]. When args[1] is a switch, the first argument that
// looks like a URL or local file is used instead, so switches may precede the URL.
var urlTaken = system.args[1].indexOf('--') !== 0;
for (var i = 1; i < system.args.length; i++) {
    var arg = system.args[i];

    if (arg.indexOf('--debug') === 0) {
        debug = true;
    } else if (arg.indexOf('--full_page') === 0) {
        full_page = true;
        debug && console.log ('PAGE: ' + arg);
    } else if (arg.indexOf('--header') === 0) {
        // needs two following values; do not read past the end of the list
        if (i + 2 < system.args.length) {
            header_key = system.args[++i];
            header_val = system.args[++i];
        } else {
            system.stderr.write(arg + ' requires a header name and a value; ignored\n');
            break;
        }
    } else if (arg.indexOf('--xff') === 0) {
        // kludge: keeps the default header_key and only replaces the value
        if (i + 1 < system.args.length) {
            header_val = system.args[++i];
        } else {
            system.stderr.write(arg + ' requires a value; ignored\n');
        }
    } else if (arg.indexOf('--verbose') === 0) {
        verbose = true;
        debug && console.log ('VERBOSE: ' + arg);
    } else if (arg.indexOf('http') === 0 || arg.indexOf('.htm') > 0 || arg.indexOf('.js') > 0) {
        // http protocol is optional for local files; the extension checks detect those (kludge)
        if (!urlTaken) {
            URLarg = arg;
            urlTaken = true;
        }
    } else if (arg.indexOf('--timeout') === 0) {
        // command-line values are strings; keep the default unless a valid number is given
        var timeoutMsec = parseInt(system.args[++i], 10);
        if (isNaN(timeoutMsec) || timeoutMsec < 0) {
            system.stderr.write('invalid --timeout value; keeping ' + requestTimeout + '\n');
        } else {
            requestTimeout = timeoutMsec;
        }
    } else {
        console.log('unk. param: ' + arg);
    }
}
44
45
// Apply the (possibly overridden) resource timeout to the page.
page.settings.resourceTimeout = requestTimeout;

// Build the header map with bracket notation so the header is named by the
// *value* of header_key. An object literal `{ header_key : header_val }` would
// send a header literally called "header_key".
var headerMap = {};
headerMap[header_key] = header_val;
page.customHeaders = headerMap;
debug && console.log ('VERBOSE: ' + header_key + ': ' + header_val);
49
50
// Callback that intercepts console.log messages coming from the page.
// They are echoed only in debug mode.
page.onConsoleMessage = function (msg) {
    if (debug) {
        console.log('DEBUG: console.log message="' + msg + '"');
    }
};
53
54
// Reports the load result on stderr once the page has finished loading.
// Prefers the last status code recorded in theStatusCode; falls back to the
// status string passed to this callback when no code has been recorded.
//
// NOTE(review): the previous text of this handler had one more closing brace
// than opening braces, which ended the handler early and closed the enclosing
// block. A guard condition may have been lost here -- confirm whether this
// message was meant to be conditional (e.g. on verbose/debug).
page.onLoadFinished = function (status) {
    var reported = theStatusCode ? theStatusCode : status;
    system.stderr.write('Status: ' + reported + ' after onLoadFinished(' + status + ')\n');
};
60
61
// Tracks the status/URL of the two most recently received resources.
// Every call shifts "current" into "previous" and records the new resource as
// "current" (there is no filtering by URL or by redirect status).
// In verbose mode each resource is also logged.
page.onResourceReceived = function (resource) {
    // current -> previous
    theStatusPrev = theStatusCode;
    thePrevURL = theCurrURL;
    // new resource -> current
    theStatusCode = resource.status;
    theCurrURL = resource.url;

    if (!verbose) {
        return;
    }

    if (resource.status === 200) {
        // standard success: a single line is enough
        console.log('VERBOSE status ' + resource.status + ' for ' + resource.url );
    } else {
        console.log('Status Code was: ' + theStatusPrev + ' for ' + thePrevURL );
        console.log('Status Code is : ' + theStatusCode + ' for ' + theCurrURL );
    }
};
75
76
// Callback invoked with the new URL whenever the page's URL changes.
// A change to a URL other than the requested one is logged in verbose mode;
// a "change" to the requested URL itself is logged only in debug mode.
page.onUrlChanged = function (URLnew) {
    if (URLnew !== URLarg) {
        if (verbose) {
            console.log('DEBUG: old URL: ' + URLarg);
            console.log('DEBUG: new URL: ' + URLnew);
        }
        return;
    }

    if (debug) {
        console.log('DEBUG: old/new URL: ' + URLnew + ' --onUrlChanged()');
    }
};
84
85
// Error handler on the phantom object: formats the message and, when a trace
// is supplied, one line per trace entry, then prints it all via console.error.
phantom.onError = function (msg, trace) {
    var report = ['PHANTOM ERROR: ' + msg];

    if (trace) {
        report.push('TRACE:');
        trace.forEach(function (frame) {
            var origin = frame.file || frame.sourceURL;
            var inFunction = '';
            if (frame.function) {
                inFunction = ' (in function ' + frame.function + ')';
            }
            report.push(' -> ' + origin + ': ' + frame.line + inFunction);
        });
    }

    console.error(report.join('\n'));
};
95
96
// A resource request timed out: report the error code and text supplied with
// the request, then terminate with exit status 1.
page.onResourceTimeout = function (request) {
    var reason = request.errorCode + ' - ' + request.errorString;
    console.error('Request timed out due to ' + reason);
    phantom.exit(1);
};
100
101
// Open the requested page and print its rendered HTML.
//
// Output: the body's innerHTML, or the whole document when --full_page is set
// or the body is (nearly) empty. After a short delay the last two recorded
// resource statuses are logged (verbose only) and the process exits:
// 0 when the load status was 'success', 1 otherwise.
page.open( URLarg, function (status) {
    // original author's note: onLoadFinished executes here
    var page_content = page.content;
    var body_innerHTML = page.evaluate( function() {
        // document.body can be missing (e.g. nothing was loaded); do not dereference it blindly
        var body = document.body;
        return (body && body.innerHTML) ? body.innerHTML : '(empty)' ;
    });
    var title = page.evaluate(function() { return document.title; });

    // TODO: optional page.render(URLdump) "screenshot" switch

    verbose && console.log('VERBOSE: Loading time ' + ( Date.now() - timestamp ) + ' msec');

    debug && console.log('DEBUG: Page title: ' + ((title === '') ? '(none)' : title) );
    // body_innerHTML may be null if the evaluate call failed; report length 0 then
    debug && console.log('DEBUG: body_innerHTML.length=' + (body_innerHTML ? body_innerHTML.length : 0));
    (debug || verbose) && console.log(' '); // empty line

    // Bodies shorter than 9 characters -- which includes the '(empty)'
    // placeholder -- are treated as empty, and the full document is printed.
    if ( full_page || ( ! body_innerHTML ) || body_innerHTML.length < 9 ) {
        console.log( page_content );
    } else {
        console.log( body_innerHTML );
    }

    // Delay the final report so the status trackers can still be updated by
    // late callbacks, then terminate explicitly: without phantom.exit() the
    // process would keep running after the output has been printed.
    setTimeout(function() {
        verbose && console.log('VERBOSE: status ' + theStatusPrev + ' for ' + thePrevURL + ' (b)');
        verbose && console.log('VERBOSE: status ' + theStatusCode + ' for ' + theCurrURL + ' (c)');
        phantom.exit(status === 'success' ? 0 : 1);
    }, 1333 ) ; // delay in milliseconds
}) ;
126
127
}