diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..9111d78 --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ +The MIT License + +Copyright (c) 2009-2010 Laurent Arnoud + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +'Software'), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.rdoc b/README.rdoc index bf17cca..4e9c8ab 100644 --- a/README.rdoc +++ b/README.rdoc @@ -1,8 +1,4 @@ -== validate-website - -== DESCRIPTION - - Web crawler for checking the validity of your documents. 
+= validate-website - Web crawler for checking the validity of your documents == INSTALLATION @@ -10,48 +6,56 @@ == SYNOPSIS - validate-website --help - validate-website -s "http://localhost:4567/" - -u "Mozilla 5.0" \ # user agent - -f not-well-formed.txt \ # log file - --auth=user,pass \ # http auth - -e 'redirect|news' \ # exclude regex - -n # log not found (404) - -c "name=val;name2=val2" - -v # verbose: show detail of validator errors - -d # debug: show anemone log - -== REQUIREMENTS: +validate-website [OPTIONS] - spk-anemone >= 0.2.4 (forked from chriskite/anemone, - add auth support and html body) - rainbow >= 1.0.4 (for console color) - -== CREDITS: +== DESCRIPTION - Thanks tenderlove for Nokogiri, - this library is inspired from markup_validity +validate-website is a web crawler for checking the markup validity and not +found urls. + +== OPTIONS + + -s, --site SITE + Website to crawl (Default: http://localhost:3000/) + -u, --useragent USERAGENT + Change user agent (Default: Anemone/VERSION) + -e, --exclude EXCLUDE + Url to exclude (ex: redirect|news) + -f, --file FILE + Save not well formed or not found (with -n used) urls + -a, --auth USER,PASS + Basic http authentication + -c, --cookies COOKIES + Set default cookies + -m, --[no-]markup-validation + Markup validation (Default: true) + -n, --not-found + Log not found url (Default: false) + -v, --verbose + Show detail of validator errors (Default: false). + -d, --debug + Show anemone log (Default: false) + -h, --help + Show help message and exit. + +== EXIT STATUS +0:: + Markup is valid and no 404 found. +1:: + Markup not valid or not found urls. + +== REQUIREMENTS + +* spk-anemone, '>= 0.4.0' +* rainbow, '>= 1.1' +* html5, '= 0.10.0' + +== CREDITS + +* Thanks to tenderlove for Nokogiri, this tool is inspired by markup_validity. +* And Chris Kite for the Anemone web-spider framework. 
== LICENSE -(The MIT License) - -Copyright (c) 2009 Laurent Arnoud (spk) - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -'Software'), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +The MIT License + +Copyright (c) 2009-2010 Laurent Arnoud diff --git a/Rakefile b/Rakefile index e968d4a..0ca7d54 100644 --- a/Rakefile +++ b/Rakefile @@ -9,8 +9,8 @@ require "rspec/core/rake_task" # RSpec 2.0 PKG_NAME = 'validate-website' PKG_VERSION = '0.4.1' -PKG_FILES = ['README.rdoc', 'Rakefile'] -Find.find('lib/', 'bin/', 'spec/') do |f| +PKG_FILES = ['README.rdoc', 'Rakefile', 'LICENSE'] +Find.find('bin/', 'lib/', 'man/', 'spec/') do |f| if FileTest.directory?(f) and f =~ /\.svn|\.git/ Find.prune else @@ -63,7 +63,8 @@ spec = Gem::Specification.new do |s| s.bindir = 'bin' s.executables << 'validate-website' s.files = PKG_FILES - s.description = 'Web crawler for checking the validity of your documents' + s.description = 'validate-website is a web crawler for checking the markup ' + + 'validity and not found urls.' 
end Rake::GemPackageTask.new(spec) do |pkg| @@ -71,6 +72,11 @@ Rake::GemPackageTask.new(spec) do |pkg| pkg.need_tar = true end +desc 'Update manpage from asciidoc file' +task :manpage do + system('a2x -f manpage -D man/man1 doc/validate-website.txt') +end + # RSpec 2.0 RSpec::Core::RakeTask.new(:spec) do |spec| spec.pattern = 'spec/*_spec.rb' diff --git a/doc/validate-website.txt b/doc/validate-website.txt new file mode 100644 index 0000000..c3c90ba --- /dev/null +++ b/doc/validate-website.txt @@ -0,0 +1,59 @@ +validate-website(1) +=================== + +NAME +---- +validate-website - Web crawler for checking the validity of your documents + +SYNOPSIS +-------- +*validate-website* ['OPTIONS'] + +DESCRIPTION +----------- +validate-website is a web crawler for checking the markup validity and not +found urls. + +OPTIONS +------- +*-s*, *--site* 'SITE':: + Website to crawl (Default: http://localhost:3000/) +*-u*, *--useragent* 'USERAGENT':: + Change user agent (Default: Anemone/VERSION) +*-e*, *--exclude* 'EXCLUDE':: + Url to exclude (ex: 'redirect|news') +*-f*, *--file* 'FILE':: + Save not well formed or not found (with -n used) urls +*-a*, *--auth* 'USER,PASS':: + Basic http authentication +*-c*, *--cookies* 'COOKIES':: + Set default cookies +*-m*, *--[no-]markup-validation*:: + Markup validation (Default: true) +*-n*, *--not-found*:: + Log not found url (Default: false) +*-v*, *--verbose*:: + Show detail of validator errors (Default: false). +*-d*, *--debug*:: + Show anemone log (Default: false) +*-h*, *--help*:: + Show help message and exit. + +EXIT STATUS +----------- +0:: + Markup is valid and no 404 found. +1:: + Markup not valid or not found urls. 
+ +AUTHOR +------ +Laurent Arnoud + +LICENSE +------- +The MIT License + +Copyright (c) 2009-2010 Laurent Arnoud + +// vim: set syntax=asciidoc: diff --git a/lib/validate_website.rb b/lib/validate_website.rb index b67ea44..7cb68c3 100644 --- a/lib/validate_website.rb +++ b/lib/validate_website.rb @@ -40,26 +40,40 @@ def parse(args) o.define_head "validate-website - Web crawler for checking the validity of your documents" o.separator "" - o.on("-s", "--site=val", String, - "Default: #{@options[:site]}") { |v| @options[:site] = v } - o.on("-u", "--useragent=val", String, - "Default: #{@options[:useragent]}") { |v| @options[:useragent] = v } - o.on("-e", "--exclude=val", String, - "Url to exclude") { |v| @options[:exclude] = v } - o.on("-f", "--file=val", String, - "Save not well formed urls") { |v| @options[:file] = v } - o.on("--auth=[user,pass]", Array, + o.on("-s", "--site 'SITE'", String, + "Website to crawl (Default: #{@options[:site]})") { |v| + @options[:site] = v + } + o.on("-u", "--useragent 'USERAGENT'", String, + "Change user agent (Default: #{@options[:useragent]})") { |v| + @options[:useragent] = v + } + o.on("-e", "--exclude 'EXCLUDE'", String, + "Url to exclude (ex: 'redirect|news')") { |v| + @options[:exclude] = v + } + o.on("-f", "--file 'FILE'", String, + "Save not well formed or not found urls") { |v| @options[:file] = v } + o.on("-a", "--auth 'USER,PASS'", Array, "Basic http authentification") { |v| @options[:auth] = v } - o.on("-c", "--cookies=val", "Set defaults cookies") { |v| @options[:cookies] = v } - + o.on("-c", "--cookies 'COOKIES'", String, + "Set defaults cookies") { |v| @options[:cookies] = v } o.on("-m", "--[no-]markup-validation", - "Markup validation (Default: #{@options[:markup_validation]})") { |v| @options[:markup_validation] = v } + "Markup validation (Default: #{@options[:markup_validation]})") { |v| + @options[:markup_validation] = v + } o.on("-n", "--not-found", - "Log not found url (Default: #{@options[:not_found]})") { |v| 
@options[:not_found] = v } + "Log not found url (Default: #{@options[:not_found]})") { |v| + @options[:not_found] = v + } o.on("-v", "--verbose", - "Show detail of validator errors (Default: #{@options[:verbose]})") { |v| @options[:verbose] = v } + "Show validator errors (Default: #{@options[:verbose]})") { |v| + @options[:verbose] = v + } o.on("-d", "--debug", - "Show anemone log (Default: #{@options[:debug]})") { |v| @options[:debug] = v } + "Show anemone log (Default: #{@options[:debug]})") { |v| + @options[:debug] = v + } o.separator "" o.on_tail("-h", "--help", "Show this help message.") { puts o; exit } diff --git a/man/man1/validate-website.1 b/man/man1/validate-website.1 new file mode 100644 index 0000000..cfbdec9 --- /dev/null +++ b/man/man1/validate-website.1 @@ -0,0 +1,114 @@ +'\" t +.\" Title: validate-website +.\" Author: [see the "AUTHOR" section] +.\" Generator: DocBook XSL Stylesheets v1.75.2 +.\" Date: 10/30/2010 +.\" Manual: \ \& +.\" Source: \ \& +.\" Language: English +.\" +.TH "VALIDATE\-WEBSITE" "1" "10/30/2010" "\ \&" "\ \&" +.\" ----------------------------------------------------------------- +.\" * Define some portability stuff +.\" ----------------------------------------------------------------- +.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.\" http://bugs.debian.org/507673 +.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html +.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.\" ----------------------------------------------------------------- +.\" * set default formatting +.\" ----------------------------------------------------------------- +.\" disable hyphenation +.nh +.\" disable justification (adjust text to left margin only) +.ad l +.\" ----------------------------------------------------------------- +.\" * MAIN CONTENT STARTS HERE * +.\" ----------------------------------------------------------------- +.SH "NAME" +validate-website 
\- Web crawler for checking the validity of your documents +.SH "SYNOPSIS" +.sp +\fBvalidate\-website\fR [\fIOPTIONS\fR] +.SH "DESCRIPTION" +.sp +validate\-website is a web crawler for checking the markup validity and not found urls\&. +.SH "OPTIONS" +.PP +\fB\-s\fR, \fB\-\-site\fR \fISITE\fR +.RS 4 +Website to crawl (Default: +http://localhost:3000/) +.RE +.PP +\fB\-u\fR, \fB\-\-useragent\fR \fIUSERAGENT\fR +.RS 4 +Change user agent (Default: Anemone/VERSION) +.RE +.PP +\fB\-e\fR, \fB\-\-exclude\fR \fIEXCLUDE\fR +.RS 4 +Url to exclude (ex: +\fIredirect|news\fR) +.RE +.PP +\fB\-f\fR, \fB\-\-file\fR \fIFILE\fR +.RS 4 +Save not well formed or not found (with \-n used) urls +.RE +.PP +\fB\-a\fR, \fB\-\-auth\fR \fIUSER,PASS\fR +.RS 4 +Basic http authentication +.RE +.PP +\fB\-c\fR, \fB\-\-cookies\fR \fICOOKIES\fR +.RS 4 +Set default cookies +.RE +.PP +\fB\-m\fR, \fB\-\-[no\-]markup\-validation\fR +.RS 4 +Markup validation (Default: true) +.RE +.PP +\fB\-n\fR, \fB\-\-not\-found\fR +.RS 4 +Log not found url (Default: false) +.RE +.PP +\fB\-v\fR, \fB\-\-verbose\fR +.RS 4 +Show detail of validator errors (Default: false)\&. +.RE +.PP +\fB\-d\fR, \fB\-\-debug\fR +.RS 4 +Show anemone log (Default: false) +.RE +.PP +\fB\-h\fR, \fB\-\-help\fR +.RS 4 +Show help message and exit\&. +.RE +.SH "EXIT STATUS" +.PP +0 +.RS 4 +Markup is valid and no 404 found\&. +.RE +.PP +1 +.RS 4 +Markup not valid or not found urls\&. +.RE +.SH "AUTHOR" +.sp +Laurent Arnoud +.SH "LICENSE" +.sp +The MIT License +.sp +Copyright (c) 2009\-2010 Laurent Arnoud