From ed9857626bfbf65b68d62fde79b64dd93254f4a3 Mon Sep 17 00:00:00 2001 From: matugm Date: Mon, 2 Apr 2012 21:07:28 +0200 Subject: [PATCH] improved crawler output --- lib/crawl.rb | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/lib/crawl.rb b/lib/crawl.rb index 06ee208..ca6652f 100644 --- a/lib/crawl.rb +++ b/lib/crawl.rb @@ -93,33 +93,34 @@ def normalize @abs_links.map { |link| link.sub('http://','') } end - def print_output(*string) - string = string[0] || "" + def puts_file(string) puts string @ofile.puts string if @ofile end + def print_link(title,data) + puts_file title + + if data == [] + puts_file 'nothing found.' + else + puts_file data + end + + puts_file '' + end + def print_links(ofile) @ofile = ofile @abs_links = normalize final_links = @abs_links + to_absolute(@rel_links) final_links = final_links.sort.uniq { |link| link[/.*\?\w+/] } # Conseguir links con parametros unicos - print_output "---- External links" - print_output @ext_links.sort.uniq - print_output - print_output "---- Absolute links" - print_output @abs_links.sort.uniq { |link| link[/.*#\w+/] } - print_output - print_output "---- Relative links" - print_output @rel_links.sort.uniq { |link| link[/(?:\/\w+)+/] } - print_output - print_output "---- E-mail accounts (:mailto)" - print_output @mail_links.sort_by { |s| [ s[/@.*/], s[/.*@/] ] }.uniq.map { |m| m.sub('mailto:','') } - print_output - print_output "**** Parametized queries" - print_output final_links.grep(/\?/) - print_output + print_link "[External links]", @ext_links.sort.uniq + print_link "[Absolute links]", @abs_links.sort.uniq { |link| link[/.*#\w+/] } + print_link "[Relative links]", @rel_links.sort.uniq { |link| link[/(?:\/\w+)+/] } + print_link "[E-mail accounts] (:mailto)", @mail_links.sort_by { |s| [ s[/@.*/], s[/.*@/] ] }.uniq.map { |m| m.sub('mailto:','') } + print_link "[Parametized queries]", final_links.grep(/\?/) end end