Browse files

improved crawler output

  • Loading branch information...
1 parent 52760af commit ed9857626bfbf65b68d62fde79b64dd93254f4a3 @matugm committed Apr 2, 2012
Showing with 18 additions and 17 deletions.
  1. +18 −17 lib/crawl.rb
View
35 lib/crawl.rb
@@ -93,33 +93,34 @@ def normalize
@abs_links.map { |link| link.sub('http://','') }
end
# Diff of the output helper: the removed print_output took a variable argument
# list and fell back to "" when called with no argument; the added puts_file
# takes exactly one string.
# The shared body echoes the string to stdout and, when @ofile is set, writes
# it to @ofile as well.
- def print_output(*string)
- string = string[0] || ""
+ def puts_file(string)
puts string
@ofile.puts string if @ofile
end
# Prints one report section through puts_file: the title first, then either
# the literal 'nothing found.' when data equals [] or data itself, and finally
# an empty string that acts as a separator after the section.
+ def print_link(title,data)
+ puts_file title
+
+ if data == []
+ puts_file 'nothing found.'
+ else
+ puts_file data
+ end
+
+ puts_file ''
+ end
+
# Prints the link report, one section per link category.
# Stores ofile in @ofile, replaces @abs_links with the result of normalize,
# and builds final_links from @abs_links plus to_absolute(@rel_links).
# In this diff the removed print_output calls (heading, data, blank line) are
# replaced by a single print_link call per section.
def print_links(ofile)
@ofile = ofile
@abs_links = normalize
final_links = @abs_links + to_absolute(@rel_links)
# NOTE(review): each uniq block below keys on link[regex], which is nil when
# the regex does not match, so all non-matching links share one key and only
# the first of them is kept -- confirm this is intended.
final_links = final_links.sort.uniq { |link| link[/.*\?\w+/] } # Get links with unique parameters
- print_output "---- External links"
- print_output @ext_links.sort.uniq
- print_output
- print_output "---- Absolute links"
- print_output @abs_links.sort.uniq { |link| link[/.*#\w+/] }
- print_output
- print_output "---- Relative links"
- print_output @rel_links.sort.uniq { |link| link[/(?:\/\w+)+/] }
- print_output
- print_output "---- E-mail accounts (:mailto)"
- print_output @mail_links.sort_by { |s| [ s[/@.*/], s[/.*@/] ] }.uniq.map { |m| m.sub('mailto:','') }
- print_output
- print_output "**** Parametized queries"
- print_output final_links.grep(/\?/)
- print_output
# Sections: sorted unique external links; absolute links deduplicated by the
# text up to a '#word' fragment; relative links deduplicated by their first
# run of '/word' segments; mail links ordered by the part from '@' onward,
# then by the part up to '@', deduplicated, with the first 'mailto:' removed;
# and the entries of final_links that contain a '?'.
+ print_link "[External links]", @ext_links.sort.uniq
+ print_link "[Absolute links]", @abs_links.sort.uniq { |link| link[/.*#\w+/] }
+ print_link "[Relative links]", @rel_links.sort.uniq { |link| link[/(?:\/\w+)+/] }
+ print_link "[E-mail accounts] (:mailto)", @mail_links.sort_by { |s| [ s[/@.*/], s[/.*@/] ] }.uniq.map { |m| m.sub('mailto:','') }
+ print_link "[Parametized queries]", final_links.grep(/\?/)
end
end

0 comments on commit ed98576

Please sign in to comment.