Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

refactor print_links again to avoid super long lines

  • Loading branch information...
commit c86a7d4ade591591b733346f22fa27f5d3fb84f1 1 parent 2498a61
@matugm authored
Showing with 23 additions and 6 deletions.
  1. +23 −6 lib/crawl.rb
View
29 lib/crawl.rb
@@ -126,17 +126,34 @@ def print_link(title,data)
puts_file ''
end
+ def formated_links(type) # Builds the list print_links hands to print_link for +type+; any other type yields nil (no else branch).
+ case type
+ when "external"
+ @ext_links = @ext_links.sort.uniq # sorted and de-duplicated; the result is also stored back in @ext_links
+ when "absolute"
+ @abs_links.sort.uniq { |link| link[/.*\?(?:\w+=)(?=\d+)|[\w\/.-]+/] } # one link per regex-matched part; @abs_links is not reassigned
+ when "relative"
+ @rel_links = @rel_links.sort.uniq { |link| link[/.*\/?(?:[\w_-]+)/] } # one link per regex-matched part, stored back in @rel_links
+ @rel_links.map { |e| e.gsub(/^\/\w+\/\w+/) { |link| " "*4 + link } } # prefixes four spaces to links that begin with two path segments (/a/b)
+ when "mail"
+ @mail_links = @mail_links.sort_by { |s| [ s[/@.*/], s[/.*@/] ] } # ordered by the text from "@" onward, then by the text up to "@"
+ @mail_links.uniq.map { |m| m.sub('mailto:','') } # drops duplicates and removes the first 'mailto:' from each entry
+ when "robots"
+ Http.open(@host + '/robots.txt').body.scan(/Disallow: (.*)/).sort.uniq # captured Disallow values (scan yields one-element arrays); Http is defined outside this view
+ end
+ end
+
def print_links(ofile) # Passes each category of links, with a bracketed title, to print_link.
@ofile = ofile # NOTE(review): presumably read by the output helpers (e.g. puts_file) — confirm
@abs_links = normalize
final_links = @abs_links + expanded_relative_links()
final_links = final_links.sort.uniq { |link| link[/.*\?\w+/] } # Get links with unique parameters
- print_link "[External links]", @ext_links.sort.uniq
- print_link "[Absolute links]", @abs_links.sort.uniq { |link| link[/.*\?(?:\w+=)(?=\d+)|[\w\/.-]+/] }
- print_link "[Relative links]", @rel_links.sort.uniq { |link| link[/.*\/?(?:[\w_-]+)/] }.map { |e| e.gsub(/^\/\w+\/\w+/) { |link| " "*4 + link } }
- print_link "[E-mail accounts] (:mailto)", @mail_links.sort_by { |s| [ s[/@.*/], s[/.*@/] ] }.uniq.map { |m| m.sub('mailto:','') }
- print_link "[Robots.txt]", Http.open(@host + '/robots.txt').body.scan(/Disallow: (.*)/).sort.uniq
- print_link "[Parametized queries]", final_links.grep(/\?/)
+ print_link "[External links]", formated_links('external')
+ print_link "[Absolute links]", formated_links('absolute')
+ print_link "[Relative links]", formated_links('relative')
+ print_link "[E-mail accounts]", formated_links('mail')
+ print_link "[Robots.txt]", formated_links('robots')
+ print_link "[Links with parameters]", final_links.grep(/\?/) # only the links that contain "?"
end
end
Please sign in to comment.
Something went wrong with that request. Please try again.