Skip to content

Commit

Permalink
refactor print_links again to avoid super long lines
Browse files Browse the repository at this point in the history
  • Loading branch information
matugm committed May 12, 2012
1 parent 2498a61 commit c86a7d4
Showing 1 changed file with 23 additions and 6 deletions.
29 changes: 23 additions & 6 deletions lib/crawl.rb
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -126,17 +126,34 @@ def print_link(title,data)
puts_file '' puts_file ''
end end


# Builds the printable list of links for one category.
#
# type - "external", "absolute", "relative", "mail" or "robots".
#
# Returns the sorted, de-duplicated list for that category, or nil when
# +type+ is none of the above. The "external", "relative" and "mail"
# branches also write their sorted list back to the matching instance
# variable; "absolute" leaves @abs_links untouched.
def formated_links(type)
  case type
  when "external"
    sorted = @ext_links.sort
    @ext_links = sorted.uniq
  when "absolute"
    # De-duplicate by the regex match rather than by the whole link.
    sorted = @abs_links.sort
    sorted.uniq { |url| url[/.*\?(?:\w+=)(?=\d+)|[\w\/.-]+/] }
  when "relative"
    @rel_links = @rel_links.sort.uniq { |path| path[/.*\/?(?:[\w_-]+)/] }
    indent = " " * 4
    # A leading "/segment/segment" gets a four-space indent put in front of it.
    @rel_links.map do |path|
      path.gsub(/^\/\w+\/\w+/) { |prefix| indent + prefix }
    end
  when "mail"
    # Order by the "@..." part first, then by the text up to the "@".
    @mail_links = @mail_links.sort_by { |addr| [addr[/@.*/], addr[/.*@/]] }
    @mail_links.uniq.map { |addr| addr.sub('mailto:', '') }
  when "robots"
    robots = Http.open(@host + '/robots.txt')
    robots.body.scan(/Disallow: (.*)/).sort.uniq
  end
end

# NOTE(review): this span is a side-by-side diff rendering -- every line holds
# the pre-commit text followed by the post-commit text of the same line.
#
# Stores +ofile+ in @ofile, replaces @abs_links with the result of normalize,
# and emits one print_link section per link category. In the post-commit
# column the per-category lists come from formated_links(type) instead of the
# inline sort/uniq chains shown in the pre-commit column.
def print_links(ofile) def print_links(ofile)
@ofile = ofile @ofile = ofile
@abs_links = normalize @abs_links = normalize
final_links = @abs_links + expanded_relative_links() final_links = @abs_links + expanded_relative_links()
final_links = final_links.sort.uniq { |link| link[/.*\?\w+/] } # Get links with unique parameters final_links = final_links.sort.uniq { |link| link[/.*\?\w+/] } # Get links with unique parameters


print_link "[External links]", @ext_links.sort.uniq print_link "[External links]", formated_links('external')
print_link "[Absolute links]", @abs_links.sort.uniq { |link| link[/.*\?(?:\w+=)(?=\d+)|[\w\/.-]+/] } print_link "[Absolute links]", formated_links('absolute')
print_link "[Relative links]", @rel_links.sort.uniq { |link| link[/.*\/?(?:[\w_-]+)/] }.map { |e| e.gsub(/^\/\w+\/\w+/) { |link| " "*4 + link } } print_link "[Relative links]", formated_links('relative')
print_link "[E-mail accounts] (:mailto)", @mail_links.sort_by { |s| [ s[/@.*/], s[/.*@/] ] }.uniq.map { |m| m.sub('mailto:','') } print_link "[E-mail accounts]", formated_links('mail')
print_link "[Robots.txt]", Http.open(@host + '/robots.txt').body.scan(/Disallow: (.*)/).sort.uniq print_link "[Robots.txt]", formated_links('robots')
# Only the entries of final_links that contain a "?" are listed in this section.
print_link "[Parametized queries]", final_links.grep(/\?/) print_link "[Links with parameters]", final_links.grep(/\?/)
end end
end end

0 comments on commit c86a7d4

Please sign in to comment.