Skip to content

Commit

Permalink
Move LinkCollector into nanoc-checking
Browse files Browse the repository at this point in the history
  • Loading branch information
denisdefreyne committed Jan 8, 2024
1 parent c1d49ec commit c19b92c
Show file tree
Hide file tree
Showing 14 changed files with 573 additions and 314 deletions.
1 change: 0 additions & 1 deletion .rubocop_todo.yml
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,6 @@ Style/ClassAndModuleChildren:
- 'nanoc/lib/nanoc/extra.rb'
- 'nanoc/lib/nanoc/extra/core_ext/time.rb'
- 'nanoc/lib/nanoc/extra/jruby_nokogiri_warner.rb'
- 'nanoc/lib/nanoc/extra/link_collector.rb'
- 'nanoc/lib/nanoc/filters.rb'
- 'nanoc/lib/nanoc/filters/asciidoc.rb'
- 'nanoc/lib/nanoc/filters/asciidoctor.rb'
Expand Down
9 changes: 8 additions & 1 deletion nanoc-checking/Rakefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# frozen_string_literal: true

require 'rake/testtask'
require 'rspec/core/rake_task'
require 'rubocop/rake_task'

Expand All @@ -9,7 +10,13 @@ RSpec::Core::RakeTask.new(:spec) do |t|
t.verbose = false
end

task test: :spec
# Defines the :test_all task, which runs every test/**/test_*.rb file with the
# test directory added to the load path. Verbose output is disabled.
Rake::TestTask.new(:test_all) do |t|
t.test_files = Dir['test/**/test_*.rb']
t.libs << 'test'
t.verbose = false
end

task test: %i[spec test_all]

task :gem do
sh('gem build *.gemspec')
Expand Down
1 change: 1 addition & 0 deletions nanoc-checking/lib/nanoc/checking.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ module Checking
require_relative 'checking/checks'
require_relative 'checking/command_runners'
require_relative 'checking/dsl'
require_relative 'checking/link_collector'
require_relative 'checking/runner'
require_relative 'checking/loader'
require_relative 'checking/issue'
Expand Down
2 changes: 1 addition & 1 deletion nanoc-checking/lib/nanoc/checking/checks/external_links.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def run
# Find all broken external hrefs
# TODO: de-duplicate this (duplicated in internal links check)
filenames = output_html_filenames.reject { |f| excluded_file?(f) }
hrefs_with_filenames = ::Nanoc::Extra::LinkCollector.new(filenames, :external).filenames_per_href
hrefs_with_filenames = ::Nanoc::Checking::LinkCollector.new(filenames, :external).filenames_per_href
results = select_invalid(hrefs_with_filenames.keys.shuffle)

# Report them
Expand Down
2 changes: 1 addition & 1 deletion nanoc-checking/lib/nanoc/checking/checks/internal_links.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class InternalLinks < ::Nanoc::Checking::Check
def run
# TODO: de-duplicate this (duplicated in external links check)
filenames = output_html_filenames
uris = ::Nanoc::Extra::LinkCollector.new(filenames, :internal).filenames_per_href
uris = ::Nanoc::Checking::LinkCollector.new(filenames, :internal).filenames_per_href

uris.each_pair do |href, fns|
fns.each do |filename|
Expand Down
2 changes: 1 addition & 1 deletion nanoc-checking/lib/nanoc/checking/checks/mixed_content.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class MixedContent < ::Nanoc::Checking::Check

def run
filenames = output_html_filenames
resource_uris_with_filenames = ::Nanoc::Extra::LinkCollector.new(filenames).filenames_per_resource_uri
resource_uris_with_filenames = ::Nanoc::Checking::LinkCollector.new(filenames).filenames_per_resource_uri

resource_uris_with_filenames.each_pair do |uri, fns|
next unless guaranteed_insecure?(uri)
Expand Down
132 changes: 132 additions & 0 deletions nanoc-checking/lib/nanoc/checking/link_collector.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# frozen_string_literal: true

module ::Nanoc
  module Checking
    # Collects the URIs referenced from a list of HTML files and groups the
    # filenames by the URI they reference.
    #
    # Every URI found is stripped of its fragment and, unless it is
    # protocol-relative, resolved against the `file://` URI of the file that
    # contains it. Relative links therefore end up as `file:/...` URIs, which
    # is what {#internal_href?} keys on.
    class LinkCollector
      # HTML5 element attributes
      URI_ATTRS = {
        'a' => %i[href ping],
        'area' => %i[href ping],
        'audio' => %i[src],
        'base' => %i[href],
        'blockquote' => %i[cite],
        'form' => %i[action],
        'iframe' => %i[src],
        'img' => %i[src srcset],
        'link' => %i[href],
        'object' => %i[data],
        'script' => %i[src],
        'source' => %i[src srcset],
        'video' => %i[poster src],
      }.freeze
      # HTML+RDFa global URI attributes
      GLOBAL_ATTRS = %i[about resource].freeze
      # Attributes whose value is a whitespace-separated list of URIs
      URI_LIST_ATTRS = %i[about ping resource].freeze

      # @param filenames [Enumerable<String>] paths of the HTML files to scan
      # @param mode [Symbol, nil] `:external` or `:internal` to keep only that
      #   kind of URI, or `nil` to keep every URI
      # @raise [ArgumentError] if `mode` is anything else
      def initialize(filenames, mode = nil)
        Nanoc::Extra::JRubyNokogiriWarner.check_and_warn

        @filenames = filenames
        @filter =
          case mode
          when nil
            ->(_h) { true }
          when :external
            ->(h) { external_href?(h) }
          when :internal
            ->(h) { internal_href?(h) }
          else
            raise ArgumentError, 'Expected mode argument to be :internal, :external or nil'
          end
      end

      # @return [Hash{String => Set<String>}] for every URI found in any
      #   element, the set of filenames that reference it
      def filenames_per_href
        grouped_filenames { |filename| hrefs_in_file(filename) }
      end

      # @return [Hash{String => Set<String>}] like {#filenames_per_href}, but
      #   restricted to the elements scanned by {#resource_uris_in_file}
      def filenames_per_resource_uri
        grouped_filenames { |filename| resource_uris_in_file(filename) }
      end

      # Tells whether `href` is protocol-relative (`//...`) or carries a
      # scheme, and is not internal.
      #
      # @return [Integer, false, nil] truthy for external hrefs, falsy otherwise
      def external_href?(href)
        return false if internal_href?(href)

        href =~ %r{^(//|[a-z-]+:)}
      end

      # Tells whether `href` points into the local file system, i.e. starts
      # with `file:/`.
      #
      # @return [Boolean] `false` for `nil`
      def internal_href?(href)
        return false if href.nil?

        href.start_with?('file:/')
      end

      # all links
      def hrefs_in_file(filename)
        uris_in_file filename, nil
      end

      # embedded resources, used by the mixed-content checker
      def resource_uris_in_file(filename)
        uris_in_file filename, %w[audio base form iframe img link object script source video]
      end

      private

      # Builds a hash that maps every URI yielded by the block for a filename
      # to the set of filenames that yielded it.
      def grouped_filenames
        # Loaded lazily, so Nokogiri is only needed once links are collected
        require 'nokogiri'

        @filenames.each_with_object({}) do |filename, grouped|
          yield(filename).each do |resource_uri|
            (grouped[resource_uri] ||= Set.new) << filename
          end
        end
      end

      # Returns the URIs referenced in `filename` that pass the mode filter.
      #
      # @param filename [String] path of the HTML file to parse
      # @param tag_names [Array<String>, nil] element names to inspect; `nil`
      #   inspects every node and also reads the global RDFa attributes
      def uris_in_file(filename, tag_names)
        uris = Set.new
        base_uri = URI("file://#{filename}")
        doc = Nokogiri::HTML(::File.read(filename))
        doc.traverse do |tag|
          next unless tag_names.nil? || tag_names.include?(tag.name)

          attrs = URI_ATTRS.fetch(tag.name, [])
          attrs += GLOBAL_ATTRS if tag_names.nil?

          attrs.each do |attr_name|
            value = tag[attr_name]
            next if value.nil?

            uris.merge(uris_in_attribute(attr_name, value))
          end
        end

        # Strip the fragment, then resolve. Mapping the set in place also
        # de-duplicates URIs that only differed by fragment.
        uris.map! { |uri| resolve_uri(base_uri, uri.gsub(/#.*$/, '')) }

        uris.select(&@filter)
      end

      # Extracts the individual URIs from one attribute value.
      def uris_in_attribute(attr_name, value)
        if attr_name == :srcset
          # Each comma-separated candidate is "<url> [descriptor]". Candidates
          # left empty by a stray or trailing comma are skipped.
          value.split(',').map { |candidate| candidate.split.first }.compact
        elsif URI_LIST_ATTRS.include?(attr_name)
          value.split
        else
          [value]
        end
      end

      # Resolves `uri` relative to `base_uri`; invalid URIs are returned as-is.
      def resolve_uri(base_uri, uri)
        # Don't modify protocol-relative URLs. They're absolute!
        return uri if uri.start_with?('//')

        URI.join(base_uri, uri).to_s
      rescue StandardError
        uri
      end
    end
  end
end
1 change: 1 addition & 0 deletions nanoc-checking/nanoc-checking.manifest
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ lib/nanoc/checking/command_runners/check.rb
lib/nanoc/checking/commands/check.rb
lib/nanoc/checking/dsl.rb
lib/nanoc/checking/issue.rb
lib/nanoc/checking/link_collector.rb
lib/nanoc/checking/loader.rb
lib/nanoc/checking/runner.rb
lib/nanoc/checking/version.rb

0 comments on commit c19b92c

Please sign in to comment.