Skip to content

Commit

Permalink
chore: replace nokogiri with oga
Browse files Browse the repository at this point in the history
  • Loading branch information
ninoseki committed Sep 1, 2018
1 parent 9f6059b commit 7ff8cbb
Show file tree
Hide file tree
Showing 15 changed files with 15 additions and 20 deletions.
2 changes: 1 addition & 1 deletion SimpleWhatWeb.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ Gem::Specification.new do |spec|
spec.add_development_dependency "webmock", "~> 3.4"

spec.add_dependency "http", "~> 3.3"
spec.add_dependency "oga", "~> 2.15"
spec.add_dependency "require_all", "~> 2.0"
spec.add_dependency "sanitize", "~> 4.6"
spec.add_dependency "thor", "~> 0.19"
end
9 changes: 6 additions & 3 deletions lib/whatweb/helper.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# frozen_string_literal: true

require "digest/md5"
require "sanitize"
require "oga"

module WhatWeb
module Helper
Expand All @@ -10,8 +10,11 @@ def md5sum
Digest::MD5.hexdigest(body.to_s)
end

def sanitized_body
Sanitize.document(body.to_s, elements: ["html"])
# Returns the concatenated text of the nodes selected from the parsed body.
#
# The body string is tagged as UTF-8 before being handed to Oga. The XPath
# used depends on whether the raw body begins with a <body> tag
# (case-insensitive): if it does, the <body> element itself is selected,
# otherwise its child nodes are.
def text
  document = Oga.parse_html(body.to_s.force_encoding('UTF-8'))
  xpath =
    if /\A<body(?:\s|>)/i.match?(body.to_s)
      '/html/body'
    else
      '/html/body/node()'
    end
  document.xpath(xpath).map(&:text).join
end

def tag_pattern
Expand Down
4 changes: 2 additions & 2 deletions lib/whatweb/matcher/ghdb.rb
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ def match_others?
# does it start with a - ?
if w[0] == '-'
# reverse true/false if it begins with a -
!target.sanitized_body.match? /#{Regexp.escape(w[1..-1])}/i
!target.text.match? /#{Regexp.escape(w[1..-1])}/i
else
w = w[1..-1] if w[0] == '+' # if it starts with +, ignore the 1st char
target.sanitized_body.match? /#{Regexp.escape(w)}/i
target.text.match? /#{Regexp.escape(w)}/i
end
end
end
Expand Down
1 change: 0 additions & 1 deletion lib/whatweb/plugins/anygate.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# coding: ascii-8bit
# frozen_string_literal: true

##
Expand Down
1 change: 0 additions & 1 deletion lib/whatweb/plugins/bm-classifieds.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# coding: ascii-8bit
# frozen_string_literal: true

##
Expand Down
1 change: 0 additions & 1 deletion lib/whatweb/plugins/fidion-cms.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# coding: ascii-8bit
# frozen_string_literal: true

##
Expand Down
1 change: 0 additions & 1 deletion lib/whatweb/plugins/mihalism-multi-host.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# coding: ascii-8bit
# frozen_string_literal: true

##
Expand Down
1 change: 0 additions & 1 deletion lib/whatweb/plugins/netsnap-web-camera.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# coding: ascii-8bit
# frozen_string_literal: true

##
Expand Down
1 change: 0 additions & 1 deletion lib/whatweb/plugins/rvi-camera.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# coding: ascii-8bit
# frozen_string_literal: true

##
Expand Down
1 change: 0 additions & 1 deletion lib/whatweb/plugins/s-cms.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# coding: ascii-8bit
# frozen_string_literal: true

##
Expand Down
1 change: 0 additions & 1 deletion lib/whatweb/plugins/samphpweb.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# coding: ascii-8bit
# frozen_string_literal: true

##
Expand Down
1 change: 0 additions & 1 deletion lib/whatweb/plugins/snografx.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# coding: ascii-8bit
# frozen_string_literal: true

##
Expand Down
6 changes: 3 additions & 3 deletions lib/whatweb/plugins/title.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# Version 0.2
# removed :certainty=>100

require "nokogiri"
require "oga"

WhatWeb::Plugin.define "Title" do
@author = "Andrew Horton"
Expand All @@ -22,8 +22,8 @@
def passive(target)
m = []

html = Nokogiri.parse(target.body)
title = html.css("title")
html = Oga.parse_html(target.body)
title = html.at_css("title")
if title
# Give warning if title element contains newline(s)
m << { name: "WARNING", module: "Title element contains newline(s)!" } if title.text.include? "\n"
Expand Down
4 changes: 2 additions & 2 deletions lib/whatweb/target.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ def tag_pattern
@tag_pattern ||= response.tag_pattern
end

def sanitized_body
@sanitized_body ||= response.sanitized_body
# Memoized accessor: asks the response for its text the first time and
# reuses the stored value afterwards (re-evaluated while the stored value
# is nil or false).
def text
  @text = response.text unless @text
  @text
end

def self.meta_refresh_regex
Expand Down
1 change: 1 addition & 0 deletions spec/cli_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
output = capture(:stdout) { subject.start %w(scan https://github.com) }
json = JSON.parse(output)
expect(json).to be_a(Hash)
expect(json.dig("Title").first.dig("string")).to eq("The world’s leading software development platform · GitHub")
end
end
end
Expand Down

0 comments on commit 7ff8cbb

Please sign in to comment.