Merge branch 'master' into xml-file
leejarvis committed Feb 20, 2013
2 parents dbb2837 + 1af855a commit 336d3d1
Showing 9 changed files with 166 additions and 9 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.rdoc
@@ -10,12 +10,16 @@
an `overwrite` option which, when set to true, will overwrite the original
file if it exists, otherwise falls back to the default behaviour. #219
* DirectorySaver::save_to now accepts an option to decode the filename. #262
* element(s)_with methods now accept a :search option for doing xpath/css
selector searching. #287 Philippe Bourgau
* Added httponly option for Mechanize::Cookie #242 by Paolo Perego.

* Minor enhancements
* Added Mechanize::Download#save_as as an alias to #save. #246
* Fix documentation for `Mechanize::Page` element matchers. #269
* Added Mechanize::Form::Field#raw_value for fetching a field's value
before it's sent through Mechanize::Util.html_unescape. #283
* Added iPad and Android user agents. #277 by sambit, #278 by seansay.

* Bug fix
* Mechanize#cert and Mechanize#key now return the values set by
@@ -29,6 +33,8 @@
`PluggableParser`, not just `text/xml`. #266 James Gregory
* Mechanize no longer attempts to follow redirects in non-HEAD/GET
requests. See RFC 2616 §10.3. #281
* Mechanize now writes cookiestxt with a prefixed dot for wildcard domain
handling. #295 by Mike Morearty.

=== 2.5.2

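For context, a minimal sketch of how the new Mechanize::Form::Field#raw_value accessor mentioned above differs from #value; the URL and field name are hypothetical and only illustrate the behaviour:

require 'mechanize'

agent = Mechanize.new
page  = agent.get('http://example.com/search')   # hypothetical page containing a form
form  = page.forms.first
field = form.field_with(:name => 'q')            # hypothetical field whose HTML value is "a &amp; b"

field.raw_value  # => "a &amp; b"  (the value exactly as written in the HTML)
field.value      # => "a & b"      (after Mechanize::Util.html_unescape)
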
2 changes: 1 addition & 1 deletion Rakefile
@@ -22,7 +22,7 @@ hoe = Hoe.spec 'mechanize' do
self.extra_deps << ['mime-types', '~> 1.17', '>= 1.17.2']
self.extra_deps << ['nokogiri', '~> 1.4']
self.extra_deps << ['ntlm-http', '~> 0.1', '>= 0.1.1']
self.extra_deps << ['webrobots', '~> 0.0', '>= 0.0.9']
self.extra_deps << ['webrobots', '< 0.2', '>= 0.0.9']
self.extra_deps << ['domain_name', '~> 0.5', '>= 0.5.1']

self.spec_extras[:required_ruby_version] = '>= 1.8.7'
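
The dependency change above relaxes webrobots from "~> 0.0" to an upper bound of "< 0.2". Expressed in an application's Gemfile, the equivalent constraint would read roughly:

gem 'webrobots', '>= 0.0.9', '< 0.2'   # illustration only; mechanize itself declares this in its gemspec via Hoe
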
1 change: 1 addition & 0 deletions lib/mechanize.rb
@@ -134,6 +134,7 @@ class Error < RuntimeError
}

AGENT_ALIASES['Mac FireFox'] = AGENT_ALIASES['Mac Firefox']
AGENT_ALIASES['Linux FireFox'] = AGENT_ALIASES['Linux Firefox']

def self.inherited(child) # :nodoc:
child.html_parser = html_parser
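
A quick illustration of the new alias: both capitalizations now resolve to the same user agent string.

require 'mechanize'

agent = Mechanize.new
agent.user_agent_alias = 'Linux Firefox'   # canonical spelling
agent.user_agent_alias = 'Linux FireFox'   # alternate capitalization, now mapped to the same UA string
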
8 changes: 7 additions & 1 deletion lib/mechanize/cookie.rb
@@ -10,6 +10,7 @@ class Mechanize::Cookie
attr_accessor :comment, :max_age

attr_accessor :session
attr_accessor :httponly

attr_accessor :created_at
attr_accessor :accessed_at
@@ -33,7 +34,7 @@ def initialize(*args)
@version = 0 # Netscape Cookie

@domain = @path = @secure = @comment = @max_age =
@expires = @comment_url = @discard = @port = nil
@expires = @comment_url = @discard = @port = @httponly = nil

@created_at = @accessed_at = Time.now
case args.size
@@ -97,6 +98,7 @@ def parse(uri, str, log = Mechanize.log)
next unless key
value = WEBrick::HTTPUtils.dequote(value.strip) if value


case key.downcase
when 'domain'
next unless value && !value.empty?
@@ -134,6 +136,8 @@ def parse(uri, str, log = Mechanize.log)
log.warn("Couldn't parse version '#{value}'") if log
cookie.version = nil
end
when 'httponly'
cookie.httponly = true
when 'secure'
cookie.secure = true
end
@@ -142,6 +146,7 @@
cookie.path ||= (uri + './').path
cookie.secure ||= false
cookie.domain ||= uri.host
cookie.httponly ||= false

# RFC 6265 4.1.2.2
cookie.expires = Time.now + cookie.max_age if cookie.max_age
@@ -193,6 +198,7 @@ def expired?
end

alias secure? secure
alias httponly? httponly

def acceptable_from_uri?(uri)
host = DomainName.new(uri.host)
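
A minimal sketch of the new HttpOnly support, using the same Cookie.parse API exercised by the tests below; the URL and cookie string are made up:

require 'mechanize'
require 'uri'

uri = URI.parse('http://example.com/')
Mechanize::Cookie.parse(uri, 'session=abc123; path=/; HttpOnly') do |cookie|
  cookie.httponly?  # => true
  cookie.secure?    # => false
end
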
5 changes: 4 additions & 1 deletion lib/mechanize/cookie_jar.rb
@@ -187,9 +187,12 @@ def load_cookiestxt(io)

# Write cookies to Mozilla cookies.txt-style IO stream
def dump_cookiestxt(io)
io.puts "# HTTP Cookie File"
io.puts "# This file was generated by Ruby Mechanize " \
"#{Mechanize::VERSION} https://github.com/sparklemotion/mechanize.\n\n"
to_a.each do |cookie|
io.puts([
cookie.domain,
(cookie.for_domain? ? "." : "") + cookie.domain,
cookie.for_domain? ? "TRUE" : "FALSE",
cookie.path,
cookie.secure ? "TRUE" : "FALSE",
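
Sketch of the resulting cookies.txt output, assuming `jar` is a Mechanize::CookieJar holding one domain cookie (for_domain? == true) and one host-only cookie; names, expiry and values are illustrative, and fields are tab separated:

jar.save_as('cookies.txt', :cookiestxt)

# The domain cookie is now written with a leading dot and TRUE in the second field:
#   .rubyforge.org  TRUE   /  FALSE  2000000000  Foo  Bar
# The host-only cookie keeps the bare domain and FALSE:
#   rubyforge.org   FALSE  /  FALSE  2000000000  Boo  Bar
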
23 changes: 17 additions & 6 deletions lib/mechanize/element_matcher.rb
@@ -6,14 +6,14 @@ def #{plural}_with criteria = {}
criteria = if String === criteria then
{:name => criteria}
else
criteria.map do |k, v|
k = :dom_id if k.to_sym == :id
k = :dom_class if k.to_sym == :class
[k, v]
end
Hash[criteria.map do |k, v|
k = :dom_id if k.to_sym == :id
k = :dom_class if k.to_sym == :class
[k, v]
end]
end
f = #{plural}.find_all do |thing|
f = select_#{plural}(criteria.delete(:search)).find_all do |thing|
criteria.all? do |k,v|
v === thing.send(k)
end
@@ -28,6 +28,17 @@ def #{singular}_with criteria = {}
f
end
def select_#{plural} selector
if selector.nil? then
#{plural}
else
nodes = search(selector)
#{plural}.find_all do |element|
nodes.include?(element.node)
end
end
end
alias :#{singular} :#{singular}_with
CODE
end
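
In short, the element matchers gain a :search criterion that pre-filters candidates with a CSS or XPath selector before the remaining criteria are applied. The selectors below are made up; the method names are the existing Mechanize::Page matchers:

page.links_with(:search => '#sidebar a', :dom_class => 'active')
page.images_with(:search => "//img[@class='thumb']")
page.link_with(:search => 'nav a', :text => 'Home')
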
34 changes: 34 additions & 0 deletions test/test_mechanize_cookie.rb
@@ -494,5 +494,39 @@ def o.to_str
}
assert 'example.com', cookie.domain
end

def test_cookie_httponly
url = URI.parse('http://rubyforge.org/')
cookie_params = {}
cookie_params['httponly'] = 'HttpOnly'
cookie_value = '12345%7D=ASDFWEE345%3DASda'

expires = Time.parse('Sun, 27-Sep-2037 00:00:00 GMT')

cookie_params.keys.combine.each do |c|
cookie_text = "#{cookie_value}; "
c.each_with_index do |key, idx|
if idx == (c.length - 1)
cookie_text << "#{cookie_params[key]}"
else
cookie_text << "#{cookie_params[key]}; "
end
end
cookie = nil
Mechanize::Cookie.parse(url, cookie_text) { |p_cookie| cookie = p_cookie; }

assert_equal(true, cookie.httponly)


# if expires was set, make sure we parsed it
if c.find { |k| k == 'expires' }
assert_equal(expires, cookie.expires)
else
assert_nil(cookie.expires)
end
end
end


end
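
The `combine` helper called on cookie_params.keys above is defined elsewhere in the mechanize test suite. Its exact implementation is not shown here, but a rough stand-in with the behaviour the test relies on (every non-empty combination of parameter keys, in order) might look like:

class Array
  # Hypothetical stand-in for the test suite's combine helper:
  # ['a', 'b'].combine # => [["a"], ["b"], ["a", "b"]]
  def combine
    (1..length).inject([]) { |all, n| all + combination(n).to_a }
  end
end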

38 changes: 38 additions & 0 deletions test/test_mechanize_cookie_jar.rb
@@ -508,4 +508,42 @@ def test_secure_cookie
assert_equal('Foo1', @jar.cookies(nurl).map { |c| c.name }.sort.join(' ') )
assert_equal('Foo1 Foo2', @jar.cookies(surl).map { |c| c.name }.sort.join(' ') )
end

def test_save_cookies_cookiestxt_subdomain
top_url = URI 'http://rubyforge.org/'
subdomain_url = URI 'http://admin.rubyforge.org/'

# cookie1 is for *.rubyforge.org; cookie2 is only for rubyforge.org, no subdomains
cookie1 = Mechanize::Cookie.new(cookie_values)
cookie2 = Mechanize::Cookie.new(cookie_values(:name => 'Boo', :for_domain => false))

@jar.add(top_url, cookie1)
@jar.add(top_url, cookie2)

assert_equal(2, @jar.cookies(top_url).length)
assert_equal(1, @jar.cookies(subdomain_url).length)

in_tmpdir do
@jar.save_as("cookies.txt", :cookiestxt)

jar = Mechanize::CookieJar.new
jar.load("cookies.txt", :cookiestxt) # HACK test the format
assert_equal(2, jar.cookies(top_url).length)
assert_equal(1, jar.cookies(subdomain_url).length)

# Check that we actually wrote the file correctly (not just that we were
# able to read what we wrote):
#
# * Cookies that only match exactly the domain specified must not have a
# leading dot, and must have FALSE as the second field.
# * Cookies that match subdomains must have a leading dot, and must have
# TRUE as the second field.
cookies_txt = File.readlines("cookies.txt")
assert_equal(1, cookies_txt.grep( /^rubyforge\.org\tFALSE/ ).length)
assert_equal(1, cookies_txt.grep( /^\.rubyforge\.org\tTRUE/ ).length)
end

assert_equal(2, @jar.cookies(top_url).length)
assert_equal(1, @jar.cookies(subdomain_url).length)
end
end
58 changes: 58 additions & 0 deletions test/test_mechanize_page.rb
@@ -143,5 +143,63 @@ def test_parser_no_attributes
assert_kind_of Nokogiri::HTML::Document, page.root
end

def test_search_links
page = html_page <<-BODY
<html>
<meta>
<head><title></title>
<body>
<span id="spany">
<a href="b.html">b</a>
<a href="a.html">a</a>
</span>
<a href="6.html">6</a>
</body>
</html>
BODY

links = page.links_with(:search => "#spany a")

assert_equal 2, links.size
assert_equal "b.html", links[0].href
assert_equal "b", links[0].text

assert_equal "a.html", links[1].href
assert_equal "a", links[1].text
end

def test_search_images
page = html_page <<-BODY
<html>
<meta>
<head><title></title>
<body>
<img src="a.jpg" class="pretty">
<img src="b.jpg">
<img src="c.png" class="pretty">
</body>
</html>
BODY

images = page.images_with(:search => "//img[@class='pretty']")

assert_equal 2, images.size
assert_equal "pretty", images[0].dom_class
assert_equal "a.jpg", images[0].src

assert_equal "pretty", images[1].dom_class
assert_equal "c.png", images[1].src
end

def test_search_bad_selectors
page = html_page <<-BODY
<a href="foo.html">foo</a>
<img src="foo.jpg" />
BODY

assert_empty page.images_with(:search => '//a')
assert_empty page.links_with(:search => '//img')
end

end
