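# Tests for the Robotstxt gem. FakeWeb is used to stub HTTP responses so that
# robots.txt retrieval, status-code handling, redirects, and character
# encodings can be exercised without touching the network.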
# Put the local lib/ directory first on the load path so the tests run
# against the checked-out source rather than an installed gem.
$:.unshift(File.dirname(__FILE__) + '/../lib')

require 'rubygems'
require 'test/unit'
require 'uri'
require 'net/http'
require 'robotstxt'
require 'fakeweb'

# Fail fast if any test tries to make a real network request.
FakeWeb.allow_net_connect = false

class TestRobotstxt < Test::Unit::TestCase

  # A missing robots.txt (404) places no restrictions on crawling.
  def test_absence
    FakeWeb.register_uri(:get, "http://example.com/robots.txt", :status => ["404", "Not found"])
    assert_equal true, Robotstxt.get_allowed?("http://example.com/index.html", "Google")
  end

  # A server error (500) is treated the same as a missing robots.txt.
  def test_error
    FakeWeb.register_uri(:get, "http://example.com/robots.txt", :status => ["500", "Internal Server Error"])
    assert_equal true, Robotstxt.get_allowed?("http://example.com/index.html", "Google")
  end

  # A 401 response means access is restricted, so crawling is disallowed.
  def test_unauthorized
    FakeWeb.register_uri(:get, "http://example.com/robots.txt", :status => ["401", "Unauthorized"])
    assert_equal false, Robotstxt.get_allowed?("http://example.com/index.html", "Google")
  end

  # A 403 response likewise disallows all crawling.
  def test_forbidden
    FakeWeb.register_uri(:get, "http://example.com/robots.txt", :status => ["403", "Forbidden"])
    assert_equal false, Robotstxt.get_allowed?("http://example.com/index.html", "Google")
  end

  # Robotstxt.get also accepts a URI object in place of a URL string.
  def test_uri_object
    FakeWeb.register_uri(:get, "http://example.com/robots.txt", :body => "User-agent:*\nDisallow: /test")

    robotstxt = Robotstxt.get(URI.parse("http://example.com/index.html"), "Google")

    assert_equal true, robotstxt.allowed?("/index.html")
    assert_equal false, robotstxt.allowed?("/test/index.html")
  end

  # An already-open Net::HTTP connection can be handed to Robotstxt.get.
  def test_existing_http_connection
    FakeWeb.register_uri(:get, "http://example.com/robots.txt", :body => "User-agent:*\nDisallow: /test")

    Net::HTTP.start("example.com", 80) do |http|
      robotstxt = Robotstxt.get(http, "Google")
      assert_equal true, robotstxt.allowed?("/index.html")
      assert_equal false, robotstxt.allowed?("/test/index.html")
    end
  end

  # Redirects (here a 303) are followed to the robots.txt they point at.
  def test_redirects
    FakeWeb.register_uri(:get, "http://example.com/robots.txt", :response => "HTTP/1.1 303 See Other\nLocation: http://www.exemplar.com/robots.txt\n\n")
    FakeWeb.register_uri(:get, "http://www.exemplar.com/robots.txt", :body => "User-agent:*\nDisallow: /private")

    robotstxt = Robotstxt.get("http://example.com/", "Google")

    assert_equal true, robotstxt.allowed?("/index.html")
    assert_equal false, robotstxt.allowed?("/private/index.html")
  end

  # robots.txt content served in other encodings is decoded before parsing.
  def test_encoding
    # "User-agent: *\nDisallow: /encyclopædia" ("æ" is U+00E6), encoded as UTF-16LE with a BOM
    FakeWeb.register_uri(:get, "http://example.com/robots.txt", :response => "HTTP/1.1 200 OK\nContent-type: text/plain; charset=utf-16\n\n" +
        "\xff\xfeU\x00s\x00e\x00r\x00-\x00a\x00g\x00e\x00n\x00t\x00:\x00 \x00*\x00\n\x00D\x00i\x00s\x00a\x00l\x00l\x00o\x00w\x00:\x00 \x00/\x00e\x00n\x00c\x00y\x00c\x00l\x00o\x00p\x00\xe6\x00d\x00i\x00a\x00")
    robotstxt = Robotstxt.get("http://example.com/#index", "Google")

    assert_equal true, robotstxt.allowed?("/index.html")
    assert_equal false, robotstxt.allowed?("/encyclop%c3%a6dia/index.html")
  end

end
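
# For reference, a minimal usage sketch of the API exercised above (the URL
# and user-agent string below are illustrative, not part of the gem):
#
#   Robotstxt.get_allowed?("http://example.com/page.html", "MyCrawler")
#   # => true or false, depending on example.com's robots.txt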