
0.7.4 changelog

commit be75c0ad7ec2bb17edbf0bf0b1bc3769c92797d0 (2 parents: 883a7fe + ffcdc4b)
@jamesturk authored
1  .travis.yml
@@ -3,6 +3,7 @@ python:
- "2.6"
- "2.7"
- "3.2"
+ - "3.3"
install: pip install mock nose requests --use-mirrors --upgrade
script: nosetests
notifications:
4 README.rst
@@ -5,7 +5,7 @@ scrapelib
scrapelib is a library for making requests to websites, particularly those
that may be less-than-reliable.
-scrapelib originated as part of the `Open States <http://openstates.org/`_
+scrapelib originated as part of the `Open States <http://openstates.org/>`_
project to scrape the websites of all 50 state legislatures and was therefore
designed with features desirable when dealing with sites that have
intermittent errors or require rate-limiting.
@@ -19,7 +19,7 @@ requests as-is:
* All of the power of the superb `requests <http://python-requests.org>`_ library.
* HTTP, HTTPS, and FTP requests via an identical API
* support for simple caching with pluggable cache backends
-* request throtting
+* request throttling
* configurable retries for non-permanent site failures
* optional robots.txt compliance
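
The feature list above maps onto a small API. A minimal usage sketch, assuming
the `Scraper` keyword arguments that appear in the diffs below; the URL and
argument values are purely illustrative:

import scrapelib

s = scrapelib.Scraper(requests_per_minute=60,  # request throttling
                      follow_robots=True,      # optional robots.txt compliance
                      retry_attempts=3)        # retries on intermittent failures
resp = s.urlopen('http://openstates.org/')     # same call for HTTP, HTTPS, FTP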
7 docs/changelog.rst
@@ -1,6 +1,13 @@
scrapelib changelog
===================
+0.7.4
+-----
+**20 December 2012**
+ * bugfix for status_code coming from a cache
+ * bugfix for setting user-agent from headers
+ * fix requests version at <1.0
+
0.7.3
-----
**21 June 2012**
3  scrapelib/__init__.py
@@ -336,7 +336,8 @@ def __init__(self,
self.follow_redirects = follow_redirects
self.requests_per_minute = requests_per_minute
# properties (pass through to config/headers)
- self.user_agent = user_agent
+ if user_agent != _user_agent or 'user-agent' not in self.headers:
+ self.user_agent = user_agent
self.follow_robots = follow_robots
self.retry_attempts = retry_attempts
self.retry_wait_seconds = retry_wait_seconds
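
The guard above preserves a user-agent passed via headers= while still letting
an explicitly passed user_agent override the module default (_user_agent). A
behavior sketch, mirroring test_user_agent_from_headers added further down and
assuming the user_agent property writes through to self.headers, as the "pass
through to config/headers" comment indicates:

# a user-agent set via headers= survives construction
s = Scraper(headers={'user-agent': 'from headers'})
assert s.headers['user-agent'] == 'from headers'

# a non-default user_agent argument still takes precedence
s = Scraper(user_agent='explicit agent')
assert s.headers['user-agent'] == 'explicit agent'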
3  scrapelib/cache.py
@@ -136,7 +136,8 @@ def get(self, orig_key):
resp._content = f.read()
# status & encoding will be in headers, but are faked
- resp.status_code = int(resp.headers.pop('status'))
+ # need to split spaces out of status to get code (e.g. '200 OK')
+ resp.status_code = int(resp.headers.pop('status').split(' ')[0])
resp.encoding = resp.headers.pop('encoding')
resp.url = resp.headers['content-location'] or orig_key
#TODO: resp.request = request
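
The added comment captures the root cause: the cache stores the full HTTP
status line (e.g. '200 OK') under the 'status' header, which int() cannot
parse directly. A standalone illustration of the fix:

status = '200 OK'                  # what the cache stores
code = int(status.split(' ')[0])   # 200 -- the fix takes the leading token
# int(status) would raise ValueError -- the bug this hunk removes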
14 scrapelib/tests/test_scraper.py
@@ -71,7 +71,7 @@ def test_get():
def test_post():
s = Scraper(requests_per_minute=0, follow_robots=False)
- resp = s.urlopen(HTTPBIN + 'post', 'POST', b'woo=woo')
+ resp = s.urlopen(HTTPBIN + 'post', 'POST', {'woo': 'woo'})
assert_equal(resp.response.code, 200)
resp_json = json.loads(resp)
assert_equal(resp_json['form']['woo'], 'woo')
@@ -120,6 +120,14 @@ def test_user_agent():
assert_equal(ua, 'a different agent')
+def test_user_agent_from_headers():
+ s = Scraper(requests_per_minute=0, follow_robots=False,
+ headers={'user-agent':'from headers'})
+ resp = s.urlopen(HTTPBIN + 'user-agent')
+ ua = json.loads(resp)['user-agent']
+ assert_equal(ua, 'from headers')
+
+
def test_follow_robots():
s = Scraper(requests_per_minute=0, follow_robots=True)
@@ -321,8 +329,8 @@ def test_disable_compression():
# default is restored
s.disable_compression = False
data = s.urlopen(HTTPBIN + 'headers')
- assert_equal(json.loads(data)['headers']['Accept-Encoding'],
- 'identity, deflate, compress, gzip')
+ assert 'compress' in json.loads(data)['headers']['Accept-Encoding']
+ assert 'gzip' in json.loads(data)['headers']['Accept-Encoding']
# A supplied Accept-Encoding header overrides the
# disable_compression option
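
The default Accept-Encoding string varies across requests versions, which is
why the assertion switches from exact equality to membership checks. A sketch
of the idea, with the header value being one possible default:

accept_encoding = 'identity, deflate, compress, gzip'  # one possible default
assert 'compress' in accept_encoding                   # robust across versions
assert 'gzip' in accept_encoding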
4 setup.py
@@ -20,7 +20,9 @@
"License :: OSI Approved :: BSD License",
"Natural Language :: English",
"Operating System :: OS Independent",
- "Programming Language :: Python",
+ "Programming Language :: Python :: 2.6",
+ "Programming Language :: Python :: 2.7",
+ "Programming Language :: Python :: 3.2",
("Topic :: Software Development :: Libraries :: "
"Python Modules"),
],