Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
kanzure committed Jan 2, 2013
0 parents commit 64416fe
Show file tree
Hide file tree
Showing 8 changed files with 374 additions and 0 deletions.
12 changes: 12 additions & 0 deletions .gitignore
@@ -0,0 +1,12 @@
# ignore editor leftovers
.*.sw*
.*~
*~

# ignore precompiled python files
*.pyc

# ignore setup stuff
build/
dist/
careful_requests.egg-info/
16 changes: 16 additions & 0 deletions Makefile
@@ -0,0 +1,16 @@
SHELL := /bin/bash

test-python3:
python3 test_careful_requests.py

test-python2:
python test_careful_requests.py

test: test-python3 test-python2

clean:
rm -fr build/
rm -fr dist/
rm -fr *.egg-info
find . -type f -name "*.pyc" -exec rm '{}' \;

16 changes: 16 additions & 0 deletions README.rst
@@ -0,0 +1,16 @@
careful-requests
~~~~~~~~~~~~~~~

This module provides an HTTP adapter for servers that are over-sensitive to
HTTP headers. It may be sad, but not all HTTP servers are HTTP-compliant and
some are suspicious of otherwise normal headers! Use careful-requests if you
still want to use the excellent Requests module.

Example usage::

from careful_requests import Careful

s = Careful()

>>> s.get("http://httpbin.org/get", omit_headers=["accept-encoding"])
<Response [200]>
4 changes: 4 additions & 0 deletions careful_requests/__init__.py
@@ -0,0 +1,4 @@
# -*- coding: utf-8 -*-
from .careful_requests import Careful

from . import utils
249 changes: 249 additions & 0 deletions careful_requests/careful_requests.py
@@ -0,0 +1,249 @@
# -*- coding: utf-8 -*-
"""
The requests library is wonderful. However, it injects custom headers into
outgoing requests due to urllib3 and httplib/http.client. This is an attempt to
isolate and fix this issue.
see also:
https://github.com/shazow/urllib3/pull/138
https://github.com/kennethreitz/requests/pull/1077
http://bugs.python.org/issue16830
"""

import requests

try: # Python 3
from http.client import HTTPConnection, HTTPException
from http.client import HTTP_PORT, HTTPS_PORT
except ImportError:
from httplib import HTTPConnection, HTTPException
from httplib import HTTP_PORT, HTTPS_PORT

from requests.packages.urllib3.connectionpool import (
ConnectionPool,
VerifiedHTTPSConnection,
HTTPConnectionPool,
HTTPSConnectionPool,
)

from requests.packages.urllib3.request import RequestMethods
from requests.packages.urllib3.poolmanager import PoolManager
from requests.packages.urllib3.poolmanager import log as pool_log
from requests.packages.urllib3.connectionpool import port_by_scheme

class CarefulHTTPConnection(HTTPConnection):
def __init__(self, *args, **kwargs):
HTTPConnection.__init__(self, *args, **kwargs)

self.omit_headers = None

def request(self, method, url, body=None, skip_host=False, skip_accept_encoding=False, headers={}):
"""Send a complete request to the server."""
self._send_request(method, url, body, headers, skip_host=skip_host, skip_accept_encoding=skip_accept_encoding)

def _send_request(self, method, url, body, headers, skip_host=False, skip_accept_encoding=False):
header_names = dict.fromkeys([k.lower() for k in headers])

skips = {}

# enable skip_host and skip_accept_encoding from py2.4
if skip_host or ('host' in header_names):
skips['skip_host'] = 1
if skip_accept_encoding or ('accept-encoding' in header_names):
skips['skip_accept_encoding'] = 1

# omit some default headers when asked
if self.omit_headers:
omits = [key.lower() for key in self.omit_headers]
if "host" in omits:
skips["skip_host"] = True
if "accept-encoding" in omits:
skips["skip_accept_encoding"] = True

self.putrequest(method, url, **skips)

if body is not None and ('content-length' not in header_names):
self._set_content_length(body)
for hdr, value in headers.items():
self.putheader(hdr, value)
if isinstance(body, str):
# RFC 2616 Section 3.7.1 says that text default has a
# default charset of iso-8859-1.
body = body.encode('iso-8859-1')
self.endheaders(body)

try: # Compiled with SSL?
try: # Python 3
from http.client import HTTPSConnection
except ImportError:
from httplib import HTTPSConnection

class CarefulHTTPSConnection(HTTPSConnection, CarefulHTTPConnection): pass
except (ImportError, AttributeError):
pass

class CarefulVerifiedHTTPSConnection(VerifiedHTTPSConnection, CarefulHTTPSConnection):
pass

class CarefulHTTPConnectionPool(HTTPConnectionPool):
def __init__(self, *args, **kwargs):
self.omit_headers = None
HTTPConnectionPool.__init__(self, *args, **kwargs)

def _new_conn(self):
"""
Returns a fresh :class:`httplib.HTTPConnection`.
"""
self.num_connections += 1
pool_log.info("Starting new HTTP connection (%d): %s" %
(self.num_connections, self.host))
return CarefulHTTPConnection(host=self.host,
port=self.port,
strict=self.strict)

def urlopen(self, *args, **kwargs):
#if "omit_headers" in kwargs.keys():
# omit_headers = kwargs["omit_headers"]
# del kwargs["omit_headers"]
# self.omit_headers = omit_headers
#else:
# self.omit_headers = None
# urlopen calls _make_request which sets conn.omit_headers
return HTTPConnectionPool.urlopen(self, *args, **kwargs)

def _make_request(self, conn, method, url, **kwargs):
conn.omit_headers = self.omit_headers
return HTTPConnectionPool._make_request(self, conn, method, url, **kwargs)

# inherit from CarefulHTTPConnectionPool to get the better urlopen
class CarefulHTTPSConnectionPool(HTTPSConnectionPool, CarefulHTTPConnectionPool):
def _new_conn(self):
"""
Return a fresh :class:`httplib.HTTPSConnection`.
"""
self.num_connections += 1
pool_log.info("Starting new HTTPS connection (%d): %s"
% (self.num_connections, self.host))

if not ssl: # Platform-specific: Python compiled without +ssl
if not CarefulHTTPSConnection or CarefulHTTPSConnection is object:
raise SSLError("Can't connect to HTTPS URL because the SSL "
"module is not available.")

return CarefulHTTPSConnection(host=self.host,
port=self.port,
strict=self.strict)

connection = CarefulVerifiedHTTPSConnection(host=self.host,
port=self.port,
strict=self.strict)
connection.set_cert(key_file=self.key_file, cert_file=self.cert_file,
cert_reqs=self.cert_reqs, ca_certs=self.ca_certs)

if self.ssl_version is None:
connection.ssl_version = ssl.PROTOCOL_SSLv23
else:
connection.ssl_version = self.ssl_version

return connection

class CarefulPoolManager(PoolManager):
pool_classes_by_scheme = {
"http": CarefulHTTPConnectionPool,
"https": CarefulHTTPSConnectionPool,
}

def connection_from_host(self, host, port=None, scheme='http'):
"""
Get a :class:`ConnectionPool` based on the host, port, and scheme.
If ``port`` isn't given, it will be derived from the ``scheme`` using
``urllib3.connectionpool.port_by_scheme``.
"""
port = port or port_by_scheme.get(scheme, 80)

pool_key = (scheme, host, port)

# If the scheme, host, or port doesn't match existing open connections,
# open a new ConnectionPool.
pool = self.pools.get(pool_key)
if pool:
return pool

# Make a fresh ConnectionPool of the desired type.
# Use self.pool_classes_by_scheme instead of urllib3 defaults.
pool_cls = self.pool_classes_by_scheme[scheme]
pool = pool_cls(host, port, **self.connection_pool_kw)

self.pools[pool_key] = pool

return pool

class CarefulHTTPAdapter(requests.adapters.HTTPAdapter):
def init_poolmanager(self, connections, maxsize):
self.poolmanager = CarefulPoolManager(num_pools=connections, maxsize=maxsize)

def send(self, request, **kwargs):
omit_headers = None
if "omit_headers" in kwargs.keys():
omit_headers = kwargs["omit_headers"]
del kwargs["omit_headers"]
# get an existing or new connection
conn_pool = self.get_connection(request.url, proxies=kwargs["proxies"])
# store omit_headers
conn_pool.omit_headers = omit_headers
# HTTPAdapter.send will call get_connection and get back this conn_pool
return requests.adapters.HTTPAdapter.send(self, request, **kwargs)

# An alternative implementation might choose to rewrite send to pass
# omits_headers to urlopen.

class CarefulSession(requests.Session):
def __init__(self, *args, **kwargs):
always_omit = None
if "always_omit" in kwargs.keys():
always_omit = kwargs["always_omit"]
del kwargs["always_omit"]

omit_headers = set()
if "omit_headers" in kwargs.keys():
omit_headers = kwargs["omit_headers"]
del kwargs["omit_headers"]

# Persistent by default if the session was created with
# omit_headers.
if omit_headers != None and len(omit_headers) > 0:
if always_omit == None:
always_omit = True
elif always_omit == False:
raise Exception("Can't use session-level omit_headers when explicitly not using always_omit.")

requests.Session.__init__(self, *args, **kwargs)

self.always_omit = always_omit
self.default_omit_headers = set(omit_headers)
self.headers = {}

self.adapters = {}
self.mount("http://", CarefulHTTPAdapter())
self.mount("https://", CarefulHTTPAdapter())

def request(self, *args, **kwargs):
if "omit_headers" in kwargs.keys():
self.omit_headers = set(kwargs["omit_headers"])
del kwargs["omit_headers"]
elif not self.always_omit:
self.omit_headers = set()
elif self.always_omit:
self.omit_headers = self.default_omit_headers
r = requests.Session.request(self, *args, **kwargs)
return r

def send(self, request, **kwargs):
if self.omit_headers:
kwargs["omit_headers"] = self.omit_headers
adapter = self.get_adapter(url=request.url)
r = adapter.send(request, **kwargs)
return r
Careful = CarefulSession

13 changes: 13 additions & 0 deletions careful_requests/utils.py
@@ -0,0 +1,13 @@
# -*- coding: utf-8 -*-

def lower_keys(headers):
"""
Lowercases all keys in the given dictionary to remove inconsistencies
between "Content-Type" and "Content-type". This is particularly useful for
comparing two sets of headers that may or may not retain the same case
formatting.
"""
new_headers = {}
for key in headers.keys():
new_headers[key.lower()] = headers[key]
return new_headers
36 changes: 36 additions & 0 deletions setup.py
@@ -0,0 +1,36 @@
# -*- coding: utf-8 -*-
from setuptools import setup

setup(
name="careful-requests",
version="0.1.0",
url="https://github.com/kanzure/careful-requests",
license="BSD",
author="Bryan Bishop",
author_email="kanzure@gmail.com",
description="Requests for header-sensitive servers (like Accept-Encoding)",
long_description=open("README.rst", "r").read(),
py_modules=["careful_requests"],
packages=["careful_requests"],
zip_safe=False,
include_package_data=True,
install_requires=["requests"],
platforms="any",
classifiers=[
"Environment :: Web Environment",
"Intended Audience :: Developers",
'Natural Language :: English',
"License :: OSI Approved :: BSD License",
"Operating System :: OS Independent",
"Programming Language :: Python",
"Topic :: Internet :: WWW/HTTP :: Dynamic Content",
"Topic :: Software Development :: Libraries :: Python Modules",
#"Programming Language :: Python :: 2.6",
"Programming Language :: Python :: 2.7",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.1",
"Programming Language :: Python :: 3.2",
"Programming Language :: Python :: 3.3",
"Programming Language :: Python :: 3.4",
]
)
28 changes: 28 additions & 0 deletions test_careful_requests.py
@@ -0,0 +1,28 @@
# -*- coding: utf-8 -*-

import json
import unittest

from careful_requests import Careful
from careful_requests.utils import lower_keys

class TestCarefulHTTPAdapter(unittest.TestCase):
def setUp(self):
self.session = Careful()

def test_default_header_presence(self):
response = self.session.get("http://httpbin.org/get", headers={})
request_headers = lower_keys(response.request.headers)
response_headers = lower_keys(json.loads(response.text)["headers"])
self.assertIn("accept-encoding", response_headers)
self.assertIn("host", response_headers)

def test_skip_accept_encoding(self):
response = self.session.get("http://httpbin.org/get", headers={}, omit_headers=["accept-encoding"])
request_headers = lower_keys(response.request.headers)
response_headers = lower_keys(json.loads(response.text)["headers"])
self.assertIn("host", response_headers)
self.assertNotIn("accept-encoding", response_headers)

if __name__ == "__main__":
unittest.main()

0 comments on commit 64416fe

Please sign in to comment.