#!/usr/bin/env python
# Copyright 2009 Facebook
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""HTTP utility code shared by clients and servers."""
import logging
import urllib
import re
from tornado.util import b
class HTTPHeaders(dict):
    """A dictionary that maintains Http-Header-Case for all keys.

    Supports multiple values per key via a pair of new methods,
    add() and get_list().  The regular dictionary interface returns a single
    value per key, with multiple values joined by a comma.

    >>> h = HTTPHeaders({"content-type": "text/html"})
    >>> list(h.keys())
    ['Content-Type']
    >>> h["Content-Type"]
    'text/html'

    >>> h.add("Set-Cookie", "A=B")
    >>> h.add("Set-Cookie", "C=D")
    >>> h["set-cookie"]
    'A=B,C=D'
    >>> h.get_list("set-cookie")
    ['A=B', 'C=D']

    >>> for (k, v) in sorted(h.get_all()):
    ...     print('%s: %s' % (k, v))
    ...
    Content-Type: text/html
    Set-Cookie: A=B
    Set-Cookie: C=D
    """

    def __init__(self, *args, **kwargs):
        # Don't pass args or kwargs to dict.__init__, as it will bypass
        # our __setitem__
        dict.__init__(self)
        # Maps each normalized header name to the list of its individual
        # values; the dict itself stores the comma-joined form.
        self._as_list = {}
        self.update(*args, **kwargs)

    # new public methods

    def add(self, name, value):
        """Adds a new value for the given key.

        If the header is already present, the new value is appended to its
        value list and the dictionary value becomes the comma-joined string
        of all values; otherwise the header is created.
        """
        norm_name = HTTPHeaders._normalize_name(name)
        if norm_name in self:
            # bypass our override of __setitem__ since it modifies _as_list
            dict.__setitem__(self, norm_name, self[norm_name] + ',' + value)
            self._as_list[norm_name].append(value)
        else:
            self[norm_name] = value

    def get_list(self, name):
        """Returns all values for the given header as a list.

        Returns an empty list when the header is not present.
        """
        norm_name = HTTPHeaders._normalize_name(name)
        return self._as_list.get(norm_name, [])

    def get_all(self):
        """Returns an iterable of all (name, value) pairs.

        If a header has multiple values, multiple pairs will be
        returned with the same name.
        """
        for name, values in self._as_list.items():
            for value in values:
                yield (name, value)

    def parse_line(self, line):
        """Updates the dictionary with a single header line.

        The line is split on the first colon; a line without a colon
        raises ValueError.

        >>> h = HTTPHeaders()
        >>> h.parse_line("Content-Type: text/html")
        >>> h.get('content-type')
        'text/html'
        """
        name, value = line.split(":", 1)
        self.add(name, value.strip())

    @classmethod
    def parse(cls, headers):
        """Returns a dictionary from HTTP header text.

        Empty lines in the input are ignored.

        >>> h = HTTPHeaders.parse("Content-Type: text/html\\r\\nContent-Length: 42\\r\\n")
        >>> sorted(h.items())
        [('Content-Length', '42'), ('Content-Type', 'text/html')]
        """
        h = cls()
        for line in headers.splitlines():
            if line:
                h.parse_line(line)
        return h

    # dict implementation overrides

    def __setitem__(self, name, value):
        # Assignment replaces any previously accumulated values.
        norm_name = HTTPHeaders._normalize_name(name)
        dict.__setitem__(self, norm_name, value)
        self._as_list[norm_name] = [value]

    def __getitem__(self, name):
        return dict.__getitem__(self, HTTPHeaders._normalize_name(name))

    def __delitem__(self, name):
        norm_name = HTTPHeaders._normalize_name(name)
        dict.__delitem__(self, norm_name)
        del self._as_list[norm_name]

    def get(self, name, default=None):
        return dict.get(self, HTTPHeaders._normalize_name(name), default)

    def update(self, *args, **kwargs):
        # dict.update bypasses our __setitem__
        for k, v in dict(*args, **kwargs).items():
            self[k] = v

    # Matches names that are already in Http-Header-Case, so the common
    # case can skip the split/capitalize/join work.
    _NORMALIZED_HEADER_RE = re.compile(r'^[A-Z0-9][a-z0-9]*(-[A-Z0-9][a-z0-9]*)*$')
    # Class-wide cache of raw name -> normalized name.
    # NOTE(review): this cache is never pruned; confirm that is acceptable
    # when header names come from untrusted peers.
    _normalized_headers = {}

    @staticmethod
    def _normalize_name(name):
        """Converts a name to Http-Header-Case.

        >>> HTTPHeaders._normalize_name("coNtent-TYPE")
        'Content-Type'
        """
        try:
            return HTTPHeaders._normalized_headers[name]
        except KeyError:
            if HTTPHeaders._NORMALIZED_HEADER_RE.match(name):
                normalized = name
            else:
                normalized = "-".join([w.capitalize() for w in name.split("-")])
            HTTPHeaders._normalized_headers[name] = normalized
            return normalized
def url_concat(url, args):
    """Concatenate url and argument dictionary regardless of whether
    url has existing query parameters.

    ``args`` is passed to ``urlencode``.  If it is empty, ``url`` is
    returned unchanged.  An empty ``url`` yields a bare query string
    instead of raising IndexError.

    >>> url_concat("http://example.com/foo?a=b", dict(c="d"))
    'http://example.com/foo?a=b&c=d'
    >>> url_concat("", dict(c="d"))
    '?c=d'
    """
    # Imported locally so the function works on both Python 2 and 3
    # without relying on the module-level ``urllib`` layout.
    try:
        from urllib.parse import urlencode  # Python 3
    except ImportError:
        from urllib import urlencode  # Python 2
    if not args:
        return url
    # endswith() with a tuple is safe on an empty string, unlike url[-1].
    if not url.endswith(('?', '&')):
        url += '&' if ('?' in url) else '?'
    return url + urlencode(args)
def parse_multipart_form_data(boundary, data, arguments, files):
    """Parses a multipart/form-data body.

    The boundary and data parameters are both byte strings.
    The dictionaries given in the arguments and files parameters
    will be updated with the contents of the body.

    Each well-formed part is appended to the list stored under its field
    name: parts carrying a non-empty ``filename`` go into ``files`` as a
    dict with ``filename``, ``body`` and ``content_type`` keys; all other
    parts go into ``arguments`` as the raw value bytes.  Malformed parts
    are reported with ``logging.warning`` and skipped.
    """
    # The standard allows for the boundary to be quoted in the header,
    # although it's rare (it happens at least for google app engine
    # xmpp).  I think we're also supposed to handle backslash-escapes
    # here but I'll save that until we see a client that uses them
    # in the wild.
    if boundary.startswith(b('"')) and boundary.endswith(b('"')):
        boundary = boundary[1:-1]
    # The closing delimiter is "--" + boundary + "--", optionally followed
    # by a trailing CRLF.
    if data.endswith(b("\r\n")):
        footer_length = len(boundary) + 6
    else:
        footer_length = len(boundary) + 4
    parts = data[:-footer_length].split(b("--") + boundary + b("\r\n"))
    for part in parts:
        if not part:
            continue
        # eoh = end of headers: the blank line separating headers and body.
        eoh = part.find(b("\r\n\r\n"))
        if eoh == -1:
            logging.warning("multipart/form-data missing headers")
            continue
        headers = HTTPHeaders.parse(part[:eoh].decode("utf-8"))
        name_header = headers.get("Content-Disposition", "")
        if not name_header.startswith("form-data;") or \
           not part.endswith(b("\r\n")):
            logging.warning("Invalid multipart/form-data")
            continue
        # Skip the 4-byte header separator and drop the trailing CRLF.
        value = part[eoh + 4:-2]
        name_values = {}
        # 10 == len("form-data;")
        for name_part in name_header[10:].split(";"):
            name_part = name_part.strip()
            if "=" not in name_part:
                # Tolerate empty or valueless parameters (for example a
                # trailing ";") instead of raising ValueError.
                continue
            key, key_value = name_part.split("=", 1)
            name_values[key] = key_value.strip('"')
        if not name_values.get("name"):
            logging.warning("multipart/form-data value missing name")
            continue
        name = name_values["name"]
        if name_values.get("filename"):
            ctype = headers.get("Content-Type", "application/unknown")
            files.setdefault(name, []).append(dict(
                filename=name_values["filename"], body=value,
                content_type=ctype))
        else:
            arguments.setdefault(name, []).append(value)
def doctests():
    """Return a unittest suite built from this module's docstring examples."""
    # Imported lazily so doctest is only loaded when tests are requested.
    from doctest import DocTestSuite
    # With no module argument, DocTestSuite inspects the calling module.
    return DocTestSuite()
if __name__ == "__main__":
    import doctest
    # Run the doctests embedded in this module's docstrings when the file
    # is executed directly.
    doctest.testmod()
