Skip to content

Commit

Permalink
add InvalidStringData exception and speed up check for c extension st…
Browse files Browse the repository at this point in the history
…ring encoding
  • Loading branch information
Mike Dirolf committed Aug 28, 2009
1 parent 721a763 commit 93e61c3
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 14 deletions.
24 changes: 17 additions & 7 deletions pymongo/_cbsonmodule.c
Expand Up @@ -27,6 +27,7 @@
static PyObject* CBSONError;
static PyObject* InvalidName;
static PyObject* InvalidDocument;
static PyObject* InvalidStringData;
static PyObject* SON;
static PyObject* Binary;
static PyObject* Code;
Expand Down Expand Up @@ -153,7 +154,7 @@ static int write_string(bson_buffer* buffer, PyObject* py_string) {

for (i = 0; i < string_length - 1; i++) {
if (string[i] == 0) {
PyErr_SetString(InvalidDocument, "BSON strings must not contain a NULL character");
PyErr_SetString(InvalidStringData, "BSON strings must not contain a NULL character");
return 0;
}
}
Expand All @@ -167,6 +168,17 @@ static int write_string(bson_buffer* buffer, PyObject* py_string) {
return 1;
}

/* Check that a byte buffer contains only 7-bit ASCII.
 *
 * data:   buffer to scan (does not need to be NUL-terminated)
 * length: number of bytes of `data` to examine
 *
 * Returns 1 when every examined byte has its high bit clear (this
 * includes the case where nothing is examined because length <= 0),
 * and 0 as soon as a byte with the high bit set is found. */
static int validate_ascii(const char* data, int length) {
    const char* cursor = data;
    int remaining = length;

    while (remaining > 0) {
        /* any byte >= 0x80 is outside the ASCII range */
        if ((*cursor & 0x80) != 0) {
            return 0;
        }
        cursor++;
        remaining--;
    }
    return 1;
}

/* TODO our platform better be little-endian w/ 4-byte ints! */
/* returns 0 on failure */
static int write_element_to_buffer(bson_buffer* buffer, int type_byte, PyObject* value, unsigned char check_keys) {
Expand Down Expand Up @@ -330,17 +342,14 @@ static int write_element_to_buffer(bson_buffer* buffer, int type_byte, PyObject*
memcpy(buffer->buffer + length_location, &length, 4);
return 1;
} else if (PyString_Check(value)) {
PyObject* encoded;
int result;

*(buffer->buffer + type_byte) = 0x02;
/* we have to do the encoding so we can fail fast if they give us non utf-8 */
encoded = PyString_AsEncodedObject(value, "utf-8", "strict");
if (!encoded) {
if (!validate_ascii(PyString_AsString(value), PyString_Size(value))) {
PyErr_SetString(InvalidStringData, "strings in documents must be ASCII only");
return 0;
}
result = write_string(buffer, encoded);
Py_DECREF(encoded);
result = write_string(buffer, value);
return result;
} else if (PyUnicode_Check(value)) {
PyObject* encoded;
Expand Down Expand Up @@ -1180,6 +1189,7 @@ PyMODINIT_FUNC init_cbson(void) {
CBSONError = PyObject_GetAttrString(module, "InvalidDocument");
InvalidName = PyObject_GetAttrString(module, "InvalidName");
InvalidDocument = PyObject_GetAttrString(module, "InvalidDocument");
InvalidStringData = PyObject_GetAttrString(module, "InvalidStringData");
Py_DECREF(module);

module = PyImport_ImportModule("pymongo.son");
Expand Down
10 changes: 7 additions & 3 deletions pymongo/bson.py
Expand Up @@ -28,7 +28,8 @@
from objectid import ObjectId
from dbref import DBRef
from son import SON
from errors import InvalidBSON, InvalidDocument, UnsupportedTag, InvalidName
from errors import InvalidBSON, InvalidDocument, UnsupportedTag
from errors import InvalidName, InvalidStringData

try:
import _cbson
Expand Down Expand Up @@ -57,8 +58,11 @@ def _get_c_string(data):

def _make_c_string(string):
    """Encode `string` as UTF-8 and append the terminating NUL byte.

    Raises InvalidStringData if `string` contains a NULL character or if
    encoding it to UTF-8 fails.
    """
    if "\x00" in string:
        raise InvalidStringData("BSON strings must not contain a NULL character")
    try:
        encoded = string.encode("utf-8")
    except UnicodeError:
        # UnicodeError is the common base of UnicodeEncodeError and
        # UnicodeDecodeError. Catching it rather than using a bare
        # `except:` lets unrelated errors (e.g. KeyboardInterrupt)
        # propagate instead of being reported as bad string data.
        raise InvalidStringData("strings in documents must be ASCII only")
    return encoded + "\x00"


def _validate_number(data):
Expand Down
4 changes: 4 additions & 0 deletions pymongo/errors.py
Expand Up @@ -55,6 +55,10 @@ class InvalidBSON(ValueError):
"""Raised when trying to create a BSON object from invalid data.
"""

class InvalidStringData(ValueError):
    """Raised when trying to encode a string that contains a NULL character
    or non-ASCII data.
    """


class InvalidDocument(ValueError):
"""Raised when trying to create a BSON object from an invalid document.
Expand Down
8 changes: 4 additions & 4 deletions test/test_bson.py
Expand Up @@ -30,7 +30,7 @@
from pymongo.dbref import DBRef
from pymongo.son import SON
from pymongo.bson import BSON, is_valid, _to_dicts
from pymongo.errors import UnsupportedTag, InvalidDocument
from pymongo.errors import UnsupportedTag, InvalidDocument, InvalidStringData


class TestBSON(unittest.TestCase):
Expand Down Expand Up @@ -132,8 +132,8 @@ def test_basic_from_dict(self):
"\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x00\x00")

def test_null_character_encoding(self):
    # An embedded NULL must be rejected with InvalidStringData for both
    # byte strings and unicode strings.
    self.assertRaises(InvalidStringData, BSON.from_dict, {"with zero": "hello\x00world"})
    self.assertRaises(InvalidStringData, BSON.from_dict, {"with zero": u"hello\x00world"})

def test_from_then_to_dict(self):

Expand Down Expand Up @@ -199,7 +199,7 @@ def test_data_files(self):
f.close()

def test_bad_encode(self):
    # A byte string containing non-ASCII bytes must be rejected with
    # InvalidStringData.
    self.assertRaises(InvalidStringData, BSON.from_dict,
                      {"lalala": '\xf4\xe0\xf0\xe1\xc0 Color Touch'})

def test_overflow(self):
Expand Down

0 comments on commit 93e61c3

Please sign in to comment.