Permalink
Browse files

Fix accuracy issues in datetime decoding PYTHON-392

  • Loading branch information...
1 parent 6242782 commit 168fdc61cd717767851cf8efded9cd6e8a939fba @behackett behackett committed Dec 14, 2012
Showing with 42 additions and 9 deletions.
  1. +7 −3 bson/__init__.py
  2. +31 −2 bson/_cbsonmodule.c
  3. +4 −4 test/test_bson.py
View
@@ -217,11 +217,15 @@ def _get_boolean(data, position, as_class, tz_aware, uuid_subtype):
def _get_date(data, position, as_class, tz_aware, uuid_subtype):
- seconds = float(struct.unpack("<q", data[position:position + 8])[0]) / 1000.0
+ millis = struct.unpack("<q", data[position:position + 8])[0]
+ diff = millis % 1000
+ seconds = (millis - diff) / 1000
position += 8
if tz_aware:
- return EPOCH_AWARE + datetime.timedelta(seconds=seconds), position
- return EPOCH_NAIVE + datetime.timedelta(seconds=seconds), position
+ dt = EPOCH_AWARE + datetime.timedelta(seconds=seconds)
+ else:
+ dt = EPOCH_NAIVE + datetime.timedelta(seconds=seconds)
+ return dt.replace(microsecond=diff * 1000), position
def _get_code(data, position, as_class, tz_aware, uuid_subtype):
View
@@ -117,8 +117,37 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer, int type_by
/* Date stuff */
static PyObject* datetime_from_millis(long long millis) {
- int microseconds = (millis % 1000) * 1000;
- Time64_T seconds = millis / 1000;
+ /* To encode a datetime instance like datetime(9999, 12, 31, 23, 59, 59, 999999)
+ * we follow these steps:
+ * 1. Calculate a timestamp in seconds: 253402300799
+ * 2. Multiply that by 1000: 253402300799000
+ * 3. Add in microseconds divided by 1000: 253402300799999
+ *
+ * (Note: BSON doesn't support microsecond accuracy, hence the truncation to milliseconds.)
+ *
+ * To decode we could do:
+ * 1. Get seconds: timestamp / 1000: 253402300799
+ * 2. Get micros: (timestamp % 1000) * 1000: 999000
+ * Resulting in datetime(9999, 12, 31, 23, 59, 59, 999000) -- the expected result
+ *
+ * Now what if we encode datetime(1, 1, 1, 1, 1, 1, 111111)?
+ * 1. and 2. gives: -62135593139000
+ * 3. Gives us: -62135593138889
+ *
+ * Now decode:
+ * 1. Gives us: -62135593138
+ * 2. Gives us: -889000
+ * Resulting in datetime(1, 1, 1, 1, 1, 2, 15888216) -- an invalid result
+ *
+ * If instead to decode we do:
+ * diff = ((millis % 1000) + 1000) % 1000: 111
+ * seconds = (millis - diff) / 1000: -62135593139
+ * micros = diff * 1000: 111000
+ * Resulting in datetime(1, 1, 1, 1, 1, 1, 111000) -- the expected result
+ */
+ int diff = (int)(((millis % 1000) + 1000) % 1000);
+ int microseconds = diff * 1000;
+ Time64_T seconds = (millis - diff) / 1000;
struct TM timeinfo;
gmtime64_r(&seconds, &timeinfo);
View
@@ -224,20 +224,20 @@ def test_bytes_as_keys(self):
def test_datetime_encode_decode(self):
# Negative timestamps
- dt1 = datetime.datetime(1, 1, 1, 1, 1, 1)
+ dt1 = datetime.datetime(1, 1, 1, 1, 1, 1, 111000)
dt2 = BSON.encode({"date": dt1}).decode()["date"]
self.assertEqual(dt1, dt2)
- dt1 = datetime.datetime(1959, 6, 25, 12, 16, 59)
+ dt1 = datetime.datetime(1959, 6, 25, 12, 16, 59, 999000)
dt2 = BSON.encode({"date": dt1}).decode()["date"]
self.assertEqual(dt1, dt2)
# Positive timestamps
- dt1 = datetime.datetime(9999, 12, 31, 23, 59, 59)
+ dt1 = datetime.datetime(9999, 12, 31, 23, 59, 59, 999000)
dt2 = BSON.encode({"date": dt1}).decode()["date"]
self.assertEqual(dt1, dt2)
- dt1 = datetime.datetime(2011, 6, 14, 10, 47, 53)
+ dt1 = datetime.datetime(2011, 6, 14, 10, 47, 53, 444000)
dt2 = BSON.encode({"date": dt1}).decode()["date"]
self.assertEqual(dt1, dt2)

0 comments on commit 168fdc6

Please sign in to comment.