Skip to content

Commit

Permalink
ENH: to_datetime will convert array of strings and NAs to datetime64 …
Browse files Browse the repository at this point in the history
…with NaT, close pandas-dev#999
  • Loading branch information
wesm committed Apr 13, 2012
1 parent 270a10b commit 33f515a
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 50 deletions.
2 changes: 1 addition & 1 deletion pandas/core/api.py
Expand Up @@ -2,7 +2,7 @@

import numpy as np

from pandas.core.datetools import DateOffset
from pandas.core.datetools import DateOffset, to_datetime
import pandas.core.datetools as datetools

from pandas.core.common import isnull, notnull, save, load
Expand Down
5 changes: 5 additions & 0 deletions pandas/core/datetools.py
Expand Up @@ -80,10 +80,15 @@ def to_datetime(arg, errors='ignore'):
-------
ret : datetime if parsing succeeded
"""
from pandas.core.series import Series
if arg is None:
return arg
elif isinstance(arg, datetime):
return arg
elif isinstance(arg, Series):
values = lib.string_to_datetime(com._ensure_object(arg.values),
raise_=errors == 'raise')
return Series(values, index=arg.index, name=arg.name)
elif isinstance(arg, np.ndarray):
return lib.string_to_datetime(com._ensure_object(arg),
raise_=errors == 'raise')
Expand Down
43 changes: 43 additions & 0 deletions pandas/src/datetime.pyx
Expand Up @@ -613,6 +613,49 @@ cdef class DayOffset(_Offset):
# offset.next()
# return i

def string_to_datetime(ndarray[object] strings, raise_=False):
    """
    Convert an object array of date strings, datetimes and nulls.

    A first pass tries to fill a ``datetime64[us]`` array, storing ``NaT``
    for null entries. If any string fails to parse (or a TypeError is
    raised while filling that array), the conversion is redone from scratch
    into an object array.

    Parameters
    ----------
    strings : ndarray[object]
        Values to convert. Each element may be a string, a datetime, or a
        null value as determined by ``util._checknull``.
    raise_ : bool, default False
        Only consulted in the object-array fallback. If True, the exception
        raised by ``dateutil.parser.parse`` is propagated; if False, the
        unparseable value is kept unchanged in the output.

    Returns
    -------
    result : ndarray
        A ``datetime64[us]`` array when every element converted, otherwise
        an object array holding parsed datetimes, the original nulls and
        datetimes, and (when ``raise_`` is False) the original unparseable
        values.
    """
    cdef:
        Py_ssize_t i, n = len(strings)
        object val
        ndarray[object] oresult

    from dateutil.parser import parse

    try:
        result = np.empty(n, dtype='M8[us]')
        for i in range(n):
            val = strings[i]
            if util._checknull(val):
                result[i] = NaT
            elif PyDateTime_Check(val):
                result[i] = val
            else:
                try:
                    result[i] = parse(val)
                except Exception:
                    # Normalize every failure to TypeError so the outer
                    # handler switches to the object-array fallback.
                    raise TypeError
        return result
    except TypeError:
        oresult = np.empty(n, dtype=object)

        for i in range(n):
            val = strings[i]
            if util._checknull(val) or PyDateTime_Check(val):
                # Nulls and existing datetimes are stored unchanged. parse()
                # only accepts strings, so handing it a datetime would fail
                # and, with raise_=True, abort on a perfectly valid value.
                oresult[i] = val
            else:
                try:
                    oresult[i] = parse(val)
                except Exception:
                    if raise_:
                        raise
                    oresult[i] = val

        return oresult


# Conversion routines
# ------------------------------------------------------------------------------

Expand Down
21 changes: 0 additions & 21 deletions pandas/src/tseries.pyx
Expand Up @@ -571,27 +571,6 @@ def vec_binop(ndarray[object] left, ndarray[object] right, object op):

return maybe_convert_bool(result)

def string_to_datetime(ndarray[object] strings, raise_=False):
    """
    Parse every non-null element of ``strings`` with ``dateutil``.

    Parameters
    ----------
    strings : ndarray[object]
        Values to convert; null entries (per ``util._checknull``) are copied
        to the output unchanged.
    raise_ : bool, default False
        If True, re-raise the exception from ``dateutil.parser.parse`` for
        an unparseable element; if False, keep that element as-is.

    Returns
    -------
    result : ndarray of object
        Parsed values, with nulls and (when ``raise_`` is False) unparseable
        elements passed through.
    """
    cdef:
        Py_ssize_t i, n = len(strings)
        object val
    from dateutil.parser import parse

    result = np.empty(n, dtype=object)

    for i in range(n):
        val = strings[i]
        if not util._checknull(val):
            try:
                val = parse(val)
            except Exception:
                if raise_:
                    raise
                # Parsing failed and errors are suppressed: val still holds
                # the original element, which is what gets stored below.
        result[i] = val

    return result

def value_count_int64(ndarray[int64_t] values):
cdef:
Expand Down
82 changes: 54 additions & 28 deletions pandas/tests/test_timeseries.py
Expand Up @@ -11,7 +11,7 @@
from pandas import (Index, Series, TimeSeries, DataFrame, isnull,
date_range, Timestamp)

from pandas import DatetimeIndex
from pandas import DatetimeIndex, to_datetime

from pandas.core.daterange import DateRange

Expand Down Expand Up @@ -111,33 +111,6 @@ def assert_range_equal(left, right):

class TestTimeSeries(unittest.TestCase):

def test_string_na_conversion(self):
    # Null entries must pass through untouched while every other entry is
    # parsed by dateutil; malformed input comes back unchanged unless
    # errors='raise' is requested.
    from dateutil.parser import parse
    from pandas.core.datetools import to_datetime

    strings = np.array(['1/1/2000', '1/2/2000', np.nan,
                        '1/4/2000, 12:34:56'], dtype=object)

    expected = [item if com.isnull(item) else parse(item)
                for item in strings]

    via_lib = lib.string_to_datetime(strings)
    assert_almost_equal(via_lib, expected)

    via_api = to_datetime(strings)
    assert_almost_equal(via_lib, via_api)

    malformed = np.array(['1/100/2000', np.nan], dtype=object)
    unchanged = to_datetime(malformed)
    assert_almost_equal(unchanged, malformed)

    self.assertRaises(ValueError, to_datetime, malformed,
                      errors='raise')

def test_dti_slicing(self):
dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M')
dti2 = dti[[1,3,5]]
Expand Down Expand Up @@ -349,6 +322,59 @@ def test_fillna_nat(self):
assert_frame_equal(filled, expected)
assert_frame_equal(filled2, expected)

def test_string_na_nat_conversion(self):
    # GH #999, #858
    # Arrays and Series of date strings mixed with NAs should come back
    # from to_datetime with NaT in the null slots.

    from dateutil.parser import parse
    from pandas.core.datetools import to_datetime

    strings = np.array(['1/1/2000', '1/2/2000', np.nan,
                        '1/4/2000, 12:34:56'], dtype=object)

    expected = np.empty(4, dtype='M8')
    for i, val in enumerate(strings):
        if com.isnull(val):
            expected[i] = NaT
        else:
            expected[i] = parse(val)

    result = lib.string_to_datetime(strings)
    assert_almost_equal(result, expected)

    result2 = to_datetime(strings)
    assert_almost_equal(result, result2)

    # Unparseable input is handed back unchanged by default and only
    # raises when errors='raise' is requested.
    malformed = np.array(['1/100/2000', np.nan], dtype=object)
    result = to_datetime(malformed)
    assert_almost_equal(result, malformed)

    self.assertRaises(ValueError, to_datetime, malformed,
                      errors='raise')

    # Series input: one Series of strings and one of already-converted
    # values, both with NaN gaps.
    idx = ['a', 'b', 'c', 'd', 'e']
    series = Series(['1/1/2000', np.nan, '1/3/2000', np.nan,
                     '1/5/2000'], index=idx, name='foo')
    dseries = Series([to_datetime('1/1/2000'), np.nan,
                      to_datetime('1/3/2000'), np.nan,
                      to_datetime('1/5/2000')], index=idx, name='foo')

    result = to_datetime(series)
    dresult = to_datetime(dseries)

    expected = Series(np.empty(5, dtype='M8[us]'), index=idx)
    for i in range(5):
        x = series[i]
        if isnull(x):
            expected[i] = NaT
        else:
            expected[i] = to_datetime(x)

    # assertEqual replaces the deprecated assertEquals alias.
    assert_series_equal(result, expected)
    self.assertEqual(result.name, 'foo')

    assert_series_equal(dresult, expected)
    self.assertEqual(dresult.name, 'foo')

def _skip_if_no_pytz():
try:
import pytz
Expand Down

0 comments on commit 33f515a

Please sign in to comment.