diff --git a/pandas/core/api.py b/pandas/core/api.py
index 2bed120cf934c..703dd73e5d542 100644
--- a/pandas/core/api.py
+++ b/pandas/core/api.py
@@ -2,7 +2,7 @@
 
 import numpy as np
 
-from pandas.core.datetools import DateOffset
+from pandas.core.datetools import DateOffset, to_datetime
 import pandas.core.datetools as datetools
 
 from pandas.core.common import isnull, notnull, save, load
diff --git a/pandas/core/datetools.py b/pandas/core/datetools.py
index bc7a999b2e26a..1a7fd87c2f55b 100644
--- a/pandas/core/datetools.py
+++ b/pandas/core/datetools.py
@@ -80,10 +80,15 @@ def to_datetime(arg, errors='ignore'):
     -------
     ret : datetime if parsing succeeded
     """
+    from pandas.core.series import Series
     if arg is None:
         return arg
     elif isinstance(arg, datetime):
         return arg
+    elif isinstance(arg, Series):
+        values = lib.string_to_datetime(com._ensure_object(arg.values),
+                                        raise_=errors == 'raise')
+        return Series(values, index=arg.index, name=arg.name)
     elif isinstance(arg, np.ndarray):
         return lib.string_to_datetime(com._ensure_object(arg),
                                       raise_=errors == 'raise')
diff --git a/pandas/src/datetime.pyx b/pandas/src/datetime.pyx
index d56e1a1fd09e5..2560bca00650c 100644
--- a/pandas/src/datetime.pyx
+++ b/pandas/src/datetime.pyx
@@ -613,6 +613,49 @@ cdef class DayOffset(_Offset):
 # offset.next()
 # return i
 
+def string_to_datetime(ndarray[object] strings, raise_=False):
+    cdef:
+        Py_ssize_t i, n = len(strings)
+        object val
+        ndarray[int64_t] iresult
+        ndarray[object] oresult
+
+    from dateutil.parser import parse
+
+
+    try:
+        result = np.empty(n, dtype='M8[us]')
+        iresult = result.view('i8')
+        for i in range(n):
+            val = strings[i]
+            if util._checknull(val):
+                result[i] = NaT
+            elif PyDateTime_Check(val):
+                result[i] = val
+            else:
+                try:
+                    result[i] = parse(val)
+                except Exception:
+                    raise TypeError
+        return result
+    except TypeError:
+        oresult = np.empty(n, dtype=object)
+
+        for i in range(n):
+            val = strings[i]
+            if util._checknull(val):
+                oresult[i] = val
+            else:
+                try:
+                    oresult[i] = parse(val)
+                except Exception:
+                    if raise_:
+                        raise
+                    oresult[i] = val
+
+        return oresult
+
+
 
 # Conversion routines
 # ------------------------------------------------------------------------------
diff --git a/pandas/src/tseries.pyx b/pandas/src/tseries.pyx
index 3a4df13d8eaa3..52f1c8bf35941 100644
--- a/pandas/src/tseries.pyx
+++ b/pandas/src/tseries.pyx
@@ -571,27 +571,6 @@ def vec_binop(ndarray[object] left, ndarray[object] right, object op):
 
     return maybe_convert_bool(result)
 
-def string_to_datetime(ndarray[object] strings, raise_=False):
-    cdef:
-        Py_ssize_t i, n = len(strings)
-        object val
-    from dateutil.parser import parse
-
-    result = np.empty(n, dtype=object)
-
-    for i in range(n):
-        val = strings[i]
-        if util._checknull(val):
-            result[i] = val
-        else:
-            try:
-                result[i] = parse(val)
-            except Exception:
-                if raise_:
-                    raise
-                result[i] = val
-
-    return result
 
 def value_count_int64(ndarray[int64_t] values):
     cdef:
diff --git a/pandas/tests/test_timeseries.py b/pandas/tests/test_timeseries.py
index 7a151adf9bba1..d69cdb9015a53 100644
--- a/pandas/tests/test_timeseries.py
+++ b/pandas/tests/test_timeseries.py
@@ -11,7 +11,7 @@
 from pandas import (Index, Series, TimeSeries, DataFrame, isnull,
                     date_range, Timestamp)
 
-from pandas import DatetimeIndex
+from pandas import DatetimeIndex, to_datetime
 
 from pandas.core.daterange import DateRange
 
@@ -111,33 +111,6 @@ def assert_range_equal(left, right):
 
 class TestTimeSeries(unittest.TestCase):
 
-    def test_string_na_conversion(self):
-        from dateutil.parser import parse
-        from pandas.core.datetools import to_datetime
-
-        strings = np.array(['1/1/2000', '1/2/2000', np.nan,
-                            '1/4/2000, 12:34:56'], dtype=object)
-
-        expected = []
-        for val in strings:
-            if com.isnull(val):
-                expected.append(val)
-            else:
-                expected.append(parse(val))
-
-        result = lib.string_to_datetime(strings)
-        assert_almost_equal(result, expected)
-
-        result2 = to_datetime(strings)
-        assert_almost_equal(result, result2)
-
-        malformed = np.array(['1/100/2000', np.nan], dtype=object)
-        result = to_datetime(malformed)
-        assert_almost_equal(result, malformed)
-
-        self.assertRaises(ValueError, to_datetime, malformed,
-                          errors='raise')
-
     def test_dti_slicing(self):
         dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M')
         dti2 = dti[[1,3,5]]
@@ -349,6 +322,59 @@ def test_fillna_nat(self):
         assert_frame_equal(filled, expected)
         assert_frame_equal(filled2, expected)
 
+    def test_string_na_nat_conversion(self):
+        # GH #999, #858
+
+        from dateutil.parser import parse
+        from pandas.core.datetools import to_datetime
+
+        strings = np.array(['1/1/2000', '1/2/2000', np.nan,
+                            '1/4/2000, 12:34:56'], dtype=object)
+
+        expected = np.empty(4, dtype='M8')
+        for i, val in enumerate(strings):
+            if com.isnull(val):
+                expected[i] = NaT
+            else:
+                expected[i] = parse(val)
+
+        result = lib.string_to_datetime(strings)
+        assert_almost_equal(result, expected)
+
+        result2 = to_datetime(strings)
+        assert_almost_equal(result, result2)
+
+        malformed = np.array(['1/100/2000', np.nan], dtype=object)
+        result = to_datetime(malformed)
+        assert_almost_equal(result, malformed)
+
+        self.assertRaises(ValueError, to_datetime, malformed,
+                          errors='raise')
+
+        idx = ['a', 'b', 'c', 'd', 'e']
+        series = Series(['1/1/2000', np.nan, '1/3/2000', np.nan,
+                         '1/5/2000'], index=idx, name='foo')
+        dseries = Series([to_datetime('1/1/2000'), np.nan,
+                          to_datetime('1/3/2000'), np.nan,
+                          to_datetime('1/5/2000')], index=idx, name='foo')
+
+        result = to_datetime(series)
+        dresult = to_datetime(dseries)
+
+        expected = Series(np.empty(5, dtype='M8[us]'), index=idx)
+        for i in range(5):
+            x = series[i]
+            if isnull(x):
+                expected[i] = NaT
+            else:
+                expected[i] = to_datetime(x)
+
+        assert_series_equal(result, expected)
+        self.assertEquals(result.name, 'foo')
+
+        assert_series_equal(dresult, expected)
+        self.assertEquals(dresult.name, 'foo')
+
 def _skip_if_no_pytz():
     try:
         import pytz