From bd26dec7e1697a1a05c65eb352fa2732242fc850 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 2 Oct 2015 17:46:59 -0400 Subject: [PATCH] use datetime64[ns, UTC] for 'datetime with timezone' sql types --- pandas/io/sql.py | 29 ++++++++++------ pandas/io/tests/test_sql.py | 66 +++++++++++++++++++++---------------- 2 files changed, 57 insertions(+), 38 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 496d1350a3943..721a2c1f350ee 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -18,6 +18,7 @@ from pandas.core.api import DataFrame, Series from pandas.core.common import isnull from pandas.core.base import PandasObject +from pandas.core.dtypes import DatetimeTZDtype from pandas.tseries.tools import to_datetime from pandas.util.decorators import Appender @@ -89,6 +90,10 @@ def _handle_date_column(col, format=None): # parse dates as timestamp format = 's' if format is None else format return to_datetime(col, errors='coerce', unit=format, utc=True) + elif com.is_datetime64tz_dtype(col): + # coerce to UTC timezone + # GH11216 + return to_datetime(col,errors='coerce').astype('datetime64[ns, UTC]') else: return to_datetime(col, errors='coerce', format=format, utc=True) @@ -906,11 +911,10 @@ def _harmonize_columns(self, parse_dates=None): try: df_col = self.frame[col_name] # the type the dataframe column should have - col_type = self._numpy_type(sql_col.type) + col_type = self._get_dtype(sql_col.type) - if col_type is datetime or col_type is date: - if not issubclass(df_col.dtype.type, np.datetime64): - self.frame[col_name] = _handle_date_column(df_col) + if col_type is datetime or col_type is date or col_type is DatetimeTZDtype: + self.frame[col_name] = _handle_date_column(df_col) elif col_type is float: # floats support NA, can always convert! @@ -990,20 +994,25 @@ def _sqlalchemy_type(self, col): return Text - def _numpy_type(self, sqltype): - from sqlalchemy.types import Integer, Float, Boolean, DateTime, Date + def _get_dtype(self, sqltype): + from sqlalchemy.types import Integer, Float, Boolean, DateTime, Date, TIMESTAMP if isinstance(sqltype, Float): return float - if isinstance(sqltype, Integer): + elif isinstance(sqltype, Integer): # TODO: Refine integer size. return np.dtype('int64') - if isinstance(sqltype, DateTime): + elif isinstance(sqltype, TIMESTAMP): + # we have a timezone capable type + if not sqltype.timezone: + return datetime + return DatetimeTZDtype + elif isinstance(sqltype, DateTime): # Caution: np.datetime64 is also a subclass of np.number. return datetime - if isinstance(sqltype, Date): + elif isinstance(sqltype, Date): return date - if isinstance(sqltype, Boolean): + elif isinstance(sqltype, Boolean): return bool return object diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 78f2bd956b07e..aced92ec8abd0 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -1255,6 +1255,29 @@ def test_datetime_with_timezone(self): # to datetime64[ns,psycopg2.tz.FixedOffsetTimezone..], which is ok # but should be more natural, so coerce to datetime64[ns] for now + def check(col): + # check that a column is either datetime64[ns] + # or datetime64[ns, UTC] + if com.is_datetime64_dtype(col.dtype): + + # "2000-01-01 00:00:00-08:00" should convert to "2000-01-01 08:00:00" + self.assertEqual(col[0], Timestamp('2000-01-01 08:00:00')) + + # "2000-06-01 00:00:00-07:00" should convert to "2000-06-01 07:00:00" + self.assertEqual(col[1], Timestamp('2000-06-01 07:00:00')) + + elif com.is_datetime64tz_dtype(col.dtype): + self.assertTrue(str(col.dt.tz) == 'UTC') + + # "2000-01-01 00:00:00-08:00" should convert to "2000-01-01 08:00:00" + self.assertEqual(col[0], Timestamp('2000-01-01 08:00:00', tz='UTC')) + + # "2000-06-01 00:00:00-07:00" should convert to "2000-06-01 07:00:00" + self.assertEqual(col[1], Timestamp('2000-06-01 07:00:00', tz='UTC')) + + else: + raise AssertionError("DateCol loaded with incorrect type -> {0}".format(col.dtype)) + # GH11216 df = pd.read_sql_query("select * from types_test_data", self.conn) if not hasattr(df,'DateColWithTz'): @@ -1263,25 +1286,29 @@ def test_datetime_with_timezone(self): # this is parsed on Travis (linux), but not on macosx for some reason # even with the same versions of psycopg2 & sqlalchemy, possibly a Postgrsql server # version difference - dtype = df.DateColWithTz.dtype - self.assertTrue(com.is_object_dtype(dtype) or com.is_datetime64_dtype(dtype), - "DateCol loaded with incorrect type -> {0}".format(dtype)) + col = df.DateColWithTz + self.assertTrue(com.is_object_dtype(col.dtype) or com.is_datetime64_dtype(col.dtype) \ + or com.is_datetime64tz_dtype(col.dtype), + "DateCol loaded with incorrect type -> {0}".format(col.dtype)) df = pd.read_sql_query("select * from types_test_data", self.conn, parse_dates=['DateColWithTz']) if not hasattr(df,'DateColWithTz'): raise nose.SkipTest("no column with datetime with time zone") - - dtype = df.DateColWithTz.dtype - self.assertTrue(com.is_datetime64_dtype(dtype), - "DateCol loaded with incorrect type -> {0}".format(dtype)) + check(df.DateColWithTz) df = pd.concat(list(pd.read_sql_query("select * from types_test_data", self.conn,chunksize=1)),ignore_index=True) - dtype = df.DateColWithTz.dtype - self.assertTrue(com.is_datetime64_dtype(dtype), - "DateCol loaded with incorrect type -> {0}".format(dtype)) + col = df.DateColWithTz + self.assertTrue(com.is_datetime64tz_dtype(col.dtype), + "DateCol loaded with incorrect type -> {0}".format(col.dtype)) + self.assertTrue(str(col.dt.tz) == 'UTC') expected = sql.read_sql_table("types_test_data", self.conn) - tm.assert_series_equal(df.DateColWithTz, expected.DateColWithTz) + tm.assert_series_equal(df.DateColWithTz, expected.DateColWithTz.astype('datetime64[ns, UTC]')) + + # xref #7139 + # this might or might not be converted depending on the postgres driver + df = sql.read_sql_table("types_test_data", self.conn) + check(df.DateColWithTz) def test_date_parsing(self): # No Parsing @@ -1781,23 +1808,6 @@ def test_schema_support(self): res2 = pdsql.read_table('test_schema_other2') tm.assert_frame_equal(res1, res2) - def test_datetime_with_time_zone(self): - - # Test to see if we read the date column with timezones that - # the timezone information is converted to utc and into a - # np.datetime64 (GH #7139) - - df = sql.read_sql_table("types_test_data", self.conn) - self.assertTrue(issubclass(df.DateColWithTz.dtype.type, np.datetime64), - "DateColWithTz loaded with incorrect type -> {0}".format(df.DateColWithTz.dtype)) - - # "2000-01-01 00:00:00-08:00" should convert to "2000-01-01 08:00:00" - self.assertEqual(df.DateColWithTz[0], Timestamp('2000-01-01 08:00:00')) - - # "2000-06-01 00:00:00-07:00" should convert to "2000-06-01 07:00:00" - self.assertEqual(df.DateColWithTz[1], Timestamp('2000-06-01 07:00:00')) - - class TestMySQLAlchemy(_TestMySQLAlchemy, _TestSQLAlchemy): pass