Skip to content

Commit

Permalink
use datetime64[ns, UTC] for 'datetime with timezone' sql types
Browse files Browse the repository at this point in the history
  • Loading branch information
jreback committed Oct 3, 2015
1 parent dc3de6a commit bd26dec
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 38 deletions.
29 changes: 19 additions & 10 deletions pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from pandas.core.api import DataFrame, Series
from pandas.core.common import isnull
from pandas.core.base import PandasObject
from pandas.core.dtypes import DatetimeTZDtype
from pandas.tseries.tools import to_datetime
from pandas.util.decorators import Appender

Expand Down Expand Up @@ -89,6 +90,10 @@ def _handle_date_column(col, format=None):
# parse dates as timestamp
format = 's' if format is None else format
return to_datetime(col, errors='coerce', unit=format, utc=True)
elif com.is_datetime64tz_dtype(col):
# coerce to UTC timezone
# GH11216
return to_datetime(col,errors='coerce').astype('datetime64[ns, UTC]')
else:
return to_datetime(col, errors='coerce', format=format, utc=True)

Expand Down Expand Up @@ -906,11 +911,10 @@ def _harmonize_columns(self, parse_dates=None):
try:
df_col = self.frame[col_name]
# the type the dataframe column should have
col_type = self._numpy_type(sql_col.type)
col_type = self._get_dtype(sql_col.type)

if col_type is datetime or col_type is date:
if not issubclass(df_col.dtype.type, np.datetime64):
self.frame[col_name] = _handle_date_column(df_col)
if col_type is datetime or col_type is date or col_type is DatetimeTZDtype:
self.frame[col_name] = _handle_date_column(df_col)

elif col_type is float:
# floats support NA, can always convert!
Expand Down Expand Up @@ -990,20 +994,25 @@ def _sqlalchemy_type(self, col):

return Text

def _numpy_type(self, sqltype):
from sqlalchemy.types import Integer, Float, Boolean, DateTime, Date
def _get_dtype(self, sqltype):
from sqlalchemy.types import Integer, Float, Boolean, DateTime, Date, TIMESTAMP

if isinstance(sqltype, Float):
return float
if isinstance(sqltype, Integer):
elif isinstance(sqltype, Integer):
# TODO: Refine integer size.
return np.dtype('int64')
if isinstance(sqltype, DateTime):
elif isinstance(sqltype, TIMESTAMP):
# we have a timezone capable type
if not sqltype.timezone:
return datetime
return DatetimeTZDtype
elif isinstance(sqltype, DateTime):
# Caution: np.datetime64 is also a subclass of np.number.
return datetime
if isinstance(sqltype, Date):
elif isinstance(sqltype, Date):
return date
if isinstance(sqltype, Boolean):
elif isinstance(sqltype, Boolean):
return bool
return object

Expand Down
66 changes: 38 additions & 28 deletions pandas/io/tests/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -1255,6 +1255,29 @@ def test_datetime_with_timezone(self):
# to datetime64[ns,psycopg2.tz.FixedOffsetTimezone..], which is ok
# but should be more natural, so coerce to datetime64[ns] for now

def check(col):
# check that a column is either datetime64[ns]
# or datetime64[ns, UTC]
if com.is_datetime64_dtype(col.dtype):

# "2000-01-01 00:00:00-08:00" should convert to "2000-01-01 08:00:00"
self.assertEqual(col[0], Timestamp('2000-01-01 08:00:00'))

# "2000-06-01 00:00:00-07:00" should convert to "2000-06-01 07:00:00"
self.assertEqual(col[1], Timestamp('2000-06-01 07:00:00'))

elif com.is_datetime64tz_dtype(col.dtype):
self.assertTrue(str(col.dt.tz) == 'UTC')

# "2000-01-01 00:00:00-08:00" should convert to "2000-01-01 08:00:00"
self.assertEqual(col[0], Timestamp('2000-01-01 08:00:00', tz='UTC'))

# "2000-06-01 00:00:00-07:00" should convert to "2000-06-01 07:00:00"
self.assertEqual(col[1], Timestamp('2000-06-01 07:00:00', tz='UTC'))

else:
raise AssertionError("DateCol loaded with incorrect type -> {0}".format(col.dtype))

# GH11216
df = pd.read_sql_query("select * from types_test_data", self.conn)
if not hasattr(df,'DateColWithTz'):
Expand All @@ -1263,25 +1286,29 @@ def test_datetime_with_timezone(self):
# this is parsed on Travis (Linux), but not on Mac OS X for some reason,
# even with the same versions of psycopg2 & sqlalchemy; possibly a PostgreSQL server
# version difference
dtype = df.DateColWithTz.dtype
self.assertTrue(com.is_object_dtype(dtype) or com.is_datetime64_dtype(dtype),
"DateCol loaded with incorrect type -> {0}".format(dtype))
col = df.DateColWithTz
self.assertTrue(com.is_object_dtype(col.dtype) or com.is_datetime64_dtype(col.dtype) \
or com.is_datetime64tz_dtype(col.dtype),
"DateCol loaded with incorrect type -> {0}".format(col.dtype))

df = pd.read_sql_query("select * from types_test_data", self.conn, parse_dates=['DateColWithTz'])
if not hasattr(df,'DateColWithTz'):
raise nose.SkipTest("no column with datetime with time zone")

dtype = df.DateColWithTz.dtype
self.assertTrue(com.is_datetime64_dtype(dtype),
"DateCol loaded with incorrect type -> {0}".format(dtype))
check(df.DateColWithTz)

df = pd.concat(list(pd.read_sql_query("select * from types_test_data",
self.conn,chunksize=1)),ignore_index=True)
dtype = df.DateColWithTz.dtype
self.assertTrue(com.is_datetime64_dtype(dtype),
"DateCol loaded with incorrect type -> {0}".format(dtype))
col = df.DateColWithTz
self.assertTrue(com.is_datetime64tz_dtype(col.dtype),
"DateCol loaded with incorrect type -> {0}".format(col.dtype))
self.assertTrue(str(col.dt.tz) == 'UTC')
expected = sql.read_sql_table("types_test_data", self.conn)
tm.assert_series_equal(df.DateColWithTz, expected.DateColWithTz)
tm.assert_series_equal(df.DateColWithTz, expected.DateColWithTz.astype('datetime64[ns, UTC]'))

# xref #7139
# this might or might not be converted depending on the postgres driver
df = sql.read_sql_table("types_test_data", self.conn)
check(df.DateColWithTz)

def test_date_parsing(self):
# No Parsing
Expand Down Expand Up @@ -1781,23 +1808,6 @@ def test_schema_support(self):
res2 = pdsql.read_table('test_schema_other2')
tm.assert_frame_equal(res1, res2)

def test_datetime_with_time_zone(self):

# Test that when we read a date column with timezones,
# the timezone information is converted to UTC and the column
# is loaded as np.datetime64 (GH #7139)

df = sql.read_sql_table("types_test_data", self.conn)
self.assertTrue(issubclass(df.DateColWithTz.dtype.type, np.datetime64),
"DateColWithTz loaded with incorrect type -> {0}".format(df.DateColWithTz.dtype))

# "2000-01-01 00:00:00-08:00" should convert to "2000-01-01 08:00:00"
self.assertEqual(df.DateColWithTz[0], Timestamp('2000-01-01 08:00:00'))

# "2000-06-01 00:00:00-07:00" should convert to "2000-06-01 07:00:00"
self.assertEqual(df.DateColWithTz[1], Timestamp('2000-06-01 07:00:00'))


class TestMySQLAlchemy(_TestMySQLAlchemy, _TestSQLAlchemy):
pass

Expand Down

0 comments on commit bd26dec

Please sign in to comment.