diff --git a/doc/source/io.rst b/doc/source/io.rst index 68faefa872c88..9f458b58717d6 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4806,6 +4806,36 @@ default ``Text`` type for string columns: Because of this, reading the database table back in does **not** generate a categorical. +.. _io.sql_datetime_data: + +Datetime data types +''''''''''''''''''' + +Using SQLAlchemy, :func:`~pandas.DataFrame.to_sql` is capable of writing +datetime data that is timezone naive or timezone aware. However, the resulting +data stored in the database ultimately depends on the supported data type +for datetime data of the database system being used. + +The following table lists supported data types for datetime data for some +common databases. Other database dialects may have different data types for +datetime data. + +=========== ============================================= =================== +Database SQL Datetime Types Timezone Support +=========== ============================================= =================== +SQLite ``TEXT`` No +MySQL ``TIMESTAMP`` or ``DATETIME`` No +PostgreSQL ``TIMESTAMP`` or ``TIMESTAMP WITH TIME ZONE`` Yes +=========== ============================================= =================== + +When writing timezone aware data to databases that do not support timezones, +the data will be written as timezone naive timestamps that are in local time +with respect to the timezone. + +:func:`~pandas.read_sql_table` is also capable of reading datetime data that is +timezone aware or naive. When reading ``TIMESTAMP WITH TIME ZONE`` types, pandas +will convert the data to UTC. 
+ Reading Tables '''''''''''''' diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 0c2a176869829..5fefb9e3e405c 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -222,6 +222,7 @@ Other Enhancements - :class:`IntervalIndex` has gained the :meth:`~IntervalIndex.set_closed` method to change the existing ``closed`` value (:issue:`21670`) - :func:`~DataFrame.to_csv`, :func:`~Series.to_csv`, :func:`~DataFrame.to_json`, and :func:`~Series.to_json` now support ``compression='infer'`` to infer compression based on filename extension (:issue:`15008`). The default compression for ``to_csv``, ``to_json``, and ``to_pickle`` methods has been updated to ``'infer'`` (:issue:`22004`). +- :meth:`DataFrame.to_sql` now supports writing ``TIMESTAMP WITH TIME ZONE`` types for supported databases. For databases that don't support timezones, datetime data will be stored as timezone unaware local timestamps. See :ref:`io.sql_datetime_data` for implications (:issue:`9086`). - :func:`to_timedelta` now supports iso-formated timedelta strings (:issue:`21877`) - :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`) - :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`) @@ -1246,6 +1247,9 @@ MultiIndex I/O ^^^ +- Bug in :meth:`to_sql` when writing timezone aware data (``datetime64[ns, tz]`` dtype) would raise a ``TypeError`` (:issue:`9086`) +- Bug in :meth:`to_sql` where a naive DatetimeIndex would be written as ``TIMESTAMP WITH TIME ZONE`` type in supported databases, e.g. PostgreSQL (:issue:`23510`) + ..
_whatsnew_0240.bug_fixes.nan_with_str_dtype: Proper handling of `np.NaN` in a string data-typed column with the Python engine diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a6224478070ec..4d292e956e96b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2397,6 +2397,15 @@ def to_sql(self, name, con, schema=None, if_exists='fail', index=True, -------- pandas.read_sql : read a DataFrame from a table + Notes + ----- + Timezone aware datetime columns will be written as + ``Timestamp with timezone`` type with SQLAlchemy if supported by the + database. Otherwise, the datetimes will be stored as timezone unaware + timestamps local to the original timezone. + + .. versionadded:: 0.24.0 + References ---------- .. [1] http://docs.sqlalchemy.org diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 00fbc35ed1e7d..2f411a956dfb8 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -592,12 +592,17 @@ def insert_data(self): data_list = [None] * ncols blocks = temp._data.blocks - for i in range(len(blocks)): - b = blocks[i] + for b in blocks: if b.is_datetime: - # convert to microsecond resolution so this yields - # datetime.datetime - d = b.values.astype('M8[us]').astype(object) + # return datetime.datetime objects + if b.is_datetimetz: + # GH 9086: Ensure we return datetimes with timezone info + # Need to return 2-D data; DatetimeIndex is 1D + d = b.values.to_pydatetime() + d = np.expand_dims(d, axis=0) + else: + # convert to microsecond resolution for datetime.datetime + d = b.values.astype('M8[us]').astype(object) else: d = np.array(b.get_values(), dtype=object) @@ -612,7 +617,7 @@ def insert_data(self): return column_names, data_list def _execute_insert(self, conn, keys, data_iter): - data = [{k: v for k, v in zip(keys, row)} for row in data_iter] + data = [dict(zip(keys, row)) for row in data_iter] conn.execute(self.insert_statement(), data) def insert(self, chunksize=None): @@ -741,8 +746,9 @@ def _get_column_names_and_types(self, 
dtype_mapper): def _create_table_setup(self): from sqlalchemy import Table, Column, PrimaryKeyConstraint - column_names_and_types = \ - self._get_column_names_and_types(self._sqlalchemy_type) + column_names_and_types = self._get_column_names_and_types( + self._sqlalchemy_type + ) columns = [Column(name, typ, index=is_index) for name, typ, is_index in column_names_and_types] @@ -841,14 +847,19 @@ def _sqlalchemy_type(self, col): from sqlalchemy.types import (BigInteger, Integer, Float, Text, Boolean, - DateTime, Date, Time) + DateTime, Date, Time, TIMESTAMP) if col_type == 'datetime64' or col_type == 'datetime': + # GH 9086: TIMESTAMP is the suggested type if the column contains + # timezone information try: - tz = col.tzinfo # noqa - return DateTime(timezone=True) + if col.dt.tz is not None: + return TIMESTAMP(timezone=True) except AttributeError: - return DateTime + # The column is actually a DatetimeIndex + if col.tz is not None: + return TIMESTAMP(timezone=True) + return DateTime if col_type == 'timedelta64': warnings.warn("the 'timedelta' type is not supported, and will be " "written as integer values (ns frequency) to the " @@ -1275,8 +1286,9 @@ def _create_table_setup(self): structure of a DataFrame. The first entry will be a CREATE TABLE statement while the rest will be CREATE INDEX statements. 
""" - column_names_and_types = \ - self._get_column_names_and_types(self._sql_type_name) + column_names_and_types = self._get_column_names_and_types( + self._sql_type_name + ) pat = re.compile(r'\s+') column_names = [col_name for col_name, _, _ in column_names_and_types] diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 237cc2936919e..777b04bbae97d 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -961,7 +961,8 @@ def test_sqlalchemy_type_mapping(self): utc=True)}) db = sql.SQLDatabase(self.conn) table = sql.SQLTable("test_type", db, frame=df) - assert isinstance(table.table.c['time'].type, sqltypes.DateTime) + # GH 9086: TIMESTAMP is the suggested type for datetimes with timezones + assert isinstance(table.table.c['time'].type, sqltypes.TIMESTAMP) def test_database_uri_string(self): @@ -1361,9 +1362,51 @@ def check(col): df = sql.read_sql_table("types_test_data", self.conn) check(df.DateColWithTz) + def test_datetime_with_timezone_roundtrip(self): + # GH 9086 + # Write datetimetz data to a db and read it back + # For dbs that support timestamps with timezones, should get back UTC + # otherwise naive data should be returned + expected = DataFrame({'A': date_range( + '2013-01-01 09:00:00', periods=3, tz='US/Pacific' + )}) + expected.to_sql('test_datetime_tz', self.conn, index=False) + + if self.flavor == 'postgresql': + # SQLAlchemy "timezones" (i.e. 
offsets) are coerced to UTC + expected['A'] = expected['A'].dt.tz_convert('UTC') + else: + # Otherwise, timestamps are returned as local, naive + expected['A'] = expected['A'].dt.tz_localize(None) + + result = sql.read_sql_table('test_datetime_tz', self.conn) + tm.assert_frame_equal(result, expected) + + result = sql.read_sql_query( + 'SELECT * FROM test_datetime_tz', self.conn + ) + if self.flavor == 'sqlite': + # read_sql_query does not return datetime type like read_sql_table + assert isinstance(result.loc[0, 'A'], string_types) + result['A'] = to_datetime(result['A']) + tm.assert_frame_equal(result, expected) + + def test_naive_datetimeindex_roundtrip(self): + # GH 23510 + # Ensure that a naive DatetimeIndex isn't converted to UTC + dates = date_range('2018-01-01', periods=5, freq='6H') + expected = DataFrame({'nums': range(5)}, index=dates) + expected.to_sql('foo_table', self.conn, index_label='info_date') + result = sql.read_sql_table('foo_table', self.conn, + index_col='info_date') + # result index will gain a name from a set_index operation; expected + tm.assert_frame_equal(result, expected, check_names=False) + def test_date_parsing(self): # No Parsing df = sql.read_sql_table("types_test_data", self.conn) + expected_type = object if self.flavor == 'sqlite' else np.datetime64 + assert issubclass(df.DateCol.dtype.type, expected_type) df = sql.read_sql_table("types_test_data", self.conn, parse_dates=['DateCol'])