From 5278cbd9d1c2d757007063ebd23e945b1e316519 Mon Sep 17 00:00:00 2001 From: Takuya Kitazawa Date: Thu, 5 Sep 2019 15:49:34 -0700 Subject: [PATCH 1/2] Drop `type` query option as it conflicts with `engine` --- pytd/query_engine.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pytd/query_engine.py b/pytd/query_engine.py index 6c2fb84..83c7bdc 100644 --- a/pytd/query_engine.py +++ b/pytd/query_engine.py @@ -131,7 +131,6 @@ def _get_tdclient_cursor(self, con, **kwargs): """ api_param_names = set( [ - "type", "db", "result_url", "priority", @@ -141,6 +140,13 @@ def _get_tdclient_cursor(self, con, **kwargs): ] ) + if "type" in kwargs: + raise RuntimeError( + "optional query parameter 'type' is unsupported. Issue query " + "from a proper QueryEngine instance: " + "{PrestoQueryEngine, HiveQueryEngine}." + ) + # update a clone of the original params cursor_kwargs = con._cursor_kwargs.copy() for k, v in kwargs.items(): From eda21b86c6801255e9f0c69ee9a0d135e372d0b9 Mon Sep 17 00:00:00 2001 From: Takuya Kitazawa Date: Thu, 5 Sep 2019 15:50:06 -0700 Subject: [PATCH 2/2] Update docstring for TD-specific query params --- pytd/client.py | 19 ++++++++++ pytd/pandas_td/__init__.py | 21 ++++++++--- pytd/query_engine.py | 75 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 110 insertions(+), 5 deletions(-) diff --git a/pytd/client.py b/pytd/client.py index 61222cd..86ec749 100644 --- a/pytd/client.py +++ b/pytd/client.py @@ -146,6 +146,25 @@ def query(self, query, engine=None, **kwargs): Query engine. If not given, default query engine created in the constructor will be used. + **kwargs + Treasure Data-specific optional query parameters. Giving these + keyword arguments forces the query engine to issue a query via Treasure + Data REST API provided by ``tdclient``; that is, if ``engine`` is + Presto, you cannot enjoy efficient direct access to the query + engine provided by ``prestodb``. 
+ + - ``db`` (str): use the database + - ``result_url`` (str): result output URL + - ``priority`` (int or str): priority + - -2: "VERY LOW" + - -1: "LOW" + - 0: "NORMAL" + - 1: "HIGH" + - 2: "VERY HIGH" + - ``retry_limit`` (int): max number of automatic retries + - ``wait_interval`` (int): sleep interval until the job finishes + - ``wait_callback`` (function): called every interval against job itself + Returns ------- dict : keys ('data', 'columns') diff --git a/pytd/pandas_td/__init__.py b/pytd/pandas_td/__init__.py index 9db7ca1..e0b5cd3 100644 --- a/pytd/pandas_td/__init__.py +++ b/pytd/pandas_td/__init__.py @@ -121,12 +121,23 @@ def read_td_query( See https://prestodb.io/docs/current/release/release-0.77.html params : dict, optional - Parameters to pass to execute method. + Parameters to pass to execute method. pytd does not support parameter + ``type`` ('hive', 'presto'), and query type needs to be defined by + ``engine``. + Available parameters: - - result_url (str): result output URL - - priority (int or str): priority (e.g. "NORMAL", "HIGH", etc.) - - retry_limit (int): retry limit - pytd: This argument will be ignored. + + - ``db`` (str): use the database + - ``result_url`` (str): result output URL + - ``priority`` (int or str): priority + - -2: "VERY LOW" + - -1: "LOW" + - 0: "NORMAL" + - 1: "HIGH" + - 2: "VERY HIGH" + - ``retry_limit`` (int): max number of automatic retries + - ``wait_interval`` (int): sleep interval until the job finishes + - ``wait_callback`` (function): called every interval against job itself Returns ------- diff --git a/pytd/query_engine.py b/pytd/query_engine.py index 83c7bdc..7308327 100644 --- a/pytd/query_engine.py +++ b/pytd/query_engine.py @@ -49,6 +49,24 @@ def execute(self, query, **kwargs): query : string Query. + **kwargs + Treasure Data-specific optional query parameters. 
Giving these + keyword arguments forces the query engine to issue a query via Treasure + Data REST API provided by ``tdclient``, rather than using a direct + connection established by the ``prestodb`` package. + + - ``db`` (str): use the database + - ``result_url`` (str): result output URL + - ``priority`` (int or str): priority + - -2: "VERY LOW" + - -1: "LOW" + - 0: "NORMAL" + - 1: "HIGH" + - 2: "VERY HIGH" + - ``retry_limit`` (int): max number of automatic retries + - ``wait_interval`` (int): sleep interval until the job finishes + - ``wait_callback`` (function): called every interval against job itself + Returns ------- dict : keys ('data', 'columns') @@ -125,6 +143,24 @@ def _get_tdclient_cursor(self, con, **kwargs): con : tdclient.connection.Connection Handler created by ``tdclient#connect``. + **kwargs + Treasure Data-specific optional query parameters. Giving these + keyword arguments forces the query engine to issue a query via Treasure + Data REST API provided by ``tdclient``, rather than using a direct + connection established by the ``prestodb`` package. + + - ``db`` (str): use the database + - ``result_url`` (str): result output URL + - ``priority`` (int or str): priority + - -2: "VERY LOW" + - -1: "LOW" + - 0: "NORMAL" + - 1: "HIGH" + - 2: "VERY HIGH" + - ``retry_limit`` (int): max number of automatic retries + - ``wait_interval`` (int): sleep interval until the job finishes + - ``wait_callback`` (function): called every interval against job itself + Returns ------- tdclient.cursor.Cursor @@ -221,6 +257,26 @@ def presto_api_host(self): def cursor(self, **kwargs): """Get cursor defined by DB-API. + Parameters + ---------- + **kwargs + Treasure Data-specific optional query parameters. Giving these + keyword arguments forces the query engine to issue a query via Treasure + Data REST API provided by ``tdclient``, rather than using a direct + connection established by the ``prestodb`` package. 
+ + - ``db`` (str): use the database + - ``result_url`` (str): result output URL + - ``priority`` (int or str): priority + - -2: "VERY LOW" + - -1: "LOW" + - 0: "NORMAL" + - 1: "HIGH" + - 2: "VERY HIGH" + - ``retry_limit`` (int): max number of automatic retries + - ``wait_interval`` (int): sleep interval until the job finishes + - ``wait_callback`` (function): called every interval against job itself + Returns ------- prestodb.dbapi.Cursor, or tdclient.cursor.Cursor @@ -288,6 +344,25 @@ def user_agent(self): def cursor(self, **kwargs): """Get cursor defined by DB-API. + Parameters + ---------- + **kwargs + Treasure Data-specific optional query parameters. Giving these + keyword arguments forces the query engine to issue a query via Treasure + Data REST API provided by ``tdclient``. + + - ``db`` (str): use the database + - ``result_url`` (str): result output URL + - ``priority`` (int or str): priority + - -2: "VERY LOW" + - -1: "LOW" + - 0: "NORMAL" + - 1: "HIGH" + - 2: "VERY HIGH" + - ``retry_limit`` (int): max number of automatic retries + - ``wait_interval`` (int): sleep interval until the job finishes + - ``wait_callback`` (function): called every interval against job itself + Returns ------- tdclient.cursor.Cursor