From 10aafeb1af81fa49af355e304f46c94547b27836 Mon Sep 17 00:00:00 2001 From: Jonas Kammerer Date: Fri, 11 Feb 2022 14:20:23 +0100 Subject: [PATCH 1/4] Add an example that demonstrates several possibilities that became available with the new `external()` functionality. --- .../query-external-data/README.md | 44 ++++++++ .../query-external-data/lineitem.csv | 50 ++++++++++ .../query-external-data/lineitem_2.csv | 47 +++++++++ .../query-external-data/orders_10rows.parquet | Bin 0 -> 2936 bytes .../query-external-data/query_external_data.py | 94 ++++++++++++++++++ 5 files changed, 235 insertions(+) create mode 100644 Community-Supported/query-external-data/README.md create mode 100644 Community-Supported/query-external-data/lineitem.csv create mode 100644 Community-Supported/query-external-data/lineitem_2.csv create mode 100644 Community-Supported/query-external-data/orders_10rows.parquet create mode 100644 Community-Supported/query-external-data/query_external_data.py diff --git a/Community-Supported/query-external-data/README.md b/Community-Supported/query-external-data/README.md new file mode 100644 index 0000000..d4abee7 --- /dev/null +++ b/Community-Supported/query-external-data/README.md @@ -0,0 +1,44 @@ +# query-external-data + +![Community Supported](https://img.shields.io/badge/Support%20Level-Community%20Supported-53bd92.svg) + +__Current Version__: 1.0 + +This sample demonstrates how you can use Hyper to query external data like parquet or CSV files directly. This enables a variety of ETL capabilities like accessing multiple files at once, filtering the read data and creating additional calculated columns. + +# Get started + +## __Prerequisites__ + +To run the script, you will need: + +- a computer running Windows, macOS, or Linux + +- Python 3.6 or 3.7 + +## Run the sample + +Ensure that you have installed the requirements and then just run the sample Python file. +The following instructions assume that you have set up a virtual environment for Python. 
For more information on +creating virtual environments, see [venv - Creation of virtual environments](https://docs.python.org/3/library/venv.html) +in the Python Standard Library. + +1. Open a terminal and activate the Python virtual environment (`venv`). + +1. Navigate to the folder where you installed the sample. + +1. Run the Python script: + + **python query_external_data.py** + + It will read the `orders_10rows.parquet` file from the working directory and create a new Hyper database + named `orders.hyper` with a table named "orders", which will contain the 10 rows copied from the Parquet file. + +## __Resources__ +Check out these resources to learn more: + +- [Hyper API docs](https://help.tableau.com/current/api/hyper_api/en-us/index.html) + +- [Tableau Hyper API Reference (Python)](https://help.tableau.com/current/api/hyper_api/en-us/reference/py/index.html) + +- [The Hyper API SQL Reference](https://help.tableau.com/current/api/hyper_api/en-us/reference/sql) \ No newline at end of file diff --git a/Community-Supported/query-external-data/lineitem.csv b/Community-Supported/query-external-data/lineitem.csv new file mode 100644 index 0000000..9dafaa9 --- /dev/null +++ b/Community-Supported/query-external-data/lineitem.csv @@ -0,0 +1,50 @@ +1,88233,742,1,0.2,R,F,1994-01-12,1993-12-28,1994-01-26,COLLECT COD,FOB,"" +1,118399,3422,2,0.1,A,F,1993-11-30,1993-12-13,1993-12-02,NONE,FOB,"" +1,129111,4136,3,0.8,R,F,1994-01-14,1994-01-17,1994-02-10,COLLECT COD,SHIP,"" +9,2425,7426,1,0.8,A,F,1995-01-15,1995-01-09,1995-02-12,COLLECT COD,SHIP,"" +9,154053,6569,2,0.7,A,F,1995-01-19,1994-12-31,1995-02-09,DELIVER IN PERSON,FOB,"" +9,156831,1862,3,0.1,R,F,1994-12-13,1994-11-28,1995-01-07,TAKE BACK RETURN,MAIL,"" +10,129111,6463,1,0.2,R,F,1994-08-24,1994-06-20,1994-09-09,NONE,FOB,"" +10,84557,9574,2,0.3,A,F,1994-07-21,1994-07-16,1994-08-08,TAKE BACK RETURN,TRUCK,"" +10,142891,434,3,0.4,R,F,1994-08-18,1994-07-31,1994-08-22,NONE,TRUCK,"" 
+2,134560,7074,1,0.5,N,O,1996-05-05,1996-07-03,1996-05-24,DELIVER IN PERSON,MAIL,"" +2,135829,8343,2,0.5,N,O,1996-05-24,1996-07-04,1996-05-27,DELIVER IN PERSON,SHIP,"" +2,66349,3868,3,0.4,N,O,1996-05-17,1996-06-20,1996-06-11,DELIVER IN PERSON,SHIP,"" +2,129111,2569,4,0.2,N,O,1996-05-02,1996-06-16,1996-05-18,COLLECT COD,RAIL,"" +2,2425,7426,5,0.5,N,O,1996-08-16,1996-07-11,1996-09-07,COLLECT COD,SHIP,"" +2,151908,1909,6,0.8,N,O,1996-07-13,1996-06-13,1996-07-30,COLLECT COD,AIR,"" +2,115477,500,7,0.6,N,O,1996-08-25,1996-06-12,1996-09-10,COLLECT COD,REG AIR,"" +8,155330,8343,1,0.4,A,F,1993-03-04,1993-01-02,1993-03-28,DELIVER IN PERSON,RAIL,"" +8,22973,7978,2,0.3,R,F,1993-02-17,1993-01-13,1993-02-24,COLLECT COD,FOB,"" +8,101476,3987,3,0.4,A,F,1993-03-01,1993-03-01,1993-03-22,NONE,SHIP,"" +8,101423,3934,4,0.1,R,F,1993-02-03,1993-02-02,1993-03-03,TAKE BACK RETURN,RAIL,"" +8,2425,7426,5,0.8,A,F,1993-03-26,1993-01-17,1993-04-23,NONE,MAIL,"" +8,146084,8599,6,0.6,R,F,1993-02-10,1993-02-12,1993-02-17,TAKE BACK RETURN,RAIL,"" +5,108570,8571,1,0.4,R,F,1994-10-31,1994-08-31,1994-11-20,NONE,AIR,"" +5,123927,3928,2,0.8,R,F,1994-10-16,1994-09-25,1994-10-19,NONE,FOB,"" +5,37531,35,3,0.3,A,F,1994-08-08,1994-10-13,1994-08-26,DELIVER IN PERSON,AIR,"" +3,98494,1004,1,0.4,N,O,1995-07-21,1995-07-01,1995-08-14,TAKE BACK RETURN,AIR,"" +3,177103,9621,2,0.2,A,F,1995-04-17,1995-07-01,1995-04-27,DELIVER IN PERSON,SHIP,"" +3,155829,8345,3,0.2,N,O,1995-08-02,1995-06-23,1995-08-03,COLLECT COD,REG AIR,"" +3,129111,8343,4,0.1,N,F,1995-06-13,1995-05-23,1995-06-24,TAKE BACK RETURN,FOB,"" +3,41466,3971,5,0.1,R,F,1995-05-08,1995-05-24,1995-05-12,TAKE BACK RETURN,RAIL,"" +3,105880,901,6,0.5,N,O,1995-07-15,1995-06-21,1995-08-11,COLLECT COD,RAIL,"" +6,83580,8597,1,0.3,R,F,1994-01-11,1994-01-31,1994-01-26,DELIVER IN PERSON,REG AIR,"" +6,128904,8905,2,0.5,A,F,1994-03-15,1994-03-02,1994-03-26,COLLECT COD,SHIP,"" +6,78526,8527,3,0.2,R,F,1994-02-10,1994-02-02,1994-03-10,DELIVER IN PERSON,FOB,"" 
+6,176948,1983,4,0.8,R,F,1994-03-22,1994-03-24,1994-04-04,DELIVER IN PERSON,REG AIR,"" +6,155180,211,5,0.6,R,F,1994-03-25,1994-02-11,1994-04-13,NONE,FOB,"" +6,105393,7904,6,0.3,R,F,1994-01-14,1994-02-16,1994-01-22,NONE,FOB,"" +4,109555,9556,1,0.7,N,O,1997-01-05,1997-01-16,1997-01-19,COLLECT COD,TRUCK,"" +4,29949,9950,3,0.6,N,O,1997-02-04,1997-03-09,1997-02-23,NONE,REG AIR,"" +4,76304,8343,2,0.1,N,O,1997-03-05,1997-02-15,1997-03-10,COLLECT COD,RAIL,"" +4,143662,1205,4,0.3,N,O,1997-01-15,1997-02-10,1997-02-07,DELIVER IN PERSON,TRUCK,"" +4,119534,4557,5,0.5,N,O,1997-01-15,1997-01-12,1997-02-09,COLLECT COD,REG AIR,"" +4,2425,2426,6,0.6,N,O,1997-02-24,1997-03-13,1997-03-20,NONE,RAIL,"" +7,182052,9607,1,0.3,N,O,1996-05-07,1996-03-13,1996-06-03,TAKE BACK RETURN,FOB,"" +7,145243,7758,2,0.8,N,O,1996-02-01,1996-03-02,1996-02-19,TAKE BACK RETURN,SHIP,"" +7,129111,9799,3,0.4,N,O,1996-01-15,1996-03-27,1996-02-03,COLLECT COD,MAIL,"" +7,163073,8343,4,0.4,N,O,1996-03-21,1996-04-08,1996-04-20,NONE,FOB,"" +7,151894,9440,5,0.1,N,O,1996-02-11,1996-02-24,1996-02-18,DELIVER IN PERSON,TRUCK,"" +7,79251,1759,6,0.3,N,O,1996-01-16,1996-02-23,1996-01-22,TAKE BACK RETURN,FOB,"" +7,157238,2269,7,0.2,N,O,1996-02-10,1996-03-26,1996-02-13,NONE,FOB,"" \ No newline at end of file diff --git a/Community-Supported/query-external-data/lineitem_2.csv b/Community-Supported/query-external-data/lineitem_2.csv new file mode 100644 index 0000000..c07d9cc --- /dev/null +++ b/Community-Supported/query-external-data/lineitem_2.csv @@ -0,0 +1,47 @@ +295,60621,3128,4,0.04,A,F,1995-01-12,1994-11-22,1995-01-22,DELIVER IN PERSON,MAIL,"" +166,64888,9901,1,0.03,N,O,1995-11-16,1995-10-17,1995-12-13,NONE,MAIL,"" +166,166366,6367,2,0.05,N,O,1995-11-09,1995-11-18,1995-11-14,COLLECT COD,SHIP,"" +166,99652,2162,3,0.03,N,O,1995-11-13,1995-11-07,1995-12-08,COLLECT COD,FOB,"" +166,45027,7532,4,0.02,N,O,1995-12-30,1995-11-29,1996-01-29,DELIVER IN PERSON,RAIL,"" 
+386,152405,9951,1,0.07,A,F,1995-05-10,1995-02-28,1995-05-25,NONE,SHIP,"" +386,68123,5642,2,0.01,A,F,1995-04-12,1995-04-18,1995-05-11,DELIVER IN PERSON,MAIL,"" +386,130081,82,3,0.04,A,F,1995-05-23,1995-03-01,1995-05-25,TAKE BACK RETURN,MAIL,"" +135,108205,8206,1,0.08,N,O,1996-02-18,1996-01-01,1996-02-25,COLLECT COD,RAIL,"" +135,198344,5902,2,0.07,N,O,1996-02-11,1996-01-12,1996-02-13,DELIVER IN PERSON,SHIP,"" +135,157510,5056,3,0.00,N,O,1996-01-03,1995-11-21,1996-02-01,TAKE BACK RETURN,MAIL,"" +135,67005,9512,4,0.03,N,O,1996-01-12,1996-01-19,1996-02-05,NONE,TRUCK,"" +135,136248,1275,5,0.04,N,O,1996-01-25,1995-11-20,1996-02-09,NONE,MAIL,"" +135,115000,2534,6,0.02,N,O,1995-11-12,1995-12-22,1995-11-17,NONE,FOB,"" +551,23786,6289,1,0.02,N,O,1995-07-29,1995-07-18,1995-08-02,NONE,REG AIR,"" +551,158813,3844,2,0.07,N,O,1995-09-18,1995-08-25,1995-10-11,COLLECT COD,TRUCK,"" +551,161089,6122,3,0.06,N,O,1995-07-29,1995-08-19,1995-08-10,COLLECT COD,MAIL,"" +930,44804,2317,1,0.04,R,F,1994-12-21,1995-02-20,1994-12-24,COLLECT COD,RAIL,"" +930,17295,4799,2,0.00,A,F,1995-03-20,1995-02-04,1995-04-04,DELIVER IN PERSON,AIR,"" +930,64230,1749,3,0.08,A,F,1994-12-18,1995-01-27,1995-01-16,COLLECT COD,AIR,"" +930,99635,2145,4,0.02,A,F,1995-02-16,1995-03-03,1995-03-13,DELIVER IN PERSON,SHIP,"" +930,163239,788,5,0.06,A,F,1995-04-03,1995-01-29,1995-04-22,COLLECT COD,MAIL,"" +930,144557,2100,6,0.04,A,F,1995-02-09,1995-02-17,1995-02-16,NONE,SHIP,"" +930,166196,1229,7,0.08,R,F,1995-01-20,1995-02-28,1995-02-04,TAKE BACK RETURN,RAIL,"" +2191970,2425,4926,1,0.01,A,F,1994-10-13,1994-10-06,1994-10-28,TAKE BACK RETURN,RAIL,"" +2191970,145574,5575,2,0.04,A,F,1994-11-05,1994-10-15,1994-11-12,TAKE BACK RETURN,REG AIR,"" +2191970,48791,6304,3,0.00,A,F,1994-10-03,1994-11-16,1994-10-06,COLLECT COD,MAIL,"" +2191970,27964,5471,4,0.01,R,F,1994-09-24,1994-10-08,1994-09-30,TAKE BACK RETURN,FOB,"" +341792,2425,2426,1,0.05,N,O,1995-10-13,1995-10-24,1995-10-27,DELIVER IN PERSON,AIR,"" 
+341792,141675,4190,2,0.06,N,O,1995-10-22,1995-10-29,1995-11-15,NONE,SHIP,"" +341792,116665,9177,3,0.00,N,O,1995-12-19,1995-10-29,1996-01-07,DELIVER IN PERSON,RAIL,"" +341792,185393,7912,4,0.07,N,O,1995-10-30,1995-10-28,1995-11-12,TAKE BACK RETURN,MAIL,"" +391,121586,6611,1,0.02,R,F,1995-02-11,1995-02-03,1995-02-13,TAKE BACK RETURN,TRUCK,"" +4636866,26435,3942,1,0.06,N,O,1998-04-16,1998-03-17,1998-05-07,NONE,FOB,"" +4636866,79753,2261,2,0.05,N,O,1998-03-24,1998-04-05,1998-04-08,TAKE BACK RETURN,FOB,"" +4636866,2425,2426,3,0.00,N,O,1998-04-08,1998-04-14,1998-05-06,NONE,MAIL,"" +4636866,153295,8326,4,0.03,N,O,1998-02-13,1998-04-07,1998-02-24,NONE,FOB,"" +4636866,23973,1480,5,0.01,N,O,1998-03-26,1998-03-23,1998-04-12,COLLECT COD,AIR,"" +4636866,108175,686,6,0.02,N,O,1998-02-19,1998-04-11,1998-03-13,NONE,FOB,"" +291,122565,102,1,0.07,A,F,1994-05-26,1994-05-10,1994-06-23,COLLECT COD,TRUCK,"" +291,137316,7317,2,0.02,R,F,1994-06-14,1994-04-25,1994-06-19,NONE,REG AIR,"" +291,60874,5887,3,0.02,R,F,1994-03-22,1994-04-30,1994-03-24,DELIVER IN PERSON,FOB,"" +132,140449,2964,1,0.08,R,F,1993-07-10,1993-08-05,1993-07-13,NONE,TRUCK,"" +132,119053,9054,2,0.08,R,F,1993-09-01,1993-08-16,1993-09-22,NONE,TRUCK,"" +132,114419,4420,3,0.04,A,F,1993-07-12,1993-08-05,1993-08-05,COLLECT COD,TRUCK,"" +132,28082,5589,4,0.00,A,F,1993-06-16,1993-08-27,1993-06-23,DELIVER IN PERSON,AIR,"" +2549888,173333,3334,1,0.02,A,F,1993-11-23,1993-11-20,1993-12-01,TAKE BACK RETURN,RAIL,"" \ No newline at end of file diff --git a/Community-Supported/query-external-data/orders_10rows.parquet b/Community-Supported/query-external-data/orders_10rows.parquet new file mode 100644 index 0000000000000000000000000000000000000000..5f3477d37b6813b43ca42388f40361babf755901 GIT binary patch literal 2936 zcmbtWZ)jUp6u&P`+BB_gw&lLPH;X6po=$H;=A4CMF7KEXGn8h!Oig@nLd&ygz4CYJn z-o59Z^Si%u?m6dWV)L#9ZpA}*Xc9*-f?o*k2%#4^f5_(xz=&S0wMYF9CO>lUy@%16 
zOIKTtettVBg7V2%Zd|$Z#;dbOeF!Z-{OwQs?JxfL;geIt{Gac?RLAM4iqQgbMoyMe*-J<9IW=8U4zWvyWG)Z3uyC= z^XP-$t7zY9i>rLJf^Hsrnp?ZF5ly^)7BFGMCQN@IoZNHp6u`jpVSLzK_C)i-aqgE@ zouF|ofW|lW{~cerwp+?}fZ#qCaD5&=?|uK}r`!-dAB5<+qzA^v_XlB+7}&dO+wjgk z-7roK>>S@C?VcFkGCDFkyp^6?a$x)Dw(V4L)4-15t)qK)0L3s4yA++Mv;=~v{5a#k z%;Dww(RMZR0Ug8d`;cZlHEAQ#7(z~X&H9GQuT@{nHnKgsc0Lj zZkyViB$xF{Ns@~CA!5d)Dyb+c%;iEsw=0$@$(CeI6Ib3ZnW{!AmL!{V<*KDy6rreA zWUXP5*LBm1Nd?&;MO%Y)gOqGdHY8OcvI+9%bd5BYW0FFux~W>QJD_>=XJx~h)=REQ z%mwOq2b7tv=$dK~AX+f=1GZY2(dJ@OUe}a5uH!NT2M^`Kj9dZ(+a$FOOC^oK-c?di zWv$jh|PcCo0|7u;YW``s6n@&MIpe7++)(+s3}%DqTrFR z+VUvVnxypT!z0c{MdDn0`gkz3)z5`!ur(@Xat?jM4p%BBwt6|!UgpfYP1T$o=CTQ2E3btvn zc_%15%L>%Eij^G%IR$RPBXc!zE(g|O* zLIEM21wYMg23VhY0k4Xf=YM5@M_PJtr`U_aVpDV(tj256BP~N2)W?kr+z1l;A);_L zw3UY$x34MG0>ARv$ZYza{x{+sig*=MZx;iuyWYcN zc#w0894LUtv;qY`El28cW9Iev$T}?WD-85{Yu)4Kqi4vsIQ|wFz1~Wh)=F5Y7xg{p zo?ZuZ&+s@4bm6o+MTkK?Zx|rd!MqO^DPVKKdpiK@2{qDPEVdUY>92VFEQ($T(MYgk zYRW&h#UwuoDi8}uYKR5B(^xcS_aOcmC4PP_jq`kmOT3ZC3>RwkVmmBT>i2IS!UH!a zuh+}e>+ja$n;haVNB9V=39DwIGKmo@>k83g6}e1=HgZTVS2co!XR5LR?;>lUY)A*8 zoxq!4O2iWImHP6w3NOdOOggWm3(8 DESCRIPTOR(l_orderkey int, l_partkey int, l_suppkey int, l_linenumber int, l_quantity float, + l_returnflag text, l_linestatus text, l_shipdate date, l_commitdate date, l_receiptdate date, + l_shipinstruct text, l_shipmode text, l_comment text), + DELIMITER => ',', FORMAT => 'csv', HEADER => false) + on o_orderkey = l_orderkey GROUP BY l_partkey + ORDER BY l_partkey""" + print("result:") + print_list(connection.execute_list_query(command_2)) + print() + + + print("Scenario 3: Query multiple CSV files that have the same schema in one go.") + # Note that, for CSV files, the schema of the file has to be provided and currently cannot be inferred form the file directly. 
+ command_3 = """SELECT * + FROM external(ARRAY['lineitem.csv','lineitem_2.csv'], + COLUMNS => DESCRIPTOR(l_orderkey int, l_partkey int, l_suppkey int, l_linenumber int, l_quantity float, + l_returnflag text, l_linestatus text, l_shipdate date, l_commitdate date, l_receiptdate date, + l_shipinstruct text, l_shipmode text, l_comment text), + DELIMITER => ',', FORMAT => 'csv', HEADER => false) + ORDER BY l_orderkey""" + + print("result:") + print_list(connection.execute_list_query(command_3)) + + + +if __name__ == '__main__': + try: + run_hyper_query_external() + + except HyperException as ex: + print(ex) + exit(1) From 7903c123dcbe08726eb86c03a48fc25141118d66 Mon Sep 17 00:00:00 2001 From: Jonas Kammerer Date: Fri, 11 Feb 2022 14:52:43 +0100 Subject: [PATCH 2/4] reviews 1 --- Community-Supported/query-external-data/README.md | 2 +- Community-Supported/query-external-data/query_external_data.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Community-Supported/query-external-data/README.md b/Community-Supported/query-external-data/README.md index d4abee7..e50b321 100644 --- a/Community-Supported/query-external-data/README.md +++ b/Community-Supported/query-external-data/README.md @@ -14,7 +14,7 @@ To run the script, you will need: - a computer running Windows, macOS, or Linux -- Python 3.6 or 3.7 +- Python 3.7 or newer ## Run the sample diff --git a/Community-Supported/query-external-data/query_external_data.py b/Community-Supported/query-external-data/query_external_data.py index 88450c9..14c91f2 100644 --- a/Community-Supported/query-external-data/query_external_data.py +++ b/Community-Supported/query-external-data/query_external_data.py @@ -55,7 +55,7 @@ def run_hyper_query_external(): print("\nScenario 2: Query multiple external data sources in one query.") # This query reads data from a parquet and a CSV file and joins it. 
Note that, for CSV files, the schema of the file - # has to be provided and currently cannot be inferred form the file directly. + # has to be provided and currently cannot be inferred form the file directly (see the `DESCRIPTOR` argument below). command_2 = """SELECT l_partkey, SUM(l_quantity) FROM external('orders_10rows.parquet') join external('lineitem.csv', @@ -72,6 +72,7 @@ def run_hyper_query_external(): print("Scenario 3: Query multiple CSV files that have the same schema in one go.") # Note that, for CSV files, the schema of the file has to be provided and currently cannot be inferred form the file directly. + # (see the `DESCRIPTOR` argument below). command_3 = """SELECT * FROM external(ARRAY['lineitem.csv','lineitem_2.csv'], COLUMNS => DESCRIPTOR(l_orderkey int, l_partkey int, l_suppkey int, l_linenumber int, l_quantity float, From 9261172229361c44c278faa9d81a82f74815908f Mon Sep 17 00:00:00 2001 From: Jonas Kammerer Date: Tue, 8 Mar 2022 14:13:24 +0100 Subject: [PATCH 3/4] reviews 2 --- .../query-external-data/README.md | 3 --- .../query-external-data/query_external_data.py | 17 ++++++----------- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/Community-Supported/query-external-data/README.md b/Community-Supported/query-external-data/README.md index e50b321..895d655 100644 --- a/Community-Supported/query-external-data/README.md +++ b/Community-Supported/query-external-data/README.md @@ -31,9 +31,6 @@ in the Python Standard Library. **python query_external_data.py** - It will read the `orders_10rows.parquet` file from the working directory and create a new Hyper database - named `orders.hyper` with a table named "orders", which will contain the 10 rows copied from the Parquet file. 
- ## __Resources__ Check out these resources to learn more: diff --git a/Community-Supported/query-external-data/query_external_data.py b/Community-Supported/query-external-data/query_external_data.py index 14c91f2..d4fc94e 100644 --- a/Community-Supported/query-external-data/query_external_data.py +++ b/Community-Supported/query-external-data/query_external_data.py @@ -37,15 +37,15 @@ def run_hyper_query_external(): database="output_file.hyper", create_mode=CreateMode.CREATE_AND_REPLACE) as connection: - print("Scenario 1: Create a table from filtered parquet data with a calcualted extra column") + print("Scenario 1: Create a table from filtered parquet data with a calculated extra column") # This SQL command queries a parquet file directly and creates the table 'low_prio_orders' in Hyper. # The created table contains the data that is returned from the 'SELECT' part of the query. I.e., only # a selection of columns, a new calculated column 'clerk_nr' and only the rows with low order priority. 
command_1 = """CREATE TABLE low_prio_orders AS - SELECT o_orderkey, o_custkey, o_totalprice, CAST(SUBSTRING(o_clerk from 7) AS int) as clerk_nr - FROM external('orders_10rows.parquet') - WHERE o_orderpriority = '5-LOW'""" - + SELECT o_orderkey, o_custkey, o_totalprice, CAST(SUBSTRING(o_clerk from 7) AS int) as clerk_nr + FROM external('orders_10rows.parquet') + WHERE o_orderpriority = '5-LOW'""" + connection.execute_command(command_1) print("table content:") @@ -87,9 +87,4 @@ def run_hyper_query_external(): if __name__ == '__main__': - try: - run_hyper_query_external() - - except HyperException as ex: - print(ex) - exit(1) + run_hyper_query_external() From e91d207006a7f6959465d51c82d1a6d0e70ae7ee Mon Sep 17 00:00:00 2001 From: Jonas Kammerer Date: Wed, 9 Mar 2022 14:04:38 +0100 Subject: [PATCH 4/4] Change datasets --- .../query-external-data/customers.csv | 9 ++++ .../query-external-data/customers_2.csv | 3 ++ .../query-external-data/lineitem.csv | 50 ------------------ .../query-external-data/lineitem_2.csv | 47 ---------------- .../query-external-data/orders.parquet | Bin 0 -> 962 bytes .../query-external-data/orders_10rows.parquet | Bin 2936 -> 0 bytes .../query_external_data.py | 35 ++++++------ 7 files changed, 27 insertions(+), 117 deletions(-) create mode 100644 Community-Supported/query-external-data/customers.csv create mode 100644 Community-Supported/query-external-data/customers_2.csv delete mode 100644 Community-Supported/query-external-data/lineitem.csv delete mode 100644 Community-Supported/query-external-data/lineitem_2.csv create mode 100644 Community-Supported/query-external-data/orders.parquet delete mode 100644 Community-Supported/query-external-data/orders_10rows.parquet diff --git a/Community-Supported/query-external-data/customers.csv b/Community-Supported/query-external-data/customers.csv new file mode 100644 index 0000000..eb2e3c0 --- /dev/null +++ b/Community-Supported/query-external-data/customers.csv @@ -0,0 +1,9 @@ 
+2554,DE,Hansastrasse,15 +3554,DE,Ganghoferstrasse,24 +2654,US,180th Ave,174 +2564,US,150th Ave,114 +2114,US,80th Ave,74 +9954,US,42th Ave,94 +2444,EN,Oxford Rd,13 +1004,EN,Dowells Cl,41 +6454,DE,Radlkoferstrasse,75 \ No newline at end of file diff --git a/Community-Supported/query-external-data/customers_2.csv b/Community-Supported/query-external-data/customers_2.csv new file mode 100644 index 0000000..cb30b5f --- /dev/null +++ b/Community-Supported/query-external-data/customers_2.csv @@ -0,0 +1,3 @@ +2954,DE,Hansastrasse,11 +9664,DE,Ganghoferstrasse,14 +8554,US,10th Ave,184 diff --git a/Community-Supported/query-external-data/lineitem.csv b/Community-Supported/query-external-data/lineitem.csv deleted file mode 100644 index 9dafaa9..0000000 --- a/Community-Supported/query-external-data/lineitem.csv +++ /dev/null @@ -1,50 +0,0 @@ -1,88233,742,1,0.2,R,F,1994-01-12,1993-12-28,1994-01-26,COLLECT COD,FOB,"" -1,118399,3422,2,0.1,A,F,1993-11-30,1993-12-13,1993-12-02,NONE,FOB,"" -1,129111,4136,3,0.8,R,F,1994-01-14,1994-01-17,1994-02-10,COLLECT COD,SHIP,"" -9,2425,7426,1,0.8,A,F,1995-01-15,1995-01-09,1995-02-12,COLLECT COD,SHIP,"" -9,154053,6569,2,0.7,A,F,1995-01-19,1994-12-31,1995-02-09,DELIVER IN PERSON,FOB,"" -9,156831,1862,3,0.1,R,F,1994-12-13,1994-11-28,1995-01-07,TAKE BACK RETURN,MAIL,"" -10,129111,6463,1,0.2,R,F,1994-08-24,1994-06-20,1994-09-09,NONE,FOB,"" -10,84557,9574,2,0.3,A,F,1994-07-21,1994-07-16,1994-08-08,TAKE BACK RETURN,TRUCK,"" -10,142891,434,3,0.4,R,F,1994-08-18,1994-07-31,1994-08-22,NONE,TRUCK,"" -2,134560,7074,1,0.5,N,O,1996-05-05,1996-07-03,1996-05-24,DELIVER IN PERSON,MAIL,"" -2,135829,8343,2,0.5,N,O,1996-05-24,1996-07-04,1996-05-27,DELIVER IN PERSON,SHIP,"" -2,66349,3868,3,0.4,N,O,1996-05-17,1996-06-20,1996-06-11,DELIVER IN PERSON,SHIP,"" -2,129111,2569,4,0.2,N,O,1996-05-02,1996-06-16,1996-05-18,COLLECT COD,RAIL,"" -2,2425,7426,5,0.5,N,O,1996-08-16,1996-07-11,1996-09-07,COLLECT COD,SHIP,"" 
-2,151908,1909,6,0.8,N,O,1996-07-13,1996-06-13,1996-07-30,COLLECT COD,AIR,"" -2,115477,500,7,0.6,N,O,1996-08-25,1996-06-12,1996-09-10,COLLECT COD,REG AIR,"" -8,155330,8343,1,0.4,A,F,1993-03-04,1993-01-02,1993-03-28,DELIVER IN PERSON,RAIL,"" -8,22973,7978,2,0.3,R,F,1993-02-17,1993-01-13,1993-02-24,COLLECT COD,FOB,"" -8,101476,3987,3,0.4,A,F,1993-03-01,1993-03-01,1993-03-22,NONE,SHIP,"" -8,101423,3934,4,0.1,R,F,1993-02-03,1993-02-02,1993-03-03,TAKE BACK RETURN,RAIL,"" -8,2425,7426,5,0.8,A,F,1993-03-26,1993-01-17,1993-04-23,NONE,MAIL,"" -8,146084,8599,6,0.6,R,F,1993-02-10,1993-02-12,1993-02-17,TAKE BACK RETURN,RAIL,"" -5,108570,8571,1,0.4,R,F,1994-10-31,1994-08-31,1994-11-20,NONE,AIR,"" -5,123927,3928,2,0.8,R,F,1994-10-16,1994-09-25,1994-10-19,NONE,FOB,"" -5,37531,35,3,0.3,A,F,1994-08-08,1994-10-13,1994-08-26,DELIVER IN PERSON,AIR,"" -3,98494,1004,1,0.4,N,O,1995-07-21,1995-07-01,1995-08-14,TAKE BACK RETURN,AIR,"" -3,177103,9621,2,0.2,A,F,1995-04-17,1995-07-01,1995-04-27,DELIVER IN PERSON,SHIP,"" -3,155829,8345,3,0.2,N,O,1995-08-02,1995-06-23,1995-08-03,COLLECT COD,REG AIR,"" -3,129111,8343,4,0.1,N,F,1995-06-13,1995-05-23,1995-06-24,TAKE BACK RETURN,FOB,"" -3,41466,3971,5,0.1,R,F,1995-05-08,1995-05-24,1995-05-12,TAKE BACK RETURN,RAIL,"" -3,105880,901,6,0.5,N,O,1995-07-15,1995-06-21,1995-08-11,COLLECT COD,RAIL,"" -6,83580,8597,1,0.3,R,F,1994-01-11,1994-01-31,1994-01-26,DELIVER IN PERSON,REG AIR,"" -6,128904,8905,2,0.5,A,F,1994-03-15,1994-03-02,1994-03-26,COLLECT COD,SHIP,"" -6,78526,8527,3,0.2,R,F,1994-02-10,1994-02-02,1994-03-10,DELIVER IN PERSON,FOB,"" -6,176948,1983,4,0.8,R,F,1994-03-22,1994-03-24,1994-04-04,DELIVER IN PERSON,REG AIR,"" -6,155180,211,5,0.6,R,F,1994-03-25,1994-02-11,1994-04-13,NONE,FOB,"" -6,105393,7904,6,0.3,R,F,1994-01-14,1994-02-16,1994-01-22,NONE,FOB,"" -4,109555,9556,1,0.7,N,O,1997-01-05,1997-01-16,1997-01-19,COLLECT COD,TRUCK,"" -4,29949,9950,3,0.6,N,O,1997-02-04,1997-03-09,1997-02-23,NONE,REG AIR,"" 
-4,76304,8343,2,0.1,N,O,1997-03-05,1997-02-15,1997-03-10,COLLECT COD,RAIL,"" -4,143662,1205,4,0.3,N,O,1997-01-15,1997-02-10,1997-02-07,DELIVER IN PERSON,TRUCK,"" -4,119534,4557,5,0.5,N,O,1997-01-15,1997-01-12,1997-02-09,COLLECT COD,REG AIR,"" -4,2425,2426,6,0.6,N,O,1997-02-24,1997-03-13,1997-03-20,NONE,RAIL,"" -7,182052,9607,1,0.3,N,O,1996-05-07,1996-03-13,1996-06-03,TAKE BACK RETURN,FOB,"" -7,145243,7758,2,0.8,N,O,1996-02-01,1996-03-02,1996-02-19,TAKE BACK RETURN,SHIP,"" -7,129111,9799,3,0.4,N,O,1996-01-15,1996-03-27,1996-02-03,COLLECT COD,MAIL,"" -7,163073,8343,4,0.4,N,O,1996-03-21,1996-04-08,1996-04-20,NONE,FOB,"" -7,151894,9440,5,0.1,N,O,1996-02-11,1996-02-24,1996-02-18,DELIVER IN PERSON,TRUCK,"" -7,79251,1759,6,0.3,N,O,1996-01-16,1996-02-23,1996-01-22,TAKE BACK RETURN,FOB,"" -7,157238,2269,7,0.2,N,O,1996-02-10,1996-03-26,1996-02-13,NONE,FOB,"" \ No newline at end of file diff --git a/Community-Supported/query-external-data/lineitem_2.csv b/Community-Supported/query-external-data/lineitem_2.csv deleted file mode 100644 index c07d9cc..0000000 --- a/Community-Supported/query-external-data/lineitem_2.csv +++ /dev/null @@ -1,47 +0,0 @@ -295,60621,3128,4,0.04,A,F,1995-01-12,1994-11-22,1995-01-22,DELIVER IN PERSON,MAIL,"" -166,64888,9901,1,0.03,N,O,1995-11-16,1995-10-17,1995-12-13,NONE,MAIL,"" -166,166366,6367,2,0.05,N,O,1995-11-09,1995-11-18,1995-11-14,COLLECT COD,SHIP,"" -166,99652,2162,3,0.03,N,O,1995-11-13,1995-11-07,1995-12-08,COLLECT COD,FOB,"" -166,45027,7532,4,0.02,N,O,1995-12-30,1995-11-29,1996-01-29,DELIVER IN PERSON,RAIL,"" -386,152405,9951,1,0.07,A,F,1995-05-10,1995-02-28,1995-05-25,NONE,SHIP,"" -386,68123,5642,2,0.01,A,F,1995-04-12,1995-04-18,1995-05-11,DELIVER IN PERSON,MAIL,"" -386,130081,82,3,0.04,A,F,1995-05-23,1995-03-01,1995-05-25,TAKE BACK RETURN,MAIL,"" -135,108205,8206,1,0.08,N,O,1996-02-18,1996-01-01,1996-02-25,COLLECT COD,RAIL,"" -135,198344,5902,2,0.07,N,O,1996-02-11,1996-01-12,1996-02-13,DELIVER IN PERSON,SHIP,"" 
-135,157510,5056,3,0.00,N,O,1996-01-03,1995-11-21,1996-02-01,TAKE BACK RETURN,MAIL,"" -135,67005,9512,4,0.03,N,O,1996-01-12,1996-01-19,1996-02-05,NONE,TRUCK,"" -135,136248,1275,5,0.04,N,O,1996-01-25,1995-11-20,1996-02-09,NONE,MAIL,"" -135,115000,2534,6,0.02,N,O,1995-11-12,1995-12-22,1995-11-17,NONE,FOB,"" -551,23786,6289,1,0.02,N,O,1995-07-29,1995-07-18,1995-08-02,NONE,REG AIR,"" -551,158813,3844,2,0.07,N,O,1995-09-18,1995-08-25,1995-10-11,COLLECT COD,TRUCK,"" -551,161089,6122,3,0.06,N,O,1995-07-29,1995-08-19,1995-08-10,COLLECT COD,MAIL,"" -930,44804,2317,1,0.04,R,F,1994-12-21,1995-02-20,1994-12-24,COLLECT COD,RAIL,"" -930,17295,4799,2,0.00,A,F,1995-03-20,1995-02-04,1995-04-04,DELIVER IN PERSON,AIR,"" -930,64230,1749,3,0.08,A,F,1994-12-18,1995-01-27,1995-01-16,COLLECT COD,AIR,"" -930,99635,2145,4,0.02,A,F,1995-02-16,1995-03-03,1995-03-13,DELIVER IN PERSON,SHIP,"" -930,163239,788,5,0.06,A,F,1995-04-03,1995-01-29,1995-04-22,COLLECT COD,MAIL,"" -930,144557,2100,6,0.04,A,F,1995-02-09,1995-02-17,1995-02-16,NONE,SHIP,"" -930,166196,1229,7,0.08,R,F,1995-01-20,1995-02-28,1995-02-04,TAKE BACK RETURN,RAIL,"" -2191970,2425,4926,1,0.01,A,F,1994-10-13,1994-10-06,1994-10-28,TAKE BACK RETURN,RAIL,"" -2191970,145574,5575,2,0.04,A,F,1994-11-05,1994-10-15,1994-11-12,TAKE BACK RETURN,REG AIR,"" -2191970,48791,6304,3,0.00,A,F,1994-10-03,1994-11-16,1994-10-06,COLLECT COD,MAIL,"" -2191970,27964,5471,4,0.01,R,F,1994-09-24,1994-10-08,1994-09-30,TAKE BACK RETURN,FOB,"" -341792,2425,2426,1,0.05,N,O,1995-10-13,1995-10-24,1995-10-27,DELIVER IN PERSON,AIR,"" -341792,141675,4190,2,0.06,N,O,1995-10-22,1995-10-29,1995-11-15,NONE,SHIP,"" -341792,116665,9177,3,0.00,N,O,1995-12-19,1995-10-29,1996-01-07,DELIVER IN PERSON,RAIL,"" -341792,185393,7912,4,0.07,N,O,1995-10-30,1995-10-28,1995-11-12,TAKE BACK RETURN,MAIL,"" -391,121586,6611,1,0.02,R,F,1995-02-11,1995-02-03,1995-02-13,TAKE BACK RETURN,TRUCK,"" -4636866,26435,3942,1,0.06,N,O,1998-04-16,1998-03-17,1998-05-07,NONE,FOB,"" 
-4636866,79753,2261,2,0.05,N,O,1998-03-24,1998-04-05,1998-04-08,TAKE BACK RETURN,FOB,"" -4636866,2425,2426,3,0.00,N,O,1998-04-08,1998-04-14,1998-05-06,NONE,MAIL,"" -4636866,153295,8326,4,0.03,N,O,1998-02-13,1998-04-07,1998-02-24,NONE,FOB,"" -4636866,23973,1480,5,0.01,N,O,1998-03-26,1998-03-23,1998-04-12,COLLECT COD,AIR,"" -4636866,108175,686,6,0.02,N,O,1998-02-19,1998-04-11,1998-03-13,NONE,FOB,"" -291,122565,102,1,0.07,A,F,1994-05-26,1994-05-10,1994-06-23,COLLECT COD,TRUCK,"" -291,137316,7317,2,0.02,R,F,1994-06-14,1994-04-25,1994-06-19,NONE,REG AIR,"" -291,60874,5887,3,0.02,R,F,1994-03-22,1994-04-30,1994-03-24,DELIVER IN PERSON,FOB,"" -132,140449,2964,1,0.08,R,F,1993-07-10,1993-08-05,1993-07-13,NONE,TRUCK,"" -132,119053,9054,2,0.08,R,F,1993-09-01,1993-08-16,1993-09-22,NONE,TRUCK,"" -132,114419,4420,3,0.04,A,F,1993-07-12,1993-08-05,1993-08-05,COLLECT COD,TRUCK,"" -132,28082,5589,4,0.00,A,F,1993-06-16,1993-08-27,1993-06-23,DELIVER IN PERSON,AIR,"" -2549888,173333,3334,1,0.02,A,F,1993-11-23,1993-11-20,1993-12-01,TAKE BACK RETURN,RAIL,"" \ No newline at end of file diff --git a/Community-Supported/query-external-data/orders.parquet b/Community-Supported/query-external-data/orders.parquet new file mode 100644 index 0000000000000000000000000000000000000000..529bd7480f786f62a7d19860ccde7566d1ddb460 GIT binary patch literal 962 zcmZ9LO=uHA6vy9ecH7A|iOJF#vam>zX01@AHEC_}D?*ED30nK1Ai)^Zg+%k^HW7hH7O;C%}HeQ|Vy!-!+Z96R0_F(zyI!M_q6{3N&&x-jR0v|WO>O~O8qmNn3{-W(Fe0TF$BIS(2k1_U)j4PVwm z0}zWv$H$K@%#FqT-7un28jFo;R2?~?b;Gb`)5(RXpPm|()a1f=b7iXeJkgU|t~l;o zHs=&GEC5W2K-gP$7acc!%UN#hec9SlwOnjJqWP**Y!Ess=bNC)?pVY0>*NNb)=3ckMc9M^_aK!!t1d)hE%#DuyIWmZa1<|YcdK%8RRUu*l z)U-M!HIEd&MeJ==1_(%&Kl5_Kru<763`FNMHwsRs)|U`h+?P(*N_QN0DPJx*i)nf7 S>GZK7`M5*PAtTP<-}@INld#qR literal 0 HcmV?d00001 diff --git a/Community-Supported/query-external-data/orders_10rows.parquet b/Community-Supported/query-external-data/orders_10rows.parquet deleted file mode 100644 index 
5f3477d37b6813b43ca42388f40361babf755901..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2936 zcmbtWZ)jUp6u&P`+BB_gw&lLPH;X6po=$H;=A4CMF7KEXGn8h!Oig@nLd&ygz4CYJn z-o59Z^Si%u?m6dWV)L#9ZpA}*Xc9*-f?o*k2%#4^f5_(xz=&S0wMYF9CO>lUy@%16 zOIKTtettVBg7V2%Zd|$Z#;dbOeF!Z-{OwQs?JxfL;geIt{Gac?RLAM4iqQgbMoyMe*-J<9IW=8U4zWvyWG)Z3uyC= z^XP-$t7zY9i>rLJf^Hsrnp?ZF5ly^)7BFGMCQN@IoZNHp6u`jpVSLzK_C)i-aqgE@ zouF|ofW|lW{~cerwp+?}fZ#qCaD5&=?|uK}r`!-dAB5<+qzA^v_XlB+7}&dO+wjgk z-7roK>>S@C?VcFkGCDFkyp^6?a$x)Dw(V4L)4-15t)qK)0L3s4yA++Mv;=~v{5a#k z%;Dww(RMZR0Ug8d`;cZlHEAQ#7(z~X&H9GQuT@{nHnKgsc0Lj zZkyViB$xF{Ns@~CA!5d)Dyb+c%;iEsw=0$@$(CeI6Ib3ZnW{!AmL!{V<*KDy6rreA zWUXP5*LBm1Nd?&;MO%Y)gOqGdHY8OcvI+9%bd5BYW0FFux~W>QJD_>=XJx~h)=REQ z%mwOq2b7tv=$dK~AX+f=1GZY2(dJ@OUe}a5uH!NT2M^`Kj9dZ(+a$FOOC^oK-c?di zWv$jh|PcCo0|7u;YW``s6n@&MIpe7++)(+s3}%DqTrFR z+VUvVnxypT!z0c{MdDn0`gkz3)z5`!ur(@Xat?jM4p%BBwt6|!UgpfYP1T$o=CTQ2E3btvn zc_%15%L>%Eij^G%IR$RPBXc!zE(g|O* zLIEM21wYMg23VhY0k4Xf=YM5@M_PJtr`U_aVpDV(tj256BP~N2)W?kr+z1l;A);_L zw3UY$x34MG0>ARv$ZYza{x{+sig*=MZx;iuyWYcN zc#w0894LUtv;qY`El28cW9Iev$T}?WD-85{Yu)4Kqi4vsIQ|wFz1~Wh)=F5Y7xg{p zo?ZuZ&+s@4bm6o+MTkK?Zx|rd!MqO^DPVKKdpiK@2{qDPEVdUY>92VFEQ($T(MYgk zYRW&h#UwuoDi8}uYKR5B(^xcS_aOcmC4PP_jq`kmOT3ZC3>RwkVmmBT>i2IS!UH!a zuh+}e>+ja$n;haVNB9V=39DwIGKmo@>k83g6}e1=HgZTVS2co!XR5LR?;>lUY)A*8 zoxq!4O2iWImHP6w3NOdOOggWm3(8 DESCRIPTOR(l_orderkey int, l_partkey int, l_suppkey int, l_linenumber int, l_quantity float, - l_returnflag text, l_linestatus text, l_shipdate date, l_commitdate date, l_receiptdate date, - l_shipinstruct text, l_shipmode text, l_comment text), - DELIMITER => ',', FORMAT => 'csv', HEADER => false) - on o_orderkey = l_orderkey GROUP BY l_partkey - ORDER BY l_partkey""" + command_2 = """SELECT country, SUM(quantity * price) + FROM external('orders.parquet') orders + join external('customers.csv', + COLUMNS => DESCRIPTOR(customer_key int, country text, street text, nr int), + DELIMITER => ',', FORMAT => 'csv', HEADER => 
false) customers + on orders.customer_key = customers.customer_key GROUP BY country + ORDER BY country""" print("result:") print_list(connection.execute_list_query(command_2)) print() @@ -74,12 +71,10 @@ def run_hyper_query_external(): # Note that, for CSV files, the schema of the file has to be provided and currently cannot be inferred form the file directly. # (see the `DESCRIPTOR` argument below). command_3 = """SELECT * - FROM external(ARRAY['lineitem.csv','lineitem_2.csv'], - COLUMNS => DESCRIPTOR(l_orderkey int, l_partkey int, l_suppkey int, l_linenumber int, l_quantity float, - l_returnflag text, l_linestatus text, l_shipdate date, l_commitdate date, l_receiptdate date, - l_shipinstruct text, l_shipmode text, l_comment text), + FROM external(ARRAY['customers.csv','customers_2.csv'], + COLUMNS => DESCRIPTOR(customer_key int, country text, street text, nr int), DELIMITER => ',', FORMAT => 'csv', HEADER => false) - ORDER BY l_orderkey""" + ORDER BY country""" print("result:") print_list(connection.execute_list_query(command_3))