Skip to content

Commit

Permalink
Add TPC-DS benchmark descriptor
Browse files Browse the repository at this point in the history
  • Loading branch information
findepi authored and Igor Demura committed Mar 29, 2017
1 parent aa97929 commit 2704e37
Show file tree
Hide file tree
Showing 105 changed files with 5,758 additions and 0 deletions.
49 changes: 49 additions & 0 deletions presto-benchto-benchmarks/presto-generate-tpcds-schemas.py
@@ -0,0 +1,49 @@
#!/usr/bin/env python

schemas = [
# (new_schema, source_schema)
('tpcds_10gb_orc', 'tpch.sf10'),
('tpcds_100gb_orc', 'tpch.sf100'),
('tpcds_1tb_orc', 'tpch.sf1000'),
]

tables = [
'call_center',
'catalog_page',
'catalog_returns',
'catalog_sales',
'customer',
'customer_address',
'customer_demographics',
'date_dim',
'household_demographics ',
'income_band',
'inventory',
'item',
'promotion',
'reason',
'ship_mode',
'store',
'store_returns',
'store_sales',
'time_dim',
'warehouse',
'web_page',
'web_returns',
'web_sales',
'web_site',
]

for (new_schema, source_schema) in schemas:

if new_schema.endswith('_orc'):
format = 'ORC'
elif new_schema.endswith('_text'):
format = 'TEXTFILE'
else:
raise ValueError(new_schema)

print 'CREATE SCHEMA hive.%s;' % (new_schema,)
for table in tables:
print 'CREATE TABLE "hive"."%s"."%s" WITH (format = \'%s\') AS SELECT * FROM %s."%s";' % \
(new_schema, table, format, source_schema, table)
@@ -0,0 +1,30 @@
datasource: presto
query-names: presto/tpcds/${query}.sql
runs: 6
prewarm-runs: 2
before-execution: sleep-4s, presto/session_set_reorder_joins.sql
frequency: 7
database: hive
variables:
1:
query: q01,q06,q14_1,q39_1,q39_2,q47,q57,q67,q81
schema: tpcds_10gb_orc
reorder_joins: true, false
2:
query: q02,q03,q04,q05,q07,q09,q10,q11,q13,q14_2,q16,q17,q19,q22,q23_1,q23_2,q24_1,q24_2,q25,q28,q29,q30,q31,q32,q33,q35,q37,q38,q42,q43,q44,q46,q48,q49,q50,q51,q52,q53,q54,q55,q56,q58,q59,q60,q61,q63,q65,q66,q68,q69,q70,q71,q72,q74,q75,q77,q78,q80,q82,q88,q89,q94,q95
schema: tpcds_100gb_orc
reorder_joins: true, false
3:
# query not passing quick enough without reordering
query: q18,q64
schema: tpcds_100gb_orc
reorder_joins: true
4:
query: q08,q12,q15,q20,q21,q26,q27,q34,q36,q40,q41,q45,q62,q73,q76,q79,q83,q84,q85,q86,q87,q90,q91,q92,q93,q96,q97,q98,q99
schema: tpcds_1tb_orc
reorder_joins: true, false
5:
# extra runs with reordering on 1tb schema (too slow without reordering on 1tb). For 100g we keep both runs, with and without reordering
query: q03,q37,q42,q43,q52,q53
schema: tpcds_1tb_orc
reorder_joins: true
@@ -0,0 +1,29 @@
WITH
customer_total_return AS (
SELECT
"sr_customer_sk" "ctr_customer_sk"
, "sr_store_sk" "ctr_store_sk"
, "sum"("sr_return_amt") "ctr_total_return"
FROM
${database}.${schema}.store_returns
, ${database}.${schema}.date_dim
WHERE ("sr_returned_date_sk" = "d_date_sk")
AND ("d_year" = 2000)
GROUP BY "sr_customer_sk", "sr_store_sk"
)
SELECT "c_customer_id"
FROM
customer_total_return ctr1
, ${database}.${schema}.store
, ${database}.${schema}.customer
WHERE ("ctr1"."ctr_total_return" > (
SELECT ("avg"("ctr_total_return") * DECIMAL '1.2')
FROM
customer_total_return ctr2
WHERE ("ctr1"."ctr_store_sk" = "ctr2"."ctr_store_sk")
))
AND ("s_store_sk" = "ctr1"."ctr_store_sk")
AND ("s_state" = 'TN')
AND ("ctr1"."ctr_customer_sk" = "c_customer_sk")
ORDER BY "c_customer_id" ASC
LIMIT 100
@@ -0,0 +1,80 @@
WITH
wscs AS (
SELECT
"sold_date_sk"
, "sales_price"
FROM
(
SELECT
"ws_sold_date_sk" "sold_date_sk"
, "ws_ext_sales_price" "sales_price"
FROM
${database}.${schema}.web_sales
)
UNION ALL (
SELECT
"cs_sold_date_sk" "sold_date_sk"
, "cs_ext_sales_price" "sales_price"
FROM
${database}.${schema}.catalog_sales
) )
, wswscs AS (
SELECT
"d_week_seq"
, "sum"((CASE WHEN ("d_day_name" = 'Sunday ') THEN "sales_price" ELSE null END)) "sun_sales"
, "sum"((CASE WHEN ("d_day_name" = 'Monday ') THEN "sales_price" ELSE null END)) "mon_sales"
, "sum"((CASE WHEN ("d_day_name" = 'Tuesday ') THEN "sales_price" ELSE null END)) "tue_sales"
, "sum"((CASE WHEN ("d_day_name" = 'Wednesday') THEN "sales_price" ELSE null END)) "wed_sales"
, "sum"((CASE WHEN ("d_day_name" = 'Thursday ') THEN "sales_price" ELSE null END)) "thu_sales"
, "sum"((CASE WHEN ("d_day_name" = 'Friday ') THEN "sales_price" ELSE null END)) "fri_sales"
, "sum"((CASE WHEN ("d_day_name" = 'Saturday ') THEN "sales_price" ELSE null END)) "sat_sales"
FROM
wscs
, ${database}.${schema}.date_dim
WHERE ("d_date_sk" = "sold_date_sk")
GROUP BY "d_week_seq"
)
SELECT
"d_week_seq1"
, "round"(("sun_sales1" / "sun_sales2"), 2)
, "round"(("mon_sales1" / "mon_sales2"), 2)
, "round"(("tue_sales1" / "tue_sales2"), 2)
, "round"(("wed_sales1" / "wed_sales2"), 2)
, "round"(("thu_sales1" / "thu_sales2"), 2)
, "round"(("fri_sales1" / "fri_sales2"), 2)
, "round"(("sat_sales1" / "sat_sales2"), 2)
FROM
(
SELECT
"wswscs"."d_week_seq" "d_week_seq1"
, "sun_sales" "sun_sales1"
, "mon_sales" "mon_sales1"
, "tue_sales" "tue_sales1"
, "wed_sales" "wed_sales1"
, "thu_sales" "thu_sales1"
, "fri_sales" "fri_sales1"
, "sat_sales" "sat_sales1"
FROM
wswscs
, ${database}.${schema}.date_dim
WHERE ("date_dim"."d_week_seq" = "wswscs"."d_week_seq")
AND ("d_year" = 2001)
) y
, (
SELECT
"wswscs"."d_week_seq" "d_week_seq2"
, "sun_sales" "sun_sales2"
, "mon_sales" "mon_sales2"
, "tue_sales" "tue_sales2"
, "wed_sales" "wed_sales2"
, "thu_sales" "thu_sales2"
, "fri_sales" "fri_sales2"
, "sat_sales" "sat_sales2"
FROM
wswscs
, ${database}.${schema}.date_dim
WHERE ("date_dim"."d_week_seq" = "wswscs"."d_week_seq")
AND ("d_year" = (2001 + 1))
) z
WHERE ("d_week_seq1" = ("d_week_seq2" - 53))
ORDER BY "d_week_seq1" ASC
@@ -0,0 +1,16 @@
SELECT
"dt"."d_year"
, "item"."i_brand_id" "brand_id"
, "item"."i_brand" "brand"
, "sum"("ss_ext_sales_price") "sum_agg"
FROM
${database}.${schema}.date_dim dt
, ${database}.${schema}.store_sales
, ${database}.${schema}.item
WHERE ("dt"."d_date_sk" = "store_sales"."ss_sold_date_sk")
AND ("store_sales"."ss_item_sk" = "item"."i_item_sk")
AND ("item"."i_manufact_id" = 128)
AND ("dt"."d_moy" = 11)
GROUP BY "dt"."d_year", "item"."i_brand", "item"."i_brand_id"
ORDER BY "dt"."d_year" ASC, "sum_agg" DESC, "brand_id" ASC
LIMIT 100
@@ -0,0 +1,93 @@
WITH
year_total AS (
SELECT
"c_customer_id" "customer_id"
, "c_first_name" "customer_first_name"
, "c_last_name" "customer_last_name"
, "c_preferred_cust_flag" "customer_preferred_cust_flag"
, "c_birth_country" "customer_birth_country"
, "c_login" "customer_login"
, "c_email_address" "customer_email_address"
, "d_year" "dyear"
, "sum"((((("ss_ext_list_price" - "ss_ext_wholesale_cost") - "ss_ext_discount_amt") + "ss_ext_sales_price") / 2)) "year_total"
, 's' "sale_type"
FROM
${database}.${schema}.customer
, ${database}.${schema}.store_sales
, ${database}.${schema}.date_dim
WHERE ("c_customer_sk" = "ss_customer_sk")
AND ("ss_sold_date_sk" = "d_date_sk")
GROUP BY "c_customer_id", "c_first_name", "c_last_name", "c_preferred_cust_flag", "c_birth_country", "c_login", "c_email_address", "d_year"
UNION ALL SELECT
"c_customer_id" "customer_id"
, "c_first_name" "customer_first_name"
, "c_last_name" "customer_last_name"
, "c_preferred_cust_flag" "customer_preferred_cust_flag"
, "c_birth_country" "customer_birth_country"
, "c_login" "customer_login"
, "c_email_address" "customer_email_address"
, "d_year" "dyear"
, "sum"((((("cs_ext_list_price" - "cs_ext_wholesale_cost") - "cs_ext_discount_amt") + "cs_ext_sales_price") / 2)) "year_total"
, 'c' "sale_type"
FROM
${database}.${schema}.customer
, ${database}.${schema}.catalog_sales
, ${database}.${schema}.date_dim
WHERE ("c_customer_sk" = "cs_bill_customer_sk")
AND ("cs_sold_date_sk" = "d_date_sk")
GROUP BY "c_customer_id", "c_first_name", "c_last_name", "c_preferred_cust_flag", "c_birth_country", "c_login", "c_email_address", "d_year"
UNION ALL SELECT
"c_customer_id" "customer_id"
, "c_first_name" "customer_first_name"
, "c_last_name" "customer_last_name"
, "c_preferred_cust_flag" "customer_preferred_cust_flag"
, "c_birth_country" "customer_birth_country"
, "c_login" "customer_login"
, "c_email_address" "customer_email_address"
, "d_year" "dyear"
, "sum"((((("ws_ext_list_price" - "ws_ext_wholesale_cost") - "ws_ext_discount_amt") + "ws_ext_sales_price") / 2)) "year_total"
, 'w' "sale_type"
FROM
${database}.${schema}.customer
, ${database}.${schema}.web_sales
, ${database}.${schema}.date_dim
WHERE ("c_customer_sk" = "ws_bill_customer_sk")
AND ("ws_sold_date_sk" = "d_date_sk")
GROUP BY "c_customer_id", "c_first_name", "c_last_name", "c_preferred_cust_flag", "c_birth_country", "c_login", "c_email_address", "d_year"
)
SELECT
"t_s_secyear"."customer_id"
, "t_s_secyear"."customer_first_name"
, "t_s_secyear"."customer_last_name"
, "t_s_secyear"."customer_preferred_cust_flag"
FROM
year_total t_s_firstyear
, year_total t_s_secyear
, year_total t_c_firstyear
, year_total t_c_secyear
, year_total t_w_firstyear
, year_total t_w_secyear
WHERE ("t_s_secyear"."customer_id" = "t_s_firstyear"."customer_id")
AND ("t_s_firstyear"."customer_id" = "t_c_secyear"."customer_id")
AND ("t_s_firstyear"."customer_id" = "t_c_firstyear"."customer_id")
AND ("t_s_firstyear"."customer_id" = "t_w_firstyear"."customer_id")
AND ("t_s_firstyear"."customer_id" = "t_w_secyear"."customer_id")
AND ("t_s_firstyear"."sale_type" = 's')
AND ("t_c_firstyear"."sale_type" = 'c')
AND ("t_w_firstyear"."sale_type" = 'w')
AND ("t_s_secyear"."sale_type" = 's')
AND ("t_c_secyear"."sale_type" = 'c')
AND ("t_w_secyear"."sale_type" = 'w')
AND ("t_s_firstyear"."dyear" = 2001)
AND ("t_s_secyear"."dyear" = (2001 + 1))
AND ("t_c_firstyear"."dyear" = 2001)
AND ("t_c_secyear"."dyear" = (2001 + 1))
AND ("t_w_firstyear"."dyear" = 2001)
AND ("t_w_secyear"."dyear" = (2001 + 1))
AND ("t_s_firstyear"."year_total" > 0)
AND ("t_c_firstyear"."year_total" > 0)
AND ("t_w_firstyear"."year_total" > 0)
AND ((CASE WHEN ("t_c_firstyear"."year_total" > 0) THEN ("t_c_secyear"."year_total" / "t_c_firstyear"."year_total") ELSE null END) > (CASE WHEN ("t_s_firstyear"."year_total" > 0) THEN ("t_s_secyear"."year_total" / "t_s_firstyear"."year_total") ELSE null END))
AND ((CASE WHEN ("t_c_firstyear"."year_total" > 0) THEN ("t_c_secyear"."year_total" / "t_c_firstyear"."year_total") ELSE null END) > (CASE WHEN ("t_w_firstyear"."year_total" > 0) THEN ("t_w_secyear"."year_total" / "t_w_firstyear"."year_total") ELSE null END))
ORDER BY "t_s_secyear"."customer_id" ASC, "t_s_secyear"."customer_first_name" ASC, "t_s_secyear"."customer_last_name" ASC, "t_s_secyear"."customer_preferred_cust_flag" ASC
LIMIT 100

0 comments on commit 2704e37

Please sign in to comment.