In [1]:
%%HTML
<!-- Load the Splice Machine notebook stylesheet. Presumably it defines the noteIcon, noteNote and fit3qtrwidth classes referenced by the markdown cells in this notebook (TODO confirm). -->
<link rel="stylesheet" href="https://doc.splicemachine.com/jupyter/css/custom.css">

# Running the TPCH-100 Benchmark Queries

This notebook takes you through the following phases using the TPCH 100 data set (see http://www.tpc.org):

*1. Creating the Tables*
*2. Importing TPCH-100 Data From S3*
*3. Creating Indexes*
*4. Compacting and Collecting Statistics*
*5. Running TPCH-100 Queries*

We also present some graphical output from the queries at the bottom of this notebook.

<p class="noteIcon">The code cells in this tutorial all use the Jupyter <em>%%sql</em> magic to interact with Splice Machine.</p>

## About TPCH Data
The TPC-H (aka *TPCH*) benchmark is a suite of business-oriented, ad-hoc queries and concurrent data modifications using SQL with a database. This benchmark illustrates decision support systems that examine large volumes of data, execute queries with a high degree of complexity, and give answers to critical business questions.

### The TPCH Schema
Here's a view of the TPC-H schema:

<img class="fit3qtrwidth" src="https://s3.amazonaws.com/splice-examples/images/tutorials/sample-data-tpch-schema.png">

## 1. Creating the Tables

Run the next cell to create the `TPCH100` schema and tables in your database:

In [None]:
%%sql
-- Creates the TPCH100 schema and the eight tables used by the rest of this
-- notebook. Run once: none of these statements has an IF NOT EXISTS guard.
CREATE SCHEMA TPCH100;

-- Line items, keyed by (order key, line number).
CREATE TABLE TPCH100.LINEITEM (
    L_ORDERKEY       BIGINT         NOT NULL,
    L_PARTKEY        INTEGER        NOT NULL,
    L_SUPPKEY        INTEGER        NOT NULL,
    L_LINENUMBER     INTEGER        NOT NULL,
    L_QUANTITY       DECIMAL(15,2),
    L_EXTENDEDPRICE  DECIMAL(15,2),
    L_DISCOUNT       DECIMAL(15,2),
    L_TAX            DECIMAL(15,2),
    L_RETURNFLAG     VARCHAR(1),
    L_LINESTATUS     VARCHAR(1),
    L_SHIPDATE       DATE,
    L_COMMITDATE     DATE,
    L_RECEIPTDATE    DATE,
    L_SHIPINSTRUCT   VARCHAR(25),
    L_SHIPMODE       VARCHAR(10),
    L_COMMENT        VARCHAR(44),
    PRIMARY KEY (L_ORDERKEY, L_LINENUMBER)
);

-- Orders, keyed by order key.
CREATE TABLE TPCH100.ORDERS (
    O_ORDERKEY       BIGINT         NOT NULL PRIMARY KEY,
    O_CUSTKEY        INTEGER,
    O_ORDERSTATUS    VARCHAR(1),
    O_TOTALPRICE     DECIMAL(15,2),
    O_ORDERDATE      DATE,
    O_ORDERPRIORITY  VARCHAR(15),
    O_CLERK          VARCHAR(15),
    O_SHIPPRIORITY   INTEGER,
    O_COMMENT        VARCHAR(79)
);

-- Customers, keyed by customer key.
CREATE TABLE TPCH100.CUSTOMER (
    C_CUSTKEY        INTEGER        NOT NULL PRIMARY KEY,
    C_NAME           VARCHAR(25),
    C_ADDRESS        VARCHAR(40),
    C_NATIONKEY      INTEGER        NOT NULL,
    C_PHONE          VARCHAR(15),
    C_ACCTBAL        DECIMAL(15,2),
    C_MKTSEGMENT     VARCHAR(10),
    C_COMMENT        VARCHAR(117)
);

-- Part/supplier pairs, keyed by (part key, supplier key).
CREATE TABLE TPCH100.PARTSUPP (
    PS_PARTKEY       INTEGER        NOT NULL,
    PS_SUPPKEY       INTEGER        NOT NULL,
    PS_AVAILQTY      INTEGER,
    PS_SUPPLYCOST    DECIMAL(15,2),
    PS_COMMENT       VARCHAR(199),
    PRIMARY KEY (PS_PARTKEY, PS_SUPPKEY)
);

-- Suppliers, keyed by supplier key.
CREATE TABLE TPCH100.SUPPLIER (
    S_SUPPKEY        INTEGER        NOT NULL PRIMARY KEY,
    S_NAME           VARCHAR(25),
    S_ADDRESS        VARCHAR(40),
    S_NATIONKEY      INTEGER,
    S_PHONE          VARCHAR(15),
    S_ACCTBAL        DECIMAL(15,2),
    S_COMMENT        VARCHAR(101)
);

-- Parts, keyed by part key.
CREATE TABLE TPCH100.PART (
    P_PARTKEY        INTEGER        NOT NULL PRIMARY KEY,
    P_NAME           VARCHAR(55),
    P_MFGR           VARCHAR(25),
    P_BRAND          VARCHAR(10),
    P_TYPE           VARCHAR(25),
    P_SIZE           INTEGER,
    P_CONTAINER      VARCHAR(10),
    P_RETAILPRICE    DECIMAL(15,2),
    P_COMMENT        VARCHAR(23)
);

-- Regions, keyed by region key.
CREATE TABLE TPCH100.REGION (
    R_REGIONKEY      INTEGER        NOT NULL PRIMARY KEY,
    R_NAME           VARCHAR(25),
    R_COMMENT        VARCHAR(152)
);

-- Nations, keyed by nation key. N_REGIONKEY refers to a region.
CREATE TABLE TPCH100.NATION (
    N_NATIONKEY      INTEGER        NOT NULL,
    N_NAME           VARCHAR(25),
    N_REGIONKEY      INTEGER        NOT NULL,
    N_COMMENT        VARCHAR(152),
    PRIMARY KEY (N_NATIONKEY)
);

## 2. Importing TPCH-100 Data From S3

We have pre-created flat files with the TPCH-100 data and stored those files in an S3 bucket, which makes it easy for you to import the data into your Splice Machine database. Run the next cell to import all of the data from those files:

<p class="noteNote">Importing this much data can take a few minutes; you'll see the result of each import displayed below the <code>IMPORT</code> statements as they complete.</p>


In [None]:
%%sql
-- Loads each TPCH100 table from its own S3 directory, one IMPORT_DATA call
-- per table. Only the table name and the source directory differ between
-- calls. Every call passes a pipe character as the fifth argument, 0 as the
-- tenth and /tmp as the eleventh (presumably column delimiter, allowed bad
-- records and bad-record directory - confirm against the IMPORT_DATA docs).

CALL SYSCS_UTIL.IMPORT_DATA (
    'TPCH100', 'LINEITEM', NULL,
    's3a://splice-benchmark-data/flat/TPCH/100/lineitem',
    '|', NULL, NULL, NULL, NULL, 0, '/tmp', TRUE, NULL);

CALL SYSCS_UTIL.IMPORT_DATA (
    'TPCH100', 'ORDERS', NULL,
    's3a://splice-benchmark-data/flat/TPCH/100/orders',
    '|', NULL, NULL, NULL, NULL, 0, '/tmp', TRUE, NULL);

CALL SYSCS_UTIL.IMPORT_DATA (
    'TPCH100', 'CUSTOMER', NULL,
    's3a://splice-benchmark-data/flat/TPCH/100/customer',
    '|', NULL, NULL, NULL, NULL, 0, '/tmp', TRUE, NULL);

CALL SYSCS_UTIL.IMPORT_DATA (
    'TPCH100', 'PARTSUPP', NULL,
    's3a://splice-benchmark-data/flat/TPCH/100/partsupp',
    '|', NULL, NULL, NULL, NULL, 0, '/tmp', TRUE, NULL);

CALL SYSCS_UTIL.IMPORT_DATA (
    'TPCH100', 'SUPPLIER', NULL,
    's3a://splice-benchmark-data/flat/TPCH/100/supplier',
    '|', NULL, NULL, NULL, NULL, 0, '/tmp', TRUE, NULL);

CALL SYSCS_UTIL.IMPORT_DATA (
    'TPCH100', 'PART', NULL,
    's3a://splice-benchmark-data/flat/TPCH/100/part',
    '|', NULL, NULL, NULL, NULL, 0, '/tmp', TRUE, NULL);

CALL SYSCS_UTIL.IMPORT_DATA (
    'TPCH100', 'REGION', NULL,
    's3a://splice-benchmark-data/flat/TPCH/100/region',
    '|', NULL, NULL, NULL, NULL, 0, '/tmp', TRUE, NULL);

CALL SYSCS_UTIL.IMPORT_DATA (
    'TPCH100', 'NATION', NULL,
    's3a://splice-benchmark-data/flat/TPCH/100/nation',
    '|', NULL, NULL, NULL, NULL, 0, '/tmp', TRUE, NULL);

In [None]:
%%sql 
-- Row count of each imported table, one result set per table. The column
-- alias is the table name so each result is labelled.
-- Each statement ends with a SPLICE-PROPERTIES hint, which is written as a
-- line comment and therefore swallows the rest of its line. That is why the
-- statement terminator sits on the following line. Keep that layout.
-- NOTE(review): index=null presumably forces a base-table scan rather than
-- an index scan - confirm against the Splice Machine hint documentation.
SELECT COUNT(*) AS LINEITEM FROM TPCH100.LINEITEM --SPLICE-PROPERTIES index=null
;
SELECT COUNT(*) AS ORDERS FROM TPCH100.ORDERS --SPLICE-PROPERTIES index=null
;
SELECT COUNT(*) AS CUSTOMER FROM TPCH100.CUSTOMER --SPLICE-PROPERTIES index=null
;
SELECT COUNT(*) AS PARTSUPP FROM TPCH100.PARTSUPP --SPLICE-PROPERTIES index=null
;
SELECT COUNT(*) AS SUPPLIER FROM TPCH100.SUPPLIER --SPLICE-PROPERTIES index=null
;
SELECT COUNT(*) AS PART FROM TPCH100.PART --SPLICE-PROPERTIES index=null
;
SELECT COUNT(*) AS REGION FROM TPCH100.REGION --SPLICE-PROPERTIES index=null
;
SELECT COUNT(*) AS NATION FROM TPCH100.NATION --SPLICE-PROPERTIES index=null
;

## 3. Creating Indexes

Next, run the following two cells to create and verify the indexes.

In [None]:
%%sql
-- Secondary indexes: two on ORDERS and two on LINEITEM.

-- ORDERS by customer, then order key.
CREATE INDEX TPCH100.O_CUST_IDX ON TPCH100.ORDERS (
    O_CUSTKEY,
    O_ORDERKEY
);

-- ORDERS by order date, then priority, then order key.
CREATE INDEX TPCH100.O_DATE_PRI_KEY_IDX ON TPCH100.ORDERS (
    O_ORDERDATE,
    O_ORDERPRIORITY,
    O_ORDERKEY
);

-- LINEITEM by ship date, carrying part key, extended price and discount.
CREATE INDEX TPCH100.L_SHIPDATE_IDX ON TPCH100.LINEITEM (
    L_SHIPDATE,
    L_PARTKEY,
    L_EXTENDEDPRICE,
    L_DISCOUNT
);

-- LINEITEM by part key, carrying the other columns listed below.
CREATE INDEX TPCH100.L_PART_IDX ON TPCH100.LINEITEM (
    L_PARTKEY,
    L_ORDERKEY,
    L_SUPPKEY,
    L_SHIPDATE,
    L_EXTENDEDPRICE,
    L_DISCOUNT,
    L_QUANTITY,
    L_SHIPMODE,
    L_SHIPINSTRUCT
);

In [None]:
%%sql 
-- Row count of ORDERS and LINEITEM, once per index created in the previous
-- cell. Each statement names one of those indexes in its SPLICE-PROPERTIES
-- hint. Results appear in statement order: O_CUST_IDX, O_DATE_PRI_KEY_IDX,
-- L_SHIPDATE_IDX, L_PART_IDX.
-- The hint is a line comment and swallows the rest of its line, so the
-- statement terminator must stay on the following line.
-- NOTE(review): presumably each count should equal the matching table count
-- from the import check - confirm.
SELECT COUNT(*) AS ORDERS FROM TPCH100.ORDERS --SPLICE-PROPERTIES index=O_CUST_IDX
;
SELECT COUNT(*) AS ORDERS FROM TPCH100.ORDERS --SPLICE-PROPERTIES index=O_DATE_PRI_KEY_IDX
;
SELECT COUNT(*) AS LINEITEM FROM TPCH100.LINEITEM --SPLICE-PROPERTIES index=L_SHIPDATE_IDX
;
SELECT COUNT(*) AS LINEITEM FROM TPCH100.LINEITEM --SPLICE-PROPERTIES index=L_PART_IDX
;

## 4. Compacting and Collecting Statistics

Since you've just imported a large amount of data into your database, it's a good idea to run a major compaction and collect statistics.

To do so, run each of the next two cells:

In [None]:
%%sql
-- Run a major compaction over every table in the TPCH100 schema.
CALL SYSCS_UTIL.SYSCS_PERFORM_MAJOR_COMPACTION_ON_SCHEMA('TPCH100');

In [None]:
%%sql
-- Collect statistics for the TPCH100 schema.
ANALYZE SCHEMA TPCH100;

## 5. Running the TPCH-100 Queries

We'll now run a sampling of the TPCH queries. The full set is available to run with our Jupyter Notebook deployment for your cluster, or you can contact us, and we'll provide them to you. The full set is configured to `explain` each query; you can then remove the `explain` in front of each query to actually run it. We also recommend that you point a browser tab at `localhost:4040` to monitor the queries in the Spark Console while they're running.

### Query 1

In [None]:
%%sql
-- QUERY 01
-- Per (l_returnflag, l_linestatus) group: totals and averages of quantity,
-- extended price and discount, totals of discounted and taxed price, and a
-- row count, over line items shipped on or before 1998-12-01 minus 90 days.
SELECT
    l_returnflag,
    l_linestatus,
    SUM(l_quantity)                                       AS sum_qty,
    SUM(l_extendedprice)                                  AS sum_base_price,
    SUM(l_extendedprice * (1 - l_discount))               AS sum_disc_price,
    SUM(l_extendedprice * (1 - l_discount) * (1 + l_tax)) AS sum_charge,
    AVG(l_quantity)                                       AS avg_qty,
    AVG(l_extendedprice)                                  AS avg_price,
    AVG(l_discount)                                       AS avg_disc,
    COUNT(*)                                              AS count_order
FROM TPCH100.lineitem
WHERE l_shipdate <= DATE({fn TIMESTAMPADD(SQL_TSI_DAY, -90, CAST('1998-12-01 00:00:00' AS TIMESTAMP))})
GROUP BY
    l_returnflag,
    l_linestatus
ORDER BY
    l_returnflag,
    l_linestatus
-- END OF QUERY

### Query 2

In [None]:
%%sql
-- QUERY 02
-- For size-15 parts whose type ends in BRASS, lists EUROPE-region suppliers
-- whose supply cost equals the minimum supply cost for that same part among
-- EUROPE-region suppliers. The subquery is correlated on p_partkey.
-- Returns the first 100 rows by account balance (descending), nation name,
-- supplier name and part key.
SELECT
    s_acctbal,
    s_name,
    n_name,
    p_partkey,
    p_mfgr,
    s_address,
    s_phone,
    s_comment
FROM
    TPCH100.part,
    TPCH100.supplier,
    TPCH100.partsupp,
    TPCH100.nation,
    TPCH100.region
WHERE p_partkey = ps_partkey
  AND s_suppkey = ps_suppkey
  AND p_size = 15
  AND p_type LIKE '%BRASS'
  AND s_nationkey = n_nationkey
  AND n_regionkey = r_regionkey
  AND r_name = 'EUROPE'
  AND ps_supplycost = (
        SELECT MIN(ps_supplycost)
        FROM
            TPCH100.partsupp,
            TPCH100.supplier,
            TPCH100.nation,
            TPCH100.region
        WHERE p_partkey = ps_partkey
          AND s_suppkey = ps_suppkey
          AND s_nationkey = n_nationkey
          AND n_regionkey = r_regionkey
          AND r_name = 'EUROPE'
      )
ORDER BY
    s_acctbal DESC,
    n_name,
    s_name,
    p_partkey
{limit 100}
-- END OF QUERY

### Query 3

In [None]:
%%sql
-- QUERY 03
-- Discounted revenue per order for BUILDING-segment customers, restricted to
-- orders placed before 1995-03-15 with line items shipped after that date.
-- Returns the 10 rows with the highest revenue, ties broken by order date.
SELECT
    l_orderkey,
    SUM(l_extendedprice * (1 - l_discount)) AS revenue,
    o_orderdate,
    o_shippriority
FROM
    TPCH100.customer,
    TPCH100.orders,
    TPCH100.lineitem
WHERE c_mktsegment = 'BUILDING'
  AND c_custkey = o_custkey
  AND l_orderkey = o_orderkey
  AND o_orderdate < DATE('1995-03-15')
  AND l_shipdate > DATE('1995-03-15')
GROUP BY
    l_orderkey,
    o_orderdate,
    o_shippriority
ORDER BY
    revenue DESC,
    o_orderdate
{limit 10}
-- END OF QUERY

### Query 4

In [None]:
%%sql
-- QUERY 04
-- Number of orders per order priority, for orders placed from 1993-07-01 up
-- to (not including) three months later that have at least one line item
-- whose commit date is earlier than its receipt date.
SELECT
    o_orderpriority,
    COUNT(*) AS order_count
FROM TPCH100.orders
WHERE o_orderdate >= DATE('1993-07-01')
  AND o_orderdate < ADD_MONTHS('1993-07-01', 3)
  AND EXISTS (
        SELECT *
        FROM TPCH100.lineitem
        WHERE l_orderkey = o_orderkey
          AND l_commitdate < l_receiptdate
      )
GROUP BY o_orderpriority
ORDER BY o_orderpriority
-- END OF QUERY

### Query 5

In [None]:
%%sql
-- QUERY 05
-- Discounted revenue per nation in the ASIA region, counting only line items
-- where customer and supplier share the same nation, for orders placed from
-- 1994-01-01 up to (not including) one year later. Highest revenue first.
SELECT
    n_name,
    SUM(l_extendedprice * (1 - l_discount)) AS revenue
FROM
    TPCH100.customer,
    TPCH100.orders,
    TPCH100.lineitem,
    TPCH100.supplier,
    TPCH100.nation,
    TPCH100.region
WHERE c_custkey = o_custkey
  AND l_orderkey = o_orderkey
  AND l_suppkey = s_suppkey
  AND c_nationkey = s_nationkey
  AND s_nationkey = n_nationkey
  AND n_regionkey = r_regionkey
  AND r_name = 'ASIA'
  AND o_orderdate >= DATE('1994-01-01')
  AND o_orderdate < DATE({fn TIMESTAMPADD(SQL_TSI_YEAR, 1, CAST('1994-01-01 00:00:00' AS TIMESTAMP))})
GROUP BY n_name
ORDER BY revenue DESC
-- END OF QUERY

### Query 6

In [None]:
%%sql
-- QUERY 06
-- Sum of extended price times discount over line items shipped from
-- 1994-01-01 up to (not including) one year later, with a discount between
-- .06 - 0.01 and .06 + 0.01 and a quantity below 24.
SELECT
    SUM(l_extendedprice * l_discount) AS revenue
FROM TPCH100.lineitem
WHERE l_shipdate >= DATE('1994-01-01')
  AND l_shipdate < DATE({fn TIMESTAMPADD(SQL_TSI_YEAR, 1, CAST('1994-01-01 00:00:00' AS TIMESTAMP))})
  AND l_discount BETWEEN .06 - 0.01 AND .06 + 0.01
  AND l_quantity < 24
-- END OF QUERY

### Query 7

In [None]:
%%sql
-- QUERY 07
-- Discounted revenue per (supplier nation, customer nation, ship year) for
-- shipments between FRANCE and GERMANY in either direction, with ship dates
-- from 1995-01-01 through 1996-12-31. The nation table is joined twice:
-- n1 for the supplier side and n2 for the customer side.
SELECT
    supp_nation,
    cust_nation,
    l_year,
    SUM(volume) AS revenue
FROM
    (
        SELECT
            n1.n_name                          AS supp_nation,
            n2.n_name                          AS cust_nation,
            YEAR(l_shipdate)                   AS l_year,
            l_extendedprice * (1 - l_discount) AS volume
        FROM
            TPCH100.supplier,
            TPCH100.lineitem,
            TPCH100.orders,
            TPCH100.customer,
            TPCH100.nation n1,
            TPCH100.nation n2
        WHERE s_suppkey = l_suppkey
          AND o_orderkey = l_orderkey
          AND c_custkey = o_custkey
          AND s_nationkey = n1.n_nationkey
          AND c_nationkey = n2.n_nationkey
          AND (
                (n1.n_name = 'FRANCE' AND n2.n_name = 'GERMANY')
                OR (n1.n_name = 'GERMANY' AND n2.n_name = 'FRANCE')
              )
          AND l_shipdate BETWEEN DATE('1995-01-01') AND DATE('1996-12-31')
    ) AS shipping
GROUP BY
    supp_nation,
    cust_nation,
    l_year
ORDER BY
    supp_nation,
    cust_nation,
    l_year
-- END OF QUERY

### Query 8

In [None]:
%%sql
-- QUERY 08
-- Per order year, the share of discounted revenue supplied by BRAZIL, for
-- ECONOMY ANODIZED STEEL parts ordered by customers in the AMERICA region
-- between 1995-01-01 and 1996-12-31. n1 is the customer nation (used for the
-- region filter) and n2 is the supplier nation (reported as nation).
SELECT
    o_year,
    SUM(CASE
            WHEN nation = 'BRAZIL' THEN volume
            ELSE 0
        END) / SUM(volume) AS mkt_share
FROM
    (
        SELECT
            YEAR(o_orderdate)                  AS o_year,
            l_extendedprice * (1 - l_discount) AS volume,
            n2.n_name                          AS nation
        FROM
            TPCH100.part,
            TPCH100.supplier,
            TPCH100.lineitem,
            TPCH100.orders,
            TPCH100.customer,
            TPCH100.nation n1,
            TPCH100.nation n2,
            TPCH100.region
        WHERE p_partkey = l_partkey
          AND s_suppkey = l_suppkey
          AND l_orderkey = o_orderkey
          AND o_custkey = c_custkey
          AND c_nationkey = n1.n_nationkey
          AND n1.n_regionkey = r_regionkey
          AND r_name = 'AMERICA'
          AND s_nationkey = n2.n_nationkey
          AND o_orderdate BETWEEN DATE('1995-01-01') AND DATE('1996-12-31')
          AND p_type = 'ECONOMY ANODIZED STEEL'
    ) AS all_nations
GROUP BY o_year
ORDER BY o_year
-- END OF QUERY

### Query 9

In [None]:
%%sql
-- QUERY 09
-- Profit per (supplier nation, order year) for parts whose name contains
-- green. Profit per line item is discounted extended price minus supply
-- cost times quantity. Sorted by nation, then most recent year first.
SELECT
    nation,
    o_year,
    SUM(amount) AS sum_profit
FROM
    (
        SELECT
            n_name            AS nation,
            YEAR(o_orderdate) AS o_year,
            l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity AS amount
        FROM
            TPCH100.part,
            TPCH100.supplier,
            TPCH100.lineitem,
            TPCH100.partsupp,
            TPCH100.orders,
            TPCH100.nation
        WHERE s_suppkey = l_suppkey
          AND ps_suppkey = l_suppkey
          AND ps_partkey = l_partkey
          AND p_partkey = l_partkey
          AND o_orderkey = l_orderkey
          AND s_nationkey = n_nationkey
          AND p_name LIKE '%green%'
    ) AS profit
GROUP BY
    nation,
    o_year
ORDER BY
    nation,
    o_year DESC
-- END OF QUERY

### Query 10

In [None]:
%%sql
-- QUERY 10
-- Discounted revenue per customer from line items with return flag R, for
-- orders placed from 1993-10-01 up to (not including) three months later.
-- Returns the 20 customers with the highest such revenue.
SELECT
    c_custkey,
    c_name,
    SUM(l_extendedprice * (1 - l_discount)) AS revenue,
    c_acctbal,
    n_name,
    c_address,
    c_phone,
    c_comment
FROM
    TPCH100.customer,
    TPCH100.orders,
    TPCH100.lineitem,
    TPCH100.nation
WHERE c_custkey = o_custkey
  AND l_orderkey = o_orderkey
  AND o_orderdate >= DATE('1993-10-01')
  AND o_orderdate < ADD_MONTHS('1993-10-01', 3)
  AND l_returnflag = 'R'
  AND c_nationkey = n_nationkey
GROUP BY
    c_custkey,
    c_name,
    c_acctbal,
    c_phone,
    n_name,
    c_address,
    c_comment
ORDER BY revenue DESC
{limit 20}
-- END OF QUERY

### Query 11

In [None]:
%%sql
-- QUERY 11
-- Stock value (supply cost times available quantity) per part held by
-- GERMANY suppliers, keeping only parts whose value exceeds 0.0000010000
-- of the total GERMANY stock value. Highest value first.
SELECT
    ps_partkey,
    SUM(ps_supplycost * ps_availqty) AS value
FROM
    TPCH100.partsupp,
    TPCH100.supplier,
    TPCH100.nation
WHERE ps_suppkey = s_suppkey
  AND s_nationkey = n_nationkey
  AND n_name = 'GERMANY'
GROUP BY ps_partkey
HAVING SUM(ps_supplycost * ps_availqty) > (
        SELECT SUM(ps_supplycost * ps_availqty) * 0.0000010000
        FROM
            TPCH100.partsupp,
            TPCH100.supplier,
            TPCH100.nation
        WHERE ps_suppkey = s_suppkey
          AND s_nationkey = n_nationkey
          AND n_name = 'GERMANY'
    )
ORDER BY value DESC
-- END OF QUERY

### Query 12

In [None]:
%%sql
-- QUERY 12
-- Per ship mode (MAIL and SHIP only), counts line items belonging to orders
-- with priority 1-URGENT or 2-HIGH versus all other priorities. Only line
-- items shipped before their commit date, committed before their receipt
-- date, and received from 1994-01-01 up to (not including) one year later
-- are counted.
SELECT
    l_shipmode,
    SUM(CASE
            WHEN o_orderpriority = '1-URGENT'
              OR o_orderpriority = '2-HIGH'
            THEN 1
            ELSE 0
        END) AS high_line_count,
    SUM(CASE
            WHEN o_orderpriority <> '1-URGENT'
             AND o_orderpriority <> '2-HIGH'
            THEN 1
            ELSE 0
        END) AS low_line_count
FROM
    TPCH100.orders,
    TPCH100.lineitem
WHERE o_orderkey = l_orderkey
  AND l_shipmode IN ('MAIL', 'SHIP')
  AND l_commitdate < l_receiptdate
  AND l_shipdate < l_commitdate
  AND l_receiptdate >= DATE('1994-01-01')
  AND l_receiptdate < DATE({fn TIMESTAMPADD(SQL_TSI_YEAR, 1, CAST('1994-01-01 00:00:00' AS TIMESTAMP))})
GROUP BY l_shipmode
ORDER BY l_shipmode
-- END OF QUERY

### Query 13

In [None]:
%%sql
-- QUERY 13
-- Distribution of customers by number of orders. The inner query counts,
-- per customer, the orders whose comment does not match the pattern
-- special ... requests. The left outer join keeps customers with no such
-- orders (count 0). The outer query counts customers per order count.
SELECT
    c_count,
    COUNT(*) AS custdist
FROM
    (
        SELECT
            c_custkey,
            COUNT(o_orderkey)
        FROM TPCH100.customer
        LEFT OUTER JOIN TPCH100.orders
            ON c_custkey = o_custkey
           AND o_comment NOT LIKE '%special%requests%'
        GROUP BY c_custkey
    ) AS c_orders (c_custkey, c_count)
GROUP BY c_count
ORDER BY
    custdist DESC,
    c_count DESC
-- END OF QUERY

### Query 14

In [None]:
%%sql
-- QUERY 14
-- Percentage of discounted revenue that comes from parts whose type starts
-- with PROMO, over line items shipped from 1995-09-01 up to (not including)
-- one month later.
SELECT
    100.00 * SUM(CASE
                     WHEN p_type LIKE 'PROMO%'
                     THEN l_extendedprice * (1 - l_discount)
                     ELSE 0
                 END) / SUM(l_extendedprice * (1 - l_discount)) AS promo_revenue
FROM
    TPCH100.lineitem,
    TPCH100.part
WHERE l_partkey = p_partkey
  AND l_shipdate >= DATE('1995-09-01')
  AND l_shipdate < ADD_MONTHS('1995-09-01', 1)
-- END OF QUERY

### Query 15

In [None]:
%%sql
-- QUERY 15
-- Step 1 of 3: helper view with discounted revenue per supplier for line
-- items shipped from 1996-01-01 up to (not including) three months later.
CREATE VIEW TPCH100.revenue0 (supplier_no, total_revenue) AS
    SELECT
        l_suppkey,
        SUM(l_extendedprice * (1 - l_discount))
    FROM TPCH100.lineitem
    WHERE l_shipdate >= DATE('1996-01-01')
      AND l_shipdate < ADD_MONTHS('1996-01-01', 3)
    GROUP BY l_suppkey;
-- END OF QUERY

-- QUERY 15
-- Step 2 of 3: suppliers whose revenue equals the maximum in the view.
SELECT
    s_suppkey,
    s_name,
    s_address,
    s_phone,
    total_revenue
FROM
    TPCH100.supplier,
    TPCH100.revenue0
WHERE s_suppkey = supplier_no
  AND total_revenue = (
        SELECT MAX(total_revenue)
        FROM TPCH100.revenue0
      )
ORDER BY s_suppkey;
-- END OF QUERY

-- QUERY 15
-- Step 3 of 3: drop the helper view created in step 1.
DROP VIEW TPCH100.revenue0
-- END OF QUERY

### Query 16

In [None]:
%%sql
-- QUERY 16
-- Number of distinct suppliers per (brand, type, size) for parts that are
-- not Brand#45, whose type does not start with MEDIUM POLISHED, and whose
-- size is one of the eight listed values. Suppliers whose comment matches
-- the pattern Customer ... Complaints are excluded.
SELECT
    p_brand,
    p_type,
    p_size,
    COUNT(DISTINCT ps_suppkey) AS supplier_cnt
FROM
    TPCH100.partsupp,
    TPCH100.part
WHERE p_partkey = ps_partkey
  AND p_brand <> 'Brand#45'
  AND p_type NOT LIKE 'MEDIUM POLISHED%'
  AND p_size IN (49, 14, 23, 45, 19, 3, 36, 9)
  AND ps_suppkey NOT IN (
        SELECT s_suppkey
        FROM TPCH100.supplier
        WHERE s_comment LIKE '%Customer%Complaints%'
      )
GROUP BY
    p_brand,
    p_type,
    p_size
ORDER BY
    supplier_cnt DESC,
    p_brand,
    p_type,
    p_size
-- END OF QUERY

### Query 17

In [None]:
%%sql
-- QUERY 17
-- Sum of extended price divided by 7.0, over line items of Brand#23 parts in
-- MED BOX containers whose quantity is below 0.2 times the average quantity
-- of all line items for the same part (correlated subquery on p_partkey).
SELECT
    SUM(l_extendedprice) / 7.0 AS avg_yearly
FROM
    TPCH100.lineitem,
    TPCH100.part
WHERE p_partkey = l_partkey
  AND p_brand = 'Brand#23'
  AND p_container = 'MED BOX'
  AND l_quantity < (
        SELECT 0.2 * AVG(l_quantity)
        FROM TPCH100.lineitem
        WHERE l_partkey = p_partkey
      )
-- END OF QUERY

### Query 19

In [None]:
%%sql
-- QUERY 19
-- Discounted revenue from line items delivered in person by AIR or AIR REG
-- that match any one of three brand profiles. Each profile fixes a brand, a
-- set of containers, a quantity range and a part size range. The join
-- predicate is repeated inside every OR branch.
SELECT
    SUM(l_extendedprice * (1 - l_discount)) AS revenue
FROM
    TPCH100.lineitem,
    TPCH100.part
WHERE
    (
        p_partkey = l_partkey
        AND p_brand = 'Brand#12'
        AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
        AND l_quantity >= 1 AND l_quantity <= 1 + 10
        AND p_size BETWEEN 1 AND 5
        AND l_shipmode IN ('AIR', 'AIR REG')
        AND l_shipinstruct = 'DELIVER IN PERSON'
    )
    OR
    (
        p_partkey = l_partkey
        AND p_brand = 'Brand#23'
        AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
        AND l_quantity >= 10 AND l_quantity <= 10 + 10
        AND p_size BETWEEN 1 AND 10
        AND l_shipmode IN ('AIR', 'AIR REG')
        AND l_shipinstruct = 'DELIVER IN PERSON'
    )
    OR
    (
        p_partkey = l_partkey
        AND p_brand = 'Brand#34'
        AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
        AND l_quantity >= 20 AND l_quantity <= 20 + 10
        AND p_size BETWEEN 1 AND 15
        AND l_shipmode IN ('AIR', 'AIR REG')
        AND l_shipinstruct = 'DELIVER IN PERSON'
    )
-- END OF QUERY


### Query 21 (Variant)

In [None]:
%%sql
-- QUERY 21-var
-- Per supplier nation, counts late line items (receipt date after commit
-- date) on orders with status F where the order has a line item from another
-- supplier and no other supplier on that order was also late.
-- Returns the first 100 rows by count, highest first.
SELECT
    n_name,
    COUNT(*) AS numwait
FROM
    TPCH100.supplier,
    TPCH100.lineitem l1,
    TPCH100.orders,
    TPCH100.nation
WHERE s_suppkey = l1.l_suppkey
  AND o_orderkey = l1.l_orderkey
  AND o_orderstatus = 'F'
  AND l1.l_receiptdate > l1.l_commitdate
  AND EXISTS (
        SELECT *
        FROM TPCH100.lineitem l2
        WHERE l2.l_orderkey = l1.l_orderkey
          AND l2.l_suppkey <> l1.l_suppkey
      )
  AND NOT EXISTS (
        SELECT *
        FROM TPCH100.lineitem l3
        WHERE l3.l_orderkey = l1.l_orderkey
          AND l3.l_suppkey <> l1.l_suppkey
          AND l3.l_receiptdate > l3.l_commitdate
      )
  AND s_nationkey = n_nationkey
GROUP BY n_name
ORDER BY numwait DESC
{limit 100}
-- END OF QUERY

### Query 22

In [None]:
%%sql
-- QUERY 22
-- Per two-character phone prefix (seven listed prefixes), the number of
-- customers and their total account balance, for customers who have no
-- orders and whose balance is above the average positive balance of
-- customers with those same prefixes.
SELECT
    cntrycode,
    COUNT(*)       AS numcust,
    SUM(c_acctbal) AS totacctbal
FROM
    (
        SELECT
            SUBSTR(c_phone, 1, 2) AS cntrycode,
            c_acctbal
        FROM TPCH100.customer
        WHERE SUBSTR(c_phone, 1, 2) IN
                ('13', '31', '23', '29', '30', '18', '17')
          AND c_acctbal > (
                SELECT AVG(c_acctbal)
                FROM TPCH100.customer
                WHERE c_acctbal > 0.00
                  AND SUBSTR(c_phone, 1, 2) IN
                        ('13', '31', '23', '29', '30', '18', '17')
              )
          AND NOT EXISTS (
                SELECT *
                FROM TPCH100.orders
                WHERE o_custkey = c_custkey
              )
    ) AS custsale
GROUP BY cntrycode
ORDER BY cntrycode
-- END OF QUERY

### Query 1, Variant 1

In [None]:
%%sql
-- QUERY 01-variant
-- Same filter as Query 1 (shipped on or before 1998-12-01 minus 90 days) but
-- grouped per (l_shipdate, l_linestatus) and without the averages. Rows are
-- ordered by ship date only.
SELECT
    l_linestatus,
    l_shipdate,
    SUM(l_quantity)                                       AS sum_qty,
    SUM(l_extendedprice)                                  AS sum_base_price,
    SUM(l_extendedprice * (1 - l_discount))               AS sum_disc_price,
    SUM(l_extendedprice * (1 - l_discount) * (1 + l_tax)) AS sum_charge,
    COUNT(*)                                              AS count_order
FROM TPCH100.lineitem
WHERE l_shipdate <= DATE({fn TIMESTAMPADD(SQL_TSI_DAY, -90, CAST('1998-12-01 00:00:00' AS TIMESTAMP))})
GROUP BY
    l_shipdate,
    l_linestatus
ORDER BY l_shipdate

-- END OF QUERY

### Query 1, Variant 2

In [None]:
%%sql
-- QUERY 01-variant with a fixed ship-date cutoff
-- Totals of quantity, extended price, discounted price and taxed price, plus
-- a row count, per (l_shipdate, l_linestatus), for line items shipped on or
-- before the cutoff date. Rows are ordered by ship date only.
-- The cutoff is a plain date literal because the Jupyter sql magic does not
-- expand Zeppelin-style dollar-brace form placeholders. Edit the literal in
-- the WHERE clause to try a different cutoff.
SELECT
    l_linestatus,
    l_shipdate,
    SUM(l_quantity)                                       AS sum_qty,
    SUM(l_extendedprice)                                  AS sum_base_price,
    SUM(l_extendedprice * (1 - l_discount))               AS sum_disc_price,
    SUM(l_extendedprice * (1 - l_discount) * (1 + l_tax)) AS sum_charge,
    COUNT(*)                                              AS count_order
FROM TPCH100.lineitem
WHERE l_shipdate <= DATE('1992-07-20')
GROUP BY
    l_shipdate,
    l_linestatus
ORDER BY l_shipdate

-- END OF QUERY

In [None]:
%%sql 
