# TPC-H - Single-Node Bodo - Jupyter Notebook

TPC-H is a decision support benchmark that offers business-oriented ad hoc queries.
More information can be found [here](http://www.tpc.org/tpch)

The queries are originally in SQL format and here they are implemented using the pandas API.

By default, runs use Bodo, so the data is distributed in chunks across processes.

Dataset size is 2GB.

There's a larger dataset available on "s3://bodo-example-data/tpch/s4/" which is 4GB. 


**The Bodo parallel cluster in this example runs within the same Saturn Cloud resource as the notebook.** Thus, to increase the performance of the Bodo cluster you only need to increase the instance size of the Jupyter Server resource it's running on.

**To scale and run your application with multiple nodes you can use [Bodo platform](https://platform.bodo.ai/account/login)**

## Start an IPyParallel cluster

Run the following code in a cell to start an IPyParallel cluster. IPyParallel is used to interactively control a cluster of IPython processes. The variable `n` specifies the number of engines to start; it is set to the number of physical CPU cores available, capped at 8 (the maximum in the free Bodo Community Edition).

In [None]:
import ipyparallel as ipp
import psutil

# Number of engines to start: one per physical core, capped at 8.
# psutil.cpu_count() returns None when the count cannot be determined, which
# would make min() raise a TypeError; fall back to the logical core count and
# finally to a single engine.
n = min(psutil.cpu_count(logical=False) or psutil.cpu_count() or 1, 8)

# command to create and start the local cluster
rc = ipp.Cluster(engines="mpi", n=n).start_and_connect_sync(activate=True)

The following code imports bodo and verifies that the IPyParallel cluster is set up correctly:

In [None]:
%%px
import bodo

# Print the rank and the total number of ranks reported by bodo.
print(f"Hello World from rank {bodo.get_rank()}. Total ranks={bodo.get_size()}")

In [None]:
%%px
import bodo
import time
import numpy as np
import pandas as pd

<a id="loading_data"></a>
## Loading data

In this section, we load the data required by the queries into pandas DataFrames.

In [None]:
%%px
@bodo.jit(distributed=["rel"], cache=True)
def load_lineitem(data_folder):
    """Read ``lineitem.tbl`` from *data_folder* into a DataFrame.

    The file is read as header-less, ``|``-separated text.  The columns at
    positions 10, 11 and 12 (ship, commit and receipt dates) are parsed as
    dates.  The elapsed read time is printed in milliseconds.
    """
    started = time.time()
    path = data_folder + "/lineitem.tbl"
    # Column names in file order; the file has no header row.
    column_names = [
        "L_ORDERKEY",
        "L_PARTKEY",
        "L_SUPPKEY",
        "L_LINENUMBER",
        "L_QUANTITY",
        "L_EXTENDEDPRICE",
        "L_DISCOUNT",
        "L_TAX",
        "L_RETURNFLAG",
        "L_LINESTATUS",
        "L_SHIPDATE",
        "L_COMMITDATE",
        "L_RECEIPTDATE",
        "L_SHIPINSTRUCT",
        "L_SHIPMODE",
        "L_COMMENT",
    ]
    column_types = {
        "L_ORDERKEY": np.int64,
        "L_PARTKEY": np.int64,
        "L_SUPPKEY": np.int64,
        "L_LINENUMBER": np.int64,
        "L_QUANTITY": np.float64,
        "L_EXTENDEDPRICE": np.float64,
        "L_DISCOUNT": np.float64,
        "L_TAX": np.float64,
        "L_RETURNFLAG": str,
        "L_LINESTATUS": str,
        "L_SHIPDATE": str,
        "L_COMMITDATE": str,
        "L_RECEIPTDATE": str,
        "L_SHIPINSTRUCT": str,
        "L_SHIPMODE": str,
        "L_COMMENT": str,
    }
    # `rel` is the name listed in the decorator's `distributed` option, so the
    # variable keeps that name.
    rel = pd.read_csv(
        path,
        sep="|",
        header=None,
        names=column_names,
        dtype=column_types,
        parse_dates=[10, 11, 12],
    )
    print("Lineitem Reading time: ", ((time.time() - started) * 1000), " (ms)")
    return rel

# Load the lineitem table; only rank 0 shows a preview so the output is not
# repeated by every rank.
lineitem = load_lineitem("s3://bodo-examples-data/tpch/s2")
if bodo.get_rank() == 0:
    display(lineitem.head())

In [None]:
%%px

@bodo.jit(distributed=["rel"], cache=True)
def load_orders(data_folder):
    """Read ``orders.tbl`` from *data_folder* into a DataFrame.

    The file is read as header-less, ``|``-separated text.  The column at
    position 4 (``O_ORDERDATE``) is parsed as a date.  The elapsed read time
    is printed in milliseconds.
    """
    t1 = time.time()
    file = data_folder + "/orders.tbl"
    # Column names in file order; the file has no header row.
    cols_names = ['O_ORDERKEY', 'O_CUSTKEY', 'O_ORDERSTATUS',
            'O_TOTALPRICE', 'O_ORDERDATE', 'O_ORDERPRIORITY',
            'O_CLERK', 'O_SHIPPRIORITY', 'O_COMMENT']
    # O_ORDERDATE is declared as str because it is date text that
    # parse_dates converts below; this matches how the date columns of
    # lineitem are declared in load_lineitem.
    cols = {'O_ORDERKEY' : np.int64, 'O_CUSTKEY' : np.int64, 'O_ORDERSTATUS' : str,
            'O_TOTALPRICE' : np.float64, 'O_ORDERDATE' : str, 'O_ORDERPRIORITY' : str,
            'O_CLERK' : str, 'O_SHIPPRIORITY' : np.int64, 'O_COMMENT' : str}
    # `rel` is the name listed in the decorator's `distributed` option.
    rel = pd.read_csv(file, sep='|', header=None,
        names=cols_names,
        dtype=cols,
        parse_dates=[4]
        )
    print("Orders Reading time: ", ((time.time() - t1) * 1000), " (ms)")
    return rel
    
# Load the orders table; only rank 0 shows a preview so the output is not
# repeated by every rank.
orders = load_orders("s3://bodo-examples-data/tpch/s2")
if bodo.get_rank() == 0:
    display(orders.head())

In [None]:
%%px

@bodo.jit(distributed=["rel"], cache=True)
def load_customer(data_folder):
    """Read ``customer.tbl`` from *data_folder* into a DataFrame.

    The file is read as header-less, ``|``-separated text with explicit
    column names and dtypes.  The elapsed read time is printed in
    milliseconds.
    """
    started = time.time()
    path = data_folder + "/customer.tbl"
    # Column names in file order; the file has no header row.
    column_names = [
        "C_CUSTKEY",
        "C_NAME",
        "C_ADDRESS",
        "C_NATIONKEY",
        "C_PHONE",
        "C_ACCTBAL",
        "C_MKTSEGMENT",
        "C_COMMENT",
    ]
    column_types = {
        "C_CUSTKEY": np.int64,
        "C_NAME": str,
        "C_ADDRESS": str,
        "C_NATIONKEY": np.int64,
        "C_PHONE": str,
        "C_ACCTBAL": np.float64,
        "C_MKTSEGMENT": str,
        "C_COMMENT": str,
    }
    # `rel` is the name listed in the decorator's `distributed` option.
    rel = pd.read_csv(
        path,
        sep="|",
        header=None,
        names=column_names,
        dtype=column_types,
    )
    print("Customer Reading time: ", ((time.time() - started) * 1000), " (ms)")
    return rel

# Load the customer table; only rank 0 shows a preview so the output is not
# repeated by every rank.
customer = load_customer("s3://bodo-examples-data/tpch/s2")
if bodo.get_rank() == 0:
    display(customer.head())

In [None]:
%%px

@bodo.jit(distributed=["rel"], cache=True)
def load_nation(data_folder):
    """Read ``nation.tbl`` from *data_folder* into a DataFrame.

    The file is read as header-less, ``|``-separated text with explicit
    column names and dtypes.  The elapsed read time is printed in
    milliseconds.
    """
    started = time.time()
    path = data_folder + "/nation.tbl"
    # Column names in file order; the file has no header row.
    column_names = ["N_NATIONKEY", "N_NAME", "N_REGIONKEY", "N_COMMENT"]
    column_types = {
        "N_NATIONKEY": np.int64,
        "N_NAME": str,
        "N_REGIONKEY": np.int64,
        "N_COMMENT": str,
    }
    # `rel` is the name listed in the decorator's `distributed` option.
    rel = pd.read_csv(
        path,
        sep="|",
        header=None,
        names=column_names,
        dtype=column_types,
    )
    print("Nation Reading time: ", ((time.time() - started) * 1000), " (ms)")
    return rel

# Load the nation table; only rank 0 shows a preview so the output is not
# repeated by every rank.
nation = load_nation("s3://bodo-examples-data/tpch/s2")
if bodo.get_rank() == 0:
    display(nation.head())

In [None]:
%%px

@bodo.jit(distributed=["rel"], cache=True)
def load_supplier(data_folder):
    """Read ``supplier.tbl`` from *data_folder* into a DataFrame.

    The file is read as header-less, ``|``-separated text with explicit
    column names and dtypes.  The elapsed read time is printed in
    milliseconds.
    """
    started = time.time()
    path = data_folder + "/supplier.tbl"
    # Column names in file order; the file has no header row.
    column_names = [
        "S_SUPPKEY",
        "S_NAME",
        "S_ADDRESS",
        "S_NATIONKEY",
        "S_PHONE",
        "S_ACCTBAL",
        "S_COMMENT",
    ]
    column_types = {
        "S_SUPPKEY": np.int64,
        "S_NAME": str,
        "S_ADDRESS": str,
        "S_NATIONKEY": np.int64,
        "S_PHONE": str,
        "S_ACCTBAL": np.float64,
        "S_COMMENT": str,
    }
    # `rel` is the name listed in the decorator's `distributed` option.
    rel = pd.read_csv(
        path,
        sep="|",
        header=None,
        names=column_names,
        dtype=column_types,
    )
    print("Supplier Reading time: ", ((time.time() - started) * 1000), " (ms)")
    return rel

# Load the supplier table; only rank 0 shows a preview so the output is not
# repeated by every rank.
supplier = load_supplier("s3://bodo-examples-data/tpch/s2")
if bodo.get_rank() == 0:
    display(supplier.head())

In [None]:
%%px

@bodo.jit(distributed=["rel"], cache=True)
def load_partsupp(data_folder):
    """Read ``partsupp.tbl`` from *data_folder* into a DataFrame.

    The file is read as header-less, ``|``-separated text with explicit
    column names and dtypes.  The elapsed read time is printed in
    milliseconds.
    """
    started = time.time()
    path = data_folder + "/partsupp.tbl"
    # Column names in file order; the file has no header row.
    column_names = [
        "PS_PARTKEY",
        "PS_SUPPKEY",
        "PS_AVAILQTY",
        "PS_SUPPLYCOST",
        "PS_COMMENT",
    ]
    column_types = {
        "PS_PARTKEY": np.int64,
        "PS_SUPPKEY": np.int64,
        "PS_AVAILQTY": np.int64,
        "PS_SUPPLYCOST": np.float64,
        "PS_COMMENT": str,
    }
    # `rel` is the name listed in the decorator's `distributed` option.
    rel = pd.read_csv(
        path,
        sep="|",
        header=None,
        names=column_names,
        dtype=column_types,
    )
    print("Partsupp Reading time: ", ((time.time() - started) * 1000), " (ms)")
    return rel

# Load the partsupp table; only rank 0 shows a preview so the output is not
# repeated by every rank.
partsupp = load_partsupp("s3://bodo-examples-data/tpch/s2")
if bodo.get_rank() == 0:
    display(partsupp.head())

In [None]:
%%px

@bodo.jit(distributed=["rel"], cache=True)
def load_part(data_folder):
    """Read ``part.tbl`` from *data_folder* into a DataFrame.

    The file is read as header-less, ``|``-separated text with explicit
    column names and dtypes.  The elapsed read time is printed in
    milliseconds.
    """
    started = time.time()
    path = data_folder + "/part.tbl"
    # Column names in file order; the file has no header row.
    column_names = [
        "P_PARTKEY",
        "P_NAME",
        "P_MFGR",
        "P_BRAND",
        "P_TYPE",
        "P_SIZE",
        "P_CONTAINER",
        "P_RETAILPRICE",
        "P_COMMENT",
    ]
    column_types = {
        "P_PARTKEY": np.int64,
        "P_NAME": str,
        "P_MFGR": str,
        "P_BRAND": str,
        "P_TYPE": str,
        "P_SIZE": np.int64,
        "P_CONTAINER": str,
        "P_RETAILPRICE": np.float64,
        "P_COMMENT": str,
    }
    # `rel` is the name listed in the decorator's `distributed` option.
    rel = pd.read_csv(
        path,
        sep="|",
        header=None,
        names=column_names,
        dtype=column_types,
    )
    print("Part Reading time: ", ((time.time() - started) * 1000), " (ms)")
    return rel

# Load the part table; only rank 0 shows a preview so the output is not
# repeated by every rank.
part = load_part("s3://bodo-examples-data/tpch/s2")
if bodo.get_rank() == 0:
    display(part.head())

## Query Definitions

This section includes some of the queries using Python (Pandas)

### Q1: Pricing Summary Report Query
This query reports the amount of business that was billed, shipped, and returned.

Make sure you have run **`load_lineitem`** from [loading data section](#loading_data) before running this query.

In [None]:
%%px
@bodo.jit(cache=True)
def q1(lineitem):
    """Q1: pricing summary of line items shipped on or before 1998-09-02.

    Adds the derived columns ``DISC_PRICE`` (extended price after discount)
    and ``CHARGE`` (discounted price plus tax), aggregates per
    (``L_RETURNFLAG``, ``L_LINESTATUS``) pair, sorts by those two keys and
    returns the first 10 rows.  The execution time is printed in
    milliseconds.
    """
    started = time.time()
    shipped = lineitem[lineitem["L_SHIPDATE"] <= "1998-09-02"]
    shipped["DISC_PRICE"] = shipped["L_EXTENDEDPRICE"] * (1 - shipped["L_DISCOUNT"])
    shipped["CHARGE"] = (
        shipped["L_EXTENDEDPRICE"]
        * (1 - shipped["L_DISCOUNT"])
        * (1 + shipped["L_TAX"])
    )
    # Sums and means per group; the L_ORDERKEY count is the group's row count.
    summary = shipped.groupby(["L_RETURNFLAG", "L_LINESTATUS"], as_index=False).agg(
        {
            "L_QUANTITY": ["sum", "mean"],
            "L_EXTENDEDPRICE": ["sum", "mean"],
            "DISC_PRICE": "sum",
            "CHARGE": "sum",
            "L_DISCOUNT": "mean",
            "L_ORDERKEY": "count",
        }
    )
    summary = summary.sort_values(["L_RETURNFLAG", "L_LINESTATUS"])
    print("Execution time: ", ((time.time() - started) * 1000), " (ms)")
    return summary.head(10)

# Run Q1; only rank 0 displays the result so it is not repeated by every rank.
q1_result = q1(lineitem)
if bodo.get_rank() == 0:
    display(q1_result)

### Q3: Shipping Priority Query
This query retrieves the 10 unshipped orders with the highest value.

Make sure you have run **`load_lineitem`, `load_orders`, and `load_customer`** from [loading data section](#loading_data) before running this query.

In [None]:
%%px

@bodo.jit(cache=True)
def q3(lineitem, orders, customer):
    """Q3: the 10 highest-revenue orders not yet shipped as of 1995-03-04.

    Keeps "HOUSEHOLD" customers, orders placed before the cutoff date and
    line items shipped after it, joins the three tables and sums the
    discounted extended price (``TMP``) per
    (``L_ORDERKEY``, ``O_ORDERDATE``, ``O_SHIPPRIORITY``).

    Returns the first 10 rows with columns ``L_ORDERKEY``, ``TMP``,
    ``O_ORDERDATE`` and ``O_SHIPPRIORITY``, ordered by revenue descending and
    then by order date ascending.  The execution time is printed in
    milliseconds.
    """
    date = "1995-03-04"
    t1 = time.time()
    lsel = lineitem.L_SHIPDATE > date
    osel = orders.O_ORDERDATE < date
    csel = customer.C_MKTSEGMENT == "HOUSEHOLD"
    flineitem = lineitem[lsel]
    forders = orders[osel]
    fcustomer = customer[csel]
    jn1 = fcustomer.merge(forders, left_on="C_CUSTKEY", right_on="O_CUSTKEY")
    jn2 = jn1.merge(flineitem, left_on="O_ORDERKEY", right_on="L_ORDERKEY")

    # Revenue of a line item: extended price after discount.
    jn2["TMP"] = jn2.L_EXTENDEDPRICE * (1 - jn2.L_DISCOUNT)

    # The TPC-H definition of Q3 orders by revenue descending and then by
    # order date; the second key makes the order of equal-revenue rows (and
    # therefore the top 10) well defined.
    total = (
        jn2.groupby(
            ["L_ORDERKEY", "O_ORDERDATE", "O_SHIPPRIORITY"], as_index=False
        )["TMP"]
        .sum()
        .sort_values(["TMP", "O_ORDERDATE"], ascending=[False, True])
    )
    res = total[["L_ORDERKEY", "TMP", "O_ORDERDATE", "O_SHIPPRIORITY"]]

    print("Execution time: ", ((time.time() - t1) * 1000), " (ms)")
    return res.head(10)

# Run Q3; only rank 0 displays the result so it is not repeated by every rank.
q3_result = q3(lineitem, orders, customer)
if bodo.get_rank() == 0:
    display(q3_result)

### Q4: Order Priority Checking Query
This query determines how well the order priority system is working and gives an assessment of customer satisfaction.

Make sure you have run **`load_lineitem` and `load_orders`** from [loading data section](#loading_data) before running this query.

In [None]:
%%px
@bodo.jit(cache=True)
def q4(lineitem, orders):
    """Q4: count orders per priority that have a late line item.

    Considers orders dated from 1993-08-01 (inclusive) to 1993-11-01
    (exclusive) whose key appears among the line items committed before
    their receipt date.  Returns the first 10 rows of the per-
    ``O_ORDERPRIORITY`` counts, sorted by priority.  The execution time is
    printed in milliseconds.
    """
    window_end = "1993-11-01"
    window_start = "1993-08-01"
    started = time.time()
    late_items = lineitem[lineitem.L_COMMITDATE < lineitem.L_RECEIPTDATE]
    in_window = (orders.O_ORDERDATE >= window_start) & (
        orders.O_ORDERDATE < window_end
    )
    window_orders = orders[in_window]
    # Semi-join: keep an order once if any of its line items was late.
    has_late_item = window_orders["O_ORDERKEY"].isin(late_items["L_ORDERKEY"])
    matched = window_orders[has_late_item]
    counts = matched.groupby("O_ORDERPRIORITY", as_index=False)["O_ORDERKEY"].count()
    total = counts.sort_values(["O_ORDERPRIORITY"])
    print("Execution time: ", ((time.time() - started) * 1000), " (ms)")
    return total.head(10)

# Run Q4; only rank 0 displays the result so it is not repeated by every rank.
q4_result = q4(lineitem, orders)
if bodo.get_rank() == 0:
    display(q4_result)

### Q6: Forecasting Revenue Change Query
This query quantifies the amount of revenue increase that would have resulted from eliminating certain company-wide discounts in a given percentage range in a given year.

Make sure you have run **`load_lineitem`** from [loading data section](#loading_data) before running this query.

In [None]:
%%px
@bodo.jit(cache=True)
def q6(lineitem):
    """Q6: sum of ``L_EXTENDEDPRICE * L_DISCOUNT`` over selected line items.

    Selects line items shipped from 1996-01-01 (inclusive) to 1997-01-01
    (exclusive) with a discount between 0.08 and 0.1 inclusive and a
    quantity below 24.  Prints the execution time in milliseconds and the
    total, then returns the total.
    """
    year_start = "1996-01-01"
    year_end = "1997-01-01"
    started = time.time()
    in_year = (lineitem.L_SHIPDATE >= year_start) & (lineitem.L_SHIPDATE < year_end)
    in_discount_band = (lineitem.L_DISCOUNT >= 0.08) & (lineitem.L_DISCOUNT <= 0.1)
    small_quantity = lineitem.L_QUANTITY < 24
    selected = lineitem[in_year & in_discount_band & small_quantity]
    total = (selected.L_EXTENDEDPRICE * selected.L_DISCOUNT).sum()
    print("Execution time: ", ((time.time() - started) * 1000), " (ms)")
    print(total)
    return total

# Run Q6; the function prints its total itself, so nothing is displayed here.
q6_result = q6(lineitem)

### Q9: Product Type Profit Measure Query
This query determines how much profit is made on a given line of parts, broken out by supplier nation and year.

Make sure you have run **`load_lineitem`, `load_orders`, `load_part`, `load_nation`, `load_partsupp`, and `load_supplier`** from [loading data section](#loading_data) before running this query.

In [None]:
%%px
@bodo.jit(cache=True)
def q9(lineitem, orders, part, nation, partsupp, supplier):
    """Q9: profit per supplier nation and order year for "ghost" parts.

    Keeps parts whose name contains "ghost", joins them with line items,
    suppliers, nations, part-supplier records and orders, and computes per
    line item ``TMP`` = discounted extended price minus supply cost times
    quantity.  Sums ``TMP`` per (``N_NAME``, ``O_YEAR``) and returns the
    first 10 rows sorted by nation ascending and year descending.  The
    execution time is printed in milliseconds.
    """
    started = time.time()
    ghost_parts = part[part.P_NAME.str.contains("ghost")]
    with_part = lineitem.merge(ghost_parts, left_on="L_PARTKEY", right_on="P_PARTKEY")
    with_supplier = with_part.merge(
        supplier, left_on="L_SUPPKEY", right_on="S_SUPPKEY"
    )
    with_nation = with_supplier.merge(
        nation, left_on="S_NATIONKEY", right_on="N_NATIONKEY"
    )
    with_partsupp = partsupp.merge(
        with_nation,
        left_on=["PS_PARTKEY", "PS_SUPPKEY"],
        right_on=["L_PARTKEY", "L_SUPPKEY"],
    )
    joined = with_partsupp.merge(orders, left_on="L_ORDERKEY", right_on="O_ORDERKEY")
    # Profit of a line item: discounted revenue minus the cost of supply.
    joined["TMP"] = joined.L_EXTENDEDPRICE * (1 - joined.L_DISCOUNT) - (
        (1 * joined.PS_SUPPLYCOST) * joined.L_QUANTITY
    )
    joined["O_YEAR"] = joined.O_ORDERDATE.dt.year
    profit = joined.groupby(["N_NAME", "O_YEAR"], as_index=False)["TMP"].sum()
    total = profit.sort_values(["N_NAME", "O_YEAR"], ascending=[True, False])
    print("Execution time: ", ((time.time() - started) * 1000), " (ms)")
    return total.head(10)

# Run Q9; only rank 0 displays the result so it is not repeated by every rank.
q9_result = q9(lineitem, orders, part, nation, partsupp, supplier)
if bodo.get_rank() == 0:
    display(q9_result)

### Q10: Returned Item Reporting Query
This query identifies customers who might be having problems with the parts that are shipped to them.

Make sure you have run **`load_lineitem`, `load_orders`, `load_customer`, and `load_nation`** from [loading data section](#loading_data) before running this query.

In [None]:
%%px
@bodo.jit(cache=True)
def q10(lineitem, orders, customer, nation):
    """Q10: customers ranked by revenue lost on returned items.

    Joins line items flagged "R" with orders dated from 1994-11-01
    (inclusive) to 1995-02-01 (exclusive), then with customers and nations.
    Sums the discounted extended price (``TMP``) per customer and returns
    the first 10 rows sorted by ``TMP`` descending.  The execution time is
    printed in milliseconds.
    """
    window_start = "1994-11-01"
    window_end = "1995-02-01"
    started = time.time()
    in_window = (orders.O_ORDERDATE >= window_start) & (
        orders.O_ORDERDATE < window_end
    )
    window_orders = orders[in_window]
    returned_items = lineitem[lineitem.L_RETURNFLAG == "R"]
    with_orders = returned_items.merge(
        window_orders, left_on="L_ORDERKEY", right_on="O_ORDERKEY"
    )
    with_customer = with_orders.merge(
        customer, left_on="O_CUSTKEY", right_on="C_CUSTKEY"
    )
    joined = with_customer.merge(
        nation, left_on="C_NATIONKEY", right_on="N_NATIONKEY"
    )
    joined["TMP"] = joined.L_EXTENDEDPRICE * (1.0 - joined.L_DISCOUNT)
    # The customer attributes are all grouping keys so they appear in the output.
    revenue = joined.groupby(
        [
            "C_CUSTKEY",
            "C_NAME",
            "C_ACCTBAL",
            "C_PHONE",
            "N_NAME",
            "C_ADDRESS",
            "C_COMMENT",
        ],
        as_index=False,
    )["TMP"].sum()
    total = revenue.sort_values("TMP", ascending=False)
    print("Execution time: ", ((time.time() - started) * 1000), " (ms)")
    return total.head(10)

# Run Q10; only rank 0 displays the result so it is not repeated by every rank.
q10_result = q10(lineitem, orders, customer, nation)
if bodo.get_rank() == 0:
    display(q10_result)

### Q12: Shipping Modes and Order Priority Query
This query determines whether selecting less expensive modes of shipping is negatively affecting the critical-priority orders by causing more parts to be received by customers after the committed date.

Make sure you have run **`load_lineitem` and `load_orders`** from [loading data section](#loading_data) before running this query.

In [None]:
%%px
@bodo.jit(cache=True)
def q12(lineitem, orders):
    """Q12: high- vs. low-priority order counts per ship mode for late items.

    Selects "MAIL" and "SHIP" line items that were shipped before their
    commit date, committed before their receipt date, and received from
    1994-01-01 (inclusive) to 1995-01-01 (exclusive).  After joining with
    orders, counts per ``L_SHIPMODE`` the rows whose priority is
    "1-URGENT"/"2-HIGH" (``g1``) and the rows with any other priority
    (``g2``).  Returns the first 10 rows sorted by ship mode.  The execution
    time is printed in milliseconds.
    """
    window_start = "1994-01-01"
    window_end = "1995-01-01"
    started = time.time()
    all_before_end = (
        (lineitem.L_RECEIPTDATE < window_end)
        & (lineitem.L_COMMITDATE < window_end)
        & (lineitem.L_SHIPDATE < window_end)
    )
    shipped_then_committed_then_received = (
        lineitem.L_SHIPDATE < lineitem.L_COMMITDATE
    ) & (lineitem.L_COMMITDATE < lineitem.L_RECEIPTDATE)
    received_in_window = lineitem.L_RECEIPTDATE >= window_start
    mail_or_ship = (lineitem.L_SHIPMODE == "MAIL") | (lineitem.L_SHIPMODE == "SHIP")
    selected = lineitem[
        all_before_end
        & shipped_then_committed_then_received
        & received_in_window
        & mail_or_ship
    ]
    joined = selected.merge(orders, left_on="L_ORDERKEY", right_on="O_ORDERKEY")

    # The aggregation is handed g1 and g2 by name, and those names may label
    # the output columns, so the helpers keep their names.
    def g1(x):
        # number of urgent or high priority entries
        return ((x == "1-URGENT") | (x == "2-HIGH")).sum()

    def g2(x):
        # number of entries with any other priority
        return ((x != "1-URGENT") & (x != "2-HIGH")).sum()

    counts = joined.groupby("L_SHIPMODE", as_index=False)["O_ORDERPRIORITY"].agg(
        (g1, g2)
    )
    total = counts.sort_values("L_SHIPMODE")
    print("Execution time: ", ((time.time() - started) * 1000), " (ms)")
    return total.head(10)

# Run Q12; only rank 0 displays the result so it is not repeated by every rank.
q12_result = q12(lineitem, orders)
if bodo.get_rank() == 0:
    display(q12_result)

### Q14: Promotion Effect Query
This query monitors the market response to a promotion such as TV advertisements or a special campaign.

Make sure you have run **`load_lineitem`** and **`load_part`** from [loading data section](#loading_data) before running this query.

In [None]:
%%px
@bodo.jit(cache=True)
def q14(lineitem, part):
    """Q14: percentage of revenue that comes from promotional parts.

    Joins line items shipped from 1994-03-01 (inclusive) to 1994-04-01
    (exclusive) with parts, then divides the discounted revenue of parts
    whose type starts with "PROMO" (times 100) by the discounted revenue of
    all joined rows.  Prints the execution time in milliseconds and the
    percentage, then returns the percentage.
    """
    window_start = "1994-03-01"
    window_end = "1994-04-01"
    promo_prefix = "PROMO"
    started = time.time()
    in_window = (lineitem.L_SHIPDATE >= window_start) & (
        lineitem.L_SHIPDATE < window_end
    )
    joined = lineitem[in_window].merge(
        part, left_on="L_PARTKEY", right_on="P_PARTKEY"
    )
    joined["TMP"] = joined.L_EXTENDEDPRICE * (1.0 - joined.L_DISCOUNT)
    promo_rows = joined[joined.P_TYPE.str.startswith(promo_prefix)]
    total = promo_rows.TMP.sum() * 100 / joined.TMP.sum()
    print("Execution time: ", ((time.time() - started) * 1000), " (ms)")
    print(total)
    return total

# Run Q14; the function prints its result itself, so nothing is displayed here.
q14_result = q14(lineitem, part)

### Q18: Large Volume Customer Query
This query ranks customers based on their having placed a large quantity order. Large quantity orders are defined as those orders whose total quantity is above a certain level.

Make sure you have run **`load_lineitem`, `load_orders`, and `load_customer`** from [loading data section](#loading_data) before running this query.

In [None]:
%%px
@bodo.jit(cache=True)
def q18(lineitem, orders, customer):
    """Q18: customers who placed orders with a total quantity above 300.

    Sums ``L_QUANTITY`` per order, keeps orders whose sum exceeds 300, joins
    them with orders and customers, and re-aggregates the quantity per
    (customer name, customer key, order key, order date, total price).
    Returns the first 10 rows sorted by total price descending and order
    date ascending.  The execution time is printed in milliseconds.
    """
    started = time.time()
    quantity_per_order = lineitem.groupby("L_ORDERKEY", as_index=False)[
        "L_QUANTITY"
    ].sum()
    large_orders = quantity_per_order[quantity_per_order.L_QUANTITY > 300]
    with_orders = large_orders.merge(
        orders, left_on="L_ORDERKEY", right_on="O_ORDERKEY"
    )
    with_customer = with_orders.merge(
        customer, left_on="O_CUSTKEY", right_on="C_CUSTKEY"
    )
    per_customer_order = with_customer.groupby(
        ["C_NAME", "C_CUSTKEY", "O_ORDERKEY", "O_ORDERDATE", "O_TOTALPRICE"],
        as_index=False,
    )["L_QUANTITY"].sum()
    total = per_customer_order.sort_values(
        ["O_TOTALPRICE", "O_ORDERDATE"], ascending=[False, True]
    )
    print("Execution time: ", ((time.time() - started) * 1000), " (ms)")
    return total.head(10)

# Run Q18; only rank 0 displays the result so it is not repeated by every rank.
q18_result = q18(lineitem, orders, customer)
if bodo.get_rank() == 0:
    display(q18_result)

### Q19: Discounted Revenue Query
This query reports the gross discounted revenue attributed to the sale of selected parts handled in a particular manner.

Make sure you have run **`load_lineitem`** and **`load_part`** from [loading data section](#loading_data) before running this query.

In [None]:
%%px
@bodo.jit(cache=True)
def q19(lineitem, part):
    """Q19: discounted revenue for three brand/container/quantity combinations.

    Pre-filters line items (quantity between 4 and 36, shipping instruction
    "DELIVER IN PERSON", ship mode "AIR" or "AIR REG") and parts (size,
    brand and container combinations), joins them on the part key, and then
    applies the combined predicate that ties each brand/container group to
    its own quantity and size range:

    * Brand#31, SM containers, quantity 4-14, size up to 5
    * Brand#43, MED containers, quantity 15-25, size up to 10
    * Brand#43, LG containers, quantity 26-36, size up to 15

    Prints the execution time in milliseconds and the summed discounted
    extended price, then returns that sum.
    """
    Brand31 = "Brand#31"
    Brand43 = "Brand#43"
    SMBOX = "SM BOX"
    SMCASE = "SM CASE"
    SMPACK = "SM PACK"
    SMPKG = "SM PKG"
    MEDBAG = "MED BAG"
    MEDBOX = "MED BOX"
    MEDPACK = "MED PACK"
    MEDPKG = "MED PKG"
    LGBOX = "LG BOX"
    LGCASE = "LG CASE"
    LGPACK = "LG PACK"
    LGPKG = "LG PKG"
    DELIVERINPERSON = "DELIVER IN PERSON"
    AIR = "AIR"
    # Ship-mode literal as written in the TPC-H Q19 query text.
    AIRREG = "AIR REG"
    t1 = time.time()
    # Cheap pre-filter on lineitem: any of the three quantity ranges plus the
    # shipping conditions that are common to all three groups.
    lsel = (
        (
            ((lineitem.L_QUANTITY <= 36) & (lineitem.L_QUANTITY >= 26))
            | ((lineitem.L_QUANTITY <= 25) & (lineitem.L_QUANTITY >= 15))
            | ((lineitem.L_QUANTITY <= 14) & (lineitem.L_QUANTITY >= 4))
        )
        & (lineitem.L_SHIPINSTRUCT == DELIVERINPERSON)
        & ((lineitem.L_SHIPMODE == AIR) | (lineitem.L_SHIPMODE == AIRREG))
    )
    # Pre-filter on part: any of the three brand/container/size groups.
    psel = (part.P_SIZE >= 1) & (
        (
            (part.P_SIZE <= 5)
            & (part.P_BRAND == Brand31)
            & (
                (part.P_CONTAINER == SMBOX)
                | (part.P_CONTAINER == SMCASE)
                | (part.P_CONTAINER == SMPACK)
                | (part.P_CONTAINER == SMPKG)
            )
        )
        | (
            (part.P_SIZE <= 10)
            & (part.P_BRAND == Brand43)
            & (
                (part.P_CONTAINER == MEDBAG)
                | (part.P_CONTAINER == MEDBOX)
                | (part.P_CONTAINER == MEDPACK)
                | (part.P_CONTAINER == MEDPKG)
            )
        )
        | (
            (part.P_SIZE <= 15)
            & (part.P_BRAND == Brand43)
            & (
                (part.P_CONTAINER == LGBOX)
                | (part.P_CONTAINER == LGCASE)
                | (part.P_CONTAINER == LGPACK)
                | (part.P_CONTAINER == LGPKG)
            )
        )
    )
    flineitem = lineitem[lsel]
    fpart = part[psel]
    jn = flineitem.merge(fpart, left_on="L_PARTKEY", right_on="P_PARTKEY")
    # The pre-filters are independent of each other, so the join can pair a
    # part of one group with a quantity of another; this predicate keeps only
    # matching pairs.  Each `&` group is parenthesised explicitly (`&` already
    # binds tighter than `|`, so the grouping is unchanged).
    jnsel = (
        (
            (jn.P_BRAND == Brand31)
            & (
                (jn.P_CONTAINER == SMBOX)
                | (jn.P_CONTAINER == SMCASE)
                | (jn.P_CONTAINER == SMPACK)
                | (jn.P_CONTAINER == SMPKG)
            )
            & (jn.L_QUANTITY >= 4)
            & (jn.L_QUANTITY <= 14)
            & (jn.P_SIZE <= 5)
        )
        | (
            (jn.P_BRAND == Brand43)
            & (
                (jn.P_CONTAINER == MEDBAG)
                | (jn.P_CONTAINER == MEDBOX)
                | (jn.P_CONTAINER == MEDPACK)
                | (jn.P_CONTAINER == MEDPKG)
            )
            & (jn.L_QUANTITY >= 15)
            & (jn.L_QUANTITY <= 25)
            & (jn.P_SIZE <= 10)
        )
        | (
            (jn.P_BRAND == Brand43)
            & (
                (jn.P_CONTAINER == LGBOX)
                | (jn.P_CONTAINER == LGCASE)
                | (jn.P_CONTAINER == LGPACK)
                | (jn.P_CONTAINER == LGPKG)
            )
            & (jn.L_QUANTITY >= 26)
            & (jn.L_QUANTITY <= 36)
            & (jn.P_SIZE <= 15)
        )
    )
    jn = jn[jnsel]
    total = (jn.L_EXTENDEDPRICE * (1.0 - jn.L_DISCOUNT)).sum()
    print("Execution time: ", ((time.time() - t1) * 1000), " (ms)")
    print(total)
    return total

# Run Q19; the function prints its total itself, so nothing is displayed here.
q19_result = q19(lineitem, part)

### Q20: Potential Part Promotion Query
This query identifies suppliers in a particular nation having selected parts that may be candidates for a promotional offer.

Make sure you have run **`load_lineitem`, `load_part`, `load_nation`, `load_partsupp`, and `load_supplier`** from [loading data section](#loading_data) before running this query.

In [None]:
%%px
@bodo.jit(cache=True)
def q20(lineitem, part, nation, partsupp, supplier):
    """Q20: suppliers in JORDAN holding surplus stock of "azure" parts.

    Joins parts whose name starts with "azure" with their part-supplier
    records and with line items shipped from 1996-01-01 (inclusive) to
    1997-01-01 (exclusive), sums the shipped quantity per
    (part, supplier, available quantity), and keeps the pairs whose
    available quantity exceeds half of that sum.  The remaining suppliers
    are joined with the "JORDAN" nation row.

    Returns the first 10 distinct (``S_NAME``, ``S_ADDRESS``) rows ordered by
    ``S_NAME``.  The execution time is printed in milliseconds.
    """
    date1 = "1996-01-01"
    date2 = "1997-01-01"
    t1 = time.time()
    psel = part.P_NAME.str.startswith("azure")
    nsel = nation.N_NAME == "JORDAN"
    lsel = (lineitem.L_SHIPDATE >= date1) & (lineitem.L_SHIPDATE < date2)
    fpart = part[psel]
    fnation = nation[nsel]
    flineitem = lineitem[lsel]
    jn1 = fpart.merge(partsupp, left_on="P_PARTKEY", right_on="PS_PARTKEY")
    jn2 = jn1.merge(
        flineitem,
        left_on=["PS_PARTKEY", "PS_SUPPKEY"],
        right_on=["L_PARTKEY", "L_SUPPKEY"],
    )
    gb = jn2.groupby(["PS_PARTKEY", "PS_SUPPKEY", "PS_AVAILQTY"], as_index=False)[
        "L_QUANTITY"
    ].sum()
    gbsel = gb.PS_AVAILQTY > (0.5 * gb.L_QUANTITY)
    fgb = gb[gbsel]
    jn3 = fgb.merge(supplier, left_on="PS_SUPPKEY", right_on="S_SUPPKEY")
    jn4 = fnation.merge(jn3, left_on="N_NATIONKEY", right_on="S_NATIONKEY")
    jn4 = jn4[["S_NAME", "S_ADDRESS"]]
    # Remove duplicates before sorting: fewer rows are sorted, and the sort is
    # the last operation, so the final order does not depend on whether the
    # de-duplication step preserves row order.
    total = jn4.drop_duplicates().sort_values("S_NAME")
    print("Execution time: ", ((time.time() - t1) * 1000), " (ms)")
    return total.head(10)

# Run Q20; only rank 0 displays the result so it is not repeated by every rank.
q20_result = q20(lineitem, part, nation, partsupp, supplier)
if bodo.get_rank() == 0:
    display(q20_result)