In [3]:
import pandas as pd
import duckdb

# NOTE(review): NUMEXPR_INSTALLED is not referenced in any visible cell. This
# import of a pandas-internal module is presumably kept only for its import
# side effect (loading pandas.core.computation before pandas/DuckDB interop
# is used) -- confirm before removing it.
from pandas.core.computation.check import NUMEXPR_INSTALLED


In [4]:
# SQL setup script: drops any existing `product` table, recreates it, inserts
# five sample rows, and ends with a SELECT that returns the whole table.
# Nothing is executed in this cell; it only stores the script text.
table_creation_query = """
drop table if exists product;

create table product (product_id int, product_name varchar, price float);

insert into product values
  (1, 'computer', 800),
  (2, 'printer', 600),
  (3, 'tablet', 400),
  (4, 'desk', 100),
  (5, 'chair', 50);
  
SELECT * FROM product"""

In [5]:
# Execute the setup script with DuckDB, then convert the rows returned by its
# final SELECT into a pandas DataFrame.
product_relation = duckdb.sql(table_creation_query)
product = product_relation.df()

# 1. SQL

In [7]:
# Filter in SQL: keep only the products whose price is strictly above 500.
sql_query = """ 
    SELECT 
        product_id,
        product_name,
        price
    FROM product
    WHERE price > 500"""

# As the cell's last expression, the query result is displayed as the output.
duckdb.sql(sql_query)

┌────────────┬──────────────┬───────┐
│ product_id │ product_name │ price │
│   int32    │   varchar    │ float │
├────────────┼──────────────┼───────┤
│          1 │ computer     │ 800.0 │
│          2 │ printer      │ 600.0 │
└────────────┴──────────────┴───────┘

# 2. Pandas

## 2a. Pandas: basic filtering

In [5]:
# Boolean-mask filtering: the comparison yields a True/False Series, and
# indexing the frame with it keeps only the rows where price > 500.
product[product["price"] > 500]

Unnamed: 0,product_id,product_name,price
0,1,computer,800.0
1,2,printer,600.0


## 2b. Pandas: using `loc`

In [6]:
# A boolean mask (price > 500) passed to the `.loc` indexer as the row
# selector; only the rows where the mask is True are returned.
product.loc[product["price"] > 500]

Unnamed: 0,product_id,product_name,price
0,1,computer,800.0
1,2,printer,600.0


## 2c. Pandas: using `query`

In [7]:
# `query` takes the filter as a string expression evaluated against the
# frame's columns, so `price` here refers to product["price"].
product.query("price > 500")

Unnamed: 0,product_id,product_name,price
0,1,computer,800.0
1,2,printer,600.0


# Summary
<p align="center">
<img src="assets/filtering.png" alt="Summary of the SQL and pandas filtering approaches" width="400" />
</p>