In [1]:
import pandas as pd
import duckdb

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [2]:
# Setup script for the examples below, kept as one multi-statement SQL string:
#   1. drop any existing `orders` table so the cell can be re-run safely,
#   2. recreate it with four columns (order_id, customer_id, product_category, amount),
#   3. insert six sample orders spread over three customers and three categories,
#   4. finish with a SELECT so that executing the script yields the inserted rows.
table_creation_query = """
drop table if exists orders;

create table orders (order_id int, customer_id int, product_category varchar, amount float);

insert into orders values
  (122154, 1, 'tea', 4.5),
  (122453, 2, 'chocolate', 5.0),
  (122476, 1, 'coffee', 4.0),
  (122783, 3, 'tea', 6.0),
  (122378, 1, 'chocolate', 5.0),
  (122157, 2, 'coffee', 5.5)
  ;
  
SELECT * FROM orders"""

In [3]:
# Execute the setup script in DuckDB, then materialise the rows returned by its
# final SELECT as a pandas DataFrame for the SQL and pandas examples below.
orders_relation = duckdb.sql(table_creation_query)
df = orders_relation.df()

# 1. Order by a single column

## 1a. SQL

In [4]:
# Keep only customer_id and sort on it. No ASC/DESC keyword is given, so the
# SQL default direction (ascending) applies, as the output below shows.
# `FROM df` refers to the pandas DataFrame `df`, which DuckDB queries directly.
sql_query = """ 
    SELECT 
        customer_id
    FROM df
    ORDER BY customer_id
    """

# Last expression of the cell: the resulting relation is displayed as a table.
duckdb.sql(sql_query)

┌─────────────┐
│ customer_id │
│    int32    │
├─────────────┤
│           1 │
│           1 │
│           1 │
│           2 │
│           2 │
│           3 │
└─────────────┘

## 1b. Pandas

In [5]:
# pandas counterpart of `SELECT customer_id ... ORDER BY customer_id`:
# the double brackets keep a one-column DataFrame, and sort_values sorts
# ascending by default. The original row labels are kept in the index.
(
    df[["customer_id"]]
    .sort_values(by="customer_id")
)

Unnamed: 0,customer_id
0,1
2,1
4,1
1,2
5,2
3,3


# 2. Order by multiple columns

## 2a. SQL

In [6]:
# Sort on two keys. DESC applies only to the column it follows, so rows are
# ordered by customer_id ascending (the default) and, within each customer,
# by product_category descending.
# `FROM df` refers to the pandas DataFrame `df`, which DuckDB queries directly.
sql_query = """ 
    SELECT 
        customer_id,
        product_category
    FROM df
    ORDER BY customer_id, product_category DESC
    """

# Last expression of the cell: the resulting relation is displayed as a table.
duckdb.sql(sql_query)

┌─────────────┬──────────────────┐
│ customer_id │ product_category │
│    int32    │     varchar      │
├─────────────┼──────────────────┤
│           1 │ tea              │
│           1 │ coffee           │
│           1 │ chocolate        │
│           2 │ coffee           │
│           2 │ chocolate        │
│           3 │ tea              │
└─────────────┴──────────────────┘

## 2b. Pandas

In [7]:
# pandas counterpart of `ORDER BY customer_id, product_category DESC`:
# `ascending` takes one flag per entry in `by`, in the same order, so
# customer_id sorts ascending and product_category descending.
(
    df[["customer_id", "product_category"]]
    .sort_values(
        by=["customer_id", "product_category"],
        ascending=[True, False],
    )
)

Unnamed: 0,customer_id,product_category
0,1,tea
2,1,coffee
4,1,chocolate
5,2,coffee
1,2,chocolate
3,3,tea


# Summary

<img src="assets/orderby.png" alt="Summary: SQL ORDER BY compared with pandas sort_values" width="1000" />
