In [1]:
import pandas as pd
import sqlite3
import sql
# Sets a `style` attribute on the imported `sql` module before the extension is loaded.
# NOTE(review): presumably meant to select a plain result-table style; confirm this
# attribute is actually read by the extension (vs. `%config SqlMagic.style = ...`).
sql.style = 'plain'
# Register the %sql / %%sql magics used by every query cell in this notebook.
%load_ext sql
# Direct sqlite3 connection to the database file.
# NOTE(review): `connection` is not used by any cell visible here and is never closed;
# the %%sql cells go through the separate connection opened by `%sql` a few lines down.
connection=sqlite3.connect("question_bank.db")
print("Successfully connected to SQL database")
# Connect the SQL magic to the same SQLite file; later %%sql cells run against it.
%sql sqlite:///question_bank.db
print("success")

Successfully connected to SQL database
success


In [2]:
%%sql
-- Create Orders table.
-- question_bank.db is a file on disk, so the table outlives the kernel.
-- IF NOT EXISTS keeps this cell re-runnable (Restart & Run All) instead of
-- failing with "table Orders already exists" on the second run.
CREATE TABLE IF NOT EXISTS Orders (
    order_id INTEGER PRIMARY KEY,
    customer_id INTEGER,
    order_date DATE
);

-- Insert sample data.
-- OR REPLACE overwrites a row whose order_id is already present, so re-running
-- the cell resets the six sample rows rather than raising a PRIMARY KEY conflict.
-- Dates are ISO-8601 strings (YYYY-MM-DD), which sort chronologically as text.
INSERT OR REPLACE INTO Orders (order_id, customer_id, order_date) VALUES
(1, 101, '2024-01-01'),
(2, 102, '2024-01-02'),
(3, 101, '2024-01-05'),
(4, 102, '2024-01-03'),
(5, 103, '2024-01-04'),
(6, 101, '2024-01-10');

 * sqlite:///question_bank.db
Done.
6 rows affected.


[]

In [3]:
%%sql
-- Sanity check: show every row of the sample table, columns in schema order.
SELECT order_id,
       customer_id,
       order_date
FROM Orders;

 * sqlite:///question_bank.db
Done.


order_id,customer_id,order_date
1,101,2024-01-01
2,102,2024-01-02
3,101,2024-01-05
4,102,2024-01-03
5,103,2024-01-04
6,101,2024-01-10


In [4]:
%%sql

-- Goal: most recent order (order_date) for each customer.
-- "each customer" -> PARTITION BY customer_id (a window partition, not GROUP BY,
--                    so every order row is kept and numbered).
-- "most recent"   -> number the rows newest-first inside each partition.
-- order_id DESC is a tie-breaker: without it, two orders by the same customer on
-- the same date would be numbered in an arbitrary order.
-- (assumes a higher order_id means a later order; any unique column would do)

SELECT customer_id, order_date,
       ROW_NUMBER() OVER (
           PARTITION BY customer_id
           ORDER BY order_date DESC, order_id DESC
       ) AS ranked_row
FROM Orders

 * sqlite:///question_bank.db
Done.


customer_id,order_date,ranked_row
101,2024-01-10,1
101,2024-01-05,2
101,2024-01-01,3
102,2024-01-03,1
102,2024-01-02,2
103,2024-01-04,1


In [5]:
%%sql

-- Latest order per customer: number each customer's orders newest-first in a CTE,
-- then keep only the row numbered 1.
-- order_id DESC breaks ties so that, if a customer has several orders on the same
-- most-recent date, the same row is returned on every run instead of an arbitrary one.
-- (assumes a higher order_id means a later order; any unique column would do)
WITH rc AS (
    SELECT customer_id, order_date,
           ROW_NUMBER() OVER (
               PARTITION BY customer_id
               ORDER BY order_date DESC, order_id DESC
           ) AS ranked
    FROM Orders
)

SELECT customer_id, order_date, ranked
FROM rc
WHERE ranked = 1

 * sqlite:///question_bank.db
Done.


customer_id,order_date,ranked
101,2024-01-10,1
102,2024-01-03,1
103,2024-01-04,1


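* TL;DR – Use `ROW_NUMBER()` instead of `RANK()` here, since `RANK()` gives the same rank to rows that share the same date.
* `ROW_NUMBER()` gives every row in a partition a unique number even when dates tie, so exactly one row per customer gets 1; which of the tied rows is picked is arbitrary unless the `ORDER BY` includes a tie-breaker.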
* ROW_NUMBER() → if you want only one "latest" order even if there are ties
* RANK() → if you want to include all tied rows for the most recent date