# **SAMPLE DATASET**


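The cell below builds a small mock e-commerce sales dataset (20 transactions with a date, category, product, amount, and region) and loads it into an in-memory SQLite table named `sales`, which the SQL queries in the following sections read from.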



In [None]:
import pandas as pd
import sqlite3

# --------------------------------------------
# MOCK E-COMMERCE SALES DATA (ONE TUPLE PER SALE)
# --------------------------------------------
# Each record is (date, category, product, amount, region). Transaction ids
# are not stored in the records; they are numbered sequentially from 101
# when the column dict is assembled below.
sales_fields = ("date", "category", "product", "amount", "region")
sales_rows = [
    ("2024-01-15", "Electronics", "Laptop", 1200, "North"),
    ("2024-01-16", "Electronics", "Headphones", 150, "North"),
    ("2024-01-20", "Clothing", "T-Shirt", 25, "East"),
    ("2024-02-01", "Home", "Blender", 80, "West"),
    ("2024-02-05", "Electronics", "Monitor", 300, "North"),
    ("2024-02-10", "Clothing", "Jeans", 50, "East"),
    ("2024-03-01", "Home", "Sofa", 500, "West"),
    ("2024-03-05", "Electronics", "Mouse", 40, "South"),
    ("2024-03-10", "Home", "Lamp", 60, "West"),
    ("2024-03-12", "Clothing", "Jacket", 120, "East"),
    ("2024-01-15", "Electronics", "Laptop", 1200, "South"),
    ("2024-01-18", "Home", "Table", 150, "West"),
    ("2024-02-20", "Clothing", "Sneakers", 80, "North"),
    ("2024-02-22", "Electronics", "Headphones", 150, "South"),
    ("2024-03-15", "Home", "Rug", 100, "East"),
    ("2024-03-18", "Clothing", "Hat", 20, "East"),
    ("2024-01-05", "Electronics", "Tablet", 400, "North"),
    ("2024-02-14", "Home", "Chair", 200, "South"),
    ("2024-03-25", "Clothing", "Scarf", 30, "West"),
    ("2024-03-30", "Electronics", "Keyboard", 80, "South"),
]

# Pivot the row records into the column-oriented dict handed to pandas.
# zip(*sales_rows) transposes the rows into one tuple of values per field.
data = {
    "transaction_id": range(101, 101 + len(sales_rows)),
    **{
        field: list(values)
        for field, values in zip(sales_fields, zip(*sales_rows))
    },
}

df = pd.DataFrame(data)

# --------------------------------------------
# LOAD THE FRAME INTO AN IN-MEMORY SQLITE DATABASE
# --------------------------------------------
# ':memory:' asks sqlite3 for a database that lives in RAM instead of a file.
conn = sqlite3.connect(":memory:")

# Store the rows as table 'sales'; index=False keeps the DataFrame index
# from being written as an extra column.
df.to_sql(name="sales", con=conn, index=False)

print("Database created and 'sales' table added!")
df.head()

In [None]:
# Quick helper to run SQL queries
def run_query(q, params=None, connection=None):
    """Run a SQL query and return the result set as a pandas DataFrame.

    Parameters
    ----------
    q : str
        SQL text to execute. Put placeholders (``?`` for sqlite3) wherever a
        value varies instead of formatting the value into the string.
    params : sequence or dict, optional
        Values bound to the placeholders in ``q``. Forwarded unchanged to
        ``pd.read_sql``; ``None`` (the default) runs ``q`` as-is.
    connection : optional
        Database connection to query. When omitted, the notebook-level
        ``conn`` is used, which matches the original one-argument behavior.

    Returns
    -------
    pandas.DataFrame
        Whatever ``pd.read_sql`` returns for the query.
    """
    # Resolve the global lazily so an explicit connection works even in a
    # context where no notebook-level ``conn`` exists.
    target = conn if connection is None else connection
    return pd.read_sql(q, target, params=params)


# **ADVANCED FILTERING & SORTING**

In [None]:
# ---------------------------------------------------------
# MODULE A: ADVANCED FILTERING (WHERE, LIKE, BETWEEN, IN)
# ---------------------------------------------------------
# All predicates are combined with AND, so a row must satisfy every one of
# them. The BETWEEN bounds are inclusive; because `date` holds ISO-8601
# text (YYYY-MM-DD), comparing the strings orders them chronologically.
# Every sale in the sample table falls inside the chosen range, so the
# BETWEEN clause demonstrates the operator without removing any rows.

query_moduleA = """
SELECT
    date,
    category,
    product,
    amount,
    region
FROM sales
WHERE category = 'Electronics'                      -- exact match
  AND region IN ('North', 'South')                  -- multiple values
  AND amount > 100                                  -- numeric filter
  AND date BETWEEN '2024-01-01' AND '2024-03-31'    -- inclusive range
  AND product LIKE '%top%'                          -- pattern matching
ORDER BY amount DESC;                               -- sorting high to low
"""

print("=== Module A: Advanced Filtering & Sorting ===")
# Last expression of the cell: the DataFrame returned by run_query is what
# the notebook displays.
run_query(query_moduleA)

# **AGGREGATION & GROUPING**

In [None]:
# ---------------------------------------------------------
# MODULE B: AGGREGATION & GROUPING
# ---------------------------------------------------------
# Collapses the `sales` table to one summary row per category. Each
# aggregate (COUNT, SUM, AVG, MIN, MAX) is computed over the rows of that
# category only, and the categories are listed from highest to lowest
# total revenue.

query_moduleB = """
SELECT
    category,
    COUNT(*) AS total_transactions,     -- number of rows per category
    SUM(amount) AS total_revenue,       -- total income per category
    AVG(amount) AS avg_order_value,     -- average sale price
    MIN(amount) AS cheapest_sale,       -- lowest sale
    MAX(amount) AS highest_sale         -- highest sale
FROM sales
GROUP BY category                       -- groups rows by category
ORDER BY total_revenue DESC;            -- sort categories by revenue
"""

print("=== Module B: Aggregation & Grouping ===")
# Last expression of the cell: the DataFrame returned by run_query is what
# the notebook displays.
run_query(query_moduleB)
