# Day 34 – SQL Fundamentals Review

This notebook reviews core SQL concepts learned so far
using simple examples and explanations.

In [2]:
import pandas as pd
import sqlite3

df = pd.read_csv("ecommerce_cleaned.csv")
conn = sqlite3.connect(":memory:")
df.to_sql("orders", conn, index=False, if_exists="replace")

5000

In [3]:
query = """
SELECT *
FROM orders
WHERE total_amount > 1000;
"""
pd.read_sql(query, conn)

Unnamed: 0,order_id,customer_id,date,age,gender,city,product_category,unit_price,quantity,discount_amount,total_amount,payment_method,device_type,session_duration_minutes,pages_viewed,is_returning_customer,delivery_time_days,customer_rating
0,ORD_000690,CUST_00690,2023-01-01,40,Female,Istanbul,Sports,755.61,5,0.00,3778.05,Cash on Delivery,Desktop,21,10,1,7,4
1,ORD_002506,CUST_02506,2023-01-01,25,Female,Izmir,Electronics,2107.37,2,0.00,4214.74,Digital Wallet,Desktop,10,5,0,6,5
2,ORD_001808,CUST_01808,2023-01-02,41,Male,Istanbul,Fashion,257.62,5,62.15,1225.95,Credit Card,Tablet,24,7,0,2,4
3,ORD_003413,CUST_03413,2023-01-02,58,Male,Istanbul,Sports,1784.75,4,490.25,6648.75,Debit Card,Desktop,8,5,1,5,4
4,ORD_004457,CUST_04457,2023-01-02,21,Female,Istanbul,Electronics,1091.11,1,0.00,1091.11,Cash on Delivery,Mobile,9,6,1,3,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1223,ORD_002347,CUST_02347,2024-03-24,29,Male,Gaziantep,Electronics,1993.10,1,0.00,1993.10,Credit Card,Mobile,15,7,1,5,3
1224,ORD_003481,CUST_03481,2024-03-24,31,Female,Izmir,Electronics,3078.07,3,0.00,9234.21,Bank Transfer,Tablet,11,12,1,4,5
1225,ORD_004882,CUST_04882,2024-03-25,51,Female,Adana,Home & Garden,744.29,4,214.72,2762.44,Debit Card,Tablet,10,11,0,7,3
1226,ORD_003210,CUST_03210,2024-03-25,40,Male,Ankara,Electronics,654.04,4,0.00,2616.16,Credit Card,Tablet,24,15,1,8,4


SELECT chooses columns.
WHERE filters rows based on a condition.

In [4]:
query = """
SELECT product_category,
       SUM(total_amount) AS revenue
FROM orders
GROUP BY product_category;
"""
pd.read_sql(query, conn)

Unnamed: 0,product_category,revenue
0,Beauty,156584.74
1,Books,72744.52
2,Electronics,2328806.81
3,Fashion,375214.93
4,Food,96138.67
5,Home & Garden,908348.86
6,Sports,754563.56
7,Toys,223142.48


GROUP BY groups similar values.
SUM aggregates numeric data.

In [5]:
query = """
SELECT customer_id,
       SUM(total_amount) AS total_spent
FROM orders
GROUP BY customer_id
ORDER BY total_spent DESC
LIMIT 5;
"""
pd.read_sql(query, conn)

Unnamed: 0,customer_id,total_spent
0,CUST_04705,22023.9
1,CUST_00061,21478.35
2,CUST_01984,21409.05
3,CUST_00525,20211.81
4,CUST_03721,19547.2


ORDER BY sorts results.
LIMIT restricts number of rows.

In [7]:
customers = df[["customer_id", "city"]].drop_duplicates()
customers.to_sql("customers", conn, index=False, if_exists="replace")

query = """
SELECT o.order_id,
       o.total_amount,
       c.city AS customer_region
FROM orders o
LEFT JOIN customers c
ON o.customer_id = c.customer_id;
"""
pd.read_sql(query, conn)

Unnamed: 0,order_id,total_amount,customer_region
0,ORD_001337,54.28,Bursa
1,ORD_004885,244.90,Konya
2,ORD_004507,240.75,Ankara
3,ORD_000645,574.78,Istanbul
4,ORD_000690,3778.05,Istanbul
...,...,...,...
4995,ORD_001048,103.05,Izmir
4996,ORD_001051,71.55,Adana
4997,ORD_003543,34.11,Antalya
4998,ORD_004443,171.19,Istanbul


JOIN combines data from multiple tables using a key.

In [8]:
query = """
WITH customer_spend AS (
    SELECT customer_id,
           SUM(total_amount) AS total_spent
    FROM orders
    GROUP BY customer_id
)
SELECT *
FROM customer_spend
WHERE total_spent > (
    SELECT AVG(total_amount) FROM orders
);
"""
pd.read_sql(query, conn)

Unnamed: 0,customer_id,total_spent
0,CUST_00012,1090.15
1,CUST_00030,2497.83
2,CUST_00034,2028.13
3,CUST_00035,9406.46
4,CUST_00036,2424.48
...,...,...
1241,CUST_04988,3355.70
1242,CUST_04989,1229.60
1243,CUST_04995,3066.18
1244,CUST_04997,2660.67


CTEs help break queries into readable steps.

## Self-Check

I can now:
- Explain SELECT, WHERE, GROUP BY in simple words
- Understand when to use JOIN
- Explain why CTEs improve readability
- Read SQL queries without panic