# Day 28 – SQL Joins Using Pandas

Practicing INNER JOIN and LEFT JOIN logic using Pandas
to simulate real SQL interview questions.

In [1]:
import pandas as pd

# Read the cleaned e-commerce export into the base frame that the later
# cells slice from, then preview its first rows.
csv_path = "/content/ecommerce_cleaned.csv"
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,order_id,customer_id,date,age,gender,city,product_category,unit_price,quantity,discount_amount,total_amount,payment_method,device_type,session_duration_minutes,pages_viewed,is_returning_customer,delivery_time_days,customer_rating
0,ORD_001337,CUST_01337,2023-01-01,27,Female,Bursa,Toys,54.28,1,0.0,54.28,Debit Card,Mobile,4,14,True,8,5
1,ORD_004885,CUST_04885,2023-01-01,42,Male,Konya,Toys,244.9,1,0.0,244.9,Credit Card,Mobile,11,3,True,3,3
2,ORD_004507,CUST_04507,2023-01-01,43,Female,Ankara,Food,48.15,5,0.0,240.75,Credit Card,Mobile,7,8,True,5,2
3,ORD_000645,CUST_00645,2023-01-01,32,Male,Istanbul,Electronics,804.06,1,229.28,574.78,Credit Card,Mobile,8,10,False,1,4
4,ORD_000690,CUST_00690,2023-01-01,40,Female,Istanbul,Sports,755.61,5,0.0,3778.05,Cash on Delivery,Desktop,21,10,True,7,4


In [2]:
# Project the order-level columns (order id, customer key, order total)
# out of df; this frame is the left-hand table of the joins below.
orders = df.loc[:, ["order_id", "customer_id", "total_amount"]]
orders.head()

Unnamed: 0,order_id,customer_id,total_amount
0,ORD_001337,CUST_01337,54.28
1,ORD_004885,CUST_04885,244.9
2,ORD_004507,CUST_04507,240.75
3,ORD_000645,CUST_00645,574.78
4,ORD_000690,CUST_00690,3778.05


In [4]:
# Build the customer lookup table with exactly one row per customer_id.
# Deduplicating on the key alone (rather than on the (customer_id, city)
# pair) guarantees the joins below are many-to-one, so an order can never be
# fanned out into several rows -- and its total_amount counted more than
# once -- if a customer appears in df with more than one city.
# keep="first" is the default: such a customer is assigned the city of
# their earliest row in df.
customers = df[["customer_id", "city"]].drop_duplicates(subset="customer_id")
customers.head()

Unnamed: 0,customer_id,city
0,CUST_01337,Bursa
1,CUST_04885,Konya
2,CUST_04507,Ankara
3,CUST_00645,Istanbul
4,CUST_00690,Istanbul


In [5]:
# INNER JOIN: keep only the orders whose customer_id also appears in
# customers, attaching the matching city to each order row.
inner_join = orders.merge(customers, how="inner", on="customer_id")

inner_join.head()

Unnamed: 0,order_id,customer_id,total_amount,city
0,ORD_001337,CUST_01337,54.28,Bursa
1,ORD_004885,CUST_04885,244.9,Konya
2,ORD_004507,CUST_04507,240.75,Ankara
3,ORD_000645,CUST_00645,574.78,Istanbul
4,ORD_000690,CUST_00690,3778.05,Istanbul


Equivalent SQL:

```sql
SELECT *
FROM orders
INNER JOIN customers
ON orders.customer_id = customers.customer_id;
```

In [6]:
# LEFT JOIN: keep every row of orders; city is filled in where the
# customer_id is found in customers and left as NaN otherwise.
left_join = orders.merge(customers, how="left", on="customer_id")

left_join.head()

Unnamed: 0,order_id,customer_id,total_amount,city
0,ORD_001337,CUST_01337,54.28,Bursa
1,ORD_004885,CUST_04885,244.9,Konya
2,ORD_004507,CUST_04507,240.75,Ankara
3,ORD_000645,CUST_00645,574.78,Istanbul
4,ORD_000690,CUST_00690,3778.05,Istanbul


Equivalent SQL:

```sql
SELECT *
FROM orders
LEFT JOIN customers
ON orders.customer_id = customers.customer_id;
```

In [8]:
# Total revenue per city, largest first.
# dropna=False keeps orders whose city is missing (for example rows left
# unmatched by the LEFT JOIN) as their own NaN group. groupby's default
# would silently drop them, so the per-city totals would no longer add up
# to the overall revenue. When no city is null the result is unchanged.
region_revenue = (
    left_join
    .groupby("city", dropna=False)["total_amount"]
    .sum()
    .sort_values(ascending=False)
)

region_revenue

Unnamed: 0_level_0,total_amount
city,Unnamed: 1_level_1
Istanbul,1334122.56
Ankara,657535.82
Izmir,567534.67
Bursa,459076.31
Adana,427059.63
Konya,344974.94
Gaziantep,318408.41
Antalya,315549.89
Kayseri,312303.91
Eskisehir,178978.43


## Key Takeaway

JOINs allow combining information from multiple tables.
Most real-world analysis requires joining data before aggregation.