In [1]:
import pandas as pd
import sqlite3

# Read the cleaned e-commerce export into a DataFrame.
df = pd.read_csv("ecommerce_cleaned.csv")

# Open a throwaway SQLite database that lives only in memory, then copy the
# DataFrame into it as the "orders" table (overwriting any existing table of
# that name; the DataFrame index is not written as a column).
conn = sqlite3.connect(":memory:")
df.to_sql(name="orders", con=conn, if_exists="replace", index=False)

5000

In [6]:
# Order-level columns used below: the order id, the join key, and the amount
# that is later summed per city.
orders = df[["order_id", "customer_id", "total_amount"]]
print(df.columns)

# Customer lookup table with exactly one row per customer_id.
# De-duplicating on the join key (rather than on the (customer_id, city) pair)
# keeps the later merges many-to-one: if a customer_id ever appears with more
# than one city, its orders are not repeated in the join and total_amount is
# not double-counted in the per-city sums. The first city seen is kept.
customers = df[["customer_id", "city"]].drop_duplicates(subset="customer_id")

Index(['order_id', 'customer_id', 'date', 'age', 'gender', 'city',
       'product_category', 'unit_price', 'quantity', 'discount_amount',
       'total_amount', 'payment_method', 'device_type',
       'session_duration_minutes', 'pages_viewed', 'is_returning_customer',
       'delivery_time_days', 'customer_rating'],
      dtype='object')


In [7]:
# Inner join: keep only the order rows whose customer_id also appears in
# `customers`, and attach the matching city to each of them.
joined_data = orders.merge(customers, how="inner", on="customer_id")

joined_data.head()

Unnamed: 0,order_id,customer_id,total_amount,city
0,ORD_001337,CUST_01337,54.28,Bursa
1,ORD_004885,CUST_04885,244.9,Konya
2,ORD_004507,CUST_04507,240.75,Ankara
3,ORD_000645,CUST_00645,574.78,Istanbul
4,ORD_000690,CUST_00690,3778.05,Istanbul


In [8]:
# Left join: keep every row of `orders`; rows whose customer_id has no match
# in `customers` are retained with a missing city instead of being dropped.
left_joined = orders.merge(customers, how="left", on="customer_id")

left_joined.head()

Unnamed: 0,order_id,customer_id,total_amount,city
0,ORD_001337,CUST_01337,54.28,Bursa
1,ORD_004885,CUST_04885,244.9,Konya
2,ORD_004507,CUST_04507,240.75,Ankara
3,ORD_000645,CUST_00645,574.78,Istanbul
4,ORD_000690,CUST_00690,3778.05,Istanbul


In [10]:
# Revenue per city: add up total_amount across all joined order rows that
# share the same city. The result is a Series indexed by city.
region_revenue = left_joined.groupby("city")["total_amount"].sum()

region_revenue

Unnamed: 0_level_0,total_amount
city,Unnamed: 1_level_1
Adana,427059.63
Ankara,657535.82
Antalya,315549.89
Bursa,459076.31
Eskisehir,178978.43
Gaziantep,318408.41
Istanbul,1334122.56
Izmir,567534.67
Kayseri,312303.91
Konya,344974.94


### Insight
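Joining tables allows us to combine order data with customer information.
Most real-world analysis requires joining data before summarizing it.
In this notebook the inner and left joins return the same rows, because `customers` is derived from the same table as `orders` and every order therefore has a matching customer; the two join types differ only when some orders have no match.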