# Data Exploration & Visualization — Extended Payments + Shipments Dataset

## Connect to MySQL

In [None]:
import os

import matplotlib.pyplot as plt
import mysql.connector as mc
import pandas as pd

# Open the MySQL connection used by every query in this notebook.
# Each setting can be overridden through an environment variable so the real
# password never has to be written into the notebook itself; when a variable
# is unset the previous hard-coded value is used as the fallback.
conn = mc.connect(
    host=os.environ.get("MYSQL_HOST", "localhost"),
    user=os.environ.get("MYSQL_USER", "root"),
    password=os.environ.get("MYSQL_PASSWORD", "YOUR_PASSWORD"),
    database=os.environ.get("MYSQL_DATABASE", "retail_demo_ext2"),
)

## Load Data

In [None]:
def _fetch(sql):
    """Run *sql* on the open connection and return the result as a DataFrame."""
    return pd.read_sql(sql, conn)


# Pull each table in full into its own DataFrame.
customers = _fetch("SELECT * FROM customers")
products = _fetch("SELECT * FROM products")
orders = _fetch("SELECT * FROM orders")
payments = _fetch("SELECT * FROM payments")
shipments = _fetch("SELECT * FROM shipments")

# Orders joined to their customer and product. Payment method and shipment
# status/carrier come from LEFT JOINs, so orders without a matching payment
# or shipment row are still kept, with those columns empty.
# NOTE(review): if an order can have several payment or shipment rows, the
# LEFT JOINs repeat that order's row -- confirm before summing paid_price.
query = """
SELECT o.order_id, o.paid_price, o.order_date,
       c.customer_id, c.name, c.country,
       p.product_id, p.product_name, p.category,
       pay.method AS payment_method,
       s.status AS shipment_status, s.carrier
FROM orders o
JOIN customers c ON o.customer_id=c.customer_id
JOIN products p ON o.product_id=p.product_id
LEFT JOIN payments pay ON o.order_id=pay.order_id
LEFT JOIN shipments s ON o.order_id=s.order_id;
"""
orders_full = pd.read_sql(sql=query, con=conn)

# Preview the first rows of the joined frame.
orders_full.head()

## Orders per Customer

In [None]:
# Bar chart: number of order_id entries per customer_id in the orders table.
(
    orders
    .groupby('customer_id')['order_id']
    .count()
    .plot(kind='bar')
)
plt.title('Orders per Customer')
plt.show()

## Revenue per Customer

In [None]:
# Bar chart: total paid_price per customer name, largest first.
# NOTE(review): grouping by name merges customers who share a name --
# confirm names are unique or group by customer_id instead.
(
    orders_full
    .groupby('name')['paid_price']
    .sum()
    .sort_values(ascending=False)
    .plot(kind='bar')
)
plt.title('Revenue per Customer')
plt.show()

## Orders by Country

In [None]:
# Bar chart: number of order_id entries per customer country.
(
    orders_full
    .groupby('country')['order_id']
    .count()
    .plot(kind='bar')
)
plt.title('Orders by Country')
plt.show()

## Revenue by Category

In [None]:
# Bar chart: total paid_price per product category.
(
    orders_full
    .groupby('category')['paid_price']
    .sum()
    .plot(kind='bar')
)
plt.title('Revenue by Category')
plt.show()

## Orders by Payment Method

In [None]:
# Bar chart: number of distinct orders paid with each payment method.
# Counting order_id (rather than summing amount) is what makes this an
# "orders" chart; nunique keeps an order that has several payment rows with
# the same method from being counted more than once.
(
    payments
    .groupby('method')['order_id']
    .nunique()
    .plot(kind='bar')
)
plt.title('Orders by Payment Method')
plt.show()

## Shipments by Status

In [None]:
# Bar chart: number of shipment_id entries per shipment status.
(
    shipments
    .groupby('status')['shipment_id']
    .count()
    .plot(kind='bar')
)
plt.title('Shipments by Status')
plt.show()

## Shipments by Carrier

In [None]:
# Bar chart: number of shipment_id entries per carrier.
(
    shipments
    .groupby('carrier')['shipment_id']
    .count()
    .plot(kind='bar')
)
plt.title('Shipments by Carrier')
plt.show()

## Average Paid Price by Category

In [None]:
# Bar chart: mean paid_price per product category.
(
    orders_full
    .groupby('category')['paid_price']
    .mean()
    .plot(kind='bar')
)
plt.title('Average Paid Price by Category')
plt.show()

## Top 15 Products by Revenue

In [None]:
# Bar chart: the 15 product names with the highest total paid_price.
(
    orders_full
    .groupby('product_name')['paid_price']
    .sum()
    .sort_values(ascending=False)
    .head(15)
    .plot(kind='bar')
)
plt.title('Top 15 Products by Revenue')
plt.show()

## Paid Price Distribution

In [None]:
# Histogram of paid_price across all rows of the orders table, in 30 bins.
paid_prices = orders['paid_price']
paid_prices.plot(kind='hist', bins=30)
plt.title('Paid Price Distribution')
plt.xlabel('Price')
plt.show()

## Close Connection

In [None]:
# Release the MySQL connection opened at the start of the notebook.
conn.close()