In [1]:
# Connect to PostgreSQL database
import os
import psycopg2
import pandas as pd
from dotenv import load_dotenv

# Read connection settings from the environment; load_dotenv() first merges
# any values defined in a local .env file into os.environ.
load_dotenv()

# Every setting except the password has a local-development fallback.
# DB_PASS has no default, so it is passed as None when the variable is unset.
conn = psycopg2.connect(
    database=os.environ.get("DB_NAME", "motorcycle_sales_db"),
    user=os.environ.get("DB_USER", "postgres"),
    password=os.environ.get("DB_PASS"),
    host=os.environ.get("DB_HOST", "localhost"),
    port=os.environ.get("DB_PORT", "5440"),
)

# Helper function to run SQL queries
def run_query(sql: str, params=None) -> pd.DataFrame:
    """Execute a row-returning SQL query on the shared ``conn`` and return a DataFrame.

    The query is executed through a DB-API cursor instead of ``pd.read_sql``:
    pandas emits a warning on every call when it is given a raw DB-API
    connection such as psycopg2 rather than a SQLAlchemy connectable.

    Args:
        sql: SQL text to execute. Put driver placeholders in the text (``%s`` or
            ``%(name)s`` for psycopg2) rather than formatting values into it.
        params: Optional sequence or mapping of values bound to the placeholders
            in ``sql``. Defaults to ``None``, meaning the statement is executed
            without parameters.

    Returns:
        A DataFrame with one column per result column, named as in the query.
        A query that matches no rows yields an empty frame with those columns.

    Raises:
        Exception: Whatever the database driver raises for a failed statement.
            The open transaction is rolled back before the error propagates.
    """
    cursor = conn.cursor()
    try:
        if params is None:
            cursor.execute(sql)
        else:
            cursor.execute(sql, params)
        columns = [column[0] for column in cursor.description]
        rows = cursor.fetchall()
    except Exception:
        # After a failed statement psycopg2 rejects further commands until the
        # transaction ends; roll back so later cells can keep using `conn`.
        conn.rollback()
        raise
    finally:
        cursor.close()
    # coerce_float=True matches the pd.read_sql default (Decimal -> float).
    return pd.DataFrame.from_records(rows, columns=columns, coerce_float=True)

# Smoke test: a successful row count confirms that the connection works
test_df = run_query("SELECT COUNT(*) as total_orders FROM sales")
print("Successfully connected! Total orders:", test_df['total_orders'].iloc[0])

# Order count and rounded revenue per client type, largest revenue first
client_type_query = """
    SELECT client_type, COUNT(*) as orders, 
           ROUND(CAST(SUM(total) AS NUMERIC), 2) as total_revenue
    FROM sales 
    GROUP BY client_type
    ORDER BY total_revenue DESC
"""
client_df = run_query(client_type_query)
print("\nRevenue by client type:")
display(client_df)


Successfully connected! Total orders: 1000

Revenue by client type:


  return pd.read_sql(sql, conn)


Unnamed: 0,client_type,orders,total_revenue
0,Wholesale,225,159642.33
1,Retail,775,129470.67


![Parked motorcycle](motorcycle.jpg)

You're working for a company that sells motorcycle parts, and they've asked for some help in analyzing their sales data!

They operate three warehouses in the area, selling both retail and wholesale. They offer a variety of parts and accept credit cards, cash, and bank transfers as payment methods. However, each payment type incurs a different fee.

The board of directors wants to gain a better understanding of wholesale revenue by product line, and how this varies month-to-month and across warehouses. You have been tasked with calculating net revenue for each product line and grouping results by month and warehouse. The results should be filtered so that only `"Wholesale"` orders are included.

They have provided you with access to their database, which contains the following table called `sales`:

## Sales
| Column | Data type | Description |
|--------|-----------|-------------|
| `order_number` | `VARCHAR` | Unique order number. |
| `date` | `DATE` | Date of the order, from June to August 2021. |
| `warehouse` | `VARCHAR` | The warehouse that the order was made from&mdash; `North`, `Central`, or `West`. |
| `client_type` | `VARCHAR` | Whether the order was `Retail` or `Wholesale`. |
| `product_line` | `VARCHAR` | Type of product ordered. |
| `quantity` | `INT` | Number of products ordered. | 
| `unit_price` | `FLOAT` | Price per product (dollars). |
| `total` | `FLOAT` | Total price of the order (dollars). |
| `payment` | `VARCHAR` | Payment method&mdash;`Credit card`, `Transfer`, or `Cash`. |
| `payment_fee` | `FLOAT` | Percentage of `total` charged as a result of the `payment` method. |


Your query output should be presented in the following format:

| `product_line` | `month` | `warehouse` | `net_revenue` |
|----------------|-----------|----------------------------|--------------|
| product_one | --- | --- | --- |
| product_one | --- | --- | --- |
| product_one | --- | --- | --- |
| product_one | --- | --- | --- |
| product_one | --- | --- | --- |
| product_one | --- | --- | --- |
| product_two | --- | --- | --- |
| ... | ... | ... | ... |

In [2]:
# Wholesale net revenue per (product line, month, warehouse).
# Net revenue is SUM(total) - SUM(total * payment_fee), rounded to 2 decimals.
# The CASE only names months 6-8, which matches the June-August 2021 range
# documented for the `date` column; any other month would come back as NULL.
# Rows are ordered by product line, then calendar month, then revenue (highest first).
query = """
SELECT
    product_line,
    CASE EXTRACT(MONTH FROM date)
        WHEN 6 THEN 'June'
        WHEN 7 THEN 'July'
        WHEN 8 THEN 'August'
    END AS month,
    warehouse,
    ROUND(CAST(SUM(total) - SUM(total * payment_fee) AS NUMERIC), 2) AS net_revenue
FROM sales
WHERE client_type = 'Wholesale'
GROUP BY
    product_line,
    EXTRACT(MONTH FROM date),
    warehouse
ORDER BY
    product_line,
    EXTRACT(MONTH FROM date),
    net_revenue DESC;
"""

df_net_revenue = run_query(query)
print(
    "Net revenue for wholesale orders by product line, month, and warehouse:",
    f"Total rows: {df_net_revenue.shape[0]}",
    sep="\n",
)
display(df_net_revenue)

# Roll-up of the same wholesale net revenue across all months and warehouses:
# one row per product line with its order count, highest net revenue first.
print("\nSummary: Total wholesale net revenue by product line")
summary_query = """
SELECT
    product_line,
    COUNT(*) as order_count,
    ROUND(CAST(SUM(total) - SUM(total * payment_fee) AS NUMERIC), 2) AS total_net_revenue
FROM sales
WHERE client_type = 'Wholesale'
GROUP BY product_line
ORDER BY total_net_revenue DESC;
"""
summary = run_query(summary_query)
display(summary)


  return pd.read_sql(sql, conn)


Net revenue for wholesale orders by product line, month, and warehouse:
Total rows: 48


Unnamed: 0,product_line,month,warehouse,net_revenue
0,Braking system,June,Central,3648.14
1,Braking system,June,North,1472.93
2,Braking system,June,West,1200.64
3,Braking system,July,Central,3740.94
4,Braking system,July,West,3030.39
5,Braking system,July,North,2568.55
6,Braking system,August,Central,3009.1
7,Braking system,August,West,2475.71
8,Braking system,August,North,1753.19
9,Electrical system,June,Central,2875.93



Summary: Total wholesale net revenue by product line


  return pd.read_sql(sql, conn)


Unnamed: 0,product_line,order_count,total_net_revenue
0,Frame & body,38,39083.11
1,Suspension & traction,51,37912.32
2,Braking system,55,22899.59
3,Electrical system,38,21536.87
4,Engine,13,21023.69
5,Miscellaneous,30,15590.33
