<a href="https://colab.research.google.com/github/meredith224/Pandas_DE_Academy/blob/main/Pandas_Problems_(Tyler)_9_19_2025.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# --- Block 1: Data Generation ---
import pandas as pd
import numpy as np

# Sales transactions: ten sales, one per day starting 2025-01-01.
sales_data = dict(
    SaleID=range(1, 11),
    ProductID=[101, 102, 103, 101, 102, 104, 105, 101, 103, 104],
    EmployeeID=[1, 2, 1, 3, 2, 1, 3, 2, 1, 3],
    Quantity=[2, 1, 5, 2, 3, 1, 4, 2, 1, 3],
    Price=[20, 35, 50, 20, 35, 60, 45, 20, 50, 60],
    Date=pd.date_range(start="2025-01-01", periods=10, freq="D"),
)
sales = pd.DataFrame(sales_data)

# Employee lookup table, keyed by EmployeeID.
employees_data = dict(
    EmployeeID=[1, 2, 3],
    Name=["Alice", "Bob", "Charlie"],
    Department=["Sales", "Sales", "Support"],
    HireDate=pd.to_datetime(["2020-01-15", "2021-06-01", "2019-03-20"]),
)
employees = pd.DataFrame(employees_data)

# Product lookup table, keyed by ProductID.
products_data = dict(
    ProductID=[101, 102, 103, 104, 105],
    ProductName=["Pen", "Notebook", "Backpack", "Calculator", "Marker"],
    Category=["Stationery", "Stationery", "Accessories", "Electronics", "Stationery"],
)
products = pd.DataFrame(products_data)

# Preview each table: only the first five sales rows, the lookup tables in full.
for _heading, _table in (
    ("Sales Data:\n", sales.head()),
    ("Employees Data:\n", employees),
    ("Products Data:\n", products),
):
    print(_heading, _table, "\n")


Sales Data:
    SaleID  ProductID  EmployeeID  Quantity  Price       Date
0       1        101           1         2     20 2025-01-01
1       2        102           2         1     35 2025-01-02
2       3        103           1         5     50 2025-01-03
3       4        101           3         2     20 2025-01-04
4       5        102           2         3     35 2025-01-05 

Employees Data:
    EmployeeID     Name Department   HireDate
0           1    Alice      Sales 2020-01-15
1           2      Bob      Sales 2021-06-01
2           3  Charlie    Support 2019-03-20 

Products Data:
    ProductID ProductName     Category
0        101         Pen   Stationery
1        102    Notebook   Stationery
2        103    Backpack  Accessories
3        104  Calculator  Electronics
4        105      Marker   Stationery 



In [None]:
# Problem 1: Compute the total revenue generated by each employee.
# (Hint: Revenue = Quantity × Price. You’ll need to group by employee.)

# --- Solution to Problem 1 ---
# Per-sale revenue is stored on `sales` itself; a later cell reads this column.
sales["Revenue"] = sales["Quantity"].mul(sales["Price"])

# Total revenue per employee, then attach the employee details to each total.
revenue_per_employee = pd.merge(
    sales.groupby("EmployeeID", as_index=False)["Revenue"].sum(),
    employees,
    on="EmployeeID",
    how="left",
)

revenue_per_employee


Unnamed: 0,EmployeeID,Revenue,Name,Department,HireDate
0,1,400,Alice,Sales,2020-01-15
1,2,180,Bob,Sales,2021-06-01
2,3,400,Charlie,Support,2019-03-20


In [None]:
# Problem 2: Find the top-selling product by total quantity sold.

# --- Solution to Problem 2 ---
# Total units sold per product, enriched with the product details.
# ProductID is a secondary sort key so that products with equal totals
# always appear in the same order.
product_sales = (
    sales.groupby("ProductID")["Quantity"]
    .sum()
    .reset_index()
    .merge(products, on="ProductID", how="left")
    .sort_values(["Quantity", "ProductID"], ascending=[False, True])
)

# Several products can share the highest total (here Pen and Backpack both
# sold 6 units); `head(1)` would silently hide all but one of them, so show
# every product tied for first place.
product_sales.nlargest(1, "Quantity", keep="all")


Unnamed: 0,ProductID,Quantity,ProductName,Category
0,101,6,Pen,Stationery


In [None]:
# Problem 3: Show the monthly revenue trend across all products.

# --- Solution to Problem 3 ---
# Revenue is derived inside the chain (without modifying `sales`) so this
# cell does not depend on a "Revenue" column added by another cell.
# The month-end frequency is given as an offset object instead of the string
# alias "M", which recent pandas versions warn about as deprecated; the
# grouping itself (month-end bins labelled by the month's last day) is
# unchanged.
monthly_revenue = (
    sales.assign(Revenue=sales["Quantity"] * sales["Price"])
    .groupby(pd.Grouper(key="Date", freq=pd.offsets.MonthEnd()))["Revenue"]
    .sum()
    .reset_index()
)

monthly_revenue


  sales.groupby(pd.Grouper(key="Date", freq="M"))["Revenue"]


Unnamed: 0,Date,Revenue
0,2025-01-31,980


In [None]:
# Problem 4: Which employee–product pair generated the highest total revenue?
# (Hint: You’ll need to compute revenue, then merge sales with both employees and products.)

# --- Solution to Problem 4 ---
# (Re)compute per-sale revenue on `sales` so this cell works on its own.
sales["Revenue"] = sales["Quantity"].mul(sales["Price"])

# Attach employee and product details to every sale, total the revenue for
# each (employee name, product name) pair, and rank pairs from highest to lowest.
employee_product_revenue = (
    pd.merge(
        pd.merge(sales, employees, on="EmployeeID", how="left"),
        products,
        on="ProductID",
        how="left",
    )
    .groupby(["Name", "ProductName"], as_index=False)["Revenue"]
    .sum()
    .sort_values("Revenue", ascending=False)
)

# First row of the ranking = the highest-earning pair.
employee_product_revenue.iloc[:1]


Unnamed: 0,Name,ProductName,Revenue
0,Alice,Backpack,300


In [None]:
# Problem 5: For each department, find the most popular product category (by total quantity sold).
# --- Solution to Problem 5 ---
# Units sold for every (department, category) combination.
dept_category_sales = (
    pd.merge(
        pd.merge(sales, employees, on="EmployeeID", how="left"),
        products,
        on="ProductID",
        how="left",
    )
    .groupby(["Department", "Category"], as_index=False)["Quantity"]
    .sum()
)

# Order each department's categories from most to least sold, then keep only
# the first (best-selling) row per department.
dept_top_category = (
    dept_category_sales.sort_values(by=["Department", "Quantity"], ascending=[True, False])
    .drop_duplicates(subset="Department", keep="first")
    .reset_index(drop=True)
)

dept_top_category


Unnamed: 0,Department,Category,Quantity
0,Sales,Stationery,8
1,Support,Stationery,6
