In [None]:
import pandas as pd

# Sample sales ledger: one row per (date, product) with units sold and unit price.
sales_data = pd.DataFrame(
    {
        "Date": ["2023-01-01", "2023-01-01", "2023-01-02", "2023-01-02"],
        "Product": ["Laptop", "Phone", "Laptop", "Phone"],
        "Category": ["Electronics", "Electronics", "Electronics", "Electronics"],
        "Quantity": [2, 5, 3, 7],
        "Price": [1000, 500, 1000, 500],
    }
)

# Per-category roll-up: total units, mean unit price, largest single-row quantity.
category_groups = sales_data.groupby("Category")
sales_grouped = category_groups.agg(
    total_quantity=("Quantity", "sum"),
    avg_price=("Price", "mean"),
    max_quantity=("Quantity", "max"),
).reset_index()

# Best-selling product per category: total units per (category, product),
# rank within each category by units descending, keep the first row per category.
product_totals = (
    sales_data.groupby(["Category", "Product"])["Quantity"].sum().reset_index()
)
ranked_products = product_totals.sort_values(
    by=["Category", "Quantity"], ascending=[True, False]
)
top_products = ranked_products.drop_duplicates(subset="Category", keep="first")

# Revenue per row, then the single date with the highest summed revenue.
sales_data["TotalSale"] = sales_data["Quantity"].mul(sales_data["Price"])
daily_revenue = sales_data.groupby("Date")["TotalSale"].sum().reset_index()
top_sales_day = daily_revenue.sort_values(by="TotalSale", ascending=False).iloc[:1]


In [None]:
# Thirty synthetic orders: three customers with ten orders each, one unit per order.
orders_data = pd.DataFrame(
    {
        "OrderID": range(1, 31),
        "CustomerID": [101] * 10 + [102] * 10 + [103] * 10,
        "Product": ["Laptop"] * 15 + ["Phone"] * 15,
        "Quantity": [1] * 30,
        "Price": [150] * 10 + [130] * 10 + [100] * 10,
    }
)

per_customer = orders_data.groupby("CustomerID")

# Orders placed by customers with at least 20 orders.
# With the sample data above every customer has 10 orders, so this selection is empty.
order_counts = per_customer["OrderID"].count()
frequent_customers = order_counts.loc[lambda counts: counts >= 20].index
is_frequent = orders_data["CustomerID"].isin(frequent_customers)
orders_frequent = orders_data.loc[is_frequent]

# Orders placed by customers whose mean order price is above 120.
avg_price = per_customer["Price"].mean()
high_spenders = avg_price.loc[lambda means: means > 120].index
is_high_spender = orders_data["CustomerID"].isin(high_spenders)
orders_high_price = orders_data.loc[is_high_spender]

# Units and summed price per product, then products with at least 5 units in total.
product_summary = (
    orders_data.groupby("Product")
    .agg(total_quantity=("Quantity", "sum"), total_price=("Price", "sum"))
    .reset_index()
)

popular_products = product_summary.loc[product_summary["total_quantity"].ge(5)]


In [None]:
import numpy as np

# Twenty synthetic residents: IDs 1-20, ten per state, both states sharing one salary ladder.
salary_ladder = [5000, 12000, 18000, 24000, 30000, 36000, 42000, 48000, 54000, 60000]
population_df = pd.DataFrame(
    {
        "ID": range(1, 21),
        "State": ["NY"] * 10 + ["CA"] * 10,
        "Salary": salary_ladder * 2,
    }
)

# Salary band lookup table: one row per band as (label, inclusive minimum, inclusive maximum).
salary_band_df = pd.DataFrame(
    [
        ("Low", 0, 20000),
        ("Medium", 20001, 40000),
        ("High", 40001, 100000),
    ],
    columns=["Category", "Min", "Max"],
)

def get_band(s, bands=None):
    """Return the label of the first salary band whose range contains ``s``.

    Parameters
    ----------
    s : number
        Salary to classify.
    bands : pandas.DataFrame, optional
        Lookup table with ``"Category"``, ``"Min"`` and ``"Max"`` columns.
        Both bounds are inclusive. When omitted, the module-level
        ``salary_band_df`` is used, so ``get_band(s)`` keeps working as before.

    Returns
    -------
    The ``"Category"`` value of the first matching row (rows are checked in
    table order), or ``"Other"`` when no row matches.
    """
    if bands is None:
        bands = salary_band_df
    # Walk the three columns in lockstep instead of DataFrame.iterrows(), which
    # would construct a fresh Series for every band row on every call.
    for label, low, high in zip(bands["Category"], bands["Min"], bands["Max"]):
        if low <= s <= high:
            return label
    return "Other"

# Label every resident with the salary band their salary falls into.
population_df["SalaryBand"] = population_df["Salary"].map(get_band)

# Overall statistics per salary band; the share is relative to all rows of population_df.
band_grouped = population_df.groupby("SalaryBand")["Salary"]
band_counts = band_grouped.count()
band_share_pct = band_counts / len(population_df) * 100
band_stats = pd.DataFrame(
    {
        "Population %": band_share_pct,
        "Average Salary": band_grouped.mean(),
        "Median Salary": band_grouped.median(),
        "Population Count": band_counts,
    }
).reset_index()

# Same statistics per (state, band); here the share is relative to the
# salary count of the row's own state rather than the whole population.
state_band_group = population_df.groupby(["State", "SalaryBand"])["Salary"]
state_band_counts = state_band_group.count()
# Broadcast each state's total back onto every (state, band) row.
state_totals = state_band_counts.groupby(level="State").transform("sum")
state_band_stats = pd.DataFrame(
    {
        "Population %": state_band_counts / state_totals * 100,
        "Average Salary": state_band_group.mean(),
        "Median Salary": state_band_group.median(),
        "Population Count": state_band_counts,
    }
).reset_index()
