# In [None]:
import pandas as pd

# Read the sales CSV into the working DataFrame
df = pd.read_csv(filepath_or_buffer="online_store_sales.csv")

# 1. Classify orders as "Low", "Medium", or "High Value" based on Order_Amount
def classify_order(value):
    """Label an order amount as "Low", "Medium", or "High".

    Args:
        value: Order amount to classify. A missing amount (None, NaN,
            or pd.NA) is accepted.

    Returns:
        "Low" for amounts below 100, "Medium" for amounts from 100 to 500
        inclusive, "High" for amounts above 500, or None when the amount
        is missing.
    """
    # Every comparison against NaN is False, so without this guard a missing
    # amount would fall through to the last branch and be labelled "High".
    if pd.isna(value):
        return None
    if value < 100:
        return "Low"
    # value >= 100 is already guaranteed here, so only the upper bound matters.
    if value <= 500:
        return "Medium"
    return "High"

# Label every order by passing each Order_Amount through classify_order
df['Order_Value_Category'] = df['Order_Amount'].map(classify_order)

# 2. Per-category sales summary, one row per Category:
#    - Total_Revenue: sum of Order_Amount
#    - Average_Order_Amount: mean of Order_Amount
#    - Total_Quantity_Sold: sum of Quantity
category_summary = (
    df.groupby('Category')
    .agg(
        Total_Revenue=pd.NamedAgg(column='Order_Amount', aggfunc='sum'),
        Average_Order_Amount=pd.NamedAgg(column='Order_Amount', aggfunc='mean'),
        Total_Quantity_Sold=pd.NamedAgg(column='Quantity', aggfunc='sum'),
    )
    .reset_index()
)

# 3. Per-city spending summary, one row per City:
#    revenue total, mean amount per order, and count of Order_ID values
city_spending = (
    df.groupby('City')
    .agg(
        Total_Revenue=pd.NamedAgg(column='Order_Amount', aggfunc='sum'),
        Average_Spending_Per_Order=pd.NamedAgg(column='Order_Amount', aggfunc='mean'),
        Total_Orders=pd.NamedAgg(column='Order_ID', aggfunc='count'),
    )
    .reset_index()
)

# 4. Cross-tabulate summed Order_Amount: one row per City, one column per
#    Category; City/Category combinations with no data are filled with 0
revenue_pivot = pd.pivot_table(
    data=df,
    index='City',
    columns='Category',
    values='Order_Amount',
    aggfunc='sum',
    fill_value=0,
)

# 5. Rank products by summed Quantity, largest totals first
top_selling_products = (
    df.groupby('Product_Name')
    .agg(Total_Quantity_Sold=pd.NamedAgg(column='Quantity', aggfunc='sum'))
    .sort_values('Total_Quantity_Sold', ascending=False)
    .reset_index()
)

# Optional: parse Purchase_Date into datetimes so later steps can do
# time-based analysis (e.g., trends over time)
df['Purchase_Date'] = pd.to_datetime(arg=df['Purchase_Date'])

# ---- Display outputs ----
# Each report prints its heading, then the table starting on the next line.
print(
    "\n📦 Order Value Classification:",
    df[['Order_ID', 'Order_Amount', 'Order_Value_Category']],
    sep="\n",
)

print("\n📊 Sales Summary by Category:", category_summary, sep="\n")

print("\n🌆 City-Wise Customer Spending:", city_spending, sep="\n")

print("\n📉 Pivot Table - Revenue by Category and City:", revenue_pivot, sep="\n")

# Only the ten best-selling products are shown.
print("\n🏆 Top-Selling Products:", top_selling_products.head(10), sep="\n")
