In [2]:
import pandas as pd
 
# Load the raw sales export that the exploratory cells below work from.
df = pd.read_csv(filepath_or_buffer="../data/sales_data.csv")

In [3]:
# Sanity-check the load: report the (rows, columns) shape, then preview the leading rows.
print("Dataset shape:", df.shape)
print("\nFirst few rows:", df.head(), sep="\n")

Dataset shape: (5, 5)

First few rows:
   order_id region   product  quantity  price
0      1001   East  Keyboard       2.0   1500
1      1002   West     Mouse       5.0    500
2      1003   East   Monitor       NaN  12000
3      1004  South  Keyboard       1.0   1500
4      1005   West   Monitor       2.0  12000


In [6]:
# Per-order revenue = units sold x unit price.
# A missing quantity or price propagates as NaN revenue for that order
# (see the NaN quantity in the preview above), so such rows carry no revenue.
df["revenue"] = df["quantity"].mul(df["price"])

print(df.loc[:, "revenue"])

0     3000.0
1     2500.0
2        NaN
3     1500.0
4    24000.0
Name: revenue, dtype: float64


In [7]:
# Show the order-level columns next to the derived revenue (region is left out of this view).
print(
    "Dataset with revenue:",
    df.loc[:, ["order_id", "product", "quantity", "price", "revenue"]],
    sep="\n",
)

Dataset with revenue:
   order_id   product  quantity  price  revenue
0      1001  Keyboard       2.0   1500   3000.0
1      1002     Mouse       5.0    500   2500.0
2      1003   Monitor       NaN  12000      NaN
3      1004  Keyboard       1.0   1500   1500.0
4      1005   Monitor       2.0  12000  24000.0


In [8]:
# Total revenue per product. sum() skips NaN revenue, so orders with a
# missing quantity/price contribute nothing to their product's total.
product_revenue = (
    df
    .groupby(by="product")["revenue"]
    .sum()
)

In [9]:
# Display the per-product revenue totals (a Series indexed by product).
print(product_revenue)

product
Keyboard     4500.0
Monitor     24000.0
Mouse        2500.0
Name: revenue, dtype: float64


In [10]:
# Flatten the per-product Series into a two-column table
# (product, total_revenue) that is ready to export.
product_revenue_df = (
    product_revenue
    .rename_axis("product")
    .reset_index(name="total_revenue")
)
print("\nRevenue by Product (DataFrame):", product_revenue_df, sep="\n")


Revenue by Product (DataFrame):
    product  total_revenue
0  Keyboard         4500.0
1   Monitor        24000.0
2     Mouse         2500.0


In [11]:
# Total revenue per region. sum() skips NaN revenue, so orders with a
# missing quantity/price contribute nothing to their region's total.
region_revenue = (
    df
    .groupby(by="region")["revenue"]
    .sum()
)

In [12]:

# Display the per-region revenue totals under a heading.
print("Revenue by Region:", region_revenue, sep="\n")

Revenue by Region:
region
East      3000.0
South     1500.0
West     26500.0
Name: revenue, dtype: float64


In [13]:
# Flatten the per-region Series into a two-column table
# (region, total_revenue) that is ready to export.
region_revenue_df = (
    region_revenue
    .rename_axis("region")
    .reset_index(name="total_revenue")
)
print("\nRevenue by Region (DataFrame):", region_revenue_df, sep="\n")


Revenue by Region (DataFrame):
  region  total_revenue
0   East         3000.0
1  South         1500.0
2   West        26500.0


In [14]:
# Rank products from highest to lowest total revenue.
product_revenue_sorted = product_revenue.sort_values(
    ascending=False,
)
print("Products sorted by revenue:", product_revenue_sorted, sep="\n")

Products sorted by revenue:
product
Monitor     24000.0
Keyboard     4500.0
Mouse        2500.0
Name: revenue, dtype: float64


In [15]:
# Keep only the two best-selling products from the ranked Series.
top_products = product_revenue_sorted.iloc[:2]
print("\nTop 2 Products by Revenue:", top_products, sep="\n")


Top 2 Products by Revenue:
product
Monitor     24000.0
Keyboard     4500.0
Name: revenue, dtype: float64


In [16]:
# Flatten the top-products Series into a two-column table
# (product, total_revenue) that is ready to export.
top_products_df = (
    top_products
    .rename_axis("product")
    .reset_index(name="total_revenue")
)
print("\nTop 2 Products (DataFrame):", top_products_df, sep="\n")


Top 2 Products (DataFrame):
    product  total_revenue
0   Monitor        24000.0
1  Keyboard         4500.0


In [17]:
# Export the per-product table to the working directory, without the row index.
product_revenue_df.to_csv(
    path_or_buf="revenue_by_product.csv",
    index=False,
)
print("Saved: revenue_by_product.csv")

Saved: revenue_by_product.csv


In [18]:
# Export the per-region table to the working directory, without the row index.
region_revenue_df.to_csv(
    path_or_buf="revenue_by_region.csv",
    index=False,
)
print("Saved: revenue_by_region.csv")

Saved: revenue_by_region.csv


In [19]:
# Export the top-products table to the working directory, without the row index.
top_products_df.to_csv(
    path_or_buf="top_products.csv",
    index=False,
)
print("Saved: top_products.csv")

Saved: top_products.csv


In [None]:
def generate_sales_analytics(input_file, output_dir=".", top_n=2):
    """
    Generate sales analytics summaries from a sales dataset.

    Reads the CSV at ``input_file``, derives per-order revenue as
    ``quantity * price`` and writes three summary files into ``output_dir``:
    ``revenue_by_product.csv``, ``revenue_by_region.csv`` and
    ``top_products.csv``. A short report (total revenue and average order
    value) is printed to stdout.

    Orders whose quantity or price is missing get a NaN revenue. Those orders
    are skipped by every total and by the average order value; a warning line
    is printed with the number of affected orders so the omission is visible.

    Parameters
    ----------
    input_file : str or path-like
        Location of the sales CSV. It must contain the columns ``product``,
        ``region``, ``quantity`` and ``price``.
    output_dir : str or path-like, default "."
        Directory that receives the three CSV files. It is created if it does
        not exist. The default writes to the current working directory.
    top_n : int, default 2
        Number of highest-revenue products to keep in ``top_products.csv``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(product_revenue, region_revenue, top_products)``. Each frame has a
        key column (``product`` or ``region``) and a ``total_revenue`` column.

    Raises
    ------
    KeyError
        If any required column is absent from the input file. Raised before
        any output file is written.
    ValueError
        If ``top_n`` is negative.
    """
    # Local import so the function does not depend on an import in another cell.
    from pathlib import Path

    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Load data
    orders = pd.read_csv(input_file)

    # Validate up front so a bad file fails before partial outputs are written.
    required_columns = ("product", "region", "quantity", "price")
    missing_columns = [col for col in required_columns if col not in orders.columns]
    if missing_columns:
        raise KeyError(f"{input_file} is missing required column(s): {missing_columns}")

    # Calculate revenue
    orders = orders.assign(revenue=orders["quantity"] * orders["price"])

    # NaN revenue is skipped by sum()/mean() below; surface it instead of hiding it.
    incomplete_orders = int(orders["revenue"].isna().sum())
    if incomplete_orders:
        print(
            f"Warning: {incomplete_orders} order(s) have a missing quantity or price "
            "and are excluded from all totals."
        )

    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    # Revenue by product (computed once, reused for the top-N ranking)
    product_totals = orders.groupby("product")["revenue"].sum()
    product_revenue = product_totals.rename_axis("product").reset_index(name="total_revenue")
    product_revenue.to_csv(out_dir / "revenue_by_product.csv", index=False)

    # Revenue by region
    region_revenue = (
        orders.groupby("region")["revenue"]
        .sum()
        .rename_axis("region")
        .reset_index(name="total_revenue")
    )
    region_revenue.to_csv(out_dir / "revenue_by_region.csv", index=False)

    # Top products
    top_products = (
        product_totals.sort_values(ascending=False)
        .head(top_n)
        .rename_axis("product")
        .reset_index(name="total_revenue")
    )
    top_products.to_csv(out_dir / "top_products.csv", index=False)

    print("Analytics generated successfully!")
    print(f"Total revenue: {orders['revenue'].sum():,.2f}")
    print(f"Average order value: {orders['revenue'].mean():,.2f}")

    return product_revenue, region_revenue, top_products

# Run the packaged pipeline end-to-end on the same input file used above.
# Note: this rewrites the three CSV files already saved by the earlier export cells.
product_rev, region_rev, top_prod = generate_sales_analytics("../data/sales_data.csv")

