In [1]:
import pandas as pd
import numpy as np

# **Loading datasets**

In [45]:
# Read the four source tables in a fixed order (employees, products, sales, stores)
# and bind each one to its own DataFrame.
# NOTE(review): the paths are absolute (/content/...), presumably a hosted notebook
# runtime — confirm before running elsewhere.
dfEmp, dfPro, dfSales, dfStor = (
    pd.read_csv(csvPath)
    for csvPath in (
        r"/content/employees.csv",
        r"/content/products.csv",
        r"/content/sales.csv",
        r"/content/stores.csv",
    )
)

# **Checking for Null values & cleaning**

In [46]:
# Report, for each table in load order (employees, products, sales, stores),
# whether any of its columns contains at least one missing value.
for frame in (dfEmp, dfPro, dfSales, dfStor):
    hasNulls = frame.isna().sum().any()
    print(f"Null values: {hasNulls}")

Null values: False
Null values: False
Null values: True
Null values: False


In [47]:
# Discard every sales row that has a missing value in any column.
dfSales = dfSales.dropna(axis=0, how="any")

In [48]:
# Re-run the missing-value check on the sales table after the rows were dropped.
salesHasNulls = dfSales.isna().sum().any()
print(f"Null values: {salesHasNulls}")

Null values: False


# **Calculate revenue, discount percentage & profit margins**

In [50]:
# Attach product attributes to each sale; the inner join keeps only rows whose
# productID appears in both tables.
df = pd.merge(dfPro, dfSales, on="productID", how="inner")

In [64]:
# Derived per-row sales metrics, added to df in place.

# revenue: units sold times the actual selling price.
df["revenue"] = df["quantity"].mul(df["sellprice"])
# profit: revenue minus what the sold units cost.
df["profit"] = df["revenue"].sub(df["quantity"].mul(df["cost"]))
# discountPercentage: how far sellprice sits below price, in percent, 2 decimals.
df["discountPercentage"] = df["sellprice"].div(df["price"]).mul(100).rsub(100).round(2)
# totalCost: units sold times unit cost.
df["totalCost"] = df["quantity"].mul(df["cost"])
# profitMargins: profit as a fraction of revenue (not a percentage), 2 decimals.
df["profitMargins"] = df["profit"].div(df["revenue"]).round(2)

In [66]:
# Preview only the first rows instead of rendering the entire merged table.
df.head()

Unnamed: 0,productID,name,category,price,sellprice,cost,createdAt,saleID,storeID,employeeID,quantity,saleDate,revenue,profit,discountPercentage,profitMargins,totalCost
0,1,Laptop Pro 14,Electronics,1200.0,999,900,58:32.6,1,1,1,2.0,59:59.6,1998.0,198.0,16.75,0.1,1800.0
1,2,Organic Apples,Grocery,3.5,3,2,58:32.6,2,2,2,100.0,59:59.6,300.0,100.0,14.29,0.33,200.0
2,3,Cotton T-Shirt,Apparel,25.0,21,10,58:32.6,3,3,3,30.0,59:59.6,630.0,330.0,16.0,0.52,300.0
3,4,Bluetooth Speaker,Electronics,60.0,55,40,58:32.6,4,4,4,5.0,59:59.6,275.0,75.0,8.33,0.27,200.0
4,5,LED Bulb Pack,Home Goods,15.0,12,8,58:32.6,5,5,5,20.0,02-06-2025 11:00,240.0,80.0,20.0,0.33,160.0


In [89]:
# Total revenue per product, labelled with the product name, highest revenue first.
productSummary = (
    df.groupby("productID")
    .agg(productRevenue=("revenue", "sum"))
    .merge(dfPro[["productID", "name"]], on="productID", how="inner")
    .sort_values("productRevenue", ascending=False)
)

# Total revenue per store, labelled with the store name, highest revenue first.
storeSummary = (
    df.groupby("storeID")
    .agg(storeRevenue=("revenue", "sum"))
    .merge(dfStor[["storeID", "name"]], on="storeID", how="inner")
    .sort_values("storeRevenue", ascending=False)
)

In [100]:
# Show the five highest-revenue products and stores as plain-text tables, each
# framed by a header line and a closing rule followed by a blank line.
closingRule = "-----------------------------------------------------------\n"

print("------------------Product Summary--------------------------")
print(productSummary.loc[:, ["name", "productRevenue"]].head(5))
print(closingRule)

print("------------------Store Summary----------------------------")
print(storeSummary.loc[:, ["name", "storeRevenue"]].head(5))
print(closingRule)

------------------Product Summary--------------------------
                name  productRevenue
0      Laptop Pro 14          1998.0
2     Cotton T-Shirt           630.0
1     Organic Apples           300.0
3  Bluetooth Speaker           275.0
4      LED Bulb Pack           240.0
-----------------------------------------------------------

------------------Store Summary----------------------------
                name  storeRevenue
0    Urban Mart - NY        1998.0
2    FreshStore - TX         630.0
1     SuperSave - LA         300.0
3      MegaMart - IL         275.0
4  BudgetBazaar - FL         240.0
-----------------------------------------------------------



# **Deliverables**

*   Cleaned dataset
*   Summary of key metrics



In [101]:
# 1. cleaned dataset with calculated fields
# index=False: the row index is only the positional counter produced by the merge,
# so writing it would just add an unnamed first column to the exported file.
df.to_csv("summary.csv", index=False)

In [112]:
# Name of the store in the first row of storeSummary (sorted by revenue, descending).
storeSummary["name"].iloc[0]

'Urban Mart - NY'

In [114]:
# 2. summarizing key metrics - totalRevenue, totalProfit, best/worst product and store by revenue
# NOTE(review): an averageSaleValue metric is mentioned for this report but is not
# computed in this cell — TODO confirm whether it is still needed.

# Grand totals across every sale row.
totalRevenue = df["revenue"].sum()
totalProfit = df["profit"].sum()

# Both summaries are sorted by revenue in descending order, so the first row is the
# highest earner and the last row is the lowest.
topProduct = productSummary["name"].iloc[0]
bottomProduct = productSummary["name"].iloc[-1]

topStore = storeSummary["name"].iloc[0]
bottomStore = storeSummary["name"].iloc[-1]

# One-row frame (index label 0) holding all key metrics side by side.
keyMetrics = {
    "totalRevenue": totalRevenue,
    "totalProfit": totalProfit,
    "topProduct": topProduct,
    "bottomProduct": bottomProduct,
    "topStore": topStore,
    "bottomStore": bottomStore,
}
report = pd.DataFrame(keyMetrics, index=[0])

In [125]:
# Print a banner line, then leave the report as the cell's last expression so the
# notebook renders it as a table.
keyMetricsBanner = "------------------------------------Key metrics------------------------------------------------"
print(keyMetricsBanner)
report

------------------------------------Key metrics------------------------------------------------


Unnamed: 0,totalRevenue,totalProfit,topProduct,bottomProduct,topStore,bottomStore
0,3443.0,783.0,Laptop Pro 14,LED Bulb Pack,Urban Mart - NY,BudgetBazaar - FL


In [127]:
# Export the key-metrics report; index=False leaves out the single placeholder row
# label (0) so the file contains only the metric columns.
report.to_csv("key_metrics_report.csv", index=False)