In [4]:
import pandas as pd

# Read the raw sales records from disk
file_path = "Datasets/test_dataset_1.csv"  # Point this at your own CSV if it lives elsewhere
df = pd.read_csv(file_path)

# Aggregation spec: pandas aggregation name -> suffix used in the result column name
stat_suffixes = {
    'mean': 'Mean',
    'median': 'Median',
    'std': 'StdDev',
    'sum': 'Sum',
    'min': 'Min',
    'max': 'Max',
}

# Which statistics to compute for each source column, in output-column order
stats_per_column = {
    'Price': ['mean', 'median', 'std'],
    'QuantitySold': ['mean', 'median', 'std'],
    'TotalSales': ['sum', 'mean', 'median', 'std', 'min', 'max'],
}

# Expand the spec into named aggregations, e.g. Price_Mean=('Price', 'mean')
named_aggs = {
    f"{column}_{stat_suffixes[stat]}": (column, stat)
    for column, stats in stats_per_column.items()
    for stat in stats
}

# One row of descriptive statistics per (Category, ProductName) pair.
# The *_StdDev columns come out as NaN for any pair that has a single row,
# because the sample standard deviation is undefined for one observation.
product_groups = df.groupby(['Category', 'ProductName'])
grouped_stats = product_groups.agg(**named_aggs).reset_index()

# Dense rank of each product inside its category; rank 1 = highest summed TotalSales
sales_within_category = grouped_stats.groupby('Category')['TotalSales_Sum']
grouped_stats['Product_Rank_Within_Category'] = sales_within_category.rank(method='dense', ascending=False)

# Summed sales per category, largest first, with a dense rank attached
category_totals = grouped_stats.groupby('Category')['TotalSales_Sum'].sum()
category_rankings = category_totals.sort_values(ascending=False).reset_index()
category_rankings['Category_Rank'] = category_rankings['TotalSales_Sum'].rank(method='dense', ascending=False)

# Show the per-product statistics
print("Descriptive Statistics for Each Product in Each Category:")
print(grouped_stats.head(10))  # First 10 rows only (frame order, not sales order), for brevity

# Show how the categories rank against each other
print("\nCategory Rankings:")
print(category_rankings)


Descriptive Statistics for Each Product in Each Category:
      Category  ProductName  Price_Mean  Price_Median  Price_StdDev  \
0  Accessories  Product 101       11.44         11.44           NaN   
1  Accessories  Product 120      114.01        114.01           NaN   
2  Accessories  Product 122       53.70         53.70           NaN   
3  Accessories  Product 134       59.97         59.97           NaN   
4  Accessories  Product 136      126.06        126.06           NaN   
5  Accessories  Product 142       20.67         20.67           NaN   
6  Accessories  Product 155      132.65        132.65           NaN   
7  Accessories  Product 167       11.60         11.60           NaN   
8  Accessories   Product 17       27.54         27.54           NaN   
9  Accessories  Product 178      100.76        100.76           NaN   

   QuantitySold_Mean  QuantitySold_Median  QuantitySold_StdDev  \
0              243.0                243.0                  NaN   
1              450.0        

In [5]:
# Overall Sales Ranking: order every row of df by TotalSales, highest first.
# NOTE(review): this ranks individual rows of df; if a product can occur on
# more than one row its sales are not aggregated here - confirm one row per product.
overall_sales_ranking = df[['ProductName', 'Category', 'TotalSales']].sort_values(by='TotalSales', ascending=False)

# Display the top 10 products by total sales
print("Overall Sales Ranking:")
print(overall_sales_ranking.head(10))

# Category-Wise Sales Ranking: categories in ascending order and, inside each
# category, rows from highest to lowest TotalSales (missing values sort last).
category_wise_sales_ranking = df[['ProductName', 'Category', 'TotalSales']].sort_values(by=['Category', 'TotalSales'], ascending=[True, False])

# Top product in each category = the first row of each category in the sorted frame.
# drop_duplicates keeps that row intact. GroupBy.first() is deliberately avoided:
# it takes the first NON-NULL value of each column independently, so a missing
# ProductName on the best-selling row would silently be replaced by the name of
# a different product while TotalSales still came from the best-selling row.
category_wise_top_products = (
    category_wise_sales_ranking
    .dropna(subset=['Category'])  # rows without a category were never reported by the groupby either
    .drop_duplicates(subset='Category', keep='first')
    .loc[:, ['Category', 'ProductName', 'TotalSales']]  # same column order as before
    .reset_index(drop=True)
)
print("\nCategory-Wise Top Products:")
print(category_wise_top_products)

# Category Performance: total TotalSales per category, best-performing category first
category_performance = df.groupby('Category')['TotalSales'].sum().sort_values(ascending=False).reset_index()

# Display the ranked categories
print("\nCategory Performance:")
print(category_performance)


Overall Sales Ranking:
     ProductName     Category  TotalSales
201  Product 202         Home    72504.56
72    Product 73      Kitchen    69715.29
819  Product 820      Kitchen    67953.76
21    Product 22  Accessories    67572.02
38    Product 39  Electronics    67064.40
684  Product 685  Electronics    66379.82
210  Product 211         Toys    65220.90
954  Product 955         Toys    65045.10
386  Product 387         Toys    64926.40
671  Product 672       Sports    64269.18

Category-Wise Top Products:
      Category  ProductName  TotalSales
0  Accessories   Product 22    67572.02
1       Beauty  Product 722    63844.48
2    Beverages   Product 61    63251.28
3        Books   Product 40    63858.45
4  Electronics   Product 39    67064.40
5      Fashion  Product 930    62025.60
6         Home  Product 202    72504.56
7      Kitchen   Product 73    69715.29
8       Sports  Product 672    64269.18
9         Toys  Product 211    65220.90

Category Performance:
      Category  TotalSa