In [1]:
import numpy as np
import pandas as pd

In [2]:
dates = pd.date_range(start='2024-01-01', end='2025-08-31', freq='D')
products = ['Laptop', 'Smartphone', 'Airpods', 'Tablet', 'Headphones']
regions = ['North', 'South', 'East', 'West']

In [3]:
sales_df = pd.DataFrame({
    'Date': np.random.choice(dates, 1000),
    'Region': np.random.choice(regions, 1000),
    'Product': np.random.choice(products, 1000),
    'Number of Sales': np.random.randint(100, 3000, 1000),
    'Units': np.random.randint(1, 10, 1000),
}, index=np.arange(1, 1001))

sales_df['Revenue'] = sales_df['Number of Sales'] * sales_df['Units']

sales_df

Unnamed: 0,Date,Region,Product,Number of Sales,Units,Revenue
1,2024-08-13,North,Headphones,222,1,222
2,2025-01-13,West,Laptop,1806,1,1806
3,2024-05-08,East,Laptop,849,6,5094
4,2025-01-31,East,Tablet,1823,7,12761
5,2024-01-15,North,Headphones,2201,8,17608
...,...,...,...,...,...,...
996,2025-06-17,West,Smartphone,1509,8,12072
997,2024-08-13,North,Smartphone,2913,2,5826
998,2024-01-14,West,Laptop,1100,9,9900
999,2025-04-04,East,Laptop,1271,2,2542


In [4]:
sales_df.describe()

Unnamed: 0,Date,Number of Sales,Units,Revenue
count,1000,1000.0,1000.0,1000.0
mean,2024-10-29 15:17:16.799999744,1548.435,4.957,7736.494
min,2024-01-01 00:00:00,113.0,1.0,125.0
25%,2024-05-16 00:00:00,795.5,3.0,2457.0
50%,2024-10-25 00:00:00,1523.0,5.0,5757.5
75%,2025-04-05 06:00:00,2318.75,7.0,11610.75
max,2025-08-31 00:00:00,2999.0,9.0,26991.0
std,,863.378441,2.625651,6375.444787


In [5]:
sales_df

Unnamed: 0,Date,Region,Product,Number of Sales,Units,Revenue
1,2024-08-13,North,Headphones,222,1,222
2,2025-01-13,West,Laptop,1806,1,1806
3,2024-05-08,East,Laptop,849,6,5094
4,2025-01-31,East,Tablet,1823,7,12761
5,2024-01-15,North,Headphones,2201,8,17608
...,...,...,...,...,...,...
996,2025-06-17,West,Smartphone,1509,8,12072
997,2024-08-13,North,Smartphone,2913,2,5826
998,2024-01-14,West,Laptop,1100,9,9900
999,2025-04-04,East,Laptop,1271,2,2542


In [6]:
sales_df.groupby('Product')['Revenue'].sum()

Product
Airpods       1519861
Headphones    1673863
Laptop        1561454
Smartphone    1590886
Tablet        1390430
Name: Revenue, dtype: int64

In [7]:
product_summary = sales_df.groupby('Product').agg({
    'Number of Sales': 'mean',
    'Revenue': 'sum'
})
product_summary

Unnamed: 0_level_0,Number of Sales,Revenue
Product,Unnamed: 1_level_1,Unnamed: 2_level_1
Airpods,1512.064356,1519861
Headphones,1476.490991,1673863
Laptop,1633.559783,1561454
Smartphone,1535.057971,1590886
Tablet,1604.783784,1390430


In [8]:
sales_df.groupby(['Product', 'Region']).agg({
    'Revenue': 'mean',
    'Number of Sales': 'sum'
})

Unnamed: 0_level_0,Unnamed: 1_level_0,Revenue,Number of Sales
Product,Region,Unnamed: 2_level_1,Unnamed: 3_level_1
Airpods,East,8044.533333,68962
Airpods,North,7442.464286,84968
Airpods,South,7168.039216,75333
Airpods,West,7510.18,76174
Headphones,East,6712.392157,65107
Headphones,North,7933.684211,90455
Headphones,South,7225.333333,79949
Headphones,West,8152.383333,92270
Laptop,East,8244.5,77245
Laptop,North,8625.15,60418


In [9]:
sales_df.groupby(['Product', 'Region']).agg({
    'Revenue': 'mean',
    'Number of Sales': 'sum'
}).unstack(level='Region')

Unnamed: 0_level_0,Revenue,Revenue,Revenue,Revenue,Number of Sales,Number of Sales,Number of Sales,Number of Sales
Region,East,North,South,West,East,North,South,West
Product,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Airpods,8044.533333,7442.464286,7168.039216,7510.18,68962,84968,75333,76174
Headphones,6712.392157,7933.684211,7225.333333,8152.383333,65107,90455,79949,92270
Laptop,8244.5,8625.15,8570.65,8533.678571,77245,60418,67704,95208
Smartphone,7682.839286,7212.704545,7896.422222,7870.145161,89230,64892,68780,94855
Tablet,6674.22,7584.222222,7209.210526,8875.285714,73993,60021,82537,80334


In [10]:
sales_df.groupby(['Product', 'Region']).agg({
    'Revenue': 'mean',
    'Number of Sales': 'sum'
}).unstack(level='Product')

Unnamed: 0_level_0,Revenue,Revenue,Revenue,Revenue,Revenue,Number of Sales,Number of Sales,Number of Sales,Number of Sales,Number of Sales
Product,Airpods,Headphones,Laptop,Smartphone,Tablet,Airpods,Headphones,Laptop,Smartphone,Tablet
Region,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
East,8044.533333,6712.392157,8244.5,7682.839286,6674.22,68962,65107,77245,89230,73993
North,7442.464286,7933.684211,8625.15,7212.704545,7584.222222,84968,90455,60418,64892,60021
South,7168.039216,7225.333333,8570.65,7896.422222,7209.210526,75333,79949,67704,68780,82537
West,7510.18,8152.383333,8533.678571,7870.145161,8875.285714,76174,92270,95208,94855,80334


In [11]:
sales_df

Unnamed: 0,Date,Region,Product,Number of Sales,Units,Revenue
1,2024-08-13,North,Headphones,222,1,222
2,2025-01-13,West,Laptop,1806,1,1806
3,2024-05-08,East,Laptop,849,6,5094
4,2025-01-31,East,Tablet,1823,7,12761
5,2024-01-15,North,Headphones,2201,8,17608
...,...,...,...,...,...,...
996,2025-06-17,West,Smartphone,1509,8,12072
997,2024-08-13,North,Smartphone,2913,2,5826
998,2024-01-14,West,Laptop,1100,9,9900
999,2025-04-04,East,Laptop,1271,2,2542


In [12]:
def get_month(date):
    return date.strftime('%b')

month_sales_summary = sales_df.groupby([sales_df['Date'].apply(get_month), 'Product'])['Number of Sales'].sum().unstack(level='Date')
month_sales_summary

Date,Apr,Aug,Dec,Feb,Jan,Jul,Jun,Mar,May,Nov,Oct,Sep
Product,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Airpods,14356,42117,25031,30660,22397,30439,28115,46508,23447,17776,18937,5654
Headphones,30720,17255,15073,26831,38529,36468,30385,49303,33678,9387,11632,28520
Laptop,38402,20848,17544,22840,25126,43501,29042,26169,36368,13878,16406,10451
Smartphone,34564,45998,15273,29092,22184,37260,33004,25215,40281,12051,13243,9592
Tablet,43654,21214,10895,25607,31984,22222,32683,26617,32650,8932,14275,26152


In [13]:
month_sales_summary.loc['Airpods'].max()

np.int64(46508)

In [33]:
# To find a record (and the related feature) when you have the data:
month_sales_summary.loc['Airpods'][month_sales_summary.loc['Airpods'] == month_sales_summary.loc['Airpods'].max()]

Date
Mar    46508
Name: Airpods, dtype: int64

In [15]:
month_sales_summary.loc['Airpods']

Date
Apr    14356
Aug    42117
Dec    25031
Feb    30660
Jan    22397
Jul    30439
Jun    28115
Mar    46508
May    23447
Nov    17776
Oct    18937
Sep     5654
Name: Airpods, dtype: int64

In [16]:
def get_summary(df, groupby, column):
    return df.groupby(groupby)[column].agg([
        'count', 'sum', 'mean', 'median', 'min', 'max', 'std'
    ])
    
get_summary(sales_df, 'Product', 'Revenue')

Unnamed: 0_level_0,count,sum,mean,median,min,max,std
Product,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Airpods,202,1519861,7524.064356,5793.5,125,26973,6007.524096
Headphones,222,1673863,7539.923423,5331.0,129,26991,6615.437272
Laptop,184,1561454,8486.163043,6604.0,126,26226,6899.877454
Smartphone,207,1590886,7685.439614,5400.0,160,25713,6455.983285
Tablet,185,1390430,7515.837838,6426.0,248,25470,5828.459019


In [17]:
sales_df

Unnamed: 0,Date,Region,Product,Number of Sales,Units,Revenue
1,2024-08-13,North,Headphones,222,1,222
2,2025-01-13,West,Laptop,1806,1,1806
3,2024-05-08,East,Laptop,849,6,5094
4,2025-01-31,East,Tablet,1823,7,12761
5,2024-01-15,North,Headphones,2201,8,17608
...,...,...,...,...,...,...
996,2025-06-17,West,Smartphone,1509,8,12072
997,2024-08-13,North,Smartphone,2913,2,5826
998,2024-01-14,West,Laptop,1100,9,9900
999,2025-04-04,East,Laptop,1271,2,2542


In [None]:
sales_df.groupby('Product').get_group('Laptop')

Unnamed: 0,Date,Region,Product,Number of Sales,Units,Revenue
2,2025-01-13,West,Laptop,1806,1,1806
3,2024-05-08,East,Laptop,849,6,5094
7,2024-12-03,West,Laptop,2124,9,19116
8,2025-01-14,West,Laptop,2594,2,5188
9,2025-04-10,South,Laptop,826,5,4130
...,...,...,...,...,...,...
983,2024-09-20,North,Laptop,1440,7,10080
984,2025-06-19,West,Laptop,174,3,522
994,2025-02-06,East,Laptop,434,9,3906
998,2024-01-14,West,Laptop,1100,9,9900


In [27]:
sales_df.groupby('Product').get_group('Laptop').groupby('Region').get_group('South')

Unnamed: 0,Date,Region,Product,Number of Sales,Units,Revenue
9,2025-04-10,South,Laptop,826,5,4130
18,2025-05-21,South,Laptop,698,5,3490
34,2024-05-31,South,Laptop,286,7,2002
81,2024-12-17,South,Laptop,2635,8,21080
86,2024-01-29,South,Laptop,1271,7,8897
147,2025-01-05,South,Laptop,2848,4,11392
194,2025-04-30,South,Laptop,1103,3,3309
197,2024-05-24,South,Laptop,425,5,2125
250,2024-09-18,South,Laptop,1472,2,2944
252,2025-05-30,South,Laptop,2283,4,9132


In [26]:
sales_df.groupby('Product').get_group('Laptop').groupby('Region').get_group('South')['Revenue'].sum()

np.int64(342826)