In [2]:
import pandas as pd
# Step 1: Create Sample Dataset
data_rohit = {
    'Region':['North','South','East','West','North','South','East','West'],
    'Product':['A','B','A','B','C','C','B','A'],
    'Sales': [150,200,300,400,250,180,220,310],
    'Quantity': [10,15,20,25,12,14,16,18],
}

In [3]:
df_rohit = pd.DataFrame(data_rohit)
print("Sample Dataset:\n", df_rohit)

Sample Dataset:
   Region Product  Sales  Quantity
0  North       A    150        10
1  South       B    200        15
2   East       A    300        20
3   West       B    400        25
4  North       C    250        12
5  South       C    180        14
6   East       B    220        16
7   West       A    310        18


In [4]:
# Step 2: Grouping And Aggregation
# Aggregating Sales By Region (Sum Aggregation)
sales_by_region = df_rohit.groupby('Region')['Sales'].sum()
print("\nTotal Sales By Region:\n", sales_by_region)



Total Sales By Region:
 Region
East     520
North    400
South    380
West     710
Name: Sales, dtype: int64


In [5]:
# Aggregating Sales And Quantity By Product (Mean Aggregation)
mean_by_product = df_rohit.groupby('Product')[['Sales', 'Quantity']].mean()
print("\nMean Sales and Quantity by Product:\n", mean_by_product)



Mean Sales and Quantity by Product:
               Sales   Quantity
Product                       
A        253.333333  16.000000
B        273.333333  18.666667
C        215.000000  13.000000


In [6]:
# Aggregating Count Of Sales by Region (Count Aggregation)
count_by_region = df_rohit.groupby('Region')['Sales'].count()
print("\nCount of Sales Records by Region:\n", count_by_region)


Count of Sales Records by Region:
 Region
East     2
North    2
South    2
West     2
Name: Sales, dtype: int64


In [7]:
# Custom Aggregation: Calculate Min and Max Sales by Region
custom_aggregation = df_rohit.groupby('Region')['Sales'].agg(['min','max'])
print("\nCustom Aggregation (Min and Max Sales by Region):\n", custom_aggregation)



Custom Aggregation (Min and Max Sales by Region):
         min  max
Region          
East    220  300
North   150  250
South   180  200
West    310  400


In [8]:
# Step 3: Multi-Level Aggregation
# Aggregating Sales by Region and Product
multi_level_agg = df_rohit.groupby(['Region','Product'])['Sales'].sum()
print("\nSales by Region and Product:\n", multi_level_agg)


Sales by Region and Product:
 Region  Product
East    A          300
        B          220
North   A          150
        C          250
South   B          200
        C          180
West    A          310
        B          400
Name: Sales, dtype: int64


In [9]:
# Step 4: Reset Index for Multi-Level Aggregation
multi_level_agg_reset = multi_level_agg.reset_index()
print("\nSales by Region and Product (Reset Index):\n", multi_level_agg_reset)


Sales by Region and Product (Reset Index):
   Region Product  Sales
0   East       A    300
1   East       B    220
2  North       A    150
3  North       C    250
4  South       B    200
5  South       C    180
6   West       A    310
7   West       B    400


In [10]:
import pandas as pd
# Step 1: Extend Dataset with Time and Spatial Data
data = {
    'Region': ['North', 'South', 'East', 'West', 'North', 'South', 'East', 'West'],
    'City': ['City1', 'City2', 'City3', 'City4', 'City1','City2', 'City3', 'City4'],
    'Product': ['A', 'B', 'A', 'B', 'C', 'C', 'B', 'A'],
    'Sales': [150, 200, 300, 400, 250, 180, 220, 310],
    'Quantity': [10, 15, 20, 25, 12, 14, 16, 18],
    'Date': pd.to_datetime(['2024-01-01', '2024-01-02', '2024-02-01', '2024-02-03',
                            '2024-03-01', '2024-03-02', '2024-04-01', '2024-04-03'])
}

In [11]:
df_rohit = pd.DataFrame(data)
print("Extended Dataset:\n", df_rohit)

Extended Dataset:
   Region   City Product  Sales  Quantity       Date
0  North  City1       A    150        10 2024-01-01
1  South  City2       B    200        15 2024-01-02
2   East  City3       A    300        20 2024-02-01
3   West  City4       B    400        25 2024-02-03
4  North  City1       C    250        12 2024-03-01
5  South  City2       C    180        14 2024-03-02
6   East  City3       B    220        16 2024-04-01
7   West  City4       A    310        18 2024-04-03


In [13]:
# -----------
# Time Aggregation
# ------------

# Step 2: Set Date Column as Index (optional)
df_rohit.set_index('Date',inplace=True)

In [14]:
# Aggregating Sales by Month
monthly_sales = df_rohit.resample('M')['Sales'].sum()
print("\nTotal Sales by Month:\n", monthly_sales)


Total Sales by Month:
 Date
2024-01-31    350
2024-02-29    700
2024-03-31    430
2024-04-30    530
Freq: ME, Name: Sales, dtype: int64


  monthly_sales = df_rohit.resample('M')['Sales'].sum()


In [16]:
# Aggregating Sales by Quarter
quarterly_sales = df_rohit.resample('Q')['Sales'].sum()
print("\nTotal Sales by Quarter:\n", quarterly_sales)


Total Sales by Quarter:
 Date
2024-03-31    1480
2024-06-30     530
Freq: QE-DEC, Name: Sales, dtype: int64


  quarterly_sales = df_rohit.resample('Q')['Sales'].sum()


In [18]:
# Aggregating Sales by Year
yearly_sales = df_rohit.resample('Y')['Sales'].sum()
print("\nTotal Sales by Year:\n", yearly_sales)



Total Sales by Year:
 Date
2024-12-31    2010
Freq: YE-DEC, Name: Sales, dtype: int64


  yearly_sales = df_rohit.resample('Y')['Sales'].sum()


In [19]:
# Reset Index to Restore Original Structure
df_rohit.reset_index(inplace=True)

In [20]:
# -----------------
# Spatial Aggregation
# -----------------

# Step 3: Aggregating Sales by Region
sales_by_region = df_rohit.groupby('Region')['Sales'].sum()
print("\nTotal Sales by Region:\n", sales_by_region)


Total Sales by Region:
 Region
East     520
North    400
South    380
West     710
Name: Sales, dtype: int64


In [22]:
# Aggregating Sales by City
sales_by_city = df_rohit.groupby('City')['Sales'].sum()
print("\nTotal Sales by City:\n", sales_by_city)


Total Sales by City:
 City
City1    400
City2    380
City3    520
City4    710
Name: Sales, dtype: int64


In [23]:
# Aggregating Sales by Region and City
sales_by_region_city = df_rohit.groupby(['Region', 'City'])['Sales'].sum()
print("\nTotal Sales by Region and City:")
print(sales_by_region_city)


Total Sales by Region and City:
Region  City 
East    City3    520
North   City1    400
South   City2    380
West    City4    710
Name: Sales, dtype: int64


In [24]:
# Step 4: Export Spatial Aggregation Results
sales_by_region = sales_by_region_city.reset_index()

# sales_by_region_city_reset.to_csv("spatial_aggregation.csv")
# print("nSpatial aggregation data saved to 'spatial_aggregation.csv", index=False)
# print("n Spatial aggregation data saved to 'spatial_aggregation.csv' ")