In [8]:
import numpy as np
import pandas as pd

## Basic Aggregations

In [9]:

# Toy sales ledger: nine rows, one per (product, region) sale.
# Product and Price cycle through the same three values, so they are
# written as a repeated triple instead of being spelled out in full.
data = dict(
    Product=list('ABC') * 3,
    Region=[
        'North', 'South', 'East',
        'West', 'North', 'South',
        'East', 'West', 'North',
    ],
    Units_Sold=[10, 15, 20, 5, 10, 15, 10, 5, 15],
    Price=[100, 200, 150] * 3,
)
df_sales = pd.DataFrame(data, columns=['Product', 'Region', 'Units_Sold', 'Price'])

In [10]:
# Revenue per row = units sold x unit price (element-wise product of the
# two columns); stored as a new 'Revenue' column on df_sales.
df_sales['Revenue'] = df_sales['Units_Sold'].mul(df_sales['Price'])
print(df_sales)

  Product Region  Units_Sold  Price  Revenue
0       A  North          10    100     1000
1       B  South          15    200     3000
2       C   East          20    150     3000
3       A   West           5    100      500
4       B  North          10    200     2000
5       C  South          15    150     2250
6       A   East          10    100     1000
7       B   West           5    200     1000
8       C  North          15    150     2250


In [11]:
# Grab each column once, then derive its sum and mean together.
units_col = df_sales['Units_Sold']
revenue_col = df_sales['Revenue']

total_units, average_units = units_col.sum(), units_col.mean()
total_revenue, average_revenue = revenue_col.sum(), revenue_col.mean()

# One print call, one line per metric (totals first, then averages).
print(
    f"Total Units Sold: {total_units}",
    f"Total Revenue: {total_revenue}",
    f"Average Units Sold: {average_units}",
    f"Average Revenue: {average_revenue}",
    sep="\n",
)

Total Units Sold: 105
Total Revenue: 16000
Average Units Sold: 11.666666666666666
Average Revenue: 1777.7777777777778


## Grouping Data

In [12]:
# Per-product totals of units and revenue, written with named aggregation
# (output column = (source column, reducer)).
product_group = df_sales.groupby('Product').agg(
    Units_Sold=('Units_Sold', 'sum'),
    Revenue=('Revenue', 'sum'),
)
print(product_group)

         Units_Sold  Revenue
Product                     
A                25     2500
B                30     6000
C                50     7500


In [13]:
# Per-region averages: select the two numeric columns of interest from the
# grouped frame and reduce both with mean().
region_group = (
    df_sales
    .groupby('Region')[['Units_Sold', 'Revenue']]
    .mean()
)
print(region_group)


        Units_Sold  Revenue
Region                     
East     15.000000   2000.0
North    11.666667   1750.0
South    15.000000   2625.0
West      5.000000    750.0


In [14]:
# Totals for every (Product, Region) pair; grouping on two keys yields a
# two-level row index in the result.
product_region_group = (
    df_sales
    .groupby(['Product', 'Region'])[['Units_Sold', 'Revenue']]
    .sum()
)
print(product_region_group)


                Units_Sold  Revenue
Product Region                     
A       East            10     1000
        North           10     1000
        West             5      500
B       North           10     2000
        South           15     3000
        West             5     1000
C       East            20     3000
        North           15     2250
        South           15     2250


## Sorting and Ranking Data

In [15]:
# Order the sales rows from fewest to most units sold (ascending is the
# sort_values default). Returns a new frame; df_sales itself is untouched.
# Note: the default sort kind is not a stable sort, so the relative order of
# rows with equal Units_Sold is not guaranteed.
sorted_units = df_sales.sort_values('Units_Sold')
print(sorted_units)

  Product Region  Units_Sold  Price  Revenue
3       A   West           5    100      500
7       B   West           5    200     1000
4       B  North          10    200     2000
0       A  North          10    100     1000
6       A   East          10    100     1000
1       B  South          15    200     3000
5       C  South          15    150     2250
8       C  North          15    150     2250
2       C   East          20    150     3000


In [16]:
# Highest-revenue rows first. Returns a new frame; df_sales is unchanged.
sorted_revenue = df_sales.sort_values('Revenue', ascending=False)
print(sorted_revenue)


  Product Region  Units_Sold  Price  Revenue
1       B  South          15    200     3000
2       C   East          20    150     3000
8       C  North          15    150     2250
5       C  South          15    150     2250
4       B  North          10    200     2000
0       A  North          10    100     1000
6       A   East          10    100     1000
7       B   West           5    200     1000
3       A   West           5    100      500


In [17]:
# Rank rows by revenue, largest revenue = rank 1. Tied rows share the average
# of the positions they span (pandas' default 'average' method, spelled out
# here), which is why fractional ranks such as 1.5 appear.
revenue_rank = df_sales['Revenue'].rank(method='average', ascending=False)
df_sales['Revenue_Rank'] = revenue_rank
print(df_sales)


  Product Region  Units_Sold  Price  Revenue  Revenue_Rank
0       A  North          10    100     1000           7.0
1       B  South          15    200     3000           1.5
2       C   East          20    150     3000           1.5
3       A   West           5    100      500           9.0
4       B  North          10    200     2000           5.0
5       C  South          15    150     2250           3.5
6       A   East          10    100     1000           7.0
7       B   West           5    200     1000           7.0
8       C  North          15    150     2250           3.5


## Merging DataFrames

In [19]:
# Product -> category lookup table, one row per product.
product_data = dict(
    Product=list('ABC'),
    Category=['Electronics', 'Furniture', 'Appliances'],
)
df_products = pd.DataFrame(product_data)

In [20]:
# Attach each sale's Category by joining the sales rows to the product
# lookup table on the shared 'Product' key.
#   how='inner'            - pandas' default join type, spelled out: sales
#                            whose Product has no lookup row are dropped.
#   validate='many_to_one' - raise pandas.errors.MergeError if 'Product' is
#                            duplicated in df_products. Without this guard a
#                            duplicated lookup row would silently duplicate
#                            the matching sales rows and inflate any totals
#                            computed from the merged frame.
df_merged = pd.merge(
    df_sales,
    df_products,
    on='Product',
    how='inner',
    validate='many_to_one',
)
print(df_merged)


  Product Region  Units_Sold  Price  Revenue  Revenue_Rank     Category
0       A  North          10    100     1000           7.0  Electronics
1       B  South          15    200     3000           1.5    Furniture
2       C   East          20    150     3000           1.5   Appliances
3       A   West           5    100      500           9.0  Electronics
4       B  North          10    200     2000           5.0    Furniture
5       C  South          15    150     2250           3.5   Appliances
6       A   East          10    100     1000           7.0  Electronics
7       B   West           5    200     1000           7.0    Furniture
8       C  North          15    150     2250           3.5   Appliances


In [21]:
# Total revenue per product category. The double brackets keep the result a
# one-column DataFrame rather than a Series.
category_group = df_merged.groupby('Category')[['Revenue']].sum()
print(category_group)


             Revenue
Category            
Appliances      7500
Electronics     2500
Furniture       6000
