In [1]:
import numpy as np
import pandas as pd

# Seed NumPy's legacy global generator so the random quantities are reproducible.
np.random.seed(7)

# Lookup tables keyed by product name: category and unit price.
category_map = {'espresso': 'drink', 'latte': 'drink', 'tea': 'drink', 'croissant': 'food'}
price_map = {'espresso': 2.50, 'latte': 3.80, 'tea': 1.90, 'croissant': 2.20}

# Three parallel lists; position i in each describes sales row i.
cities = [
    'Kyiv', 'Kyiv', 'Kyiv', 'Lviv', 'Lviv', 'Lviv',
    'Odesa', 'Odesa', 'Odesa', 'Kyiv', 'Lviv', 'Odesa',
]
shops = [
    'K1', 'K1', 'K2', 'L1', 'L2', 'L2',
    'O1', 'O1', 'O2', 'K2', 'L1', 'O2',
]
products = [
    'espresso', 'latte', 'croissant', 'espresso', 'tea', 'croissant',
    'latte', 'espresso', 'tea', 'tea', 'latte', 'croissant',
]

# One random quantity per row, drawn from the integers 10..49 (upper bound is exclusive).
qty = np.random.randint(10, 50, size=len(products))

# Start from the three descriptive columns, then append the derived ones in
# the order category, price, qty.
sales = pd.DataFrame({'city': cities, 'shop': shops, 'product': products})
sales['category'] = [category_map[item] for item in products]
sales['price'] = [price_map[item] for item in products]
sales['qty'] = qty

sales

Unnamed: 0,city,shop,product,category,price,qty
0,Kyiv,K1,espresso,drink,2.5,14
1,Kyiv,K1,latte,drink,3.8,35
2,Kyiv,K2,croissant,food,2.2,13
3,Lviv,L1,espresso,drink,2.5,29
4,Lviv,L2,tea,drink,1.9,33
5,Lviv,L2,croissant,food,2.2,49
6,Odesa,O1,latte,drink,3.8,38
7,Odesa,O1,espresso,drink,2.5,24
8,Odesa,O2,tea,drink,1.9,33
9,Kyiv,K2,tea,drink,1.9,18


In [2]:
# Row-level revenue: unit price multiplied by quantity, stored as a new column on `sales`.
sales['revenue'] = sales['price'].mul(sales['qty'])
sales

Unnamed: 0,city,shop,product,category,price,qty,revenue
0,Kyiv,K1,espresso,drink,2.5,14,35.0
1,Kyiv,K1,latte,drink,3.8,35,133.0
2,Kyiv,K2,croissant,food,2.2,13,28.6
3,Lviv,L1,espresso,drink,2.5,29,72.5
4,Lviv,L2,tea,drink,1.9,33,62.7
5,Lviv,L2,croissant,food,2.2,49,107.8
6,Odesa,O1,latte,drink,3.8,38,144.4
7,Odesa,O1,espresso,drink,2.5,24,60.0
8,Odesa,O2,tea,drink,1.9,33,62.7
9,Kyiv,K2,tea,drink,1.9,18,34.2


In [3]:
# Total quantity sold in each city.
sales.groupby('city')['qty'].agg('sum')

city
Kyiv      80
Lviv     146
Odesa    131
Name: qty, dtype: int64

In [4]:
# Total revenue in each city (the revenue column grouped by the city column).
sales['revenue'].groupby(sales['city']).sum()

city
Kyiv     230.8
Lviv     376.0
Odesa    346.3
Name: revenue, dtype: float64

In [5]:
# Plain (unweighted) average of the unit prices appearing in each city's rows.
sales.groupby('city')['price'].agg('mean')

city
Kyiv     2.6
Lviv     2.6
Odesa    2.6
Name: price, dtype: float64

In [8]:
def value_range(series):
    """Return the spread of ``series``: its maximum minus its minimum.

    When this function is passed to ``agg`` in this notebook, its name
    becomes the label of the resulting column.
    """
    highest = series.max()
    lowest = series.min()
    return highest - lowest

# Per-city mean, standard deviation and max-min spread (value_range) of unit
# price and revenue. Kept as the cell's last expression so Jupyter shows the
# two-level column table through its rich display rather than print()'s
# plain-text rendering.
sales.groupby('city')[['price', 'revenue']].agg(['mean', 'std', value_range])

      price                      revenue                       
       mean      std value_range    mean        std value_range
city                                                           
Kyiv    2.6  0.83666         1.9  57.700  50.280679       104.4
Lviv    2.6  0.83666         1.9  94.000  32.421084        70.3
Odesa   2.6  0.83666         1.9  86.575  39.473060        84.4


In [10]:
# Revenue summed for every (city, category) pair; the result is a Series
# indexed by both keys.
city_category = sales['revenue'].groupby([sales['city'], sales['category']]).sum()
city_category

city   category
Kyiv   drink       202.2
       food         28.6
Lviv   drink       268.2
       food        107.8
Odesa  drink       267.1
       food         79.2
Name: revenue, dtype: float64

In [12]:
# 0.6 quantile (60th percentile) of unit price within each city.
q06 = sales.groupby('city')['price'].quantile(q=0.6)
q06

city
Kyiv     2.44
Lviv     2.44
Odesa    2.44
Name: price, dtype: float64

In [17]:
# For every city print: its name, the number of distinct products sold there,
# and the product on the row with the largest qty, followed by a separator.
for name, grp in sales.groupby('city'):
    print(name)
    # nunique(dropna=False) counts distinct values the same way len(unique())
    # did (missing values included) without building the array of uniques.
    print(grp['product'].nunique(dropna=False))
    # A single .loc[row, column] lookup replaces the chained .loc[row]['product'],
    # which first materialised the whole row as an intermediate Series.
    print(grp.loc[grp['qty'].idxmax(), 'product'])
    print("___________________________")


Kyiv
4
latte
___________________________
Lviv
4
croissant
___________________________
Odesa
4
latte
___________________________


In [18]:
# City-level averages of every numeric column, with each column label
# prefixed by "mean_".
(
    sales.groupby('city')
    .mean(numeric_only=True)
    .rename(columns=lambda col: f'mean_{col}')
)

Unnamed: 0_level_0,mean_price,mean_qty,mean_revenue
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Kyiv,2.6,20.0,57.7
Lviv,2.6,36.5,94.0
Odesa,2.6,32.75,86.575
