In [1]:
import numpy as np
import pandas as pd

# Fixed seed for the legacy global NumPy RNG so the synthetic data is reproducible.
# NOTE: the order of the random draws below determines the generated values.
np.random.seed(24)

n = 48
cities = np.random.choice(['Kyiv', 'Lviv', 'Odesa'], size=n)
couriers = np.random.choice(['C1', 'C2', 'C3'], size=n)
package_id = np.arange(1001, 1001 + n)

# Package weights (kg) and delivery distances (km), rounded to 2 decimals.
weights = np.random.uniform(0.3, 12.0, size=n).round(2)
dist_km = np.random.gamma(shape=2.2, scale=4.5, size=n).round(2)

# The probability of an on-time delivery differs slightly between cities;
# any city that is neither Kyiv nor Lviv falls through to the default.
p_on_time = np.select(
    [cities == 'Kyiv', cities == 'Lviv'],
    [0.88, 0.83],
    default=0.86,
)
# One uniform draw per package: on time when the draw lands below the city's probability.
on_time = np.random.rand(n) < p_on_time

# One row per package; keyword order fixes the column order.
couriers_df = pd.DataFrame(dict(
    package_id=package_id,
    city=cities,
    courier=couriers,
    weight_kg=weights,
    distance_km=dist_km,
    on_time=on_time,
))

couriers_df

Unnamed: 0,package_id,city,courier,weight_kg,distance_km,on_time
0,1001,Odesa,C1,3.53,5.32,False
1,1002,Kyiv,C3,10.34,14.97,True
2,1003,Lviv,C2,10.7,27.32,True
3,1004,Lviv,C1,3.64,10.43,True
4,1005,Lviv,C1,8.02,1.35,True
5,1006,Kyiv,C3,11.67,18.47,True
6,1007,Kyiv,C3,9.62,7.63,True
7,1008,Odesa,C3,2.4,14.71,True
8,1009,Lviv,C2,9.48,9.73,False
9,1010,Odesa,C1,11.65,2.72,False


In [5]:
# Total distance per city, broadcast back onto every package row of that city.
couriers_df['city_distance_sum'] = (
    couriers_df['distance_km'].groupby(couriers_df['city']).transform('sum')
)
# Each package's fraction of its city's total distance.
couriers_df['distance_share'] = couriers_df['distance_km'].div(couriers_df['city_distance_sum'])
couriers_df


Unnamed: 0,package_id,city,courier,weight_kg,distance_km,on_time,city_distance_sum,distance_shared,distance_share
0,1001,Odesa,C1,3.53,5.32,False,113.58,0.046839,0.046839
1,1002,Kyiv,C3,10.34,14.97,True,167.81,0.089208,0.089208
2,1003,Lviv,C2,10.7,27.32,True,209.67,0.1303,0.1303
3,1004,Lviv,C1,3.64,10.43,True,209.67,0.049745,0.049745
4,1005,Lviv,C1,8.02,1.35,True,209.67,0.006439,0.006439
5,1006,Kyiv,C3,11.67,18.47,True,167.81,0.110065,0.110065
6,1007,Kyiv,C3,9.62,7.63,True,167.81,0.045468,0.045468
7,1008,Odesa,C3,2.4,14.71,True,113.58,0.129512,0.129512
8,1009,Lviv,C2,9.48,9.73,False,209.67,0.046406,0.046406
9,1010,Odesa,C1,11.65,2.72,False,113.58,0.023948,0.023948


In [6]:
# Standardise each package's distance within its (city, courier) group.
grp = couriers_df.groupby(['city','courier'])['distance_km']
mu = grp.transform('mean')
# Sample standard deviation (ddof=1): 0 when all values in a group are equal,
# NaN when the group has a single row.
sigma = grp.transform('std')

# Divide only where the spread is usable. `sigma > 0` is False for both 0 and NaN,
# so constant groups AND single-row groups get a z-score of 0.0 rather than NaN/inf.
z = (couriers_df['distance_km'] - mu).div(sigma).where(sigma > 0, 0.0)

couriers_df['zscore_distance'] = z
couriers_df

Unnamed: 0,package_id,city,courier,weight_kg,distance_km,on_time,city_distance_sum,distance_shared,distance_share,zscore_distance
0,1001,Odesa,C1,3.53,5.32,False,113.58,0.046839,0.046839,-0.093755
1,1002,Kyiv,C3,10.34,14.97,True,167.81,0.089208,0.089208,0.819983
2,1003,Lviv,C2,10.7,27.32,True,209.67,0.1303,0.1303,0.935961
3,1004,Lviv,C1,3.64,10.43,True,209.67,0.049745,0.049745,0.16055
4,1005,Lviv,C1,8.02,1.35,True,209.67,0.006439,0.006439,-1.191764
5,1006,Kyiv,C3,11.67,18.47,True,167.81,0.110065,0.110065,1.538665
6,1007,Kyiv,C3,9.62,7.63,True,167.81,0.045468,0.045468,-0.687198
7,1008,Odesa,C3,2.4,14.71,True,113.58,0.129512,0.129512,0.76607
8,1009,Lviv,C2,9.48,9.73,False,209.67,0.046406,0.046406,-0.685665
9,1010,Odesa,C1,11.65,2.72,False,113.58,0.023948,0.023948,-0.860302


In [7]:
# Share of on-time deliveries per city (mean of the boolean flag), repeated on each row.
couriers_df['city_on_time_rate'] = (
    couriers_df['on_time'].groupby(couriers_df['city']).transform('mean')
)
# Preview: the first three packages of every city, kept in original row order.
sample3 = (
    couriers_df[['city','package_id','on_time','city_on_time_rate']]
    .groupby('city')
    .head(3)
)
sample3

Unnamed: 0,city,package_id,on_time,city_on_time_rate
0,Odesa,1001,False,0.733333
1,Kyiv,1002,True,0.615385
2,Lviv,1003,True,0.8
3,Lviv,1004,True,0.8
4,Lviv,1005,True,0.8
5,Kyiv,1006,True,0.615385
6,Kyiv,1007,True,0.615385
7,Odesa,1008,True,0.733333
9,Odesa,1010,False,0.733333


In [10]:
def heaviest_row(g):
    """Return the row of ``g`` with the largest ``weight_kg``.

    Parameters
    ----------
    g : pandas.DataFrame
        Frame containing at least the columns ``weight_kg``, ``distance_km``
        and ``package_id``.

    Returns
    -------
    The ``weight_kg``, ``distance_km`` and ``package_id`` entries (in that
    order) of the row whose index label is ``g['weight_kg'].idxmax()``;
    on ties ``idxmax`` picks the first occurrence.
    """
    wanted = ['weight_kg','distance_km','package_id']
    top_label = g['weight_kg'].idxmax()
    return g.loc[top_label, wanted]

# Heaviest package per (city, courier) pair.
# The needed columns are selected before `.apply`, so the grouping columns are not
# passed to `heaviest_row`; this avoids the pandas deprecation warning about
# `DataFrameGroupBy.apply` operating on grouping columns and yields the same table.
heaviest = (
    couriers_df
    .groupby(['city','courier'])[['weight_kg','distance_km','package_id']]
    .apply(heaviest_row)
)
heaviest

  heaviest = couriers_df.groupby(['city','courier']).apply(heaviest_row)


Unnamed: 0_level_0,Unnamed: 1_level_0,weight_kg,distance_km,package_id
city,courier,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Kyiv,C1,7.88,14.57,1043
Kyiv,C2,7.69,29.15,1016
Kyiv,C3,11.67,18.47,1006
Lviv,C1,11.35,13.77,1034
Lviv,C2,10.7,27.32,1003
Lviv,C3,10.98,5.14,1018
Odesa,C1,11.65,2.72,1010
Odesa,C2,5.43,3.97,1036
Odesa,C3,6.69,13.42,1028
