In [426]:
import pandas as pd

# Ex03. Выборки и агрегации

## 1. Чтение файла auto.json

In [427]:
df = pd.read_json("data/auto.json", orient='records')
df = df.set_index('CarNumber')

## 2. Выборка

### 2.1. Строки, где fines превышает 2100

In [428]:
df[df['Fines'] > 2100]

Unnamed: 0_level_0,Refund,Fines,Make,Model
CarNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Y163O8161RUS,2,3200.000000,Ford,Focus
E432XX77RUS,1,6500.000000,Toyota,Camry
92918M178RUS,1,5700.000000,Ford,Focus
H234YH197RUS,2,6000.000000,Ford,Focus
E40577152RUS,1,8594.586466,Ford,Focus
...,...,...,...,...
O718MM163RUS,2,8594.586466,Ford,Focus
7065C8197RUS,2,11400.000000,Volkswagen,Passat
O22097197RUS,1,24300.000000,Ford,Focus
M0309X197RUS,1,22300.000000,Ford,Focus


### 2.2. Строки, где fines превышает 2100, а refund равен 2

In [429]:
df[(df['Fines'] > 2100) & (df['Refund'] == 2)]

Unnamed: 0_level_0,Refund,Fines,Make,Model
CarNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Y163O8161RUS,2,3200.000000,Ford,Focus
H234YH197RUS,2,6000.000000,Ford,Focus
707987163RUS,2,2200.000000,Ford,Focus
K330T8197RUS,2,8200.000000,Skoda,Octavia
M592CH197RUS,2,8594.586466,Skoda,Octavia
...,...,...,...,...
O136HO197RUS,2,7800.000000,Toyota,Corolla
O68897197RUS,2,12300.000000,Ford,Focus
O718MM163RUS,2,8594.586466,Ford,Focus
7065C8197RUS,2,11400.000000,Volkswagen,Passat


### 2.3. Строки, в которых находятся модели Focus и Corolla

In [430]:
df[df['Model'].isin(['Focus', 'Corolla'])]

Unnamed: 0_level_0,Refund,Fines,Make,Model
CarNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Y163O8161RUS,2,3200.000000,Ford,Focus
7184TT36RUS,1,2100.000000,Ford,Focus
X582HE161RUS,2,2000.000000,Ford,Focus
92918M178RUS,1,5700.000000,Ford,Focus
H234YH197RUS,2,6000.000000,Ford,Focus
...,...,...,...,...
Y163O8161RUS,2,1600.000000,Ford,Focus
M0309X197RUS,1,22300.000000,Ford,Focus
O673E8197RUS,2,600.000000,Ford,Focus
8610T8154RUS,1,2000.000000,Ford,Focus


### 2.3. Строки с номерами Y7689C197RUS, 92928M178RUS, 7788KT197RUS, H115YO163RUS, X758HY197RUS

In [431]:
df[df.index.isin(['Y7689C197RUS', '92928M178RUS', '7788KT197RUS', 'H115YO163RUS', 'X758HY197RUS'])]

Unnamed: 0_level_0,Refund,Fines,Make,Model
CarNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92928M178RUS,1,8594.586466,Ford,Focus
H115YO163RUS,1,2200.0,Ford,Focus
7788KT197RUS,2,12000.0,Ford,Focus
X758HY197RUS,2,24200.0,Ford,Focus
X758HY197RUS,2,72600.0,Ford,Focus
Y7689C197RUS,1,27000.0,Ford,Focus
92928M178RUS,1,600.0,Ford,Focus
H115YO163RUS,1,8594.586466,Ford,Focus
H115YO163RUS,2,1100.0,Ford,Focus
7788KT197RUS,2,8594.586466,Ford,Focus


## 3. Агрегации по make и model

### 3.1. Медианные штрафы, сгруппированные по make

In [432]:
df.groupby(['Make'], dropna=False).agg({
    'Fines': 'median'
})

Unnamed: 0_level_0,Fines
Make,Unnamed: 1_level_1
Audi,4200.0
BMW,6500.0
Ford,3500.0
Skoda,3250.0
Toyota,7700.0
Volkswagen,4300.0
Volvo,8500.0


### 3.2. Медианные штрафы, сгруппированные по make и model

In [433]:
gb_make_n_model = df.fillna('').groupby(['Make', 'Model'], dropna=False)
gb_make_n_model.agg({
    'Fines': 'median'
})

Unnamed: 0_level_0,Unnamed: 1_level_0,Fines
Make,Model,Unnamed: 2_level_1
Audi,,4200.0
BMW,,6500.0
Ford,Focus,3500.0
Ford,Mondeo,7650.0
Skoda,Octavia,3250.0
Toyota,Camry,7700.0
Toyota,Corolla,7700.0
Volkswagen,,7400.0
Volkswagen,Golf,4800.0
Volkswagen,Jetta,2800.0


### 3.3. Количество штрафов, сгруппированные по make и model

In [434]:
gb_make_n_model.agg({
    'Fines': 'count'
})

Unnamed: 0_level_0,Unnamed: 1_level_0,Fines
Make,Model,Unnamed: 2_level_1
Audi,,1
BMW,,3
Ford,Focus,575
Ford,Mondeo,6
Skoda,Octavia,48
Toyota,Camry,16
Toyota,Corolla,18
Volkswagen,,3
Volkswagen,Golf,20
Volkswagen,Jetta,6


### 3.4. Минимальные и максимальные штрафы, сгруппированные по make и model

In [435]:
gb_make_n_model.agg({
    'Fines': ['max', 'min']
})

Unnamed: 0_level_0,Unnamed: 1_level_0,Fines,Fines
Unnamed: 0_level_1,Unnamed: 1_level_1,max,min
Make,Model,Unnamed: 2_level_2,Unnamed: 3_level_2
Audi,,4200.0,4200.0
BMW,,8594.586466,3000.0
Ford,Focus,180000.0,100.0
Ford,Mondeo,46200.0,1100.0
Skoda,Octavia,145000.0,300.0
Toyota,Camry,22400.0,500.0
Toyota,Corolla,34300.0,900.0
Volkswagen,,7900.0,1300.0
Volkswagen,Golf,168000.0,200.0
Volkswagen,Jetta,46000.0,500.0


### 3.4. Стандартное отклонение штрафов, сгруппированных по make и model

In [436]:
gb_make_n_model.agg({
    'Fines': 'std'
}).fillna('-')

Unnamed: 0_level_0,Unnamed: 1_level_0,Fines
Make,Model,Unnamed: 2_level_1
Audi,,-
BMW,,2826.561226
Ford,Focus,15041.269437
Ford,Mondeo,18987.329108
Skoda,Octavia,24339.742174
Toyota,Camry,6410.250654
Toyota,Corolla,9629.325617
Volkswagen,,3674.688195
Volkswagen,Golf,36950.83995
Volkswagen,Jetta,17743.026799


## 4. Агрегации с car number

### 4.1. Номера автомобилей, сгруппированные по количеству штрафов

In [437]:
fines_counts = pd.DataFrame(df.groupby('CarNumber').size())
fines_counts = fines_counts.rename(columns = {0: 'FinesCount'})
fines_counts = fines_counts.sort_values('FinesCount', ascending=False)
fines_counts

Unnamed: 0_level_0,FinesCount
CarNumber,Unnamed: 1_level_1
Y7689C197RUS,4
92928M178RUS,4
7788KT197RUS,4
H115YO163RUS,3
X758HY197RUS,3
...,...
E42577152RUS,1
E42377152RUS,1
E41977152RUS,1
E41577152RUS,1


### 4.2. Строки, соответствующие первому номеру в списке

In [438]:
df.loc[fines_counts.idxmax()]

Unnamed: 0_level_0,Refund,Fines,Make,Model
CarNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Y7689C197RUS,1,27000.0,Ford,Focus
Y7689C197RUS,2,9000.0,Ford,Focus
Y7689C197RUS,2,45000.0,Ford,Focus
Y7689C197RUS,1,36000.0,Ford,Focus


### 4.3. Номера автомобилей, сгруппированных по сумме штрафов

In [439]:
fines_sum = df.groupby('CarNumber').agg({'Fines': 'sum'}).sort_values('Fines', ascending=False)
fines_sum

Unnamed: 0_level_0,Fines
CarNumber,Unnamed: 1_level_1
X758HY197RUS,242000.0
9020YC197RUS,217500.0
M0279X197RUS,216000.0
Y352O8197RUS,207200.0
Y778EE197RUS,192000.0
...,...
83218C154RUS,100.0
Y195O8161RUS,100.0
K376HE161RUS,100.0
705787163RUS,100.0


### 4.4. Строки, соответствующие первому номеру в списке

In [440]:
df.loc[fines_sum.idxmax()]

Unnamed: 0_level_0,Refund,Fines,Make,Model
CarNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
X758HY197RUS,2,24200.0,Ford,Focus
X758HY197RUS,2,72600.0,Ford,Focus
X758HY197RUS,2,145200.0,Ford,Focus


### 4.5. Номера автомобилей, связанные с различными моделями

In [441]:
result = df[['Model']].groupby('CarNumber').agg({'Model': 'count'}).rename(columns = {'Model': 'ModelCount'})
result = result.sort_values('ModelCount', ascending=False)
result[result['ModelCount'] > 1]

Unnamed: 0_level_0,ModelCount
CarNumber,Unnamed: 1_level_1
7788KT197RUS,4
Y7689C197RUS,4
92928M178RUS,4
7361C8197RUS,3
X758HY197RUS,3
...,...
Y273O8197RUS,2
89587X197RUS,2
90159H178RUS,2
9020YC197RUS,2
