In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('data/flights_sample_3m.csv')

In [3]:
df.groupby('AIRLINE')['DEP_DELAY'].mean()

AIRLINE
Alaska Airlines Inc.                   4.640018
Allegiant Air                         13.907797
American Airlines Inc.                12.609895
Delta Air Lines Inc.                   8.106748
Endeavor Air Inc.                      5.951135
Envoy Air                              6.728256
ExpressJet Airlines LLC d/b/a aha!    12.774462
Frontier Airlines Inc.                16.033574
Hawaiian Airlines Inc.                 5.089537
Horizon Air                            4.832775
JetBlue Airways                       18.322555
Mesa Airlines Inc.                    12.277383
PSA Airlines Inc.                      7.972124
Republic Airline                       5.804359
SkyWest Airlines Inc.                  9.458294
Southwest Airlines Co.                10.816957
Spirit Air Lines                      12.981880
United Air Lines Inc.                 11.217032
Name: DEP_DELAY, dtype: float64

In [4]:
df.groupby('AIRLINE').size()

AIRLINE
Alaska Airlines Inc.                  100467
Allegiant Air                          52738
American Airlines Inc.                383106
Delta Air Lines Inc.                  395239
Endeavor Air Inc.                     112463
Envoy Air                             121256
ExpressJet Airlines LLC d/b/a aha!     19082
Frontier Airlines Inc.                 64466
Hawaiian Airlines Inc.                 32114
Horizon Air                            20634
JetBlue Airways                       112844
Mesa Airlines Inc.                     65012
PSA Airlines Inc.                     107050
Republic Airline                      143107
SkyWest Airlines Inc.                 343737
Southwest Airlines Co.                576470
Spirit Air Lines                       95711
United Air Lines Inc.                 254504
dtype: int64

In [5]:
df.groupby(['ORIGIN', 'AIRLINE'])['DEP_DELAY'].mean()

ORIGIN  AIRLINE                           
ABE     Allegiant Air                         11.752688
        Delta Air Lines Inc.                  -1.093023
        Endeavor Air Inc.                     13.398950
        Envoy Air                              5.747126
        ExpressJet Airlines LLC d/b/a aha!    -3.818182
                                                ...    
YAK     Alaska Airlines Inc.                   0.183544
YKM     Horizon Air                            3.283784
YUM     Envoy Air                              6.400000
        Mesa Airlines Inc.                    11.870504
        SkyWest Airlines Inc.                  7.250375
Name: DEP_DELAY, Length: 2301, dtype: float64

In [6]:
print(df.groupby(['ORIGIN', 'AIRLINE'])['DEP_DELAY'].mean().reset_index())

     ORIGIN                             AIRLINE  DEP_DELAY
0       ABE                       Allegiant Air  11.752688
1       ABE                Delta Air Lines Inc.  -1.093023
2       ABE                   Endeavor Air Inc.  13.398950
3       ABE                           Envoy Air   5.747126
4       ABE  ExpressJet Airlines LLC d/b/a aha!  -3.818182
...     ...                                 ...        ...
2296    YAK                Alaska Airlines Inc.   0.183544
2297    YKM                         Horizon Air   3.283784
2298    YUM                           Envoy Air   6.400000
2299    YUM                  Mesa Airlines Inc.  11.870504
2300    YUM               SkyWest Airlines Inc.   7.250375

[2301 rows x 3 columns]


Custom Aggregation

In [7]:
print(df.groupby('AIRLINE')['DEP_DELAY'].agg(['mean', 'std']))

                                         mean        std
AIRLINE                                                 
Alaska Airlines Inc.                 4.640018  30.895956
Allegiant Air                       13.907797  66.790906
American Airlines Inc.              12.609895  62.682515
Delta Air Lines Inc.                 8.106748  45.691394
Endeavor Air Inc.                    5.951135  45.331615
Envoy Air                            6.728256  41.436867
ExpressJet Airlines LLC d/b/a aha!  12.774462  71.589832
Frontier Airlines Inc.              16.033574  56.990523
Hawaiian Airlines Inc.               5.089537  36.856980
Horizon Air                          4.832775  27.526428
JetBlue Airways                     18.322555  60.875447
Mesa Airlines Inc.                  12.277383  60.827800
PSA Airlines Inc.                    7.972124  46.302715
Republic Airline                     5.804359  43.501327
SkyWest Airlines Inc.                9.458294  59.529868
Southwest Airlines Co.         

In [8]:
print(df.groupby('AIRLINE').agg({
    'DEP_DELAY': 'mean',
    'ARR_DELAY': 'max',
    'TAXI_OUT': 'median'
}))

                                    DEP_DELAY  ARR_DELAY  TAXI_OUT
AIRLINE                                                           
Alaska Airlines Inc.                 4.640018      805.0      17.0
Allegiant Air                       13.907797     1736.0      12.0
American Airlines Inc.              12.609895     2934.0      16.0
Delta Air Lines Inc.                 8.106748     1241.0      14.0
Endeavor Air Inc.                    5.951135     2560.0      15.0
Envoy Air                            6.728256     1540.0      15.0
ExpressJet Airlines LLC d/b/a aha!  12.774462     1844.0      18.0
Frontier Airlines Inc.              16.033574     1416.0      15.0
Hawaiian Airlines Inc.               5.089537     1642.0      11.0
Horizon Air                          4.832775      757.0      12.0
JetBlue Airways                     18.322555     1815.0      15.0
Mesa Airlines Inc.                  12.277383     2034.0      15.0
PSA Airlines Inc.                    7.972124     1888.0      

In [9]:
df.groupby('AIRLINE')['DEP_DELAY'].agg(
    lambda x: (x > 15).mean()
)

AIRLINE
Alaska Airlines Inc.                  0.150487
Allegiant Air                         0.219007
American Airlines Inc.                0.182399
Delta Air Lines Inc.                  0.137497
Endeavor Air Inc.                     0.114420
Envoy Air                             0.139416
ExpressJet Airlines LLC d/b/a aha!    0.168641
Frontier Airlines Inc.                0.251590
Hawaiian Airlines Inc.                0.133306
Horizon Air                           0.121111
JetBlue Airways                       0.256620
Mesa Airlines Inc.                    0.163308
PSA Airlines Inc.                     0.148445
Republic Airline                      0.125452
SkyWest Airlines Inc.                 0.138676
Southwest Airlines Co.                0.210842
Spirit Air Lines                      0.204114
United Air Lines Inc.                 0.175915
Name: DEP_DELAY, dtype: float64

Group-wise Transformation

In [10]:
df.groupby('AIRLINE')['DEP_DELAY'].transform(
    lambda x: x > x.mean()
)

0          False
1          False
2          False
3          False
4          False
           ...  
2999995    False
2999996    False
2999997     True
2999998    False
2999999    False
Name: DEP_DELAY, Length: 3000000, dtype: bool

Group-wise Filtering

In [11]:
print(df.groupby('AIRLINE').filter(lambda x: len(x) > 300000))

            FL_DATE                 AIRLINE                 AIRLINE_DOT  \
1        2022-11-19    Delta Air Lines Inc.    Delta Air Lines Inc.: DL   
3        2023-03-06    Delta Air Lines Inc.    Delta Air Lines Inc.: DL   
5        2019-07-31  Southwest Airlines Co.  Southwest Airlines Co.: WN   
6        2023-06-11  American Airlines Inc.  American Airlines Inc.: AA   
10       2021-06-11    Delta Air Lines Inc.    Delta Air Lines Inc.: DL   
...             ...                     ...                         ...   
2999994  2020-08-31    Delta Air Lines Inc.    Delta Air Lines Inc.: DL   
2999995  2022-11-13  American Airlines Inc.  American Airlines Inc.: AA   
2999996  2022-11-02  American Airlines Inc.  American Airlines Inc.: AA   
2999997  2022-09-11    Delta Air Lines Inc.    Delta Air Lines Inc.: DL   
2999999  2019-06-15  Southwest Airlines Co.  Southwest Airlines Co.: WN   

        AIRLINE_CODE  DOT_CODE  FL_NUMBER ORIGIN          ORIGIN_CITY DEST  \
1                 DL 

In [12]:
filtered_ds = df.groupby('AIRLINE').filter(lambda x: x['ARR_DELAY'].mean() > 10)
filtered_ds.groupby('AIRLINE')['ARR_DELAY'].mean()

AIRLINE
Allegiant Air                         13.284601
ExpressJet Airlines LLC d/b/a aha!    10.031976
Frontier Airlines Inc.                11.100429
JetBlue Airways                       12.276125
Name: ARR_DELAY, dtype: float64

Group-wise Application

In [13]:
print(df.groupby('AIRLINE').apply(lambda g: g.nlargest(2, 'DEP_DELAY')))

                                               FL_DATE  \
AIRLINE                                                  
Alaska Airlines Inc.               1013642  2019-04-15   
                                   2632062  2022-07-23   
Allegiant Air                      462915   2023-01-03   
                                   2293408  2023-07-23   
American Airlines Inc.             2639771  2022-03-26   
                                   2210421  2023-08-25   
Delta Air Lines Inc.               1948895  2022-09-15   
                                   573761   2019-04-14   
Endeavor Air Inc.                  1976065  2020-03-13   
                                   199049   2019-03-22   
Envoy Air                          1873493  2022-08-07   
                                   1320027  2023-03-19   
ExpressJet Airlines LLC d/b/a aha! 2289028  2019-06-29   
                                   2169546  2019-06-08   
Frontier Airlines Inc.             2969570  2023-07-08   
              

  print(df.groupby('AIRLINE').apply(lambda g: g.nlargest(2, 'DEP_DELAY')))


Pivot Tables

In [14]:
pd.pivot_table(
    df,
    values='DEP_DELAY',
    index='AIRLINE',
    columns='ORIGIN',
    aggfunc='mean'
)

ORIGIN,ABE,ABI,ABQ,ABR,ABY,ACK,ACT,ACV,ACY,ADK,...,VEL,VLD,VPS,WRG,WYS,XNA,XWA,YAK,YKM,YUM
AIRLINE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Alaska Airlines Inc.,,,4.056769,,,,,,,11.731707,...,,,,-2.371951,,,,0.183544,,
Allegiant Air,11.752688,,2.282609,,,,,,,,...,,,15.267396,,,22.433447,,,,
American Airlines Inc.,,,11.76311,,,,,,,,...,,,19.074013,,,26.79476,,,,
Delta Air Lines Inc.,-1.093023,,2.70892,,,,,,,,...,,,5.178052,,,6.421053,,,,
Endeavor Air Inc.,13.39895,,,,0.690789,41.782609,,,,,...,,10.492877,3.574468,,,13.532281,,,,
Envoy Air,5.747126,8.019608,3.336449,,,-3.25,5.022298,,,,...,,,5.325153,,,6.657839,,,,6.4
ExpressJet Airlines LLC d/b/a aha!,-3.818182,,5.777778,,,,,,,,...,,,3.579439,,,6.028037,,,,
Frontier Airlines Inc.,,,0.513514,,,,,,,,...,,,,,,9.380952,,,,
Hawaiian Airlines Inc.,,,,,,,,,,,...,,,,,,,,,,
Horizon Air,,,8.25,,,,,,,,...,,,,,,,,,3.283784,
