In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('annual_time_series_by_actypes@airports.csv')

In [3]:
df.columns

Index(['ORIGIN_AIRPORT_ID', 'ORIGIN_STATE_ABR', 'CODE', 'NAME',
       'AIRCRAFT_TYPE', 'Description', '2019', '2020', '2021', '2022'],
      dtype='object')

In [4]:
# First, let's create the total_2019 and total_2022
df['total_2019'] = df.groupby('ORIGIN_AIRPORT_ID')['2019'].transform('sum')
df['total_2022'] = df.groupby('ORIGIN_AIRPORT_ID')['2022'].transform('sum')

# Now, let's create a mask for the AIRCRAFT_TYPE values of interest
mask = df['AIRCRAFT_TYPE'].isin([628, 629, 674, 675, 676])

# Use this mask to sum the values in 2019 and 2022 for these specific AIRCRAFT_TYPEs
df['total_50s_2019'] = df.loc[mask].groupby('ORIGIN_AIRPORT_ID')['2019'].transform('sum')
df['total_50s_2022'] = df.loc[mask].groupby('ORIGIN_AIRPORT_ID')['2022'].transform('sum')

# Fill NaN values (where the AIRCRAFT_TYPE is not one of the ones specified) with 0
df['total_50s_2019'].fillna(0, inplace=True)
df['total_50s_2022'].fillna(0, inplace=True)

# Calculate the percentages
df['pct_50_2019'] = df['total_50s_2019'] / df['total_2019'] * 100
df['pct_50_2022'] = df['total_50s_2022'] / df['total_2022'] * 100

# Finally, let's create the output_df
output_df = df[['ORIGIN_AIRPORT_ID', 'ORIGIN_STATE_ABR', 'CODE', 'NAME', 'total_2019', 'total_2022', 'total_50s_2019', 'pct_50_2019', 'total_50s_2022', 'pct_50_2022']].drop_duplicates()


In [5]:
output_df

Unnamed: 0,ORIGIN_AIRPORT_ID,ORIGIN_STATE_ABR,CODE,NAME,total_2019,total_2022,total_50s_2019,pct_50_2019,total_50s_2022,pct_50_2022
0,10397,GA,ATL,"Atlanta, GA: Hartsfield-Jackson Atlanta Intern...",395557,314165,0.0,0.000000,0.0,0.000000
13,10397,GA,ATL,"Atlanta, GA: Hartsfield-Jackson Atlanta Intern...",395557,314165,37002.0,9.354404,17658.0,5.620613
33,13930,IL,ORD,"Chicago, IL: Chicago O'Hare International",392793,291702,0.0,0.000000,0.0,0.000000
44,13930,IL,ORD,"Chicago, IL: Chicago O'Hare International",392793,291702,122896.0,31.287727,67913.0,23.281637
69,11292,CO,DEN,"Denver, CO: Denver International",288617,277694,0.0,0.000000,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...
4943,14092,NC,PGV,"Greenville, NC: Pitt Greenville",1484,786,0.0,0.000000,0.0,0.000000
4973,11415,TX,DRT,"Del Rio, TX: Del Rio International",714,683,0.0,0.000000,0.0,0.000000
4979,12129,MN,HIB,"Hibbing, MN: Range Regional",627,640,0.0,0.000000,0.0,0.000000
4989,10409,SD,ATY,"Watertown, SD: Watertown Regional",502,623,0.0,0.000000,0.0,0.000000


In [6]:
output_df.to_csv('50_airports.csv', index=False)

In [7]:
df1 = pd.read_csv('50_airports_cleaned.csv')

In [8]:
df1

Unnamed: 0,ORIGIN_AIRPORT_ID,ORIGIN_STATE_ABR,CODE,NAME,total_2019,total_2022,total_50s_2019,pct_50_2019,total_50s_2022,pct_50_2022
0,10397,GA,ATL,"Atlanta, GA: Hartsfield-Jackson Atlanta Intern...",395557,314165,37002,9.354404,17658,5.620613
1,13930,IL,ORD,"Chicago, IL: Chicago O'Hare International",392793,291702,122896,31.287727,67913,23.281637
2,11292,CO,DEN,"Denver, CO: Denver International",288617,277694,57367,19.876515,39491,14.221049
3,11298,TX,DFW,"Dallas/Fort Worth, TX: Dallas/Fort Worth Inter...",302386,272740,32428,10.724041,21377,7.837868
4,12892,CA,LAX,"Los Angeles, CA: Los Angeles International",249333,191194,9584,3.843855,4170,2.181031
...,...,...,...,...,...,...,...,...,...,...
368,12441,WY,JAC,"Jackson, WY: Jackson Hole",4743,4703,0,0.000000,1,0.021263
369,11415,TX,DRT,"Del Rio, TX: Del Rio International",714,683,0,0.000000,0,0.000000
370,12129,MN,HIB,"Hibbing, MN: Range Regional",627,640,0,0.000000,0,0.000000
371,10409,SD,ATY,"Watertown, SD: Watertown Regional",502,623,0,0.000000,0,0.000000


In [9]:
df2 = df1[df1.duplicated(subset='CODE', keep=False)]


In [10]:
df2

Unnamed: 0,ORIGIN_AIRPORT_ID,ORIGIN_STATE_ABR,CODE,NAME,total_2019,total_2022,total_50s_2019,pct_50_2019,total_50s_2022,pct_50_2022
127,12441,WY,JAC,"Jackson, WY: Jackson Hole",4743,4703,0,0.0,0,0.0
245,11308,AL,DHN,"Dothan, AL: Dothan Regional",1382,933,1382,100.0,883,94.640943
252,12007,MS,GTR,"Columbus, MS: Golden Triangle Regional",1188,1018,1188,100.0,1015,99.705305
263,10731,GA,BQK,"Brunswick, GA: Brunswick Golden Isles",1035,870,1035,100.0,870,100.0
268,11274,IA,DBQ,"Dubuque, IA: Dubuque Regional",985,441,985,100.0,441,100.0
274,10728,TX,BPT,"Beaumont/Port Arthur, TX: Jack Brooks Regional",961,782,961,100.0,671,85.805627
285,14006,KY,PAH,"Paducah, KY: Barkley Regional",1239,796,1239,100.0,795,99.874372
288,11013,MI,CIU,"Sault Ste. Marie, MI: Chippewa County Internat...",667,709,667,100.0,709,100.0
299,11415,TX,DRT,"Del Rio, TX: Del Rio International",714,683,714,100.0,268,39.238653
311,14534,WY,RIW,"Riverton/Lander, WY: Central Wyoming Regional",1020,391,0,0.0,0,0.0
