In [1]:
import pandas as pd
import numpy as np

In [2]:
# Build one table of annual departure totals per airport.
#
# Inputs : 'airport_codes_master_analysis.csv' (one row per airport) and one
#          'time_series_deps/time_series_deps_airport_<year>.csv' per year,
#          each providing 'ORIGIN_AIRPORT_ID' and 'total' columns.
# Output : 'total_annual_deps_airports.csv' with the descriptive airport
#          columns plus one 'total_<year>' column per year.

# Years to aggregate, and the matching output column names
years = ['2019', '2020', '2021', '2022']
total_cols = [f'total_{year}' for year in years]

# Read the 'airport_codes_master_analysis.csv' file
df_master = pd.read_csv('airport_codes_master_analysis.csv')

# Keep the descriptive columns and index by airport ID so that the yearly
# totals below are aligned on 'ORIGIN_AIRPORT_ID'
output_df = df_master[['ORIGIN_AIRPORT_ID', 'CODE', 'ORIGIN_STATE_ABR', 'NAME', 'LOCATION']].copy()
output_df.set_index('ORIGIN_AIRPORT_ID', inplace=True)

# Create the total columns as NaN placeholders; airports missing from a
# yearly file keep NaN here and are zero-filled after the loop
for col in total_cols:
    output_df[col] = np.nan

for year in years:
    # Define the path
    path = f'time_series_deps/time_series_deps_airport_{year}.csv'

    # Read the CSV file
    df = pd.read_csv(path)

    # Extract 'total' column and rename it with the corresponding year
    df_total = df.set_index('ORIGIN_AIRPORT_ID')['total'].rename(f'total_{year}')

    # update() aligns on the index and overwrites only the column whose name
    # matches the Series name; IDs absent from output_df are ignored
    output_df.update(df_total)

# Replace missing totals with zero. Only the total_* columns are filled so
# that a missing CODE / NAME / LOCATION / state value is not turned into 0.
output_df[total_cols] = output_df[total_cols].fillna(0)

# Reset index so 'ORIGIN_AIRPORT_ID' is a regular column again
output_df.reset_index(inplace=True)

# Save the DataFrame to a CSV file
output_df.to_csv('total_annual_deps_airports.csv', index=False)


In [3]:
# Display the combined table of per-airport annual totals built above
output_df

Unnamed: 0,ORIGIN_AIRPORT_ID,CODE,ORIGIN_STATE_ABR,NAME,LOCATION,total_2019,total_2020,total_2021,total_2022
0,10005,05A,AK,Little Squaw Airport,Little Squaw,2.0,0.0,0.0,0.0
1,10006,06A,AK,Kizhuyak Bay,Kizhuyak,0.0,0.0,0.0,1.0
2,10011,GCW,AZ,Grand Canyon West,Peach Springs,0.0,0.0,0.0,58.0
3,10039,A26,AK,Ivotuk Airstrip,Ivotuk,0.0,0.0,1.0,0.0
4,10042,A29,AK,Kiluda Bay Airport,Kiluda Bay,2.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...
808,16869,XWA,ND,Williston Basin International,Williston,405.0,854.0,1132.0,1481.0
809,16879,T4X,TX,Llano Municipal,Llano,0.0,0.0,0.0,26.0
810,16923,1AK,AK,Mertarvik Quarry Road Landing Strip,Mertarvik,0.0,0.0,0.0,30.0
811,16929,2AK,AK,Deer Park Airport,Deer Park,0.0,0.0,9.0,7.0


In [4]:
# Load the airport list whose 'ORIGIN_AIRPORT_ID' values are used to filter output_df.
# NOTE(review): the file name suggests airports above a 10K threshold — confirm what is being counted.
airports354_df = pd.read_csv('above10K_airports_list.csv')


In [5]:
# Keep only the rows of output_df whose airport ID is listed in airports354_df
selected_ids = airports354_df['ORIGIN_AIRPORT_ID']
output_df = output_df.loc[output_df['ORIGIN_AIRPORT_ID'].isin(selected_ids)]


In [6]:
# Display output_df after it was restricted to the IDs present in airports354_df
output_df


Unnamed: 0,ORIGIN_AIRPORT_ID,CODE,ORIGIN_STATE_ABR,NAME,LOCATION,total_2019,total_2020,total_2021,total_2022
7,10135,ABE,PA,Lehigh Valley International,Allentown/Bethlehem/Easton,6505.0,3782.0,5312.0,5091.0
8,10136,ABI,TX,Abilene Regional,Abilene,2312.0,1695.0,2394.0,1924.0
10,10140,ABQ,NM,Albuquerque International Sunport,Albuquerque,27807.0,16098.0,19070.0,22334.0
11,10141,ABR,SD,Aberdeen Regional,Aberdeen,740.0,690.0,754.0,700.0
12,10146,ABY,GA,Southwest Georgia Regional,Albany,1013.0,748.0,1051.0,961.0
...,...,...,...,...,...,...,...,...,...
765,15624,VPS,FL,Eglin AFB Destin Fort Walton Beach,Valparaiso,8833.0,7089.0,11713.0,8706.0
783,15855,WST,RI,Westerly State,Westerly,4110.0,3236.0,4032.0,3679.0
790,15919,XNA,AR,Northwest Arkansas National,Fayetteville,15445.0,8386.0,10284.0,11989.0
793,16101,YKM,WA,Yakima Air Terminal/McAllister Field,Yakima,1224.0,726.0,752.0,600.0


In [7]:
# Write the filtered table to CSV; index=False omits the DataFrame row index
output_df.to_csv('above10K_total_annual_deps_airports.csv', index=False)