In [1]:
import glob
import pandas as pd

### Calculate Airport Departure Density

In [3]:
# Reference instant for the DaysSince / MinSince columns: the reader and
# density functions below measure every timestamp as an offset from this value.
ref_date = pd.to_datetime('2014-01-01')

In [4]:
def departure_data_reader(airport):
    """Collect the departures of the given origin airport(s) from the flight CSVs.

    Every file matching ``csv_flight/report_*.csv`` is read, reduced to the rows
    whose ``Origin`` is in ``airport``, and the pieces are stacked into a single
    frame.  Two offset columns, measured from the notebook-level ``ref_date``,
    are then added.

    Parameters
    ----------
    airport : str or list-like of str
        Origin code(s) to keep, e.g. ``['ATL']``.  A bare string is treated as
        a one-element list.

    Returns
    -------
    pandas.DataFrame
        Columns ``Origin``, ``FlightDate``, ``DepTime``, ``DaysSince`` and
        ``MinSince``, sorted by ``MinSince``.  Rows with a missing ``DepTime``
        get a NaN ``MinSince`` and therefore sort last.  When no file matches
        (or no row matches) the frame is empty but still has these columns.
    """
    if isinstance(airport, str):
        airport = [airport]

    columns = ['Origin', 'FlightDate', 'DepTime']

    # sorted() makes the file order reproducible; glob order is filesystem-dependent
    csv_files = sorted(glob.glob('csv_flight/report_*.csv'))

    # filter each file on its own and combine once at the end, instead of
    # re-concatenating and re-processing the growing frame for every file
    frames = []
    for file in csv_files:
        df = pd.read_csv(file, usecols=columns, low_memory=False)
        frames.append(df.loc[df['Origin'].isin(airport), columns])

    if frames:
        departure_df = pd.concat(frames, ignore_index=True)
    else:
        # keep the schema so downstream groupby('MinSince') still works
        departure_df = pd.DataFrame({
            'Origin': pd.Series(dtype='object'),
            'FlightDate': pd.Series(dtype='object'),
            'DepTime': pd.Series(dtype='float64'),
        })

    # absolute day and minute offsets from ref_date; DepTime is an hhmm number,
    # so // 100 gives the hour part and % 100 the minute part
    departure_df['DaysSince'] = (pd.to_datetime(departure_df['FlightDate']) - ref_date).dt.days
    departure_df['MinSince'] = (
        departure_df['DaysSince'] * 1440
        + (departure_df['DepTime'] // 100) * 60
        + (departure_df['DepTime'] % 100)
    )

    # sort data by MinSince
    departure_df = departure_df.sort_values('MinSince')

    print("Final shape of departure dataframe:", departure_df.shape)

    return departure_df

In [5]:
# Load all departures with Origin == 'ATL'; the function prints the resulting shape.
atl_departure_df = departure_data_reader(['ATL'])

Final shape of departure dataframe: (1888279, 5)


In [6]:
# Preview the first rows (the frame is sorted by MinSince).
atl_departure_df.head()

Unnamed: 0,Origin,FlightDate,DepTime,DaysSince,MinSince
0,ATL,2014-01-01,557.0,0,357.0
1,ATL,2014-01-01,612.0,0,372.0
2,ATL,2014-01-01,625.0,0,385.0
3,ATL,2014-01-01,629.0,0,389.0
4,ATL,2014-01-01,636.0,0,396.0


In [7]:
def departure_density_calculator(airport_df, start_date='2014-01-01',
                                 end_date='2018-12-31 23:59', window=120):
    """Count departures per minute and sum them over a centred rolling window.

    A frame with one row per minute between ``start_date`` and ``end_date``
    (both inclusive) is built, the number of rows of ``airport_df`` falling on
    each minute is attached, and a rolling sum of those counts is computed.

    Parameters
    ----------
    airport_df : pandas.DataFrame
        Must contain a ``MinSince`` column (minutes since the notebook-level
        ``ref_date``).  Rows whose ``MinSince`` is NaN or lies outside the
        generated range do not contribute to any count.
    start_date, end_date : str or datetime-like, optional
        Bounds of the per-minute grid.  Defaults cover 2014-01-01 00:00 to
        2018-12-31 23:59.
    window : int, optional
        Width of the rolling window in minutes (default 120).

    Returns
    -------
    pandas.DataFrame
        Columns ``MinSince``, ``DepartureCount`` and ``DepartureDensity``,
        one row per minute of the grid.
    """
    # one row per minute; 'min' is the non-deprecated spelling of the 'T' alias
    all_minutes = pd.date_range(start=pd.to_datetime(start_date),
                                end=pd.to_datetime(end_date), freq='min')

    all_minutes_df = pd.DataFrame({'FullMinutes': all_minutes})
    all_minutes_df['MinSince'] = (all_minutes_df['FullMinutes'] - ref_date).dt.total_seconds() // 60

    # number of departures recorded on each distinct minute
    departure_counts = airport_df.groupby('MinSince').size().reset_index(name='DepartureCount')

    # left merge keeps every minute of the grid; minutes without departures become NaN
    full_minute_df = all_minutes_df.merge(departure_counts, on='MinSince', how='left')

    # assign the result back: a chained `df[col].fillna(..., inplace=True)` does not
    # update the frame under pandas Copy-on-Write
    full_minute_df['DepartureCount'] = full_minute_df['DepartureCount'].fillna(0)

    # min_periods=1 lets the window shrink at both ends of the grid instead of yielding NaN
    full_minute_df['DepartureDensity'] = (
        full_minute_df['DepartureCount']
        .rolling(window=window, min_periods=1, center=True)
        .sum()
    )

    return full_minute_df[['MinSince', 'DepartureCount', 'DepartureDensity']].copy()

In [8]:
# Per-minute departure counts and rolling density for ATL.
atl_departure_density = departure_density_calculator(atl_departure_df)

In [9]:
# Spot-check one minute of the first day: MinSince == 331 is 331 minutes
# (5 h 31 min) after ref_date.
atl_departure_density.loc[atl_departure_density['MinSince'] == 331]

Unnamed: 0,MinSince,DepartureCount,DepartureDensity
331,331.0,0.0,4.0


In [10]:
# Persist the ATL result without the index column; to_csv does not create the
# csv_density folder, so it must already exist.
atl_departure_density.to_csv('csv_density/ATL_departure.csv', index=False) 

In [11]:
# calculate departure density for all 9 main airports
# (ATL is included, so csv_density/ATL_departure.csv is written again here)
airports = ['ATL', 'ORD', 'DFW', 'DEN', 'CLT', 'LAX', 'IAH', 'PHX', 'SFO']

# Each iteration re-reads every csv_flight/report_*.csv file for a single airport
# and writes one csv_density/<airport>_departure.csv file.
for airport in airports:
    departure_df = departure_data_reader([airport])
    departure_density = departure_density_calculator(departure_df)
    departure_density.to_csv(f'csv_density/{airport}_departure.csv', index=False)

Final shape of departure dataframe: (1888279, 5)
Final shape of departure dataframe: (1444067, 5)
Final shape of departure dataframe: (1195459, 5)
Final shape of departure dataframe: (1123686, 5)
Final shape of departure dataframe: (679227, 5)
Final shape of departure dataframe: (1083099, 5)
Final shape of departure dataframe: (774212, 5)
Final shape of departure dataframe: (807807, 5)
Final shape of departure dataframe: (851909, 5)


### Calculate Airport Arrival Density

In [13]:
# Same reference instant as in the departure section; the arrival functions
# below read this notebook-level name when computing DaysSince / MinSince.
ref_date = pd.to_datetime('2014-01-01')

In [14]:
def arrival_data_reader(airport):
    """Collect the arrivals at the given destination airport(s) from the flight CSVs.

    Every file matching ``csv_flight/report_*.csv`` is read, reduced to the rows
    whose ``Dest`` is in ``airport``, and the pieces are stacked into a single
    frame.  Two offset columns, measured from the notebook-level ``ref_date``,
    are then added; ``MinSince`` is the departure minute plus
    ``ActualElapsedTime``.

    Parameters
    ----------
    airport : str or list-like of str
        Destination code(s) to keep, e.g. ``['ATL']``.  A bare string is
        treated as a one-element list.

    Returns
    -------
    pandas.DataFrame
        Columns ``Dest``, ``FlightDate``, ``DepTime``, ``ActualElapsedTime``,
        ``DaysSince`` and ``MinSince``, sorted by ``MinSince``.  Rows with a
        missing ``DepTime`` or ``ActualElapsedTime`` get a NaN ``MinSince`` and
        therefore sort last.  When no file matches (or no row matches) the
        frame is empty but still has these columns.
    """
    if isinstance(airport, str):
        airport = [airport]

    columns = ['Dest', 'FlightDate', 'DepTime', 'ActualElapsedTime']

    # sorted() makes the file order reproducible; glob order is filesystem-dependent
    csv_files = sorted(glob.glob('csv_flight/report_*.csv'))

    # filter each file on its own and combine once at the end, instead of
    # re-concatenating and re-processing the growing frame for every file
    frames = []
    for file in csv_files:
        df = pd.read_csv(file, usecols=columns, low_memory=False)
        frames.append(df.loc[df['Dest'].isin(airport), columns])

    if frames:
        arrival_df = pd.concat(frames, ignore_index=True)
    else:
        # keep the schema so downstream groupby('MinSince') still works
        arrival_df = pd.DataFrame({
            'Dest': pd.Series(dtype='object'),
            'FlightDate': pd.Series(dtype='object'),
            'DepTime': pd.Series(dtype='float64'),
            'ActualElapsedTime': pd.Series(dtype='float64'),
        })

    # absolute day offset of the departure date from ref_date
    arrival_df['DaysSince'] = (pd.to_datetime(arrival_df['FlightDate']) - ref_date).dt.days

    # arrival minute = departure minute (DepTime is an hhmm number) + elapsed minutes.
    # NOTE(review): DepTime is presumably local time at the origin airport, so this
    # sum would be the arrival expressed on the origin's clock — confirm this is intended.
    arrival_df['MinSince'] = (
        arrival_df['DaysSince'] * 1440
        + (arrival_df['DepTime'] // 100) * 60
        + (arrival_df['DepTime'] % 100)
        + arrival_df['ActualElapsedTime']
    )

    arrival_df = arrival_df.sort_values('MinSince')

    print("Final shape of arrival dataframe:", arrival_df.shape)

    return arrival_df

In [15]:
# Load all flights with Dest == 'ATL'; the function prints the resulting shape.
atl_arrival_df = arrival_data_reader(['ATL'])

Final shape of arrival dataframe: (1888267, 6)


In [16]:
# Preview the first rows (the frame is sorted by MinSince).
atl_arrival_df.head()

Unnamed: 0,Dest,FlightDate,DepTime,ActualElapsedTime,DaysSince,MinSince
0,ATL,2014-01-01,50.0,151.0,0,201.0
1,ATL,2014-01-01,34.0,189.0,0,223.0
2,ATL,2014-01-01,22.0,205.0,0,227.0
3,ATL,2014-01-01,17.0,230.0,0,247.0
4,ATL,2014-01-01,13.0,261.0,0,274.0


In [17]:
def arrival_density_calculator(airport_df, start_date='2014-01-01',
                               end_date='2018-12-31 23:59', window=120):
    """Count arrivals per minute and sum them over a centred rolling window.

    A frame with one row per minute between ``start_date`` and ``end_date``
    (both inclusive) is built, the number of rows of ``airport_df`` falling on
    each minute is attached, and a rolling sum of those counts is computed.

    Parameters
    ----------
    airport_df : pandas.DataFrame
        Must contain a ``MinSince`` column (minutes since the notebook-level
        ``ref_date``).  Rows whose ``MinSince`` is NaN or lies outside the
        generated range do not contribute to any count.
    start_date, end_date : str or datetime-like, optional
        Bounds of the per-minute grid.  Defaults cover 2014-01-01 00:00 to
        2018-12-31 23:59.
    window : int, optional
        Width of the rolling window in minutes (default 120).

    Returns
    -------
    pandas.DataFrame
        Columns ``MinSince``, ``ArrivalCount`` and ``ArrivalDensity``,
        one row per minute of the grid.
    """
    # one row per minute; 'min' is the non-deprecated spelling of the 'T' alias
    all_minutes = pd.date_range(start=pd.to_datetime(start_date),
                                end=pd.to_datetime(end_date), freq='min')

    all_minutes_df = pd.DataFrame({'FullMinutes': all_minutes})
    all_minutes_df['MinSince'] = (all_minutes_df['FullMinutes'] - ref_date).dt.total_seconds() // 60

    # number of arrivals recorded on each distinct minute
    arrival_counts = airport_df.groupby('MinSince').size().reset_index(name='ArrivalCount')

    # left merge keeps every minute of the grid; minutes without arrivals become NaN
    full_minute_df = all_minutes_df.merge(arrival_counts, on='MinSince', how='left')

    # assign the result back: a chained `df[col].fillna(..., inplace=True)` does not
    # update the frame under pandas Copy-on-Write
    full_minute_df['ArrivalCount'] = full_minute_df['ArrivalCount'].fillna(0)

    # min_periods=1 lets the window shrink at both ends of the grid instead of yielding NaN
    full_minute_df['ArrivalDensity'] = (
        full_minute_df['ArrivalCount']
        .rolling(window=window, min_periods=1, center=True)
        .sum()
    )

    return full_minute_df[['MinSince', 'ArrivalCount', 'ArrivalDensity']].copy()

In [18]:
# Per-minute arrival counts and rolling density for ATL.
atl_arrival_density = arrival_density_calculator(atl_arrival_df)

In [19]:
# Spot-check one minute of the first day: MinSince == 164 is 164 minutes
# (2 h 44 min) after ref_date.
atl_arrival_density.loc[atl_arrival_density['MinSince'] == 164]

Unnamed: 0,MinSince,ArrivalCount,ArrivalDensity
164,164.0,0.0,0.0


In [20]:
# Persist the ATL result without the index column; to_csv does not create the
# csv_density folder, so it must already exist.
atl_arrival_density.to_csv('csv_density/ATL_arrival.csv', index=False) 

In [21]:
# calculate arrival density for all 9 main airports
# (ATL is included, so csv_density/ATL_arrival.csv is written again here)
airports = ['ATL', 'ORD', 'DFW', 'DEN', 'CLT', 'LAX', 'IAH', 'PHX', 'SFO']

# Each iteration re-reads every csv_flight/report_*.csv file for a single airport
# and writes one csv_density/<airport>_arrival.csv file.
for airport in airports:
    arrival_df = arrival_data_reader([airport])
    arrival_density = arrival_density_calculator(arrival_df)
    arrival_density.to_csv(f'csv_density/{airport}_arrival.csv', index=False)

Final shape of arrival dataframe: (1888267, 6)
Final shape of arrival dataframe: (1444099, 6)
Final shape of arrival dataframe: (1195436, 6)
Final shape of arrival dataframe: (1123681, 6)
Final shape of arrival dataframe: (679233, 6)
Final shape of arrival dataframe: (1083176, 6)
Final shape of arrival dataframe: (774243, 6)
Final shape of arrival dataframe: (807825, 6)
Final shape of arrival dataframe: (852030, 6)
