# Flights Table Builder

By Kenneth Burchfiel

Released under the MIT license

This program converts various source files into a table from which airline route maps can be generated.

I plan to add additional documentation at a later date.

**Note**: The following files were not included in the GitHub repository because their size exceeds 100MB:
1. routes_planes_coordinates.csv
2. routes_planes_coordinates_for_mapping.csv
3. 21503323_T_T100_SEGMENT_ALL_CARRIER.csv
4. routes_planes_coordinates_for_mapping_v2.csv

You can instead retrieve these files from my copy of this project on Google Drive, available at https://drive.google.com/drive/folders/1jRTjoZtT6OWCXRTNstG9D4CxmjXe4W8q?usp=sharing .

In [1]:
import pandas as pd
import cartopy.crs as ccrs # https://scitools.org.uk/cartopy/docs/latest/matplotlib/intro.html
import time
import matplotlib.pyplot as plt
import numpy as np

airport_list = pd.read_csv('original_data\\GlobalAirportDatabase.txt',
delimiter=':',names=['icao_code', 'iata_code', 'airport_name', 'city/town', 
'country', 'lat_deg', 'lat_min', 'lat_sec', 'lat_dir', 'lon_deg', 'lon_min', 
'lon_sec', 'lon_dir', 'altitude', 'latitude_dec_deg', 'longitude_dec_deg']) 
# Headers are not included in the CSV, so they are instead based on those in the
# dictionary here:
# https://www.partow.net/miscellaneous/airportdatabase/index.html . License:
# "Free use of The Global Airport Database is permitted under the guidelines and
# in accordance with the MIT License." (See
# https://www.partow.net/miscellaneous/airportdatabase/index.html#GlobalAirportDatabaseLicense
# )

airport_list

Unnamed: 0,icao_code,iata_code,airport_name,city/town,country,lat_deg,lat_min,lat_sec,lat_dir,lon_deg,lon_min,lon_sec,lon_dir,altitude,latitude_dec_deg,longitude_dec_deg
0,AYGA,GKA,GOROKA,GOROKA,PAPUA NEW GUINEA,6,4,54,S,145,23,30,E,1610,-6.082,145.392
1,AYLA,LAE,,LAE,PAPUA NEW GUINEA,0,0,0,U,0,0,0,U,0,0.000,0.000
2,AYMD,MAG,MADANG,MADANG,PAPUA NEW GUINEA,5,12,25,S,145,47,19,E,7,-5.207,145.789
3,AYMH,HGU,MOUNT HAGEN,MOUNT HAGEN,PAPUA NEW GUINEA,5,49,34,S,144,17,46,E,1643,-5.826,144.296
4,AYNZ,LAE,NADZAB,NADZAB,PAPUA NEW GUINEA,6,34,11,S,146,43,34,E,73,-6.570,146.726
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9295,ZYTK,,,SHENYANG,CHINA,0,0,0,U,0,0,0,U,0,0.000,0.000
9296,ZYTL,DLC,ZHOUSHUIZI,DALIAN,CHINA,38,57,56,N,121,32,18,E,33,38.966,121.538
9297,ZYXC,,,XIANCHENG,CHINA,0,0,0,U,0,0,0,U,0,0.000,0.000
9298,ZYYC,,,YICHUN,CHINA,0,0,0,U,0,0,0,U,0,0.000,0.000


In [2]:
origin_airport_list_for_merge = airport_list[['iata_code', 'latitude_dec_deg', 
'longitude_dec_deg']].copy()
origin_airport_list_for_merge.rename(columns={'iata_code': 'origin_iata_code',
'latitude_dec_deg':'origin_lat','longitude_dec_deg':'origin_lon'},inplace=True)
origin_airport_list_for_merge

Unnamed: 0,origin_iata_code,origin_lat,origin_lon
0,GKA,-6.082,145.392
1,LAE,0.000,0.000
2,MAG,-5.207,145.789
3,HGU,-5.826,144.296
4,LAE,-6.570,146.726
...,...,...,...
9295,,0.000,0.000
9296,DLC,38.966,121.538
9297,,0.000,0.000
9298,,0.000,0.000


In [3]:
destination_airport_list_for_merge = airport_list[['iata_code', 
'latitude_dec_deg', 'longitude_dec_deg']].copy()
destination_airport_list_for_merge.rename(columns={'iata_code': 
'destination_iata_code','latitude_dec_deg':'destination_lat',
'longitude_dec_deg':'destination_lon'},inplace=True)
destination_airport_list_for_merge

Unnamed: 0,destination_iata_code,destination_lat,destination_lon
0,GKA,-6.082,145.392
1,LAE,0.000,0.000
2,MAG,-5.207,145.789
3,HGU,-5.826,144.296
4,LAE,-6.570,146.726
...,...,...,...
9295,,0.000,0.000
9296,DLC,38.966,121.538
9297,,0.000,0.000
9298,,0.000,0.000


In [4]:
route_list = pd.read_csv(
    'original_data\\21503323_T_T100_SEGMENT_ALL_CARRIER.csv', 
    low_memory = False) # Source: Air Carriers : T-100 Segment (All Carriers);
    # 2018 'All months' data. Link:
    # https://www.transtats.bts.gov/DL_SelectFields.asp?gnoyr_VQ=FMG
route_list

Unnamed: 0,DEPARTURES_SCHEDULED,DEPARTURES_PERFORMED,PAYLOAD,SEATS,PASSENGERS,FREIGHT,MAIL,DISTANCE,RAMP_TO_RAMP,AIR_TIME,...,AIRCRAFT_GROUP,AIRCRAFT_TYPE,AIRCRAFT_CONFIG,YEAR,QUARTER,MONTH,DISTANCE_GROUP,CLASS,DATA_SOURCE,Unnamed: 50
0,0.0,1.0,21502.0,76.0,3.0,0.0,0.0,901.0,170.0,140.0,...,6,638,1,2018,1,2,2,F,DU,
1,0.0,3.0,64506.0,228.0,75.0,0.0,0.0,228.0,219.0,140.0,...,6,638,1,2018,1,2,1,F,DU,
2,0.0,1.0,21502.0,76.0,64.0,0.0,0.0,851.0,144.0,114.0,...,6,638,1,2018,1,2,2,F,DU,
3,0.0,1.0,21502.0,76.0,55.0,0.0,0.0,122.0,58.0,31.0,...,6,638,1,2018,1,2,1,F,DU,
4,0.0,1.0,12500.0,50.0,34.0,0.0,0.0,133.0,49.0,29.0,...,6,629,1,2018,1,2,1,F,DU,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
475214,1166.0,583.0,1049400.0,5247.0,3646.0,0.0,0.0,91.0,27284.0,21338.0,...,1,125,1,2018,3,7,1,F,DU,
475215,1188.0,594.0,1069200.0,5346.0,3573.0,0.0,0.0,91.0,27799.0,21740.0,...,1,125,1,2018,3,7,1,F,DU,
475216,1216.0,608.0,1094400.0,5472.0,3827.0,0.0,0.0,91.0,28454.0,22253.0,...,1,125,1,2018,3,8,1,F,DU,
475217,1258.0,629.0,1132200.0,5661.0,4056.0,0.0,0.0,91.0,29437.0,23021.0,...,1,125,1,2018,3,8,1,F,DU,


In [5]:
route_list.columns

Index(['DEPARTURES_SCHEDULED', 'DEPARTURES_PERFORMED', 'PAYLOAD', 'SEATS',
       'PASSENGERS', 'FREIGHT', 'MAIL', 'DISTANCE', 'RAMP_TO_RAMP', 'AIR_TIME',
       'UNIQUE_CARRIER', 'AIRLINE_ID', 'UNIQUE_CARRIER_NAME',
       'UNIQUE_CARRIER_ENTITY', 'REGION', 'CARRIER', 'CARRIER_NAME',
       'CARRIER_GROUP', 'CARRIER_GROUP_NEW', 'ORIGIN_AIRPORT_ID',
       'ORIGIN_AIRPORT_SEQ_ID', 'ORIGIN_CITY_MARKET_ID', 'ORIGIN',
       'ORIGIN_CITY_NAME', 'ORIGIN_STATE_ABR', 'ORIGIN_STATE_FIPS',
       'ORIGIN_STATE_NM', 'ORIGIN_COUNTRY', 'ORIGIN_COUNTRY_NAME',
       'ORIGIN_WAC', 'DEST_AIRPORT_ID', 'DEST_AIRPORT_SEQ_ID',
       'DEST_CITY_MARKET_ID', 'DEST', 'DEST_CITY_NAME', 'DEST_STATE_ABR',
       'DEST_STATE_FIPS', 'DEST_STATE_NM', 'DEST_COUNTRY', 'DEST_COUNTRY_NAME',
       'DEST_WAC', 'AIRCRAFT_GROUP', 'AIRCRAFT_TYPE', 'AIRCRAFT_CONFIG',
       'YEAR', 'QUARTER', 'MONTH', 'DISTANCE_GROUP', 'CLASS', 'DATA_SOURCE',
       'Unnamed: 50'],
      dtype='object')

In [6]:
pd.unique(route_list['UNIQUE_CARRIER_NAME'])


array(['Endeavor Air Inc.', 'Ameristar Air Cargo',
       'Trans States Airlines', 'Scott Air LLC dba Island Air Express',
       'Alaska Central Express', 'Peninsula Airways Inc.',
       'Caribbean Sun Airlines, Inc. d/b/a World Atlantic Airlines',
       'Cavok Air LLC', 'Aerolitoral', 'Aeromexico',
       'Eva Airways Corporation', 'China Cargo Airline',
       'Nippon Cargo Airlines', 'Jin Air Co Ltd.',
       'Grand Canyon Airlines, Inc. d/b/a Grand Canyon Airlines d/b/a Scenic Airlines',
       'Menagerie Enterprises Inc d/b/a Monarch Air',
       'Alaska Airlines Inc.', 'Hainan Airlines Company Limited',
       'Oceanair Linhas Aereas S A', 'Cargologicair Limited',
       'Ukraine International Airlines', 'Scandinavian Airlines Sys.',
       'Brussels Airlines N.V.', 'Saudi Arabian Airlines Corp',
       'TUI Airlines Belgium N.V. d/b/a Jetairfly', 'Air Tahiti Nui',
       'Tap-Portuguese Airlines', 'Hong Kong Express Airways Limited',
       'VistaJet Limited', 'Star Marianas 

In [7]:
aircraft_type = pd.read_csv('original_data\\L_AIRCRAFT_TYPE.csv')
aircraft_type.rename(columns={'Description':'Plane_Type_Text'},inplace=True)
aircraft_group = pd.read_csv('original_data\\L_AIRCRAFT_GROUP.csv')
aircraft_group.rename(columns={'Description':'Plane_Group_Text'},inplace=True)
aircraft_config = pd.read_csv('original_data\\L_AIRCRAFT_CONFIG.csv')
aircraft_config.rename(columns={'Description':'Plane_Config_Text'},inplace=True)
aircraft_type

Unnamed: 0,Code,Plane_Type_Text
0,7,Aero Commander 200
1,8,Aero Macchi AL-60
2,9,Aeronca 7-AC
3,10,Beech Bonanza 35A/C/D/E/G/H/J/K/S/V/ 36A
4,20,Bellanca CH-300
...,...,...
423,887,B787-800 Dreamliner
424,888,Boeing 737-900ER
425,889,B787-900 Dreamliner
426,890,Antonov 225 (6 Engine)


In [8]:
df_routes_planes_airports = route_list.merge(aircraft_type, how = 'left', 
left_on = 'AIRCRAFT_TYPE', right_on = 'Code')
df_routes_planes_airports = df_routes_planes_airports.merge(aircraft_group, 
how = 'left', left_on = 'AIRCRAFT_GROUP', right_on = 'Code')
df_routes_planes_airports = df_routes_planes_airports.merge(aircraft_config, 
how = 'left', left_on = 'AIRCRAFT_CONFIG', right_on = 'Code')
df_routes_planes_airports = df_routes_planes_airports.merge(
    origin_airport_list_for_merge, how = 'left', left_on = 'ORIGIN',
    right_on = 'origin_iata_code') 
df_routes_planes_airports = df_routes_planes_airports.merge(
    destination_airport_list_for_merge, how = 'left', left_on = 'DEST',
    right_on = 'destination_iata_code') 
df_routes_planes_airports

Unnamed: 0,DEPARTURES_SCHEDULED,DEPARTURES_PERFORMED,PAYLOAD,SEATS,PASSENGERS,FREIGHT,MAIL,DISTANCE,RAMP_TO_RAMP,AIR_TIME,...,Code_y,Plane_Group_Text,Code,Plane_Config_Text,origin_iata_code,origin_lat,origin_lon,destination_iata_code,destination_lat,destination_lon
0,0.0,1.0,21502.0,76.0,3.0,0.0,0.0,901.0,170.0,140.0,...,6,"Jet, 2-Engine",1,Passenger Configuration,IAD,38.944,-77.456,FLL,26.072,-80.153
1,0.0,3.0,64506.0,228.0,75.0,0.0,0.0,228.0,219.0,140.0,...,6,"Jet, 2-Engine",1,Passenger Configuration,IAD,38.944,-77.456,JFK,40.640,-73.779
2,0.0,1.0,21502.0,76.0,64.0,0.0,0.0,851.0,144.0,114.0,...,6,"Jet, 2-Engine",1,Passenger Configuration,IAH,29.980,-95.340,SAV,32.127,-81.202
3,0.0,1.0,21502.0,76.0,55.0,0.0,0.0,122.0,58.0,31.0,...,6,"Jet, 2-Engine",1,Passenger Configuration,ILM,34.271,-77.903,RDU,35.877,-78.787
4,0.0,1.0,12500.0,50.0,34.0,0.0,0.0,133.0,49.0,29.0,...,6,"Jet, 2-Engine",1,Passenger Configuration,IND,39.717,-86.294,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
482268,1166.0,583.0,1049400.0,5247.0,3646.0,0.0,0.0,91.0,27284.0,21338.0,...,1,"Piston, 2-Engine",1,Passenger Configuration,ACK,41.253,-70.060,BOS,42.364,-71.005
482269,1188.0,594.0,1069200.0,5346.0,3573.0,0.0,0.0,91.0,27799.0,21740.0,...,1,"Piston, 2-Engine",1,Passenger Configuration,BOS,42.364,-71.005,ACK,41.253,-70.060
482270,1216.0,608.0,1094400.0,5472.0,3827.0,0.0,0.0,91.0,28454.0,22253.0,...,1,"Piston, 2-Engine",1,Passenger Configuration,ACK,41.253,-70.060,BOS,42.364,-71.005
482271,1258.0,629.0,1132200.0,5661.0,4056.0,0.0,0.0,91.0,29437.0,23021.0,...,1,"Piston, 2-Engine",1,Passenger Configuration,BOS,42.364,-71.005,ACK,41.253,-70.060


In [9]:
df_routes_planes_airports.to_csv('routes_planes_coordinates.csv')

In [10]:
df_routes_planes_airports_for_mapping = df_routes_planes_airports.copy()
df_routes_planes_airports_for_mapping.dropna(axis=0,subset=['origin_iata_code',
 'destination_iata_code', 'destination_lat', 'destination_lon'],inplace=True) 
df_routes_planes_airports_for_mapping = df_routes_planes_airports_for_mapping.query(
    "origin_lat != 0 and origin_lon != 0 and destination_lat != 0 \
        and destination_lon != 0")
df_routes_planes_airports_for_mapping

Unnamed: 0,DEPARTURES_SCHEDULED,DEPARTURES_PERFORMED,PAYLOAD,SEATS,PASSENGERS,FREIGHT,MAIL,DISTANCE,RAMP_TO_RAMP,AIR_TIME,...,Code_y,Plane_Group_Text,Code,Plane_Config_Text,origin_iata_code,origin_lat,origin_lon,destination_iata_code,destination_lat,destination_lon
0,0.0,1.0,21502.0,76.0,3.0,0.0,0.0,901.0,170.0,140.0,...,6,"Jet, 2-Engine",1,Passenger Configuration,IAD,38.944,-77.456,FLL,26.072,-80.153
1,0.0,3.0,64506.0,228.0,75.0,0.0,0.0,228.0,219.0,140.0,...,6,"Jet, 2-Engine",1,Passenger Configuration,IAD,38.944,-77.456,JFK,40.640,-73.779
2,0.0,1.0,21502.0,76.0,64.0,0.0,0.0,851.0,144.0,114.0,...,6,"Jet, 2-Engine",1,Passenger Configuration,IAH,29.980,-95.340,SAV,32.127,-81.202
3,0.0,1.0,21502.0,76.0,55.0,0.0,0.0,122.0,58.0,31.0,...,6,"Jet, 2-Engine",1,Passenger Configuration,ILM,34.271,-77.903,RDU,35.877,-78.787
5,0.0,1.0,21502.0,76.0,0.0,0.0,0.0,476.0,157.0,138.0,...,6,"Jet, 2-Engine",1,Passenger Configuration,IND,39.717,-86.294,IAD,38.944,-77.456
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
482267,1146.0,573.0,1031400.0,5157.0,3693.0,0.0,0.0,91.0,26816.0,20972.0,...,1,"Piston, 2-Engine",1,Passenger Configuration,ACK,41.253,-70.060,BOS,42.364,-71.005
482268,1166.0,583.0,1049400.0,5247.0,3646.0,0.0,0.0,91.0,27284.0,21338.0,...,1,"Piston, 2-Engine",1,Passenger Configuration,ACK,41.253,-70.060,BOS,42.364,-71.005
482269,1188.0,594.0,1069200.0,5346.0,3573.0,0.0,0.0,91.0,27799.0,21740.0,...,1,"Piston, 2-Engine",1,Passenger Configuration,BOS,42.364,-71.005,ACK,41.253,-70.060
482270,1216.0,608.0,1094400.0,5472.0,3827.0,0.0,0.0,91.0,28454.0,22253.0,...,1,"Piston, 2-Engine",1,Passenger Configuration,ACK,41.253,-70.060,BOS,42.364,-71.005


In [11]:
df_routes_planes_airports_for_mapping.to_csv(
    'routes_planes_coordinates_for_mapping.csv')

The following cell, applied to a copy of df_routes_planes_airports_for_mapping, creates a new column showing the origin and destination airports separated by an underscore. These airports are sorted alphabetically, which allows flights from Airport B to Airport A and from Airport A to Airport B to be consolidated together within the route mapping notebook. Using np.where() as opposed to a for loop saved a considerable amount of time.



In [12]:
df_routes_planes_airports_for_mapping_v2 = \
df_routes_planes_airports_for_mapping.copy()
df_routes_planes_airports_for_mapping_v2.reset_index(drop=True,inplace=True)


df_routes_planes_airports_for_mapping_v2['ORIGIN_DEST'] = \
np.where(df_routes_planes_airports_for_mapping_v2['ORIGIN'] < \
df_routes_planes_airports_for_mapping_v2['DEST'], 
df_routes_planes_airports_for_mapping_v2['ORIGIN'] + '_' + \
df_routes_planes_airports_for_mapping_v2['DEST'], 
df_routes_planes_airports_for_mapping_v2['DEST'] + \
'_' + df_routes_planes_airports_for_mapping_v2['ORIGIN'])

# The following for loop took over 23 minutes to run, 
# whereas the np.where() approach took 0.2 seconds!
# df_routes_planes_airports_for_mapping_v2.reset_index(drop=True,inplace=True)
# df_routes_planes_airports_for_mapping_v2['ORIGIN_DEST'] = ''
# loop_start_time = time.time()
# for i in range(len(df_routes_planes_airports_for_mapping_v2)):
#     if i % 1000 == 0:
#         print("Now on row",i)
#         print("elapsed time (in seconds):", time.time() - loop_start_time)
#     origin_airport = \
#     df_routes_planes_airports_for_mapping_v2.iloc[i, 
#     df_routes_planes_airports_for_mapping_v2.columns.get_loc('ORIGIN')]
#     destination_airport = \
#     df_routes_planes_airports_for_mapping_v2.iloc[i, 
#     df_routes_planes_airports_for_mapping_v2.columns.get_loc('DEST')]
#     if origin_airport < destination_airport:
#         origin_dest = origin_airport+'_'+destination_airport
#     else:
#         origin_dest = destination_airport+'_'+origin_airport
#     df_routes_planes_airports_for_mapping_v2.iloc[i, 
#     df_routes_planes_airports_for_mapping_v2.columns.get_loc('ORIGIN_DEST')] = \
#             origin_dest


In [13]:
df_routes_planes_airports_for_mapping_v2

Unnamed: 0,DEPARTURES_SCHEDULED,DEPARTURES_PERFORMED,PAYLOAD,SEATS,PASSENGERS,FREIGHT,MAIL,DISTANCE,RAMP_TO_RAMP,AIR_TIME,...,Plane_Group_Text,Code,Plane_Config_Text,origin_iata_code,origin_lat,origin_lon,destination_iata_code,destination_lat,destination_lon,ORIGIN_DEST
0,0.0,1.0,21502.0,76.0,3.0,0.0,0.0,901.0,170.0,140.0,...,"Jet, 2-Engine",1,Passenger Configuration,IAD,38.944,-77.456,FLL,26.072,-80.153,FLL_IAD
1,0.0,3.0,64506.0,228.0,75.0,0.0,0.0,228.0,219.0,140.0,...,"Jet, 2-Engine",1,Passenger Configuration,IAD,38.944,-77.456,JFK,40.640,-73.779,IAD_JFK
2,0.0,1.0,21502.0,76.0,64.0,0.0,0.0,851.0,144.0,114.0,...,"Jet, 2-Engine",1,Passenger Configuration,IAH,29.980,-95.340,SAV,32.127,-81.202,IAH_SAV
3,0.0,1.0,21502.0,76.0,55.0,0.0,0.0,122.0,58.0,31.0,...,"Jet, 2-Engine",1,Passenger Configuration,ILM,34.271,-77.903,RDU,35.877,-78.787,ILM_RDU
4,0.0,1.0,21502.0,76.0,0.0,0.0,0.0,476.0,157.0,138.0,...,"Jet, 2-Engine",1,Passenger Configuration,IND,39.717,-86.294,IAD,38.944,-77.456,IAD_IND
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
349168,1146.0,573.0,1031400.0,5157.0,3693.0,0.0,0.0,91.0,26816.0,20972.0,...,"Piston, 2-Engine",1,Passenger Configuration,ACK,41.253,-70.060,BOS,42.364,-71.005,ACK_BOS
349169,1166.0,583.0,1049400.0,5247.0,3646.0,0.0,0.0,91.0,27284.0,21338.0,...,"Piston, 2-Engine",1,Passenger Configuration,ACK,41.253,-70.060,BOS,42.364,-71.005,ACK_BOS
349170,1188.0,594.0,1069200.0,5346.0,3573.0,0.0,0.0,91.0,27799.0,21740.0,...,"Piston, 2-Engine",1,Passenger Configuration,BOS,42.364,-71.005,ACK,41.253,-70.060,ACK_BOS
349171,1216.0,608.0,1094400.0,5472.0,3827.0,0.0,0.0,91.0,28454.0,22253.0,...,"Piston, 2-Engine",1,Passenger Configuration,ACK,41.253,-70.060,BOS,42.364,-71.005,ACK_BOS


In [14]:
df_routes_planes_airports_for_mapping_v2.to_csv(
    'routes_planes_coordinates_for_mapping_v2.csv')