In [77]:
#Dependencies:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import requests
from scipy.stats import linregress
import time
import json
import os
from pathlib import Path
import hvplot.pandas

# Input CSV locations, relative to the notebook's working directory
airline_data_to_load = Path("Resources") / "airlines.csv"
airport_data_to_load = Path("Resources") / "airports.csv"
flights_data_to_load = Path("Resources") / "flights200501.csv"

# Load each CSV; rename the lookup tables' columns while loading so their
# join keys match the flight table:
#   airlines: AIRLINE -> AIRLINE_NAME, IATA_CODE -> AIRLINE
#   airports: IATA_CODE -> ORIGIN_AIRPORT
airline_data = pd.read_csv(airline_data_to_load).rename(
    columns={"AIRLINE": "AIRLINE_NAME", "IATA_CODE": "AIRLINE"}
)
airport_data = pd.read_csv(airport_data_to_load).rename(
    columns={"IATA_CODE": "ORIGIN_AIRPORT"}
)
flights_data = pd.read_csv(flights_data_to_load)

# Left joins keep every flight row; airline columns are attached via AIRLINE,
# then airport columns via ORIGIN_AIRPORT (so airport details describe the origin)
flight_data_complete = flights_data.merge(
    airline_data, on="AIRLINE", how="left"
).merge(
    airport_data, on="ORIGIN_AIRPORT", how="left"
)
flight_data_complete

Unnamed: 0,YEAR,MONTH,DAY,DAY_OF_WEEK,AIRLINE,FLIGHT_NUMBER,TAIL_NUMBER,ORIGIN_AIRPORT,DESTINATION_AIRPORT,SCHEDULED_DEPARTURE,...,AIRLINE_DELAY,LATE_AIRCRAFT_DELAY,WEATHER_DELAY,AIRLINE_NAME,AIRPORT,CITY,STATE,COUNTRY,LATITUDE,LONGITUDE
0,2015,1,1,4,AS,98,N407AS,ANC,SEA,5,...,,,,Alaska Airlines Inc.,Ted Stevens Anchorage International Airport,Anchorage,AK,USA,61.17432,-149.99619
1,2015,1,1,4,AA,2336,N3KUAA,LAX,PBI,10,...,,,,American Airlines Inc.,Los Angeles International Airport,Los Angeles,CA,USA,33.94254,-118.40807
2,2015,1,1,4,US,840,N171US,SFO,CLT,20,...,,,,US Airways Inc.,San Francisco International Airport,San Francisco,CA,USA,37.61900,-122.37484
3,2015,1,1,4,AA,258,N3HYAA,LAX,MIA,20,...,,,,American Airlines Inc.,Los Angeles International Airport,Los Angeles,CA,USA,33.94254,-118.40807
4,2015,1,1,4,AS,135,N527AS,SEA,ANC,25,...,,,,Alaska Airlines Inc.,Seattle-Tacoma International Airport,Seattle,WA,USA,47.44898,-122.30931
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
469963,2015,1,31,6,B6,839,N658JB,JFK,BQN,2359,...,,,,JetBlue Airways,John F. Kennedy International Airport (New Yor...,New York,NY,USA,40.63975,-73.77893
469964,2015,1,31,6,DL,1887,N855NW,SEA,DTW,2359,...,,,,Delta Air Lines Inc.,Seattle-Tacoma International Airport,Seattle,WA,USA,47.44898,-122.30931
469965,2015,1,31,6,F9,300,N218FR,DEN,TPA,2359,...,2.0,0.0,0.0,Frontier Airlines Inc.,Denver International Airport,Denver,CO,USA,39.85841,-104.66700
469966,2015,1,31,6,F9,422,N954FR,DEN,ATL,2359,...,0.0,0.0,0.0,Frontier Airlines Inc.,Denver International Airport,Denver,CO,USA,39.85841,-104.66700


In [78]:
# Clean the data: replace every missing value so later cells see no nulls.
# Numeric columns are filled with the number 0 rather than the string "0";
# filling them with "0" would turn them into object columns that mix strings
# and floats. Non-numeric columns keep the "0" placeholder.
numeric_columns = flight_data_complete.select_dtypes(include="number").columns
flight_data_cleaned = flight_data_complete.copy()
flight_data_cleaned[numeric_columns] = flight_data_cleaned[numeric_columns].fillna(0)
flight_data_cleaned = flight_data_cleaned.fillna("0")
flight_data_cleaned

Unnamed: 0,YEAR,MONTH,DAY,DAY_OF_WEEK,AIRLINE,FLIGHT_NUMBER,TAIL_NUMBER,ORIGIN_AIRPORT,DESTINATION_AIRPORT,SCHEDULED_DEPARTURE,...,AIRLINE_DELAY,LATE_AIRCRAFT_DELAY,WEATHER_DELAY,AIRLINE_NAME,AIRPORT,CITY,STATE,COUNTRY,LATITUDE,LONGITUDE
0,2015,1,1,4,AS,98,N407AS,ANC,SEA,5,...,0,0,0,Alaska Airlines Inc.,Ted Stevens Anchorage International Airport,Anchorage,AK,USA,61.17432,-149.99619
1,2015,1,1,4,AA,2336,N3KUAA,LAX,PBI,10,...,0,0,0,American Airlines Inc.,Los Angeles International Airport,Los Angeles,CA,USA,33.94254,-118.40807
2,2015,1,1,4,US,840,N171US,SFO,CLT,20,...,0,0,0,US Airways Inc.,San Francisco International Airport,San Francisco,CA,USA,37.619,-122.37484
3,2015,1,1,4,AA,258,N3HYAA,LAX,MIA,20,...,0,0,0,American Airlines Inc.,Los Angeles International Airport,Los Angeles,CA,USA,33.94254,-118.40807
4,2015,1,1,4,AS,135,N527AS,SEA,ANC,25,...,0,0,0,Alaska Airlines Inc.,Seattle-Tacoma International Airport,Seattle,WA,USA,47.44898,-122.30931
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
469963,2015,1,31,6,B6,839,N658JB,JFK,BQN,2359,...,0,0,0,JetBlue Airways,John F. Kennedy International Airport (New Yor...,New York,NY,USA,40.63975,-73.77893
469964,2015,1,31,6,DL,1887,N855NW,SEA,DTW,2359,...,0,0,0,Delta Air Lines Inc.,Seattle-Tacoma International Airport,Seattle,WA,USA,47.44898,-122.30931
469965,2015,1,31,6,F9,300,N218FR,DEN,TPA,2359,...,2.0,0.0,0.0,Frontier Airlines Inc.,Denver International Airport,Denver,CO,USA,39.85841,-104.667
469966,2015,1,31,6,F9,422,N954FR,DEN,ATL,2359,...,0.0,0.0,0.0,Frontier Airlines Inc.,Denver International Airport,Denver,CO,USA,39.85841,-104.667


In [79]:
# Check to ensure there are no null values left: per-column count of nulls
null_count = flight_data_cleaned.isna().sum()
null_count

YEAR                   0
MONTH                  0
DAY                    0
DAY_OF_WEEK            0
AIRLINE                0
FLIGHT_NUMBER          0
TAIL_NUMBER            0
ORIGIN_AIRPORT         0
DESTINATION_AIRPORT    0
SCHEDULED_DEPARTURE    0
DEPARTURE_TIME         0
DEPARTURE_DELAY        0
TAXI_OUT               0
WHEELS_OFF             0
SCHEDULED_TIME         0
ELAPSED_TIME           0
AIR_TIME               0
DISTANCE               0
WHEELS_ON              0
TAXI_IN                0
SCHEDULED_ARRIVAL      0
ARRIVAL_TIME           0
ARRIVAL_DELAY          0
DIVERTED               0
CANCELLED              0
CANCELLATION_REASON    0
AIR_SYSTEM_DELAY       0
SECURITY_DELAY         0
AIRLINE_DELAY          0
LATE_AIRCRAFT_DELAY    0
WEATHER_DELAY          0
AIRLINE_NAME           0
AIRPORT                0
CITY                   0
STATE                  0
COUNTRY                0
LATITUDE               0
LONGITUDE              0
dtype: int64

In [80]:
# Flights with an airline-caused delay
#
# AIRLINE_DELAY is cast to float and written back onto flight_data_cleaned,
# then only the rows whose AIRLINE_DELAY is strictly greater than zero are kept.
airline_delay_as_float = flight_data_cleaned['AIRLINE_DELAY'].astype(float)
flight_data_cleaned['AIRLINE_DELAY'] = airline_delay_as_float
has_airline_delay = flight_data_cleaned['AIRLINE_DELAY'] > 0
flights_delayed_df = flight_data_cleaned.loc[has_airline_delay]
flights_delayed_df

Unnamed: 0,YEAR,MONTH,DAY,DAY_OF_WEEK,AIRLINE,FLIGHT_NUMBER,TAIL_NUMBER,ORIGIN_AIRPORT,DESTINATION_AIRPORT,SCHEDULED_DEPARTURE,...,AIRLINE_DELAY,LATE_AIRCRAFT_DELAY,WEATHER_DELAY,AIRLINE_NAME,AIRPORT,CITY,STATE,COUNTRY,LATITUDE,LONGITUDE
35,2015,1,1,4,HA,17,N389HA,LAS,HNL,145,...,15.0,0.0,0.0,Hawaiian Airlines Inc.,McCarran International Airport,Las Vegas,NV,USA,36.08036,-115.15233
52,2015,1,1,4,B6,2134,N307JB,SJU,MCO,400,...,85.0,0.0,0.0,JetBlue Airways,Luis Muñoz Marín International Airport,San Juan,PR,USA,18.43942,-66.00183
55,2015,1,1,4,B6,2276,N646JB,SJU,BDL,438,...,72.0,0.0,0.0,JetBlue Airways,Luis Muñoz Marín International Airport,San Juan,PR,USA,18.43942,-66.00183
73,2015,1,1,4,US,425,N174US,PDX,PHX,520,...,60.0,0.0,0.0,US Airways Inc.,Portland International Airport,Portland,OR,USA,45.58872,-122.5975
74,2015,1,1,4,AA,89,N3KVAA,IAH,MIA,520,...,54.0,0.0,0.0,American Airlines Inc.,George Bush Intercontinental Airport,Houston,TX,USA,29.98047,-95.33972
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
469912,2015,1,31,6,B6,2002,N317JB,JFK,BUF,2310,...,8.0,8.0,0.0,JetBlue Airways,John F. Kennedy International Airport (New Yor...,New York,NY,USA,40.63975,-73.77893
469920,2015,1,31,6,B6,912,N516JB,LAS,JFK,2318,...,18.0,0.0,0.0,JetBlue Airways,McCarran International Airport,Las Vegas,NV,USA,36.08036,-115.15233
469924,2015,1,31,6,B6,729,N510JB,MCO,BQN,2325,...,15.0,25.0,0.0,JetBlue Airways,Orlando International Airport,Orlando,FL,USA,28.42889,-81.31603
469940,2015,1,31,6,B6,688,N643JB,LAX,BOS,2349,...,18.0,11.0,0.0,JetBlue Airways,Los Angeles International Airport,Los Angeles,CA,USA,33.94254,-118.40807


In [81]:
# Convert each delay-cause column to a numeric dtype in place on
# flight_data_cleaned; values that cannot be parsed become NaN (errors='coerce').
for delay_column in ('AIRLINE_DELAY', 'AIR_SYSTEM_DELAY', 'SECURITY_DELAY',
                     'LATE_AIRCRAFT_DELAY', 'WEATHER_DELAY'):
    flight_data_cleaned[delay_column] = pd.to_numeric(
        flight_data_cleaned[delay_column], errors='coerce'
    )

In [82]:
# Keep flights where at least one of the five delay-cause columns is greater than 1.
# NOTE(review): the threshold here is > 1 while flights_delayed_df uses > 0 — confirm this is intended.
delay_cause_columns = ['AIRLINE_DELAY', 'AIR_SYSTEM_DELAY', 'SECURITY_DELAY',
                       'LATE_AIRCRAFT_DELAY', 'WEATHER_DELAY']
exceeds_threshold = flight_data_cleaned[delay_cause_columns] > 1
flights_all_delayed_df = flight_data_cleaned[exceeds_threshold.any(axis=1)]
flights_all_delayed_df

Unnamed: 0,YEAR,MONTH,DAY,DAY_OF_WEEK,AIRLINE,FLIGHT_NUMBER,TAIL_NUMBER,ORIGIN_AIRPORT,DESTINATION_AIRPORT,SCHEDULED_DEPARTURE,...,AIRLINE_DELAY,LATE_AIRCRAFT_DELAY,WEATHER_DELAY,AIRLINE_NAME,AIRPORT,CITY,STATE,COUNTRY,LATITUDE,LONGITUDE
27,2015,1,1,4,NK,597,N528NK,MSP,FLL,115,...,0.0,0.0,0.0,Spirit Air Lines,Minneapolis-Saint Paul International Airport,Minneapolis,MN,USA,44.88055,-93.21692
30,2015,1,1,4,NK,168,N629NK,PHX,ORD,125,...,0.0,0.0,0.0,Spirit Air Lines,Phoenix Sky Harbor International Airport,Phoenix,AZ,USA,33.43417,-112.00806
35,2015,1,1,4,HA,17,N389HA,LAS,HNL,145,...,15.0,0.0,0.0,Hawaiian Airlines Inc.,McCarran International Airport,Las Vegas,NV,USA,36.08036,-115.15233
50,2015,1,1,4,B6,1030,N239JB,BQN,MCO,307,...,0.0,0.0,0.0,JetBlue Airways,Rafael Hernández Airport,Aguadilla,PR,USA,18.49486,-67.12944
52,2015,1,1,4,B6,2134,N307JB,SJU,MCO,400,...,85.0,0.0,0.0,JetBlue Airways,Luis Muñoz Marín International Airport,San Juan,PR,USA,18.43942,-66.00183
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
469945,2015,1,31,6,AS,121,N763AS,SEA,ANC,2350,...,0.0,78.0,0.0,Alaska Airlines Inc.,Seattle-Tacoma International Airport,Seattle,WA,USA,47.44898,-122.30931
469953,2015,1,31,6,B6,778,N594JB,LAS,BOS,2355,...,0.0,28.0,0.0,JetBlue Airways,McCarran International Airport,Las Vegas,NV,USA,36.08036,-115.15233
469959,2015,1,31,6,B6,98,N535JB,DEN,JFK,2357,...,0.0,11.0,13.0,JetBlue Airways,Denver International Airport,Denver,CO,USA,39.85841,-104.667
469965,2015,1,31,6,F9,300,N218FR,DEN,TPA,2359,...,2.0,0.0,0.0,Frontier Airlines Inc.,Denver International Airport,Denver,CO,USA,39.85841,-104.667


In [83]:
# Average AIRLINE_DELAY per airport
#
# Rows of flights_all_delayed_df are grouped by the AIRPORT column and the
# AIRLINE_DELAY values in each group are averaged.
airline_delay_by_airport = flights_all_delayed_df.groupby("AIRPORT")["AIRLINE_DELAY"]
average_airline_delay = airline_delay_by_airport.mean()
average_airline_delay

AIRPORT
Aberdeen Regional Airport                       140.666667
Abilene Regional Airport                         24.333333
Abraham Lincoln Capital Airport                  14.900000
Adak Airport                                      0.000000
Akron-Canton Regional Airport                    13.175258
                                                   ...    
Yakutat Airport                                   0.000000
Yampa Valley Airport (Yampa Valley Regional)     11.350000
Yeager Airport                                   12.272727
Yellowstone Regional Airport                     28.636364
Yuma International Airport                       18.250000
Name: AIRLINE_DELAY, Length: 310, dtype: float64

In [84]:
# Average AIR_SYSTEM_DELAY per airport
#
# Rows of flights_all_delayed_df are grouped by the AIRPORT column and the
# AIR_SYSTEM_DELAY values in each group are averaged.
air_system_delay_by_airport = flights_all_delayed_df.groupby("AIRPORT")['AIR_SYSTEM_DELAY']
average_air_system_delay = air_system_delay_by_airport.mean()
average_air_system_delay

AIRPORT
Aberdeen Regional Airport                        6.555556
Abilene Regional Airport                        12.354167
Abraham Lincoln Capital Airport                 13.166667
Adak Airport                                    11.500000
Akron-Canton Regional Airport                   15.309278
                                                  ...    
Yakutat Airport                                  3.285714
Yampa Valley Airport (Yampa Valley Regional)    13.850000
Yeager Airport                                  23.363636
Yellowstone Regional Airport                    26.090909
Yuma International Airport                      19.500000
Name: AIR_SYSTEM_DELAY, Length: 310, dtype: float64

In [85]:
# Average SECURITY_DELAY per airport (the grouping key is AIRPORT, not the airline)
#
# Rows of flights_all_delayed_df are grouped by the AIRPORT column and the
# SECURITY_DELAY values in each group are averaged.
security_delay_by_airport = flights_all_delayed_df.groupby("AIRPORT")["SECURITY_DELAY"]
average_security_delay = security_delay_by_airport.mean()
average_security_delay

AIRPORT
Aberdeen Regional Airport                        0.000000
Abilene Regional Airport                         0.250000
Abraham Lincoln Capital Airport                  1.333333
Adak Airport                                    15.500000
Akron-Canton Regional Airport                    0.000000
                                                  ...    
Yakutat Airport                                  0.000000
Yampa Valley Airport (Yampa Valley Regional)     0.000000
Yeager Airport                                   0.000000
Yellowstone Regional Airport                     0.000000
Yuma International Airport                       0.000000
Name: SECURITY_DELAY, Length: 310, dtype: float64

In [86]:
# Average LATE_AIRCRAFT_DELAY per airport (the grouping key is AIRPORT, not the airline)
#
# Rows of flights_all_delayed_df are grouped by the AIRPORT column and the
# LATE_AIRCRAFT_DELAY values in each group are averaged.
late_aircraft_delay_by_airport = flights_all_delayed_df.groupby("AIRPORT")["LATE_AIRCRAFT_DELAY"]
average_late_airline_delay = late_aircraft_delay_by_airport.mean()
average_late_airline_delay

AIRPORT
Aberdeen Regional Airport                       39.666667
Abilene Regional Airport                        20.520833
Abraham Lincoln Capital Airport                 27.133333
Adak Airport                                     0.000000
Akron-Canton Regional Airport                   18.618557
                                                  ...    
Yakutat Airport                                 43.428571
Yampa Valley Airport (Yampa Valley Regional)    34.100000
Yeager Airport                                  36.704545
Yellowstone Regional Airport                     3.454545
Yuma International Airport                      27.035714
Name: LATE_AIRCRAFT_DELAY, Length: 310, dtype: float64

In [87]:
# Average WEATHER_DELAY per airport (the grouping key is AIRPORT, not the airline)
#
# Rows of flights_all_delayed_df are grouped by the AIRPORT column and the
# WEATHER_DELAY values in each group are averaged.
weather_delay_by_airport = flights_all_delayed_df.groupby("AIRPORT")["WEATHER_DELAY"]
average_weather_delay = weather_delay_by_airport.mean()
average_weather_delay

AIRPORT
Aberdeen Regional Airport                       0.000000
Abilene Regional Airport                        0.604167
Abraham Lincoln Capital Airport                 0.200000
Adak Airport                                    0.000000
Akron-Canton Regional Airport                   4.711340
                                                  ...   
Yakutat Airport                                 4.714286
Yampa Valley Airport (Yampa Valley Regional)    7.425000
Yeager Airport                                  1.386364
Yellowstone Regional Airport                    0.000000
Yuma International Airport                      2.785714
Name: WEATHER_DELAY, Length: 310, dtype: float64

In [88]:
# Collect the five per-airport average series into one table,
# one column per delay cause (rows are aligned on the shared AIRPORT index).
average_delay_columns = {
    "Airline Delay": average_airline_delay,
    "Air System Delay": average_air_system_delay,
    "Security Delay": average_security_delay,
    "Late Aircraft Delay": average_late_airline_delay,
    "Weather Delay": average_weather_delay,
}
flights_delay_averages = pd.DataFrame(average_delay_columns)
flights_delay_averages

Unnamed: 0_level_0,Airline Delay,Air System Delay,Security Delay,Late Aircraft Delay,Weather Delay
AIRPORT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Aberdeen Regional Airport,140.666667,6.555556,0.000000,39.666667,0.000000
Abilene Regional Airport,24.333333,12.354167,0.250000,20.520833,0.604167
Abraham Lincoln Capital Airport,14.900000,13.166667,1.333333,27.133333,0.200000
Adak Airport,0.000000,11.500000,15.500000,0.000000,0.000000
Akron-Canton Regional Airport,13.175258,15.309278,0.000000,18.618557,4.711340
...,...,...,...,...,...
Yakutat Airport,0.000000,3.285714,0.000000,43.428571,4.714286
Yampa Valley Airport (Yampa Valley Regional),11.350000,13.850000,0.000000,34.100000,7.425000
Yeager Airport,12.272727,23.363636,0.000000,36.704545,1.386364
Yellowstone Regional Airport,28.636364,26.090909,0.000000,3.454545,0.000000


In [93]:
# Per-airport total of the five average-delay columns.
# Note: TOTAL DELAYS is a sum of per-cause averages, not a count of delayed flights.
average_columns = ['Airline Delay', 'Air System Delay', 'Security Delay',
                   'Late Aircraft Delay', 'Weather Delay']
delay_sums = flights_delay_averages.groupby('AIRPORT')[average_columns].sum()
delay_sums['TOTAL DELAYS'] = delay_sums[average_columns].sum(axis=1)

# Single-column frame holding only the totals
delay_sums_df = pd.DataFrame(delay_sums['TOTAL DELAYS'])
delay_sums_df

Unnamed: 0_level_0,TOTAL DELAYS
AIRPORT,Unnamed: 1_level_1
Aberdeen Regional Airport,186.888889
Abilene Regional Airport,58.062500
Abraham Lincoln Capital Airport,56.733333
Adak Airport,27.000000
Akron-Canton Regional Airport,51.814433
...,...
Yakutat Airport,51.428571
Yampa Valley Airport (Yampa Valley Regional),66.725000
Yeager Airport,73.727273
Yellowstone Regional Airport,58.181818


In [95]:
# Average of each delay-cause column per origin airport code (ORIGIN_AIRPORT)
origin_delay_columns = ['AIRLINE_DELAY', 'AIR_SYSTEM_DELAY', 'SECURITY_DELAY',
                        'LATE_AIRCRAFT_DELAY', 'WEATHER_DELAY']
flights_by_origin = flights_all_delayed_df.groupby("ORIGIN_AIRPORT")
origin_delay = flights_by_origin[origin_delay_columns].mean()
origin_delay

Unnamed: 0_level_0,AIRLINE_DELAY,AIR_SYSTEM_DELAY,SECURITY_DELAY,LATE_AIRCRAFT_DELAY,WEATHER_DELAY
ORIGIN_AIRPORT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ABE,12.818182,17.484848,0.000000,23.060606,1.303030
ABI,24.333333,12.354167,0.250000,20.520833,0.604167
ABQ,20.037152,11.083591,0.040248,23.399381,1.291022
ABR,140.666667,6.555556,0.000000,39.666667,0.000000
ABY,31.153846,8.615385,0.000000,24.000000,0.000000
...,...,...,...,...,...
VPS,17.171875,11.187500,0.000000,34.765625,0.843750
WRG,0.000000,6.428571,0.000000,52.785714,0.000000
XNA,21.693122,18.640212,0.000000,24.507937,1.830688
YAK,0.000000,3.285714,0.000000,43.428571,4.714286


In [99]:
# Summary statistics (count, mean, std, min, quartiles, max) of the
# per-origin-airport averages; the quartiles are spelled out explicitly
# and match describe()'s defaults.
origin_described = origin_delay.describe(percentiles=[0.25, 0.5, 0.75])
origin_described

Unnamed: 0,AIRLINE_DELAY,AIR_SYSTEM_DELAY,SECURITY_DELAY,LATE_AIRCRAFT_DELAY,WEATHER_DELAY
count,310.0,310.0,310.0,310.0,310.0
mean,18.051264,14.454805,0.113582,29.718987,2.778966
std,16.436211,7.907288,0.912628,13.70197,6.57892
min,0.0,0.0,0.0,0.0,0.0
25%,10.174852,9.565934,0.0,22.258564,0.0
50%,15.806983,13.411496,0.0,28.473993,0.888477
75%,21.68832,18.409754,0.0,34.632616,2.669625
max,140.666667,64.333333,15.5,106.2,67.333333


In [102]:
# Which airport has the largest TOTAL DELAYS value
# (TOTAL DELAYS comes from delay_sums; it is a sum of averages, not a flight count)

max_origin_delay = delay_sums.groupby("AIRPORT")["TOTAL DELAYS"].max()
# Label of the airport holding the largest total, and the total itself
origin_airport_with_max_delay = max_origin_delay.idxmax()
max_delay = max_origin_delay.max()
print(f"The origin airport with the highest delay is {origin_airport_with_max_delay} with a delay of {max_delay} minutes")

The origin airport with the highest delay is Pocatello Regional Airport with a delay of 213.55555555555554 minutes


In [74]:
# Average AIRLINE_DELAY per destination airport code,
# computed over flights_delayed_df (rows with AIRLINE_DELAY > 0)

airline_delay_by_destination = flights_delayed_df.groupby("DESTINATION_AIRPORT")["AIRLINE_DELAY"]
destination_delay = airline_delay_by_destination.mean()
destination_delay

DESTINATION_AIRPORT
ABE    37.785714
ABI    28.744186
ABQ    23.734300
ABR    41.000000
ABY    65.400000
         ...    
VPS    39.533333
WRG    16.000000
XNA    37.973913
YAK     6.500000
YUM    50.818182
Name: AIRLINE_DELAY, Length: 310, dtype: float64

In [75]:
# Which destination airport saw the largest single AIRLINE_DELAY value

# Largest AIRLINE_DELAY recorded for each destination airport
max_destination_delay = flights_delayed_df.groupby("DESTINATION_AIRPORT")["AIRLINE_DELAY"].max()
# Fix: take the overall maximum from the destination series itself. It was
# previously read from max_origin_delay, so the printed value did not belong
# to the printed destination airport.
max_delay = max_destination_delay.max()
# idxmax returns the label of the first row holding the maximum
destination_airport_with_max_delay = max_destination_delay.idxmax()
print(f"The destination airport with the maximum delay is {destination_airport_with_max_delay} with a delay of {max_delay}")

The destination airport with the maximum delay is DFW with a delay of 1971.0


In [89]:
# Which origin airport has the smallest AIRLINE_DELAY value among delayed flights

# Smallest AIRLINE_DELAY recorded for each origin airport
min_origin_delay = flights_delayed_df.groupby("ORIGIN_AIRPORT")["AIRLINE_DELAY"].min()
# Fix: take the overall minimum from min_origin_delay. It was previously read
# from max_origin_delay, so the printed value did not belong to the printed airport.
min_delay = min_origin_delay.min()
# idxmin returns the first label holding the minimum, so when several airports
# tie, the one that comes first in index order is reported
origin_airport_with_min_delay = min_origin_delay.idxmin()
print(f"The origin airport with the lowest delay is {origin_airport_with_min_delay} with a delay of {min_delay}")

The origin airport with the lowest delay is ABE with a delay of 1.0


In [None]:
# Find the average AIRLINE_DELAY of each origin airport

# Fix: the result was previously stored in `destination_delay`, overwriting the
# per-destination averages with per-origin data. It now has its own name.
# NOTE(review): if a later cell reads `destination_delay` expecting origin data,
# point it at `origin_airline_delay` instead.
origin_airline_delay = flights_delayed_df.groupby(["ORIGIN_AIRPORT"])["AIRLINE_DELAY"].mean()
origin_airline_delay

In [105]:
# Distinct destination airport codes, in order of first appearance
destination_codes = flight_data_cleaned.loc[:, "DESTINATION_AIRPORT"]
unique = destination_codes.unique()
unique

array(['SEA', 'PBI', 'CLT', 'MIA', 'ANC', 'MSP', 'DFW', 'ATL', 'IAH',
       'PDX', 'MCI', 'FLL', 'ORD', 'HNL', 'PHX', 'EWR', 'JFK', 'MCO',
       'BOS', 'BDL', 'ITO', 'SFO', 'KOA', 'OGG', 'MYR', 'DTW', 'LIH',
       'DEN', 'SJU', 'LAX', 'BWI', 'IAD', 'BQN', 'BUF', 'LGA', 'HOU',
       'SLC', 'PHL', 'SJC', 'OAK', 'LGB', 'TPA', 'DCA', 'TTN', 'BTR',
       'LAS', 'RSW', 'BRD', 'STL', 'RKS', 'MBS', 'SNA', 'MEI', 'MDW',
       'SAN', 'RIC', 'AUS', 'OTZ', 'PIT', 'JAX', 'MSY', 'ONT', 'PSP',
       'BUR', 'DAL', 'CVG', 'SMF', 'RDU', 'JMS', 'BNA', 'DSM', 'MAF',
       'BOI', 'ELP', 'TUS', 'SCC', 'HPN', 'STT', 'MDT', 'RHI', 'SBP',
       'MKE', 'JNU', 'CMH', 'CLD', 'KTN', 'CAK', 'CRP', 'CLE', 'GPT',
       'SHV', 'TYS', 'IND', 'LIT', 'SAT', 'SRQ', 'TUL', 'GRK', 'PNS',
       'BTV', 'CHS', 'DAY', 'OKC', 'SAV', 'XNA', 'COS', 'GJT', 'BZN',
       'PUB', 'HRL', 'HDN', 'MEM', 'GEG', 'ORH', 'SYR', 'GSO', 'VPS',
       'LAW', 'ACY', 'LBB', 'JAC', 'BIL', 'EUG', 'ASE', 'TVC', 'MTJ',
       'CAE', 'PVD',