In [0]:
import pandas as pd
import numpy as np
import plotly.express as px
import datetime as dt
import plotly.graph_objects as go
from plotly.subplots import make_subplots



In [3]:
# Raw flight-delay CSV hosted on GitHub; it is read straight from the URL.
airline_data = (
    "https://raw.githubusercontent.com/karinafrank/analyzing-airline-delays-by-weekday/master/Airline%20Delay%20Raw%20Data.csv"
)
df_airline = pd.read_csv(filepath_or_buffer=airline_data)

# Preview the first five rows.
df_airline.head(5)

Unnamed: 0,YEAR,DAY_OF_WEEK,MKT_UNIQUE_CARRIER,ORIGIN_AIRPORT_ID,DEST_AIRPORT_ID,DEP_DELAY,ARR_DELAY
0,2019,6,DL,11193,13487,-4.0,-27.0
1,2019,1,DL,11433,14730,-5.0,-38.0
2,2019,2,DL,11433,14730,-5.0,-34.0
3,2019,3,DL,11433,14730,-10.0,-32.0
4,2019,4,DL,11433,14730,-9.0,-23.0


In [4]:
# Summarise column dtypes and non-null counts. In the output below DEP_DELAY and
# ARR_DELAY report fewer non-null values than the total row count, i.e. both
# columns contain missing values.
df_airline.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 638649 entries, 0 to 638648
Data columns (total 7 columns):
YEAR                  638649 non-null int64
DAY_OF_WEEK           638649 non-null int64
MKT_UNIQUE_CARRIER    638649 non-null object
ORIGIN_AIRPORT_ID     638649 non-null int64
DEST_AIRPORT_ID       638649 non-null int64
DEP_DELAY             619547 non-null float64
ARR_DELAY             617649 non-null float64
dtypes: float64(2), int64(4), object(1)
memory usage: 34.1+ MB


In [5]:
# Make sure the weekday column is numeric before grouping on it.
# NOTE(review): info() above already reports DAY_OF_WEEK as int64, so this
# conversion looks like a safeguard rather than a required step.
df_airline["DAY_OF_WEEK"] = df_airline["DAY_OF_WEEK"].pipe(pd.to_numeric)

df_airline.head(5)

Unnamed: 0,YEAR,DAY_OF_WEEK,MKT_UNIQUE_CARRIER,ORIGIN_AIRPORT_ID,DEST_AIRPORT_ID,DEP_DELAY,ARR_DELAY
0,2019,6,DL,11193,13487,-4.0,-27.0
1,2019,1,DL,11433,14730,-5.0,-38.0
2,2019,2,DL,11433,14730,-5.0,-34.0
3,2019,3,DL,11433,14730,-10.0,-32.0
4,2019,4,DL,11433,14730,-9.0,-23.0


In [6]:
# Mean departure delay for every (weekday, carrier) pair, flattened back into
# ordinary columns; the aggregated column is named "mean".
df_airline_dep_delays = (
    df_airline
    .groupby(["DAY_OF_WEEK", "MKT_UNIQUE_CARRIER"])["DEP_DELAY"]
    .agg(["mean"])
    .reset_index()
)

df_airline_dep_delays.head(5)




Unnamed: 0,DAY_OF_WEEK,MKT_UNIQUE_CARRIER,mean
0,1,AA,8.290999
1,1,AS,4.95809
2,1,B6,28.623003
3,1,DL,11.884272
4,1,F9,16.246468


In [7]:
# Mean arrival delay for every (weekday, carrier) pair, flattened back into
# ordinary columns; the aggregated column is named "mean".
df_airline_arr_delays = (
    df_airline
    .groupby(["DAY_OF_WEEK", "MKT_UNIQUE_CARRIER"])["ARR_DELAY"]
    .agg(["mean"])
    .reset_index()
)

df_airline_arr_delays.head(5)

Unnamed: 0,DAY_OF_WEEK,MKT_UNIQUE_CARRIER,mean
0,1,AA,4.433053
1,1,AS,3.599739
2,1,B6,22.079692
3,1,DL,3.790106
4,1,F9,10.589159


In [8]:
# Attach the mean arrival delay to the departure-delay summary by joining on the
# grouping keys, instead of relying on both frames happening to share the same
# row order and index.
df_airline_dep_delays = (
    df_airline_dep_delays
    # Drop a previously attached column so re-running this cell does not create
    # suffixed duplicates (ARR_DELAYS_x / ARR_DELAYS_y).
    .drop(columns="ARR_DELAYS", errors="ignore")
    .merge(
        df_airline_arr_delays.rename(columns={"mean": "ARR_DELAYS"}),
        on=["DAY_OF_WEEK", "MKT_UNIQUE_CARRIER"],
        how="left",
        # Fail loudly if a (weekday, carrier) pair is duplicated on either side.
        validate="one_to_one",
    )
)

df_airline_dep_delays.head()

Unnamed: 0,DAY_OF_WEEK,MKT_UNIQUE_CARRIER,mean,ARR_DELAYS
0,1,AA,8.290999,4.433053
1,1,AS,4.95809,3.599739
2,1,B6,28.623003,22.079692
3,1,DL,11.884272,3.790106
4,1,F9,16.246468,10.589159


In [19]:
# Mean departure delay per weekday, one line per carrier.
line_graph = px.line(
    df_airline_dep_delays,
    x="DAY_OF_WEEK",
    # Refer to the column by name; the frame is already passed as the first argument.
    y="mean",
    color="MKT_UNIQUE_CARRIER",
    hover_name="MKT_UNIQUE_CARRIER",
    # Title and readable axis/legend labels so the figure stands on its own.
    title="Average Departure Delay by Airline and Week Day",
    labels={
        "DAY_OF_WEEK": "Week Day",
        "MKT_UNIQUE_CARRIER": "Airline",
        "mean": "Mean Departure Delay (Minutes)",
    },
)
line_graph.show()




In [0]:
# Reshape to long format: the "mean" (departure) and "ARR_DELAYS" columns become
# rows identified by a "variable" column, with the numbers in "value".
df_melt = pd.melt(
    df_airline_dep_delays,
    id_vars=["DAY_OF_WEEK", "MKT_UNIQUE_CARRIER"],
    value_vars=["mean", "ARR_DELAYS"],
)

In [25]:
# Display the long-format frame: one row per weekday / carrier / variable with
# its delay in the "value" column (the rendered output is truncated in the middle).
df_melt

Unnamed: 0,DAY_OF_WEEK,MKT_UNIQUE_CARRIER,variable,value
0,1,AA,mean,8.290999
1,1,AS,mean,4.958090
2,1,B6,mean,28.623003
3,1,DL,mean,11.884272
4,1,F9,mean,16.246468
...,...,...,...,...
135,7,G4,ARR_DELAYS,15.189916
136,7,HA,ARR_DELAYS,-1.750507
137,7,NK,ARR_DELAYS,7.069906
138,7,UA,ARR_DELAYS,8.736107


In [32]:
# Departure vs. arrival mean delay per weekday for each carrier.
# df_melt holds two series per carrier (variable == "mean" for departures and
# "ARR_DELAYS" for arrivals). Grouping traces by colour alone would put both
# series into one trace per carrier, so the line would run through days 1..7
# and then jump back to day 1. `line_dash` splits them into separate traces
# that share the carrier's colour but differ in dash style.
line_graph = px.line(
    df_melt,
    x="DAY_OF_WEEK",
    y="value",
    color="MKT_UNIQUE_CARRIER",
    line_dash="variable",
    hover_name="variable",
    title="Difference Between Departure Delay and Arrival Delay by Airline",
    labels={
        "DAY_OF_WEEK": "Week Day",
        "MKT_UNIQUE_CARRIER": "Airline",
        "variable": "Departure or Arrival Delay",
        "value": "Delay (Minutes)",
    },
)
line_graph.show()