# Data to Dashboard: US Domestic Airline Flights Analytics (2005-2020)

## Import Libraries

In [1]:
import pandas as pd
import numpy as np

import plotly.express as px

from dash import Dash, html, dcc, Input, Output, callback
from dash.exceptions import PreventUpdate

## Prepare our data (2005-2020 only)

In [2]:
# Location of the airline CSV (served as a raw file from GitHub)
url = 'https://raw.githubusercontent.com/marvin-rubia/US-Airlines-Analytics-Dashboard/main/airline_data.csv' 

# Read the whole CSV into a DataFrame
df = pd.read_csv(url)

# Print a structural summary of the frame (row/column counts, dtypes, memory usage)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27000 entries, 0 to 26999
Columns: 110 entries, Unnamed: 0 to Div5TailNum
dtypes: float64(74), int64(19), object(17)
memory usage: 22.7+ MB


In [3]:
# Restrict the data to the years 2005 through 2020 (both ends included),
# as per course instruction

condition = df['Year'].between(2005, 2020)

airline_data = df.loc[condition]

airline_data.head()

Unnamed: 0.1,Unnamed: 0,Year,Quarter,Month,DayofMonth,DayOfWeek,FlightDate,Reporting_Airline,DOT_ID_Reporting_Airline,IATA_CODE_Reporting_Airline,...,Div4WheelsOff,Div4TailNum,Div5Airport,Div5AirportID,Div5AirportSeqID,Div5WheelsOn,Div5TotalGTime,Div5LongestGTime,Div5WheelsOff,Div5TailNum
1,1125375,2013,2,5,13,1,2013-05-13,EV,20366,EV,...,,,,,,,,,,
4,1888125,2017,3,8,17,4,2017-08-17,UA,19977,UA,...,,,,,,,,,,
5,1133769,2014,2,4,2,3,2014-04-02,AA,19805,AA,...,,,,,,,,,,
6,1465620,2011,3,8,15,1,2011-08-15,WN,19393,WN,...,,,,,,,,,,
7,172607,2019,1,1,6,7,2019-01-06,EV,20366,EV,...,,,,,,,,,,


In [4]:
# Sanity check: count the rows per year to confirm that only the expected years remain

airline_data['Year'].value_counts()

Year
2007    1062
2019    1026
2006    1013
2005    1006
2008     988
2018     961
2009     954
2010     950
2011     919
2013     869
2014     853
2015     794
2012     791
2017     761
2016     758
2020     232
Name: count, dtype: int64

### Description of each column: [DAX Dataset Glossary](https://dax-cdn.cdn.appdomain.cloud/dax-airline/1.0.1/data-preview/index.html)

## Make a function to get data for performance report (Type 1)

In [6]:
def compute_data_1(airline_data):
    """Build the five data sets plotted by the performance report (Type 1).

    Parameters
    ----------
    airline_data : pandas.DataFrame
        Flight records. The columns read here are 'DestState',
        'OriginState', 'Reporting_Airline', 'Month', 'CancellationCode',
        'Flights', 'AirTime' and 'DivAirportLandings'.

    Returns
    -------
    tuple
        ``(tree_data, div_data, map_data, bar_data, line_data)``:

        * tree_data - sum of 'Flights' per ('DestState', 'Reporting_Airline')
        * div_data  - the input rows with a non-zero, non-missing
          'DivAirportLandings'
        * map_data  - sum of 'Flights' per 'OriginState'
        * bar_data  - sum of 'Flights' per ('Month', 'CancellationCode')
        * line_data - mean 'AirTime' per ('Month', 'Reporting_Airline')
    """
    # For plot1
    tree_data = airline_data.groupby(['DestState', 'Reporting_Airline'])['Flights'].sum().reset_index()

    # For plot2
    # NaN != 0.0 evaluates to True, so a bare `!= 0.0` test would also keep
    # every row whose diversion count is missing. Require a real value too.
    landings = airline_data['DivAirportLandings']
    div_data = airline_data[landings.notna() & (landings != 0.0)]

    # For plot3
    map_data = airline_data.groupby('OriginState')['Flights'].sum().reset_index()

    # For plot4
    # groupby drops rows whose key is missing, so rows without a
    # CancellationCode do not appear in this summary.
    bar_data = airline_data.groupby(['Month', 'CancellationCode'])['Flights'].sum().reset_index()

    # For plot5
    line_data = airline_data.groupby(['Month', 'Reporting_Airline'])['AirTime'].mean().reset_index()

    return tree_data, div_data, map_data, bar_data, line_data

## Make a function to get data for delay report (Type 2)

In [7]:
def compute_data_2(airline_data):
    """Compute the monthly per-airline delay averages for the delay report (Type 2).

    Parameters
    ----------
    airline_data : pandas.DataFrame
        Flight records containing 'Month', 'Reporting_Airline' and the five
        delay columns listed in ``delay_columns`` below.

    Returns
    -------
    tuple
        Five DataFrames, one per delay column and in the order carrier,
        weather, NAS, security, late aircraft. Each holds the mean of that
        column per ('Month', 'Reporting_Airline') with the group keys as
        ordinary columns.
    """
    # One entry per plot (plot1 .. plot5), in plotting order
    delay_columns = (
        'CarrierDelay',
        'WeatherDelay',
        'NASDelay',
        'SecurityDelay',
        'LateAircraftDelay',
    )

    # Every average uses the same grouping, so build it once
    by_month_and_airline = airline_data.groupby(['Month', 'Reporting_Airline'])

    averages = [
        by_month_and_airline[column].mean().reset_index()
        for column in delay_columns
    ]
    return tuple(averages)

## Coding the dashboard 

In [11]:
# Create the Dash app
app = Dash(__name__)

# Create the app layout: a title, two dropdowns (report type and year) and
# five graph areas (plot1 on its own row, plot2/plot3 and plot4/plot5 in pairs).
# Style dictionaries use camelCase keys, which is the form Dash documents.
app.layout = html.Div(children=[
                html.H1('US Domestic Airline Flights Performance [2005-2020]',
                       style={'textAlign': 'center', 'color': '#00008B',
                             'fontSize': 30}),

                # Dropdown creation for report_type
                html.Div(style={'display': 'flex', 'justifyContent': 'center'},
                         children=[
                            html.H2('Choose Type of Report:', style={'marginRight': '2em', 'whiteSpace': 'nowrap', 'fontSize': 20}),
                            dcc.Dropdown(
                                 options=[{'label': 'Yearly Airline Performance Report', 'value': 'Type1'},
                                          {'label': 'Yearly Airline Delay Report', 'value': 'Type2'}],
                                 id='report_type_input',
                                 value='Type1',
                                 placeholder='Choose a report type.',
                                 style={'textAlign': 'center', 'width': '80%', 'padding': '3px', 'fontSize': '20px'})
                                  ]
                         ),

                # Dropdown creation for year of interest
                html.Div(style={'display': 'flex', 'justifyContent': 'center'},
                         children=[
                            html.H2('Choose Year:', style={'marginRight': '2em', 'whiteSpace': 'nowrap', 'fontSize': 20}),
                            dcc.Dropdown(options=list(range(2005, 2021)),
                                 id='year_input',
                                 # The options are integers, so the initial value must be
                                 # an integer too; the string '2020' matches no option and
                                 # would leave the dropdown looking unselected.
                                 value=2020,
                                 placeholder='Choose a year.',
                                 style={'textAlign': 'center', 'width': '80%', 'padding': '3px', 'fontSize': '20px'})
                                 ]
                        ),

                # Area for the graphs
                html.Div(dcc.Graph(id='plot1'), style={'justifyContent': 'center', 'border': 'ridge', 'padding': '10px'}),

                html.Div(style={'display': 'flex', 'justifyContent': 'center'},
                         children=[
                            html.Div(dcc.Graph(id='plot2'), style={'border': 'ridge', 'padding': '10px'}),
                            html.Div(dcc.Graph(id='plot3'), style={'border': 'ridge', 'padding': '10px'})
                            ]
                         ),

                html.Div(style={'display': 'flex', 'justifyContent': 'center'},
                         children=[
                            html.Div(dcc.Graph(id='plot4'), style={'border': 'ridge', 'padding': '10px'}),
                            html.Div(dcc.Graph(id='plot5'), style={'border': 'ridge', 'padding': '10px'})
                            ]
                         )
]
)

# Create callback decorator: both dropdown values feed the five graph figures
@app.callback([Output(component_id='plot1', component_property='figure'),
           Output(component_id='plot2', component_property='figure'),
           Output(component_id='plot3', component_property='figure'),
           Output(component_id='plot4', component_property='figure'),
           Output(component_id='plot5', component_property='figure')],
           [Input(component_id='report_type_input', component_property='value'),
           Input(component_id='year_input', component_property='value')]
           )
# Create callback function
def get_graphs(report_type, year):
    """Build the five figures for the selected report type and year.

    Parameters
    ----------
    report_type : str or None
        Value of the report-type dropdown: 'Type1' (performance report) or
        'Type2' (delay report). None when the dropdown has been cleared.
    year : int, str or None
        Value of the year dropdown; converted with ``int()`` before filtering.
        None when the dropdown has been cleared.

    Returns
    -------
    list
        Five plotly figures, in the order plot1 .. plot5.

    Raises
    ------
    PreventUpdate
        When either dropdown is empty or the report type is not recognised,
        so the graphs keep their previous content instead of the callback
        failing.
    """
    # A cleared dropdown sends None; int(None) would raise a TypeError
    if report_type is None or year is None:
        raise PreventUpdate

    # Keep only the flights of the selected year
    data = airline_data[airline_data['Year'] == int(year)]

    # If report type 1 is chosen:
    if report_type == 'Type1':
        # Get plotting data
        tree_data, div_data, map_data, bar_data, line_data = compute_data_1(data)

        # Tree map: destination states at the top level, airlines nested inside.
        # The path must name the 'DestState' column itself; px.Constant('DestState')
        # would only create a single root box labelled "DestState".
        tree_fig = px.treemap(tree_data, path=['DestState', 'Reporting_Airline'], values='Flights',
                              color='Flights', color_continuous_scale='RdBu',
                              title='Number of Flights per Airline by Destination State')

        # Pie graph
        pie_fig = px.pie(div_data, values='Flights', names='Reporting_Airline', title='% of Diverted Landings by Airline')

        # Choropleth map
        map_fig = px.choropleth(map_data,
                                locations='OriginState',
                                color='Flights',
                                hover_data=['OriginState', 'Flights'],
                                locationmode='USA-states',  # Set to plot as US States
                                color_continuous_scale='GnBu',
                                range_color=[0, map_data['Flights'].max()])

        # Plot only the USA instead of the whole globe
        map_fig.update_layout(title_text='Number of Flights from Origin State', geo_scope='usa')

        # Bar graph
        bar_fig = px.bar(bar_data, x='Month', y='Flights', color='CancellationCode', title='Monthly Flight Cancellation')

        # Line graph
        line_fig = px.line(line_data, x='Month', y='AirTime', color='Reporting_Airline', title='Monthly Airtime Per Airline')

        return [tree_fig, pie_fig, map_fig, bar_fig, line_fig]

    # If report type 2 is chosen:
    if report_type == 'Type2':
        avg_car, avg_weather, avg_NAS, avg_sec, avg_late = compute_data_2(data)

        # Create line graphs
        carrier_fig = px.line(avg_car, x='Month', y='CarrierDelay', color='Reporting_Airline', title='Average Carrier Delay Time (minutes) by Airline')
        weather_fig = px.line(avg_weather, x='Month', y='WeatherDelay', color='Reporting_Airline', title='Average Weather Delay Time (minutes) by Airline')
        nas_fig = px.line(avg_NAS, x='Month', y='NASDelay', color='Reporting_Airline', title='Average NAS Delay Time (minutes) by Airline')
        sec_fig = px.line(avg_sec, x='Month', y='SecurityDelay', color='Reporting_Airline', title='Average Security Delay Time (minutes) by Airline')
        late_fig = px.line(avg_late, x='Month', y='LateAircraftDelay', color='Reporting_Airline', title='Average Late Aircraft Delay Time (minutes) by Airline')

        return [carrier_fig, weather_fig, nas_fig, sec_fig, late_fig]

    # Unknown report type: returning None would not satisfy the five outputs
    raise PreventUpdate

# Run the app and open it in a new browser tab
if __name__ == '__main__':
    app.run(jupyter_mode='tab') 
    # The jupyter_mode parameter only applies when running inside a Jupyter environment

Dash app running on http://127.0.0.1:8050/


<IPython.core.display.Javascript object>