Jupyter notebook for testing out Plotly graphs

In [1]:
import pandas as pd
import plotly.express as px
import nbformat
import numpy as np

## Creating a bar chart showing the top 20 airports by passenger arrivals in 2018:

In [2]:
# Load the CSV of 2018 passenger arrivals for the top 20 airports (the path is
# relative to the notebook's working directory) and show the frame as output.
df_top_20_airports = pd.read_csv("top_20_airports_by_pax_arrivals_2018.csv")
df_top_20_airports

Unnamed: 0,Airport,2018_Passenger_Arrivals,Rank
0,ATL,47388875.0,1
1,LAX,41502828.0,2
2,ORD,36572207.0,3
3,DFW,30823751.0,4
4,JFK,29744941.0,5
5,DEN,29527533.0,6
6,SFO,26974833.0,7
7,LAS,23511649.0,8
8,MCO,23178413.0,9
9,SEA,22558466.0,10


In [3]:
# Plain Python list of the airport codes, in the same order as the frame's rows.
top_20_airports_list = df_top_20_airports["Airport"].tolist()
top_20_airports_list

['ATL',
 'LAX',
 'ORD',
 'DFW',
 'JFK',
 'DEN',
 'SFO',
 'LAS',
 'MCO',
 'SEA',
 'EWR',
 'PHX',
 'IAH',
 'MIA',
 'CLT',
 'BOS',
 'FLL',
 'MSP',
 'DTW',
 'PHL']

In [4]:
# Bar chart with one bar per airport; bar height is the 2018 passenger arrivals.
fig_top_20_airports_2018 = px.bar(
    data_frame=df_top_20_airports,
    x="Airport",
    y="2018_Passenger_Arrivals",
)

In [5]:
# A bare figure name as the last expression renders the chart as this cell's output.
fig_top_20_airports_2018

In [6]:
# Load the local copy of the 2018 airports / airlines / aircraft table
# (path relative to the working directory) and show it as the cell output.
df_aaa = pd.read_csv("local_copy_of_airports_airlines_aircraft_2018.csv")
df_aaa

Unnamed: 0,UNIQUE_CARRIER_NAME,ORIGIN_DEST,Plane_Type_Text,PASSENGERS,AIRPORT 1,AIRPORT 2
0,Hawaiian Airlines Inc.,HNL_OGG,Boeing 717-200,1954139.0,HNL,OGG
1,Delta Air Lines Inc.,ATL_MCO,Boeing 757-200,1417832.0,ATL,MCO
2,Hawaiian Airlines Inc.,HNL_KOA,Boeing 717-200,1281221.0,HNL,KOA
3,Southwest Airlines Co.,DAL_HOU,Boeing 737-700/700LR/Max 7,1260362.0,DAL,HOU
4,American Airlines Inc.,DFW_LAX,Airbus Industrie A321/Lr,1257512.0,DFW,LAX
...,...,...,...,...,...,...
2799,Spirit Air Lines,FLL_LGA,Airbus Industrie A320-100/200,100381.0,FLL,LGA
2800,Aer Lingus Plc,DUB_MCO,Airbus Industrie A330-200,100329.0,DUB,MCO
2801,Republic Airline,DCA_MCI,Embraer ERJ-175,100303.0,DCA,MCI
2802,Southwest Airlines Co.,BNA_CLT,Boeing 737-700/700LR/Max 7,100282.0,BNA,CLT


In [7]:
# Consider building a graph that lets you pivot by airline name, airport, and plane type and then display the output in both chart form and table form.

In [8]:
# You could also try creating a chart that lets you compare the presence of a given set of airlines (maybe up to 5) for a given set of airports (maybe up to 20). The charts could be either grouped bar charts or stacked bar charts (to show the airline/airport relationship).

In [9]:
## Top 20 airlines in 2018:

# Load the 2018 table of the top 20 airlines by passengers and display it.
df_top_20_airlines = pd.read_csv("top_20_airlines_by_passengers_2018.csv")
df_top_20_airlines

Unnamed: 0,Airline,Passengers,Rank
0,Southwest Airlines Co.,165139267.0,1
1,American Airlines Inc.,146267939.0,2
2,Delta Air Lines Inc.,143049045.0,3
3,United Air Lines Inc.,110385926.0,4
4,JetBlue Airways,41635897.0,5
5,Alaska Airlines Inc.,33755225.0,6
6,Spirit Air Lines,28777722.0,7
7,SkyWest Airlines Inc.,26810281.0,8
8,Frontier Airlines Inc.,19411454.0,9
9,Republic Airline,17474019.0,10


Creating a list of the top 4 airlines (which will be useful for a later graph):

In [10]:
# Take the airline names from the first four rows of the frame, relying on the
# frame's existing row order (nothing is re-sorted here).
top_airlines_list = df_top_20_airlines["Airline"].head(4).tolist()
top_airlines_list

['Southwest Airlines Co.',
 'American Airlines Inc.',
 'Delta Air Lines Inc.',
 'United Air Lines Inc.']

In [11]:
# Bar chart of passengers per airline; the bare name on the last line renders it.
fig_top_20_airlines_2018 = px.bar(
    data_frame=df_top_20_airlines,
    x="Airline",
    y="Passengers",
)
fig_top_20_airlines_2018

In [12]:
# Load the 2018 airline / destination-airport pairs with their passenger counts
# and display the frame.
df_airline_airport_pairs = pd.read_csv("airport_airline_pairs_2018.csv")
df_airline_airport_pairs

Unnamed: 0,Airline,Dest_Airport,Passengers
0,Delta Air Lines Inc.,ATL,35049728.0
1,American Airlines Inc.,DFW,21814244.0
2,American Airlines Inc.,CLT,13853081.0
3,American Airlines Inc.,MIA,12595599.0
4,United Air Lines Inc.,ORD,12153983.0
...,...,...,...
7019,Tradewind Aviation,MLB,1.0
7020,Tradewind Aviation,MIA,1.0
7021,"Jet Aviation Flight Services, Inc.",ART,1.0
7022,CAT Aviation,AUS,1.0


In [13]:
# Join the top airline names with "|" into one string, which can be used as a
# regex alternation pattern when separating the top airlines from an 'Other'
# category. The names are inserted verbatim (no regex escaping is applied).
top_airline_list_as_string = "|".join(top_airlines_list)
top_airline_list_as_string

'Southwest Airlines Co.|American Airlines Inc.|Delta Air Lines Inc.|United Air Lines Inc.'

In [14]:
# Build the airline-by-airport frame for the top 20 destination airports and
# collapse every airline outside the top-airline list into a single 'Other' label.
#
# Exact membership (isin) is used rather than a regex substring search: the
# airline names contain '.', which is a wildcard in a regex, and a substring
# match would also keep any longer name that merely contains a top airline's
# name. Rows whose airline name is missing are labelled 'Other' as well.
#
# reset_index already returns a new frame, so no separate .copy() is needed,
# and assign() adds the relabelled column without mutating an intermediate.
df_top_airlines_and_airports = (
    df_airline_airport_pairs
    .query("Dest_Airport in @top_20_airports_list")
    .reset_index(drop=True)
    .assign(
        Airline=lambda pairs: pairs['Airline'].where(
            pairs['Airline'].isin(top_airlines_list), 'Other'
        )
    )
)
df_top_airlines_and_airports

Unnamed: 0,Airline,Dest_Airport,Passengers
0,Delta Air Lines Inc.,ATL,35049728.0
1,American Airlines Inc.,DFW,21814244.0
2,American Airlines Inc.,CLT,13853081.0
3,American Airlines Inc.,MIA,12595599.0
4,United Air Lines Inc.,ORD,12153983.0
...,...,...,...
1316,Other,DTW,1.0
1317,Other,BOS,1.0
1318,Other,ATL,1.0
1319,Other,EWR,1.0


In [15]:
# Total the passengers for each (Airline, Dest_Airport) combination, so rows
# that share both keys collapse into one. as_index=False returns the grouping
# keys as ordinary columns next to the summed "Passengers" column.
df_top_airlines_and_airports = (
    df_top_airlines_and_airports
    .groupby(["Airline", "Dest_Airport"], as_index=False)["Passengers"]
    .sum()
)


In [16]:
# Sanity check: how many rows (i.e. airports) each airline label has in the frame.
(
    df_top_airlines_and_airports["Airline"]
    .value_counts()
)

American Airlines Inc.    20
Delta Air Lines Inc.      20
Other                     20
United Air Lines Inc.     20
Southwest Airlines Co.    18
Name: Airline, dtype: int64

In [17]:
# Two-column lookup (airport code and its rank) taken from the top-20 airports frame.
airport_ranks = df_top_20_airports.loc[:, ["Airport", "Rank"]]
airport_ranks

Unnamed: 0,Airport,Rank
0,ATL,1
1,LAX,2
2,ORD,3
3,DFW,4
4,JFK,5
5,DEN,6
6,SFO,7
7,LAS,8
8,MCO,9
9,SEA,10


In [18]:
# Attach each destination airport's rank by joining on the airport code.
# - Any 'Airport' / 'Rank' columns left over from an earlier run of this cell
#   are dropped first, so re-running the cell yields the same columns instead
#   of suffixed duplicates such as 'Airport_x' / 'Rank_x'.
# - validate="many_to_one" makes pandas raise if an airport appears more than
#   once in airport_ranks, which would otherwise silently duplicate passenger
#   rows in the result.
df_top_airlines_and_airports = (
    df_top_airlines_and_airports
    .drop(columns=["Airport", "Rank"], errors="ignore")
    .merge(
        airport_ranks,
        left_on="Dest_Airport",
        right_on="Airport",
        validate="many_to_one",
    )
)


In [19]:
# Order the rows by airport rank (rank 1 first).
# A stable sort (mergesort) keeps rows that share a rank in their existing
# relative order; the default quicksort leaves the order of ties unspecified.
# Rebinding the name instead of using inplace=True avoids mutating the frame
# object in place.
df_top_airlines_and_airports = df_top_airlines_and_airports.sort_values("Rank", kind="mergesort")
df_top_airlines_and_airports

Unnamed: 0,Airline,Dest_Airport,Passengers,Airport,Rank
0,American Airlines Inc.,ATL,1272128.0,ATL,1
1,Delta Air Lines Inc.,ATL,35049728.0,ATL,1
2,Other,ATL,5406930.0,ATL,1
3,Southwest Airlines Co.,ATL,5093338.0,ATL,1
4,United Air Lines Inc.,ATL,566751.0,ATL,1
...,...,...,...,...,...
80,Other,PHL,4781501.0,PHL,20
79,Delta Air Lines Inc.,PHL,815566.0,PHL,20
78,American Airlines Inc.,PHL,7463574.0,PHL,20
81,Southwest Airlines Co.,PHL,1117445.0,PHL,20


In [20]:
# One bar per destination airport, split into coloured segments by airline;
# each segment's height is that airline's passenger total at the airport.
fig_t4_airline_presence_at_t20_airports = px.bar(
    data_frame=df_top_airlines_and_airports,
    x="Dest_Airport",
    y="Passengers",
    color="Airline",
    title="Top 20 US Airports by Airline Share",
)
fig_t4_airline_presence_at_t20_airports