In [1]:
import os

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sqlalchemy import create_engine

In [2]:
# Connect Python to MySQL
#
# The connection URL is taken from the AIRLINE_DB_URL environment variable so
# that database credentials do not have to be written into the notebook.
# When the variable is not set, the original local URL (root user, empty
# password, localhost, airline_analytics schema) is used, so an existing
# local setup keeps working without any configuration.

DEFAULT_DB_URL = "mysql+pymysql://root:@localhost/airline_analytics"

engine = create_engine(os.environ.get("AIRLINE_DB_URL", DEFAULT_DB_URL))

In [None]:
# Top 15 Airlines by Total Flights
#
# Sums total_flights from the daily fact table per airline code and keeps the
# 15 largest totals, biggest first.

query_airline_volume = """
SELECT
    a.airline_code,
    SUM(f.total_flights) AS total_flights
FROM flights_fact_daily f
JOIN airlines_dim a ON f.airline_id = a.airline_id
GROUP BY a.airline_code
ORDER BY total_flights DESC
LIMIT 15;
"""

# Run the query through the shared SQLAlchemy engine and load the rows.
df_airlines = pd.read_sql(sql=query_airline_volume, con=engine)

In [None]:
# Top 15 Airlines by Total Flights Bar Chart
#
# One vertical bar per airline code in df_airlines, labelled with its total.

fig = px.bar(
    data_frame=df_airlines,
    x="airline_code",
    y="total_flights",
    text="total_flights",
    labels=dict(total_flights="Total Flights", airline_code="Airline"),
    title="Top 15 Airlines by Total Flights (1987–2008)",
)

# Value labels sit outside the bars and use a thousands separator.
fig.update_traces(textposition="outside", texttemplate="%{text:,}")

# Tilt the airline tick labels and apply a uniform minimum label size of 8;
# with mode "hide", labels that cannot be drawn at that size are dropped.
fig.update_layout(
    xaxis=dict(tickangle=-45),
    uniformtext=dict(minsize=8, mode="hide"),
)

fig.show()

In [None]:
# Average Arrival Delay by Major Airlines
#
# Flight-weighted mean of avg_arr_delay per airline code, restricted to
# airlines whose summed total_flights exceeds 1,000,000 and ordered from the
# lowest weighted delay to the highest.
#
# SUM() ignores NULL inputs, so a daily row with a NULL avg_arr_delay adds
# nothing to the numerator. The denominator therefore only counts flights
# from rows that carry a delay value; dividing by every row's total_flights
# would pull the average toward zero. NULLIF yields NULL instead of dividing
# by zero when no row has a delay value. If avg_arr_delay is never NULL the
# result is identical to dividing by SUM(f.total_flights).
# NOTE(review): whether avg_arr_delay can be NULL depends on how
# flights_fact_daily is populated — confirm against the load job.

query_arr_delay = """
SELECT
    a.airline_code,
    ROUND(
        SUM(f.avg_arr_delay * f.total_flights)
        / NULLIF(
            SUM(
                CASE
                    WHEN f.avg_arr_delay IS NOT NULL THEN f.total_flights
                    ELSE 0
                END
            ),
            0
        ),
        2
    ) AS weighted_avg_arr_delay,
    SUM(f.total_flights) AS total_flights
FROM flights_fact_daily f
JOIN airlines_dim a ON f.airline_id = a.airline_id
GROUP BY a.airline_code
HAVING SUM(f.total_flights) > 1000000
ORDER BY weighted_avg_arr_delay ASC;
"""

df_delay = pd.read_sql(query_arr_delay, engine)
df_delay.head()

Unnamed: 0,airline_code,weighted_avg_arr_delay,total_flights
0,NW,5.36,10061018.0
1,WN,5.54,15148433.0
2,OO,5.88,2718681.0
3,US,6.36,13763787.0
4,AA,6.53,14530810.0


In [None]:
# Average Arrival Delay by Major Airlines Bar Chart
#
# One vertical bar per airline code in df_delay, labelled with its weighted
# average arrival delay.

fig = px.bar(
    data_frame=df_delay,
    x="airline_code",
    y="weighted_avg_arr_delay",
    text="weighted_avg_arr_delay",
    title="Average Arrival Delay by Major Airlines (Lower is Better)",
)

# Value labels sit outside the bars, formatted to two decimals.
fig.update_traces(textposition="outside", texttemplate="%{text:.2f}")

# Axis titles, horizontal tick labels, an explicit zero line on the y axis,
# and a uniform minimum label size of 9 (labels that do not fit are hidden).
fig.update_layout(
    xaxis=dict(title=dict(text="Airline"), tickangle=0),
    yaxis=dict(title=dict(text="Avg Arrival Delay (minutes)"), zeroline=True),
    uniformtext=dict(minsize=9, mode="hide"),
)

fig.show()

In [12]:
# Top 15 Busiest Routes
#
# Reads the 15 rows with the highest total_flights from the
# v_route_performance view, largest first.

query_routes = """
SELECT
    origin,
    destination,
    total_flights
FROM v_route_performance
ORDER BY total_flights DESC
LIMIT 15;
"""

# Load the rows, then append a display label of the form "ORIGIN → DESTINATION".
df_routes = pd.read_sql(sql=query_routes, con=engine).assign(
    route=lambda routes: routes["origin"] + " → " + routes["destination"]
)
df_routes

Unnamed: 0,origin,destination,total_flights,route
0,SFO,LAX,329370.0,SFO → LAX
1,LAX,SFO,328105.0,LAX → SFO
2,LAX,LAS,284494.0,LAX → LAS
3,LAS,LAX,278653.0,LAS → LAX
4,PHX,LAX,273286.0,PHX → LAX
5,LAX,PHX,272681.0,LAX → PHX
6,ORD,MSP,243470.0,ORD → MSP
7,MSP,ORD,242933.0,MSP → ORD
8,PHX,LAS,233977.0,PHX → LAS
9,LAS,PHX,232467.0,LAS → PHX


In [None]:
# Top 15 Busiest Routes Bar Chart
#
# One horizontal bar per route in df_routes, labelled with its flight count.

fig = px.bar(
    df_routes,
    y="route",
    x="total_flights",
    orientation="h",
    text="total_flights",
    title="Top 15 Busiest U.S. Airline Routes (1987–2008)",
)

# Value labels sit outside the bars and use a thousands separator.
fig.update_traces(texttemplate="%{text:,}", textposition="outside")
fig.update_layout(
    xaxis_title="Total Flights",
    yaxis_title="Route",
    # Extend the x axis 12% past the longest bar to leave room for the
    # outside value labels.
    xaxis=dict(range=[0, df_routes["total_flights"].max() * 1.12]),
    # A horizontal bar chart places the first category at the bottom by
    # default; reversing the axis lists the routes top-down in df_routes row
    # order, so the first row is drawn at the top of the chart.
    yaxis=dict(autorange="reversed"),
    template="plotly_white",
    margin=dict(l=120, r=40, t=80, b=60),
)
fig.show()