In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os

In [2]:
# -------------------------------------------------------------
# Visualization style setup
# -------------------------------------------------------------
sns.set(style="whitegrid")
plt.rcParams['axes.unicode_minus'] = False

In [3]:
# -------------------------------------------------------------
# Load the dataset
# -------------------------------------------------------------
# Change the file path to your dataset
data = pd.read_csv("flights_data.csv")

# Create a folder for saving charts
os.makedirs("charts", exist_ok=True)

In [4]:
# -------------------------------------------------------------
# Helper function to save charts
# -------------------------------------------------------------
def save_chart(name):
    """Save current matplotlib figure to charts folder."""
    path = os.path.join("charts", f"{name}.png")
    plt.tight_layout()
    plt.savefig(path, dpi=300)
    plt.close()
    print(f"✅ Chart saved: {path}")

In [6]:
# -------------------------------------------------------------
# 1. Average departure delay per carrier
# -------------------------------------------------------------
avg_delay_carrier = data.groupby("OP_UNIQUE_CARRIER")["DEP_DELAY"].mean().reset_index()
plt.figure(figsize=(10,5))
sns.barplot(x="OP_UNIQUE_CARRIER", y="DEP_DELAY", data=avg_delay_carrier,hue="OP_UNIQUE_CARRIER", palette="coolwarm")
plt.title("Average Departure Delay per Carrier")
plt.xlabel("Carrier")
plt.ylabel("Average Delay (minutes)")
save_chart("avg_delay_per_carrier")

✅ Chart saved: charts\avg_delay_per_carrier.png


In [7]:
# -------------------------------------------------------------
# 2. Total flights per destination
# -------------------------------------------------------------
flights_per_dest = data["DEST"].value_counts().reset_index()
flights_per_dest.columns = ["DEST", "Total_Flights"]
plt.figure(figsize=(10,5))
sns.barplot(x="DEST", y="Total_Flights", data=flights_per_dest.head(15))
plt.title("Top 15 Destinations by Flight Count")
save_chart("flights_per_destination")

✅ Chart saved: charts\flights_per_destination.png


In [9]:
# -------------------------------------------------------------
# 3. Average flight distance per carrier
# -------------------------------------------------------------
avg_distance_carrier = data.groupby("OP_UNIQUE_CARRIER")["DISTANCE"].mean().reset_index()
plt.figure(figsize=(10,5))
sns.barplot(x="OP_UNIQUE_CARRIER", y="DISTANCE", data=avg_distance_carrier,hue="OP_UNIQUE_CARRIER" ,palette="Blues_d")
plt.title("Average Flight Distance per Carrier")
plt.xlabel("Carrier")
plt.ylabel("Average Distance (miles)")
save_chart("avg_distance_per_carrier")

✅ Chart saved: charts\avg_distance_per_carrier.png


In [10]:
# -------------------------------------------------------------
# 4. Delay vs Flight Type (Short/Medium/Long Haul)
# -------------------------------------------------------------
def categorize_distance(d):
    if d < 500:
        return "Short Haul"
    elif 500 <= d <= 1500:
        return "Medium Haul"
    else:
        return "Long Haul"

data["Flight_Type"] = data["DISTANCE"].apply(categorize_distance)
avg_delay_type = data.groupby("Flight_Type")["DEP_DELAY"].mean().reset_index()
plt.figure(figsize=(6,4))
sns.barplot(x="Flight_Type", y="DEP_DELAY", data=avg_delay_type,hue="Flight_Type" ,palette="Set2")
plt.title("Average Delay by Flight Type")
plt.xlabel("Flight Type")
plt.ylabel("Average Delay (minutes)")
save_chart("avg_delay_by_flight_type")

✅ Chart saved: charts\avg_delay_by_flight_type.png


In [11]:
# -------------------------------------------------------------
# 5. Average delay by day of week
# -------------------------------------------------------------
avg_delay_weekday = data.groupby("DAY_OF_WEEK")["DEP_DELAY"].mean().reset_index()
plt.figure(figsize=(8,4))
sns.lineplot(x="DAY_OF_WEEK", y="DEP_DELAY", data=avg_delay_weekday, marker="o")
plt.title("Average Delay by Day of Week")
plt.xlabel("Day of Week")
plt.ylabel("Average Delay (minutes)")
save_chart("avg_delay_by_weekday")

✅ Chart saved: charts\avg_delay_by_weekday.png


In [12]:
# -------------------------------------------------------------
# 6. On-time vs Delayed flights
# -------------------------------------------------------------
delayed = (data["DEP_DELAY"] > 15).mean() * 100
on_time = 100 - delayed
plt.figure(figsize=(5,5))
plt.pie([on_time, delayed], labels=["On Time", "Delayed"], autopct="%.1f%%", colors=["#8BC34A", "#F44336"])
plt.title("On-time vs Delayed Flights")
save_chart("on_time_vs_delayed")

✅ Chart saved: charts\on_time_vs_delayed.png


In [13]:
# -------------------------------------------------------------
# 7. Average taxi-out time per destination
# -------------------------------------------------------------
avg_taxi_dest = data.groupby("DEST")["TAXI_OUT"].mean().reset_index()
plt.figure(figsize=(10,5))
sns.barplot(x="DEST", y="TAXI_OUT", data=avg_taxi_dest.head(15),hue="DEST", palette="Purples")
plt.title("Average Taxi-Out Time per Destination")
plt.xlabel("Destination")
plt.ylabel("Average Taxi-Out (minutes)")
save_chart("avg_taxi_out_destination")

✅ Chart saved: charts\avg_taxi_out_destination.png


In [14]:
# -------------------------------------------------------------
# 8. Weather condition impact on delay
# -------------------------------------------------------------
avg_delay_weather = data.groupby("Condition")["DEP_DELAY"].mean().reset_index()
plt.figure(figsize=(10,5))
sns.barplot(x="DEP_DELAY", y="Condition", data=avg_delay_weather,hue="DEP_DELAY" ,palette="rocket")
plt.title("Average Delay by Weather Condition")
plt.xlabel("Average Delay (minutes)")
plt.ylabel("Weather Condition")
save_chart("avg_delay_by_weather")

✅ Chart saved: charts\avg_delay_by_weather.png


In [15]:
# -------------------------------------------------------------
# 9. Top 10 longest scheduled flights
# -------------------------------------------------------------
top_long = data.nlargest(10, "CRS_ELAPSED_TIME")
plt.figure(figsize=(10,5))
sns.barplot(y="DEST", x="CRS_ELAPSED_TIME", data=top_long, hue="OP_UNIQUE_CARRIER")
plt.title("Top 10 Longest Scheduled Flights")
plt.xlabel("Scheduled Time (minutes)")
plt.ylabel("Destination")
save_chart("longest_scheduled_flights")

✅ Chart saved: charts\longest_scheduled_flights.png


In [16]:
# -------------------------------------------------------------
# 10. Average departure deviation per carrier
# -------------------------------------------------------------
data["Deviation"] = data["DEP_TIME_M"] - data["CRS_DEP_M"]
avg_dev = data.groupby("OP_UNIQUE_CARRIER")["Deviation"].mean().reset_index()
plt.figure(figsize=(10,5))
sns.barplot(x="OP_UNIQUE_CARRIER", y="Deviation", data=avg_dev)
plt.title("Average Departure Deviation per Carrier")
plt.xlabel("Carrier")
plt.ylabel("Deviation (minutes)")
save_chart("departure_deviation_per_carrier")

✅ Chart saved: charts\departure_deviation_per_carrier.png


In [17]:
# -------------------------------------------------------------
# 11. Monthly delay trend
# -------------------------------------------------------------
avg_delay_month = data.groupby("MONTH")["DEP_DELAY"].mean().reset_index()
plt.figure(figsize=(8,4))
sns.lineplot(x="MONTH", y="DEP_DELAY", data=avg_delay_month, marker="o", color="darkorange")
plt.title("Monthly Average Departure Delay")
plt.xlabel("Month")
plt.ylabel("Average Delay (minutes)")
save_chart("monthly_avg_delay")

✅ Chart saved: charts\monthly_avg_delay.png


In [21]:
# -------------------------------------------------------------
# 12. Wind speed vs taxi-out time
# -------------------------------------------------------------
plt.figure(figsize=(8,5))
sns.scatterplot(x="Wind Speed", y="TAXI_OUT", data=data, alpha=0.6)
plt.title("Wind Speed vs Taxi-Out Time")
plt.xlabel("Wind Speed (mph)")
plt.ylabel("Taxi-Out (minutes)")
save_chart("wind_vs_taxiout")

✅ Chart saved: charts\wind_vs_taxiout.png


In [22]:
# -------------------------------------------------------------
# 13. Humidity vs delay by weather condition
# -------------------------------------------------------------
avg_hum_delay = data.groupby("Condition")[["Humidity","DEP_DELAY"]].mean().reset_index()
plt.figure(figsize=(10,5))
sns.scatterplot(x="Humidity", y="DEP_DELAY", hue="Condition", data=avg_hum_delay, s=100)
plt.title("Humidity vs Delay by Weather Condition")
plt.xlabel("Average Humidity (%)")
plt.ylabel("Average Delay (minutes)")
save_chart("humidity_vs_delay_condition")

✅ Chart saved: charts\humidity_vs_delay_condition.png


In [23]:
# -------------------------------------------------------------
# 14. Pressure vs delay
# -------------------------------------------------------------
plt.figure(figsize=(8,5))
sns.scatterplot(x="Pressure", y="DEP_DELAY", data=data, alpha=0.6)
plt.title("Pressure vs Departure Delay")
plt.xlabel("Pressure (inHg)")
plt.ylabel("Departure Delay (minutes)")
save_chart("pressure_vs_delay")

✅ Chart saved: charts\pressure_vs_delay.png


In [25]:
# -------------------------------------------------------------
# 15. On-time performance by carrier
# -------------------------------------------------------------
on_time_rate = data.groupby("OP_UNIQUE_CARRIER").apply(
    lambda x: (x["DEP_DELAY"] <= 0).mean() * 100
).reset_index(name="OnTime_Percentage")

plt.figure(figsize=(10,5))
sns.barplot(x="OP_UNIQUE_CARRIER", y="OnTime_Percentage", data=on_time_rate,hue="OP_UNIQUE_CARRIER" ,palette="Greens_r")
plt.title("On-time Performance per Carrier")
plt.xlabel("Carrier")
plt.ylabel("On-time Percentage (%)")
save_chart("on_time_performance_carrier")

  on_time_rate = data.groupby("OP_UNIQUE_CARRIER").apply(


✅ Chart saved: charts\on_time_performance_carrier.png


In [26]:
# -------------------------------------------------------------
# 16. Average delay by scheduled departure hour
# -------------------------------------------------------------
avg_delay_hour = data.groupby("sch_dep")["DEP_DELAY"].mean().reset_index()
plt.figure(figsize=(10,5))
sns.lineplot(x="sch_dep", y="DEP_DELAY", data=avg_delay_hour, marker="o")
plt.title("Average Delay by Scheduled Departure Hour")
plt.xlabel("Scheduled Departure Hour")
plt.ylabel("Average Delay (minutes)")
save_chart("avg_delay_by_hour")

✅ Chart saved: charts\avg_delay_by_hour.png


In [27]:
# -------------------------------------------------------------
# 17. Worst days (average delay > 15 mins)
# -------------------------------------------------------------
worst_days = data.groupby(["MONTH", "DAY_OF_MONTH"])["DEP_DELAY"].mean().reset_index()
worst_days = worst_days[worst_days["DEP_DELAY"] > 15]
plt.figure(figsize=(10,5))
sns.histplot(worst_days["DEP_DELAY"], bins=20, color="red")
plt.title("Distribution of High Delay Days (>15 min avg)")
plt.xlabel("Average Daily Delay (minutes)")
plt.ylabel("Frequency")
save_chart("worst_days_delay")

✅ Chart saved: charts\worst_days_delay.png
