In [1]:
%%writefile flight_analysis20.py

import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns


# Page-level settings (browser tab title and wide layout); this call is kept
# ahead of every other Streamlit command in the script.
st.set_page_config(layout="wide", page_title='Flight Delays and Cancellations 2015 Analysis')

# Headings rendered at the top of the page, above the tabs.
st.title('Flight Delays and Cancellations 2015')
st.header('Flight Analysis Dashboard')

# Data loading
#
# Reads the cleaned flights CSV into `df1` and halts the script with an
# on-page error if the file is missing, empty, unreadable, or lacks any of
# the required columns.

# Single source of truth for the CSV location, so the error message below
# always names the path that was actually tried.
DATA_PATH = 'C:/Users/Sara/Desktop/project/flight_cleaned.csv'

# Columns that must be present in the CSV before the dashboard proceeds.
required_columns = [
    'airline', 'flight_number', 'tail_number', 'origin_airport',
    'destination_airport', 'departure_delay', 'elapsed_time', 'air_time',
    'distance', 'arrival_delay', 'diverted', 'cancelled', 'date',
    'month_name', 'day_name', 'week_number', 'season',
]

# Only the read itself sits inside the try block, so the handlers report
# loading failures and nothing else.
try:
    df1 = pd.read_csv(DATA_PATH)
except FileNotFoundError:
    st.error("Error: 'flight_cleaned.csv' not found. "
             f"Please ensure the file is in the specified path: `{DATA_PATH}`")
    st.stop()
except pd.errors.EmptyDataError:
    st.error("Error: 'flight_cleaned.csv' is empty. Please check your data file.")
    st.stop()
except Exception as e:
    # Top-level boundary: surface anything unexpected on the page instead of
    # showing a raw traceback.
    st.error(f"An unexpected error occurred while loading or processing data: {e}")
    st.stop()

# Report every missing column at once rather than only the first one, so a
# malformed file can be fixed in a single pass.
missing_columns = [name for name in required_columns if name not in df1.columns]
if missing_columns:
    missing_list = ", ".join(f"'{name}'" for name in missing_columns)
    st.error(f"Error: Required column(s) {missing_list} not found in 'flight_cleaned.csv'. "
             "Please ensure your CSV file contains all expected columns.")
    st.stop()



# Summary statistics over the whole (unfiltered) dataset: one table for the
# numeric columns and one for the object-typed columns.
numeric_summary = df1.describe()
categorical_summary = df1.describe(include='O')

tab1, tab2 = st.tabs(['Descriptive Statistics', 'Charts & Filtered Data'])

with tab1:
    # Render each summary under its own subheading, in the order listed.
    for heading, summary in (
        ('Numerical Descriptive Data', numeric_summary),
        ('Categorical Descriptive Data', categorical_summary),
    ):
        st.subheader(heading)
        st.dataframe(summary, use_container_width=True)

# Calendar order for the day/month dropdowns; plain sorting would list them
# alphabetically (April, August, ...). Assumes full English names - any other
# spelling simply falls back to sorted order in _filter_options.
_DAY_ORDER = (
    'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday',
)
_MONTH_ORDER = (
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
)


def _filter_options(series, preferred_order=()):
    """Return the distinct non-null values of *series* as selectbox options.

    Missing values are dropped first, because sorting a mix of strings and
    NaN raises TypeError. Values named in *preferred_order* come first, in
    that order; every other value follows in sorted order.
    """
    values = series.dropna().unique().tolist()
    rank = {label: position for position, label in enumerate(preferred_order)}
    ranked = sorted((value for value in values if value in rank), key=rank.get)
    unranked = sorted(value for value in values if value not in rank)
    return ranked + unranked


def _most_common_index(options, series):
    """Return the position in *options* of the most frequent value of *series*.

    Falls back to 0 when there are no options or the mode is not among them.
    """
    modes = series.mode()
    if options and not modes.empty and modes.iloc[0] in options:
        return options.index(modes.iloc[0])
    return 0


with tab2:
    # --- Sidebar Filter Options ---
    st.sidebar.header("Filter Options")

    day_options = _filter_options(df1['day_name'], _DAY_ORDER)
    season_options = _filter_options(df1['season'])
    month_options = _filter_options(df1['month_name'], _MONTH_ORDER)

    # Each selectbox defaults to the most frequent value in its column.
    selected_day = st.sidebar.selectbox(
        'Select Day', day_options,
        index=_most_common_index(day_options, df1['day_name']),
    )
    selected_season = st.sidebar.selectbox(
        'Select Season', season_options,
        index=_most_common_index(season_options, df1['season']),
    )
    selected_month = st.sidebar.selectbox(
        'Select Month', month_options,
        index=_most_common_index(month_options, df1['month_name']),
    )

    # Keep only the rows that match all three sidebar selections.
    # NOTE(review): season and month are applied as independent conditions, so
    # picking a month that never occurs with the chosen season yields an empty
    # table - confirm this is the intended behaviour.
    filtered_df = df1[
        (df1['day_name'] == selected_day) &
        (df1['season'] == selected_season) &
        (df1['month_name'] == selected_month)
    ].copy()
  

    # --- Add a 'Number of Flights from Origin' column ---
    if not filtered_df.empty:
        # Flights per origin airport, counted within the *filtered* rows only.
        origin_totals = (
            filtered_df['origin_airport']
            .value_counts()
            .rename_axis('origin_airport')
            .reset_index(name='Number of Flights from Origin')
        )

        # Left-join the per-airport totals back so every filtered row carries
        # the count for its own origin airport.
        filtered_df = filtered_df.merge(origin_totals, how='left', on='origin_airport')


    # --- Filtered table preview (including the per-origin flight count) ---
    st.subheader('Filtered Data Preview with Flight Counts')
    if filtered_df.empty:
        st.info("No rows to display in the filtered table for the current selections.")
    else:
        # Show the filtered rows at full container width.
        st.dataframe(filtered_df, use_container_width=True)
    # --- End of filtered table preview ---

    st.markdown("------------")  # separator


    st.subheader('Relationship between Flight Distance and Air Time')
    if filtered_df.empty:
        st.warning("No data available for Distance vs. Air Time with the current filters.")
    else:
        # Coerce both plotted columns to numbers; values that cannot be parsed
        # become NaN.
        for axis_column in ('distance', 'air_time'):
            filtered_df[axis_column] = pd.to_numeric(filtered_df[axis_column], errors='coerce')

        # Only rows with both coordinates present can be drawn.
        scatter_points = filtered_df.dropna(subset=['distance', 'air_time']).copy()

        if scatter_points.empty:
            st.warning("No valid numeric data for Distance vs. Air Time after filtering.")
        else:
            scatter_fig, scatter_ax = plt.subplots(figsize=(10, 6))

            sns.scatterplot(
                data=scatter_points,
                x='distance',
                y='air_time',
                s=50,
                alpha=0.6,
                ax=scatter_ax
            )
            scatter_ax.set_title('Relationship between Flight Distance and Air Time', fontsize=16)
            scatter_ax.set_xlabel('Distance (miles)', fontsize=12)
            scatter_ax.set_ylabel('Air Time (minutes)', fontsize=12)
            scatter_ax.grid(True, linestyle='--', alpha=0.7)
            scatter_fig.tight_layout()

            st.pyplot(scatter_fig)
            # Close the figure once it has been rendered.
            plt.close(scatter_fig)

    st.markdown("------")  # separator


    st.subheader('Number of Flights by Origin Airport')
    if filtered_df.empty:
        st.warning("No data available for Flight Counts by Origin Airport with the current filters.")
    else:
        # One row per origin airport with its flight count in the filtered data.
        flights_by_origin = (
            filtered_df['origin_airport']
            .value_counts()
            .rename_axis('Origin Airport')
            .reset_index(name='Number of Flights')
        )

        chart_title = f'Number of Flights by Origin Airport (Filtered by {selected_month}, {selected_day}, {selected_season})'
        origin_bar = px.bar(
            data_frame=flights_by_origin,
            x='Origin Airport',
            y='Number of Flights',
            title=chart_title,
            labels={'Origin Airport': 'Origin Airport', 'Number of Flights': 'Number of Flights'},
            # Bars are shaded by their own height on the Viridis scale.
            color='Number of Flights',
            color_continuous_scale=px.colors.sequential.Viridis
        )
        # Order the airports from busiest to quietest along the x axis.
        origin_bar.update_layout(
            xaxis_title="Origin Airport",
            yaxis_title="Number of Flights",
            xaxis={'categoryorder':'total descending'}
        )
        st.plotly_chart(origin_bar, use_container_width=True)
  

Overwriting flight_analysis20.py


In [2]:
! streamlit run flight_analysis20.py

^C
