Strongly inspired by: https://www.kaggle.com/code/anandhuh/7-basic-plotly-charts-for-beginners

# Plotly Charts

In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Source CSV (shortened link) holding the flight records used by every chart below.
url = 'https://bit.ly/3AVBLcJ'

# The first two diverted-airport / tail-number columns are read as text
# instead of leaving their type to pandas' inference.
dataframe = pd.read_csv(
    url,
    encoding="ISO-8859-1",
    dtype={
        name: str
        for name in ('Div1Airport', 'Div1TailNum', 'Div2Airport', 'Div2TailNum')
    },
)

In [3]:
# Preview the first rows of the loaded table.
dataframe.head()

Unnamed: 0.1,Unnamed: 0,Year,Quarter,Month,DayofMonth,DayOfWeek,FlightDate,Reporting_Airline,DOT_ID_Reporting_Airline,IATA_CODE_Reporting_Airline,...,Div4WheelsOff,Div4TailNum,Div5Airport,Div5AirportID,Div5AirportSeqID,Div5WheelsOn,Div5TotalGTime,Div5LongestGTime,Div5WheelsOff,Div5TailNum
0,1295781,1998,2,4,2,4,1998-04-02,AS,19930,AS,...,,,,,,,,,,
1,1125375,2013,2,5,13,1,2013-05-13,EV,20366,EV,...,,,,,,,,,,
2,118824,1993,3,9,25,6,1993-09-25,UA,19977,UA,...,,,,,,,,,,
3,634825,1994,4,11,12,6,1994-11-12,HP,19991,HP,...,,,,,,,,,,
4,1888125,2017,3,8,17,4,2017-08-17,UA,19977,UA,...,,,,,,,,,,


In [4]:
# Per-column dtypes of the loaded table (inferred by read_csv, apart from the explicit str overrides).
dataframe.dtypes

Unnamed: 0            int64
Year                  int64
Quarter               int64
Month                 int64
DayofMonth            int64
                     ...   
Div5WheelsOn        float64
Div5TotalGTime      float64
Div5LongestGTime    float64
Div5WheelsOff       float64
Div5TailNum         float64
Length: 110, dtype: object

In [5]:
# (number of rows, number of columns) of the loaded table.
dataframe.shape

(27000, 110)

In [6]:
# Full list of column names, shown as a plain Python list.
list(dataframe.columns)

['Unnamed: 0',
 'Year',
 'Quarter',
 'Month',
 'DayofMonth',
 'DayOfWeek',
 'FlightDate',
 'Reporting_Airline',
 'DOT_ID_Reporting_Airline',
 'IATA_CODE_Reporting_Airline',
 'Tail_Number',
 'Flight_Number_Reporting_Airline',
 'OriginAirportID',
 'OriginAirportSeqID',
 'OriginCityMarketID',
 'Origin',
 'OriginCityName',
 'OriginState',
 'OriginStateFips',
 'OriginStateName',
 'OriginWac',
 'DestAirportID',
 'DestAirportSeqID',
 'DestCityMarketID',
 'Dest',
 'DestCityName',
 'DestState',
 'DestStateFips',
 'DestStateName',
 'DestWac',
 'CRSDepTime',
 'DepTime',
 'DepDelay',
 'DepDelayMinutes',
 'DepDel15',
 'DepartureDelayGroups',
 'DepTimeBlk',
 'TaxiOut',
 'WheelsOff',
 'WheelsOn',
 'TaxiIn',
 'CRSArrTime',
 'ArrTime',
 'ArrDelay',
 'ArrDelayMinutes',
 'ArrDel15',
 'ArrivalDelayGroups',
 'ArrTimeBlk',
 'Cancelled',
 'CancellationCode',
 'Diverted',
 'CRSElapsedTime',
 'ActualElapsedTime',
 'AirTime',
 'Flights',
 'Distance',
 'DistanceGroup',
 'CarrierDelay',
 'WeatherDelay',
 'NASDela

## Heatmap

In [7]:
# Restrict the correlation to the columns stored as int64.
numerical_df = dataframe.select_dtypes(include=['int64'])
correlation_matrix = numerical_df.corr()

# Render the pairwise correlations as an image with a diverging colour scale.
fig = px.imshow(
    correlation_matrix,
    title='Correlation Matrix Heatmap',
    color_continuous_scale='RdBu_r',
)
fig.show()

## Scatter plot

In [8]:
# Scatter of the Distance column against the AirTime column, one red marker per row.
fig = go.Figure(data=go.Scatter(x=dataframe['Distance'],
                                y=dataframe['AirTime'],
                                mode='markers',
                                marker=dict(color='red')))

# The title names the two plotted columns: the y values are AirTime,
# not a departure time as the previous title stated.
fig.update_layout(title='Distance vs Air Time',
                  xaxis_title='Distance',
                  yaxis_title='AirTime')

fig.show()

## Line Plot


In [9]:
# Mean arrival and departure delay for each value of the Month column.
line_data = (
    dataframe
    .groupby('Month')[['ArrDelay', 'DepDelay']]
    .mean()
    .reset_index()
)

In [10]:
# Show the per-month average delays that feed the line plot.
line_data

Unnamed: 0,Month,ArrDelay,DepDelay
0,1,7.097321,9.648889
1,2,6.530025,8.611001
2,3,5.429533,8.199462
3,4,4.040055,6.717631
4,5,5.874889,8.206117
5,6,9.347515,10.261841
6,7,8.851436,10.346619
7,8,7.40263,9.432387
8,9,2.198975,5.985575
9,10,3.242506,5.623046


In [11]:
# One line trace per delay column, drawn over the per-month averages.
fig = go.Figure()

for delay_column, trace_colour in (('ArrDelay', 'green'), ('DepDelay', 'blue')):
    fig.add_trace(go.Scatter(
        x=line_data['Month'],
        y=line_data[delay_column],
        mode='lines',
        marker=dict(color=trace_colour),
        name=delay_column,  # legend entry is the column name itself
    ))

fig.update_layout(
    title='Month vs Average Flight Delay Time',
    xaxis_title='Month',
    yaxis_title='Average Time',
)

fig.show()

## Bar Chart


In [12]:
# Sum of the Flights column for each reporting airline, as a flat two-column table.
bar_data = (
    dataframe
    .groupby('Reporting_Airline')['Flights']
    .sum()
    .reset_index()
)

In [13]:
# One bar per reporting airline, with height equal to its summed Flights value.
# bar_data is not filtered by destination, so the title describes
# totals per airline only.
fig = px.bar(bar_data,
             x="Reporting_Airline",
             y="Flights",
             title='Total number of flights by reporting airline')

fig.show()

## Cleaned line plot

In [14]:
fig = go.Figure()

# (column to plot, line colour, legend label) for each delay series.
delay_series = (
    ('ArrDelay', 'green', "Arrival Delay"),
    ('DepDelay', 'blue', "Departure Delay"),
)
for delay_column, line_colour, legend_label in delay_series:
    fig.add_trace(go.Scatter(
        x=line_data['Month'],
        y=line_data[delay_column],
        mode='lines',
        line=dict(color=line_colour),
        name=legend_label,
    ))

# Horizontal, untitled legend anchored by its bottom-right corner at (1, 1.02).
legend_settings = dict(
    title=None,
    orientation='h',
    yanchor='bottom',
    y=1.02,
    xanchor='right',
    x=1,
)

# White background with both axis grids switched off.
fig.update_layout(
    title='Month vs Average Flight Delay Time',
    title_x=0.5,
    xaxis_title='Month',
    yaxis_title='Average Time (minutes)',
    xaxis=dict(showgrid=False),
    yaxis=dict(showgrid=False),
    plot_bgcolor='white',
    legend=legend_settings,
)

fig.show()

## Variants

### Option 1

In [15]:
# Running totals of the averaged delays. They are stored in a separate frame so
# the shared `line_data` table keeps the columns it had when it was displayed.
cumulative_data = line_data.assign(
    Cumulative_ArrDelay=line_data['ArrDelay'].cumsum(),
    Cumulative_DepDelay=line_data['DepDelay'].cumsum(),
)

# 'lines+markers' is required for the marker symbol/size settings to be drawn;
# with plain 'lines' they were configured but never rendered.
fig = go.Figure(data=go.Scatter(x=cumulative_data['Month'],
                                y=cumulative_data['Cumulative_ArrDelay'],
                                mode='lines+markers',
                                line=dict(color='green'),
                                marker=dict(symbol='circle', size=8),
                                name="Cumulative Arrival Delay"))

fig.add_trace(go.Scatter(x=cumulative_data['Month'],
                         y=cumulative_data['Cumulative_DepDelay'],
                         mode='lines+markers',
                         line=dict(color='blue'),
                         marker=dict(symbol='square', size=8),
                         name="Cumulative Departure Delay"))


# White background, no grid lines, horizontal untitled legend anchored
# by its bottom-right corner at (1, 1.02).
fig.update_layout(
    title='Cumulative Flight Delay Time by Month',
    title_x=0.5,
    xaxis_title='Month',
    yaxis_title='Cumulative Delay Time (minutes)',
    xaxis=dict(showgrid=False),
    yaxis=dict(showgrid=False),
    plot_bgcolor='white',
    legend=dict(
        title=None,
        orientation='h',
        yanchor='bottom',
        y=1.02,
        xanchor='right',
        x=1
    )
)

fig.show()

### Option 2

In [16]:
# Mean arrival and departure delay for each value of the Year column
# (this replaces the per-month table held in line_data).
line_data = (
    dataframe
    .groupby('Year')[['ArrDelay', 'DepDelay']]
    .mean()
    .reset_index()
)

In [17]:
# Show the per-year average delays that feed the line plot.
line_data

Unnamed: 0,Year,ArrDelay,DepDelay
0,1987,11.803468,9.103448
1,1988,7.599727,7.405184
2,1989,8.659123,8.345557
3,1990,6.039548,6.523944
4,1991,4.912553,5.561972
5,1992,5.057665,5.644163
6,1993,5.19494,6.293333
7,1994,5.683824,6.853157
8,1995,7.290014,8.09153
9,1996,10.046921,11.130307


In [18]:
# Rows recorded for 2020, narrowed to the delay, date and flight-number columns.
is_2020 = dataframe["Year"] == 2020
inspected_columns = ["ArrDelay", "FlightDate", "Flight_Number_Reporting_Airline"]
dataframe.loc[is_2020, inspected_columns]

Unnamed: 0,ArrDelay,FlightDate,Flight_Number_Reporting_Airline
135,-17.0,2020-02-18,4123
162,26.0,2020-01-03,3508
270,7.0,2020-01-05,1555
715,,2020-03-27,1038
742,-22.0,2020-01-01,6079
...,...,...,...
26154,-5.0,2020-01-24,4565
26235,3.0,2020-01-04,3618
26281,-21.0,2020-03-11,4111
26349,-8.0,2020-02-04,2435


In [19]:
# Both delay series are plotted against the same Year values.
years = line_data['Year']

arrival_trace = go.Scatter(
    x=years,
    y=line_data['ArrDelay'],
    mode='lines',
    line={'color': 'green'},
    name="Arrival Delay",
)
departure_trace = go.Scatter(
    x=years,
    y=line_data['DepDelay'],
    mode='lines',
    line={'color': 'blue'},
    name="Departure Delay",
)

# Traces are supplied in the order arrival, departure.
fig = go.Figure(data=[arrival_trace, departure_trace])

# White background, no grid lines, horizontal untitled legend anchored
# by its bottom-right corner at (1, 1.02).
fig.update_layout(
    title='Year vs Average Flight Delay Time',
    title_x=0.5,
    xaxis_title='Year',
    yaxis_title='Average Time (minutes)',
    xaxis={'showgrid': False},
    yaxis={'showgrid': False},
    plot_bgcolor='white',
    legend={
        'title': None,
        'orientation': 'h',
        'yanchor': 'bottom',
        'y': 1.02,
        'xanchor': 'right',
        'x': 1,
    },
)

fig.show()

### Option 3



In [20]:
# Mean arrival and departure delay for each value of the DayOfWeek column
# (this replaces the table previously held in line_data).
line_data = (
    dataframe
    .groupby('DayOfWeek')[['ArrDelay', 'DepDelay']]
    .mean()
    .reset_index()
)

In [21]:
day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Label each aggregated row from its own DayOfWeek code (1 = Monday ... 7 = Sunday)
# rather than by position, so a missing day or an extra code cannot shift the
# labels or cause a length mismatch between x and y.
day_name_by_code = dict(enumerate(day_names, start=1))
week_data = line_data[line_data['DayOfWeek'].between(1, 7)]
day_labels = week_data['DayOfWeek'].map(day_name_by_code)

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=day_labels,
    y=week_data['ArrDelay'],
    mode='lines',
    line=dict(color='green'),
    name="Arrival Delay"
))

fig.add_trace(go.Scatter(
    x=day_labels,
    y=week_data['DepDelay'],
    mode='lines',
    line=dict(color='blue'),
    name="Departure Delay"
))

# The category order is pinned to day_names so the axis always runs Monday to Sunday.
fig.update_layout(
    title='DayOfWeek vs Average Flight Delay Time',
    title_x=0.5,
    xaxis_title='DayOfWeek',
    yaxis_title='Average Time (minutes)',
    xaxis=dict(
        showgrid=False,
        categoryorder='array',
        categoryarray=day_names
    ),
    yaxis=dict(showgrid=False),
    plot_bgcolor='white',
    legend=dict(
        title=None,
        orientation='h',
        yanchor='bottom',
        y=1.02,
        xanchor='right',
        x=1
    )
)

fig.show()