# Plots

In [10]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import numpy as np

In [3]:
# Read the processed CSV into `df` and print its column/dtype summary.
PROCESSED_CSV = '../data/processed/squawk7700_processed.csv'
df = pd.read_csv(PROCESSED_CSV)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 868 entries, 0 to 867
Data columns (total 40 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   flight_id         868 non-null    object 
 1   callsign          868 non-null    object 
 2   number            818 non-null    object 
 3   icao24            868 non-null    object 
 4   registration      868 non-null    object 
 5   typecode          868 non-null    object 
 6   origin            832 non-null    object 
 7   landing           630 non-null    object 
 8   destination       827 non-null    object 
 9   diverted          313 non-null    object 
 10  tweet_problem     441 non-null    object 
 11  tweet_result      441 non-null    object 
 12  tweet_fueldump    441 non-null    object 
 13  avh_id            98 non-null     object 
 14  avh_problem       98 non-null     object 
 15  avh_result        98 non-null     object 
 16  avh_fueldump      98 non-null     object 
 1

## Age BAR

##### Data prep

In [28]:
# Bucket aircraft age into left-closed 5-year intervals: [0, 5), [5, 10), ...
AGE_BIN_WIDTH = 5
max_age = df['ageYears'].max()
# The top edge must be strictly greater than the oldest age. With right=False the
# last interval excludes its right edge, so if the maximum were an exact multiple
# of the bin width and also the final edge, that row would fall outside every bin
# and be assigned NaN.
top_edge = (np.floor(max_age / AGE_BIN_WIDTH) + 1) * AGE_BIN_WIDTH
# np.arange excludes its stop value, so go one width past top_edge to include it.
bins = np.arange(0, top_edge + AGE_BIN_WIDTH, AGE_BIN_WIDTH)
df['age_categories'] = pd.cut(df['ageYears'], bins=bins, right=False)

In [30]:
# Preview the first five rows; `age_categories` appears as the last column.
df.head(n=5)

Unnamed: 0,flight_id,callsign,number,icao24,registration,typecode,origin,landing,destination,diverted,...,typeName,numEngines,engineType,isFreighter,productionLine,ageYears,verified,numRegistrations,registrations,age_categories
0,ARG1511_20180101,ARG1511,AR1511,e06442,LV-FQB,B738,SACO,SABE,SABE,,...,Boeing 737-800,2.0,Jet,False,Boeing 737 NG,13.2,True,3,"[{'reg': 'N814VL', 'active': True, 'hexIcao': ...","[10.0, 15.0)"
1,DAL14_20180101,DAL14,DL14,a14c29,N183DN,B763,KATL,,EDDF,,...,Boeing 767-300,2.0,Jet,False,Boeing 767,31.6,True,1,"[{'reg': 'N183DN', 'active': False, 'hexIcao':...","[30.0, 35.0)"
2,JBU263_20180108,JBU263,B6263,aa600a,N768JB,A320,KJFK,,KSEA,,...,Airbus A320,2.0,Jet,False,Airbus A320,15.9,True,1,"[{'reg': 'N768JB', 'active': True, 'hexIcao': ...","[15.0, 20.0)"
3,DAL65_20180108,DAL65,DL65,ab2855,N818NW,A333,KATL,KLAX,KLAX,,...,Airbus A330,2.0,Jet,False,Airbus A330,17.4,True,2,"[{'reg': 'N818NW', 'active': True, 'hexIcao': ...","[15.0, 20.0)"
4,EDW24_20180111,EDW24,WK24,4b1901,HB-JMF,A343,LSZH,LSZH,MMUN,LSZH,...,Airbus A340,4.0,Jet,False,Airbus A340,21.1,True,2,"[{'reg': 'HB-JMF', 'active': True, 'hexIcao': ...","[20.0, 25.0)"


In [31]:
# Number of flights in each age bin, ordered by bin. The column is categorical,
# so bins with no aircraft are kept with a count of 0.
age_category_counts = (
    df['age_categories']
    .value_counts()
    .sort_index()
    # Name both output columns explicitly. Before pandas 2.0, value_counts() left
    # the index unnamed and named the Series after the source column, so a bare
    # reset_index() produced 'index'/'age_categories' instead of
    # 'age_categories'/'count'.
    .rename_axis('age_categories')
    .reset_index(name='count')
)
# String form of each interval; this is the column the bar chart uses for its x axis.
age_category_counts['age_categories_str'] = age_category_counts['age_categories'].astype(str)
age_category_counts

Unnamed: 0,age_categories,count,age_categories_str
0,"[0.0, 5.0)",0,"[0.0, 5.0)"
1,"[5.0, 10.0)",195,"[5.0, 10.0)"
2,"[10.0, 15.0)",160,"[10.0, 15.0)"
3,"[15.0, 20.0)",159,"[15.0, 20.0)"
4,"[20.0, 25.0)",165,"[20.0, 25.0)"
5,"[25.0, 30.0)",87,"[25.0, 30.0)"
6,"[30.0, 35.0)",43,"[30.0, 35.0)"
7,"[35.0, 40.0)",7,"[35.0, 40.0)"
8,"[40.0, 45.0)",0,"[40.0, 45.0)"
9,"[45.0, 50.0)",2,"[45.0, 50.0)"


In [32]:
# Inspect the first bin: each entry is a pandas Interval, not a string.
age_category_counts.loc[0, 'age_categories']

Interval(0.0, 5.0, closed='left')

##### Plot

In [None]:
# Bar chart: one bar per age bin, bar height = number of flights in that bin.
age_bar = go.Bar(
    x=age_category_counts['age_categories_str'],
    y=age_category_counts['count'],
    name="Age",
    marker_color="red",
)
fig = go.Figure(data=[age_bar])

# update_layout returns the figure, so leaving it as the last expression renders the chart.
fig.update_layout(
    title=dict(text="Number of IFE Aircrafts per Age Category", font=dict(size=24)),
    xaxis_title="Age Category (Years)",
    yaxis_title="IFE Aircraft Frequency",
)

## Frequency per type BAR

##### Data prep

In [57]:
# Count non-null flight_id values per aircraft type, most frequent type first.
flights_per_type = df.groupby('typeName')['flight_id'].count()
type_name_frequency = flights_per_type.sort_values(ascending=False)
type_name_frequency.index

Index(['Airbus A320', 'Boeing 737-800', 'Airbus A319', 'Boeing 737',
       'Embraer 175', 'Boeing 767-300', 'Airbus A321', 'Boeing 757-200',
       'Boeing 737-700', 'Boeing 787-8', 'Boeing 777-200', 'Boeing 777',
       'Airbus A330', 'Canadair CRJ 900', 'Airbus A330-200', 'Boeing 737-900',
       'Boeing 777-300', 'Boeing 787-9', 'Boeing 747-400',
       'Airbus A320 (Sharklets)', 'De Havilland Canada DHC-8-400 Dash 8Q',
       'Embraer 145', 'Airbus A330-300', 'Boeing 747',
       'McDonnell Douglas MD-11', 'Canadair CRJ 200', 'Boeing 767-400',
       'Airbus A340-300', 'Embraer 170', 'Airbus A300-600', 'Airbus A380-800',
       'Airbus A321 (Sharklets)', 'Boeing 737-600', 'Boeing 767', 'Boeing 757',
       'Embraer Pheom 300', 'De Havilland Canada DHC-8-200 Dash 8 / 8Q',
       'Cessna 172', 'ATR 72', 'Boeing 747-8', 'Airbus A300', 'Boeing 717',
       'Canadair CRJ-701ER', 'BAe Avro RJ85', 'Bombardier Challenger 300',
       'Airbus A220-300', 'Sukhoi Superjet 100', 'Boeing 737-4

In [52]:
# Look at the raw per-flight aircraft type column.
df.loc[:, 'typeName']

0      Boeing 737-800
1      Boeing 767-300
2         Airbus A320
3         Airbus A330
4         Airbus A340
            ...      
863      Boeing 787-8
864      Boeing 787-8
865    Boeing 737-800
866    Boeing 737-800
867       Embraer 175
Name: typeName, Length: 868, dtype: object

##### Plot

In [None]:
# Bar chart of the ten aircraft types with the highest flight counts.
top_types = type_name_frequency.head(10)
type_bar = go.Bar(
    x=top_types.index,
    y=top_types.values,
    name="Type",
    marker_color="green",
)
fig2 = go.Figure(data=[type_bar])

# update_layout returns the figure, so leaving it as the last expression renders the chart.
fig2.update_layout(
    title=dict(text="Top 10 Most Frequent IFE Aircrafts", font=dict(size=24)),
    xaxis_title="Aircraft Type",
    yaxis_title="IFE Aircraft Frequency",
)

## Age + Frequency SCATTER

##### Data prep

In [76]:
# Turn the per-type counts into a two-column frame: typeName, frequency.
type_name_frequency_df = (
    type_name_frequency
    .reset_index()
    .rename(columns={'flight_id': 'frequency'})
)
type_name_frequency_df

Unnamed: 0,typeName,frequency
0,Airbus A320,115
1,Boeing 737-800,112
2,Airbus A319,55
3,Boeing 737,47
4,Embraer 175,40
...,...,...
94,Challenger 300,1
95,Airbus A340-600,1
96,Beechcraft 200 Super King Air,1
97,Airbus A340-500,1


In [74]:
# Median aircraft age per type; types whose median is NaN are dropped.
median_age = df.groupby('typeName')['ageYears'].median().dropna()
# Same data as a two-column frame: typeName, ageYears_median.
median_age_df = (
    median_age
    .reset_index()
    .rename(columns={'ageYears': 'ageYears_median'})
)
median_age_df

Unnamed: 0,typeName,ageYears_median
0,ATR 72,10.90
1,Airbus A220-300,6.80
2,Airbus A300,27.05
3,Airbus A300-600,31.80
4,Airbus A300-600F,26.10
...,...,...
72,Hawker 400XP,19.40
73,McDonnell Douglas MD-11,32.40
74,McDonnell Douglas MD-88,37.70
75,McDonnell Douglas MD-90,25.65


In [78]:
# Join frequency and median age on aircraft type. This is an inner join (the
# merge default), so only types present in both frames are kept.
freq_medianage_merged = pd.merge(
    type_name_frequency_df,
    median_age_df,
    how='inner',
    on='typeName',
)
freq_medianage_merged

Unnamed: 0,typeName,frequency,ageYears_median
0,Airbus A320,115,15.0
1,Boeing 737-800,112,13.6
2,Airbus A319,55,19.7
3,Boeing 737,47,8.4
4,Embraer 175,40,8.0
...,...,...,...
72,Cessna 510 Citation Mustang,1,15.5
73,Cessna 560 Citation Excel,1,16.6
74,Challenger 300,1,16.1
75,Airbus A340-600,1,18.0


##### Plot

In [None]:
# Scatter plot: one marker per aircraft type, median age on x and flight count on y.
# The type name is shown on hover.
type_points = go.Scatter(
    x=freq_medianage_merged['ageYears_median'],
    y=freq_medianage_merged['frequency'],
    name="Type",
    mode='markers',
    hovertext=freq_medianage_merged['typeName'],
    marker_color="blue",
)
fig3 = go.Figure(data=[type_points])

# update_layout returns the figure, so leaving it as the last expression renders the chart.
fig3.update_layout(
    title=dict(text="Age and Frequency per Aircraft Type", font=dict(size=24)),
    xaxis_title="Median Age (Years)",
    yaxis_title="IFE Aircraft Frequency",
)