In [None]:
# Load the airline csv to explore the data
import pandas as pd

# Turn off pandas' display truncation so frames are rendered in full:
# every column, every row, and untruncated cell text.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None

# Airline table; later cells look up `airline_type` by `airline_code_iata` in it
airlines = pd.read_csv(filepath_or_buffer='data/airlines.csv')

In [None]:
# Read the validation data set for exploration
validation = pd.read_csv(filepath_or_buffer='data/validation.csv')

In [None]:
# Attach the airline attributes to every validation row. The IATA code is the
# join key but is named differently in the two frames, hence left_on / right_on.
validation = pd.merge(
    validation,
    airlines,
    left_on='organization_code_iata',
    right_on='airline_code_iata',
)
# Show the joined rows ordered by ascending passenger load factor
validation.sort_values(by='passenger_load_factor_pct')


In [10]:
import matplotlib.pyplot as plt
# Read the COVID data set
covid = pd.read_csv(filepath_or_buffer='data/covid.csv')

# Line chart of stringency_index over time for a single country
def plot_country(country):
    """Plot ``stringency_index`` against ``date`` for one location in ``covid``.

    country: value matched exactly against the ``location`` column.
    """
    is_country = covid['location'].eq(country)
    covid.loc[is_country].plot(x='date', y='stringency_index')
    plt.show()

# Overlaid on the same axes, plot stringency_index over time for several countries
def plot_countries(countries):
    """Overlay the ``stringency_index`` time series of several locations on one chart.

    Args:
        countries: iterable of values matched exactly against the ``location``
            column of ``covid``. May be empty, in which case an empty chart
            is shown.
    """
    # Create the axes up front so that an empty `countries` no longer ends in
    # `None.legend()`.
    _, ax = plt.subplots()

    for country in countries:
        country_data = covid[covid['location'] == country]
        # `date` is read from CSV without date parsing. pandas plots a
        # string-valued x column by row position, which misaligns countries
        # whose records cover different date ranges; parsing to datetime puts
        # every series on one shared calendar axis.
        # NOTE(review): assumes `date` is parseable by pd.to_datetime - confirm.
        country_data = country_data.assign(date=pd.to_datetime(country_data['date']))
        # x_compat=True keeps plain matplotlib date units for every series, so
        # regularly and irregularly spaced countries can share the axes.
        country_data.plot(x='date', y='stringency_index', ax=ax, label=country, x_compat=True)

    # Draw the legend only when at least one series was plotted
    if ax.get_legend_handles_labels()[0]:
        ax.legend()

    plt.show()

# Overlay the first ten distinct locations. `Series.unique()` returns an array,
# which provides `tolist()` but not `to_list()`.
plot_countries(covid['location'].unique()[:10].tolist())

In [None]:
# Read the load factor data set
load_factor = pd.read_csv(filepath_or_buffer='data/load_factor_data.csv')

# Line chart (with point markers) of passenger_load_factor_pct for one airline
def plot_airline_load_factor(airline, start, end):
    """Plot an airline's ``passenger_load_factor_pct`` by ``period_end_date``.

    airline: code matched against ``organization_code_iata`` in ``load_factor``
        and against ``airline_code_iata`` in ``airlines``.
    start, end: inclusive bounds compared directly against ``period_end_date``.

    Raises IndexError when ``airlines`` has no row for ``airline``.
    """
    for_airline = load_factor[load_factor['organization_code_iata'] == airline]

    # Keep only the periods ending inside [start, end]
    in_window = for_airline['period_end_date'].between(start, end)
    # 'o-' draws a marker on every observation, joined by a line
    for_airline[in_window].plot(x='period_end_date', y='passenger_load_factor_pct', style='o-')

    # The airline type goes into the title; take it from the first matching row
    matching_types = airlines.loc[airlines['airline_code_iata'] == airline, 'airline_type']
    airline_type = matching_types.iloc[0]
    plt.title("Airline Load Factor: " + airline + " (" + airline_type + ")")
    # Rotate the x tick labels so they do not overlap
    plt.xticks(rotation=45)

    plt.show()

# Example: airline 5O over calendar year 2020
plot_airline_load_factor(airline='5O', start='2020-01-01', end='2020-12-31')

In [None]:
# Reduce two YYYY-MM-DD strings to (year, month) integer pairs
def convert_date(start, end):
    """Return ``((start_year, start_month), (end_year, end_month))``.

    Both arguments are strings laid out as ``YYYY-MM-DD``; the year is read
    from characters 0-3 and the month from characters 5-6. The day is ignored.
    """
    def _year_month(iso_date):
        return int(iso_date[:4]), int(iso_date[5:7])

    return _year_month(start), _year_month(end)

In [None]:
# Read the seats data set
seats = pd.read_csv(filepath_or_buffer='data/seats.csv')

def plot_seats(airline, start, end):
    """Plot the monthly total seats operated by one airline.

    Args:
        airline: code matched against ``operating_airline`` in ``seats`` and
            against ``airline_code_iata`` in ``airlines``.
        start: first date of the window as a ``YYYY-MM-DD`` string; only its
            year and month are used.
        end: last date of the window in the same format; its month is included.

    Raises:
        IndexError: if ``airlines`` has no row for ``airline``.
    """
    airline_type = airlines[airlines['airline_code_iata'] == airline]['airline_type'].values[0]

    # Reduce the bounds to (year, month) tuples, matching the grouped index below
    start, end = convert_date(start, end)

    # Keep only the rows operated by this airline
    airline_seats = seats[seats['operating_airline'] == airline]

    # Total seats per (year, month). Only `total_seats` is aggregated: summing
    # the whole frame would also aggregate every other column, including any
    # non-numeric ones, although nothing but `total_seats` is plotted.
    monthly_seats = airline_seats.groupby(['seats_year', 'seats_month'])[['total_seats']].sum()

    # groupby sorts its keys, so the (year, month) index can be sliced with
    # tuple bounds; both ends are inclusive. A marker is drawn for each month.
    monthly_seats.loc[start:end].plot(y='total_seats', marker='o')

    # Title identifies the airline and its type
    plt.title("Airline Seats: " + airline + " (" + airline_type + ")")
    plt.show()

# Show seats and load factor for the same airline over calendar year 2020
airline = 'G4'
start, end = '2020-01-01', '2020-12-31'

plot_seats(airline=airline, start=start, end=end)
plot_airline_load_factor(airline=airline, start=start, end=end)